From dbf9ad1f3ddcca7bbfa396f3fce0b8f34bb423e8 Mon Sep 17 00:00:00 2001 From: xoviat Date: Sun, 5 May 2019 13:09:39 -0500 Subject: [PATCH 001/134] tests: add windows compatibility --- ctest/CMakeLists.txt | 15 ++++++++++++--- test/CMakeLists.txt | 22 +++++++++++++++++++--- 2 files changed, 31 insertions(+), 6 deletions(-) diff --git a/ctest/CMakeLists.txt b/ctest/CMakeLists.txt index 14c9d1944..022379d83 100644 --- a/ctest/CMakeLists.txt +++ b/ctest/CMakeLists.txt @@ -5,9 +5,18 @@ enable_language(Fortran) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DADD${BU} -DCBLAS") +if(WIN32) +FILE(WRITE ${CMAKE_CURRENT_BINARY_DIR}/test_cblas_helper.ps1 +"$ErrorActionPreference = \"Stop\"\n" +"Get-Content $args[1] | & $args[0]\n" +) +set(test_helper powershell -ExecutionPolicy Bypass "${CMAKE_CURRENT_BINARY_DIR}/test_cblas_helper.ps1") +else() FILE(WRITE ${CMAKE_CURRENT_BINARY_DIR}/test_cblas_helper.sh "$1 < $2\n" ) +set(test_helper sh "${CMAKE_CURRENT_BINARY_DIR}/test_cblas_helper.sh") +endif() foreach(float_type ${FLOAT_TYPES}) string(SUBSTRING ${float_type} 0 1 float_char_upper) @@ -18,7 +27,7 @@ foreach(float_type ${FLOAT_TYPES}) c_${float_char}blas1.c) target_link_libraries(x${float_char}cblat1 ${OpenBLAS_LIBNAME}) add_test(NAME "x${float_char}cblat1" - COMMAND "${CMAKE_CURRENT_BINARY_DIR}/x${float_char}cblat1") + COMMAND $<TARGET_FILE:x${float_char}cblat1>) #level2 add_executable(x${float_char}cblat2 @@ -30,7 +39,7 @@ foreach(float_type ${FLOAT_TYPES}) constant.c) target_link_libraries(x${float_char}cblat2 ${OpenBLAS_LIBNAME}) add_test(NAME "x${float_char}cblat2" - COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_cblas_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/x${float_char}cblat2" "${PROJECT_SOURCE_DIR}/ctest/${float_char}in2") + COMMAND ${test_helper} $<TARGET_FILE:x${float_char}cblat2> "${PROJECT_SOURCE_DIR}/ctest/${float_char}in2") #level3 add_executable(x${float_char}cblat3 @@ -42,6 +51,6 @@ foreach(float_type ${FLOAT_TYPES}) constant.c) target_link_libraries(x${float_char}cblat3 ${OpenBLAS_LIBNAME}) add_test(NAME "x${float_char}cblat3" - COMMAND 
sh "${CMAKE_CURRENT_BINARY_DIR}/test_cblas_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/x${float_char}cblat3" "${PROJECT_SOURCE_DIR}/ctest/${float_char}in3") + COMMAND ${test_helper} $<TARGET_FILE:x${float_char}cblat3> "${PROJECT_SOURCE_DIR}/ctest/${float_char}in3") endforeach() diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index adeee3452..25a29030a 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -15,6 +15,20 @@ target_link_libraries(${test_bin} ${OpenBLAS_LIBNAME}) endforeach() # $1 exec, $2 input, $3 output_result +if(WIN32) +FILE(WRITE ${CMAKE_CURRENT_BINARY_DIR}/test_helper.ps1 +"Remove-Item -Force $args[2]\n" +"$ErrorActionPreference = \"Stop\"\n" +"Get-Content $args[1] | & $args[0]\n" +"If (Get-Content $args[2] | %{$_ -match \"FATAL\"}) {\n" +"echo Error\n" +"exit 1\n" +"} else {\n" +"exit 0\n" +"}\n" +) +set(helper_prefix powershell -ExecutionPolicy Bypass "${CMAKE_CURRENT_BINARY_DIR}/test_helper.ps1") +else() FILE(WRITE ${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh "rm -f $3\n" "$1 < $2\n" @@ -26,14 +40,16 @@ FILE(WRITE ${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh "exit 0\n" "fi\n" ) +set(helper_prefix sh "${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh") +endif() set(float_types s d c z) foreach(float_type ${float_types}) string(TOUPPER ${float_type} float_type_upper) add_test(NAME "${float_type}blas1" - COMMAND "${CMAKE_CURRENT_BINARY_DIR}/${float_type}blat1") + COMMAND $<TARGET_FILE:${float_type}blat1>) add_test(NAME "${float_type}blas2" - COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/${float_type}blat2" "${PROJECT_SOURCE_DIR}/test/${float_type}blat2.dat" ${float_type_upper}BLAT2.SUMM) + COMMAND ${helper_prefix} $<TARGET_FILE:${float_type}blat2> "${PROJECT_SOURCE_DIR}/test/${float_type}blat2.dat" ${float_type_upper}BLAT2.SUMM) add_test(NAME "${float_type}blas3" - COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/${float_type}blat3" "${PROJECT_SOURCE_DIR}/test/${float_type}blat3.dat" ${float_type_upper}BLAT3.SUMM) + COMMAND ${helper_prefix} $<TARGET_FILE:${float_type}blat3> 
"${PROJECT_SOURCE_DIR}/test/${float_type}blat3.dat" ${float_type_upper}BLAT3.SUMM) endforeach() From 5163a85d4054ee377a20301831bff949505624bb Mon Sep 17 00:00:00 2001 From: xoviat Date: Sun, 5 May 2019 13:09:48 -0500 Subject: [PATCH 002/134] add gitignore directory --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index e9d08ca7e..79e116271 100644 --- a/.gitignore +++ b/.gitignore @@ -88,3 +88,4 @@ build.* benchmark/*.goto benchmark/smallscaling +.vscode \ No newline at end of file From 6cfd6195c5aae0813d4335863d55a0ecf7a5d3a8 Mon Sep 17 00:00:00 2001 From: xoviat Date: Sun, 5 May 2019 13:10:36 -0500 Subject: [PATCH 003/134] param: define constant as blaslong to prevent overflow --- param.h | 96 +++++++++++++++++++++++++++++---------------------------- 1 file changed, 49 insertions(+), 47 deletions(-) diff --git a/param.h b/param.h index 4dcd96a75..71d423831 100644 --- a/param.h +++ b/param.h @@ -72,6 +72,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #ifndef PARAM_H #define PARAM_H +#include "common.h" + #ifdef OPTERON #define SNUMOPT 4 @@ -79,7 +81,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define GEMM_DEFAULT_OFFSET_A 64 #define GEMM_DEFAULT_OFFSET_B 256 -#define GEMM_DEFAULT_ALIGN 0x01ffffUL +#define GEMM_DEFAULT_ALIGN (BLASLONG)0x01ffffUL #define SGEMM_DEFAULT_UNROLL_N 4 #define DGEMM_DEFAULT_UNROLL_N 4 @@ -151,7 +153,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define GEMM_DEFAULT_OFFSET_A 64 #define GEMM_DEFAULT_OFFSET_B 832 -#define GEMM_DEFAULT_ALIGN 0x0fffUL +#define GEMM_DEFAULT_ALIGN (BLASLONG)0x0fffUL #define SGEMM_DEFAULT_UNROLL_N 4 #define DGEMM_DEFAULT_UNROLL_N 4 @@ -231,7 +233,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define GEMM_DEFAULT_OFFSET_A 64 #define GEMM_DEFAULT_OFFSET_B 832 -#define GEMM_DEFAULT_ALIGN 0x0fffUL +#define GEMM_DEFAULT_ALIGN (BLASLONG)0x0fffUL @@ -324,7 +326,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define GEMM_DEFAULT_OFFSET_A 64 #define GEMM_DEFAULT_OFFSET_B 832 -#define GEMM_DEFAULT_ALIGN 0x0fffUL +#define GEMM_DEFAULT_ALIGN (BLASLONG)0x0fffUL @@ -416,7 +418,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define GEMM_DEFAULT_OFFSET_A 64 #define GEMM_DEFAULT_OFFSET_B 832 -#define GEMM_DEFAULT_ALIGN 0x0fffUL +#define GEMM_DEFAULT_ALIGN (BLASLONG)0x0fffUL @@ -509,7 +511,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define GEMM_DEFAULT_OFFSET_A 64 #define GEMM_DEFAULT_OFFSET_B 832 -#define GEMM_DEFAULT_ALIGN 0x0fffUL +#define GEMM_DEFAULT_ALIGN (BLASLONG)0x0fffUL @@ -601,7 +603,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define GEMM_DEFAULT_OFFSET_A 0 #define GEMM_DEFAULT_OFFSET_B 0 -#define GEMM_DEFAULT_ALIGN 0x03fffUL +#define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL #define SYMV_P 8 @@ -719,7 +721,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define GEMM_DEFAULT_OFFSET_A 0 #define GEMM_DEFAULT_OFFSET_B 384 -#define GEMM_DEFAULT_ALIGN 0x0ffffUL +#define GEMM_DEFAULT_ALIGN (BLASLONG)0x0ffffUL #define SGEMM_DEFAULT_UNROLL_N 4 #define DGEMM_DEFAULT_UNROLL_N 4 @@ -767,7 +769,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define GEMM_DEFAULT_OFFSET_A 0 #define GEMM_DEFAULT_OFFSET_B 256 -#define GEMM_DEFAULT_ALIGN 0x0ffffUL +#define GEMM_DEFAULT_ALIGN (BLASLONG)0x0ffffUL #define SGEMM_DEFAULT_UNROLL_N 4 #define DGEMM_DEFAULT_UNROLL_N 4 @@ -814,7 +816,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define GEMM_DEFAULT_OFFSET_A 64 #define GEMM_DEFAULT_OFFSET_B 256 -#define GEMM_DEFAULT_ALIGN 0x01ffffUL +#define GEMM_DEFAULT_ALIGN (BLASLONG)0x01ffffUL #ifdef ARCH_X86 #define SGEMM_DEFAULT_UNROLL_N 4 @@ -883,7 +885,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define GEMM_DEFAULT_OFFSET_A 0 #define GEMM_DEFAULT_OFFSET_B 0 -#define GEMM_DEFAULT_ALIGN 0x0ffffUL +#define GEMM_DEFAULT_ALIGN (BLASLONG)0x0ffffUL #ifdef HAVE_SSE #define SGEMM_DEFAULT_UNROLL_M 8 @@ -938,7 +940,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define GEMM_DEFAULT_OFFSET_A 0 #define GEMM_DEFAULT_OFFSET_B 0 -#define GEMM_DEFAULT_ALIGN 0x0ffffUL +#define GEMM_DEFAULT_ALIGN (BLASLONG)0x0ffffUL #ifdef CORE_YONAH #define SGEMM_DEFAULT_UNROLL_M 4 @@ -1004,7 +1006,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define GEMM_DEFAULT_OFFSET_A 0 #define GEMM_DEFAULT_OFFSET_B 32 -#define GEMM_DEFAULT_ALIGN 0x0ffffUL +#define GEMM_DEFAULT_ALIGN (BLASLONG)0x0ffffUL #define SYMV_P 8 @@ -1061,7 +1063,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define GEMM_DEFAULT_OFFSET_B 256 #endif -#define GEMM_DEFAULT_ALIGN 0x0ffffUL +#define GEMM_DEFAULT_ALIGN (BLASLONG)0x0ffffUL #define SYMV_P 8 @@ -1121,7 +1123,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define GEMM_DEFAULT_OFFSET_A 448 #define GEMM_DEFAULT_OFFSET_B 128 -#define GEMM_DEFAULT_ALIGN 0x03fffUL +#define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL #define SYMV_P 8 @@ -1194,7 +1196,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define GEMM_DEFAULT_OFFSET_A 128 #define GEMM_DEFAULT_OFFSET_B 0 -#define GEMM_DEFAULT_ALIGN 0x03fffUL +#define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL #define SYMV_P 8 @@ -1265,7 +1267,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define GEMM_DEFAULT_OFFSET_A 128 #define GEMM_DEFAULT_OFFSET_B 0 -#define GEMM_DEFAULT_ALIGN 0x03fffUL +#define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL #define SYMV_P 8 @@ -1337,7 +1339,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define GEMM_DEFAULT_OFFSET_A 32 #define GEMM_DEFAULT_OFFSET_B 0 -#define GEMM_DEFAULT_ALIGN 0x03fffUL +#define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL #define SYMV_P 8 @@ -1410,7 +1412,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define GEMM_DEFAULT_OFFSET_A 0 #define GEMM_DEFAULT_OFFSET_B 0 -#define GEMM_DEFAULT_ALIGN 0x03fffUL +#define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL #define SYMV_P 8 @@ -1503,7 +1505,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define GEMM_DEFAULT_OFFSET_A 0 #define GEMM_DEFAULT_OFFSET_B 0 -#define GEMM_DEFAULT_ALIGN 0x03fffUL +#define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL #define SYMV_P 8 @@ -1623,7 +1625,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define GEMM_DEFAULT_OFFSET_A 0 #define GEMM_DEFAULT_OFFSET_B 0 -#define GEMM_DEFAULT_ALIGN 0x03fffUL +#define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL #define SYMV_P 8 @@ -1746,7 +1748,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define GEMM_DEFAULT_OFFSET_A 64 #define GEMM_DEFAULT_OFFSET_B 0 -#define GEMM_DEFAULT_ALIGN 0x0ffffUL +#define GEMM_DEFAULT_ALIGN (BLASLONG)0x0ffffUL #define SYMV_P 8 @@ -1808,7 +1810,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define GEMM_DEFAULT_OFFSET_A 0 #define GEMM_DEFAULT_OFFSET_B 128 -#define GEMM_DEFAULT_ALIGN 0x03fffUL +#define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL #define SGEMM_DEFAULT_UNROLL_M 8 #define SGEMM_DEFAULT_UNROLL_N 8 @@ -1862,7 +1864,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define GEMM_DEFAULT_OFFSET_A 512 #define GEMM_DEFAULT_OFFSET_B 512 -#define GEMM_DEFAULT_ALIGN 0x0ffffUL +#define GEMM_DEFAULT_ALIGN (BLASLONG)0x0ffffUL #define SGEMM_DEFAULT_UNROLL_M 4 #define SGEMM_DEFAULT_UNROLL_N 4 @@ -1930,7 +1932,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define GEMM_DEFAULT_OFFSET_A 0 #define GEMM_DEFAULT_OFFSET_B 8192 -#define GEMM_DEFAULT_ALIGN 0x0ffffUL +#define GEMM_DEFAULT_ALIGN (BLASLONG)0x0ffffUL #define SGEMM_DEFAULT_UNROLL_M 16 #define SGEMM_DEFAULT_UNROLL_N 4 @@ -1957,7 +1959,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #ifdef PPCG4 #define GEMM_DEFAULT_OFFSET_A 0 #define GEMM_DEFAULT_OFFSET_B 1024 -#define GEMM_DEFAULT_ALIGN 0x0ffffUL +#define GEMM_DEFAULT_ALIGN (BLASLONG)0x0ffffUL #define SGEMM_DEFAULT_UNROLL_M 16 #define SGEMM_DEFAULT_UNROLL_N 4 @@ -1988,7 +1990,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define GEMM_DEFAULT_OFFSET_A 2688 #define GEMM_DEFAULT_OFFSET_B 3072 -#define GEMM_DEFAULT_ALIGN 0x03fffUL +#define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL #define SGEMM_DEFAULT_UNROLL_M 16 #define SGEMM_DEFAULT_UNROLL_N 4 @@ -2029,7 +2031,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define GEMM_DEFAULT_OFFSET_A (32 * 0) #define GEMM_DEFAULT_OFFSET_B (32 * 0) -#define GEMM_DEFAULT_ALIGN 0x0ffffUL +#define GEMM_DEFAULT_ALIGN (BLASLONG)0x0ffffUL #define SGEMM_DEFAULT_UNROLL_M 4 #define SGEMM_DEFAULT_UNROLL_N 4 @@ -2065,7 +2067,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define GEMM_DEFAULT_OFFSET_A (32 * 0) #define GEMM_DEFAULT_OFFSET_B (32 * 0) -#define GEMM_DEFAULT_ALIGN 0x0ffffUL +#define GEMM_DEFAULT_ALIGN (BLASLONG)0x0ffffUL #define SGEMM_DEFAULT_UNROLL_M 8 #define SGEMM_DEFAULT_UNROLL_N 4 @@ -2100,7 +2102,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#if defined(POWER3) || defined(POWER4) || defined(POWER5) #define GEMM_DEFAULT_OFFSET_A 0 #define GEMM_DEFAULT_OFFSET_B 2048 -#define GEMM_DEFAULT_ALIGN 0x0ffffUL +#define GEMM_DEFAULT_ALIGN (BLASLONG)0x0ffffUL #define SGEMM_DEFAULT_UNROLL_M 4 #define SGEMM_DEFAULT_UNROLL_N 4 @@ -2173,7 +2175,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define GEMM_DEFAULT_OFFSET_A 384 #define GEMM_DEFAULT_OFFSET_B 1024 -#define GEMM_DEFAULT_ALIGN 0x03fffUL +#define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL #define SGEMM_DEFAULT_UNROLL_M 4 #define SGEMM_DEFAULT_UNROLL_N 4 @@ -2205,7 +2207,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define GEMM_DEFAULT_OFFSET_A 0 #define GEMM_DEFAULT_OFFSET_B 65536 -#define GEMM_DEFAULT_ALIGN 0x0ffffUL +#define GEMM_DEFAULT_ALIGN (BLASLONG)0x0ffffUL #define SGEMM_DEFAULT_UNROLL_M 16 #define SGEMM_DEFAULT_UNROLL_N 8 @@ -2269,7 +2271,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define GEMM_DEFAULT_OFFSET_A 0 #define GEMM_DEFAULT_OFFSET_B 2048 -#define GEMM_DEFAULT_ALIGN 0x03fffUL +#define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL #define SGEMM_DEFAULT_UNROLL_M 2 #define SGEMM_DEFAULT_UNROLL_N 8 @@ -2301,7 +2303,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define GEMM_DEFAULT_OFFSET_A 0 #define GEMM_DEFAULT_OFFSET_B 2048 -#define GEMM_DEFAULT_ALIGN 0x03fffUL +#define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL #define SGEMM_DEFAULT_UNROLL_M 4 #define SGEMM_DEFAULT_UNROLL_N 4 @@ -2332,7 +2334,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define GEMM_DEFAULT_OFFSET_A 0 #define GEMM_DEFAULT_OFFSET_B 0 -#define GEMM_DEFAULT_ALIGN 0x03fffUL +#define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL #define SGEMM_DEFAULT_UNROLL_M 2 #define SGEMM_DEFAULT_UNROLL_N 8 @@ -2368,7 +2370,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define GEMM_DEFAULT_OFFSET_A 0 #define GEMM_DEFAULT_OFFSET_B 0 -#define GEMM_DEFAULT_ALIGN 0x03fffUL +#define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL #define SGEMM_DEFAULT_UNROLL_M 8 #define SGEMM_DEFAULT_UNROLL_N 4 @@ -2409,7 +2411,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define GEMM_DEFAULT_OFFSET_A 0 #define GEMM_DEFAULT_OFFSET_B 0 -#define GEMM_DEFAULT_ALIGN 0x03fffUL +#define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL #define SGEMM_DEFAULT_UNROLL_M 2 #define SGEMM_DEFAULT_UNROLL_N 2 @@ -2450,7 +2452,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define GEMM_DEFAULT_OFFSET_A 0 #define GEMM_DEFAULT_OFFSET_B 0 -#define GEMM_DEFAULT_ALIGN 0x03fffUL +#define GEMM_DEFAULT_ALIGN (BLASLONG) 0x03fffUL #ifdef HAVE_MSA #define SGEMM_DEFAULT_UNROLL_M 8 @@ -2502,7 +2504,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define GEMM_DEFAULT_OFFSET_A 0 #define GEMM_DEFAULT_OFFSET_B 0 -#define GEMM_DEFAULT_ALIGN 0x03fffUL +#define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL #define SGEMM_DEFAULT_UNROLL_M 4 #define SGEMM_DEFAULT_UNROLL_N 4 @@ -2543,7 +2545,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define GEMM_DEFAULT_OFFSET_A 0 #define GEMM_DEFAULT_OFFSET_B 0 -#define GEMM_DEFAULT_ALIGN 0x03fffUL +#define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL #define SGEMM_DEFAULT_UNROLL_M 4 #define SGEMM_DEFAULT_UNROLL_N 2 @@ -2584,7 +2586,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define GEMM_DEFAULT_OFFSET_A 0 #define GEMM_DEFAULT_OFFSET_B 0 -#define GEMM_DEFAULT_ALIGN 0x03fffUL +#define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL #define SYMV_P 16 @@ -2750,7 +2752,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define GEMM_DEFAULT_OFFSET_A 0 #define GEMM_DEFAULT_OFFSET_B 0 -#define GEMM_DEFAULT_ALIGN 0x03fffUL +#define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL #define SGEMM_DEFAULT_UNROLL_M 2 #define SGEMM_DEFAULT_UNROLL_N 2 @@ -2791,7 +2793,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define GEMM_DEFAULT_OFFSET_A 0 #define GEMM_DEFAULT_OFFSET_B 0 -#define GEMM_DEFAULT_ALIGN 0x03fffUL +#define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL #define SGEMM_DEFAULT_UNROLL_M 4 #define SGEMM_DEFAULT_UNROLL_N 4 @@ -2832,7 +2834,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define GEMM_DEFAULT_OFFSET_A 0 #define GEMM_DEFAULT_OFFSET_B 0 -#define GEMM_DEFAULT_ALIGN 0x03fffUL +#define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL #define SGEMM_DEFAULT_UNROLL_M 4 #define SGEMM_DEFAULT_UNROLL_N 4 @@ -2873,7 +2875,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define GEMM_DEFAULT_OFFSET_A 0 #define GEMM_DEFAULT_OFFSET_B 0 -#define GEMM_DEFAULT_ALIGN 0x03fffUL +#define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL #define SGEMM_DEFAULT_UNROLL_M 2 #define SGEMM_DEFAULT_UNROLL_N 2 @@ -2912,7 +2914,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define GEMM_DEFAULT_OFFSET_A 0 #define GEMM_DEFAULT_OFFSET_B 0 -#define GEMM_DEFAULT_ALIGN 0x03fffUL +#define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL #define SGEMM_DEFAULT_UNROLL_M 8 #define SGEMM_DEFAULT_UNROLL_N 4 @@ -2994,7 +2996,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define GEMM_DEFAULT_OFFSET_A 0 #define GEMM_DEFAULT_OFFSET_B 0 -#define GEMM_DEFAULT_ALIGN 0x0ffffUL +#define GEMM_DEFAULT_ALIGN (BLASLONG)0x0ffffUL #define SGEMM_DEFAULT_UNROLL_N 2 #define DGEMM_DEFAULT_UNROLL_N 2 From 9031ebd7d50d903ad2372001f4d20908f0c0bf20 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sat, 12 Dec 2020 23:28:20 +0100 Subject: [PATCH 004/134] Update version to 0.3.13.dev --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 12730e0e3..c5ba3ceed 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -6,7 +6,7 @@ cmake_minimum_required(VERSION 2.8.5) project(OpenBLAS C ASM) set(OpenBLAS_MAJOR_VERSION 0) set(OpenBLAS_MINOR_VERSION 3) -set(OpenBLAS_PATCH_VERSION 13) +set(OpenBLAS_PATCH_VERSION 13.dev) set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}") # Adhere to GNU filesystem layout conventions From 87315e8a8d1f27684d886c31742d95d98886aa8a Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sat, 12 Dec 2020 23:28:49 +0100 Subject: [PATCH 005/134] Update version to 0.3.13.dev --- Makefile.rule | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile.rule b/Makefile.rule index e4b82104e..c68c20923 100644 --- a/Makefile.rule +++ b/Makefile.rule @@ -3,7 +3,7 @@ # # This library's version -VERSION = 0.3.13 +VERSION = 0.3.13.dev # If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a # and libopenblas_$(LIBNAMESUFFIX).so. 
Meanwhile, the soname in shared library From ad63647446b88f747a058a65f375372434c8f2b0 Mon Sep 17 00:00:00 2001 From: Joshie Date: Sun, 13 Dec 2020 09:06:14 +0000 Subject: [PATCH 006/134] Define BLAS acronym in README --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 267df5358..6c6322c32 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ Drone CI: [![Build Status](https://cloud.drone.io/api/badges/xianyi/OpenBLAS/sta ## Introduction -OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version. +OpenBLAS is an optimized Basic Linear Algebra Subprograms library based on GotoBLAS2 1.13 BSD version. Please read the documentation on the OpenBLAS wiki pages: . From 2fb11f873bfb5d690cbe096d81a837ede4cfa63f Mon Sep 17 00:00:00 2001 From: Rajalakshmi Srinivasaraghavan Date: Sun, 13 Dec 2020 10:41:45 -0600 Subject: [PATCH 007/134] POWER10: Improve copy performance This patch aligns the stores to 32 byte boundary for scopy and dcopy before entering into vector pair loop. For ccopy, changed the store instructions to stxv to improve performance of unaligned cases. --- kernel/power/ccopy_microk_power10.c | 115 ++++++++++++++++++++++++++++ kernel/power/ccopy_power10.c | 4 +- kernel/power/copy_microk_power10.c | 25 +++--- kernel/power/dcopy_power10.c | 16 ++-- kernel/power/scopy_power10.c | 15 +++- 5 files changed, 152 insertions(+), 23 deletions(-) create mode 100644 kernel/power/ccopy_microk_power10.c diff --git a/kernel/power/ccopy_microk_power10.c b/kernel/power/ccopy_microk_power10.c new file mode 100644 index 000000000..6c80f9cd4 --- /dev/null +++ b/kernel/power/ccopy_microk_power10.c @@ -0,0 +1,115 @@ +/*************************************************************************** +Copyright (c) 2020, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. 
Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*****************************************************************************/ + +#define HAVE_KERNEL 1 + +static void copy_kernel (BLASLONG n, FLOAT *x, FLOAT *y) +{ + __asm__ + ( + "lxvp 32, 0(%2) \n\t" + "lxvp 34, 32(%2) \n\t" + "lxvp 36, 64(%2) \n\t" + "lxvp 38, 96(%2) \n\t" + "lxvp 40, 128(%2) \n\t" + "lxvp 42, 160(%2) \n\t" + "lxvp 44, 192(%2) \n\t" + "lxvp 46, 224(%2) \n\t" + + "addi %2, %2, 256 \n\t" + "addic. 
%1, %1, -32 \n\t" + "ble two%= \n\t" + + ".align 5 \n" + "one%=: \n\t" + + "stxv 33, 0(%3) \n\t" + "stxv 32, 16(%3) \n\t" + "stxv 35, 32(%3) \n\t" + "stxv 34, 48(%3) \n\t" + "stxv 37, 64(%3) \n\t" + "stxv 36, 80(%3) \n\t" + "stxv 39, 96(%3) \n\t" + "stxv 38, 112(%3) \n\t" + "lxvp 32, 0(%2) \n\t" + "lxvp 34, 32(%2) \n\t" + "lxvp 36, 64(%2) \n\t" + "lxvp 38, 96(%2) \n\t" + + "stxv 41, 128(%3) \n\t" + "stxv 40, 144(%3) \n\t" + "stxv 43, 160(%3) \n\t" + "stxv 42, 176(%3) \n\t" + "stxv 45, 192(%3) \n\t" + "stxv 44, 208(%3) \n\t" + "stxv 47, 224(%3) \n\t" + "stxv 46, 240(%3) \n\t" + "lxvp 40, 128(%2) \n\t" + "lxvp 42, 160(%2) \n\t" + "lxvp 44, 192(%2) \n\t" + "lxvp 46, 224(%2) \n\t" + + + "addi %3, %3, 256 \n\t" + "addi %2, %2, 256 \n\t" + + "addic. %1, %1, -32 \n\t" + "bgt one%= \n" + + "two%=: \n\t" + + "stxv 33, 0(%3) \n\t" + "stxv 32, 16(%3) \n\t" + "stxv 35, 32(%3) \n\t" + "stxv 34, 48(%3) \n\t" + "stxv 37, 64(%3) \n\t" + "stxv 36, 80(%3) \n\t" + "stxv 39, 96(%3) \n\t" + "stxv 38, 112(%3) \n\t" + "stxv 41, 128(%3) \n\t" + "stxv 40, 144(%3) \n\t" + "stxv 43, 160(%3) \n\t" + "stxv 42, 176(%3) \n\t" + "stxv 45, 192(%3) \n\t" + "stxv 44, 208(%3) \n\t" + "stxv 47, 224(%3) \n\t" + "stxv 46, 240(%3) \n\t" + + "#n=%1 x=%4=%2 y=%0=%3" + : + "=m" (*y), + "+r" (n), // 1 + "+b" (x), // 2 + "+b" (y) // 3 + : + "m" (*x) + : + "cr0", + "vs32","vs33","vs34","vs35","vs36","vs37","vs38","vs39", + "vs40","vs41","vs42","vs43","vs44","vs45","vs46","vs47" + ); +} diff --git a/kernel/power/ccopy_power10.c b/kernel/power/ccopy_power10.c index a5877cd12..41c510460 100644 --- a/kernel/power/ccopy_power10.c +++ b/kernel/power/ccopy_power10.c @@ -28,7 +28,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "common.h" #if defined(__VEC__) || defined(__ALTIVEC__) -#include "copy_microk_power10.c" +#include "ccopy_microk_power10.c" #endif #ifndef HAVE_KERNEL @@ -86,7 +86,7 @@ int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) if ( (inc_x == 1) && (inc_y == 1 )) { - BLASLONG n1 = n & -64; + BLASLONG n1 = n & -32; if ( n1 > 0 ) { copy_kernel(n1, x, y); diff --git a/kernel/power/copy_microk_power10.c b/kernel/power/copy_microk_power10.c index c90dc3785..8bca1a1e7 100644 --- a/kernel/power/copy_microk_power10.c +++ b/kernel/power/copy_microk_power10.c @@ -62,38 +62,39 @@ static void copy_kernel (BLASLONG n, FLOAT *x, FLOAT *y) "one%=: \n\t" "stxvp 32, 0(%3) \n\t" - "lxvp 32, 0(%2) \n\t" "stxvp 34, 32(%3) \n\t" - "lxvp 34, 32(%2) \n\t" "stxvp 36, 64(%3) \n\t" - "lxvp 36, 64(%2) \n\t" "stxvp 38, 96(%3) \n\t" + "lxvp 32, 0(%2) \n\t" + "lxvp 34, 32(%2) \n\t" + "lxvp 36, 64(%2) \n\t" "lxvp 38, 96(%2) \n\t" "stxvp 40, 128(%3) \n\t" - "lxvp 40, 128(%2) \n\t" "stxvp 42, 160(%3) \n\t" - "lxvp 42, 160(%2) \n\t" "stxvp 44, 192(%3) \n\t" - "lxvp 44, 192(%2) \n\t" "stxvp 46, 224(%3) \n\t" + "lxvp 40, 128(%2) \n\t" + "lxvp 42, 160(%2) \n\t" + "lxvp 44, 192(%2) \n\t" "lxvp 46, 224(%2) \n\t" "stxvp 48, 256(%3) \n\t" - "lxvp 48, 256(%2) \n\t" "stxvp 50, 288(%3) \n\t" - "lxvp 50, 288(%2) \n\t" "stxvp 52, 320(%3) \n\t" - "lxvp 52, 320(%2) \n\t" "stxvp 54, 352(%3) \n\t" + "lxvp 48, 256(%2) \n\t" + "lxvp 50, 288(%2) \n\t" + "lxvp 52, 320(%2) \n\t" "lxvp 54, 352(%2) \n\t" + "stxvp 56, 384(%3) \n\t" - "lxvp 56, 384(%2) \n\t" "stxvp 58, 416(%3) \n\t" - "lxvp 58, 416(%2) \n\t" "stxvp 60, 448(%3) \n\t" - "lxvp 60, 448(%2) \n\t" "stxvp 62, 480(%3) \n\t" + "lxvp 56, 384(%2) \n\t" + "lxvp 58, 416(%2) \n\t" + "lxvp 60, 448(%2) \n\t" "lxvp 62, 480(%2) \n\t" "addi %3, %3, 512 \n\t" diff --git a/kernel/power/dcopy_power10.c b/kernel/power/dcopy_power10.c index cd10b7136..6c5eb4d77 100644 --- a/kernel/power/dcopy_power10.c +++ b/kernel/power/dcopy_power10.c @@ -85,12 +85,18 
@@ int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) if ( (inc_x == 1) && (inc_y == 1 )) { - - BLASLONG n1 = n & -64; - if ( n1 > 0 ) + if ( n >= 64 ) { - copy_kernel(n1, x, y); - i=n1; + BLASLONG align = ((32 - ((uintptr_t)y & (uintptr_t)0x1F)) >> 3) & 0x3; + for (i = 0; i < align; i++) { + y[i] = x[i] ; + } + } + BLASLONG n1 = (n-i) & -64; + if ( n1 ) + { + copy_kernel(n1, &x[i], &y[i]); + i += n1; } while(i < n) diff --git a/kernel/power/scopy_power10.c b/kernel/power/scopy_power10.c index 298a8998a..3398ce827 100644 --- a/kernel/power/scopy_power10.c +++ b/kernel/power/scopy_power10.c @@ -86,11 +86,18 @@ int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) if ( (inc_x == 1) && (inc_y == 1 )) { - BLASLONG n1 = n & -128; - if ( n1 > 0 ) + if ( n >= 128 ) { - copy_kernel (n1, x, y); - i=n1; + BLASLONG align = ((32 - ((uintptr_t)y & (uintptr_t)0x1F)) >> 2) & 0x7; + for (i = 0; i < align; i++) { + y[i] = x[i] ; + } + } + BLASLONG n1 = (n-i) & -128; + if ( n1 ) + { + copy_kernel(n1, &x[i], &y[i]); + i += n1; } while(i < n) From 00ce35336ee1eb1089f30d1e117a8a6a933f9654 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sun, 13 Dec 2020 21:28:01 +0100 Subject: [PATCH 008/134] Fix spurious removal of a trailing character from the hostarch string on x86_64 --- c_check | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/c_check b/c_check index 970d475d7..9c8b1abac 100644 --- a/c_check +++ b/c_check @@ -5,7 +5,7 @@ # Checking cross compile $hostos = `uname -s | sed -e s/\-.*//`; chop($hostos); -$hostarch = `uname -m | sed -e s/i.86/x86/`;chop($hostarch); +$hostarch = `uname -m | sed -e s/i.86/x86/`; $hostarch = `uname -p` if ($hostos eq "AIX" || $hostos eq "SunOS"); chop($hostarch); $hostarch = "x86_64" if ($hostarch eq "amd64"); From b03dc011be97b1a841aff6aa644e51a223cb404b Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Mon, 14 Dec 2020 19:21:52 +0100 Subject: [PATCH 009/134] Fix undefined CC variable in 
clang check --- f_check | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/f_check b/f_check index 42241ae10..d20b96081 100644 --- a/f_check +++ b/f_check @@ -330,7 +330,7 @@ if ($link ne "") { $flags =~ s/\@/\,/g; $linker_L .= "-Wl,". $flags . " " ; } - if ($flags =~ /-lgomp/ && $CC =~ /clang/) { + if ($flags =~ /-lgomp/ && $ENV{"CC"} =~ /clang/) { $flags = "-lomp"; } From 0f7776af0b65134d18cdc0935b8591441741853b Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Mon, 14 Dec 2020 22:30:36 +0100 Subject: [PATCH 010/134] Add Intel Rocket Lake --- cpuid_x86.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/cpuid_x86.c b/cpuid_x86.c index 84c12ff43..aca37da45 100644 --- a/cpuid_x86.c +++ b/cpuid_x86.c @@ -1436,6 +1436,15 @@ int get_cpuname(void){ return CPUTYPE_SANDYBRIDGE; else return CPUTYPE_NEHALEM; + case 7: // Rocket Lake + if(support_avx512()) + return CPUTYPE_SKYLAKEX; + if(support_avx2()) + return CPUTYPE_HASWELL; + if(support_avx()) + return CPUTYPE_SANDYBRIDGE; + else + return CPUTYPE_NEHALEM; } break; } @@ -2014,6 +2023,19 @@ int get_coretype(void){ #endif else return CORE_NEHALEM; + case 7:// Rocket Lake +#ifndef NO_AVX512 + if(support_avx512()) + return CORE_SKYLAKEX; +#endif +#ifndef NO_AVX2 + if(support_avx2()) + return CORE_HASWELL; +#endif + if(support_avx()) + return CORE_SANDYBRIDGE; + else + return CORE_NEHALEM; } case 5: switch (model) { From 865676682dc0c249fc89ec5713bb9695df277ff2 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Mon, 14 Dec 2020 22:40:23 +0100 Subject: [PATCH 011/134] Add Intel Rocket Lake --- driver/others/dynamic.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/driver/others/dynamic.c b/driver/others/dynamic.c index 58f4d8b59..7845d6951 100644 --- a/driver/others/dynamic.c +++ b/driver/others/dynamic.c @@ -656,7 +656,7 @@ static gotoblas_t *get_coretype(void){ } } case 10: - if (model == 5 || model == 6) { + if (model == 5 || model == 6) { 
if(support_avx2()) return &gotoblas_HASWELL; if(support_avx()) { @@ -666,7 +666,20 @@ static gotoblas_t *get_coretype(void){ openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK); return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels. } - } + } + if (model == 7) { + if (support_avx512()) + return &gotoblas_SKYLAKEX; + if(support_avx2()) + return &gotoblas_HASWELL; + if(support_avx()) { + openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK); + return &gotoblas_SANDYBRIDGE; + } else { + openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK); + return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels. + } + } return NULL; } case 0xf: From abef2ea770ce54349195506db84a3d64f65676a6 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Thu, 17 Dec 2020 11:32:27 +0100 Subject: [PATCH 012/134] Move -fma option setting to kernel/Makefile.L1 --- Makefile.x86_64 | 6 ------ 1 file changed, 6 deletions(-) diff --git a/Makefile.x86_64 b/Makefile.x86_64 index 00967bcb6..175db823d 100644 --- a/Makefile.x86_64 +++ b/Makefile.x86_64 @@ -32,12 +32,6 @@ CCOMMON_OPT += -mavx2 FCOMMON_OPT += -mavx2 endif endif -ifndef OLDGCC -ifdef HAVE_FMA3 -CCOMMON_OPT += -mfma -FCOMMON_OPT += -mfma -endif -endif ifeq ($(CORE), SKYLAKEX) ifndef DYNAMIC_ARCH From c73d8ee40ddd9c3f2cc311b7c45955a234a563c4 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Thu, 17 Dec 2020 11:34:05 +0100 Subject: [PATCH 013/134] Conditionally add -mfma to compiler options where needed --- kernel/Makefile.L1 | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/kernel/Makefile.L1 b/kernel/Makefile.L1 index 7ad94118a..09337363d 100644 --- a/kernel/Makefile.L1 +++ b/kernel/Makefile.L1 @@ -1,3 +1,11 @@ +FMAFLAG= +ifndef OLDGCC +ifdef HAVE_FMA3 +FMAFLAG = -mfma +endif +endif + + ### AMAX ### ifndef SAMAXKERNEL @@ -828,10 +836,10 @@ $(KDIR)xnrm2_k$(TSUFFIX).$(SUFFIX) $(KDIR)xnrm2_k$(TPSUFFIX).$(PSUFFIX) : $(KE $(CC) $(CFLAGS) -DCOMPLEX -c -DXDOUBLE $< -o $@ 
$(KDIR)srot_k$(TSUFFIX).$(SUFFIX) $(KDIR)srot_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SROTKERNEL) - $(CC) -c $(CFLAGS) -UCOMPLEX -UCOMPLEX -UDOUBLE $< -o $@ + $(CC) -c $(CFLAGS) $(FMAFLAG) -UCOMPLEX -UCOMPLEX -UDOUBLE $< -o $@ $(KDIR)drot_k$(TSUFFIX).$(SUFFIX) $(KDIR)drot_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(DROTKERNEL) - $(CC) -c $(CFLAGS) -UCOMPLEX -UCOMPLEX -DDOUBLE $< -o $@ + $(CC) -c $(CFLAGS) $(FMAFLAG) -UCOMPLEX -UCOMPLEX -DDOUBLE $< -o $@ $(KDIR)qrot_k$(TSUFFIX).$(SUFFIX) $(KDIR)qrot_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(QROTKERNEL) $(CC) -c $(CFLAGS) -UCOMPLEX -UCOMPLEX -DXDOUBLE $< -o $@ From e40416567a1f58414a7221a0f013109b681307fc Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sat, 19 Dec 2020 22:06:56 +0100 Subject: [PATCH 014/134] Add version printout for PGI/NVIDIA compiler --- Makefile | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Makefile b/Makefile index 54dd3be41..de0735c4a 100644 --- a/Makefile +++ b/Makefile @@ -59,6 +59,9 @@ endif @$(CC) --version > /dev/null 2>&1;\ if [ $$? -eq 0 ]; then \ cverinfo=`$(CC) --version | sed -n '1p'`; \ + if [ -z "$${cverinfo}" ]; then \ + cverinfo=`$(CC) --version | sed -n '2p'`; \ + fi; \ echo " C compiler ... $(C_COMPILER) (cmd & version : $${cverinfo})";\ else \ echo " C compiler ... $(C_COMPILER) (command line : $(CC))";\ @@ -67,6 +70,9 @@ ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN))) @$(FC) --version > /dev/null 2>&1;\ if [ $$? -eq 0 ]; then \ fverinfo=`$(FC) --version | sed -n '1p'`; \ + if [ -z "$${fverinfo}" ]; then \ + fverinfo=`$(FC) --version | sed -n '2p'`; \ + fi; \ echo " Fortran compiler ... $(F_COMPILER) (cmd & version : $${fverinfo})";\ else \ echo " Fortran compiler ... 
$(F_COMPILER) (command line : $(FC))";\ From b212a2fb9f956b56f7a55d9019f61ffa8bb56092 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sat, 19 Dec 2020 22:08:37 +0100 Subject: [PATCH 015/134] Add/modify "PGI" compiler options for NVIDIA SDK 20.11 --- Makefile.system | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/Makefile.system b/Makefile.system index 5adde36d8..45d02ba5c 100644 --- a/Makefile.system +++ b/Makefile.system @@ -181,7 +181,7 @@ endif # On x86_64 build getarch with march=native unless the compiler is PGI. This is required to detect AVX512 support in getarch. ifeq ($(HOSTARCH), x86_64) -ifeq ($(findstring pgcc,$(HOSTCC)),) +ifeq ($(findstring pgcc,$(HOSTCC))$(findstring nvc,$(HOSTCC)),) GETARCH_FLAGS += -march=native endif endif @@ -847,9 +847,19 @@ endif endif ifeq ($(C_COMPILER), PGI) +PGCVERSIONGT20 := $(shell expr `$(CC) --version|sed -n "2p" |sed -e "s/[^0-9.]//g" |cut -d "." -f 1` \> 20) +PGCVERSIONGTEQ20 := $(shell expr `$(CC) --version|sed -n "2p" |sed -e "s/[^0-9.]//g" |cut -d "." 
-f 1` \>= 20) +PGCMINORVERSIONGE11 := $(shell expr `$(CC) --version|sed -n "2p" |sed -e "s/[^0-9.]//g" |cut -c 4-5` == 11) +PGCVERSIONCHECK := $(PGCVERSIONGT20)$(PGCVERSIONGTEQ20)$(PGCMINORVERSIONGE11) +ifeq ($(PGCVERSIONCHECK), $(filter $(PGCVERSIONCHECK), 110 111 011)) +NEWPGI := 1 +endif ifdef BINARY64 ifeq ($(ARCH), x86_64) -CCOMMON_OPT += -tp p7-64 -D__MMX__ -Mnollvm +CCOMMON_OPT += -tp p7-64 +ifneq ($(NEWPGI),1) +CCOMMON_OPT += -D__MMX__ -Mnollvm +endif else ifeq ($(ARCH), power) ifeq ($(CORE), POWER8) @@ -1040,7 +1050,7 @@ endif else FCOMMON_OPT += -tp p7 endif -FCOMMON_OPT += -Mrecursive +FCOMMON_OPT += -Mrecursive -Kieee ifeq ($(USE_OPENMP), 1) FCOMMON_OPT += -mp endif From b859b6e79dc16907c4fd614a9857cc97e66f05ff Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sat, 19 Dec 2020 22:09:57 +0100 Subject: [PATCH 016/134] Add nvfortran --- f_check | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/f_check b/f_check index d20b96081..e9aca4ff9 100644 --- a/f_check +++ b/f_check @@ -32,7 +32,7 @@ if ($compiler eq "") { "xlf95", "xlf90", "xlf", "ppuf77", "ppuf95", "ppuf90", "ppuxlf", "pathf90", "pathf95", - "pgf95", "pgf90", "pgf77", + "pgf95", "pgf90", "pgf77", "pgfortran", "nvfortran", "flang", "egfortran", "ifort"); @@ -64,7 +64,6 @@ if ($compiler eq "") { if (!$?)
{ $data = `$compiler -O2 -S ftest.f > /dev/null 2>&1 && cat ftest.s && rm -f ftest.s`; - if ($data =~ /zhoge_/) { $bu = "_"; } @@ -87,7 +86,7 @@ if ($compiler eq "") { if ($compiler =~ /flang/) { $vendor = FLANG; $openmp = "-fopenmp"; - } elsif ($compiler =~ /pgf/) { + } elsif ($compiler =~ /pgf/ || $compiler =~ /nvf/) { $vendor = PGI; $openmp = "-mp"; } else { @@ -123,7 +122,7 @@ if ($compiler eq "") { $openmp = "-mp"; } - if ($data =~ /PGF/) { + if ($data =~ /PGF/ || $data =~ /NVF/) { $vendor = PGI; $openmp = "-mp"; } @@ -177,7 +176,7 @@ if ($compiler eq "") { $openmp = "-mp"; } - if ($compiler =~ /pgf/) { + if ($compiler =~ /pgf/ || $compiler =~ /nvf/) { $vendor = PGI; $bu = "_"; $openmp = "-mp"; @@ -330,7 +329,7 @@ if ($link ne "") { $flags =~ s/\@/\,/g; $linker_L .= "-Wl,". $flags . " " ; } - if ($flags =~ /-lgomp/ && $ENV{"CC"} =~ /clang/) { + if ($flags =~ /-lgomp/ && $CC =~ /clang/) { $flags = "-lomp"; } From 005cce5507c39b70ba040cd9c44a54bef17368c3 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sat, 19 Dec 2020 22:11:49 +0100 Subject: [PATCH 017/134] Amend SkylakeX options to support the NVIDIA compiler --- kernel/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/Makefile b/kernel/Makefile index 4e86546b9..1a6c9413f 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -36,7 +36,7 @@ ifeq ($(TARGET_CORE), COOPERLAKE) ifeq ($(GCCVERSIONGTEQ10), 1) override CFLAGS += -march=cooperlake else - override CFLAGS += -march=skylake-avx512 + override CFLAGS += -march=skylake-avx512 -mavx512f endif ifeq ($(OSNAME), CYGWIN_NT) override CFLAGS += -fno-asynchronous-unwind-tables @@ -47,7 +47,7 @@ ifeq ($(TARGET_CORE), COOPERLAKE) endif endif else ifeq ($(TARGET_CORE), SKYLAKEX) - override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE) -march=skylake-avx512 + override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE) -march=skylake-avx512 -mavx512f ifeq ($(OSNAME), CYGWIN_NT) override CFLAGS += 
-fno-asynchronous-unwind-tables endif From 114eb159a4b0d83a76ab837952516e7fadc21a30 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sat, 19 Dec 2020 22:15:58 +0100 Subject: [PATCH 018/134] Disable FMA intrinsics in the srot kernel when the compiler is PGI/NVIDIA --- kernel/x86_64/srot.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/x86_64/srot.c b/kernel/x86_64/srot.c index 3de586cb8..3264d251a 100644 --- a/kernel/x86_64/srot.c +++ b/kernel/x86_64/srot.c @@ -13,7 +13,7 @@ static void srot_kernel(BLASLONG n, FLOAT *x, FLOAT *y, FLOAT c, FLOAT s) { BLASLONG i = 0; -#if V_SIMD && (defined(HAVE_FMA3) || V_SIMD > 128) +#if V_SIMD && !defined(C_PGI) && (defined(HAVE_FMA3) || V_SIMD > 128) const int vstep = v_nlanes_f32; const int unrollx4 = n & (-vstep * 4); const int unrollx = n & -vstep; From 75b1f3becc236f269a332e6233f2eab35d46f683 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sat, 19 Dec 2020 23:17:40 +0100 Subject: [PATCH 019/134] Limit POWERPC DYNAMIC_CORE list to P8 and P9 for NVIDIA compilers --- Makefile.system | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/Makefile.system b/Makefile.system index 45d02ba5c..ce3a819a8 100644 --- a/Makefile.system +++ b/Makefile.system @@ -663,6 +663,7 @@ endif endif # ARCH zarch ifeq ($(ARCH), power) +ifneq ($(C_COMPILER), PGI) DYNAMIC_CORE = POWER6 DYNAMIC_CORE += POWER8 ifneq ($(C_COMPILER), GCC) @@ -689,6 +690,10 @@ else $(info, OpenBLAS: Your gcc version is too old to build the POWER10 kernels.) endif endif +else +DYNAMIC_CORE = POWER8 +DYNAMIC_CORE += POWER9 +endif endif # If DYNAMIC_CORE is not set, DYNAMIC_ARCH cannot do anything, so force it to empty @@ -1039,12 +1044,18 @@ ifeq ($(ARCH), x86_64) FCOMMON_OPT += -tp p7-64 else ifeq ($(ARCH), power) +ifeq ($(CORE), POWER6) +$(warning NVIDIA HPC compilers do not support POWER6.) 
+endif ifeq ($(CORE), POWER8) FCOMMON_OPT += -tp pwr8 endif ifeq ($(CORE), POWER9) FCOMMON_OPT += -tp pwr9 endif +ifeq ($(CORE), POWER10) +$(warning NVIDIA HPC compilers do not support POWER10.) +endif endif endif else From 91c3f86c2bc47a8ebecbcea8af5cca6e38d5295b Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sat, 19 Dec 2020 23:19:05 +0100 Subject: [PATCH 020/134] NVIDIA compiler does not yet support POWER10 --- Makefile.power | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Makefile.power b/Makefile.power index c7e972290..946f55232 100644 --- a/Makefile.power +++ b/Makefile.power @@ -10,9 +10,11 @@ USE_OPENMP = 1 endif ifeq ($(CORE), POWER10) +ifneq ($(C_COMPILER), PGI) CCOMMON_OPT += -Ofast -mcpu=power10 -mtune=power10 -mvsx -fno-fast-math FCOMMON_OPT += -O2 -frecursive -mcpu=power10 -mtune=power10 -fno-fast-math endif +endif ifeq ($(CORE), POWER9) ifneq ($(C_COMPILER), PGI) From 17c16f2a71cf957f4a4c74050da0825f6ebe203f Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sat, 19 Dec 2020 23:21:22 +0100 Subject: [PATCH 021/134] Implement builtin_cpu_is and limit cpu choices to P8 and P9 for NVIDIA compilers --- driver/others/dynamic_power.c | 151 ++++++++++++++++++++++++++++++++++ 1 file changed, 151 insertions(+) diff --git a/driver/others/dynamic_power.c b/driver/others/dynamic_power.c index a2f56d839..f9feeb6e8 100644 --- a/driver/others/dynamic_power.c +++ b/driver/others/dynamic_power.c @@ -27,7 +27,9 @@ static char *corename[] = { #define NUM_CORETYPES 4 char *gotoblas_corename(void) { +#ifndef C_PGI if (gotoblas == &gotoblas_POWER6) return corename[1]; +#endif if (gotoblas == &gotoblas_POWER8) return corename[2]; #if (!defined __GNUC__) || ( __GNUC__ >= 6) if (gotoblas == &gotoblas_POWER9) return corename[3]; @@ -38,10 +40,157 @@ char *gotoblas_corename(void) { return corename[0]; } +#ifdef C_PGI +/* + * NV HPC compilers do not yet implement __builtin_cpu_is(). + * Fake a version here for use in the CPU detection code below. 
+ * + * Strategy here is to first check the CPU to see what it actually is, + * and then test the input to see if what the CPU actually is matches + * what was requested. + */ + +#include + +/* + * Define POWER processor version table. + * + * NOTE NV HPC SDK compilers only support POWER8 and POWER9 at this time + */ + +#define CPU_UNKNOWN 0 +#define CPU_POWER5 5 +#define CPU_POWER6 6 +#define CPU_POWER8 8 +#define CPU_POWER9 9 +#define CPU_POWER10 10 + +static struct { + uint32_t pvr_mask; + uint32_t pvr_value; + const char* cpu_name; + uint32_t cpu_type; +} pvrPOWER [] = { + + { /* POWER6 in P5+ mode; 2.04-compliant processor */ + .pvr_mask = 0xffffffff, + .pvr_value = 0x0f000001, + .cpu_name = "POWER5+", + .cpu_type = CPU_POWER5, + }, + + { /* Power6 aka POWER6X*/ + .pvr_mask = 0xffff0000, + .pvr_value = 0x003e0000, + .cpu_name = "POWER6 (raw)", + .cpu_type = CPU_POWER6, + }, + + { /* Power7 */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x003f0000, + .cpu_name = "POWER7 (raw)", + .cpu_type = CPU_POWER6, + }, + + { /* Power7+ */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x004A0000, + .cpu_name = "POWER7+ (raw)", + .cpu_type = CPU_POWER6, + }, + + { /* Power8E */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x004b0000, + .cpu_name = "POWER8E (raw)", + .cpu_type = CPU_POWER8, + }, + + { /* Power8NVL */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x004c0000, + .cpu_name = "POWER8NVL (raw)", + .cpu_type = CPU_POWER8, + }, + + { /* Power8 */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x004d0000, + .cpu_name = "POWER8 (raw)", + .cpu_type = CPU_POWER8, + }, + + { /* Power9 DD2.0 */ + .pvr_mask = 0xffffefff, + .pvr_value = 0x004e0200, + .cpu_name = "POWER9 (raw)", + .cpu_type = CPU_POWER9, + }, + + { /* Power9 DD 2.1 */ + .pvr_mask = 0xffffefff, + .pvr_value = 0x004e0201, + .cpu_name = "POWER9 (raw)", + .cpu_type = CPU_POWER9, + }, + + { /* Power9 DD2.2 or later */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x004e0000, + .cpu_name = "POWER9 (raw)", + .cpu_type = CPU_POWER9, + }, + + { 
/* Power10 */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00800000, + .cpu_name = "POWER10 (raw)", + .cpu_type = CPU_POWER10, + }, + + { /* End of table, pvr_mask and pvr_value must be zero */ + .pvr_mask = 0x0, + .pvr_value = 0x0, + .cpu_name = "Unknown", + .cpu_type = CPU_UNKNOWN, + }, +}; + +static int __builtin_cpu_is(const char *cpu) { + int i; + uint32_t pvr; + uint32_t cpu_type; + + asm("mfpvr %0" : "=r"(pvr)); + + for (i = 0 ; i < sizeof pvrPOWER / sizeof *pvrPOWER ; ++i) { + if ((pvr & pvrPOWER[i].pvr_mask) == pvrPOWER[i].pvr_value) { + break; + } + } + +#if defined(DEBUG) + printf("%s: returning CPU=%s, cpu_type=%u\n", __func__, + pvrPOWER[i].cpu_name, (unsigned int) pvrPOWER[i].cpu_type); +#endif + cpu_type = pvrPOWER[i].cpu_type; + + if (!strcmp(cpu, "power8")) + return cpu_type == CPU_POWER8; + if (!strcmp(cpu, "power9")) + return cpu_type == CPU_POWER9; + return 0; +} + +#endif /* C_PGI */ + static gotoblas_t *get_coretype(void) { +#ifndef C_PGI if (__builtin_cpu_is("power6") || __builtin_cpu_is("power6x")) return &gotoblas_POWER6; +#endif if (__builtin_cpu_is("power8")) return &gotoblas_POWER8; #if (!defined __GNUC__) || ( __GNUC__ >= 6) @@ -77,7 +226,9 @@ static gotoblas_t *force_coretype(char * coretype) { switch (found) { +#ifndef C_PGI case 1: return (&gotoblas_POWER6); +#endif case 2: return (&gotoblas_POWER8); #if (!defined __GNUC__) || ( __GNUC__ >= 6) case 3: return (&gotoblas_POWER9); From 6f4698ee1fda9b569ed51c214dc51aed4774b21a Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Mon, 21 Dec 2020 07:41:18 +0100 Subject: [PATCH 022/134] Temporarily revert to the old nrm2 kernel --- kernel/arm64/KERNEL.NEOVERSEN1 | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/arm64/KERNEL.NEOVERSEN1 b/kernel/arm64/KERNEL.NEOVERSEN1 index ea010db42..074d72153 100644 --- a/kernel/arm64/KERNEL.NEOVERSEN1 +++ b/kernel/arm64/KERNEL.NEOVERSEN1 @@ -91,10 +91,10 @@ IDAMAXKERNEL = iamax_thunderx2t99.c ICAMAXKERNEL = izamax_thunderx2t99.c IZAMAXKERNEL
= izamax_thunderx2t99.c -SNRM2KERNEL = scnrm2_thunderx2t99.c -DNRM2KERNEL = dznrm2_thunderx2t99.c -CNRM2KERNEL = scnrm2_thunderx2t99.c -ZNRM2KERNEL = dznrm2_thunderx2t99.c +SNRM2KERNEL = nrm2.S +DNRM2KERNEL = nrm2.S +CNRM2KERNEL = znrm2.S +ZNRM2KERNEL = znrm2.S DDOTKERNEL = dot_thunderx2t99.c SDOTKERNEL = dot_thunderx2t99.c From 2768bc1764fe61fcebb6a0e5f906811f7460ed07 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Mon, 21 Dec 2020 07:42:51 +0100 Subject: [PATCH 023/134] Temporarily revert to the old nrm2 kernels --- kernel/arm64/KERNEL.THUNDERX2T99 | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/arm64/KERNEL.THUNDERX2T99 b/kernel/arm64/KERNEL.THUNDERX2T99 index a20d0d4a6..8333f60e6 100644 --- a/kernel/arm64/KERNEL.THUNDERX2T99 +++ b/kernel/arm64/KERNEL.THUNDERX2T99 @@ -153,12 +153,12 @@ IDAMAXKERNEL = iamax_thunderx2t99.c ICAMAXKERNEL = izamax_thunderx2t99.c IZAMAXKERNEL = izamax_thunderx2t99.c -SNRM2KERNEL = scnrm2_thunderx2t99.c -CNRM2KERNEL = scnrm2_thunderx2t99.c +SNRM2KERNEL = nrm2.S +CNRM2KERNEL = nrm2.S #DNRM2KERNEL = dznrm2_thunderx2t99_fast.c #ZNRM2KERNEL = dznrm2_thunderx2t99_fast.c -DNRM2KERNEL = dznrm2_thunderx2t99.c -ZNRM2KERNEL = dznrm2_thunderx2t99.c +DNRM2KERNEL = znrm2.S +ZNRM2KERNEL = znrm2.S DDOTKERNEL = dot_thunderx2t99.c From 8631e2976a01d074b207db0c58618c01c9998d35 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Mon, 21 Dec 2020 07:45:13 +0100 Subject: [PATCH 024/134] Temporarily revert to the old nrm2 kernels --- kernel/arm64/KERNEL.THUNDERX3T110 | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/kernel/arm64/KERNEL.THUNDERX3T110 b/kernel/arm64/KERNEL.THUNDERX3T110 index a20d0d4a6..4cdd8769f 100644 --- a/kernel/arm64/KERNEL.THUNDERX3T110 +++ b/kernel/arm64/KERNEL.THUNDERX3T110 @@ -153,13 +153,16 @@ IDAMAXKERNEL = iamax_thunderx2t99.c ICAMAXKERNEL = izamax_thunderx2t99.c IZAMAXKERNEL = izamax_thunderx2t99.c -SNRM2KERNEL = scnrm2_thunderx2t99.c -CNRM2KERNEL = 
scnrm2_thunderx2t99.c -#DNRM2KERNEL = dznrm2_thunderx2t99_fast.c -#ZNRM2KERNEL = dznrm2_thunderx2t99_fast.c -DNRM2KERNEL = dznrm2_thunderx2t99.c -ZNRM2KERNEL = dznrm2_thunderx2t99.c - +#SNRM2KERNEL = scnrm2_thunderx2t99.c +#CNRM2KERNEL = scnrm2_thunderx2t99.c +##DNRM2KERNEL = dznrm2_thunderx2t99_fast.c +##ZNRM2KERNEL = dznrm2_thunderx2t99_fast.c +#DNRM2KERNEL = dznrm2_thunderx2t99.c +#ZNRM2KERNEL = dznrm2_thunderx2t99.c +SNRM2KERNEL = nrm2.S +DNRM2KERNEL = nrm2.S +CNRM2KERNEL = znrm2.S +ZNRM2KERNEL = znrm2.S DDOTKERNEL = dot_thunderx2t99.c SDOTKERNEL = dot_thunderx2t99.c From 9a38592c79ee4e4b3a38e18092e880e4e92481c7 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sun, 27 Dec 2020 21:55:08 +0100 Subject: [PATCH 025/134] Add pointers to the netlib documentation and Gilbert Strang's linear algebra primers --- README.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 6c6322c32..fed3936ee 100644 --- a/README.md +++ b/README.md @@ -13,10 +13,14 @@ Drone CI: [![Build Status](https://cloud.drone.io/api/badges/xianyi/OpenBLAS/sta ## Introduction -OpenBLAS is an optimized Basic Linear Algebra Subprograms library based on GotoBLAS2 1.13 BSD version. +OpenBLAS is an optimized BLAS (Basic Linear Algebra Subprograms) library based on GotoBLAS2 1.13 BSD version. Please read the documentation on the OpenBLAS wiki pages: <https://github.com/xianyi/OpenBLAS/wiki>. +For a general introduction to the BLAS routines, please refer to the extensive documentation of their reference implementation hosted at netlib: +<https://www.netlib.org/blas>. On that site you will likewise find documentation for the reference implementation of the higher-level library LAPACK - the **L**inear **A**lgebra **Pack**age that comes included with OpenBLAS. If you are looking for a general primer or refresher on Linear Algebra, the set of six +20-minute lecture videos by Prof. Gilbert Strang on either MIT OpenCourseWare or Youtube may be helpful.
+ ## Binary Packages We provide official binary packages for the following platform: From 0a535e58d857cb3b6d2cd73db7b4197c64c82836 Mon Sep 17 00:00:00 2001 From: Aurelien Jarno Date: Tue, 29 Dec 2020 12:06:39 +0000 Subject: [PATCH 026/134] getarch.c: define OPENBLAS_SUPPORTED for riscv64 --- getarch.c | 1 + 1 file changed, 1 insertion(+) diff --git a/getarch.c b/getarch.c index 29671736e..f48944f36 100644 --- a/getarch.c +++ b/getarch.c @@ -1375,6 +1375,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #ifdef __riscv #include "cpuid_riscv64.c" +#define OPENBLAS_SUPPORTED #endif #ifdef __arm__ From 1b2508362b9033468eb98ea4146e31ab50d14fa3 Mon Sep 17 00:00:00 2001 From: Ashwin Sekhar T K Date: Fri, 1 Jan 2021 02:09:40 -0800 Subject: [PATCH 027/134] arm64: Fix nrm2 for input vectors with Inf Fix double precision nrm2 kernels returning NaN when the input vectors contain Inf/-Inf. --- kernel/arm64/KERNEL.NEOVERSEN1 | 8 ++++---- kernel/arm64/KERNEL.THUNDERX2T99 | 8 ++++---- kernel/arm64/KERNEL.THUNDERX3T110 | 17 +++++++---------- kernel/arm64/dznrm2_thunderx2t99.c | 28 +++++++++++++++++++++++++++- 4 files changed, 42 insertions(+), 19 deletions(-) diff --git a/kernel/arm64/KERNEL.NEOVERSEN1 b/kernel/arm64/KERNEL.NEOVERSEN1 index 074d72153..ea010db42 100644 --- a/kernel/arm64/KERNEL.NEOVERSEN1 +++ b/kernel/arm64/KERNEL.NEOVERSEN1 @@ -91,10 +91,10 @@ IDAMAXKERNEL = iamax_thunderx2t99.c ICAMAXKERNEL = izamax_thunderx2t99.c IZAMAXKERNEL = izamax_thunderx2t99.c -SNRM2KERNEL = nrm2.S -DNRM2KERNEL = nrm2.S -CNRM2KERNEL = znrm2.S -ZNRM2KERNEL = znrm2.S +SNRM2KERNEL = scnrm2_thunderx2t99.c +DNRM2KERNEL = dznrm2_thunderx2t99.c +CNRM2KERNEL = scnrm2_thunderx2t99.c +ZNRM2KERNEL = dznrm2_thunderx2t99.c DDOTKERNEL = dot_thunderx2t99.c SDOTKERNEL = dot_thunderx2t99.c diff --git a/kernel/arm64/KERNEL.THUNDERX2T99 b/kernel/arm64/KERNEL.THUNDERX2T99 index 8333f60e6..a20d0d4a6 100644 --- a/kernel/arm64/KERNEL.THUNDERX2T99 +++ b/kernel/arm64/KERNEL.THUNDERX2T99 
@@ -153,12 +153,12 @@ IDAMAXKERNEL = iamax_thunderx2t99.c ICAMAXKERNEL = izamax_thunderx2t99.c IZAMAXKERNEL = izamax_thunderx2t99.c -SNRM2KERNEL = nrm2.S -CNRM2KERNEL = nrm2.S +SNRM2KERNEL = scnrm2_thunderx2t99.c +CNRM2KERNEL = scnrm2_thunderx2t99.c #DNRM2KERNEL = dznrm2_thunderx2t99_fast.c #ZNRM2KERNEL = dznrm2_thunderx2t99_fast.c -DNRM2KERNEL = znrm2.S -ZNRM2KERNEL = znrm2.S +DNRM2KERNEL = dznrm2_thunderx2t99.c +ZNRM2KERNEL = dznrm2_thunderx2t99.c DDOTKERNEL = dot_thunderx2t99.c diff --git a/kernel/arm64/KERNEL.THUNDERX3T110 b/kernel/arm64/KERNEL.THUNDERX3T110 index 4cdd8769f..a20d0d4a6 100644 --- a/kernel/arm64/KERNEL.THUNDERX3T110 +++ b/kernel/arm64/KERNEL.THUNDERX3T110 @@ -153,16 +153,13 @@ IDAMAXKERNEL = iamax_thunderx2t99.c ICAMAXKERNEL = izamax_thunderx2t99.c IZAMAXKERNEL = izamax_thunderx2t99.c -#SNRM2KERNEL = scnrm2_thunderx2t99.c -#CNRM2KERNEL = scnrm2_thunderx2t99.c -##DNRM2KERNEL = dznrm2_thunderx2t99_fast.c -##ZNRM2KERNEL = dznrm2_thunderx2t99_fast.c -#DNRM2KERNEL = dznrm2_thunderx2t99.c -#ZNRM2KERNEL = dznrm2_thunderx2t99.c -SNRM2KERNEL = nrm2.S -DNRM2KERNEL = nrm2.S -CNRM2KERNEL = znrm2.S -ZNRM2KERNEL = znrm2.S +SNRM2KERNEL = scnrm2_thunderx2t99.c +CNRM2KERNEL = scnrm2_thunderx2t99.c +#DNRM2KERNEL = dznrm2_thunderx2t99_fast.c +#ZNRM2KERNEL = dznrm2_thunderx2t99_fast.c +DNRM2KERNEL = dznrm2_thunderx2t99.c +ZNRM2KERNEL = dznrm2_thunderx2t99.c + DDOTKERNEL = dot_thunderx2t99.c SDOTKERNEL = dot_thunderx2t99.c diff --git a/kernel/arm64/dznrm2_thunderx2t99.c b/kernel/arm64/dznrm2_thunderx2t99.c index b94f0cffc..b021a2832 100644 --- a/kernel/arm64/dznrm2_thunderx2t99.c +++ b/kernel/arm64/dznrm2_thunderx2t99.c @@ -58,6 +58,7 @@ extern int blas_level1_thread_with_return_value(int mode, BLASLONG m, BLASLONG n #define CUR_MAXINV "d8" #define CUR_MAXINV_V "v8.2d" #define CUR_MAX_V "v8.2d" +#define REGINF "d9" static void nrm2_compute(BLASLONG n, FLOAT *x, BLASLONG inc_x, double *ssq, double *scale) @@ -79,8 +80,10 @@ static void nrm2_compute(BLASLONG n, FLOAT 
*x, BLASLONG inc_x, " ble 9f //nrm2_kernel_L999 \n" "1: //nrm2_kernel_F_BEGIN: \n" + " mov x6, #0x7FF0000000000000 //+Infinity \n" " fmov "REGZERO", xzr \n" " fmov "REGONE", #1.0 \n" + " fmov "REGINF", x6 \n" " lsl "INC_X", "INC_X", #"INC_SHIFT" \n" " mov "J", "N" \n" " cmp "J", xzr \n" @@ -104,6 +107,8 @@ static void nrm2_compute(BLASLONG n, FLOAT *x, BLASLONG inc_x, " ldr d4, ["X"] \n" " fabs d4, d4 \n" " fmax "CUR_MAX", "SCALE", d4 \n" + " fcmp "CUR_MAX", "REGINF" \n" + " beq 10f \n" " fdiv "SCALE", "SCALE", "CUR_MAX" \n" " fmul "SCALE", "SCALE", "SCALE" \n" " fmul "SSQ", "SSQ", "SCALE" \n" @@ -116,6 +121,8 @@ static void nrm2_compute(BLASLONG n, FLOAT *x, BLASLONG inc_x, " ldr d3, ["X", #8] \n" " fabs d3, d3 \n" " fmax "CUR_MAX", "SCALE", d3 \n" + " fcmp "CUR_MAX", "REGINF" \n" + " beq 10f \n" " fdiv "SCALE", "SCALE", "CUR_MAX" \n" " fmul "SCALE", "SCALE", "SCALE" \n" " fmul "SSQ", "SSQ", "SCALE" \n" @@ -158,6 +165,8 @@ static void nrm2_compute(BLASLONG n, FLOAT *x, BLASLONG inc_x, " fmaxp v24.2d, v24.2d, v26.2d \n" " fmaxp v24.2d, v24.2d, v24.2d \n" " fmax "CUR_MAX", "SCALE", d24 \n" + " fcmp "CUR_MAX", "REGINF" \n" + " beq 10f \n" " fdiv "CUR_MAXINV", "REGONE", "CUR_MAX" \n" " //dup "CUR_MAX_V", v7.d[0] \n" " fdiv "SCALE", "SCALE", "CUR_MAX" \n" @@ -217,6 +226,8 @@ static void nrm2_compute(BLASLONG n, FLOAT *x, BLASLONG inc_x, " fmaxp v24.2d, v24.2d, v26.2d \n" " fmaxp v24.2d, v24.2d, v24.2d \n" " fmax "CUR_MAX", "SCALE", d24 \n" + " fcmp "CUR_MAX", "REGINF" \n" + " beq 10f \n" " fdiv "CUR_MAXINV", "REGONE", "CUR_MAX" \n" " //dup "CUR_MAX_V", v7.d[0] \n" " fdiv "SCALE", "SCALE", "CUR_MAX" \n" @@ -265,6 +276,8 @@ static void nrm2_compute(BLASLONG n, FLOAT *x, BLASLONG inc_x, " ldr d4, ["X"] \n" " fabs d4, d4 \n" " fmax "CUR_MAX", "SCALE", d4 \n" + " fcmp "CUR_MAX", "REGINF" \n" + " beq 10f \n" " fdiv "SCALE", "SCALE", "CUR_MAX" \n" " fmul "SCALE", "SCALE", "SCALE" \n" " fmul "SSQ", "SSQ", "SCALE" \n" @@ -276,6 +289,8 @@ static void nrm2_compute(BLASLONG n, 
FLOAT *x, BLASLONG inc_x, " ldr d3, ["X", #8] \n" " fabs d3, d3 \n" " fmax "CUR_MAX", "SCALE", d3 \n" + " fcmp "CUR_MAX", "REGINF" \n" + " beq 10f \n" " fdiv "SCALE", "SCALE", "CUR_MAX" \n" " fmul "SCALE", "SCALE", "SCALE" \n" " fmul "SSQ", "SSQ", "SCALE" \n" @@ -291,6 +306,11 @@ static void nrm2_compute(BLASLONG n, FLOAT *x, BLASLONG inc_x, "9: //nrm2_kernel_L999: \n" " str "SSQ", [%[SSQ_]] \n" " str "SCALE", [%[SCALE_]] \n" + " b 11f \n" + "10: \n" + " str "REGINF", [%[SSQ_]] \n" + " str "REGINF", [%[SCALE_]] \n" + "11: \n" : : [SSQ_] "r" (ssq), //%0 @@ -300,7 +320,7 @@ static void nrm2_compute(BLASLONG n, FLOAT *x, BLASLONG inc_x, [INCX_] "r" (inc_x) //%4 : "cc", "memory", - "x0", "x1", "x2", "x3", "x4", "x5", + "x0", "x1", "x2", "x3", "x4", "x5", "x6", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d8" ); @@ -359,6 +379,12 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) cur_ssq = *ptr; cur_scale = *(ptr + 1); + if (cur_ssq == INFINITY) { + ssq = INFINITY; + scale = INFINITY; + break; + } + if (cur_scale != 0) { if (cur_scale > scale) { scale = (scale / cur_scale); From 7aa1ff8ff6d3f151292eeb86c629e4077b867ae0 Mon Sep 17 00:00:00 2001 From: pkubaj Date: Fri, 1 Jan 2021 21:19:57 +0000 Subject: [PATCH 028/134] Fix build on FreeBSD/powerpc64le --- Makefile.system | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Makefile.system b/Makefile.system index ce3a819a8..ca0879fe6 100644 --- a/Makefile.system +++ b/Makefile.system @@ -21,6 +21,8 @@ ifeq ($(ARCH), amd64) override ARCH=x86_64 else ifeq ($(ARCH), powerpc64) override ARCH=power +else ifeq ($(ARCH), powerpc64le) +override ARCH=power else ifeq ($(ARCH), powerpc) override ARCH=power else ifeq ($(ARCH), i386) From 601b711c78a4a652820edacc16c6791a7f120c7d Mon Sep 17 00:00:00 2001 From: Rajalakshmi Srinivasaraghavan Date: Fri, 8 Jan 2021 08:01:36 -0600 Subject: [PATCH 029/134] Optimize swap function for POWER10 This patch makes use of new POWER10 vector pair instructions for loads and stores. 
--- kernel/power/cswap.c | 4 +- kernel/power/cswap_microk_power10.c | 127 ++++++++++++++++++++++++++++ kernel/power/dswap.c | 22 ++++- kernel/power/sswap.c | 22 ++++- kernel/power/swap_microk_power10.c | 105 +++++++++++++++++++++++ kernel/power/zswap.c | 4 +- 6 files changed, 280 insertions(+), 4 deletions(-) create mode 100644 kernel/power/cswap_microk_power10.c create mode 100644 kernel/power/swap_microk_power10.c diff --git a/kernel/power/cswap.c b/kernel/power/cswap.c index 5144a2e93..4d9b9ccd6 100644 --- a/kernel/power/cswap.c +++ b/kernel/power/cswap.c @@ -36,9 +36,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "common.h" -#if defined(POWER8) || defined(POWER9) || defined(POWER10) #if defined(__VEC__) || defined(__ALTIVEC__) +#if defined(POWER8) || defined(POWER9) #include "cswap_microk_power8.c" +#elif defined(POWER10) +#include "cswap_microk_power10.c" #endif #endif diff --git a/kernel/power/cswap_microk_power10.c b/kernel/power/cswap_microk_power10.c new file mode 100644 index 000000000..2a44a9e30 --- /dev/null +++ b/kernel/power/cswap_microk_power10.c @@ -0,0 +1,127 @@ +/*************************************************************************** +Copyright (c) 2021, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. 
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*****************************************************************************/ + +#if defined(DOUBLE) +#define HAVE_KERNEL_16 1 +static void zswap_kernel_16 (long n, double *x, double *y) +#else +#define HAVE_KERNEL_32 1 +static void cswap_kernel_32 (long n, float *x, float *y) +#endif +{ + __asm__ + ( + ".align 5 \n" + "one%=: \n\t" + "lxvp 32, 0(%4) \n\t" + "lxvp 34, 32(%4) \n\t" + "lxvp 36, 64(%4) \n\t" + "lxvp 38, 96(%4) \n\t" + + "lxvp 40, 128(%4) \n\t" + "lxvp 42, 160(%4) \n\t" + "lxvp 44, 192(%4) \n\t" + "lxvp 46, 224(%4) \n\t" + + "lxvp 48, 0(%3) \n\t" + "lxvp 50, 32(%3) \n\t" + "lxvp 52, 64(%3) \n\t" + "lxvp 54, 96(%3) \n\t" + + "lxvp 56, 128(%3) \n\t" + "lxvp 58, 160(%3) \n\t" + "lxvp 60, 192(%3) \n\t" + "lxvp 62, 224(%3) \n\t" + + + "stxv 33, 0(%3) \n\t" + "stxv 32, 16(%3) \n\t" + "stxv 35, 32(%3) \n\t" + "stxv 34, 48(%3) \n\t" + "stxv 37, 64(%3) \n\t" + "stxv 36, 80(%3) \n\t" + "stxv 39, 96(%3) \n\t" + "stxv 38, 112(%3) \n\t" + + "addi %3, %3, 128 \n\t" + + "stxv 41, 0(%3) \n\t" + "stxv 40, 16(%3) \n\t" + "stxv 43, 32(%3) \n\t" + "stxv 42, 48(%3) \n\t" + "stxv 45, 64(%3) \n\t" + "stxv 44, 80(%3) \n\t" + "stxv 47, 96(%3) \n\t" + "stxv 46, 112(%3) \n\t" + + "addi %3, %3, 128 \n\t" + + "stxv 49, 0(%4) 
\n\t" + "stxv 48, 16(%4) \n\t" + "stxv 51, 32(%4) \n\t" + "stxv 50, 48(%4) \n\t" + "stxv 53, 64(%4) \n\t" + "stxv 52, 80(%4) \n\t" + "stxv 55, 96(%4) \n\t" + "stxv 54, 112(%4) \n\t" + + "addi %4, %4, 128 \n\t" + + "stxv 57, 0(%4) \n\t" + "stxv 56, 16(%4) \n\t" + "stxv 59, 32(%4) \n\t" + "stxv 58, 48(%4) \n\t" + "stxv 61, 64(%4) \n\t" + "stxv 60, 80(%4) \n\t" + "stxv 63, 96(%4) \n\t" + "stxv 62, 112(%4) \n\t" + + "addi %4, %4, 128 \n\t" + +#if defined(DOUBLE) + "addic. %2, %2, -16 \n\t" +#else + "addic. %2, %2, -32 \n\t" +#endif + "bgt one%= \n" + + "#n=%2 x=%0=%3 y=%1=%4" + : + "+m" (*x), + "+m" (*y), + "+r" (n), // 2 + "+b" (x), // 3 + "+b" (y) // 4 + : + : + "cr0", + "vs32","vs33","vs34","vs35","vs36","vs37","vs38","vs39", + "vs40","vs41","vs42","vs43","vs44","vs45","vs46","vs47", + "vs48","vs49","vs50","vs51","vs52","vs53","vs54","vs55", + "vs56","vs57","vs58","vs59","vs60","vs61","vs62","vs63" + ); +} diff --git a/kernel/power/dswap.c b/kernel/power/dswap.c index ff3f95c79..9e6229c6a 100644 --- a/kernel/power/dswap.c +++ b/kernel/power/dswap.c @@ -35,9 +35,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "common.h" -#if defined(POWER8) || defined(POWER9) || defined(POWER10) #if defined(__VEC__) || defined(__ALTIVEC__) +#if defined(POWER8) || defined(POWER9) #include "dswap_microk_power8.c" +#elif defined(POWER10) +#include "swap_microk_power10.c" #endif #endif @@ -115,12 +117,30 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3, FLOAT *x, if ( (inc_x == 1) && (inc_y == 1 )) { +#if defined(POWER10) + if ( n >= 32 ) + { + BLASLONG align = ((32 - ((uintptr_t)y & (uintptr_t)0x1F)) >> 3) & 0x3; + for (i = 0; i < align; i++) { + temp = y[i]; + y[i] = x[i]; + x[i] = temp; + } + } + BLASLONG n1 = (n-i) & -32; + if ( n1 > 0 ) + { + dswap_kernel_32(n1,&x[i], &y[i]); + i+=n1; + } +#else BLASLONG n1 = n & -32; if ( n1 > 0 ) { dswap_kernel_32(n1, x, y); i=n1; } +#endif while(i < n) { diff --git a/kernel/power/sswap.c b/kernel/power/sswap.c index 44522f0a0..dd249fd36 100644 --- a/kernel/power/sswap.c +++ b/kernel/power/sswap.c @@ -35,9 +35,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "common.h" -#if defined(POWER8) || defined(POWER9) || defined(POWER10) #if defined(__VEC__) || defined(__ALTIVEC__) +#if defined(POWER8) || defined(POWER9) #include "sswap_microk_power8.c" +#elif defined(POWER10) +#include "swap_microk_power10.c" #endif #endif @@ -115,12 +117,30 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3, FLOAT *x, if ( (inc_x == 1) && (inc_y == 1 )) { +#if defined(POWER10) + if ( n >= 64 ) + { + BLASLONG align = ((32 - ((uintptr_t)y & (uintptr_t)0x1F)) >> 2) & 0x7; + for (i = 0; i < align; i++) { + temp = y[i]; + y[i] = x[i]; + x[i] = temp; + } + } + BLASLONG n1 = (n-i) & -64; + if ( n1 > 0 ) + { + sswap_kernel_32(n1,&x[i], &y[i]); + i+=n1; + } +#else BLASLONG n1 = n & -32; if ( n1 > 0 ) { sswap_kernel_32(n1, x, y); i=n1; } +#endif while(i < n) { diff --git a/kernel/power/swap_microk_power10.c b/kernel/power/swap_microk_power10.c new file mode 100644 index 000000000..f9c1fee52 --- /dev/null +++ b/kernel/power/swap_microk_power10.c @@ -0,0 +1,105 @@ +/*************************************************************************** +Copyright (c) 2021, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. 
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*****************************************************************************/ +#define HAVE_KERNEL_32 1 + +#if defined(DOUBLE) +static void dswap_kernel_32 (long n, double *x, double *y) +#else +static void sswap_kernel_32 (long n, float *x, float *y) +#endif +{ + __asm__ + ( + ".align 5 \n" + "one%=: \n\t" + + "lxvp 32, 0(%4) \n\t" + "lxvp 34, 32(%4) \n\t" + "lxvp 36, 64(%4) \n\t" + "lxvp 38, 96(%4) \n\t" + + "lxvp 40, 128(%4) \n\t" + "lxvp 42, 160(%4) \n\t" + "lxvp 44, 192(%4) \n\t" + "lxvp 46, 224(%4) \n\t" + + "lxvp 48, 0(%3) \n\t" + "lxvp 50, 32(%3) \n\t" + "lxvp 52, 64(%3) \n\t" + "lxvp 54, 96(%3) \n\t" + + "lxvp 56, 128(%3) \n\t" + "lxvp 58, 160(%3) \n\t" + "lxvp 60, 192(%3) \n\t" + "lxvp 62, 224(%3) \n\t" + + "stxvp 32, 0(%3) \n\t" + "stxvp 34, 32(%3) \n\t" + "stxvp 36, 64(%3) \n\t" + "stxvp 38, 96(%3) \n\t" + + "stxvp 40, 128(%3) \n\t" + "stxvp 42, 160(%3) \n\t" + "stxvp 44, 192(%3) \n\t" + "stxvp 46, 224(%3) \n\t" + + "stxvp 48, 0(%4) \n\t" + "stxvp 50, 32(%4) \n\t" + "stxvp 52, 64(%4) \n\t" + "stxvp 54, 96(%4) \n\t" + + "stxvp 56, 128(%4) \n\t" + "stxvp 58, 160(%4) \n\t" + "stxvp 60, 192(%4) \n\t" + "stxvp 62, 224(%4) \n\t" + + "addi %4, %4, 256 \n\t" + "addi %3, %3, 256 \n\t" + +#if defined(DOUBLE) + 
"addic. %2, %2, -32 \n\t" +#else + "addic. %2, %2, -64 \n\t" +#endif + "bgt one%= \n" + + "#n=%2 x=%0=%3 y=%1=%4" + : + "+m" (*x), + "+m" (*y), + "+r" (n), // 2 + "+b" (x), // 3 + "+b" (y) // 4 + : + : + "cr0", + "vs32","vs33","vs34","vs35","vs36","vs37","vs38","vs39", + "vs40","vs41","vs42","vs43","vs44","vs45","vs46","vs47", + "vs48","vs49","vs50","vs51","vs52","vs53","vs54","vs55", + "vs56","vs57","vs58","vs59","vs60","vs61","vs62","vs63" + ); +} diff --git a/kernel/power/zswap.c b/kernel/power/zswap.c index 3a5a8eb83..6cd3d9664 100644 --- a/kernel/power/zswap.c +++ b/kernel/power/zswap.c @@ -36,9 +36,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "common.h" -#if defined(POWER8) || defined(POWER9) || defined(POWER10) #if defined(__VEC__) || defined(__ALTIVEC__) +#if defined(POWER8) || defined(POWER9) #include "zswap_microk_power8.c" +#elif defined(POWER10) +#include "cswap_microk_power10.c" #endif #endif From b0beb0b1ca6469286dd69cdbeeb2c79d96ac66d0 Mon Sep 17 00:00:00 2001 From: "Chen, Guobing" Date: Mon, 11 Jan 2021 02:15:21 +0800 Subject: [PATCH 030/134] Initial code for Cooperlake BF16 GEMM kernel --- .../x86_64/sbgemm_block_microk_cooperlake.c | 426 ++++++++++++ .../sbgemm_microk_cooperlake_template.c | 625 ++++++++++++++++++ 2 files changed, 1051 insertions(+) create mode 100644 kernel/x86_64/sbgemm_block_microk_cooperlake.c create mode 100644 kernel/x86_64/sbgemm_microk_cooperlake_template.c diff --git a/kernel/x86_64/sbgemm_block_microk_cooperlake.c b/kernel/x86_64/sbgemm_block_microk_cooperlake.c new file mode 100644 index 000000000..2376fed02 --- /dev/null +++ b/kernel/x86_64/sbgemm_block_microk_cooperlake.c @@ -0,0 +1,426 @@ +#include "sbgemm.h" + +#include +// Walk around those intrinsics that missed by compiler +#define MM256_LOADU_EPI16(addr) \ + _mm256_maskz_loadu_epi16(~0, (addr)) +#define MM256_STOREU_EPI16(addr, reg) \ + _mm256_mask_storeu_epi16((addr), ~0, (reg)) + +#include +void print_block(BLASLONG 
m, BLASLONG n, bfloat16 * mat) +{ + printf("---- BLOCK %ld x %ld ----\n", m, n); + for (BLASLONG i=0; i> (32-m)); + __mmask32 tail_mask = *((__mmask32*) &tail_mask_value); + + __m512i array512_0, array512_1, array512_2, array512_3; + + BLASLONG idx_src_base0, idx_src_base1; + BLASLONG idx_target_base0, idx_target_base1; + + BLASLONG LDA_2x = 2*lda; + BLASLONG BF16_BLOCK_T_M_2x = 2*32; + idx_src_base0 = 0; + idx_src_base1 = lda; + idx_target_base0 = 0; + idx_target_base1 = 32; + for (BLASLONG idx_k = 0; idx_k < tag_k_2x; idx_k += 2) { + array512_0 = _mm512_maskz_loadu_epi16(tail_mask, &A[idx_src_base0]); + array512_1 = _mm512_maskz_loadu_epi16(tail_mask, &A[idx_src_base1]); + array512_2 = _mm512_unpacklo_epi16(array512_0, array512_1); + array512_3 = _mm512_unpackhi_epi16(array512_0, array512_1); + _mm512_storeu_si512(&block_A[idx_target_base0], array512_2); + _mm512_storeu_si512(&block_A[idx_target_base1], array512_3); + + idx_src_base0 += LDA_2x; + idx_src_base1 += LDA_2x; + idx_target_base0 += BF16_BLOCK_T_M_2x; + idx_target_base1 += BF16_BLOCK_T_M_2x; + } + + if (tag_k_2x != k) { + __m512i ZERO512 = _mm512_setzero_si512(); + array512_0 = _mm512_maskz_loadu_epi16(tail_mask, &A[idx_src_base0]); + array512_2 = _mm512_unpacklo_epi16(array512_0, ZERO512); + array512_3 = _mm512_unpackhi_epi16(array512_0, ZERO512); + _mm512_storeu_si512(&block_A[idx_target_base0], array512_2); + _mm512_storeu_si512(&block_A[idx_target_base1], array512_3); + } + +#ifdef DEBUG_PROFILE + print_block(BF16_BLOCK_THRES_K, BF16_BLOCK_THRES_M, block_A); +#endif +} + +void COL_MAJOR_INCOPY_KERNEL_Kx16(BLASLONG k, BLASLONG m, bfloat16 * A, BLASLONG lda, bfloat16 * block_A) +{ + BLASLONG tag_k_2x = k & (~1); + + __m256i array256_0, array256_1, array256_2, array256_3; + + BLASLONG idx_src_base0, idx_src_base1; + BLASLONG idx_target_base0; + + BLASLONG LDA_2x = 2*lda; + idx_src_base0 = 0; + idx_src_base1 = lda; + idx_target_base0 = 0; + for (BLASLONG idx_k = 0; idx_k < tag_k_2x; idx_k += 2) { + 
array256_0 = MM256_LOADU_EPI16(&A[idx_src_base0]); + array256_1 = MM256_LOADU_EPI16(&A[idx_src_base1]); + array256_2 = _mm256_unpacklo_epi16(array256_0, array256_1); + array256_3 = _mm256_unpackhi_epi16(array256_0, array256_1); + // Store in one row of block_A + MM256_STOREU_EPI16(&block_A[idx_target_base0], array256_2); + MM256_STOREU_EPI16(&block_A[idx_target_base0 + 16], array256_3); + + idx_src_base0 += LDA_2x; + idx_src_base1 += LDA_2x; + idx_target_base0 += 32; + } + + if (tag_k_2x != k) { + __m256i ZERO256 = _mm256_setzero_si256(); + array256_0 = MM256_LOADU_EPI16(&A[idx_src_base0]); + array256_2 = _mm256_unpacklo_epi16(array256_0, ZERO256); + array256_3 = _mm256_unpackhi_epi16(array256_0, ZERO256); + // Store in one row of block_A + MM256_STOREU_EPI16(&block_A[idx_target_base0], array256_2); + MM256_STOREU_EPI16(&block_A[idx_target_base0 + 16], array256_3); + } + +#ifdef DEBUG_PROFILE + print_block(BF16_BLOCK_THRES_K, BF16_BLOCK_THRES_M, block_A); +#endif +} + +void COL_MAJOR_INCOPY_KERNEL_Kx16m(BLASLONG k, BLASLONG m, bfloat16 * A, BLASLONG lda, bfloat16 * block_A) +{ + BLASLONG tag_k_2x = k & (~1); + unsigned short tail_mask_value = (((unsigned short)0xffff) >> (16-m)); + __mmask16 tail_mask = *((__mmask16*) &tail_mask_value); + + __m256i array256_0, array256_1, array256_2, array256_3; + + BLASLONG idx_src_base0, idx_src_base1; + BLASLONG idx_target_base0; + + BLASLONG LDA_2x = 2*lda; + idx_src_base0 = 0; + idx_src_base1 = lda; + idx_target_base0 = 0; + for (BLASLONG idx_k = 0; idx_k < tag_k_2x; idx_k += 2) { + array256_0 = _mm256_maskz_loadu_epi16(tail_mask, &A[idx_src_base0]); + array256_1 = _mm256_maskz_loadu_epi16(tail_mask, &A[idx_src_base1]); + array256_2 = _mm256_unpacklo_epi16(array256_0, array256_1); + array256_3 = _mm256_unpackhi_epi16(array256_0, array256_1); + // Store in one row of block_A + MM256_STOREU_EPI16(&block_A[idx_target_base0], array256_2); + MM256_STOREU_EPI16(&block_A[idx_target_base0 + 16], array256_3); + + idx_src_base0 +=
LDA_2x; + idx_src_base1 += LDA_2x; + idx_target_base0 += 32; + } + + if (tag_k_2x != k) { + __m256i ZERO256 = _mm256_setzero_si256(); + array256_0 = _mm256_maskz_loadu_epi16(tail_mask, &A[idx_src_base0]); + array256_2 = _mm256_unpacklo_epi16(array256_0, ZERO256); + array256_3 = _mm256_unpackhi_epi16(array256_0, ZERO256); + // Store in one row of block_A + MM256_STOREU_EPI16(&block_A[idx_target_base0], array256_2); + MM256_STOREU_EPI16(&block_A[idx_target_base0 + 16], array256_3); + } + +#ifdef DEBUG_PROFILE + print_block(BF16_BLOCK_THRES_K, BF16_BLOCK_THRES_M, block_A); +#endif +} + +void COL_MAJOR_ONCOPY_KERNEL_8x32(BLASLONG k, bfloat16 * B, BLASLONG ldb, bfloat16 * block_B) +{ + BLASLONG tag_k_32x = k & (~31); + BLASLONG idx_src_base0, idx_src_base1, idx_src_base2, idx_src_base3, idx_src_base4, idx_src_base5, idx_src_base6, idx_src_base7; + BLASLONG idx_target_base0; + + idx_src_base0 = 0; + idx_src_base1 = 1*ldb; + idx_src_base2 = 2*ldb; + idx_src_base3 = 3*ldb; + idx_src_base4 = 4*ldb; + idx_src_base5 = 5*ldb; + idx_src_base6 = 6*ldb; + idx_src_base7 = 7*ldb; + idx_target_base0 = 0; + + for (BLASLONG idx_k = 0; idx_k < tag_k_32x; idx_k += 32) { + _mm512_storeu_si512(&block_B[idx_target_base0+ 32*0], _mm512_loadu_si512(&B[idx_src_base0+idx_k])); + _mm512_storeu_si512(&block_B[idx_target_base0+ 32*1], _mm512_loadu_si512(&B[idx_src_base1+idx_k])); + _mm512_storeu_si512(&block_B[idx_target_base0+ 32*2], _mm512_loadu_si512(&B[idx_src_base2+idx_k])); + _mm512_storeu_si512(&block_B[idx_target_base0+ 32*3], _mm512_loadu_si512(&B[idx_src_base3+idx_k])); + _mm512_storeu_si512(&block_B[idx_target_base0+ 32*4], _mm512_loadu_si512(&B[idx_src_base4+idx_k])); + _mm512_storeu_si512(&block_B[idx_target_base0+ 32*5], _mm512_loadu_si512(&B[idx_src_base5+idx_k])); + _mm512_storeu_si512(&block_B[idx_target_base0+ 32*6], _mm512_loadu_si512(&B[idx_src_base6+idx_k])); + _mm512_storeu_si512(&block_B[idx_target_base0+ 32*7], _mm512_loadu_si512(&B[idx_src_base7+idx_k])); +
idx_target_base0 += 32*8; + } + + if (tag_k_32x != k) { + unsigned int tail_mask_value = (((unsigned int)0xffffffff) >> (32-(k-tag_k_32x))); + __mmask32 tail_mask = *((__mmask32*) &tail_mask_value); + _mm512_storeu_si512(&block_B[idx_target_base0+ 32*0], _mm512_maskz_loadu_epi16(tail_mask, &B[idx_src_base0+tag_k_32x])); + _mm512_storeu_si512(&block_B[idx_target_base0+ 32*1], _mm512_maskz_loadu_epi16(tail_mask, &B[idx_src_base1+tag_k_32x])); + _mm512_storeu_si512(&block_B[idx_target_base0+ 32*2], _mm512_maskz_loadu_epi16(tail_mask, &B[idx_src_base2+tag_k_32x])); + _mm512_storeu_si512(&block_B[idx_target_base0+ 32*3], _mm512_maskz_loadu_epi16(tail_mask, &B[idx_src_base3+tag_k_32x])); + _mm512_storeu_si512(&block_B[idx_target_base0+ 32*4], _mm512_maskz_loadu_epi16(tail_mask, &B[idx_src_base4+tag_k_32x])); + _mm512_storeu_si512(&block_B[idx_target_base0+ 32*5], _mm512_maskz_loadu_epi16(tail_mask, &B[idx_src_base5+tag_k_32x])); + _mm512_storeu_si512(&block_B[idx_target_base0+ 32*6], _mm512_maskz_loadu_epi16(tail_mask, &B[idx_src_base6+tag_k_32x])); + _mm512_storeu_si512(&block_B[idx_target_base0+ 32*7], _mm512_maskz_loadu_epi16(tail_mask, &B[idx_src_base7+tag_k_32x])); + } + +#ifdef DEBUG_PROFILE + print_block(BF16_BLOCK_THRES_N, BF16_BLOCK_THRES_K, block_B); +#endif +} + +void COL_MAJOR_ONCOPY_KERNEL_Nx32(BLASLONG n, BLASLONG k, bfloat16 * B, BLASLONG ldb, bfloat16 * block_B) +{ + BLASLONG tag_k_32x = k & (~31); + BLASLONG tag_n_2x = n & (~1); + BLASLONG idx_src_base0; + BLASLONG idx_target_base0; + + BLASLONG LDB_2x = 2*ldb; + + idx_target_base0 = 0; + + for (BLASLONG idx_k = 0; idx_k < tag_k_32x; idx_k += 32) { + idx_src_base0 = 0; + for (BLASLONG idx_n = 0; idx_n < tag_n_2x; idx_n += 2) { + _mm512_storeu_si512(&block_B[idx_target_base0+ 32*0], _mm512_loadu_si512(&B[idx_src_base0 + idx_k])); + _mm512_storeu_si512(&block_B[idx_target_base0+ 32*1], _mm512_loadu_si512(&B[idx_src_base0 + ldb + idx_k])); + idx_src_base0 += LDB_2x; + idx_target_base0 += 64; + } + + if 
(tag_n_2x != n) { + _mm512_storeu_si512(&block_B[idx_target_base0], _mm512_loadu_si512(&B[idx_src_base0 + idx_k])); + idx_target_base0 += 32; + } + } + + if (tag_k_32x != k) { + unsigned int tail_mask_value = (((unsigned int)0xffffffff) >> (32-(k-tag_k_32x))); + __mmask32 tail_mask = *((__mmask32*) &tail_mask_value); + idx_src_base0 = 0; + for (BLASLONG idx_n = 0; idx_n < tag_n_2x; idx_n += 2) { + _mm512_storeu_si512(&block_B[idx_target_base0+ 32*0], _mm512_maskz_loadu_epi16(tail_mask, &B[idx_src_base0 + tag_k_32x])); + _mm512_storeu_si512(&block_B[idx_target_base0+ 32*1], _mm512_maskz_loadu_epi16(tail_mask, &B[idx_src_base0 + ldb + tag_k_32x])); + idx_src_base0 += LDB_2x; + idx_target_base0 += 64; + } + + if (tag_n_2x != n) { + _mm512_storeu_si512(&block_B[idx_target_base0], _mm512_maskz_loadu_epi16(tail_mask, &B[idx_src_base0 + tag_k_32x])); + } + } + +#ifdef DEBUG_PROFILE + print_block(BF16_BLOCK_THRES_N, BF16_BLOCK_THRES_K, block_B); +#endif +} + +// Scale matrix C while beta is not ZERO or ONE +void sbgemm_scal_operation(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST float beta, float *C, OPENBLAS_CONST blasint ldc) +{ + BLASLONG tag_n_Nx = N & (~3); + BLASLONG tag_n_Mx = M & (~15); + + BLASLONG LDC4x = ldc*4; + BLASLONG idx_base_0 = 0; + BLASLONG idx_base_1 = ldc; + BLASLONG idx_base_2 = ldc*2; + BLASLONG idx_base_3 = ldc*3; + + unsigned short tail_mask_value = (((unsigned short)0xffff) >> (16-M+tag_n_Mx)); + __mmask16 tail_mask = *((__mmask16*) &tail_mask_value); + + __m512 array_512_0, array_512_1, array_512_2, array_512_3; + + __m512 BETAVECTOR = _mm512_set1_ps(beta); + + if (Order == CblasColMajor) { + for (BLASLONG idx_n = 0; idx_n < tag_n_Nx; idx_n += 4) { + for (BLASLONG idx_m = 0; idx_m < tag_n_Mx; idx_m += 16) { + array_512_0 = _mm512_loadu_ps(&C[idx_base_0+idx_m]); + array_512_1 = _mm512_loadu_ps(&C[idx_base_1+idx_m]); + array_512_2 = _mm512_loadu_ps(&C[idx_base_2+idx_m]); + array_512_3 = 
_mm512_loadu_ps(&C[idx_base_3+idx_m]); + + array_512_0 = _mm512_mul_ps(BETAVECTOR, array_512_0); + array_512_1 = _mm512_mul_ps(BETAVECTOR, array_512_1); + array_512_2 = _mm512_mul_ps(BETAVECTOR, array_512_2); + array_512_3 = _mm512_mul_ps(BETAVECTOR, array_512_3); + + _mm512_storeu_ps(&C[idx_base_0+idx_m], array_512_0); + _mm512_storeu_ps(&C[idx_base_1+idx_m], array_512_1); + _mm512_storeu_ps(&C[idx_base_2+idx_m], array_512_2); + _mm512_storeu_ps(&C[idx_base_3+idx_m], array_512_3); + } + + if (tag_n_Mx != M) { + array_512_0 = _mm512_maskz_loadu_ps(tail_mask, &C[idx_base_0+tag_n_Mx]); + array_512_1 = _mm512_maskz_loadu_ps(tail_mask, &C[idx_base_1+tag_n_Mx]); + array_512_2 = _mm512_maskz_loadu_ps(tail_mask, &C[idx_base_2+tag_n_Mx]); + array_512_3 = _mm512_maskz_loadu_ps(tail_mask, &C[idx_base_3+tag_n_Mx]); + + array_512_0 = _mm512_mul_ps(BETAVECTOR, array_512_0); + array_512_1 = _mm512_mul_ps(BETAVECTOR, array_512_1); + array_512_2 = _mm512_mul_ps(BETAVECTOR, array_512_2); + array_512_3 = _mm512_mul_ps(BETAVECTOR, array_512_3); + + _mm512_mask_storeu_ps(&C[idx_base_0+tag_n_Mx], tail_mask, array_512_0); + _mm512_mask_storeu_ps(&C[idx_base_1+tag_n_Mx], tail_mask, array_512_1); + _mm512_mask_storeu_ps(&C[idx_base_2+tag_n_Mx], tail_mask, array_512_2); + _mm512_mask_storeu_ps(&C[idx_base_3+tag_n_Mx], tail_mask, array_512_3); + } + + idx_base_0 += LDC4x; + idx_base_1 += LDC4x; + idx_base_2 += LDC4x; + idx_base_3 += LDC4x; + } + + if (tag_n_Nx != N) { + for (BLASLONG idx_n = tag_n_Nx; idx_n < N; idx_n++) { + for (BLASLONG idx_m = 0; idx_m < tag_n_Mx; idx_m += 16) { + array_512_0 = _mm512_loadu_ps(&C[idx_base_0+idx_m]); + array_512_0 = _mm512_mul_ps(BETAVECTOR, array_512_0); + _mm512_storeu_ps(&C[idx_base_0+idx_m], array_512_0); + } + + if (tag_n_Mx != M) { + array_512_0 = _mm512_maskz_loadu_ps(tail_mask, &C[idx_base_0+tag_n_Mx]); + array_512_0 = _mm512_mul_ps(BETAVECTOR, array_512_0); + _mm512_mask_storeu_ps(&C[idx_base_0+tag_n_Mx], tail_mask, array_512_0); + } + idx_base_0 
+= ldc; + } + } + } else { + + } +} + +// Zero-fill matrix C (no beta scaling); only the CblasColMajor branch is implemented +void sbgemm_zero_operation(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, float *C, OPENBLAS_CONST blasint ldc) +{ + BLASLONG tag_n_Nx = N & (~3); + BLASLONG tag_n_Mx = M & (~15); + + BLASLONG LDC4x = ldc*4; + BLASLONG idx_base_0 = 0; + BLASLONG idx_base_1 = ldc; + BLASLONG idx_base_2 = ldc*2; + BLASLONG idx_base_3 = ldc*3; + + unsigned short tail_mask_value = (((unsigned short)0xffff) >> (16-M+tag_n_Mx)); + __mmask16 tail_mask = *((__mmask16*) &tail_mask_value); + + __m512 ZEROVECTOR = _mm512_setzero_ps(); + + if (Order == CblasColMajor) { + for (BLASLONG idx_n = 0; idx_n < tag_n_Nx; idx_n += 4) { + for (BLASLONG idx_m = 0; idx_m < tag_n_Mx; idx_m += 16) { + _mm512_storeu_ps(&C[idx_base_0+idx_m], ZEROVECTOR); + _mm512_storeu_ps(&C[idx_base_1+idx_m], ZEROVECTOR); + _mm512_storeu_ps(&C[idx_base_2+idx_m], ZEROVECTOR); + _mm512_storeu_ps(&C[idx_base_3+idx_m], ZEROVECTOR); + } + + if (tag_n_Mx != M) { + _mm512_mask_storeu_ps(&C[idx_base_0+tag_n_Mx], tail_mask, ZEROVECTOR); + _mm512_mask_storeu_ps(&C[idx_base_1+tag_n_Mx], tail_mask, ZEROVECTOR); + _mm512_mask_storeu_ps(&C[idx_base_2+tag_n_Mx], tail_mask, ZEROVECTOR); + _mm512_mask_storeu_ps(&C[idx_base_3+tag_n_Mx], tail_mask, ZEROVECTOR); + } + + idx_base_0 += LDC4x; + idx_base_1 += LDC4x; + idx_base_2 += LDC4x; + idx_base_3 += LDC4x; + } + + if (tag_n_Nx != N) { + for (BLASLONG idx_n = tag_n_Nx; idx_n < N; idx_n++) { + for (BLASLONG idx_m = 0; idx_m < tag_n_Mx; idx_m += 16) { + _mm512_storeu_ps(&C[idx_base_0+idx_m], ZEROVECTOR); + } + + if (tag_n_Mx != M) { + _mm512_mask_storeu_ps(&C[idx_base_0+tag_n_Mx], tail_mask, ZEROVECTOR); + } + idx_base_0 += ldc; + } + } + } else { + + } +} \ No newline at end of file diff --git a/kernel/x86_64/sbgemm_microk_cooperlake_template.c b/kernel/x86_64/sbgemm_microk_cooperlake_template.c new file mode 100644 index 000000000..dd4cb440b --- /dev/null +++
b/kernel/x86_64/sbgemm_microk_cooperlake_template.c @@ -0,0 +1,625 @@ +#include "sbgemm.h" +#include "bf16_common_macros.h" +#include + +#undef STORE16_COMPLETE_RESULT +#undef STORE16_MASK_COMPLETE_RESULT +#undef SBGEMM_BLOCK_KERNEL_32x8x32 +#undef SBGEMM_BLOCK_KERNEL_16x8x32 +#undef SBGEMM_BLOCK_KERNEL_32xNx32 +#undef SBGEMM_BLOCK_KERNEL_16xNx32 +#undef SBGEMM_BLOCKING_KERNEL_2 + +#ifndef ONE_ALPHA // ALPHA is not ONE + #define STORE16_COMPLETE_RESULT STORE16_COMPLETE_RESULT_ALPHA_ONE + #define STORE16_MASK_COMPLETE_RESULT STORE16_MASK_COMPLETE_RESULT_ALPHA_ONE + #define SBGEMM_BLOCK_KERNEL_32x8x32 sbgemm_block_kernel_32x8x32_alpha + #define SBGEMM_BLOCK_KERNEL_16x8x32 sbgemm_block_kernel_16x8x32_alpha + #define SBGEMM_BLOCK_KERNEL_32xNx32 sbgemm_block_kernel_32xNx32_alpha + #define SBGEMM_BLOCK_KERNEL_16xNx32 sbgemm_block_kernel_16xNx32_alpha + #define SBGEMM_BLOCKING_KERNEL_2 sbgemm_blocking_kernel_2_alpha +#else // ALPHA is ONE + #define STORE16_COMPLETE_RESULT STORE16_COMPLETE_RESULT_ONE_ONE + #define STORE16_MASK_COMPLETE_RESULT STORE16_MASK_COMPLETE_RESULT_ONE_ONE + #define SBGEMM_BLOCK_KERNEL_32x8x32 sbgemm_block_kernel_32x8x32_one + #define SBGEMM_BLOCK_KERNEL_16x8x32 sbgemm_block_kernel_16x8x32_one + #define SBGEMM_BLOCK_KERNEL_32xNx32 sbgemm_block_kernel_32xNx32_one + #define SBGEMM_BLOCK_KERNEL_16xNx32 sbgemm_block_kernel_16xNx32_one + #define SBGEMM_BLOCKING_KERNEL_2 sbgemm_blocking_kernel_2_one +#endif + + +// SBGEMM Kernel for 16> (32-m)); + __mmask16 tail_mask = *((__mmask16*) &tail_mask_value); + result_512_tmp_0 = _mm512_permutex2var_ps(result_512_0, shuffle_idx_base0, result_512_8); + result_512_tmp_1 = _mm512_permutex2var_ps(result_512_0, shuffle_idx_base1, result_512_8); + result_512_tmp_2 = _mm512_permutex2var_ps(result_512_1, shuffle_idx_base0, result_512_9); + result_512_tmp_3 = _mm512_permutex2var_ps(result_512_1, shuffle_idx_base1, result_512_9); + STORE16_COMPLETE_RESULT(result_512_tmp_0, (&C[ldc*0])) + 
STORE16_MASK_COMPLETE_RESULT(result_512_tmp_1, (&C[ldc*0+16]), tail_mask) + STORE16_COMPLETE_RESULT(result_512_tmp_2, (&C[ldc*1])) + STORE16_MASK_COMPLETE_RESULT(result_512_tmp_3, (&C[ldc*1+16]), tail_mask) + result_512_tmp_0 = _mm512_permutex2var_ps(result_512_2, shuffle_idx_base0, result_512_10); + result_512_tmp_1 = _mm512_permutex2var_ps(result_512_2, shuffle_idx_base1, result_512_10); + result_512_tmp_2 = _mm512_permutex2var_ps(result_512_3, shuffle_idx_base0, result_512_11); + result_512_tmp_3 = _mm512_permutex2var_ps(result_512_3, shuffle_idx_base1, result_512_11); + STORE16_COMPLETE_RESULT(result_512_tmp_0, (&C[ldc*2])) + STORE16_MASK_COMPLETE_RESULT(result_512_tmp_1, (&C[ldc*2+16]), tail_mask) + STORE16_COMPLETE_RESULT(result_512_tmp_2, (&C[ldc*3])) + STORE16_MASK_COMPLETE_RESULT(result_512_tmp_3, (&C[ldc*3+16]), tail_mask) + result_512_tmp_0 = _mm512_permutex2var_ps(result_512_4, shuffle_idx_base0, result_512_12); + result_512_tmp_1 = _mm512_permutex2var_ps(result_512_4, shuffle_idx_base1, result_512_12); + result_512_tmp_2 = _mm512_permutex2var_ps(result_512_5, shuffle_idx_base0, result_512_13); + result_512_tmp_3 = _mm512_permutex2var_ps(result_512_5, shuffle_idx_base1, result_512_13); + STORE16_COMPLETE_RESULT(result_512_tmp_0, (&C[ldc*4])) + STORE16_MASK_COMPLETE_RESULT(result_512_tmp_1, (&C[ldc*4+16]), tail_mask) + STORE16_COMPLETE_RESULT(result_512_tmp_2, (&C[ldc*5])) + STORE16_MASK_COMPLETE_RESULT(result_512_tmp_3, (&C[ldc*5+16]), tail_mask) + result_512_tmp_0 = _mm512_permutex2var_ps(result_512_6, shuffle_idx_base0, result_512_14); + result_512_tmp_1 = _mm512_permutex2var_ps(result_512_6, shuffle_idx_base1, result_512_14); + result_512_tmp_2 = _mm512_permutex2var_ps(result_512_7, shuffle_idx_base0, result_512_15); + result_512_tmp_3 = _mm512_permutex2var_ps(result_512_7, shuffle_idx_base1, result_512_15); + STORE16_COMPLETE_RESULT(result_512_tmp_0, (&C[ldc*6])) + STORE16_MASK_COMPLETE_RESULT(result_512_tmp_1, (&C[ldc*6+16]), tail_mask) + 
STORE16_COMPLETE_RESULT(result_512_tmp_2, (&C[ldc*7])) + STORE16_MASK_COMPLETE_RESULT(result_512_tmp_3, (&C[ldc*7+16]), tail_mask) + } else { + result_512_tmp_0 = _mm512_permutex2var_ps(result_512_0, shuffle_idx_base0, result_512_8); + result_512_tmp_1 = _mm512_permutex2var_ps(result_512_0, shuffle_idx_base1, result_512_8); + result_512_tmp_2 = _mm512_permutex2var_ps(result_512_1, shuffle_idx_base0, result_512_9); + result_512_tmp_3 = _mm512_permutex2var_ps(result_512_1, shuffle_idx_base1, result_512_9); + STORE16_COMPLETE_RESULT(result_512_tmp_0, (&C[ldc*0])) + STORE16_COMPLETE_RESULT(result_512_tmp_1, (&C[ldc*0+16])) + STORE16_COMPLETE_RESULT(result_512_tmp_2, (&C[ldc*1])) + STORE16_COMPLETE_RESULT(result_512_tmp_3, (&C[ldc*1+16])) + result_512_tmp_0 = _mm512_permutex2var_ps(result_512_2, shuffle_idx_base0, result_512_10); + result_512_tmp_1 = _mm512_permutex2var_ps(result_512_2, shuffle_idx_base1, result_512_10); + result_512_tmp_2 = _mm512_permutex2var_ps(result_512_3, shuffle_idx_base0, result_512_11); + result_512_tmp_3 = _mm512_permutex2var_ps(result_512_3, shuffle_idx_base1, result_512_11); + STORE16_COMPLETE_RESULT(result_512_tmp_0, (&C[ldc*2])) + STORE16_COMPLETE_RESULT(result_512_tmp_1, (&C[ldc*2+16])) + STORE16_COMPLETE_RESULT(result_512_tmp_2, (&C[ldc*3])) + STORE16_COMPLETE_RESULT(result_512_tmp_3, (&C[ldc*3+16])) + result_512_tmp_0 = _mm512_permutex2var_ps(result_512_4, shuffle_idx_base0, result_512_12); + result_512_tmp_1 = _mm512_permutex2var_ps(result_512_4, shuffle_idx_base1, result_512_12); + result_512_tmp_2 = _mm512_permutex2var_ps(result_512_5, shuffle_idx_base0, result_512_13); + result_512_tmp_3 = _mm512_permutex2var_ps(result_512_5, shuffle_idx_base1, result_512_13); + STORE16_COMPLETE_RESULT(result_512_tmp_0, (&C[ldc*4])) + STORE16_COMPLETE_RESULT(result_512_tmp_1, (&C[ldc*4+16])) + STORE16_COMPLETE_RESULT(result_512_tmp_2, (&C[ldc*5])) + STORE16_COMPLETE_RESULT(result_512_tmp_3, (&C[ldc*5+16])) + result_512_tmp_0 = 
_mm512_permutex2var_ps(result_512_6, shuffle_idx_base0, result_512_14); + result_512_tmp_1 = _mm512_permutex2var_ps(result_512_6, shuffle_idx_base1, result_512_14); + result_512_tmp_2 = _mm512_permutex2var_ps(result_512_7, shuffle_idx_base0, result_512_15); + result_512_tmp_3 = _mm512_permutex2var_ps(result_512_7, shuffle_idx_base1, result_512_15); + STORE16_COMPLETE_RESULT(result_512_tmp_0, (&C[ldc*6])) + STORE16_COMPLETE_RESULT(result_512_tmp_1, (&C[ldc*6+16])) + STORE16_COMPLETE_RESULT(result_512_tmp_2, (&C[ldc*7])) + STORE16_COMPLETE_RESULT(result_512_tmp_3, (&C[ldc*7+16])) + } +} + +// SBGEMM Kernel for M<=16, N=8, K can be any number, but the processing will take 32 as a base +#ifndef ONE_ALPHA // ALPHA is not ONE +void sbgemm_block_kernel_16x8x32_alpha(BLASLONG m, BLASLONG k, float alpha, bfloat16 *A, bfloat16 *B, float *C, int ldc) +#else // ALPHA is ONE +void sbgemm_block_kernel_16x8x32_one(BLASLONG m, BLASLONG k, float alpha, bfloat16 *A, bfloat16 *B, float *C, int ldc) +#endif +{ + int SHUFFLE_MAGIC_NO = 0x39; + BLASLONG tag_k_32x = k & (~31); + BLASLONG idxB_base = 0; + BLASLONG width = 32; + +#ifndef ONE_ALPHA + __m512 ALPHAVECTOR = _mm512_set1_ps(alpha); +#endif + + __m512i arrayA_512_0; + __m512i arrayB_512_0, arrayB_512_1, arrayB_512_2, arrayB_512_3, arrayB_512_4, arrayB_512_5, arrayB_512_6, arrayB_512_7; + __m512 result_512_0, result_512_1, result_512_2, result_512_3, result_512_4, result_512_5, result_512_6, result_512_7; + + result_512_0 = _mm512_setzero_ps(); + result_512_1 = _mm512_setzero_ps(); + result_512_2 = _mm512_setzero_ps(); + result_512_3 = _mm512_setzero_ps(); + result_512_4 = _mm512_setzero_ps(); + result_512_5 = _mm512_setzero_ps(); + result_512_6 = _mm512_setzero_ps(); + result_512_7 = _mm512_setzero_ps(); + + for (BLASLONG idx_k = 0; idx_k < k; idx_k += 32) { + // Load B with unroll 8 + idxB_base = idx_k << 3; + arrayB_512_0 = _mm512_loadu_si512(&B[idxB_base + 32*0]); + arrayB_512_1 = _mm512_loadu_si512(&B[idxB_base + 32*1]); + 
arrayB_512_2 = _mm512_loadu_si512(&B[idxB_base + 32*2]); + arrayB_512_3 = _mm512_loadu_si512(&B[idxB_base + 32*3]); + arrayB_512_4 = _mm512_loadu_si512(&B[idxB_base + 32*4]); + arrayB_512_5 = _mm512_loadu_si512(&B[idxB_base + 32*5]); + arrayB_512_6 = _mm512_loadu_si512(&B[idxB_base + 32*6]); + arrayB_512_7 = _mm512_loadu_si512(&B[idxB_base + 32*7]); + + if (idx_k == tag_k_32x) {width = k - tag_k_32x;} + + for (BLASLONG idx = 0; idx < width;) { + // Each two rows are a group for 32-pair bf16 elements + // Load two rows into a 512 register + arrayA_512_0 = _mm512_loadu_si512(&A[idx<<4]); + + result_512_0 = _mm512_dpbf16_ps(result_512_0, (__m512bh) arrayA_512_0, (__m512bh) _mm512_broadcastd_epi32(_mm512_castsi512_si128(arrayB_512_0))); + result_512_1 = _mm512_dpbf16_ps(result_512_1, (__m512bh) arrayA_512_0, (__m512bh) _mm512_broadcastd_epi32(_mm512_castsi512_si128(arrayB_512_1))); + result_512_2 = _mm512_dpbf16_ps(result_512_2, (__m512bh) arrayA_512_0, (__m512bh) _mm512_broadcastd_epi32(_mm512_castsi512_si128(arrayB_512_2))); + result_512_3 = _mm512_dpbf16_ps(result_512_3, (__m512bh) arrayA_512_0, (__m512bh) _mm512_broadcastd_epi32(_mm512_castsi512_si128(arrayB_512_3))); + result_512_4 = _mm512_dpbf16_ps(result_512_4, (__m512bh) arrayA_512_0, (__m512bh) _mm512_broadcastd_epi32(_mm512_castsi512_si128(arrayB_512_4))); + result_512_5 = _mm512_dpbf16_ps(result_512_5, (__m512bh) arrayA_512_0, (__m512bh) _mm512_broadcastd_epi32(_mm512_castsi512_si128(arrayB_512_5))); + result_512_6 = _mm512_dpbf16_ps(result_512_6, (__m512bh) arrayA_512_0, (__m512bh) _mm512_broadcastd_epi32(_mm512_castsi512_si128(arrayB_512_6))); + result_512_7 = _mm512_dpbf16_ps(result_512_7, (__m512bh) arrayA_512_0, (__m512bh) _mm512_broadcastd_epi32(_mm512_castsi512_si128(arrayB_512_7))); + + arrayB_512_0 = _mm512_shuffle_epi32(arrayB_512_0, SHUFFLE_MAGIC_NO); + arrayB_512_1 = _mm512_shuffle_epi32(arrayB_512_1, SHUFFLE_MAGIC_NO); + arrayB_512_2 = _mm512_shuffle_epi32(arrayB_512_2, SHUFFLE_MAGIC_NO); + 
arrayB_512_3 = _mm512_shuffle_epi32(arrayB_512_3, SHUFFLE_MAGIC_NO); + arrayB_512_4 = _mm512_shuffle_epi32(arrayB_512_4, SHUFFLE_MAGIC_NO); + arrayB_512_5 = _mm512_shuffle_epi32(arrayB_512_5, SHUFFLE_MAGIC_NO); + arrayB_512_6 = _mm512_shuffle_epi32(arrayB_512_6, SHUFFLE_MAGIC_NO); + arrayB_512_7 = _mm512_shuffle_epi32(arrayB_512_7, SHUFFLE_MAGIC_NO); + + idx += 2; + // Every 4 loops we need to switch to next 128 bits of arrayB registers + if ((idx & (~7)) == idx) { + arrayB_512_0 = _mm512_shuffle_i32x4(arrayB_512_0, arrayB_512_0, SHUFFLE_MAGIC_NO); + arrayB_512_1 = _mm512_shuffle_i32x4(arrayB_512_1, arrayB_512_1, SHUFFLE_MAGIC_NO); + arrayB_512_2 = _mm512_shuffle_i32x4(arrayB_512_2, arrayB_512_2, SHUFFLE_MAGIC_NO); + arrayB_512_3 = _mm512_shuffle_i32x4(arrayB_512_3, arrayB_512_3, SHUFFLE_MAGIC_NO); + arrayB_512_4 = _mm512_shuffle_i32x4(arrayB_512_4, arrayB_512_4, SHUFFLE_MAGIC_NO); + arrayB_512_5 = _mm512_shuffle_i32x4(arrayB_512_5, arrayB_512_5, SHUFFLE_MAGIC_NO); + arrayB_512_6 = _mm512_shuffle_i32x4(arrayB_512_6, arrayB_512_6, SHUFFLE_MAGIC_NO); + arrayB_512_7 = _mm512_shuffle_i32x4(arrayB_512_7, arrayB_512_7, SHUFFLE_MAGIC_NO); + } + } + } + + if (m != 16) { + unsigned short tail_mask_value = (((unsigned short)0xffff) >> (16-m)); + __mmask16 tail_mask = *((__mmask16*) &tail_mask_value); + + result_512_0 = _mm512_shuffle_f32x4(result_512_0, result_512_0, 0xd8); + result_512_1 = _mm512_shuffle_f32x4(result_512_1, result_512_1, 0xd8); + result_512_2 = _mm512_shuffle_f32x4(result_512_2, result_512_2, 0xd8); + result_512_3 = _mm512_shuffle_f32x4(result_512_3, result_512_3, 0xd8); + STORE16_MASK_COMPLETE_RESULT(result_512_0, (&C[ldc*0]), tail_mask) + STORE16_MASK_COMPLETE_RESULT(result_512_1, (&C[ldc*1]), tail_mask) + STORE16_MASK_COMPLETE_RESULT(result_512_2, (&C[ldc*2]), tail_mask) + STORE16_MASK_COMPLETE_RESULT(result_512_3, (&C[ldc*3]), tail_mask) + result_512_4 = _mm512_shuffle_f32x4(result_512_4, result_512_4, 0xd8); + result_512_5 = 
_mm512_shuffle_f32x4(result_512_5, result_512_5, 0xd8); + result_512_6 = _mm512_shuffle_f32x4(result_512_6, result_512_6, 0xd8); + result_512_7 = _mm512_shuffle_f32x4(result_512_7, result_512_7, 0xd8); + STORE16_MASK_COMPLETE_RESULT(result_512_4, (&C[ldc*4]), tail_mask) + STORE16_MASK_COMPLETE_RESULT(result_512_5, (&C[ldc*5]), tail_mask) + STORE16_MASK_COMPLETE_RESULT(result_512_6, (&C[ldc*6]), tail_mask) + STORE16_MASK_COMPLETE_RESULT(result_512_7, (&C[ldc*7]), tail_mask) + } else { + result_512_0 = _mm512_shuffle_f32x4(result_512_0, result_512_0, 0xd8); + result_512_1 = _mm512_shuffle_f32x4(result_512_1, result_512_1, 0xd8); + result_512_2 = _mm512_shuffle_f32x4(result_512_2, result_512_2, 0xd8); + result_512_3 = _mm512_shuffle_f32x4(result_512_3, result_512_3, 0xd8); + STORE16_COMPLETE_RESULT(result_512_0, (&C[ldc*0])) + STORE16_COMPLETE_RESULT(result_512_1, (&C[ldc*1])) + STORE16_COMPLETE_RESULT(result_512_2, (&C[ldc*2])) + STORE16_COMPLETE_RESULT(result_512_3, (&C[ldc*3])) + result_512_4 = _mm512_shuffle_f32x4(result_512_4, result_512_4, 0xd8); + result_512_5 = _mm512_shuffle_f32x4(result_512_5, result_512_5, 0xd8); + result_512_6 = _mm512_shuffle_f32x4(result_512_6, result_512_6, 0xd8); + result_512_7 = _mm512_shuffle_f32x4(result_512_7, result_512_7, 0xd8); + STORE16_COMPLETE_RESULT(result_512_4, (&C[ldc*4])) + STORE16_COMPLETE_RESULT(result_512_5, (&C[ldc*5])) + STORE16_COMPLETE_RESULT(result_512_6, (&C[ldc*6])) + STORE16_COMPLETE_RESULT(result_512_7, (&C[ldc*7])) + } +} + +// SBGEMM Kernel for 16> (32-m)); + __mmask16 tail_mask = *((__mmask16*) &tail_mask_value); + for (int i = 0; i < n; i++) { + result_512_tmp_0 = _mm512_permutex2var_ps(result_512[i], shuffle_idx_base0, result_512[i+8]); + result_512_tmp_1 = _mm512_permutex2var_ps(result_512[i], shuffle_idx_base1, result_512[i+8]); + STORE16_COMPLETE_RESULT(result_512_tmp_0, (&C[ldc*i])) + STORE16_MASK_COMPLETE_RESULT(result_512_tmp_1, (&C[ldc*i+16]), tail_mask) + } + } else { + for (int i = 0; i < n; 
i++) { + result_512_tmp_0 = _mm512_permutex2var_ps(result_512[i], shuffle_idx_base0, result_512[i+8]); + result_512_tmp_1 = _mm512_permutex2var_ps(result_512[i], shuffle_idx_base1, result_512[i+8]); + STORE16_COMPLETE_RESULT(result_512_tmp_0, (&C[ldc*i])) + STORE16_COMPLETE_RESULT(result_512_tmp_1, (&C[ldc*i+16])) + } + } +} + +// SBGEMM Kernel for 16<=M, N<8, K can be any number, but the processing will take 32 as a base +#ifndef ONE_ALPHA // ALPHA is not ONE +void sbgemm_block_kernel_16xNx32_alpha(BLASLONG m, BLASLONG n, BLASLONG k, float alpha, bfloat16 *A, bfloat16 *B, float *C, int ldc) +#else // ALPHA is ONE +void sbgemm_block_kernel_16xNx32_one(BLASLONG m, BLASLONG n, BLASLONG k, float alpha, bfloat16 *A, bfloat16 *B, float *C, int ldc) +#endif +{ + int SHUFFLE_MAGIC_NO = 0x39; + BLASLONG tag_k_32x = k & (~31); + BLASLONG idxB_base = 0; + BLASLONG width = 32; + +#ifndef ONE_ALPHA + __m512 ALPHAVECTOR = _mm512_set1_ps(alpha); +#endif + + __m512i arrayA_512; + __m512i arrayB_512[8]; + __m512 result_512[8]; + + for (int i = 0; i < 8; i += 2) { + result_512[i] = _mm512_setzero_ps(); + result_512[i+1] = _mm512_setzero_ps(); + } + + for (BLASLONG idx_k = 0; idx_k < k; idx_k += 32) { + // Load B with unroll n + for (int i = 0; i < n; i ++) { + arrayB_512[i] = _mm512_loadu_si512(&B[idxB_base]); + idxB_base += 32; + } + + if (idx_k == tag_k_32x) {width = k - tag_k_32x;} + + for (BLASLONG idx = 0; idx < width;) { + // Each two rows are a group for 32-pair bf16 elements + // Load two rows into a 512 register + arrayA_512 = _mm512_loadu_si512(&A[idx<<4]); + + for (int i = 0; i < n; i ++) { + result_512[i] = _mm512_dpbf16_ps(result_512[i], (__m512bh) arrayA_512, (__m512bh) _mm512_broadcastd_epi32(_mm512_castsi512_si128(arrayB_512[i]))); + arrayB_512[i] = _mm512_shuffle_epi32(arrayB_512[i], SHUFFLE_MAGIC_NO); + } + + idx += 2; + // Every 4 loops we need to switch to next 128 bits of arrayB registers + if ((idx & (~7)) == idx) { + for (int i = 0; i < n; i++) { + 
arrayB_512[i] = _mm512_shuffle_i32x4(arrayB_512[i], arrayB_512[i], SHUFFLE_MAGIC_NO); + } + } + } + } + + if (m != 16) { + unsigned short tail_mask_value = (((unsigned short)0xffff) >> (16-m)); + __mmask16 tail_mask = *((__mmask16*) &tail_mask_value); + for (int i = 0; i < n; i++) { + result_512[i] = _mm512_shuffle_f32x4(result_512[i], result_512[i], 0xd8); + STORE16_MASK_COMPLETE_RESULT(result_512[i], (&C[ldc*i]), tail_mask) + } + } else { + for (int i = 0; i < n; i++) { + result_512[i] = _mm512_shuffle_f32x4(result_512[i], result_512[i], 0xd8); + STORE16_COMPLETE_RESULT(result_512[i], (&C[ldc*i])) + } + } +} +#ifndef ONE_ALPHA // ALPHA is not ONE +void sbgemm_blocking_kernel_2_alpha(blasint M, blasint N, blasint K, float alpha, bfloat16 *A, blasint lda, bfloat16 *B, blasint ldb, float *C, blasint ldc, bfloat16 * block_A, bfloat16 * block_B) +#else // ALPHA is ONE +void sbgemm_blocking_kernel_2_one(blasint M, blasint N, blasint K, float alpha, bfloat16 *A, blasint lda, bfloat16 *B, blasint ldb, float *C, blasint ldc, bfloat16 * block_A, bfloat16 * block_B) +#endif +{ + BLASLONG m_step, n_step, k_step, k_step_round32; + BLASLONG tag_m_Nx = M & (~(BF16_BLOCK_THRES_M-1)); + + BLASLONG n_from, n_to; + BLASLONG tag_n_Nx; + + n_from = 0; + n_to = (BF16_BLOCK_THRES_N > N) ? N : BF16_BLOCK_THRES_N; + tag_n_Nx = n_to & (~(BF16_BLOCK_STEP_N-1)); + + k_step = (K > BF16_BLOCK_THRES_K) ? BF16_BLOCK_THRES_K : K; + k_step_round32 = k_step & (~31); + k_step_round32 = (k_step > k_step_round32) ? (k_step_round32 + 32) : k_step_round32; + + if (M >= BF16_BLOCK_THRES_M) { + while (n_from < N) { + for (BLASLONG idx_k = 0; idx_k < K;) { + // Use Kx32 kernel when BF16_BLOCK_THRES_M==32, Kx16 kernel when BF16_BLOCK_THRES_M==16, ... + COL_MAJOR_INCOPY_KERNEL_Kx32(k_step, &A(idx_k, 0), lda, block_A); + // TODO: MT + for (BLASLONG idx_n = n_from; idx_n < tag_n_Nx; idx_n += BF16_BLOCK_STEP_N) { + // Use 8x32 kernel when BF16_BLOCK_THRES_N==8, 4x32 kernel when BF16_BLOCK_THRES_N==4, ... 
+ COL_MAJOR_ONCOPY_KERNEL_8x32(k_step, &B(idx_n, idx_k), ldb, block_B + (idx_n-n_from)*k_step_round32); + SBGEMM_BLOCK_KERNEL_32x8x32(32, k_step, alpha, block_A, block_B + (idx_n-n_from)*k_step_round32, &C(idx_n, 0), ldc); + } + + if (tag_n_Nx != n_to) { + n_step = n_to - tag_n_Nx; + COL_MAJOR_ONCOPY_KERNEL_Nx32(n_step, k_step, &B(tag_n_Nx, idx_k), ldb, block_B + (tag_n_Nx-n_from)*k_step_round32); + SBGEMM_BLOCK_KERNEL_32xNx32(32, n_step, k_step, alpha, block_A, block_B + (tag_n_Nx-n_from)*k_step_round32, &C(tag_n_Nx, 0), ldc); + } + + for (BLASLONG idx_m = BF16_BLOCK_THRES_M; idx_m < tag_m_Nx; idx_m += BF16_BLOCK_THRES_M) { + COL_MAJOR_INCOPY_KERNEL_Kx32(k_step, &A(idx_k, idx_m), lda, block_A); + for (BLASLONG idx_n = n_from; idx_n < tag_n_Nx; idx_n += BF16_BLOCK_STEP_N) { + SBGEMM_BLOCK_KERNEL_32x8x32(32, k_step, alpha, block_A, block_B + (idx_n-n_from)*k_step_round32, &C(idx_n, idx_m), ldc); + } + + if (tag_n_Nx != n_to) { + n_step = n_to - tag_n_Nx; + SBGEMM_BLOCK_KERNEL_32xNx32(32, n_step, k_step, alpha, block_A, block_B + (tag_n_Nx-n_from)*k_step_round32, &C(tag_n_Nx, idx_m), ldc); + } + } + + if (tag_m_Nx != M) { + m_step = M - tag_m_Nx; + if (m_step > 16) { + COL_MAJOR_INCOPY_KERNEL_Kx32m(k_step, m_step, &A(idx_k, tag_m_Nx), lda, block_A); + for (BLASLONG idx_n = n_from; idx_n < tag_n_Nx; idx_n += BF16_BLOCK_STEP_N) { + SBGEMM_BLOCK_KERNEL_32x8x32(m_step, k_step, alpha, block_A, block_B + (idx_n-n_from)*k_step_round32, &C(idx_n, tag_m_Nx), ldc); + } + + if (tag_n_Nx != n_to) { + n_step = n_to - tag_n_Nx; + SBGEMM_BLOCK_KERNEL_32xNx32(m_step, n_step, k_step, alpha, block_A, block_B + (tag_n_Nx-n_from)*k_step_round32, &C(tag_n_Nx, tag_m_Nx), ldc); + } + } else if (m_step == 16) { + COL_MAJOR_INCOPY_KERNEL_Kx16(k_step, m_step, &A(idx_k, tag_m_Nx), lda, block_A); + for (BLASLONG idx_n = n_from; idx_n < tag_n_Nx; idx_n += BF16_BLOCK_STEP_N) { + SBGEMM_BLOCK_KERNEL_16x8x32(m_step, k_step, alpha, block_A, block_B + (idx_n-n_from)*k_step_round32, &C(idx_n, 
tag_m_Nx), ldc); + } + + if (tag_n_Nx != n_to) { + n_step = n_to - tag_n_Nx; + SBGEMM_BLOCK_KERNEL_16xNx32(m_step, n_step, k_step, alpha, block_A, block_B + (tag_n_Nx-n_from)*k_step_round32, &C(tag_n_Nx, tag_m_Nx), ldc); + } + } else { + COL_MAJOR_INCOPY_KERNEL_Kx16m(k_step, m_step, &A(idx_k, tag_m_Nx), lda, block_A); + for (BLASLONG idx_n = n_from; idx_n < tag_n_Nx; idx_n += BF16_BLOCK_STEP_N) { + SBGEMM_BLOCK_KERNEL_16x8x32(m_step, k_step, alpha, block_A, block_B + (idx_n-n_from)*k_step_round32, &C(idx_n, tag_m_Nx), ldc); + } + + if (tag_n_Nx != n_to) { + n_step = n_to - tag_n_Nx; + SBGEMM_BLOCK_KERNEL_16xNx32(m_step, n_step, k_step, alpha, block_A, block_B + (tag_n_Nx-n_from)*k_step_round32, &C(tag_n_Nx, tag_m_Nx), ldc); + } + } + } + + idx_k += k_step; + k_step = K - idx_k; + k_step = (k_step > BF16_BLOCK_THRES_K) ? BF16_BLOCK_THRES_K : k_step; + k_step_round32 = k_step & (~31); + k_step_round32 = (k_step > k_step_round32) ? (k_step_round32 + 32) : k_step_round32; + } + + n_from = n_to; + n_to += BF16_BLOCK_THRES_N; + n_to = (n_to > N) ? N : n_to; + tag_n_Nx = n_to & (~(BF16_BLOCK_STEP_N-1)); + } + } else { + m_step = M - tag_m_Nx; + while (n_from < N) { + for (BLASLONG idx_k = 0; idx_k < K;) { + // Use Kx32 kernel when BF16_BLOCK_THRES_M==32, Kx16 kernel when BF16_BLOCK_THRES_M==16, ... + COL_MAJOR_INCOPY_KERNEL_Kx32m(k_step, m_step, &A(idx_k, 0), lda, block_A); + // TODO: MT + for (BLASLONG idx_n = n_from; idx_n < tag_n_Nx; idx_n += BF16_BLOCK_STEP_N) { + // Use 8x32 kernel when BF16_BLOCK_THRES_N==8, 4x32 kernel when BF16_BLOCK_THRES_N==4, ... 
+ COL_MAJOR_ONCOPY_KERNEL_8x32(k_step, &B(idx_n, idx_k), ldb, block_B + (idx_n-n_from)*k_step_round32); + SBGEMM_BLOCK_KERNEL_32x8x32(m_step, k_step, alpha, block_A, block_B + (idx_n-n_from)*k_step_round32, &C(idx_n, 0), ldc); + } + + if (tag_n_Nx != n_to) { + n_step = n_to - tag_n_Nx; + COL_MAJOR_ONCOPY_KERNEL_Nx32(n_step, k_step, &B(tag_n_Nx, idx_k), ldb, block_B + (tag_n_Nx-n_from)*k_step_round32); + SBGEMM_BLOCK_KERNEL_32xNx32(m_step, n_step, k_step, alpha, block_A, block_B + (tag_n_Nx-n_from)*k_step_round32, &C(tag_n_Nx, 0), ldc); + } + + idx_k += k_step; + k_step = K - idx_k; + k_step = (k_step > BF16_BLOCK_THRES_K) ? BF16_BLOCK_THRES_K : k_step; + k_step_round32 = k_step & (~31); + k_step_round32 = (k_step > k_step_round32) ? (k_step_round32 + 32) : k_step_round32; + } + n_from = n_to; + n_to += BF16_BLOCK_THRES_N; + n_to = (n_to > N) ? N : n_to; + tag_n_Nx = n_to & (~(BF16_BLOCK_STEP_N-1)); + } + } +} + +#ifndef ONE_ALPHA // ALPHA is not ONE +void sbgemm_internal_kernel_alpha(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, + OPENBLAS_CONST float alpha, OPENBLAS_CONST bfloat16 *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST bfloat16 *B, OPENBLAS_CONST blasint ldb, float *C, OPENBLAS_CONST blasint ldc) +#else // ALPHA is ONE +void sbgemm_internal_kernel_one(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, + OPENBLAS_CONST float alpha, OPENBLAS_CONST bfloat16 *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST bfloat16 *B, OPENBLAS_CONST blasint ldb, float *C, OPENBLAS_CONST blasint ldc) +#endif +{ + bfloat16 block_A[BF16_BLOCK_THRES_K * BF16_BLOCK_THRES_M]; + bfloat16 block_B[BF16_BLOCK_THRES_N * BF16_BLOCK_THRES_K]; + + // TODO: assume no trans for both A 
and B, to complement these scenarios later + if (Order == CblasColMajor) { + SBGEMM_BLOCKING_KERNEL_2(M, N, K, alpha, A, lda, B, ldb, C, ldc, block_A, block_B); + } else { + + } +} \ No newline at end of file From 6fe0f1fab9d6a7f46d71d37ebb210fbf56924fbc Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Mon, 11 Jan 2021 19:05:29 +0100 Subject: [PATCH 031/134] Label get_cpu_ftr as volatile to keep gcc from rearranging the code --- driver/others/dynamic_arm64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/driver/others/dynamic_arm64.c b/driver/others/dynamic_arm64.c index 4f1b12f27..37c0694b6 100644 --- a/driver/others/dynamic_arm64.c +++ b/driver/others/dynamic_arm64.c @@ -68,7 +68,7 @@ extern void openblas_warning(int verbose, const char * msg); #endif #define get_cpu_ftr(id, var) ({ \ - __asm__("mrs %0, "#id : "=r" (var)); \ + __asm__ __volatile__("mrs %0, "#id : "=r" (var)); \ }) static char *corename[] = { From ed652d81365e14ac5db62f2abf9db0efa2ff193d Mon Sep 17 00:00:00 2001 From: Gordon Fossum Date: Mon, 11 Jan 2021 21:13:53 -0500 Subject: [PATCH 032/134] Added definitions for GEMM_PREFERED_SIZE and SWITCH_RATIO to the POWER9 and POWER10 specific sections of param.h. --- param.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/param.h b/param.h index 6c5e0f107..6a790ab61 100644 --- a/param.h +++ b/param.h @@ -2399,6 +2399,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define GEMM_DEFAULT_OFFSET_B 65536 #define GEMM_DEFAULT_ALIGN 0x0ffffUL +#define SWITCH_RATIO 16 +#define GEMM_PREFERED_SIZE 16 + #define SGEMM_DEFAULT_UNROLL_M 16 #define SGEMM_DEFAULT_UNROLL_N 8 #define DGEMM_DEFAULT_UNROLL_M 16 @@ -2435,6 +2438,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define GEMM_DEFAULT_OFFSET_B 65536 #define GEMM_DEFAULT_ALIGN 0x0ffffUL +#define SWITCH_RATIO 16 +#define GEMM_PREFERED_SIZE 16 + #define SGEMM_DEFAULT_UNROLL_M 16 #define SGEMM_DEFAULT_UNROLL_N 8 #define DGEMM_DEFAULT_UNROLL_M 8 From 2d45a262d999f3ff2121b9fb3898c170a01c4cce Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Tue, 12 Jan 2021 16:32:29 +0100 Subject: [PATCH 033/134] Support compilation with nvfortran --- common_arm64.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common_arm64.h b/common_arm64.h index 9cdded305..2270ffba7 100644 --- a/common_arm64.h +++ b/common_arm64.h @@ -39,7 +39,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define INLINE inline -#ifdef F_INTERFACE_FLANG +#if defined( F_INTERFACE_FLANG) || defined(F_INTERFACE_PGI) #define RETURN_BY_STACK #else #define RETURN_BY_COMPLEX From bff2b7c94d7a1cfa687da0693289c78e44eecc8e Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Tue, 12 Jan 2021 16:34:18 +0100 Subject: [PATCH 034/134] Support compilation with NVIDIA HPC compilers (which do not take gcc-style arch options) --- Makefile.arm64 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile.arm64 b/Makefile.arm64 index 62a877fff..c3fe583e4 100644 --- a/Makefile.arm64 +++ b/Makefile.arm64 @@ -1,4 +1,4 @@ - +ifneq ($(C_COMPILER), PGI) ifeq ($(CORE), ARMV8) CCOMMON_OPT += -march=armv8-a FCOMMON_OPT += -march=armv8-a @@ -77,4 +77,4 @@ CCOMMON_OPT += -march=armv8.2-a -mtune=tsv110 FCOMMON_OPT += -march=armv8.2-a -mtune=tsv110 endif endif - +endif From 43aac5bacc7f8f55fa981f990715f914ef739254 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Tue, 12 Jan 2021 16:36:12 +0100 Subject: [PATCH 035/134] Support NVIDIA HPC compiler --- kernel/arm/zdot.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/arm/zdot.c b/kernel/arm/zdot.c index 9249b54f8..79baa61b1 100644 --- a/kernel/arm/zdot.c +++ b/kernel/arm/zdot.c @@ -48,7 +48,7 @@ 
OPENBLAS_COMPLEX_FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLA dot[0]=0.0; dot[1]=0.0; -#if !defined(__PPC__) && !defined(__SunOS) +#if !defined(__PPC__) && !defined(__SunOS) && !defined(__PGI) CREAL(result) = 0.0 ; CIMAG(result) = 0.0 ; #else @@ -73,7 +73,7 @@ OPENBLAS_COMPLEX_FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLA i++ ; } -#if !defined(__PPC__) && !defined(__SunOS) +#if !defined(__PPC__) && !defined(__SunOS) && !defined(__PGI) CREAL(result) = dot[0]; CIMAG(result) = dot[1]; #else From c2a8ebfe695fda904ce2ae2153680d0c3810f2ce Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Tue, 12 Jan 2021 16:38:51 +0100 Subject: [PATCH 036/134] Add workaround for NVIDIA HPC mishandling of the asm DOT kernels --- kernel/arm64/KERNEL.ARMV8 | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/kernel/arm64/KERNEL.ARMV8 b/kernel/arm64/KERNEL.ARMV8 index 603e47d87..c8a53c86b 100644 --- a/kernel/arm64/KERNEL.ARMV8 +++ b/kernel/arm64/KERNEL.ARMV8 @@ -97,9 +97,18 @@ CNRM2KERNEL = znrm2.S ZNRM2KERNEL = znrm2.S DDOTKERNEL = dot.S +ifneq ($(C_COMPILER), PGI) SDOTKERNEL = ../generic/dot.c +else +SDOTKERNEL = dot.S +endif +ifneq ($(C_COMPILER), PGI) CDOTKERNEL = zdot.S ZDOTKERNEL = zdot.S +else +CDOTKERNEL = ../arm/zdot.c +ZDOTKERNEL = ../arm/zdot.c +endif DSDOTKERNEL = dot.S DGEMM_BETA = dgemm_beta.S From 0f27a036071501664d8c4ee491e02345d9bde115 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Tue, 12 Jan 2021 16:39:35 +0100 Subject: [PATCH 037/134] Add workaround for NVIDIA HPC mishandling of the asm DOT kernels --- kernel/arm64/KERNEL.CORTEXA57 | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/kernel/arm64/KERNEL.CORTEXA57 b/kernel/arm64/KERNEL.CORTEXA57 index dcf2383a9..0be334893 100644 --- a/kernel/arm64/KERNEL.CORTEXA57 +++ b/kernel/arm64/KERNEL.CORTEXA57 @@ -70,10 +70,19 @@ DCOPYKERNEL = copy.S CCOPYKERNEL = copy.S ZCOPYKERNEL = copy.S +ifneq ($(C_COMPILER), PGI) SDOTKERNEL = ../generic/dot.c +else +SDOTKERNEL = dot.S 
+endif DDOTKERNEL = dot.S +ifneq ($(C_COMPILER), PGI) CDOTKERNEL = zdot.S ZDOTKERNEL = zdot.S +else +CDOTKERNEL = ../arm/zdot.c +ZDOTKERNEL = ../arm/zdot.c +endif DSDOTKERNEL = dot.S SNRM2KERNEL = nrm2.S From 49959d4f1cf79e2945cf40e3da5964ee2df13710 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Tue, 12 Jan 2021 16:47:15 +0100 Subject: [PATCH 038/134] Add workaround for NVIDIA HPC --- kernel/arm64/KERNEL.CORTEXA53 | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/kernel/arm64/KERNEL.CORTEXA53 b/kernel/arm64/KERNEL.CORTEXA53 index e23133e52..db322dd0d 100644 --- a/kernel/arm64/KERNEL.CORTEXA53 +++ b/kernel/arm64/KERNEL.CORTEXA53 @@ -96,11 +96,20 @@ DNRM2KERNEL = nrm2.S CNRM2KERNEL = znrm2.S ZNRM2KERNEL = znrm2.S -DDOTKERNEL = dot.S -SDOTKERNEL = ../generic/dot.c -CDOTKERNEL = zdot.S -ZDOTKERNEL = zdot.S -DSDOTKERNEL = dot.S +ifneq ($(C_COMPILER), PGI) +SDOTKERNEL = ../generic/dot.c +else +SDOTKERNEL = dot.S +endif +DDOTKERNEL = dot.S +ifneq ($(C_COMPILER), PGI) +CDOTKERNEL = zdot.S +ZDOTKERNEL = zdot.S +else +CDOTKERNEL = ../arm/zdot.c +ZDOTKERNEL = ../arm/zdot.c +endif +DSDOTKERNEL = dot.S DGEMM_BETA = dgemm_beta.S SGEMM_BETA = sgemm_beta.S From 2efa3b70dcd90fb15be39f121b91105218b718c1 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Tue, 12 Jan 2021 16:49:39 +0100 Subject: [PATCH 039/134] Add workaround for NVIDIA HPC --- kernel/arm64/KERNEL.THUNDERX | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/kernel/arm64/KERNEL.THUNDERX b/kernel/arm64/KERNEL.THUNDERX index cb02c7bc5..669f62698 100644 --- a/kernel/arm64/KERNEL.THUNDERX +++ b/kernel/arm64/KERNEL.THUNDERX @@ -47,8 +47,13 @@ ZCOPYKERNEL = copy.S SDOTKERNEL = dot_thunderx.c DDOTKERNEL = ddot_thunderx.c +ifneq ($(C_COMPILER), PGI) CDOTKERNEL = zdot.S ZDOTKERNEL = zdot.S +else +CDOTKERNEL = ../arm/zdot.c +ZDOTKERNEL = ../arm/zdot.c +endif DSDOTKERNEL = dot.S SNRM2KERNEL = nrm2.S From b716c0ef010af184fec8d5d33aa9c5cc2fc767b7 Mon Sep 17 00:00:00 2001 From: Martin 
Kroeker Date: Tue, 12 Jan 2021 16:51:35 +0100 Subject: [PATCH 040/134] Add workaround for NVIDIA HPC --- kernel/arm64/KERNEL.TSV110 | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/kernel/arm64/KERNEL.TSV110 b/kernel/arm64/KERNEL.TSV110 index 1ce7bb7c0..54d016e17 100644 --- a/kernel/arm64/KERNEL.TSV110 +++ b/kernel/arm64/KERNEL.TSV110 @@ -72,8 +72,13 @@ ZCOPYKERNEL = copy.S SDOTKERNEL = dot.S DDOTKERNEL = dot.S +ifneq ($(C_COMPILER), PGI) CDOTKERNEL = zdot.S ZDOTKERNEL = zdot.S +else +CDOTKERNEL = ../arm/zdot.c +ZDOTKERNEL = ../arm/zdot.c +endif DSDOTKERNEL = dot.S SNRM2KERNEL = nrm2.S From 9ccb12b03179b13eedc97eb75ca3dfc7ea406a70 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Tue, 12 Jan 2021 23:20:07 +0100 Subject: [PATCH 041/134] Add prototypes for cblas_csrot and cblas_zdrot --- cblas.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cblas.h b/cblas.h index da00d46d6..8aafdb186 100644 --- a/cblas.h +++ b/cblas.h @@ -125,6 +125,8 @@ void cblas_zswap(OPENBLAS_CONST blasint n, void *x, OPENBLAS_CONST blasint incx, void cblas_srot(OPENBLAS_CONST blasint N, float *X, OPENBLAS_CONST blasint incX, float *Y, OPENBLAS_CONST blasint incY, OPENBLAS_CONST float c, OPENBLAS_CONST float s); void cblas_drot(OPENBLAS_CONST blasint N, double *X, OPENBLAS_CONST blasint incX, double *Y, OPENBLAS_CONST blasint incY, OPENBLAS_CONST double c, OPENBLAS_CONST double s); +void cblas_csrot(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, void *y, OPENBLAS_CONST blasint incY, OPENBLAS_CONST float c, OPENBLAS_CONST float s); +void cblas_zdrot(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, void *y, OPENBLAS_CONST blasint incY, OPENBLAS_CONST double c, OPENBLAS_CONST double s); void cblas_srotg(float *a, float *b, float *c, float *s); void cblas_drotg(double *a, double *b, double *c, double *s); From ac3e2a3fdd2f2e430ff7b6a58aeb8252afc935de Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Tue, 12 Jan 2021 
23:22:00 +0100 Subject: [PATCH 042/134] Add CBLAS interfaces for csrot and zdrot --- interface/Makefile | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/interface/Makefile b/interface/Makefile index 597956fdb..1a440c9c3 100644 --- a/interface/Makefile +++ b/interface/Makefile @@ -316,7 +316,7 @@ CCBLAS1OBJS = \ cblas_cscal.$(SUFFIX) cblas_csscal.$(SUFFIX) \ cblas_cswap.$(SUFFIX) cblas_scnrm2.$(SUFFIX) \ cblas_caxpby.$(SUFFIX) \ - cblas_icmin.$(SUFFIX) cblas_icmax.$(SUFFIX) cblas_scsum.$(SUFFIX) + cblas_icmin.$(SUFFIX) cblas_icmax.$(SUFFIX) cblas_scsum.$(SUFFIX) cblas_csrot.$(SUFFIX) CCBLAS2OBJS = \ cblas_cgemv.$(SUFFIX) cblas_cgerc.$(SUFFIX) cblas_cgeru.$(SUFFIX) \ @@ -346,7 +346,7 @@ CZBLAS1OBJS = \ cblas_zscal.$(SUFFIX) cblas_zdscal.$(SUFFIX) \ cblas_zswap.$(SUFFIX) cblas_dznrm2.$(SUFFIX) \ cblas_zaxpby.$(SUFFIX) \ - cblas_izmin.$(SUFFIX) cblas_izmax.$(SUFFIX) cblas_dzsum.$(SUFFIX) + cblas_izmin.$(SUFFIX) cblas_izmax.$(SUFFIX) cblas_dzsum.$(SUFFIX) cblas_zdrot.$(SUFFIX) CZBLAS2OBJS = \ @@ -1664,6 +1664,12 @@ cblas_csscal.$(SUFFIX) cblas_csscal.$(PSUFFIX) : zscal.c cblas_zdscal.$(SUFFIX) cblas_zdscal.$(PSUFFIX) : zscal.c $(CC) $(CFLAGS) -DCBLAS -c -DSSCAL $< -o $(@F) +cblas_csrot.$(SUFFIX) cblas_csrot.$(PSUFFIX) : zrot.c + $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) + +cblas_zdrot.$(SUFFIX) cblas_zdrot.$(PSUFFIX) : zrot.c + $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) + ifeq ($(BUILD_BFLOAT16),1) cblas_sbgemv.$(SUFFIX) cblas_sbgemv.$(PSUFFIX) : sbgemv.c $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) From 930aff2c2e58f6ffbd0b8a09e1e7029d562749dd Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Wed, 13 Jan 2021 00:27:42 +0100 Subject: [PATCH 043/134] Build CBLAS interfaces for CROTG and ZROTG as well --- Makefile | 2641 ++++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 2288 insertions(+), 353 deletions(-) diff --git a/Makefile b/Makefile index de0735c4a..fab403c82 100644 --- a/Makefile +++ b/Makefile @@ -1,402 +1,2337 @@ -TOPDIR = . 
-include ./Makefile.system +TOPDIR = .. +include $(TOPDIR)/Makefile.system -BLASDIRS = interface driver/level2 driver/level3 driver/others +SUPPORT_GEMM3M = 0 -ifneq ($(DYNAMIC_ARCH), 1) -BLASDIRS += kernel +ifeq ($(ARCH), x86) +SUPPORT_GEMM3M = 1 endif -ifdef SANITY_CHECK -BLASDIRS += reference +ifeq ($(ARCH), x86_64) +SUPPORT_GEMM3M = 1 endif -SUBDIRS = $(BLASDIRS) -ifneq ($(NO_LAPACK), 1) -SUBDIRS += lapack +ifeq ($(ARCH), ia64) +SUPPORT_GEMM3M = 1 endif -RELA = -ifeq ($(BUILD_RELAPACK), 1) -RELA = re_lapack +ifeq ($(ARCH), MIPS) +SUPPORT_GEMM3M = 1 endif -ifeq ($(NO_FORTRAN), 1) -define NOFORTRAN -1 -endef -define NO_LAPACK -1 -endef -export NOFORTRAN -export NO_LAPACK +ifneq ($(NO_FBLAS), 1) + +SBLAS1OBJS = \ + saxpy.$(SUFFIX) sswap.$(SUFFIX) \ + scopy.$(SUFFIX) sscal.$(SUFFIX) \ + sdot.$(SUFFIX) sdsdot.$(SUFFIX) dsdot.$(SUFFIX) \ + sasum.$(SUFFIX) ssum.$(SUFFIX) snrm2.$(SUFFIX) \ + smax.$(SUFFIX) samax.$(SUFFIX) ismax.$(SUFFIX) isamax.$(SUFFIX) \ + smin.$(SUFFIX) samin.$(SUFFIX) ismin.$(SUFFIX) isamin.$(SUFFIX) \ + srot.$(SUFFIX) srotg.$(SUFFIX) srotm.$(SUFFIX) srotmg.$(SUFFIX) \ + saxpby.$(SUFFIX) + +SBLAS2OBJS = \ + sgemv.$(SUFFIX) sger.$(SUFFIX) \ + strsv.$(SUFFIX) strmv.$(SUFFIX) ssymv.$(SUFFIX) \ + ssyr.$(SUFFIX) ssyr2.$(SUFFIX) sgbmv.$(SUFFIX) \ + ssbmv.$(SUFFIX) sspmv.$(SUFFIX) \ + sspr.$(SUFFIX) sspr2.$(SUFFIX) \ + stbsv.$(SUFFIX) stbmv.$(SUFFIX) \ + stpsv.$(SUFFIX) stpmv.$(SUFFIX) + +SBLAS3OBJS = \ + sgemm.$(SUFFIX) ssymm.$(SUFFIX) strmm.$(SUFFIX) \ + strsm.$(SUFFIX) ssyrk.$(SUFFIX) ssyr2k.$(SUFFIX) \ + somatcopy.$(SUFFIX) simatcopy.$(SUFFIX)\ + sgeadd.$(SUFFIX) + +ifeq ($(BUILD_BFLOAT16),1) +SBBLAS1OBJS = sbdot.$(SUFFIX) +SBBLAS2OBJS = sbgemv.$(SUFFIX) +SBBLAS3OBJS = sbgemm.$(SUFFIX) +SBEXTOBJS = sbstobf16.$(SUFFIX) sbdtobf16.$(SUFFIX) sbf16tos.$(SUFFIX) dbf16tod.$(SUFFIX) endif -LAPACK_NOOPT := $(filter-out -O0 -O1 -O2 -O3 -Ofast,$(LAPACK_FFLAGS)) +DBLAS1OBJS = \ + daxpy.$(SUFFIX) dswap.$(SUFFIX) \ + dcopy.$(SUFFIX) dscal.$(SUFFIX) \ + 
ddot.$(SUFFIX) \ + dasum.$(SUFFIX) dsum.$(SUFFIX) dnrm2.$(SUFFIX) \ + dmax.$(SUFFIX) damax.$(SUFFIX) idmax.$(SUFFIX) idamax.$(SUFFIX) \ + dmin.$(SUFFIX) damin.$(SUFFIX) idmin.$(SUFFIX) idamin.$(SUFFIX) \ + drot.$(SUFFIX) drotg.$(SUFFIX) drotm.$(SUFFIX) drotmg.$(SUFFIX) \ + daxpby.$(SUFFIX) -SUBDIRS_ALL = $(SUBDIRS) test ctest utest exports benchmark ../laswp ../bench cpp_thread_test +DBLAS2OBJS = \ + dgemv.$(SUFFIX) dger.$(SUFFIX) \ + dtrsv.$(SUFFIX) dtrmv.$(SUFFIX) dsymv.$(SUFFIX) \ + dsyr.$(SUFFIX) dsyr2.$(SUFFIX) dgbmv.$(SUFFIX) \ + dsbmv.$(SUFFIX) dspmv.$(SUFFIX) \ + dspr.$(SUFFIX) dspr2.$(SUFFIX) \ + dtbsv.$(SUFFIX) dtbmv.$(SUFFIX) \ + dtpsv.$(SUFFIX) dtpmv.$(SUFFIX) -.PHONY : all libs netlib $(RELA) test ctest shared install -.NOTPARALLEL : all libs $(RELA) prof lapack-test install blas-test +DBLAS3OBJS = \ + dgemm.$(SUFFIX) dsymm.$(SUFFIX) dtrmm.$(SUFFIX) \ + dtrsm.$(SUFFIX) dsyrk.$(SUFFIX) dsyr2k.$(SUFFIX) \ + domatcopy.$(SUFFIX) dimatcopy.$(SUFFIX)\ + dgeadd.$(SUFFIX) + +CBLAS1OBJS = \ + caxpy.$(SUFFIX) caxpyc.$(SUFFIX) cswap.$(SUFFIX) \ + ccopy.$(SUFFIX) cscal.$(SUFFIX) csscal.$(SUFFIX) \ + cdotc.$(SUFFIX) cdotu.$(SUFFIX) \ + scasum.$(SUFFIX) scsum.$(SUFFIX) scnrm2.$(SUFFIX) \ + scamax.$(SUFFIX) icamax.$(SUFFIX) \ + scamin.$(SUFFIX) icamin.$(SUFFIX) \ + csrot.$(SUFFIX) crotg.$(SUFFIX) \ + caxpby.$(SUFFIX) + +CBLAS2OBJS = \ + cgemv.$(SUFFIX) cgeru.$(SUFFIX) cgerc.$(SUFFIX) \ + ctrsv.$(SUFFIX) ctrmv.$(SUFFIX) \ + csyr2.$(SUFFIX) cgbmv.$(SUFFIX) \ + csbmv.$(SUFFIX) \ + cspr2.$(SUFFIX) \ + ctbsv.$(SUFFIX) ctbmv.$(SUFFIX) \ + ctpsv.$(SUFFIX) ctpmv.$(SUFFIX) \ + chemv.$(SUFFIX) chbmv.$(SUFFIX) \ + cher.$(SUFFIX) cher2.$(SUFFIX) \ + chpmv.$(SUFFIX) chpr.$(SUFFIX) chpr2.$(SUFFIX) + +CBLAS3OBJS = \ + cgemm.$(SUFFIX) csymm.$(SUFFIX) ctrmm.$(SUFFIX) \ + ctrsm.$(SUFFIX) csyrk.$(SUFFIX) csyr2k.$(SUFFIX) \ + chemm.$(SUFFIX) cherk.$(SUFFIX) cher2k.$(SUFFIX) \ + comatcopy.$(SUFFIX) cimatcopy.$(SUFFIX)\ + cgeadd.$(SUFFIX) + +ZBLAS1OBJS = \ + zaxpy.$(SUFFIX) 
zaxpyc.$(SUFFIX) zswap.$(SUFFIX) \ + zcopy.$(SUFFIX) zscal.$(SUFFIX) zdscal.$(SUFFIX) \ + zdotc.$(SUFFIX) zdotu.$(SUFFIX) \ + dzasum.$(SUFFIX) dzsum.$(SUFFIX) dznrm2.$(SUFFIX) \ + dzamax.$(SUFFIX) izamax.$(SUFFIX) \ + dzamin.$(SUFFIX) izamin.$(SUFFIX) \ + zdrot.$(SUFFIX) zrotg.$(SUFFIX) \ + zaxpby.$(SUFFIX) + +ZBLAS2OBJS = \ + zgemv.$(SUFFIX) zgeru.$(SUFFIX) zgerc.$(SUFFIX) \ + ztrsv.$(SUFFIX) ztrmv.$(SUFFIX) \ + zsyr2.$(SUFFIX) zgbmv.$(SUFFIX) \ + zsbmv.$(SUFFIX) \ + zspr2.$(SUFFIX) \ + ztbsv.$(SUFFIX) ztbmv.$(SUFFIX) \ + ztpsv.$(SUFFIX) ztpmv.$(SUFFIX) \ + zhemv.$(SUFFIX) zhbmv.$(SUFFIX) \ + zher.$(SUFFIX) zher2.$(SUFFIX) \ + zhpmv.$(SUFFIX) zhpr.$(SUFFIX) zhpr2.$(SUFFIX) + +ZBLAS3OBJS = \ + zgemm.$(SUFFIX) zsymm.$(SUFFIX) ztrmm.$(SUFFIX) \ + ztrsm.$(SUFFIX) zsyrk.$(SUFFIX) zsyr2k.$(SUFFIX) \ + zhemm.$(SUFFIX) zherk.$(SUFFIX) zher2k.$(SUFFIX) \ + zomatcopy.$(SUFFIX) zimatcopy.$(SUFFIX)\ + zgeadd.$(SUFFIX) + +ifeq ($(SUPPORT_GEMM3M), 1) + +# CBLAS3OBJS += cgemm3m.$(SUFFIX) csymm3m.$(SUFFIX) chemm3m.$(SUFFIX) +CBLAS3OBJS += cgemm3m.$(SUFFIX) + +# ZBLAS3OBJS += zgemm3m.$(SUFFIX) zsymm3m.$(SUFFIX) zhemm3m.$(SUFFIX) +ZBLAS3OBJS += zgemm3m.$(SUFFIX) -all :: libs netlib $(RELA) tests shared - @echo - @echo " OpenBLAS build complete. ($(LIB_COMPONENTS))" - @echo - @echo " OS ... $(OSNAME) " - @echo " Architecture ... $(ARCH) " -ifndef BINARY64 - @echo " BINARY ... 32bit " -else - @echo " BINARY ... 64bit " endif -ifdef INTERFACE64 -ifneq ($(INTERFACE64), 0) - @echo " Use 64 bits int (equivalent to \"-i8\" in Fortran) " -endif -endif - @$(CC) --version > /dev/null 2>&1;\ - if [ $$? -eq 0 ]; then \ - cverinfo=`$(CC) --version | sed -n '1p'`; \ - if [ -z "$${cverinfo}" ]; then \ - cverinfo=`$(CC) --version | sed -n '2p'`; \ - fi; \ - echo " C compiler ... $(C_COMPILER) (cmd & version : $${cverinfo})";\ - else \ - echo " C compiler ... $(C_COMPILER) (command line : $(CC))";\ - fi -ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN))) - @$(FC) --version > /dev/null 2>&1;\ - if [ $$? 
-eq 0 ]; then \ - fverinfo=`$(FC) --version | sed -n '1p'`; \ - if [ -z "$${fverinfo}" ]; then \ - fverinfo=`$(FC) --version | sed -n '2p'`; \ - fi; \ - echo " Fortran compiler ... $(F_COMPILER) (cmd & version : $${fverinfo})";\ - else \ - echo " Fortran compiler ... $(F_COMPILER) (command line : $(FC))";\ - fi -endif -ifneq ($(OSNAME), AIX) - @echo -n " Library Name ... $(LIBNAME)" -else - @echo " Library Name ... $(LIBNAME)" -endif - -ifndef SMP - @echo " (Single-threading) " -else - @echo " (Multi-threading; Max num-threads is $(NUM_THREADS))" -endif - -ifeq ($(DYNAMIC_ARCH), 1) - @echo " Supporting multiple $(ARCH) cpu models with minimum requirement for the common code being $(CORE)" -endif - -ifeq ($(USE_OPENMP), 1) - @echo - @echo " Use OpenMP in the multithreading. Because of ignoring OPENBLAS_NUM_THREADS and GOTO_NUM_THREADS flags, " - @echo " you should use OMP_NUM_THREADS environment variable to control the number of threads." - @echo -endif - -ifeq ($(OSNAME), Darwin) - @echo "WARNING: If you plan to use the dynamic library $(LIBDYNNAME), you must run:" - @echo - @echo "\"make PREFIX=/your_installation_path/ install\"." - @echo - @echo "(or set PREFIX in Makefile.rule and run make install." - @echo "If you want to move the .dylib to a new location later, make sure you change" - @echo "the internal name of the dylib with:" - @echo - @echo "install_name_tool -id /new/absolute/path/to/$(LIBDYNNAME) $(LIBDYNNAME)" -endif - @echo - @echo "To install the library, you can run \"make PREFIX=/path/to/your/installation install\"." 
- @echo - -shared : -ifneq ($(NO_SHARED), 1) -ifeq ($(OSNAME), $(filter $(OSNAME),Linux SunOS Android Haiku FreeBSD DragonFly)) - @$(MAKE) -C exports so - @ln -fs $(LIBSONAME) $(LIBPREFIX).so - @ln -fs $(LIBSONAME) $(LIBPREFIX).so.$(MAJOR_VERSION) -endif -ifeq ($(OSNAME), $(filter $(OSNAME),OpenBSD NetBSD)) - @$(MAKE) -C exports so - @ln -fs $(LIBSONAME) $(LIBPREFIX).so -endif -ifeq ($(OSNAME), Darwin) - @$(MAKE) -C exports dyn - @ln -fs $(LIBDYNNAME) $(LIBPREFIX).dylib - @ln -fs $(LIBDYNNAME) $(LIBPREFIX).$(MAJOR_VERSION).dylib -endif -ifeq ($(OSNAME), WINNT) - @$(MAKE) -C exports dll -endif -ifeq ($(OSNAME), CYGWIN_NT) - @$(MAKE) -C exports dll -endif -endif - -tests : -ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN))) - touch $(LIBNAME) -ifndef NO_FBLAS - $(MAKE) -C test all -endif - $(MAKE) -C utest all -ifneq ($(NO_CBLAS), 1) - $(MAKE) -C ctest all -ifeq ($(CPP_THREAD_SAFETY_TEST), 1) - $(MAKE) -C cpp_thread_test all -endif -endif -endif - -libs : -ifeq ($(CORE), UNKNOWN) - $(error OpenBLAS: Detecting CPU failed. Please set TARGET explicitly, e.g. make TARGET=your_cpu_target. Please read README for the detail.) -endif -ifeq ($(NOFORTRAN), 1) - $(info OpenBLAS: Detecting fortran compiler failed. Cannot compile LAPACK. Only compile BLAS.) -endif -ifeq ($(NO_STATIC), 1) -ifeq ($(NO_SHARED), 1) - $(error OpenBLAS: neither static nor shared are enabled.) 
-endif -endif - @-ln -fs $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX) - @for d in $(SUBDIRS) ; \ - do if test -d $$d; then \ - $(MAKE) -C $$d $(@F) || exit 1 ; \ - fi; \ - done -#Save the config files for installation - @cp Makefile.conf Makefile.conf_last - @cp config.h config_last.h -ifdef QUAD_PRECISION - @echo "#define QUAD_PRECISION">> config_last.h -endif ifeq ($(EXPRECISION), 1) - @echo "#define EXPRECISION">> config_last.h -endif -## -ifeq ($(DYNAMIC_ARCH), 1) - @$(MAKE) -C kernel commonlibs || exit 1 - @for d in $(DYNAMIC_CORE) ; \ - do $(MAKE) GOTOBLAS_MAKEFILE= -C kernel TARGET_CORE=$$d kernel || exit 1 ;\ - done - @echo DYNAMIC_ARCH=1 >> Makefile.conf_last -ifeq ($(DYNAMIC_OLDER), 1) - @echo DYNAMIC_OLDER=1 >> Makefile.conf_last -endif -endif -ifdef USE_THREAD - @echo USE_THREAD=$(USE_THREAD) >> Makefile.conf_last -endif - @touch lib.grd -prof : prof_blas prof_lapack +QBLAS1OBJS = \ + qaxpy.$(SUFFIX) qswap.$(SUFFIX) \ + qcopy.$(SUFFIX) qscal.$(SUFFIX) \ + qdot.$(SUFFIX) \ + qasum.$(SUFFIX) qsum.$(SUFFIX) qnrm2.$(SUFFIX) \ + qmax.$(SUFFIX) qamax.$(SUFFIX) iqmax.$(SUFFIX) iqamax.$(SUFFIX) \ + qmin.$(SUFFIX) qamin.$(SUFFIX) iqmin.$(SUFFIX) iqamin.$(SUFFIX) \ + qrot.$(SUFFIX) qrotg.$(SUFFIX) qrotm.$(SUFFIX) qrotmg.$(SUFFIX) \ + +QBLAS2OBJS = \ + qgemv.$(SUFFIX) qger.$(SUFFIX) \ + qtrsv.$(SUFFIX) qtrmv.$(SUFFIX) qsymv.$(SUFFIX) \ + qsyr.$(SUFFIX) qsyr2.$(SUFFIX) qgbmv.$(SUFFIX) \ + qsbmv.$(SUFFIX) qspmv.$(SUFFIX) \ + qspr.$(SUFFIX) qspr2.$(SUFFIX) \ + qtbsv.$(SUFFIX) qtbmv.$(SUFFIX) \ + qtpsv.$(SUFFIX) qtpmv.$(SUFFIX) + +QBLAS3OBJS = \ + qgemm.$(SUFFIX) qsymm.$(SUFFIX) qtrmm.$(SUFFIX) \ + qtrsm.$(SUFFIX) qsyrk.$(SUFFIX) qsyr2k.$(SUFFIX) + +XBLAS1OBJS = \ + xaxpy.$(SUFFIX) xaxpyc.$(SUFFIX) xswap.$(SUFFIX) \ + xcopy.$(SUFFIX) xscal.$(SUFFIX) xqscal.$(SUFFIX) \ + xdotc.$(SUFFIX) xdotu.$(SUFFIX) \ + qxasum.$(SUFFIX) qxsum.$(SUFFIX) qxnrm2.$(SUFFIX) \ + qxamax.$(SUFFIX) ixamax.$(SUFFIX) \ + qxamin.$(SUFFIX) ixamin.$(SUFFIX) \ + xqrot.$(SUFFIX) xrotg.$(SUFFIX) \ + 
+XBLAS2OBJS = \ + xgemv.$(SUFFIX) xgeru.$(SUFFIX) xgerc.$(SUFFIX) \ + xtrsv.$(SUFFIX) xtrmv.$(SUFFIX) xsymv.$(SUFFIX) \ + xsyr.$(SUFFIX) xsyr2.$(SUFFIX) xgbmv.$(SUFFIX) \ + xsbmv.$(SUFFIX) xspmv.$(SUFFIX) \ + xspr.$(SUFFIX) xspr2.$(SUFFIX) \ + xtbsv.$(SUFFIX) xtbmv.$(SUFFIX) \ + xtpsv.$(SUFFIX) xtpmv.$(SUFFIX) \ + xhemv.$(SUFFIX) xhbmv.$(SUFFIX) \ + xher.$(SUFFIX) xher2.$(SUFFIX) \ + xhpmv.$(SUFFIX) xhpr.$(SUFFIX) xhpr2.$(SUFFIX) + +XBLAS3OBJS = \ + xgemm.$(SUFFIX) xsymm.$(SUFFIX) xtrmm.$(SUFFIX) \ + xtrsm.$(SUFFIX) xsyrk.$(SUFFIX) xsyr2k.$(SUFFIX) \ + xhemm.$(SUFFIX) xherk.$(SUFFIX) xher2k.$(SUFFIX) + +ifeq ($(SUPPORT_GEMM3M), 1) + +XBLAS3OBJS += xgemm3m.$(SUFFIX) xsymm3m.$(SUFFIX) xhemm3m.$(SUFFIX) -prof_blas : - ln -fs $(LIBNAME_P) $(LIBPREFIX)_p.$(LIBSUFFIX) - for d in $(SUBDIRS) ; \ - do if test -d $$d; then \ - $(MAKE) -C $$d prof || exit 1 ; \ - fi; \ - done -ifeq ($(DYNAMIC_ARCH), 1) - $(MAKE) -C kernel commonprof || exit 1 endif -blas : - ln -fs $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX) - for d in $(BLASDIRS) ; \ - do if test -d $$d; then \ - $(MAKE) -C $$d libs || exit 1 ; \ - fi; \ - done - -hpl : - ln -fs $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX) - for d in $(BLASDIRS) ../laswp exports ; \ - do if test -d $$d; then \ - $(MAKE) -C $$d $(@F) || exit 1 ; \ - fi; \ - done -ifeq ($(DYNAMIC_ARCH), 1) - $(MAKE) -C kernel commonlibs || exit 1 - for d in $(DYNAMIC_CORE) ; \ - do $(MAKE) GOTOBLAS_MAKEFILE= -C kernel TARGET_CORE=$$d kernel || exit 1 ;\ - done endif -hpl_p : - ln -fs $(LIBNAME_P) $(LIBPREFIX)_p.$(LIBSUFFIX) - for d in $(SUBDIRS) ../laswp exports ; \ - do if test -d $$d; then \ - $(MAKE) -C $$d $(@F) || exit 1 ; \ - fi; \ - done +ifdef QUAD_PRECISION -ifeq ($(NO_LAPACK), 1) -netlib : +QBLAS1OBJS = \ + qaxpy.$(SUFFIX) qswap.$(SUFFIX) \ + qcopy.$(SUFFIX) qscal.$(SUFFIX) \ + qasum.$(SUFFIX) qsum.$(SUFFIX) qnrm2.$(SUFFIX) \ + qmax.$(SUFFIX) qamax.$(SUFFIX) iqmax.$(SUFFIX) iqamax.$(SUFFIX) \ + qmin.$(SUFFIX) qamin.$(SUFFIX) iqmin.$(SUFFIX) iqamin.$(SUFFIX) \ + 
qrot.$(SUFFIX) qrotg.$(SUFFIX) qrotm.$(SUFFIX) qrotmg.$(SUFFIX) \ + +QBLAS2OBJS = \ + qgemv.$(SUFFIX) qger.$(SUFFIX) \ + qtrsv.$(SUFFIX) qtrmv.$(SUFFIX) qsymv.$(SUFFIX) \ + qsyr.$(SUFFIX) qsyr2.$(SUFFIX) qgbmv.$(SUFFIX) \ + qsbmv.$(SUFFIX) qspmv.$(SUFFIX) \ + qspr.$(SUFFIX) qspr2.$(SUFFIX) \ + qtbsv.$(SUFFIX) qtbmv.$(SUFFIX) \ + qtpsv.$(SUFFIX) qtpmv.$(SUFFIX) + +QBLAS3OBJS = \ + qgemm.$(SUFFIX) qsymm.$(SUFFIX) qtrmm.$(SUFFIX) \ + qtrsm.$(SUFFIX) qsyrk.$(SUFFIX) qsyr2k.$(SUFFIX) + +XBLAS1OBJS = \ + xaxpy.$(SUFFIX) xaxpyc.$(SUFFIX) xswap.$(SUFFIX) \ + xcopy.$(SUFFIX) xscal.$(SUFFIX) xqscal.$(SUFFIX) \ + qxasum.$(SUFFIX) qxsum.$(SUFFIX) qxnrm2.$(SUFFIX) \ + qxamax.$(SUFFIX) ixamax.$(SUFFIX) \ + qxamin.$(SUFFIX) ixamin.$(SUFFIX) \ + xqrot.$(SUFFIX) xrotg.$(SUFFIX) \ + +XBLAS2OBJS = \ + xgemv.$(SUFFIX) xgeru.$(SUFFIX) xgerc.$(SUFFIX) \ + xtrsv.$(SUFFIX) xtrmv.$(SUFFIX) xsymv.$(SUFFIX) \ + xsyr.$(SUFFIX) xsyr2.$(SUFFIX) xgbmv.$(SUFFIX) \ + xsbmv.$(SUFFIX) xspmv.$(SUFFIX) \ + xspr.$(SUFFIX) xspr2.$(SUFFIX) \ + xtbsv.$(SUFFIX) xtbmv.$(SUFFIX) \ + xtpsv.$(SUFFIX) xtpmv.$(SUFFIX) \ + xhemv.$(SUFFIX) xhbmv.$(SUFFIX) \ + xher.$(SUFFIX) xher2.$(SUFFIX) \ + xhpmv.$(SUFFIX) xhpr.$(SUFFIX) xhpr2.$(SUFFIX) + +XBLAS3OBJS = \ + xgemm.$(SUFFIX) xsymm.$(SUFFIX) xtrmm.$(SUFFIX) \ + xtrsm.$(SUFFIX) xsyrk.$(SUFFIX) xsyr2k.$(SUFFIX) \ + xhemm.$(SUFFIX) xherk.$(SUFFIX) xher2k.$(SUFFIX) + +ifeq ($(SUPPORT_GEMM3M), 1) + +XBLAS3OBJS += xgemm3m.$(SUFFIX) xsymm3m.$(SUFFIX) xhemm3m.$(SUFFIX) -else -netlib : lapack_prebuild -ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN))) - @$(MAKE) -C $(NETLIB_LAPACK_DIR) lapacklib - @$(MAKE) -C $(NETLIB_LAPACK_DIR) tmglib -endif -ifneq ($(NO_LAPACKE), 1) - @$(MAKE) -C $(NETLIB_LAPACK_DIR) lapackelib endif endif -ifeq ($(NO_LAPACK), 1) -re_lapack : - -else -re_lapack : - @$(MAKE) -C relapack endif -prof_lapack : lapack_prebuild - @$(MAKE) -C $(NETLIB_LAPACK_DIR) lapack_prof +HPLOBJS = dgemm.$(SUFFIX) dtrsm.$(SUFFIX) \ + dgemv.$(SUFFIX) dtrsv.$(SUFFIX) 
dger.$(SUFFIX) \ + idamax.$(SUFFIX) daxpy.$(SUFFIX) dcopy.$(SUFFIX) dscal.$(SUFFIX) -lapack_prebuild : -ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN))) - -@echo "FC = $(FC)" > $(NETLIB_LAPACK_DIR)/make.inc - -@echo "FFLAGS = $(LAPACK_FFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc - -@echo "FFLAGS_DRV = $(LAPACK_FFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc - -@echo "POPTS = $(LAPACK_FPFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc - -@echo "FFLAGS_NOOPT = -O0 $(LAPACK_NOOPT)" >> $(NETLIB_LAPACK_DIR)/make.inc - -@echo "PNOOPT = $(LAPACK_FPFLAGS) -O0" >> $(NETLIB_LAPACK_DIR)/make.inc -ifeq ($(C_COMPILER)$(F_COMPILER)$(USE_OPENMP), CLANGGFORTRAN1) - -@echo "LDFLAGS = $(FFLAGS) $(EXTRALIB) -lomp" >> $(NETLIB_LAPACK_DIR)/make.inc -else - -@echo "LDFLAGS = $(FFLAGS) $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc -endif - -@echo "CC = $(CC)" >> $(NETLIB_LAPACK_DIR)/make.inc - -@echo "override CFLAGS = $(LAPACK_CFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc - -@echo "AR = $(AR)" >> $(NETLIB_LAPACK_DIR)/make.inc - -@echo "ARFLAGS = $(ARFLAGS) -ru" >> $(NETLIB_LAPACK_DIR)/make.inc - -@echo "RANLIB = $(RANLIB)" >> $(NETLIB_LAPACK_DIR)/make.inc - -@echo "LAPACKLIB = ../../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc - -@echo "TMGLIB = ../../../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc - -@echo "BLASLIB = ../../../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc - -@echo "LAPACKELIB = ../../../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc - -@echo "LAPACKLIB_P = ../$(LIBNAME_P)" >> $(NETLIB_LAPACK_DIR)/make.inc - -@echo "SUFFIX = $(SUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc - -@echo "PSUFFIX = $(PSUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc - -@echo "CEXTRALIB = $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc -ifeq ($(F_COMPILER), GFORTRAN) - -@echo "TIMER = INT_ETIME" >> $(NETLIB_LAPACK_DIR)/make.inc -ifdef SMP -ifeq ($(OSNAME), WINNT) - -@echo "LOADER = $(FC)" >> $(NETLIB_LAPACK_DIR)/make.inc -else ifeq ($(OSNAME), Haiku) - -@echo "LOADER = $(FC)" >> $(NETLIB_LAPACK_DIR)/make.inc -else - -@echo "LOADER = 
$(FC) -pthread" >> $(NETLIB_LAPACK_DIR)/make.inc -endif -else - -@echo "LOADER = $(FC)" >> $(NETLIB_LAPACK_DIR)/make.inc -endif -else - -@echo "TIMER = NONE" >> $(NETLIB_LAPACK_DIR)/make.inc - -@echo "LOADER = $(FC)" >> $(NETLIB_LAPACK_DIR)/make.inc -endif -ifeq ($(BUILD_LAPACK_DEPRECATED), 1) - -@echo "BUILD_DEPRECATED = 1" >> $(NETLIB_LAPACK_DIR)/make.inc -endif -ifeq ($(BUILD_SINGLE), 1) - -@echo "BUILD_SINGLE = 1" >> $(NETLIB_LAPACK_DIR)/make.inc -endif -ifeq ($(BUILD_DOUBLE), 1) - -@echo "BUILD_DOUBLE = 1" >> $(NETLIB_LAPACK_DIR)/make.inc -endif -ifeq ($(BUILD_COMPLEX), 1) - -@echo "BUILD_COMPLEX = 1" >> $(NETLIB_LAPACK_DIR)/make.inc -endif -ifeq ($(BUILD_COMPLEX16), 1) - -@echo "BUILD_COMPLEX16 = 1" >> $(NETLIB_LAPACK_DIR)/make.inc -endif - -@echo "LAPACKE_WITH_TMG = 1" >> $(NETLIB_LAPACK_DIR)/make.inc - -@cat make.inc >> $(NETLIB_LAPACK_DIR)/make.inc +CSBLAS1OBJS = \ + cblas_isamax.$(SUFFIX) cblas_isamin.$(SUFFIX) cblas_sasum.$(SUFFIX) cblas_saxpy.$(SUFFIX) \ + cblas_scopy.$(SUFFIX) cblas_sdot.$(SUFFIX) cblas_sdsdot.$(SUFFIX) cblas_dsdot.$(SUFFIX) \ + cblas_srot.$(SUFFIX) cblas_srotg.$(SUFFIX) cblas_srotm.$(SUFFIX) cblas_srotmg.$(SUFFIX) \ + cblas_sscal.$(SUFFIX) cblas_sswap.$(SUFFIX) cblas_snrm2.$(SUFFIX) cblas_saxpby.$(SUFFIX) \ + cblas_ismin.$(SUFFIX) cblas_ismax.$(SUFFIX) cblas_ssum.$(SUFFIX) + +CSBLAS2OBJS = \ + cblas_sgemv.$(SUFFIX) cblas_sger.$(SUFFIX) cblas_ssymv.$(SUFFIX) cblas_strmv.$(SUFFIX) \ + cblas_strsv.$(SUFFIX) cblas_ssyr.$(SUFFIX) cblas_ssyr2.$(SUFFIX) cblas_sgbmv.$(SUFFIX) \ + cblas_ssbmv.$(SUFFIX) cblas_sspmv.$(SUFFIX) cblas_sspr.$(SUFFIX) cblas_sspr2.$(SUFFIX) \ + cblas_stbmv.$(SUFFIX) cblas_stbsv.$(SUFFIX) cblas_stpmv.$(SUFFIX) cblas_stpsv.$(SUFFIX) + +CSBLAS3OBJS = \ + cblas_sgemm.$(SUFFIX) cblas_ssymm.$(SUFFIX) cblas_strmm.$(SUFFIX) cblas_strsm.$(SUFFIX) \ + cblas_ssyrk.$(SUFFIX) cblas_ssyr2k.$(SUFFIX) cblas_somatcopy.$(SUFFIX) cblas_simatcopy.$(SUFFIX)\ + cblas_sgeadd.$(SUFFIX) + +ifeq ($(BUILD_BFLOAT16),1) +CSBBLAS1OBJS = 
cblas_sbdot.$(SUFFIX) +CSBBLAS2OBJS = cblas_sbgemv.$(SUFFIX) +CSBBLAS3OBJS = cblas_sbgemm.$(SUFFIX) +CSBEXTOBJS = cblas_sbstobf16.$(SUFFIX) cblas_sbdtobf16.$(SUFFIX) cblas_sbf16tos.$(SUFFIX) cblas_dbf16tod.$(SUFFIX) endif -large.tgz : -ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN))) - if [ ! -a $< ]; then - -wget http://www.netlib.org/lapack/timing/large.tgz; - fi -endif +CDBLAS1OBJS = \ + cblas_idamax.$(SUFFIX) cblas_idamin.$(SUFFIX) cblas_dasum.$(SUFFIX) cblas_daxpy.$(SUFFIX) \ + cblas_dcopy.$(SUFFIX) cblas_ddot.$(SUFFIX) \ + cblas_drot.$(SUFFIX) cblas_drotg.$(SUFFIX) cblas_drotm.$(SUFFIX) cblas_drotmg.$(SUFFIX) \ + cblas_dscal.$(SUFFIX) cblas_dswap.$(SUFFIX) cblas_dnrm2.$(SUFFIX) cblas_daxpby.$(SUFFIX) \ + cblas_idmin.$(SUFFIX) cblas_idmax.$(SUFFIX) cblas_dsum.$(SUFFIX) -timing.tgz : -ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN))) - if [ ! -a $< ]; then - -wget http://www.netlib.org/lapack/timing/timing.tgz; - fi -endif +CDBLAS2OBJS = \ + cblas_dgemv.$(SUFFIX) cblas_dger.$(SUFFIX) cblas_dsymv.$(SUFFIX) cblas_dtrmv.$(SUFFIX) \ + cblas_dtrsv.$(SUFFIX) cblas_dsyr.$(SUFFIX) cblas_dsyr2.$(SUFFIX) cblas_dgbmv.$(SUFFIX) \ + cblas_dsbmv.$(SUFFIX) cblas_dspmv.$(SUFFIX) cblas_dspr.$(SUFFIX) cblas_dspr2.$(SUFFIX) \ + cblas_dtbmv.$(SUFFIX) cblas_dtbsv.$(SUFFIX) cblas_dtpmv.$(SUFFIX) cblas_dtpsv.$(SUFFIX) + +CDBLAS3OBJS += \ + cblas_dgemm.$(SUFFIX) cblas_dsymm.$(SUFFIX) cblas_dtrmm.$(SUFFIX) cblas_dtrsm.$(SUFFIX) \ + cblas_dsyrk.$(SUFFIX) cblas_dsyr2k.$(SUFFIX) cblas_domatcopy.$(SUFFIX) cblas_dimatcopy.$(SUFFIX) \ + cblas_dgeadd.$(SUFFIX) + +CCBLAS1OBJS = \ + cblas_icamax.$(SUFFIX) cblas_icamin.$(SUFFIX) cblas_scasum.$(SUFFIX) cblas_caxpy.$(SUFFIX) \ + cblas_ccopy.$(SUFFIX) \ + cblas_cdotc.$(SUFFIX) cblas_cdotu.$(SUFFIX) \ + cblas_cdotc_sub.$(SUFFIX) cblas_cdotu_sub.$(SUFFIX) \ + cblas_cscal.$(SUFFIX) cblas_csscal.$(SUFFIX) \ + cblas_cswap.$(SUFFIX) cblas_scnrm2.$(SUFFIX) \ + cblas_caxpby.$(SUFFIX) \ + cblas_icmin.$(SUFFIX) cblas_icmax.$(SUFFIX) cblas_scsum.$(SUFFIX) 
cblas_csrot.$(SUFFIX) cblas_crotg.$(SUFFIX) + +CCBLAS2OBJS = \ + cblas_cgemv.$(SUFFIX) cblas_cgerc.$(SUFFIX) cblas_cgeru.$(SUFFIX) \ + cblas_cgbmv.$(SUFFIX) cblas_chbmv.$(SUFFIX) cblas_chemv.$(SUFFIX) \ + cblas_cher.$(SUFFIX) cblas_cher2.$(SUFFIX) cblas_chpmv.$(SUFFIX) \ + cblas_chpr.$(SUFFIX) cblas_chpr2.$(SUFFIX) cblas_ctbmv.$(SUFFIX) \ + cblas_ctbsv.$(SUFFIX) cblas_ctpmv.$(SUFFIX) cblas_ctpsv.$(SUFFIX) \ + cblas_ctrmv.$(SUFFIX) cblas_ctrsv.$(SUFFIX) + +CCBLAS3OBJS = \ + cblas_cgemm.$(SUFFIX) cblas_csymm.$(SUFFIX) cblas_ctrmm.$(SUFFIX) cblas_ctrsm.$(SUFFIX) \ + cblas_csyrk.$(SUFFIX) cblas_csyr2k.$(SUFFIX) \ + cblas_chemm.$(SUFFIX) cblas_cherk.$(SUFFIX) cblas_cher2k.$(SUFFIX) \ + cblas_comatcopy.$(SUFFIX) cblas_cimatcopy.$(SUFFIX)\ + cblas_cgeadd.$(SUFFIX) + +CXERBLAOBJ = \ + cblas_xerbla.$(SUFFIX) + + + +CZBLAS1OBJS = \ + cblas_izamax.$(SUFFIX) cblas_izamin.$(SUFFIX) cblas_dzasum.$(SUFFIX) cblas_zaxpy.$(SUFFIX) \ + cblas_zcopy.$(SUFFIX) \ + cblas_zdotc.$(SUFFIX) cblas_zdotu.$(SUFFIX) \ + cblas_zdotc_sub.$(SUFFIX) cblas_zdotu_sub.$(SUFFIX) \ + cblas_zscal.$(SUFFIX) cblas_zdscal.$(SUFFIX) \ + cblas_zswap.$(SUFFIX) cblas_dznrm2.$(SUFFIX) \ + cblas_zaxpby.$(SUFFIX) \ + cblas_izmin.$(SUFFIX) cblas_izmax.$(SUFFIX) cblas_dzsum.$(SUFFIX) cblas_zdrot.$(SUFFIX) cblas_zrotg.$(SUFFIX) + + +CZBLAS2OBJS = \ + cblas_zgemv.$(SUFFIX) cblas_zgerc.$(SUFFIX) cblas_zgeru.$(SUFFIX) \ + cblas_zgbmv.$(SUFFIX) cblas_zhbmv.$(SUFFIX) cblas_zhemv.$(SUFFIX) \ + cblas_zher.$(SUFFIX) cblas_zher2.$(SUFFIX) cblas_zhpmv.$(SUFFIX) \ + cblas_zhpr.$(SUFFIX) cblas_zhpr2.$(SUFFIX) cblas_ztbmv.$(SUFFIX) \ + cblas_ztbsv.$(SUFFIX) cblas_ztpmv.$(SUFFIX) cblas_ztpsv.$(SUFFIX) \ + cblas_ztrmv.$(SUFFIX) cblas_ztrsv.$(SUFFIX) + +CZBLAS3OBJS = \ + cblas_zgemm.$(SUFFIX) cblas_zsymm.$(SUFFIX) cblas_ztrmm.$(SUFFIX) cblas_ztrsm.$(SUFFIX) \ + cblas_zsyrk.$(SUFFIX) cblas_zsyr2k.$(SUFFIX) \ + cblas_zhemm.$(SUFFIX) cblas_zherk.$(SUFFIX) cblas_zher2k.$(SUFFIX)\ + cblas_zomatcopy.$(SUFFIX) cblas_zimatcopy.$(SUFFIX) \ + 
cblas_zgeadd.$(SUFFIX) + + +ifeq ($(SUPPORT_GEMM3M), 1) + +# CBLAS3OBJS += cgemm3m.$(SUFFIX) csymm3m.$(SUFFIX) chemm3m.$(SUFFIX) +CCBLAS3OBJS += cblas_cgemm3m.$(SUFFIX) + +# ZBLAS3OBJS += zgemm3m.$(SUFFIX) zsymm3m.$(SUFFIX) zhemm3m.$(SUFFIX) +CZBLAS3OBJS += cblas_zgemm3m.$(SUFFIX) -lapack-timing : large.tgz timing.tgz -ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN))) - (cd $(NETLIB_LAPACK_DIR); $(TAR) zxf ../timing.tgz TIMING) - (cd $(NETLIB_LAPACK_DIR)/TIMING; $(TAR) zxf ../../large.tgz ) - $(MAKE) -C $(NETLIB_LAPACK_DIR)/TIMING endif -lapack-test : - (cd $(NETLIB_LAPACK_DIR)/TESTING && rm -f x* *.out) - $(MAKE) -j 1 -C $(NETLIB_LAPACK_DIR)/TESTING/EIG xeigtstc xeigtstd xeigtsts xeigtstz - $(MAKE) -j 1 -C $(NETLIB_LAPACK_DIR)/TESTING/LIN xlintstc xlintstd xlintstds xlintstrfd xlintstrfz xlintsts xlintstz xlintstzc xlintstrfs xlintstrfc -ifneq ($(CROSS), 1) - ( cd $(NETLIB_LAPACK_DIR)/INSTALL; $(MAKE) all; ./testlsame; ./testslamch; ./testdlamch; \ - ./testsecond; ./testdsecnd; ./testieee; ./testversion ) - (cd $(NETLIB_LAPACK_DIR); ./lapack_testing.py -r -b TESTING) +ifneq ($(NO_CBLAS), 1) + +override CFLAGS += -I. 
+ +SBLAS1OBJS += $(CSBLAS1OBJS) +SBLAS2OBJS += $(CSBLAS2OBJS) +SBLAS3OBJS += $(CSBLAS3OBJS) +SBBLAS1OBJS += $(CSBBLAS1OBJS) +SBBLAS2OBJS += $(CSBBLAS2OBJS) +SBBLAS3OBJS += $(CSBBLAS3OBJS) +DBLAS1OBJS += $(CDBLAS1OBJS) +DBLAS2OBJS += $(CDBLAS2OBJS) +DBLAS3OBJS += $(CDBLAS3OBJS) +CBLAS1OBJS += $(CCBLAS1OBJS) +CBLAS2OBJS += $(CCBLAS2OBJS) +CBLAS3OBJS += $(CCBLAS3OBJS) +ZBLAS1OBJS += $(CZBLAS1OBJS) +ZBLAS2OBJS += $(CZBLAS2OBJS) +ZBLAS3OBJS += $(CZBLAS3OBJS) + +SBEXTOBJS += $(CSBEXTOBJS) + +CBAUXOBJS += $(CXERBLAOBJ) endif -lapack-runtest: - ( cd $(NETLIB_LAPACK_DIR)/INSTALL; ./testlsame; ./testslamch; ./testdlamch; \ - ./testsecond; ./testdsecnd; ./testieee; ./testversion ) - (cd $(NETLIB_LAPACK_DIR); ./lapack_testing.py -r ) +SBLASOBJS = $(SBLAS1OBJS) $(SBLAS2OBJS) $(SBLAS3OBJS) +SBBLASOBJS = $(SBBLAS1OBJS) $(SBBLAS2OBJS) $(SBBLAS3OBJS) +DBLASOBJS = $(DBLAS1OBJS) $(DBLAS2OBJS) $(DBLAS3OBJS) +QBLASOBJS = $(QBLAS1OBJS) $(QBLAS2OBJS) $(QBLAS3OBJS) +CBLASOBJS = $(CBLAS1OBJS) $(CBLAS2OBJS) $(CBLAS3OBJS) +ZBLASOBJS = $(ZBLAS1OBJS) $(ZBLAS2OBJS) $(ZBLAS3OBJS) +XBLASOBJS = $(XBLAS1OBJS) $(XBLAS2OBJS) $(XBLAS3OBJS) + +#SLAPACKOBJS = \ +# sgetrf.$(SUFFIX) sgetrs.$(SUFFIX) spotrf.$(SUFFIX) sgetf2.$(SUFFIX) \ +# spotf2.$(SUFFIX) slaswp.$(SUFFIX) sgesv.$(SUFFIX) slauu2.$(SUFFIX) \ +# slauum.$(SUFFIX) strti2.$(SUFFIX) strtri.$(SUFFIX) spotri.$(SUFFIX) + +SLAPACKOBJS = \ + sgetrf.$(SUFFIX) sgetrs.$(SUFFIX) spotrf.$(SUFFIX) sgetf2.$(SUFFIX) \ + spotf2.$(SUFFIX) slaswp.$(SUFFIX) sgesv.$(SUFFIX) slauu2.$(SUFFIX) \ + slauum.$(SUFFIX) strti2.$(SUFFIX) strtri.$(SUFFIX) strtrs.$(SUFFIX) -blas-test: - (cd $(NETLIB_LAPACK_DIR)/BLAS/TESTING && rm -f x* *.out) - $(MAKE) -j 1 -C $(NETLIB_LAPACK_DIR) blas_testing - (cd $(NETLIB_LAPACK_DIR)/BLAS/TESTING && cat *.out) +#DLAPACKOBJS = \ +# dgetrf.$(SUFFIX) dgetrs.$(SUFFIX) dpotrf.$(SUFFIX) dgetf2.$(SUFFIX) \ +# dpotf2.$(SUFFIX) dlaswp.$(SUFFIX) dgesv.$(SUFFIX) dlauu2.$(SUFFIX) \ +# dlauum.$(SUFFIX) dtrti2.$(SUFFIX) dtrtri.$(SUFFIX) dpotri.$(SUFFIX) 
+ +DLAPACKOBJS = \ + dgetrf.$(SUFFIX) dgetrs.$(SUFFIX) dpotrf.$(SUFFIX) dgetf2.$(SUFFIX) \ + dpotf2.$(SUFFIX) dlaswp.$(SUFFIX) dgesv.$(SUFFIX) dlauu2.$(SUFFIX) \ + dlauum.$(SUFFIX) dtrti2.$(SUFFIX) dtrtri.$(SUFFIX) dtrtrs.$(SUFFIX) -dummy : +QLAPACKOBJS = \ + qgetf2.$(SUFFIX) qgetrf.$(SUFFIX) qlauu2.$(SUFFIX) qlauum.$(SUFFIX) \ + qpotf2.$(SUFFIX) qpotrf.$(SUFFIX) qtrti2.$(SUFFIX) qtrtri.$(SUFFIX) \ + qlaswp.$(SUFFIX) qtrtrs.$(SUFFIX) qgesv.$(SUFFIX) qpotri.$(SUFFIX) \ + qtrtrs.$(SUFFIX) -install : - $(MAKE) -f Makefile.install install +#CLAPACKOBJS = \ +# cgetrf.$(SUFFIX) cgetrs.$(SUFFIX) cpotrf.$(SUFFIX) cgetf2.$(SUFFIX) \ +# cpotf2.$(SUFFIX) claswp.$(SUFFIX) cgesv.$(SUFFIX) clauu2.$(SUFFIX) \ +# clauum.$(SUFFIX) ctrti2.$(SUFFIX) ctrtri.$(SUFFIX) cpotri.$(SUFFIX) + +CLAPACKOBJS = \ + cgetrf.$(SUFFIX) cgetrs.$(SUFFIX) cpotrf.$(SUFFIX) cgetf2.$(SUFFIX) \ + cpotf2.$(SUFFIX) claswp.$(SUFFIX) cgesv.$(SUFFIX) clauu2.$(SUFFIX) \ + clauum.$(SUFFIX) ctrti2.$(SUFFIX) ctrtri.$(SUFFIX) ctrtrs.$(SUFFIX) + +#ZLAPACKOBJS = \ +# zgetrf.$(SUFFIX) zgetrs.$(SUFFIX) zpotrf.$(SUFFIX) zgetf2.$(SUFFIX) \ +# zpotf2.$(SUFFIX) zlaswp.$(SUFFIX) zgesv.$(SUFFIX) zlauu2.$(SUFFIX) \ +# zlauum.$(SUFFIX) ztrti2.$(SUFFIX) ztrtri.$(SUFFIX) zpotri.$(SUFFIX) + + +ZLAPACKOBJS = \ + zgetrf.$(SUFFIX) zgetrs.$(SUFFIX) zpotrf.$(SUFFIX) zgetf2.$(SUFFIX) \ + zpotf2.$(SUFFIX) zlaswp.$(SUFFIX) zgesv.$(SUFFIX) zlauu2.$(SUFFIX) \ + zlauum.$(SUFFIX) ztrti2.$(SUFFIX) ztrtri.$(SUFFIX) ztrtrs.$(SUFFIX) + + +XLAPACKOBJS = \ + xgetf2.$(SUFFIX) xgetrf.$(SUFFIX) xlauu2.$(SUFFIX) xlauum.$(SUFFIX) \ + xpotf2.$(SUFFIX) xpotrf.$(SUFFIX) xtrti2.$(SUFFIX) xtrtri.$(SUFFIX) \ + xlaswp.$(SUFFIX) xtrtrs.$(SUFFIX) xgesv.$(SUFFIX) xpotri.$(SUFFIX) \ + xtrtrs.$(SUFFIX) + +ifneq ($(NO_LAPACK), 1) +SBLASOBJS += $(SLAPACKOBJS) +DBLASOBJS += $(DLAPACKOBJS) +#QBLASOBJS += $(QLAPACKOBJS) +CBLASOBJS += $(CLAPACKOBJS) +ZBLASOBJS += $(ZLAPACKOBJS) +#XBLASOBJS += $(XLAPACKOBJS) + +endif + +ifneq ($(BUILD_SINGLE),1) + SBLASOBJS= +ifeq 
($(BUILD_DOUBLE),1) + SBLASOBJS = dsdot.$(SUFFIX) cblas_dsdot.$(SUFFIX) strsm.$(SUFFIX) \ + sgetrs.$(SUFFIX) sgetrf.$(SUFFIX) spotf2.$(SUFFIX) spotrf.$(SUFFIX) \ + ssyrk.$(SUFFIX) sgemv.$(SUFFIX) +endif +ifeq ($(BUILD_COMPLEX),1) + SBLASOBJS = \ + sdot.$(SUFFIX) srot.$(SUFFIX) snrm2.$(SUFFIX) sswap.$(SUFFIX) \ + isamax.$(SUFFIX) saxpy.$(SUFFIX) sscal.$(SUFFIX) scopy.$(SUFFIX) \ + sgemv.$(SUFFIX) sgemm.$(SUFFIX) +endif +endif +ifneq ($(BUILD_DOUBLE),1) + DBLASOBJS= +ifeq ($(BUILD_COMPLEX16),1) + DBLASOBJS = \ + ddot.$(SUFFIX) drot.$(SUFFIX) dnrm2.$(SUFFIX) dswap.$(SUFFIX) \ + idamax.$(SUFFIX) daxpy.$(SUFFIX) dscal.$(SUFFIX) dcopy.$(SUFFIX) \ + dgemv.$(SUFFIX) dgemm.$(SUFFIX) +endif +endif +ifneq ($(BUILD_COMPLEX),1) + CBLASOBJS= +ifeq ($(BUILD_COMPLEX16),1) + CBLASOBJS = cgetrs.$(SUFFIX) cblas_cdotu_sub.$(SUFFIX) cgetrf.$(SUFFIX) \ + cpotrf.$(SUFFIX) ctrsm.$(SUFFIX) cblas_cdotc_sub.$(SUFFIX) +endif +endif +ifneq ($(BUILD_COMPLEX16),1) + ZBLASOBJS= +endif + +FUNCOBJS = $(SBEXTOBJS) $(CXERBLAOBJS) $(SBBLASOBJS) $(SBLASOBJS) $(DBLASOBJS) $(CBLASOBJS) $(ZBLASOBJS) + +ifeq ($(EXPRECISION), 1) +FUNCOBJS += $(QBLASOBJS) $(XBLASOBJS) +endif + +ifeq ($(QUAD_PRECISION), 1) +FUNCOBJS += $(QBLASOBJS) $(XBLASOBJS) +endif + +FUNCALLFILES = $(FUNCOBJS:.$(SUFFIX)=) + + +include $(TOPDIR)/Makefile.tail + +all :: libs + +ifdef FUNCTION_PROFILE +$(BLASOBJS) $(BLASOBJS_P) : functable.h +$(BLASOBJS) $(BLASOBJS_P) : override CFLAGS += -DPROFILE_FUNC_NAME=interface_$(*F) + +functable.h : Makefile + ./create $(FUNCALLFILES) > functable.h + +endif clean :: - @for d in $(SUBDIRS_ALL) ; \ - do if test -d $$d; then \ - $(MAKE) -C $$d $(@F) || exit 1 ; \ - fi; \ - done -#ifdef DYNAMIC_ARCH - @$(MAKE) -C kernel clean -#endif - @$(MAKE) -C reference clean - @rm -f *.$(LIBSUFFIX) *.so *~ *.exe getarch getarch_2nd *.dll *.lib *.$(SUFFIX) *.dwf $(LIBPREFIX).$(LIBSUFFIX) $(LIBPREFIX)_p.$(LIBSUFFIX) $(LIBPREFIX).so.$(MAJOR_VERSION) *.lnk myconfig.h *.so.renamed *.a.renamed *.so.0 -ifeq ($(OSNAME), 
Darwin) - @rm -rf getarch.dSYM getarch_2nd.dSYM + @rm -f functable.h + +level1 : $(SBEXTOBJS) $(SBBLAS1OBJS) $(SBLAS1OBJS) $(DBLAS1OBJS) $(QBLAS1OBJS) $(CBLAS1OBJS) $(ZBLAS1OBJS) $(XBLAS1OBJS) + $(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^ + +level2 : $(SBBLAS2OBJS) $(SBLAS2OBJS) $(DBLAS2OBJS) $(QBLAS2OBJS) $(CBLAS2OBJS) $(ZBLAS2OBJS) $(XBLAS2OBJS) + $(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^ + +level3 : $(SBBLAS3OBJS) $(SBLAS3OBJS) $(DBLAS3OBJS) $(QBLAS3OBJS) $(CBLAS3OBJS) $(ZBLAS3OBJS) $(XBLAS3OBJS) + $(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^ + +aux : $(CBAUXOBJS) + $(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^ + +$(CSBBLASOBJS) $(CSBBLASOBJS_P) $(CSBLASOBJS) $(CSBLASOBJS_P) $(CDBLASOBJS) $(CDBLASOBJS_P) $(CQBLASOBJS) $(CQBLASOBJS_P) \ +$(CCBLASOBJS) $(CCBLASOBJS_P) $(CZBLASOBJS) $(CZBLASOBJS_P) $(CXBLASOBJS) $(CXBLASOBJS_P) $(CBAUXOBJS_P) : override CFLAGS += -DCBLAS + +srot.$(SUFFIX) srot.$(PSUFFIX) : rot.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +drot.$(SUFFIX) drot.$(PSUFFIX) : rot.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +qrot.$(SUFFIX) qrot.$(PSUFFIX) : rot.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +csrot.$(SUFFIX) csrot.$(PSUFFIX) : zrot.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +zdrot.$(SUFFIX) zdrot.$(PSUFFIX) : zrot.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +xqrot.$(SUFFIX) xqrot.$(PSUFFIX) : zrot.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +srotm.$(SUFFIX) srotm.$(PSUFFIX): rotm.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +drotm.$(SUFFIX) drotm.$(PSUFFIX): rotm.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +qrotm.$(SUFFIX) qrotm.$(PSUFFIX): rotm.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +srotmg.$(SUFFIX) srotmg.$(PSUFFIX): rotmg.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +drotmg.$(SUFFIX) drotmg.$(PSUFFIX): rotmg.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +qrotmg.$(SUFFIX) qrotmg.$(PSUFFIX): rotmg.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +srotg.$(SUFFIX) srotg.$(PSUFFIX): rotg.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +drotg.$(SUFFIX) drotg.$(PSUFFIX): rotg.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +qrotg.$(SUFFIX) 
qrotg.$(PSUFFIX): rotg.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +crotg.$(SUFFIX) crotg.$(PSUFFIX): zrotg.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +zrotg.$(SUFFIX) zrotg.$(PSUFFIX): zrotg.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +xrotg.$(SUFFIX) xrotg.$(PSUFFIX): zrotg.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +sasum.$(SUFFIX) sasum.$(PSUFFIX) : asum.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +dasum.$(SUFFIX) dasum.$(PSUFFIX) : asum.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +qasum.$(SUFFIX) qasum.$(PSUFFIX) : asum.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +scasum.$(SUFFIX) scasum.$(PSUFFIX) : asum.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +dzasum.$(SUFFIX) dzasum.$(PSUFFIX) : asum.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +qxasum.$(SUFFIX) qxasum.$(PSUFFIX) : asum.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +ssum.$(SUFFIX) ssum.$(PSUFFIX) : sum.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +dsum.$(SUFFIX) dsum.$(PSUFFIX) : sum.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +qsum.$(SUFFIX) qsum.$(PSUFFIX) : sum.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +scsum.$(SUFFIX) scsum.$(PSUFFIX) : sum.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +dzsum.$(SUFFIX) dzsum.$(PSUFFIX) : sum.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +qxsum.$(SUFFIX) qxsum.$(PSUFFIX) : sum.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +snrm2.$(SUFFIX) snrm2.$(PSUFFIX) : nrm2.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +dnrm2.$(SUFFIX) dnrm2.$(PSUFFIX) : nrm2.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +qnrm2.$(SUFFIX) qnrm2.$(PSUFFIX) : nrm2.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +scnrm2.$(SUFFIX) scnrm2.$(PSUFFIX) : nrm2.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +dznrm2.$(SUFFIX) dznrm2.$(PSUFFIX) : nrm2.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +qxnrm2.$(SUFFIX) qxnrm2.$(PSUFFIX) : nrm2.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +samax.$(SUFFIX) samax.$(PSUFFIX) : max.c + $(CC) $(CFLAGS) -c -DUSE_ABS -UUSE_MIN $< -o $(@F) + +damax.$(SUFFIX) damax.$(PSUFFIX) : max.c + $(CC) $(CFLAGS) -c -DUSE_ABS -UUSE_MIN $< -o $(@F) + +qamax.$(SUFFIX) qamax.$(PSUFFIX) : max.c + $(CC) $(CFLAGS) -c -DUSE_ABS -UUSE_MIN $< -o $(@F) + 
+scamax.$(SUFFIX) scamax.$(PSUFFIX) : max.c + $(CC) $(CFLAGS) -c -DUSE_ABS -UUSE_MIN $< -o $(@F) + +dzamax.$(SUFFIX) dzamax.$(PSUFFIX) : max.c + $(CC) $(CFLAGS) -c -DUSE_ABS -UUSE_MIN $< -o $(@F) + +qxamax.$(SUFFIX) qxamax.$(PSUFFIX) : max.c + $(CC) $(CFLAGS) -c -DUSE_ABS -UUSE_MIN $< -o $(@F) + +samin.$(SUFFIX) samin.$(PSUFFIX) : max.c + $(CC) $(CFLAGS) -c -DUSE_ABS -DUSE_MIN $< -o $(@F) + +damin.$(SUFFIX) damin.$(PSUFFIX) : max.c + $(CC) $(CFLAGS) -c -DUSE_ABS -DUSE_MIN $< -o $(@F) + +qamin.$(SUFFIX) qamin.$(PSUFFIX) : max.c + $(CC) $(CFLAGS) -c -DUSE_ABS -DUSE_MIN $< -o $(@F) + +scamin.$(SUFFIX) scamin.$(PSUFFIX) : max.c + $(CC) $(CFLAGS) -c -DUSE_ABS -DUSE_MIN $< -o $(@F) + +dzamin.$(SUFFIX) dzamin.$(PSUFFIX) : max.c + $(CC) $(CFLAGS) -c -DUSE_ABS -DUSE_MIN $< -o $(@F) + +qxamin.$(SUFFIX) qxamin.$(PSUFFIX) : max.c + $(CC) $(CFLAGS) -c -DUSE_ABS -DUSE_MIN $< -o $(@F) + +smax.$(SUFFIX) smax.$(PSUFFIX) : max.c + $(CC) $(CFLAGS) -c -UUSE_ABS -UUSE_MIN $< -o $(@F) + +dmax.$(SUFFIX) dmax.$(PSUFFIX) : max.c + $(CC) $(CFLAGS) -c -UUSE_ABS -UUSE_MIN $< -o $(@F) + +qmax.$(SUFFIX) qmax.$(PSUFFIX) : max.c + $(CC) $(CFLAGS) -c -UUSE_ABS -UUSE_MIN $< -o $(@F) + +smin.$(SUFFIX) smin.$(PSUFFIX) : max.c + $(CC) $(CFLAGS) -c -UUSE_ABS -DUSE_MIN $< -o $(@F) + +dmin.$(SUFFIX) dmin.$(PSUFFIX) : max.c + $(CC) $(CFLAGS) -c -UUSE_ABS -DUSE_MIN $< -o $(@F) + +qmin.$(SUFFIX) qmin.$(PSUFFIX) : max.c + $(CC) $(CFLAGS) -c -UUSE_ABS -DUSE_MIN $< -o $(@F) + +isamax.$(SUFFIX) isamax.$(PSUFFIX) : imax.c + $(CC) $(CFLAGS) -c -DUSE_ABS -UUSE_MIN $< -o $(@F) + +idamax.$(SUFFIX) idamax.$(PSUFFIX) : imax.c + $(CC) $(CFLAGS) -c -DUSE_ABS -UUSE_MIN $< -o $(@F) + +iqamax.$(SUFFIX) iqamax.$(PSUFFIX) : imax.c + $(CC) $(CFLAGS) -c -DUSE_ABS -UUSE_MIN $< -o $(@F) + +icamax.$(SUFFIX) icamax.$(PSUFFIX) : imax.c + $(CC) $(CFLAGS) -c -DUSE_ABS -UUSE_MIN $< -o $(@F) + +izamax.$(SUFFIX) izamax.$(PSUFFIX) : imax.c + $(CC) $(CFLAGS) -c -DUSE_ABS -UUSE_MIN $< -o $(@F) + +ixamax.$(SUFFIX) ixamax.$(PSUFFIX) : imax.c 
+ $(CC) $(CFLAGS) -c -DUSE_ABS -UUSE_MIN $< -o $(@F) + +isamin.$(SUFFIX) isamin.$(PSUFFIX) : imax.c + $(CC) $(CFLAGS) -c -DUSE_ABS -DUSE_MIN $< -o $(@F) + +idamin.$(SUFFIX) idamin.$(PSUFFIX) : imax.c + $(CC) $(CFLAGS) -c -DUSE_ABS -DUSE_MIN $< -o $(@F) + +iqamin.$(SUFFIX) iqamin.$(PSUFFIX) : imax.c + $(CC) $(CFLAGS) -c -DUSE_ABS -DUSE_MIN $< -o $(@F) + +icamin.$(SUFFIX) icamin.$(PSUFFIX) : imax.c + $(CC) $(CFLAGS) -c -DUSE_ABS -DUSE_MIN $< -o $(@F) + +izamin.$(SUFFIX) izamin.$(PSUFFIX) : imax.c + $(CC) $(CFLAGS) -c -DUSE_ABS -DUSE_MIN $< -o $(@F) + +ixamin.$(SUFFIX) ixamin.$(PSUFFIX) : imax.c + $(CC) $(CFLAGS) -c -DUSE_ABS -DUSE_MIN $< -o $(@F) + +ismax.$(SUFFIX) ismax.$(PSUFFIX) : imax.c + $(CC) $(CFLAGS) -c -UUSE_ABS -UUSE_MIN $< -o $(@F) + +idmax.$(SUFFIX) idmax.$(PSUFFIX) : imax.c + $(CC) $(CFLAGS) -c -UUSE_ABS -UUSE_MIN $< -o $(@F) + +iqmax.$(SUFFIX) iqmax.$(PSUFFIX) : imax.c + $(CC) $(CFLAGS) -c -UUSE_ABS -UUSE_MIN $< -o $(@F) + +ismin.$(SUFFIX) ismin.$(PSUFFIX) : imax.c + $(CC) $(CFLAGS) -c -UUSE_ABS -DUSE_MIN $< -o $(@F) + +idmin.$(SUFFIX) idmin.$(PSUFFIX) : imax.c + $(CC) $(CFLAGS) -c -UUSE_ABS -DUSE_MIN $< -o $(@F) + +iqmin.$(SUFFIX) iqmin.$(PSUFFIX) : imax.c + $(CC) $(CFLAGS) -c -UUSE_ABS -DUSE_MIN $< -o $(@F) + +sdsdot.$(SUFFIX) sdsdot.$(PSUFFIX) : sdsdot.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +dsdot.$(SUFFIX) dsdot.$(PSUFFIX) : dsdot.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +ifeq ($(BUILD_BFLOAT16),1) +sbdot.$(SUFFIX) sbdot.$(PSUFFIX) : bf16dot.c + $(CC) $(CFLAGS) -c $< -o $(@F) +sbstobf16.$(SUFFIX) sbstobf16.$(PSUFFIX) : tobf16.c + $(CC) $(CFLAGS) -DSINGLE_PREC -UDOUBLE_PREC -c $< -o $(@F) +sbdtobf16.$(SUFFIX) sbdtobf16.$(PSUFFIX) : tobf16.c + $(CC) $(CFLAGS) -USINGLE_PREC -DDOUBLE_PREC -c $< -o $(@F) +sbf16tos.$(SUFFIX) sbf16tos.$(PSUFFIX) : bf16to.c + $(CC) $(CFLAGS) -DSINGLE_PREC -UDOUBLE_PREC -c $< -o $(@F) +dbf16tod.$(SUFFIX) dbf16tod.$(PSUFFIX) : bf16to.c + $(CC) $(CFLAGS) -USINGLE_PREC -DDOUBLE_PREC -c $< -o $(@F) endif - @rm -f Makefile.conf 
config.h Makefile_kernel.conf config_kernel.h st* *.dylib - @rm -f cblas.tmp cblas.tmp2 - @touch $(NETLIB_LAPACK_DIR)/make.inc - @$(MAKE) -C $(NETLIB_LAPACK_DIR) clean - @rm -f $(NETLIB_LAPACK_DIR)/make.inc $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke_mangling.h - @$(MAKE) -C relapack clean - @rm -f *.grd Makefile.conf_last config_last.h - @(cd $(NETLIB_LAPACK_DIR)/TESTING && rm -f x* *.out testing_results.txt) - @echo Done. + +sdot.$(SUFFIX) sdot.$(PSUFFIX) : dot.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +ddot.$(SUFFIX) ddot.$(PSUFFIX) : dot.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +qdot.$(SUFFIX) qdot.$(PSUFFIX) : dot.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +cdotu.$(SUFFIX) cdotu.$(PSUFFIX) : zdot.c + $(CC) $(CFLAGS) -c -UCONJ $< -o $(@F) + +cdotc.$(SUFFIX) cdotc.$(PSUFFIX) : zdot.c + $(CC) $(CFLAGS) -c -DCONJ $< -o $(@F) + +zdotu.$(SUFFIX) zdotu.$(PSUFFIX) : zdot.c + $(CC) $(CFLAGS) -c -UCONJ $< -o $(@F) + +zdotc.$(SUFFIX) zdotc.$(PSUFFIX) : zdot.c + $(CC) $(CFLAGS) -c -DCONJ $< -o $(@F) + +xdotu.$(SUFFIX) xdotu.$(PSUFFIX) : zdot.c + $(CC) $(CFLAGS) -c -UCONJ $< -o $(@F) + +xdotc.$(SUFFIX) xdotc.$(PSUFFIX) : zdot.c + $(CC) $(CFLAGS) -c -DCONJ $< -o $(@F) + +saxpy.$(SUFFIX) saxpy.$(PSUFFIX) : axpy.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +daxpy.$(SUFFIX) daxpy.$(PSUFFIX) : axpy.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +qaxpy.$(SUFFIX) qaxpy.$(PSUFFIX) : axpy.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +caxpy.$(SUFFIX) caxpy.$(PSUFFIX) : zaxpy.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +zaxpy.$(SUFFIX) zaxpy.$(PSUFFIX) : zaxpy.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +xaxpy.$(SUFFIX) xaxpy.$(PSUFFIX) : zaxpy.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +caxpyc.$(SUFFIX) caxpyc.$(PSUFFIX) : zaxpy.c + $(CC) $(CFLAGS) -c -DCONJ $< -o $(@F) + +zaxpyc.$(SUFFIX) zaxpyc.$(PSUFFIX) : zaxpy.c + $(CC) $(CFLAGS) -c -DCONJ $< -o $(@F) + +xaxpyc.$(SUFFIX) xaxpyc.$(PSUFFIX) : zaxpy.c + $(CC) $(CFLAGS) -c -DCONJ $< -o $(@F) + +sscal.$(SUFFIX) sscal.$(PSUFFIX) : scal.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +dscal.$(SUFFIX) 
dscal.$(PSUFFIX) : scal.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +qscal.$(SUFFIX) qscal.$(PSUFFIX) : scal.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +cscal.$(SUFFIX) cscal.$(PSUFFIX) : zscal.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +zscal.$(SUFFIX) zscal.$(PSUFFIX) : zscal.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +xscal.$(SUFFIX) xscal.$(PSUFFIX) : zscal.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +csscal.$(SUFFIX) csscal.$(PSUFFIX) : zscal.c + $(CC) $(CFLAGS) -c -DSSCAL $< -o $(@F) + +zdscal.$(SUFFIX) zdscal.$(PSUFFIX) : zscal.c + $(CC) $(CFLAGS) -c -DSSCAL $< -o $(@F) + +xqscal.$(SUFFIX) xqscal.$(PSUFFIX) : zscal.c + $(CC) $(CFLAGS) -c -DSSCAL $< -o $(@F) + +scopy.$(SUFFIX) scopy.$(PSUFFIX) : copy.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +dcopy.$(SUFFIX) dcopy.$(PSUFFIX) : copy.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +qcopy.$(SUFFIX) qcopy.$(PSUFFIX) : copy.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +ccopy.$(SUFFIX) ccopy.$(PSUFFIX) : copy.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +zcopy.$(SUFFIX) zcopy.$(PSUFFIX) : copy.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +xcopy.$(SUFFIX) xcopy.$(PSUFFIX) : copy.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +sswap.$(SUFFIX) sswap.$(PSUFFIX) : swap.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +dswap.$(SUFFIX) dswap.$(PSUFFIX) : swap.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +qswap.$(SUFFIX) qswap.$(PSUFFIX) : swap.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +cswap.$(SUFFIX) cswap.$(PSUFFIX) : zswap.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +zswap.$(SUFFIX) zswap.$(PSUFFIX) : zswap.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +xswap.$(SUFFIX) xswap.$(PSUFFIX) : zswap.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +sger.$(SUFFIX) sger.$(PSUFFIX) : ger.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +dger.$(SUFFIX) dger.$(PSUFFIX) : ger.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +qger.$(SUFFIX) qger.$(PSUFFIX) : ger.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +cgeru.$(SUFFIX) cgeru.$(PSUFFIX) : zger.c + $(CC) -c $(CFLAGS) -UCONJ $< -o $(@F) + +cgerc.$(SUFFIX) cgerc.$(PSUFFIX) : zger.c + $(CC) -c $(CFLAGS) -DCONJ $< -o $(@F) + +zgeru.$(SUFFIX) 
zgeru.$(PSUFFIX) : zger.c + $(CC) -c $(CFLAGS) -UCONJ $< -o $(@F) + +zgerc.$(SUFFIX) zgerc.$(PSUFFIX) : zger.c + $(CC) -c $(CFLAGS) -DCONJ $< -o $(@F) + +xgeru.$(SUFFIX) xgeru.$(PSUFFIX) : zger.c + $(CC) -c $(CFLAGS) -UCONJ $< -o $(@F) + +xgerc.$(SUFFIX) xgerc.$(PSUFFIX) : zger.c + $(CC) -c $(CFLAGS) -DCONJ $< -o $(@F) + +ifeq ($(BUILD_BFLOAT16),1) +sbgemv.$(SUFFIX) sbgemv.$(PSUFFIX) : sbgemv.c + $(CC) $(CFLAGS) -c $< -o $(@F) +endif + +ifndef USE_NETLIB_GEMV +sgemv.$(SUFFIX) sgemv.$(PSUFFIX): gemv.c + $(CC) -c $(CFLAGS) -o $(@F) $< + +dgemv.$(SUFFIX) dgemv.$(PSUFFIX): gemv.c + $(CC) -c $(CFLAGS) -o $(@F) $< +else +sgemv.$(SUFFIX) sgemv.$(PSUFFIX): netlib/sgemv.f + $(FC) -c $(FFLAGS) -o $(@F) $< + +dgemv.$(SUFFIX) dgemv.$(PSUFFIX): netlib/dgemv.f + $(FC) -c $(FFLAGS) -o $(@F) $< +endif + +qgemv.$(SUFFIX) qgemv.$(PSUFFIX): gemv.c + $(CC) -c $(CFLAGS) -o $(@F) $< + +ifndef USE_NETLIB_GEMV +cgemv.$(SUFFIX) cgemv.$(PSUFFIX): zgemv.c + $(CC) -c $(CFLAGS) -o $(@F) $< + +zgemv.$(SUFFIX) zgemv.$(PSUFFIX): zgemv.c + $(CC) -c $(CFLAGS) -o $(@F) $< +else +cgemv.$(SUFFIX) cgemv.$(PSUFFIX): netlib/cgemv.f + $(FC) -c $(FFLAGS) -o $(@F) $< + +zgemv.$(SUFFIX) zgemv.$(PSUFFIX): netlib/zgemv.f + $(FC) -c $(FFLAGS) -o $(@F) $< +endif + +xgemv.$(SUFFIX) xgemv.$(PSUFFIX): zgemv.c + $(CC) -c $(CFLAGS) -o $(@F) $< + +strsv.$(SUFFIX) strsv.$(PSUFFIX) : trsv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +dtrsv.$(SUFFIX) dtrsv.$(PSUFFIX) : trsv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +qtrsv.$(SUFFIX) qtrsv.$(PSUFFIX) : trsv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +ctrsv.$(SUFFIX) ctrsv.$(PSUFFIX) : ztrsv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +ztrsv.$(SUFFIX) ztrsv.$(PSUFFIX) : ztrsv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +xtrsv.$(SUFFIX) xtrsv.$(PSUFFIX) : ztrsv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +strmv.$(SUFFIX) strmv.$(PSUFFIX) : trmv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +dtrmv.$(SUFFIX) dtrmv.$(PSUFFIX) : trmv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +qtrmv.$(SUFFIX) qtrmv.$(PSUFFIX) : trmv.c + $(CC) -c 
$(CFLAGS) $< -o $(@F) + +ctrmv.$(SUFFIX) ctrmv.$(PSUFFIX) : ztrmv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +ztrmv.$(SUFFIX) ztrmv.$(PSUFFIX) : ztrmv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +xtrmv.$(SUFFIX) xtrmv.$(PSUFFIX) : ztrmv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +ssymv.$(SUFFIX) ssymv.$(PSUFFIX) : symv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +dsymv.$(SUFFIX) dsymv.$(PSUFFIX) : symv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +qsymv.$(SUFFIX) qsymv.$(PSUFFIX) : symv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +csymv.$(SUFFIX) csymv.$(PSUFFIX) : zsymv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +zsymv.$(SUFFIX) zsymv.$(PSUFFIX) : zsymv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +xsymv.$(SUFFIX) xsymv.$(PSUFFIX) : zsymv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +ssyr.$(SUFFIX) ssyr.$(PSUFFIX) : syr.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +dsyr.$(SUFFIX) dsyr.$(PSUFFIX) : syr.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +qsyr.$(SUFFIX) qsyr.$(PSUFFIX) : syr.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +csyr.$(SUFFIX) csyr.$(PSUFFIX) : zsyr.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +zsyr.$(SUFFIX) zsyr.$(PSUFFIX) : zsyr.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +xsyr.$(SUFFIX) xsyr.$(PSUFFIX) : zsyr.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +ssyr2.$(SUFFIX) ssyr2.$(PSUFFIX) : syr2.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +dsyr2.$(SUFFIX) dsyr2.$(PSUFFIX) : syr2.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +qsyr2.$(SUFFIX) qsyr2.$(PSUFFIX) : syr2.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +csyr2.$(SUFFIX) csyr2.$(PSUFFIX) : zsyr2.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +zsyr2.$(SUFFIX) zsyr2.$(PSUFFIX) : zsyr2.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +xsyr2.$(SUFFIX) xsyr2.$(PSUFFIX) : zsyr2.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +sgbmv.$(SUFFIX) sgbmv.$(PSUFFIX): gbmv.c + $(CC) -c $(CFLAGS) -o $(@F) $< + +dgbmv.$(SUFFIX) dgbmv.$(PSUFFIX): gbmv.c + $(CC) -c $(CFLAGS) -o $(@F) $< + +qgbmv.$(SUFFIX) qgbmv.$(PSUFFIX): gbmv.c + $(CC) -c $(CFLAGS) -o $(@F) $< + +cgbmv.$(SUFFIX) cgbmv.$(PSUFFIX): zgbmv.c + $(CC) -c $(CFLAGS) -o $(@F) $< + +zgbmv.$(SUFFIX) zgbmv.$(PSUFFIX): 
zgbmv.c + $(CC) -c $(CFLAGS) -o $(@F) $< + +xgbmv.$(SUFFIX) xgbmv.$(PSUFFIX): zgbmv.c + $(CC) -c $(CFLAGS) -o $(@F) $< + +ssbmv.$(SUFFIX) ssbmv.$(PSUFFIX) : sbmv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +dsbmv.$(SUFFIX) dsbmv.$(PSUFFIX) : sbmv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +qsbmv.$(SUFFIX) qsbmv.$(PSUFFIX) : sbmv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +csbmv.$(SUFFIX) csbmv.$(PSUFFIX) : zsbmv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +zsbmv.$(SUFFIX) zsbmv.$(PSUFFIX) : zsbmv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +xsbmv.$(SUFFIX) xsbmv.$(PSUFFIX) : zsbmv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +sspmv.$(SUFFIX) sspmv.$(PSUFFIX) : spmv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +dspmv.$(SUFFIX) dspmv.$(PSUFFIX) : spmv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +qspmv.$(SUFFIX) qspmv.$(PSUFFIX) : spmv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +cspmv.$(SUFFIX) cspmv.$(PSUFFIX) : zspmv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +zspmv.$(SUFFIX) zspmv.$(PSUFFIX) : zspmv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +xspmv.$(SUFFIX) xspmv.$(PSUFFIX) : zspmv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +sspr.$(SUFFIX) sspr.$(PSUFFIX) : spr.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +dspr.$(SUFFIX) dspr.$(PSUFFIX) : spr.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +qspr.$(SUFFIX) qspr.$(PSUFFIX) : spr.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +cspr.$(SUFFIX) cspr.$(PSUFFIX) : zspr.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +zspr.$(SUFFIX) zspr.$(PSUFFIX) : zspr.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +xspr.$(SUFFIX) xspr.$(PSUFFIX) : zspr.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +sspr2.$(SUFFIX) sspr2.$(PSUFFIX) : spr2.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +dspr2.$(SUFFIX) dspr2.$(PSUFFIX) : spr2.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +qspr2.$(SUFFIX) qspr2.$(PSUFFIX) : spr2.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +cspr2.$(SUFFIX) cspr2.$(PSUFFIX) : zspr2.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +zspr2.$(SUFFIX) zspr2.$(PSUFFIX) : zspr2.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +xspr2.$(SUFFIX) xspr2.$(PSUFFIX) : zspr2.c + $(CC) -c $(CFLAGS) $< -o $(@F) + 
+stbmv.$(SUFFIX) stbmv.$(PSUFFIX) : tbmv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +dtbmv.$(SUFFIX) dtbmv.$(PSUFFIX) : tbmv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +qtbmv.$(SUFFIX) qtbmv.$(PSUFFIX) : tbmv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +ctbmv.$(SUFFIX) ctbmv.$(PSUFFIX) : ztbmv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +ztbmv.$(SUFFIX) ztbmv.$(PSUFFIX) : ztbmv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +xtbmv.$(SUFFIX) xtbmv.$(PSUFFIX) : ztbmv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +stbsv.$(SUFFIX) stbsv.$(PSUFFIX) : tbsv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +dtbsv.$(SUFFIX) dtbsv.$(PSUFFIX) : tbsv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +qtbsv.$(SUFFIX) qtbsv.$(PSUFFIX) : tbsv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +ctbsv.$(SUFFIX) ctbsv.$(PSUFFIX) : ztbsv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +ztbsv.$(SUFFIX) ztbsv.$(PSUFFIX) : ztbsv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +xtbsv.$(SUFFIX) xtbsv.$(PSUFFIX) : ztbsv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +stpsv.$(SUFFIX) stpsv.$(PSUFFIX) : tpsv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +dtpsv.$(SUFFIX) dtpsv.$(PSUFFIX) : tpsv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +qtpsv.$(SUFFIX) qtpsv.$(PSUFFIX) : tpsv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +ctpsv.$(SUFFIX) ctpsv.$(PSUFFIX) : ztpsv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +ztpsv.$(SUFFIX) ztpsv.$(PSUFFIX) : ztpsv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +xtpsv.$(SUFFIX) xtpsv.$(PSUFFIX) : ztpsv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +stpmv.$(SUFFIX) stpmv.$(PSUFFIX) : tpmv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +dtpmv.$(SUFFIX) dtpmv.$(PSUFFIX) : tpmv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +qtpmv.$(SUFFIX) qtpmv.$(PSUFFIX) : tpmv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +ctpmv.$(SUFFIX) ctpmv.$(PSUFFIX) : ztpmv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +ztpmv.$(SUFFIX) ztpmv.$(PSUFFIX) : ztpmv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +xtpmv.$(SUFFIX) xtpmv.$(PSUFFIX) : ztpmv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +chemv.$(SUFFIX) chemv.$(PSUFFIX) : zhemv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +zhemv.$(SUFFIX) zhemv.$(PSUFFIX) : 
zhemv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +xhemv.$(SUFFIX) xhemv.$(PSUFFIX) : zhemv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +chbmv.$(SUFFIX) chbmv.$(PSUFFIX) : zhbmv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +zhbmv.$(SUFFIX) zhbmv.$(PSUFFIX) : zhbmv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +xhbmv.$(SUFFIX) xhbmv.$(PSUFFIX) : zhbmv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +cher.$(SUFFIX) cher.$(PSUFFIX) : zher.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +zher.$(SUFFIX) zher.$(PSUFFIX) : zher.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +xher.$(SUFFIX) xher.$(PSUFFIX) : zher.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +cher2.$(SUFFIX) cher2.$(PSUFFIX) : zher2.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +zher2.$(SUFFIX) zher2.$(PSUFFIX) : zher2.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +xher2.$(SUFFIX) xher2.$(PSUFFIX) : zher2.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +chpmv.$(SUFFIX) chpmv.$(PSUFFIX) : zhpmv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +zhpmv.$(SUFFIX) zhpmv.$(PSUFFIX) : zhpmv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +xhpmv.$(SUFFIX) xhpmv.$(PSUFFIX) : zhpmv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +chpr.$(SUFFIX) chpr.$(PSUFFIX) : zhpr.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +zhpr.$(SUFFIX) zhpr.$(PSUFFIX) : zhpr.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +xhpr.$(SUFFIX) xhpr.$(PSUFFIX) : zhpr.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +chpr2.$(SUFFIX) chpr2.$(PSUFFIX) : zhpr2.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +zhpr2.$(SUFFIX) zhpr2.$(PSUFFIX) : zhpr2.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +xhpr2.$(SUFFIX) xhpr2.$(PSUFFIX) : zhpr2.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +ifeq ($(BUILD_BFLOAT16),1) +sbgemm.$(SUFFIX) sbgemm.$(PSUFFIX) : gemm.c ../param.h + $(CC) -c $(CFLAGS) $< -o $(@F) +endif + +sgemm.$(SUFFIX) sgemm.$(PSUFFIX) : gemm.c ../param.h + $(CC) -c $(CFLAGS) $< -o $(@F) + +dgemm.$(SUFFIX) dgemm.$(PSUFFIX) : gemm.c ../param.h + $(CC) -c $(CFLAGS) $< -o $(@F) + +qgemm.$(SUFFIX) qgemm.$(PSUFFIX) : gemm.c ../param.h + $(CC) -c $(CFLAGS) $< -o $(@F) + +cgemm.$(SUFFIX) cgemm.$(PSUFFIX) : gemm.c ../param.h + $(CC) -c $(CFLAGS) $< -o 
$(@F) + +zgemm.$(SUFFIX) zgemm.$(PSUFFIX) : gemm.c ../param.h + $(CC) -c $(CFLAGS) $< -o $(@F) + +xgemm.$(SUFFIX) xgemm.$(PSUFFIX) : gemm.c ../param.h + $(CC) -c $(CFLAGS) $< -o $(@F) + +ssymm.$(SUFFIX) ssymm.$(PSUFFIX) : symm.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +dsymm.$(SUFFIX) dsymm.$(PSUFFIX) : symm.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +qsymm.$(SUFFIX) qsymm.$(PSUFFIX) : symm.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +csymm.$(SUFFIX) csymm.$(PSUFFIX) : symm.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +zsymm.$(SUFFIX) zsymm.$(PSUFFIX) : symm.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +xsymm.$(SUFFIX) xsymm.$(PSUFFIX) : symm.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +strmm.$(SUFFIX) strmm.$(PSUFFIX) : trsm.c + $(CC) -c $(CFLAGS) -DTRMM $< -o $(@F) + +dtrmm.$(SUFFIX) dtrmm.$(PSUFFIX) : trsm.c + $(CC) -c $(CFLAGS) -DTRMM $< -o $(@F) + +qtrmm.$(SUFFIX) qtrmm.$(PSUFFIX) : trsm.c + $(CC) -c $(CFLAGS) -DTRMM $< -o $(@F) + +ctrmm.$(SUFFIX) ctrmm.$(PSUFFIX) : trsm.c + $(CC) -c $(CFLAGS) -DTRMM $< -o $(@F) + +ztrmm.$(SUFFIX) ztrmm.$(PSUFFIX) : trsm.c + $(CC) -c $(CFLAGS) -DTRMM $< -o $(@F) + +xtrmm.$(SUFFIX) xtrmm.$(PSUFFIX) : trsm.c + $(CC) -c $(CFLAGS) -DTRMM $< -o $(@F) + +strsm.$(SUFFIX) strsm.$(PSUFFIX) : trsm.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +dtrsm.$(SUFFIX) dtrsm.$(PSUFFIX) : trsm.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +qtrsm.$(SUFFIX) qtrsm.$(PSUFFIX) : trsm.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +ctrsm.$(SUFFIX) ctrsm.$(PSUFFIX) : trsm.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +ztrsm.$(SUFFIX) ztrsm.$(PSUFFIX) : trsm.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +xtrsm.$(SUFFIX) xtrsm.$(PSUFFIX) : trsm.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +ssyrk.$(SUFFIX) ssyrk.$(PSUFFIX) : syrk.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +dsyrk.$(SUFFIX) dsyrk.$(PSUFFIX) : syrk.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +qsyrk.$(SUFFIX) qsyrk.$(PSUFFIX) : syrk.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +csyrk.$(SUFFIX) csyrk.$(PSUFFIX) : syrk.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +zsyrk.$(SUFFIX) zsyrk.$(PSUFFIX) : syrk.c + $(CC) -c 
$(CFLAGS) $< -o $(@F) + +xsyrk.$(SUFFIX) xsyrk.$(PSUFFIX) : syrk.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +ssyr2k.$(SUFFIX) ssyr2k.$(PSUFFIX) : syr2k.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +dsyr2k.$(SUFFIX) dsyr2k.$(PSUFFIX) : syr2k.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +qsyr2k.$(SUFFIX) qsyr2k.$(PSUFFIX) : syr2k.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +csyr2k.$(SUFFIX) csyr2k.$(PSUFFIX) : syr2k.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +zsyr2k.$(SUFFIX) zsyr2k.$(PSUFFIX) : syr2k.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +xsyr2k.$(SUFFIX) xsyr2k.$(PSUFFIX) : syr2k.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +chemm.$(SUFFIX) chemm.$(PSUFFIX) : symm.c + $(CC) -c $(CFLAGS) -DHEMM $< -o $(@F) + +zhemm.$(SUFFIX) zhemm.$(PSUFFIX) : symm.c + $(CC) -c $(CFLAGS) -DHEMM $< -o $(@F) + +xhemm.$(SUFFIX) xhemm.$(PSUFFIX) : symm.c + $(CC) -c $(CFLAGS) -DHEMM $< -o $(@F) + +cherk.$(SUFFIX) cherk.$(PSUFFIX) : syrk.c + $(CC) -c $(CFLAGS) -DHEMM $< -o $(@F) + +zherk.$(SUFFIX) zherk.$(PSUFFIX) : syrk.c + $(CC) -c $(CFLAGS) -DHEMM $< -o $(@F) + +xherk.$(SUFFIX) xherk.$(PSUFFIX) : syrk.c + $(CC) -c $(CFLAGS) -DHEMM $< -o $(@F) + +cher2k.$(SUFFIX) cher2k.$(PSUFFIX) : syr2k.c + $(CC) -c $(CFLAGS) -DHEMM $< -o $(@F) + +zher2k.$(SUFFIX) zher2k.$(PSUFFIX) : syr2k.c + $(CC) -c $(CFLAGS) -DHEMM $< -o $(@F) + +xher2k.$(SUFFIX) xher2k.$(PSUFFIX) : syr2k.c + $(CC) -c $(CFLAGS) -DHEMM $< -o $(@F) + +cgemm3m.$(SUFFIX) cgemm3m.$(PSUFFIX) : gemm.c + $(CC) -c $(CFLAGS) -DGEMM3M $< -o $(@F) + +zgemm3m.$(SUFFIX) zgemm3m.$(PSUFFIX) : gemm.c + $(CC) -c $(CFLAGS) -DGEMM3M $< -o $(@F) + +xgemm3m.$(SUFFIX) xgemm3m.$(PSUFFIX) : gemm.c + $(CC) -c $(CFLAGS) -DGEMM3M $< -o $(@F) + +csymm3m.$(SUFFIX) csymm3m.$(PSUFFIX) : symm.c + $(CC) -c $(CFLAGS) -DGEMM3M $< -o $(@F) + +zsymm3m.$(SUFFIX) zsymm3m.$(PSUFFIX) : symm.c + $(CC) -c $(CFLAGS) -DGEMM3M $< -o $(@F) + +xsymm3m.$(SUFFIX) xsymm3m.$(PSUFFIX) : symm.c + $(CC) -c $(CFLAGS) -DGEMM3M $< -o $(@F) + +chemm3m.$(SUFFIX) chemm3m.$(PSUFFIX) : symm.c + $(CC) -c $(CFLAGS) -DGEMM3M -DHEMM $< -o 
$(@F) + +zhemm3m.$(SUFFIX) zhemm3m.$(PSUFFIX) : symm.c + $(CC) -c $(CFLAGS) -DGEMM3M -DHEMM $< -o $(@F) + +xhemm3m.$(SUFFIX) xhemm3m.$(PSUFFIX) : symm.c + $(CC) -c $(CFLAGS) -DGEMM3M -DHEMM $< -o $(@F) + +cblas_isamax.$(SUFFIX) cblas_isamax.$(PSUFFIX) : imax.c + $(CC) $(CFLAGS) -DCBLAS -c -DUSE_ABS -UUSE_MIN $< -o $(@F) + +cblas_idamax.$(SUFFIX) cblas_idamax.$(PSUFFIX) : imax.c + $(CC) $(CFLAGS) -DCBLAS -c -DUSE_ABS -UUSE_MIN $< -o $(@F) + +cblas_icamax.$(SUFFIX) cblas_icamax.$(PSUFFIX) : imax.c + $(CC) $(CFLAGS) -DCBLAS -c -DUSE_ABS -UUSE_MIN $< -o $(@F) + +cblas_izamax.$(SUFFIX) cblas_izamax.$(PSUFFIX) : imax.c + $(CC) $(CFLAGS) -DCBLAS -c -DUSE_ABS -UUSE_MIN $< -o $(@F) + +cblas_isamin.$(SUFFIX) cblas_isamin.$(PSUFFIX) : imax.c + $(CC) $(CFLAGS) -DCBLAS -c -DUSE_ABS -DUSE_MIN $< -o $(@F) + +cblas_idamin.$(SUFFIX) cblas_idamin.$(PSUFFIX) : imax.c + $(CC) $(CFLAGS) -DCBLAS -c -DUSE_ABS -DUSE_MIN $< -o $(@F) + +cblas_icamin.$(SUFFIX) cblas_icamin.$(PSUFFIX) : imax.c + $(CC) $(CFLAGS) -DCBLAS -c -DUSE_ABS -DUSE_MIN $< -o $(@F) + +cblas_izamin.$(SUFFIX) cblas_izamin.$(PSUFFIX) : imax.c + $(CC) $(CFLAGS) -DCBLAS -c -DUSE_ABS -DUSE_MIN $< -o $(@F) + +cblas_ismax.$(SUFFIX) cblas_ismax.$(PSUFFIX) : imax.c + $(CC) $(CFLAGS) -DCBLAS -c -UUSE_ABS -UUSE_MIN $< -o $(@F) + +cblas_idmax.$(SUFFIX) cblas_idmax.$(PSUFFIX) : imax.c + $(CC) $(CFLAGS) -DCBLAS -c -UUSE_ABS -UUSE_MIN $< -o $(@F) + +cblas_ismin.$(SUFFIX) cblas_ismin.$(PSUFFIX) : imax.c + $(CC) $(CFLAGS) -DCBLAS -c -UUSE_ABS -DUSE_MIN $< -o $(@F) + +cblas_idmin.$(SUFFIX) cblas_idmin.$(PSUFFIX) : imax.c + $(CC) $(CFLAGS) -DCBLAS -c -UUSE_ABS -DUSE_MIN $< -o $(@F) + +cblas_icmax.$(SUFFIX) cblas_icmax.$(PSUFFIX) : imax.c + $(CC) $(CFLAGS) -DCBLAS -c -UUSE_ABS -UUSE_MIN $< -o $(@F) + +cblas_izmax.$(SUFFIX) cblas_izmax.$(PSUFFIX) : imax.c + $(CC) $(CFLAGS) -DCBLAS -c -UUSE_ABS -UUSE_MIN $< -o $(@F) + +cblas_icmin.$(SUFFIX) cblas_icmin.$(PSUFFIX) : imax.c + $(CC) $(CFLAGS) -DCBLAS -c -UUSE_ABS -DUSE_MIN $< -o $(@F) + 
+cblas_izmin.$(SUFFIX) cblas_izmin.$(PSUFFIX) : imax.c + $(CC) $(CFLAGS) -DCBLAS -c -UUSE_ABS -DUSE_MIN $< -o $(@F) + +cblas_sasum.$(SUFFIX) cblas_sasum.$(PSUFFIX) : asum.c + $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) + +cblas_dasum.$(SUFFIX) cblas_dasum.$(PSUFFIX) : asum.c + $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) + +cblas_scasum.$(SUFFIX) cblas_scasum.$(PSUFFIX) : asum.c + $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) + +cblas_dzasum.$(SUFFIX) cblas_dzasum.$(PSUFFIX) : asum.c + $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) + +cblas_ssum.$(SUFFIX) cblas_ssum.$(PSUFFIX) : sum.c + $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) + +cblas_dsum.$(SUFFIX) cblas_dsum.$(PSUFFIX) : sum.c + $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) + +cblas_scsum.$(SUFFIX) cblas_scsum.$(PSUFFIX) : sum.c + $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) + +cblas_dzsum.$(SUFFIX) cblas_dzsum.$(PSUFFIX) : sum.c + $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) + +cblas_sdsdot.$(SUFFIX) cblas_sdsdot.$(PSUFFIX) : sdsdot.c + $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) + +cblas_dsdot.$(SUFFIX) cblas_dsdot.$(PSUFFIX) : dsdot.c + $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) + +ifeq ($(BUILD_BFLOAT16),1) +cblas_sbdot.$(SUFFIX) cblas_sbdot.$(PSUFFIX) : bf16dot.c + $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) +cblas_sbstobf16.$(SUFFIX) cblas_sbstobf16.$(PSUFFIX) : tobf16.c + $(CC) $(CFLAGS) -DCBLAS -DSINGLE_PREC -UDOUBLE_PREC -c $< -o $(@F) +cblas_sbdtobf16.$(SUFFIX) cblas_sbdtobf16.$(PSUFFIX) : tobf16.c + $(CC) $(CFLAGS) -DCBLAS -USINGLE_PREC -DDOUBLE_PREC -c $< -o $(@F) +cblas_sbf16tos.$(SUFFIX) cblas_sbf16tos.$(PSUFFIX) : bf16to.c + $(CC) $(CFLAGS) -DCBLAS -DSINGLE_PREC -UDOUBLE_PREC -c $< -o $(@F) +cblas_dbf16tod.$(SUFFIX) cblas_dbf16tod.$(PSUFFIX) : bf16to.c + $(CC) $(CFLAGS) -DCBLAS -USINGLE_PREC -DDOUBLE_PREC -c $< -o $(@F) +endif + +cblas_sdot.$(SUFFIX) cblas_sdot.$(PSUFFIX) : dot.c + $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) + +cblas_ddot.$(SUFFIX) cblas_ddot.$(PSUFFIX) : dot.c + $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) + +cblas_cdotu.$(SUFFIX) 
cblas_cdotu.$(PSUFFIX) : zdot.c + $(CC) $(CFLAGS) -DCBLAS -c -UCONJ $< -o $(@F) + +cblas_cdotc.$(SUFFIX) cblas_cdotc.$(PSUFFIX) : zdot.c + $(CC) $(CFLAGS) -DCBLAS -c -DCONJ $< -o $(@F) + +cblas_zdotu.$(SUFFIX) cblas_zdotu.$(PSUFFIX) : zdot.c + $(CC) $(CFLAGS) -DCBLAS -c -UCONJ $< -o $(@F) + +cblas_zdotc.$(SUFFIX) cblas_zdotc.$(PSUFFIX) : zdot.c + $(CC) $(CFLAGS) -DCBLAS -c -DCONJ $< -o $(@F) + +cblas_cdotu_sub.$(SUFFIX) cblas_cdotu_sub.$(PSUFFIX) : zdot.c + $(CC) $(CFLAGS) -DCBLAS -DFORCE_USE_STACK -c -UCONJ $< -o $(@F) + +cblas_cdotc_sub.$(SUFFIX) cblas_cdotc_sub.$(PSUFFIX) : zdot.c + $(CC) $(CFLAGS) -DCBLAS -DFORCE_USE_STACK -c -DCONJ $< -o $(@F) + +cblas_zdotu_sub.$(SUFFIX) cblas_zdotu_sub.$(PSUFFIX) : zdot.c + $(CC) $(CFLAGS) -DCBLAS -DFORCE_USE_STACK -c -UCONJ $< -o $(@F) + +cblas_zdotc_sub.$(SUFFIX) cblas_zdotc_sub.$(PSUFFIX) : zdot.c + $(CC) $(CFLAGS) -DCBLAS -DFORCE_USE_STACK -c -DCONJ $< -o $(@F) + +cblas_snrm2.$(SUFFIX) cblas_snrm2.$(PSUFFIX) : nrm2.c + $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) + +cblas_dnrm2.$(SUFFIX) cblas_dnrm2.$(PSUFFIX) : nrm2.c + $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) + +cblas_scnrm2.$(SUFFIX) cblas_scnrm2.$(PSUFFIX) : nrm2.c + $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) + +cblas_dznrm2.$(SUFFIX) cblas_dznrm2.$(PSUFFIX) : nrm2.c + $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) + +cblas_saxpy.$(SUFFIX) cblas_saxpy.$(PSUFFIX) : axpy.c + $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) + +cblas_daxpy.$(SUFFIX) cblas_daxpy.$(PSUFFIX) : axpy.c + $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) + +cblas_caxpy.$(SUFFIX) cblas_caxpy.$(PSUFFIX) : zaxpy.c + $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) + +cblas_zaxpy.$(SUFFIX) cblas_zaxpy.$(PSUFFIX) : zaxpy.c + $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) + +cblas_scopy.$(SUFFIX) cblas_scopy.$(PSUFFIX) : copy.c + $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) + +cblas_dcopy.$(SUFFIX) cblas_dcopy.$(PSUFFIX) : copy.c + $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) + +cblas_ccopy.$(SUFFIX) cblas_ccopy.$(PSUFFIX) : copy.c + $(CC) $(CFLAGS) -DCBLAS -c $< -o 
$(@F) + +cblas_zcopy.$(SUFFIX) cblas_zcopy.$(PSUFFIX) : copy.c + $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) + +cblas_sswap.$(SUFFIX) cblas_sswap.$(PSUFFIX) : swap.c + $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) + +cblas_dswap.$(SUFFIX) cblas_dswap.$(PSUFFIX) : swap.c + $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) + +cblas_cswap.$(SUFFIX) cblas_cswap.$(PSUFFIX) : zswap.c + $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) + +cblas_zswap.$(SUFFIX) cblas_zswap.$(PSUFFIX) : zswap.c + $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) + +cblas_srot.$(SUFFIX) cblas_srot.$(PSUFFIX) : rot.c + $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) + +cblas_drot.$(SUFFIX) cblas_drot.$(PSUFFIX) : rot.c + $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) + +cblas_srotg.$(SUFFIX) cblas_srotg.$(PSUFFIX): rotg.c + $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) + +cblas_drotg.$(SUFFIX) cblas_drotg.$(PSUFFIX): rotg.c + $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) + +cblas_crotg.$(SUFFIX) crotg.$(PSUFFIX): zrotg.c + $(CC) -c $(CFLAGS) -DCBLAS $< -o $(@F) + +cblas_zrotg.$(SUFFIX) zrotg.$(PSUFFIX): zrotg.c + $(CC) -c $(CFLAGS) -DCBLAS $< -o $(@F) + +cblas_srotm.$(SUFFIX) cblas_srotm.$(PSUFFIX): rotm.c + $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) + +cblas_drotm.$(SUFFIX) cblas_drotm.$(PSUFFIX): rotm.c + $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) + +cblas_srotmg.$(SUFFIX) cblas_srotmg.$(PSUFFIX): rotmg.c + $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) + +cblas_drotmg.$(SUFFIX) cblas_drotmg.$(PSUFFIX): rotmg.c + $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) + +cblas_sscal.$(SUFFIX) cblas_sscal.$(PSUFFIX) : scal.c + $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) + +cblas_dscal.$(SUFFIX) cblas_dscal.$(PSUFFIX) : scal.c + $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) + +cblas_cscal.$(SUFFIX) cblas_cscal.$(PSUFFIX) : zscal.c + $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) + +cblas_zscal.$(SUFFIX) cblas_zscal.$(PSUFFIX) : zscal.c + $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) + +cblas_csscal.$(SUFFIX) cblas_csscal.$(PSUFFIX) : zscal.c + $(CC) $(CFLAGS) -DCBLAS -c -DSSCAL $< -o $(@F) + +cblas_zdscal.$(SUFFIX) 
cblas_zdscal.$(PSUFFIX) : zscal.c + $(CC) $(CFLAGS) -DCBLAS -c -DSSCAL $< -o $(@F) + +cblas_csrot.$(SUFFIX) cblas_csrot.$(PSUFFIX) : zrot.c + $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) + +cblas_zdrot.$(SUFFIX) cblas_zdrot.$(PSUFFIX) : zrot.c + $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) + +ifeq ($(BUILD_BFLOAT16),1) +cblas_sbgemv.$(SUFFIX) cblas_sbgemv.$(PSUFFIX) : sbgemv.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) +endif + +cblas_sgemv.$(SUFFIX) cblas_sgemv.$(PSUFFIX): gemv.c + $(CC) -DCBLAS -c $(CFLAGS) -o $(@F) $< + +cblas_dgemv.$(SUFFIX) cblas_dgemv.$(PSUFFIX): gemv.c + $(CC) -DCBLAS -c $(CFLAGS) -o $(@F) $< + +cblas_cgemv.$(SUFFIX) cblas_cgemv.$(PSUFFIX): zgemv.c + $(CC) -DCBLAS -c $(CFLAGS) -o $(@F) $< + +cblas_zgemv.$(SUFFIX) cblas_zgemv.$(PSUFFIX): zgemv.c + $(CC) -DCBLAS -c $(CFLAGS) -o $(@F) $< + +cblas_sger.$(SUFFIX) cblas_sger.$(PSUFFIX) : ger.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_dger.$(SUFFIX) cblas_dger.$(PSUFFIX) : ger.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_cgeru.$(SUFFIX) cblas_cgeru.$(PSUFFIX) : zger.c + $(CC) -DCBLAS -c $(CFLAGS) -UCONJ $< -o $(@F) + +cblas_cgerc.$(SUFFIX) cblas_cgerc.$(PSUFFIX) : zger.c + $(CC) -DCBLAS -c $(CFLAGS) -DCONJ $< -o $(@F) + +cblas_zgeru.$(SUFFIX) cblas_zgeru.$(PSUFFIX) : zger.c + $(CC) -DCBLAS -c $(CFLAGS) -UCONJ $< -o $(@F) + +cblas_zgerc.$(SUFFIX) cblas_zgerc.$(PSUFFIX) : zger.c + $(CC) -DCBLAS -c $(CFLAGS) -DCONJ $< -o $(@F) + +cblas_strsv.$(SUFFIX) cblas_strsv.$(PSUFFIX) : trsv.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_dtrsv.$(SUFFIX) cblas_dtrsv.$(PSUFFIX) : trsv.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_ctrsv.$(SUFFIX) cblas_ctrsv.$(PSUFFIX) : ztrsv.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_ztrsv.$(SUFFIX) cblas_ztrsv.$(PSUFFIX) : ztrsv.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_strmv.$(SUFFIX) cblas_strmv.$(PSUFFIX) : trmv.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_dtrmv.$(SUFFIX) cblas_dtrmv.$(PSUFFIX) : trmv.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o 
$(@F) + +cblas_ctrmv.$(SUFFIX) cblas_ctrmv.$(PSUFFIX) : ztrmv.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_ztrmv.$(SUFFIX) cblas_ztrmv.$(PSUFFIX) : ztrmv.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_ssyr.$(SUFFIX) cblas_ssyr.$(PSUFFIX) : syr.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_dsyr.$(SUFFIX) cblas_dsyr.$(PSUFFIX) : syr.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_cher.$(SUFFIX) cblas_cher.$(PSUFFIX) : zher.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_zher.$(SUFFIX) cblas_zher.$(PSUFFIX) : zher.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_ssyr2.$(SUFFIX) cblas_ssyr2.$(PSUFFIX) : syr2.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_dsyr2.$(SUFFIX) cblas_dsyr2.$(PSUFFIX) : syr2.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_cher2.$(SUFFIX) cblas_cher2.$(PSUFFIX) : zher2.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_zher2.$(SUFFIX) cblas_zher2.$(PSUFFIX) : zher2.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_sgbmv.$(SUFFIX) cblas_sgbmv.$(PSUFFIX): gbmv.c + $(CC) -DCBLAS -c $(CFLAGS) -o $(@F) $< + +cblas_dgbmv.$(SUFFIX) cblas_dgbmv.$(PSUFFIX): gbmv.c + $(CC) -DCBLAS -c $(CFLAGS) -o $(@F) $< + +cblas_cgbmv.$(SUFFIX) cblas_cgbmv.$(PSUFFIX): zgbmv.c + $(CC) -DCBLAS -c $(CFLAGS) -o $(@F) $< + +cblas_zgbmv.$(SUFFIX) cblas_zgbmv.$(PSUFFIX): zgbmv.c + $(CC) -DCBLAS -c $(CFLAGS) -o $(@F) $< + +cblas_ssbmv.$(SUFFIX) cblas_ssbmv.$(PSUFFIX) : sbmv.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_dsbmv.$(SUFFIX) cblas_dsbmv.$(PSUFFIX) : sbmv.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_chbmv.$(SUFFIX) cblas_chbmv.$(PSUFFIX) : zhbmv.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_zhbmv.$(SUFFIX) cblas_zhbmv.$(PSUFFIX) : zhbmv.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_sspmv.$(SUFFIX) cblas_sspmv.$(PSUFFIX) : spmv.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_dspmv.$(SUFFIX) cblas_dspmv.$(PSUFFIX) : spmv.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_sspr.$(SUFFIX) 
cblas_sspr.$(PSUFFIX) : spr.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_dspr.$(SUFFIX) cblas_dspr.$(PSUFFIX) : spr.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_chpr.$(SUFFIX) cblas_chpr.$(PSUFFIX) : zhpr.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_zhpr.$(SUFFIX) cblas_zhpr.$(PSUFFIX) : zhpr.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_sspr2.$(SUFFIX) cblas_sspr2.$(PSUFFIX) : spr2.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_dspr2.$(SUFFIX) cblas_dspr2.$(PSUFFIX) : spr2.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_chpr2.$(SUFFIX) cblas_chpr2.$(PSUFFIX) : zhpr2.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_zhpr2.$(SUFFIX) cblas_zhpr2.$(PSUFFIX) : zhpr2.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_stbmv.$(SUFFIX) cblas_stbmv.$(PSUFFIX) : tbmv.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_dtbmv.$(SUFFIX) cblas_dtbmv.$(PSUFFIX) : tbmv.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_ctbmv.$(SUFFIX) cblas_ctbmv.$(PSUFFIX) : ztbmv.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_ztbmv.$(SUFFIX) cblas_ztbmv.$(PSUFFIX) : ztbmv.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_stbsv.$(SUFFIX) cblas_stbsv.$(PSUFFIX) : tbsv.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_dtbsv.$(SUFFIX) cblas_dtbsv.$(PSUFFIX) : tbsv.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_ctbsv.$(SUFFIX) cblas_ctbsv.$(PSUFFIX) : ztbsv.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_ztbsv.$(SUFFIX) cblas_ztbsv.$(PSUFFIX) : ztbsv.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_stpmv.$(SUFFIX) cblas_stpmv.$(PSUFFIX) : tpmv.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_dtpmv.$(SUFFIX) cblas_dtpmv.$(PSUFFIX) : tpmv.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_ctpmv.$(SUFFIX) cblas_ctpmv.$(PSUFFIX) : ztpmv.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_ztpmv.$(SUFFIX) cblas_ztpmv.$(PSUFFIX) : ztpmv.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_chpmv.$(SUFFIX) cblas_chpmv.$(PSUFFIX) : zhpmv.c + $(CC) 
-DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_zhpmv.$(SUFFIX) cblas_zhpmv.$(PSUFFIX) : zhpmv.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_stpsv.$(SUFFIX) cblas_stpsv.$(PSUFFIX) : tpsv.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_dtpsv.$(SUFFIX) cblas_dtpsv.$(PSUFFIX) : tpsv.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_ctpsv.$(SUFFIX) cblas_ctpsv.$(PSUFFIX) : ztpsv.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_ztpsv.$(SUFFIX) cblas_ztpsv.$(PSUFFIX) : ztpsv.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_ssymv.$(SUFFIX) cblas_ssymv.$(PSUFFIX) : symv.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_dsymv.$(SUFFIX) cblas_dsymv.$(PSUFFIX) : symv.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_chemv.$(SUFFIX) cblas_chemv.$(PSUFFIX) : zhemv.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_zhemv.$(SUFFIX) cblas_zhemv.$(PSUFFIX) : zhemv.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_sgemm.$(SUFFIX) cblas_sgemm.$(PSUFFIX) : gemm.c ../param.h + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +ifeq ($(BUILD_BFLOAT16),1) +cblas_sbgemm.$(SUFFIX) cblas_sbgemm.$(PSUFFIX) : gemm.c ../param.h + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) +endif + +cblas_dgemm.$(SUFFIX) cblas_dgemm.$(PSUFFIX) : gemm.c ../param.h + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_cgemm.$(SUFFIX) cblas_cgemm.$(PSUFFIX) : gemm.c ../param.h + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_zgemm.$(SUFFIX) cblas_zgemm.$(PSUFFIX) : gemm.c ../param.h + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_ssymm.$(SUFFIX) cblas_ssymm.$(PSUFFIX) : symm.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_dsymm.$(SUFFIX) cblas_dsymm.$(PSUFFIX) : symm.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_csymm.$(SUFFIX) cblas_csymm.$(PSUFFIX) : symm.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_zsymm.$(SUFFIX) cblas_zsymm.$(PSUFFIX) : symm.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_ssyrk.$(SUFFIX) cblas_ssyrk.$(PSUFFIX) : syrk.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + 
+cblas_dsyrk.$(SUFFIX) cblas_dsyrk.$(PSUFFIX) : syrk.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_csyrk.$(SUFFIX) cblas_csyrk.$(PSUFFIX) : syrk.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_zsyrk.$(SUFFIX) cblas_zsyrk.$(PSUFFIX) : syrk.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_ssyr2k.$(SUFFIX) cblas_ssyr2k.$(PSUFFIX) : syr2k.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_dsyr2k.$(SUFFIX) cblas_dsyr2k.$(PSUFFIX) : syr2k.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_csyr2k.$(SUFFIX) cblas_csyr2k.$(PSUFFIX) : syr2k.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_zsyr2k.$(SUFFIX) cblas_zsyr2k.$(PSUFFIX) : syr2k.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_strmm.$(SUFFIX) cblas_strmm.$(PSUFFIX) : trsm.c + $(CC) -DCBLAS -c $(CFLAGS) -DTRMM $< -o $(@F) + +cblas_dtrmm.$(SUFFIX) cblas_dtrmm.$(PSUFFIX) : trsm.c + $(CC) -DCBLAS -c $(CFLAGS) -DTRMM $< -o $(@F) + +cblas_ctrmm.$(SUFFIX) cblas_ctrmm.$(PSUFFIX) : trsm.c + $(CC) -DCBLAS -c $(CFLAGS) -DTRMM $< -o $(@F) + +cblas_ztrmm.$(SUFFIX) cblas_ztrmm.$(PSUFFIX) : trsm.c + $(CC) -DCBLAS -c $(CFLAGS) -DTRMM $< -o $(@F) + +cblas_strsm.$(SUFFIX) cblas_strsm.$(PSUFFIX) : trsm.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_dtrsm.$(SUFFIX) cblas_dtrsm.$(PSUFFIX) : trsm.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_ctrsm.$(SUFFIX) cblas_ctrsm.$(PSUFFIX) : trsm.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_ztrsm.$(SUFFIX) cblas_ztrsm.$(PSUFFIX) : trsm.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_chemm.$(SUFFIX) cblas_chemm.$(PSUFFIX) : symm.c + $(CC) -DCBLAS -c $(CFLAGS) -DHEMM $< -o $(@F) + +cblas_zhemm.$(SUFFIX) cblas_zhemm.$(PSUFFIX) : symm.c + $(CC) -DCBLAS -c $(CFLAGS) -DHEMM $< -o $(@F) + +cblas_cherk.$(SUFFIX) cblas_cherk.$(PSUFFIX) : syrk.c + $(CC) -DCBLAS -c $(CFLAGS) -DHEMM $< -o $(@F) + +cblas_zherk.$(SUFFIX) cblas_zherk.$(PSUFFIX) : syrk.c + $(CC) -DCBLAS -c $(CFLAGS) -DHEMM $< -o $(@F) + +cblas_cher2k.$(SUFFIX) cblas_cher2k.$(PSUFFIX) : syr2k.c + $(CC) 
-DCBLAS -c $(CFLAGS) -DHEMM $< -o $(@F) + +cblas_zher2k.$(SUFFIX) cblas_zher2k.$(PSUFFIX) : syr2k.c + $(CC) -DCBLAS -c $(CFLAGS) -DHEMM $< -o $(@F) + +cblas_cgemm3m.$(SUFFIX) cblas_cgemm3m.$(PSUFFIX) : gemm.c + $(CC) -DCBLAS -c $(CFLAGS) -DGEMM3M $< -o $(@F) + +cblas_zgemm3m.$(SUFFIX) cblas_zgemm3m.$(PSUFFIX) : gemm.c + $(CC) -DCBLAS -c $(CFLAGS) -DGEMM3M $< -o $(@F) + + +sgetf2.$(SUFFIX) sgetf2.$(PSUFFIX) : lapack/getf2.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +dgetf2.$(SUFFIX) dgetf2.$(PSUFFIX) : lapack/getf2.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +qgetf2.$(SUFFIX) qgetf2.$(PSUFFIX) : getf2.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +cgetf2.$(SUFFIX) cgetf2.$(PSUFFIX) : lapack/zgetf2.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +zgetf2.$(SUFFIX) zgetf2.$(PSUFFIX) : lapack/zgetf2.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +xgetf2.$(SUFFIX) xgetf2.$(PSUFFIX) : zgetf2.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +sgetrf.$(SUFFIX) sgetrf.$(PSUFFIX) : lapack/getrf.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +dgetrf.$(SUFFIX) dgetrf.$(PSUFFIX) : lapack/getrf.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +qgetrf.$(SUFFIX) qgetrf.$(PSUFFIX) : getrf.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +cgetrf.$(SUFFIX) cgetrf.$(PSUFFIX) : lapack/zgetrf.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +zgetrf.$(SUFFIX) zgetrf.$(PSUFFIX) : lapack/zgetrf.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +xgetrf.$(SUFFIX) xgetrf.$(PSUFFIX) : zgetrf.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +slauu2.$(SUFFIX) slauu2.$(PSUFFIX) : lapack/lauu2.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +dlauu2.$(SUFFIX) dlauu2.$(PSUFFIX) : lapack/lauu2.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +qlauu2.$(SUFFIX) qlauu2.$(PSUFFIX) : lauu2.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +clauu2.$(SUFFIX) clauu2.$(PSUFFIX) : lapack/zlauu2.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +zlauu2.$(SUFFIX) zlauu2.$(PSUFFIX) : lapack/zlauu2.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +xlauu2.$(SUFFIX) xlauu2.$(PSUFFIX) : zlauu2.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +slauum.$(SUFFIX) slauum.$(PSUFFIX) : lapack/lauum.c + $(CC) -c $(CFLAGS) 
$< -o $(@F) + +dlauum.$(SUFFIX) dlauum.$(PSUFFIX) : lapack/lauum.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +qlauum.$(SUFFIX) qlauum.$(PSUFFIX) : lauum.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +clauum.$(SUFFIX) clauum.$(PSUFFIX) : lapack/zlauum.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +zlauum.$(SUFFIX) zlauum.$(PSUFFIX) : lapack/zlauum.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +xlauum.$(SUFFIX) xlauum.$(PSUFFIX) : zlauum.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +spotf2.$(SUFFIX) spotf2.$(PSUFFIX) : lapack/potf2.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +dpotf2.$(SUFFIX) dpotf2.$(PSUFFIX) : lapack/potf2.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +qpotf2.$(SUFFIX) qpotf2.$(PSUFFIX) : potf2.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +cpotf2.$(SUFFIX) cpotf2.$(PSUFFIX) : lapack/zpotf2.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +zpotf2.$(SUFFIX) zpotf2.$(PSUFFIX) : lapack/zpotf2.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +xpotf2.$(SUFFIX) xpotf2.$(PSUFFIX) : zpotf2.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +spotrf.$(SUFFIX) spotrf.$(PSUFFIX) : lapack/potrf.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +dpotrf.$(SUFFIX) dpotrf.$(PSUFFIX) : lapack/potrf.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +qpotrf.$(SUFFIX) qpotrf.$(PSUFFIX) : potrf.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +cpotrf.$(SUFFIX) cpotrf.$(PSUFFIX) : lapack/zpotrf.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +zpotrf.$(SUFFIX) zpotrf.$(PSUFFIX) : lapack/zpotrf.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +xpotrf.$(SUFFIX) xpotrf.$(PSUFFIX) : zpotrf.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +strti2.$(SUFFIX) strti2.$(PSUFFIX) : lapack/trti2.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +dtrti2.$(SUFFIX) dtrti2.$(PSUFFIX) : lapack/trti2.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +qtrti2.$(SUFFIX) qtrti2.$(PSUFFIX) : trti2.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +ctrti2.$(SUFFIX) ctrti2.$(PSUFFIX) : lapack/ztrti2.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +ztrti2.$(SUFFIX) ztrti2.$(PSUFFIX) : lapack/ztrti2.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +xtrti2.$(SUFFIX) xtrti2.$(PSUFFIX) : ztrti2.c + $(CC) -c $(CFLAGS) $< -o $(@F) + 
+strtri.$(SUFFIX) strtri.$(PSUFFIX) : lapack/trtri.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +dtrtri.$(SUFFIX) dtrtri.$(PSUFFIX) : lapack/trtri.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +qtrtri.$(SUFFIX) qtrtri.$(PSUFFIX) : trtri.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +ctrtri.$(SUFFIX) ctrtri.$(PSUFFIX) : lapack/ztrtri.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +ztrtri.$(SUFFIX) ztrtri.$(PSUFFIX) : lapack/ztrtri.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +xtrtri.$(SUFFIX) xtrtri.$(PSUFFIX) : ztrtri.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +slaswp.$(SUFFIX) slaswp.$(PSUFFIX) : lapack/laswp.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +dlaswp.$(SUFFIX) dlaswp.$(PSUFFIX) : lapack/laswp.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +qlaswp.$(SUFFIX) qlaswp.$(PSUFFIX) : laswp.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +claswp.$(SUFFIX) claswp.$(PSUFFIX) : lapack/zlaswp.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +zlaswp.$(SUFFIX) zlaswp.$(PSUFFIX) : lapack/zlaswp.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +xlaswp.$(SUFFIX) xlaswp.$(PSUFFIX) : zlaswp.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +sgetrs.$(SUFFIX) sgetrs.$(PSUFFIX) : lapack/getrs.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +dgetrs.$(SUFFIX) dgetrs.$(PSUFFIX) : lapack/getrs.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +qgetrs.$(SUFFIX) qgetrs.$(PSUFFIX) : lapack/getrs.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +cgetrs.$(SUFFIX) cgetrs.$(PSUFFIX) : lapack/zgetrs.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +zgetrs.$(SUFFIX) zgetrs.$(PSUFFIX) : lapack/zgetrs.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +xgetrs.$(SUFFIX) xgetrs.$(PSUFFIX) : lapack/zgetrs.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +strtrs.$(SUFFIX) strtrs.$(PSUFFIX) : lapack/trtrs.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +dtrtrs.$(SUFFIX) dtrtrs.$(PSUFFIX) : lapack/trtrs.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +qtrtrs.$(SUFFIX) qtrtrs.$(PSUFFIX) : lapack/trtrs.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +ctrtrs.$(SUFFIX) ctrtrs.$(PSUFFIX) : lapack/ztrtrs.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +ztrtrs.$(SUFFIX) ztrtrs.$(PSUFFIX) : lapack/ztrtrs.c + $(CC) -c $(CFLAGS) $< -o 
$(@F) + +xtrtrs.$(SUFFIX) xtrtrs.$(PSUFFIX) : lapack/ztrtrs.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +sgesv.$(SUFFIX) sgesv.$(PSUFFIX) : lapack/gesv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +dgesv.$(SUFFIX) dgesv.$(PSUFFIX) : lapack/gesv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +qgesv.$(SUFFIX) qgesv.$(PSUFFIX) : gesv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +cgesv.$(SUFFIX) cgesv.$(PSUFFIX) : lapack/gesv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +zgesv.$(SUFFIX) zgesv.$(PSUFFIX) : lapack/gesv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +xgesv.$(SUFFIX) xgesv.$(PSUFFIX) : gesv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +spotri.$(SUFFIX) spotri.$(PSUFFIX) : lapack/potri.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +dpotri.$(SUFFIX) dpotri.$(PSUFFIX) : lapack/potri.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +qpotri.$(SUFFIX) qpotri.$(PSUFFIX) : potri.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +cpotri.$(SUFFIX) cpotri.$(PSUFFIX) : lapack/zpotri.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +zpotri.$(SUFFIX) zpotri.$(PSUFFIX) : lapack/zpotri.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +xpotri.$(SUFFIX) xpotri.$(PSUFFIX) : zpotri.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +slarf.$(SUFFIX) slarf.$(PSUFFIX) : larf.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +dlarf.$(SUFFIX) dlarf.$(PSUFFIX) : larf.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +qlarf.$(SUFFIX) qlarf.$(PSUFFIX) : larf.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +clarf.$(SUFFIX) clarf.$(PSUFFIX) : larf.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +zlarf.$(SUFFIX) zlarf.$(PSUFFIX) : larf.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +xlarf.$(SUFFIX) xlarf.$(PSUFFIX) : larf.c + $(CC) -c $(CFLAGS) $< -o $(@F) + + +############# BLAS EXTENSIONS ##################################### + +daxpby.$(SUFFIX) daxpby.$(PSUFFIX) : axpby.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +cblas_daxpby.$(SUFFIX) cblas_daxpby.$(PSUFFIX) : axpby.c + $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) + +saxpby.$(SUFFIX) saxpby.$(PSUFFIX) : axpby.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +cblas_saxpby.$(SUFFIX) cblas_saxpby.$(PSUFFIX) : axpby.c + $(CC) $(CFLAGS) -DCBLAS 
-c $< -o $(@F) + +zaxpby.$(SUFFIX) zaxpby.$(PSUFFIX) : zaxpby.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +cblas_zaxpby.$(SUFFIX) cblas_zaxpby.$(PSUFFIX) : zaxpby.c + $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) + +caxpby.$(SUFFIX) caxpby.$(PSUFFIX) : zaxpby.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +cblas_caxpby.$(SUFFIX) cblas_caxpby.$(PSUFFIX) : zaxpby.c + $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) + +domatcopy.$(SUFFIX) domatcopy.$(PSUFFIX) : omatcopy.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +cblas_domatcopy.$(SUFFIX) cblas_domatcopy.$(PSUFFIX) : omatcopy.c + $(CC) -c $(CFLAGS) -DCBLAS $< -o $(@F) + +somatcopy.$(SUFFIX) somatcopy.$(PSUFFIX) : omatcopy.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +cblas_somatcopy.$(SUFFIX) cblas_somatcopy.$(PSUFFIX) : omatcopy.c + $(CC) -c $(CFLAGS) -DCBLAS $< -o $(@F) + +comatcopy.$(SUFFIX) comatcopy.$(PSUFFIX) : zomatcopy.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +cblas_comatcopy.$(SUFFIX) cblas_comatcopy.$(PSUFFIX) : zomatcopy.c + $(CC) -c $(CFLAGS) -DCBLAS $< -o $(@F) + +zomatcopy.$(SUFFIX) zomatcopy.$(PSUFFIX) : zomatcopy.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +cblas_zomatcopy.$(SUFFIX) cblas_zomatcopy.$(PSUFFIX) : zomatcopy.c + $(CC) -c $(CFLAGS) -DCBLAS $< -o $(@F) + +dimatcopy.$(SUFFIX) dimatcopy.$(PSUFFIX) : imatcopy.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +cblas_dimatcopy.$(SUFFIX) cblas_dimatcopy.$(PSUFFIX) : imatcopy.c + $(CC) -c $(CFLAGS) -DCBLAS $< -o $(@F) + +simatcopy.$(SUFFIX) simatcopy.$(PSUFFIX) : imatcopy.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +cblas_simatcopy.$(SUFFIX) cblas_simatcopy.$(PSUFFIX) : imatcopy.c + $(CC) -c $(CFLAGS) -DCBLAS $< -o $(@F) + +cimatcopy.$(SUFFIX) cimatcopy.$(PSUFFIX) : zimatcopy.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +cblas_cimatcopy.$(SUFFIX) cblas_cimatcopy.$(PSUFFIX) : zimatcopy.c + $(CC) -c $(CFLAGS) -DCBLAS $< -o $(@F) + +zimatcopy.$(SUFFIX) zimatcopy.$(PSUFFIX) : zimatcopy.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +cblas_zimatcopy.$(SUFFIX) cblas_zimatcopy.$(PSUFFIX) : zimatcopy.c + $(CC) -c $(CFLAGS) -DCBLAS $< -o $(@F) + 
+sgeadd.$(SUFFIX) sgeadd.$(PSUFFIX) : geadd.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +dgeadd.$(SUFFIX) dgeadd.$(PSUFFIX) : geadd.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +cgeadd.$(SUFFIX) cgeadd.$(PSUFFIX) : zgeadd.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +zgeadd.$(SUFFIX) zgeadd.$(PSUFFIX) : zgeadd.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +cblas_sgeadd.$(SUFFIX) cblas_sgeadd.$(PSUFFIX) : geadd.c + $(CC) -c $(CFLAGS) -DCBLAS $< -o $(@F) + +cblas_dgeadd.$(SUFFIX) cblas_dgeadd.$(PSUFFIX) : geadd.c + $(CC) -c $(CFLAGS) -DCBLAS $< -o $(@F) + +cblas_cgeadd.$(SUFFIX) cblas_cgeadd.$(PSUFFIX) : zgeadd.c + $(CC) -c $(CFLAGS) -DCBLAS $< -o $(@F) + +cblas_zgeadd.$(SUFFIX) cblas_zgeadd.$(PSUFFIX) : zgeadd.c + $(CC) -c $(CFLAGS) -DCBLAS $< -o $(@F) + +cblas_xerbla.$(SUFFIX) cblas_xerbla.$(PSUFFIX) : xerbla.c + $(CC) -c $(CFLAGS) -DCBLAS $< -o $(@F) + From bc5b35367fa8e8ba7b949641f95d308540c3920b Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Wed, 13 Jan 2021 00:28:43 +0100 Subject: [PATCH 044/134] restore Makefile after accidental overwrite --- Makefile | 2693 ++++++++---------------------------------------------- 1 file changed, 379 insertions(+), 2314 deletions(-) diff --git a/Makefile b/Makefile index fab403c82..de0735c4a 100644 --- a/Makefile +++ b/Makefile @@ -1,2337 +1,402 @@ -TOPDIR = .. -include $(TOPDIR)/Makefile.system +TOPDIR = . 
+include ./Makefile.system -SUPPORT_GEMM3M = 0 +BLASDIRS = interface driver/level2 driver/level3 driver/others -ifeq ($(ARCH), x86) -SUPPORT_GEMM3M = 1 +ifneq ($(DYNAMIC_ARCH), 1) +BLASDIRS += kernel endif -ifeq ($(ARCH), x86_64) -SUPPORT_GEMM3M = 1 +ifdef SANITY_CHECK +BLASDIRS += reference endif -ifeq ($(ARCH), ia64) -SUPPORT_GEMM3M = 1 -endif - -ifeq ($(ARCH), MIPS) -SUPPORT_GEMM3M = 1 -endif - -ifneq ($(NO_FBLAS), 1) - -SBLAS1OBJS = \ - saxpy.$(SUFFIX) sswap.$(SUFFIX) \ - scopy.$(SUFFIX) sscal.$(SUFFIX) \ - sdot.$(SUFFIX) sdsdot.$(SUFFIX) dsdot.$(SUFFIX) \ - sasum.$(SUFFIX) ssum.$(SUFFIX) snrm2.$(SUFFIX) \ - smax.$(SUFFIX) samax.$(SUFFIX) ismax.$(SUFFIX) isamax.$(SUFFIX) \ - smin.$(SUFFIX) samin.$(SUFFIX) ismin.$(SUFFIX) isamin.$(SUFFIX) \ - srot.$(SUFFIX) srotg.$(SUFFIX) srotm.$(SUFFIX) srotmg.$(SUFFIX) \ - saxpby.$(SUFFIX) - -SBLAS2OBJS = \ - sgemv.$(SUFFIX) sger.$(SUFFIX) \ - strsv.$(SUFFIX) strmv.$(SUFFIX) ssymv.$(SUFFIX) \ - ssyr.$(SUFFIX) ssyr2.$(SUFFIX) sgbmv.$(SUFFIX) \ - ssbmv.$(SUFFIX) sspmv.$(SUFFIX) \ - sspr.$(SUFFIX) sspr2.$(SUFFIX) \ - stbsv.$(SUFFIX) stbmv.$(SUFFIX) \ - stpsv.$(SUFFIX) stpmv.$(SUFFIX) - -SBLAS3OBJS = \ - sgemm.$(SUFFIX) ssymm.$(SUFFIX) strmm.$(SUFFIX) \ - strsm.$(SUFFIX) ssyrk.$(SUFFIX) ssyr2k.$(SUFFIX) \ - somatcopy.$(SUFFIX) simatcopy.$(SUFFIX)\ - sgeadd.$(SUFFIX) - -ifeq ($(BUILD_BFLOAT16),1) -SBBLAS1OBJS = sbdot.$(SUFFIX) -SBBLAS2OBJS = sbgemv.$(SUFFIX) -SBBLAS3OBJS = sbgemm.$(SUFFIX) -SBEXTOBJS = sbstobf16.$(SUFFIX) sbdtobf16.$(SUFFIX) sbf16tos.$(SUFFIX) dbf16tod.$(SUFFIX) -endif - -DBLAS1OBJS = \ - daxpy.$(SUFFIX) dswap.$(SUFFIX) \ - dcopy.$(SUFFIX) dscal.$(SUFFIX) \ - ddot.$(SUFFIX) \ - dasum.$(SUFFIX) dsum.$(SUFFIX) dnrm2.$(SUFFIX) \ - dmax.$(SUFFIX) damax.$(SUFFIX) idmax.$(SUFFIX) idamax.$(SUFFIX) \ - dmin.$(SUFFIX) damin.$(SUFFIX) idmin.$(SUFFIX) idamin.$(SUFFIX) \ - drot.$(SUFFIX) drotg.$(SUFFIX) drotm.$(SUFFIX) drotmg.$(SUFFIX) \ - daxpby.$(SUFFIX) - -DBLAS2OBJS = \ - dgemv.$(SUFFIX) dger.$(SUFFIX) \ - dtrsv.$(SUFFIX) 
dtrmv.$(SUFFIX) dsymv.$(SUFFIX) \ - dsyr.$(SUFFIX) dsyr2.$(SUFFIX) dgbmv.$(SUFFIX) \ - dsbmv.$(SUFFIX) dspmv.$(SUFFIX) \ - dspr.$(SUFFIX) dspr2.$(SUFFIX) \ - dtbsv.$(SUFFIX) dtbmv.$(SUFFIX) \ - dtpsv.$(SUFFIX) dtpmv.$(SUFFIX) - -DBLAS3OBJS = \ - dgemm.$(SUFFIX) dsymm.$(SUFFIX) dtrmm.$(SUFFIX) \ - dtrsm.$(SUFFIX) dsyrk.$(SUFFIX) dsyr2k.$(SUFFIX) \ - domatcopy.$(SUFFIX) dimatcopy.$(SUFFIX)\ - dgeadd.$(SUFFIX) - -CBLAS1OBJS = \ - caxpy.$(SUFFIX) caxpyc.$(SUFFIX) cswap.$(SUFFIX) \ - ccopy.$(SUFFIX) cscal.$(SUFFIX) csscal.$(SUFFIX) \ - cdotc.$(SUFFIX) cdotu.$(SUFFIX) \ - scasum.$(SUFFIX) scsum.$(SUFFIX) scnrm2.$(SUFFIX) \ - scamax.$(SUFFIX) icamax.$(SUFFIX) \ - scamin.$(SUFFIX) icamin.$(SUFFIX) \ - csrot.$(SUFFIX) crotg.$(SUFFIX) \ - caxpby.$(SUFFIX) - -CBLAS2OBJS = \ - cgemv.$(SUFFIX) cgeru.$(SUFFIX) cgerc.$(SUFFIX) \ - ctrsv.$(SUFFIX) ctrmv.$(SUFFIX) \ - csyr2.$(SUFFIX) cgbmv.$(SUFFIX) \ - csbmv.$(SUFFIX) \ - cspr2.$(SUFFIX) \ - ctbsv.$(SUFFIX) ctbmv.$(SUFFIX) \ - ctpsv.$(SUFFIX) ctpmv.$(SUFFIX) \ - chemv.$(SUFFIX) chbmv.$(SUFFIX) \ - cher.$(SUFFIX) cher2.$(SUFFIX) \ - chpmv.$(SUFFIX) chpr.$(SUFFIX) chpr2.$(SUFFIX) - -CBLAS3OBJS = \ - cgemm.$(SUFFIX) csymm.$(SUFFIX) ctrmm.$(SUFFIX) \ - ctrsm.$(SUFFIX) csyrk.$(SUFFIX) csyr2k.$(SUFFIX) \ - chemm.$(SUFFIX) cherk.$(SUFFIX) cher2k.$(SUFFIX) \ - comatcopy.$(SUFFIX) cimatcopy.$(SUFFIX)\ - cgeadd.$(SUFFIX) - -ZBLAS1OBJS = \ - zaxpy.$(SUFFIX) zaxpyc.$(SUFFIX) zswap.$(SUFFIX) \ - zcopy.$(SUFFIX) zscal.$(SUFFIX) zdscal.$(SUFFIX) \ - zdotc.$(SUFFIX) zdotu.$(SUFFIX) \ - dzasum.$(SUFFIX) dzsum.$(SUFFIX) dznrm2.$(SUFFIX) \ - dzamax.$(SUFFIX) izamax.$(SUFFIX) \ - dzamin.$(SUFFIX) izamin.$(SUFFIX) \ - zdrot.$(SUFFIX) zrotg.$(SUFFIX) \ - zaxpby.$(SUFFIX) - -ZBLAS2OBJS = \ - zgemv.$(SUFFIX) zgeru.$(SUFFIX) zgerc.$(SUFFIX) \ - ztrsv.$(SUFFIX) ztrmv.$(SUFFIX) \ - zsyr2.$(SUFFIX) zgbmv.$(SUFFIX) \ - zsbmv.$(SUFFIX) \ - zspr2.$(SUFFIX) \ - ztbsv.$(SUFFIX) ztbmv.$(SUFFIX) \ - ztpsv.$(SUFFIX) ztpmv.$(SUFFIX) \ - zhemv.$(SUFFIX) 
zhbmv.$(SUFFIX) \ - zher.$(SUFFIX) zher2.$(SUFFIX) \ - zhpmv.$(SUFFIX) zhpr.$(SUFFIX) zhpr2.$(SUFFIX) - -ZBLAS3OBJS = \ - zgemm.$(SUFFIX) zsymm.$(SUFFIX) ztrmm.$(SUFFIX) \ - ztrsm.$(SUFFIX) zsyrk.$(SUFFIX) zsyr2k.$(SUFFIX) \ - zhemm.$(SUFFIX) zherk.$(SUFFIX) zher2k.$(SUFFIX) \ - zomatcopy.$(SUFFIX) zimatcopy.$(SUFFIX)\ - zgeadd.$(SUFFIX) - -ifeq ($(SUPPORT_GEMM3M), 1) - -# CBLAS3OBJS += cgemm3m.$(SUFFIX) csymm3m.$(SUFFIX) chemm3m.$(SUFFIX) -CBLAS3OBJS += cgemm3m.$(SUFFIX) - -# ZBLAS3OBJS += zgemm3m.$(SUFFIX) zsymm3m.$(SUFFIX) zhemm3m.$(SUFFIX) -ZBLAS3OBJS += zgemm3m.$(SUFFIX) - -endif - -ifeq ($(EXPRECISION), 1) - -QBLAS1OBJS = \ - qaxpy.$(SUFFIX) qswap.$(SUFFIX) \ - qcopy.$(SUFFIX) qscal.$(SUFFIX) \ - qdot.$(SUFFIX) \ - qasum.$(SUFFIX) qsum.$(SUFFIX) qnrm2.$(SUFFIX) \ - qmax.$(SUFFIX) qamax.$(SUFFIX) iqmax.$(SUFFIX) iqamax.$(SUFFIX) \ - qmin.$(SUFFIX) qamin.$(SUFFIX) iqmin.$(SUFFIX) iqamin.$(SUFFIX) \ - qrot.$(SUFFIX) qrotg.$(SUFFIX) qrotm.$(SUFFIX) qrotmg.$(SUFFIX) \ - -QBLAS2OBJS = \ - qgemv.$(SUFFIX) qger.$(SUFFIX) \ - qtrsv.$(SUFFIX) qtrmv.$(SUFFIX) qsymv.$(SUFFIX) \ - qsyr.$(SUFFIX) qsyr2.$(SUFFIX) qgbmv.$(SUFFIX) \ - qsbmv.$(SUFFIX) qspmv.$(SUFFIX) \ - qspr.$(SUFFIX) qspr2.$(SUFFIX) \ - qtbsv.$(SUFFIX) qtbmv.$(SUFFIX) \ - qtpsv.$(SUFFIX) qtpmv.$(SUFFIX) - -QBLAS3OBJS = \ - qgemm.$(SUFFIX) qsymm.$(SUFFIX) qtrmm.$(SUFFIX) \ - qtrsm.$(SUFFIX) qsyrk.$(SUFFIX) qsyr2k.$(SUFFIX) - -XBLAS1OBJS = \ - xaxpy.$(SUFFIX) xaxpyc.$(SUFFIX) xswap.$(SUFFIX) \ - xcopy.$(SUFFIX) xscal.$(SUFFIX) xqscal.$(SUFFIX) \ - xdotc.$(SUFFIX) xdotu.$(SUFFIX) \ - qxasum.$(SUFFIX) qxsum.$(SUFFIX) qxnrm2.$(SUFFIX) \ - qxamax.$(SUFFIX) ixamax.$(SUFFIX) \ - qxamin.$(SUFFIX) ixamin.$(SUFFIX) \ - xqrot.$(SUFFIX) xrotg.$(SUFFIX) \ - -XBLAS2OBJS = \ - xgemv.$(SUFFIX) xgeru.$(SUFFIX) xgerc.$(SUFFIX) \ - xtrsv.$(SUFFIX) xtrmv.$(SUFFIX) xsymv.$(SUFFIX) \ - xsyr.$(SUFFIX) xsyr2.$(SUFFIX) xgbmv.$(SUFFIX) \ - xsbmv.$(SUFFIX) xspmv.$(SUFFIX) \ - xspr.$(SUFFIX) xspr2.$(SUFFIX) \ - xtbsv.$(SUFFIX) 
xtbmv.$(SUFFIX) \ - xtpsv.$(SUFFIX) xtpmv.$(SUFFIX) \ - xhemv.$(SUFFIX) xhbmv.$(SUFFIX) \ - xher.$(SUFFIX) xher2.$(SUFFIX) \ - xhpmv.$(SUFFIX) xhpr.$(SUFFIX) xhpr2.$(SUFFIX) - -XBLAS3OBJS = \ - xgemm.$(SUFFIX) xsymm.$(SUFFIX) xtrmm.$(SUFFIX) \ - xtrsm.$(SUFFIX) xsyrk.$(SUFFIX) xsyr2k.$(SUFFIX) \ - xhemm.$(SUFFIX) xherk.$(SUFFIX) xher2k.$(SUFFIX) - -ifeq ($(SUPPORT_GEMM3M), 1) - -XBLAS3OBJS += xgemm3m.$(SUFFIX) xsymm3m.$(SUFFIX) xhemm3m.$(SUFFIX) - -endif - -endif - -ifdef QUAD_PRECISION - -QBLAS1OBJS = \ - qaxpy.$(SUFFIX) qswap.$(SUFFIX) \ - qcopy.$(SUFFIX) qscal.$(SUFFIX) \ - qasum.$(SUFFIX) qsum.$(SUFFIX) qnrm2.$(SUFFIX) \ - qmax.$(SUFFIX) qamax.$(SUFFIX) iqmax.$(SUFFIX) iqamax.$(SUFFIX) \ - qmin.$(SUFFIX) qamin.$(SUFFIX) iqmin.$(SUFFIX) iqamin.$(SUFFIX) \ - qrot.$(SUFFIX) qrotg.$(SUFFIX) qrotm.$(SUFFIX) qrotmg.$(SUFFIX) \ - -QBLAS2OBJS = \ - qgemv.$(SUFFIX) qger.$(SUFFIX) \ - qtrsv.$(SUFFIX) qtrmv.$(SUFFIX) qsymv.$(SUFFIX) \ - qsyr.$(SUFFIX) qsyr2.$(SUFFIX) qgbmv.$(SUFFIX) \ - qsbmv.$(SUFFIX) qspmv.$(SUFFIX) \ - qspr.$(SUFFIX) qspr2.$(SUFFIX) \ - qtbsv.$(SUFFIX) qtbmv.$(SUFFIX) \ - qtpsv.$(SUFFIX) qtpmv.$(SUFFIX) - -QBLAS3OBJS = \ - qgemm.$(SUFFIX) qsymm.$(SUFFIX) qtrmm.$(SUFFIX) \ - qtrsm.$(SUFFIX) qsyrk.$(SUFFIX) qsyr2k.$(SUFFIX) - -XBLAS1OBJS = \ - xaxpy.$(SUFFIX) xaxpyc.$(SUFFIX) xswap.$(SUFFIX) \ - xcopy.$(SUFFIX) xscal.$(SUFFIX) xqscal.$(SUFFIX) \ - qxasum.$(SUFFIX) qxsum.$(SUFFIX) qxnrm2.$(SUFFIX) \ - qxamax.$(SUFFIX) ixamax.$(SUFFIX) \ - qxamin.$(SUFFIX) ixamin.$(SUFFIX) \ - xqrot.$(SUFFIX) xrotg.$(SUFFIX) \ - -XBLAS2OBJS = \ - xgemv.$(SUFFIX) xgeru.$(SUFFIX) xgerc.$(SUFFIX) \ - xtrsv.$(SUFFIX) xtrmv.$(SUFFIX) xsymv.$(SUFFIX) \ - xsyr.$(SUFFIX) xsyr2.$(SUFFIX) xgbmv.$(SUFFIX) \ - xsbmv.$(SUFFIX) xspmv.$(SUFFIX) \ - xspr.$(SUFFIX) xspr2.$(SUFFIX) \ - xtbsv.$(SUFFIX) xtbmv.$(SUFFIX) \ - xtpsv.$(SUFFIX) xtpmv.$(SUFFIX) \ - xhemv.$(SUFFIX) xhbmv.$(SUFFIX) \ - xher.$(SUFFIX) xher2.$(SUFFIX) \ - xhpmv.$(SUFFIX) xhpr.$(SUFFIX) xhpr2.$(SUFFIX) - -XBLAS3OBJS = \ 
- xgemm.$(SUFFIX) xsymm.$(SUFFIX) xtrmm.$(SUFFIX) \ - xtrsm.$(SUFFIX) xsyrk.$(SUFFIX) xsyr2k.$(SUFFIX) \ - xhemm.$(SUFFIX) xherk.$(SUFFIX) xher2k.$(SUFFIX) - -ifeq ($(SUPPORT_GEMM3M), 1) - -XBLAS3OBJS += xgemm3m.$(SUFFIX) xsymm3m.$(SUFFIX) xhemm3m.$(SUFFIX) - -endif -endif - -endif - -HPLOBJS = dgemm.$(SUFFIX) dtrsm.$(SUFFIX) \ - dgemv.$(SUFFIX) dtrsv.$(SUFFIX) dger.$(SUFFIX) \ - idamax.$(SUFFIX) daxpy.$(SUFFIX) dcopy.$(SUFFIX) dscal.$(SUFFIX) - -CSBLAS1OBJS = \ - cblas_isamax.$(SUFFIX) cblas_isamin.$(SUFFIX) cblas_sasum.$(SUFFIX) cblas_saxpy.$(SUFFIX) \ - cblas_scopy.$(SUFFIX) cblas_sdot.$(SUFFIX) cblas_sdsdot.$(SUFFIX) cblas_dsdot.$(SUFFIX) \ - cblas_srot.$(SUFFIX) cblas_srotg.$(SUFFIX) cblas_srotm.$(SUFFIX) cblas_srotmg.$(SUFFIX) \ - cblas_sscal.$(SUFFIX) cblas_sswap.$(SUFFIX) cblas_snrm2.$(SUFFIX) cblas_saxpby.$(SUFFIX) \ - cblas_ismin.$(SUFFIX) cblas_ismax.$(SUFFIX) cblas_ssum.$(SUFFIX) - -CSBLAS2OBJS = \ - cblas_sgemv.$(SUFFIX) cblas_sger.$(SUFFIX) cblas_ssymv.$(SUFFIX) cblas_strmv.$(SUFFIX) \ - cblas_strsv.$(SUFFIX) cblas_ssyr.$(SUFFIX) cblas_ssyr2.$(SUFFIX) cblas_sgbmv.$(SUFFIX) \ - cblas_ssbmv.$(SUFFIX) cblas_sspmv.$(SUFFIX) cblas_sspr.$(SUFFIX) cblas_sspr2.$(SUFFIX) \ - cblas_stbmv.$(SUFFIX) cblas_stbsv.$(SUFFIX) cblas_stpmv.$(SUFFIX) cblas_stpsv.$(SUFFIX) - -CSBLAS3OBJS = \ - cblas_sgemm.$(SUFFIX) cblas_ssymm.$(SUFFIX) cblas_strmm.$(SUFFIX) cblas_strsm.$(SUFFIX) \ - cblas_ssyrk.$(SUFFIX) cblas_ssyr2k.$(SUFFIX) cblas_somatcopy.$(SUFFIX) cblas_simatcopy.$(SUFFIX)\ - cblas_sgeadd.$(SUFFIX) - -ifeq ($(BUILD_BFLOAT16),1) -CSBBLAS1OBJS = cblas_sbdot.$(SUFFIX) -CSBBLAS2OBJS = cblas_sbgemv.$(SUFFIX) -CSBBLAS3OBJS = cblas_sbgemm.$(SUFFIX) -CSBEXTOBJS = cblas_sbstobf16.$(SUFFIX) cblas_sbdtobf16.$(SUFFIX) cblas_sbf16tos.$(SUFFIX) cblas_dbf16tod.$(SUFFIX) -endif - -CDBLAS1OBJS = \ - cblas_idamax.$(SUFFIX) cblas_idamin.$(SUFFIX) cblas_dasum.$(SUFFIX) cblas_daxpy.$(SUFFIX) \ - cblas_dcopy.$(SUFFIX) cblas_ddot.$(SUFFIX) \ - cblas_drot.$(SUFFIX) cblas_drotg.$(SUFFIX) 
cblas_drotm.$(SUFFIX) cblas_drotmg.$(SUFFIX) \ - cblas_dscal.$(SUFFIX) cblas_dswap.$(SUFFIX) cblas_dnrm2.$(SUFFIX) cblas_daxpby.$(SUFFIX) \ - cblas_idmin.$(SUFFIX) cblas_idmax.$(SUFFIX) cblas_dsum.$(SUFFIX) - -CDBLAS2OBJS = \ - cblas_dgemv.$(SUFFIX) cblas_dger.$(SUFFIX) cblas_dsymv.$(SUFFIX) cblas_dtrmv.$(SUFFIX) \ - cblas_dtrsv.$(SUFFIX) cblas_dsyr.$(SUFFIX) cblas_dsyr2.$(SUFFIX) cblas_dgbmv.$(SUFFIX) \ - cblas_dsbmv.$(SUFFIX) cblas_dspmv.$(SUFFIX) cblas_dspr.$(SUFFIX) cblas_dspr2.$(SUFFIX) \ - cblas_dtbmv.$(SUFFIX) cblas_dtbsv.$(SUFFIX) cblas_dtpmv.$(SUFFIX) cblas_dtpsv.$(SUFFIX) - -CDBLAS3OBJS += \ - cblas_dgemm.$(SUFFIX) cblas_dsymm.$(SUFFIX) cblas_dtrmm.$(SUFFIX) cblas_dtrsm.$(SUFFIX) \ - cblas_dsyrk.$(SUFFIX) cblas_dsyr2k.$(SUFFIX) cblas_domatcopy.$(SUFFIX) cblas_dimatcopy.$(SUFFIX) \ - cblas_dgeadd.$(SUFFIX) - -CCBLAS1OBJS = \ - cblas_icamax.$(SUFFIX) cblas_icamin.$(SUFFIX) cblas_scasum.$(SUFFIX) cblas_caxpy.$(SUFFIX) \ - cblas_ccopy.$(SUFFIX) \ - cblas_cdotc.$(SUFFIX) cblas_cdotu.$(SUFFIX) \ - cblas_cdotc_sub.$(SUFFIX) cblas_cdotu_sub.$(SUFFIX) \ - cblas_cscal.$(SUFFIX) cblas_csscal.$(SUFFIX) \ - cblas_cswap.$(SUFFIX) cblas_scnrm2.$(SUFFIX) \ - cblas_caxpby.$(SUFFIX) \ - cblas_icmin.$(SUFFIX) cblas_icmax.$(SUFFIX) cblas_scsum.$(SUFFIX) cblas_csrot.$(SUFFIX) cblas_crotg.$(SUFFIX) - -CCBLAS2OBJS = \ - cblas_cgemv.$(SUFFIX) cblas_cgerc.$(SUFFIX) cblas_cgeru.$(SUFFIX) \ - cblas_cgbmv.$(SUFFIX) cblas_chbmv.$(SUFFIX) cblas_chemv.$(SUFFIX) \ - cblas_cher.$(SUFFIX) cblas_cher2.$(SUFFIX) cblas_chpmv.$(SUFFIX) \ - cblas_chpr.$(SUFFIX) cblas_chpr2.$(SUFFIX) cblas_ctbmv.$(SUFFIX) \ - cblas_ctbsv.$(SUFFIX) cblas_ctpmv.$(SUFFIX) cblas_ctpsv.$(SUFFIX) \ - cblas_ctrmv.$(SUFFIX) cblas_ctrsv.$(SUFFIX) - -CCBLAS3OBJS = \ - cblas_cgemm.$(SUFFIX) cblas_csymm.$(SUFFIX) cblas_ctrmm.$(SUFFIX) cblas_ctrsm.$(SUFFIX) \ - cblas_csyrk.$(SUFFIX) cblas_csyr2k.$(SUFFIX) \ - cblas_chemm.$(SUFFIX) cblas_cherk.$(SUFFIX) cblas_cher2k.$(SUFFIX) \ - cblas_comatcopy.$(SUFFIX) 
cblas_cimatcopy.$(SUFFIX)\ - cblas_cgeadd.$(SUFFIX) - -CXERBLAOBJ = \ - cblas_xerbla.$(SUFFIX) - - - -CZBLAS1OBJS = \ - cblas_izamax.$(SUFFIX) cblas_izamin.$(SUFFIX) cblas_dzasum.$(SUFFIX) cblas_zaxpy.$(SUFFIX) \ - cblas_zcopy.$(SUFFIX) \ - cblas_zdotc.$(SUFFIX) cblas_zdotu.$(SUFFIX) \ - cblas_zdotc_sub.$(SUFFIX) cblas_zdotu_sub.$(SUFFIX) \ - cblas_zscal.$(SUFFIX) cblas_zdscal.$(SUFFIX) \ - cblas_zswap.$(SUFFIX) cblas_dznrm2.$(SUFFIX) \ - cblas_zaxpby.$(SUFFIX) \ - cblas_izmin.$(SUFFIX) cblas_izmax.$(SUFFIX) cblas_dzsum.$(SUFFIX) cblas_zdrot.$(SUFFIX) cblas_zrotg.$(SUFFIX) - - -CZBLAS2OBJS = \ - cblas_zgemv.$(SUFFIX) cblas_zgerc.$(SUFFIX) cblas_zgeru.$(SUFFIX) \ - cblas_zgbmv.$(SUFFIX) cblas_zhbmv.$(SUFFIX) cblas_zhemv.$(SUFFIX) \ - cblas_zher.$(SUFFIX) cblas_zher2.$(SUFFIX) cblas_zhpmv.$(SUFFIX) \ - cblas_zhpr.$(SUFFIX) cblas_zhpr2.$(SUFFIX) cblas_ztbmv.$(SUFFIX) \ - cblas_ztbsv.$(SUFFIX) cblas_ztpmv.$(SUFFIX) cblas_ztpsv.$(SUFFIX) \ - cblas_ztrmv.$(SUFFIX) cblas_ztrsv.$(SUFFIX) - -CZBLAS3OBJS = \ - cblas_zgemm.$(SUFFIX) cblas_zsymm.$(SUFFIX) cblas_ztrmm.$(SUFFIX) cblas_ztrsm.$(SUFFIX) \ - cblas_zsyrk.$(SUFFIX) cblas_zsyr2k.$(SUFFIX) \ - cblas_zhemm.$(SUFFIX) cblas_zherk.$(SUFFIX) cblas_zher2k.$(SUFFIX)\ - cblas_zomatcopy.$(SUFFIX) cblas_zimatcopy.$(SUFFIX) \ - cblas_zgeadd.$(SUFFIX) - - -ifeq ($(SUPPORT_GEMM3M), 1) - -# CBLAS3OBJS += cgemm3m.$(SUFFIX) csymm3m.$(SUFFIX) chemm3m.$(SUFFIX) -CCBLAS3OBJS += cblas_cgemm3m.$(SUFFIX) - -# ZBLAS3OBJS += zgemm3m.$(SUFFIX) zsymm3m.$(SUFFIX) zhemm3m.$(SUFFIX) -CZBLAS3OBJS += cblas_zgemm3m.$(SUFFIX) - -endif - - -ifneq ($(NO_CBLAS), 1) - -override CFLAGS += -I. 
- -SBLAS1OBJS += $(CSBLAS1OBJS) -SBLAS2OBJS += $(CSBLAS2OBJS) -SBLAS3OBJS += $(CSBLAS3OBJS) -SBBLAS1OBJS += $(CSBBLAS1OBJS) -SBBLAS2OBJS += $(CSBBLAS2OBJS) -SBBLAS3OBJS += $(CSBBLAS3OBJS) -DBLAS1OBJS += $(CDBLAS1OBJS) -DBLAS2OBJS += $(CDBLAS2OBJS) -DBLAS3OBJS += $(CDBLAS3OBJS) -CBLAS1OBJS += $(CCBLAS1OBJS) -CBLAS2OBJS += $(CCBLAS2OBJS) -CBLAS3OBJS += $(CCBLAS3OBJS) -ZBLAS1OBJS += $(CZBLAS1OBJS) -ZBLAS2OBJS += $(CZBLAS2OBJS) -ZBLAS3OBJS += $(CZBLAS3OBJS) - -SBEXTOBJS += $(CSBEXTOBJS) - -CBAUXOBJS += $(CXERBLAOBJ) -endif - -SBLASOBJS = $(SBLAS1OBJS) $(SBLAS2OBJS) $(SBLAS3OBJS) -SBBLASOBJS = $(SBBLAS1OBJS) $(SBBLAS2OBJS) $(SBBLAS3OBJS) -DBLASOBJS = $(DBLAS1OBJS) $(DBLAS2OBJS) $(DBLAS3OBJS) -QBLASOBJS = $(QBLAS1OBJS) $(QBLAS2OBJS) $(QBLAS3OBJS) -CBLASOBJS = $(CBLAS1OBJS) $(CBLAS2OBJS) $(CBLAS3OBJS) -ZBLASOBJS = $(ZBLAS1OBJS) $(ZBLAS2OBJS) $(ZBLAS3OBJS) -XBLASOBJS = $(XBLAS1OBJS) $(XBLAS2OBJS) $(XBLAS3OBJS) - -#SLAPACKOBJS = \ -# sgetrf.$(SUFFIX) sgetrs.$(SUFFIX) spotrf.$(SUFFIX) sgetf2.$(SUFFIX) \ -# spotf2.$(SUFFIX) slaswp.$(SUFFIX) sgesv.$(SUFFIX) slauu2.$(SUFFIX) \ -# slauum.$(SUFFIX) strti2.$(SUFFIX) strtri.$(SUFFIX) spotri.$(SUFFIX) - -SLAPACKOBJS = \ - sgetrf.$(SUFFIX) sgetrs.$(SUFFIX) spotrf.$(SUFFIX) sgetf2.$(SUFFIX) \ - spotf2.$(SUFFIX) slaswp.$(SUFFIX) sgesv.$(SUFFIX) slauu2.$(SUFFIX) \ - slauum.$(SUFFIX) strti2.$(SUFFIX) strtri.$(SUFFIX) strtrs.$(SUFFIX) - - -#DLAPACKOBJS = \ -# dgetrf.$(SUFFIX) dgetrs.$(SUFFIX) dpotrf.$(SUFFIX) dgetf2.$(SUFFIX) \ -# dpotf2.$(SUFFIX) dlaswp.$(SUFFIX) dgesv.$(SUFFIX) dlauu2.$(SUFFIX) \ -# dlauum.$(SUFFIX) dtrti2.$(SUFFIX) dtrtri.$(SUFFIX) dpotri.$(SUFFIX) - -DLAPACKOBJS = \ - dgetrf.$(SUFFIX) dgetrs.$(SUFFIX) dpotrf.$(SUFFIX) dgetf2.$(SUFFIX) \ - dpotf2.$(SUFFIX) dlaswp.$(SUFFIX) dgesv.$(SUFFIX) dlauu2.$(SUFFIX) \ - dlauum.$(SUFFIX) dtrti2.$(SUFFIX) dtrtri.$(SUFFIX) dtrtrs.$(SUFFIX) - - -QLAPACKOBJS = \ - qgetf2.$(SUFFIX) qgetrf.$(SUFFIX) qlauu2.$(SUFFIX) qlauum.$(SUFFIX) \ - qpotf2.$(SUFFIX) qpotrf.$(SUFFIX) qtrti2.$(SUFFIX) 
qtrtri.$(SUFFIX) \ - qlaswp.$(SUFFIX) qtrtrs.$(SUFFIX) qgesv.$(SUFFIX) qpotri.$(SUFFIX) \ - qtrtrs.$(SUFFIX) - -#CLAPACKOBJS = \ -# cgetrf.$(SUFFIX) cgetrs.$(SUFFIX) cpotrf.$(SUFFIX) cgetf2.$(SUFFIX) \ -# cpotf2.$(SUFFIX) claswp.$(SUFFIX) cgesv.$(SUFFIX) clauu2.$(SUFFIX) \ -# clauum.$(SUFFIX) ctrti2.$(SUFFIX) ctrtri.$(SUFFIX) cpotri.$(SUFFIX) - -CLAPACKOBJS = \ - cgetrf.$(SUFFIX) cgetrs.$(SUFFIX) cpotrf.$(SUFFIX) cgetf2.$(SUFFIX) \ - cpotf2.$(SUFFIX) claswp.$(SUFFIX) cgesv.$(SUFFIX) clauu2.$(SUFFIX) \ - clauum.$(SUFFIX) ctrti2.$(SUFFIX) ctrtri.$(SUFFIX) ctrtrs.$(SUFFIX) - -#ZLAPACKOBJS = \ -# zgetrf.$(SUFFIX) zgetrs.$(SUFFIX) zpotrf.$(SUFFIX) zgetf2.$(SUFFIX) \ -# zpotf2.$(SUFFIX) zlaswp.$(SUFFIX) zgesv.$(SUFFIX) zlauu2.$(SUFFIX) \ -# zlauum.$(SUFFIX) ztrti2.$(SUFFIX) ztrtri.$(SUFFIX) zpotri.$(SUFFIX) - - -ZLAPACKOBJS = \ - zgetrf.$(SUFFIX) zgetrs.$(SUFFIX) zpotrf.$(SUFFIX) zgetf2.$(SUFFIX) \ - zpotf2.$(SUFFIX) zlaswp.$(SUFFIX) zgesv.$(SUFFIX) zlauu2.$(SUFFIX) \ - zlauum.$(SUFFIX) ztrti2.$(SUFFIX) ztrtri.$(SUFFIX) ztrtrs.$(SUFFIX) - - -XLAPACKOBJS = \ - xgetf2.$(SUFFIX) xgetrf.$(SUFFIX) xlauu2.$(SUFFIX) xlauum.$(SUFFIX) \ - xpotf2.$(SUFFIX) xpotrf.$(SUFFIX) xtrti2.$(SUFFIX) xtrtri.$(SUFFIX) \ - xlaswp.$(SUFFIX) xtrtrs.$(SUFFIX) xgesv.$(SUFFIX) xpotri.$(SUFFIX) \ - xtrtrs.$(SUFFIX) - +SUBDIRS = $(BLASDIRS) ifneq ($(NO_LAPACK), 1) -SBLASOBJS += $(SLAPACKOBJS) -DBLASOBJS += $(DLAPACKOBJS) -#QBLASOBJS += $(QLAPACKOBJS) -CBLASOBJS += $(CLAPACKOBJS) -ZBLASOBJS += $(ZLAPACKOBJS) -#XBLASOBJS += $(XLAPACKOBJS) - +SUBDIRS += lapack endif -ifneq ($(BUILD_SINGLE),1) - SBLASOBJS= -ifeq ($(BUILD_DOUBLE),1) - SBLASOBJS = dsdot.$(SUFFIX) cblas_dsdot.$(SUFFIX) strsm.$(SUFFIX) \ - sgetrs.$(SUFFIX) sgetrf.$(SUFFIX) spotf2.$(SUFFIX) spotrf.$(SUFFIX) \ - ssyrk.$(SUFFIX) sgemv.$(SUFFIX) -endif -ifeq ($(BUILD_COMPLEX),1) - SBLASOBJS = \ - sdot.$(SUFFIX) srot.$(SUFFIX) snrm2.$(SUFFIX) sswap.$(SUFFIX) \ - isamax.$(SUFFIX) saxpy.$(SUFFIX) sscal.$(SUFFIX) scopy.$(SUFFIX) \ - sgemv.$(SUFFIX) 
sgemm.$(SUFFIX) -endif -endif -ifneq ($(BUILD_DOUBLE),1) - DBLASOBJS= -ifeq ($(BUILD_COMPLEX16),1) - DBLASOBJS = \ - ddot.$(SUFFIX) drot.$(SUFFIX) dnrm2.$(SUFFIX) dswap.$(SUFFIX) \ - idamax.$(SUFFIX) daxpy.$(SUFFIX) dscal.$(SUFFIX) dcopy.$(SUFFIX) \ - dgemv.$(SUFFIX) dgemm.$(SUFFIX) -endif -endif -ifneq ($(BUILD_COMPLEX),1) - CBLASOBJS= -ifeq ($(BUILD_COMPLEX16),1) - CBLASOBJS = cgetrs.$(SUFFIX) cblas_cdotu_sub.$(SUFFIX) cgetrf.$(SUFFIX) \ - cpotrf.$(SUFFIX) ctrsm.$(SUFFIX) cblas_cdotc_sub.$(SUFFIX) -endif -endif -ifneq ($(BUILD_COMPLEX16),1) - ZBLASOBJS= +RELA = +ifeq ($(BUILD_RELAPACK), 1) +RELA = re_lapack endif -FUNCOBJS = $(SBEXTOBJS) $(CXERBLAOBJS) $(SBBLASOBJS) $(SBLASOBJS) $(DBLASOBJS) $(CBLASOBJS) $(ZBLASOBJS) +ifeq ($(NO_FORTRAN), 1) +define NOFORTRAN +1 +endef +define NO_LAPACK +1 +endef +export NOFORTRAN +export NO_LAPACK +endif +LAPACK_NOOPT := $(filter-out -O0 -O1 -O2 -O3 -Ofast,$(LAPACK_FFLAGS)) + +SUBDIRS_ALL = $(SUBDIRS) test ctest utest exports benchmark ../laswp ../bench cpp_thread_test + +.PHONY : all libs netlib $(RELA) test ctest shared install +.NOTPARALLEL : all libs $(RELA) prof lapack-test install blas-test + +all :: libs netlib $(RELA) tests shared + @echo + @echo " OpenBLAS build complete. ($(LIB_COMPONENTS))" + @echo + @echo " OS ... $(OSNAME) " + @echo " Architecture ... $(ARCH) " +ifndef BINARY64 + @echo " BINARY ... 32bit " +else + @echo " BINARY ... 64bit " +endif + +ifdef INTERFACE64 +ifneq ($(INTERFACE64), 0) + @echo " Use 64 bits int (equivalent to \"-i8\" in Fortran) " +endif +endif + @$(CC) --version > /dev/null 2>&1;\ + if [ $$? -eq 0 ]; then \ + cverinfo=`$(CC) --version | sed -n '1p'`; \ + if [ -z "$${cverinfo}" ]; then \ + cverinfo=`$(CC) --version | sed -n '2p'`; \ + fi; \ + echo " C compiler ... $(C_COMPILER) (cmd & version : $${cverinfo})";\ + else \ + echo " C compiler ... $(C_COMPILER) (command line : $(CC))";\ + fi +ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN))) + @$(FC) --version > /dev/null 2>&1;\ + if [ $$? 
-eq 0 ]; then \ + fverinfo=`$(FC) --version | sed -n '1p'`; \ + if [ -z "$${fverinfo}" ]; then \ + fverinfo=`$(FC) --version | sed -n '2p'`; \ + fi; \ + echo " Fortran compiler ... $(F_COMPILER) (cmd & version : $${fverinfo})";\ + else \ + echo " Fortran compiler ... $(F_COMPILER) (command line : $(FC))";\ + fi +endif +ifneq ($(OSNAME), AIX) + @echo -n " Library Name ... $(LIBNAME)" +else + @echo " Library Name ... $(LIBNAME)" +endif + +ifndef SMP + @echo " (Single-threading) " +else + @echo " (Multi-threading; Max num-threads is $(NUM_THREADS))" +endif + +ifeq ($(DYNAMIC_ARCH), 1) + @echo " Supporting multiple $(ARCH) cpu models with minimum requirement for the common code being $(CORE)" +endif + +ifeq ($(USE_OPENMP), 1) + @echo + @echo " Use OpenMP in the multithreading. Because of ignoring OPENBLAS_NUM_THREADS and GOTO_NUM_THREADS flags, " + @echo " you should use OMP_NUM_THREADS environment variable to control the number of threads." + @echo +endif + +ifeq ($(OSNAME), Darwin) + @echo "WARNING: If you plan to use the dynamic library $(LIBDYNNAME), you must run:" + @echo + @echo "\"make PREFIX=/your_installation_path/ install\"." + @echo + @echo "(or set PREFIX in Makefile.rule and run make install." + @echo "If you want to move the .dylib to a new location later, make sure you change" + @echo "the internal name of the dylib with:" + @echo + @echo "install_name_tool -id /new/absolute/path/to/$(LIBDYNNAME) $(LIBDYNNAME)" +endif + @echo + @echo "To install the library, you can run \"make PREFIX=/path/to/your/installation install\"." 
+ @echo + +shared : +ifneq ($(NO_SHARED), 1) +ifeq ($(OSNAME), $(filter $(OSNAME),Linux SunOS Android Haiku FreeBSD DragonFly)) + @$(MAKE) -C exports so + @ln -fs $(LIBSONAME) $(LIBPREFIX).so + @ln -fs $(LIBSONAME) $(LIBPREFIX).so.$(MAJOR_VERSION) +endif +ifeq ($(OSNAME), $(filter $(OSNAME),OpenBSD NetBSD)) + @$(MAKE) -C exports so + @ln -fs $(LIBSONAME) $(LIBPREFIX).so +endif +ifeq ($(OSNAME), Darwin) + @$(MAKE) -C exports dyn + @ln -fs $(LIBDYNNAME) $(LIBPREFIX).dylib + @ln -fs $(LIBDYNNAME) $(LIBPREFIX).$(MAJOR_VERSION).dylib +endif +ifeq ($(OSNAME), WINNT) + @$(MAKE) -C exports dll +endif +ifeq ($(OSNAME), CYGWIN_NT) + @$(MAKE) -C exports dll +endif +endif + +tests : +ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN))) + touch $(LIBNAME) +ifndef NO_FBLAS + $(MAKE) -C test all +endif + $(MAKE) -C utest all +ifneq ($(NO_CBLAS), 1) + $(MAKE) -C ctest all +ifeq ($(CPP_THREAD_SAFETY_TEST), 1) + $(MAKE) -C cpp_thread_test all +endif +endif +endif + +libs : +ifeq ($(CORE), UNKNOWN) + $(error OpenBLAS: Detecting CPU failed. Please set TARGET explicitly, e.g. make TARGET=your_cpu_target. Please read README for the detail.) +endif +ifeq ($(NOFORTRAN), 1) + $(info OpenBLAS: Detecting fortran compiler failed. Cannot compile LAPACK. Only compile BLAS.) +endif +ifeq ($(NO_STATIC), 1) +ifeq ($(NO_SHARED), 1) + $(error OpenBLAS: neither static nor shared are enabled.) 
+endif +endif + @-ln -fs $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX) + @for d in $(SUBDIRS) ; \ + do if test -d $$d; then \ + $(MAKE) -C $$d $(@F) || exit 1 ; \ + fi; \ + done +#Save the config files for installation + @cp Makefile.conf Makefile.conf_last + @cp config.h config_last.h +ifdef QUAD_PRECISION + @echo "#define QUAD_PRECISION">> config_last.h +endif ifeq ($(EXPRECISION), 1) -FUNCOBJS += $(QBLASOBJS) $(XBLASOBJS) + @echo "#define EXPRECISION">> config_last.h +endif +## +ifeq ($(DYNAMIC_ARCH), 1) + @$(MAKE) -C kernel commonlibs || exit 1 + @for d in $(DYNAMIC_CORE) ; \ + do $(MAKE) GOTOBLAS_MAKEFILE= -C kernel TARGET_CORE=$$d kernel || exit 1 ;\ + done + @echo DYNAMIC_ARCH=1 >> Makefile.conf_last +ifeq ($(DYNAMIC_OLDER), 1) + @echo DYNAMIC_OLDER=1 >> Makefile.conf_last +endif +endif +ifdef USE_THREAD + @echo USE_THREAD=$(USE_THREAD) >> Makefile.conf_last +endif + @touch lib.grd + +prof : prof_blas prof_lapack + +prof_blas : + ln -fs $(LIBNAME_P) $(LIBPREFIX)_p.$(LIBSUFFIX) + for d in $(SUBDIRS) ; \ + do if test -d $$d; then \ + $(MAKE) -C $$d prof || exit 1 ; \ + fi; \ + done +ifeq ($(DYNAMIC_ARCH), 1) + $(MAKE) -C kernel commonprof || exit 1 endif -ifeq ($(QUAD_PRECISION), 1) -FUNCOBJS += $(QBLASOBJS) $(XBLASOBJS) +blas : + ln -fs $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX) + for d in $(BLASDIRS) ; \ + do if test -d $$d; then \ + $(MAKE) -C $$d libs || exit 1 ; \ + fi; \ + done + +hpl : + ln -fs $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX) + for d in $(BLASDIRS) ../laswp exports ; \ + do if test -d $$d; then \ + $(MAKE) -C $$d $(@F) || exit 1 ; \ + fi; \ + done +ifeq ($(DYNAMIC_ARCH), 1) + $(MAKE) -C kernel commonlibs || exit 1 + for d in $(DYNAMIC_CORE) ; \ + do $(MAKE) GOTOBLAS_MAKEFILE= -C kernel TARGET_CORE=$$d kernel || exit 1 ;\ + done endif -FUNCALLFILES = $(FUNCOBJS:.$(SUFFIX)=) +hpl_p : + ln -fs $(LIBNAME_P) $(LIBPREFIX)_p.$(LIBSUFFIX) + for d in $(SUBDIRS) ../laswp exports ; \ + do if test -d $$d; then \ + $(MAKE) -C $$d $(@F) || exit 1 ; \ + fi; \ + done +ifeq 
($(NO_LAPACK), 1) +netlib : -include $(TOPDIR)/Makefile.tail - -all :: libs - -ifdef FUNCTION_PROFILE -$(BLASOBJS) $(BLASOBJS_P) : functable.h -$(BLASOBJS) $(BLASOBJS_P) : override CFLAGS += -DPROFILE_FUNC_NAME=interface_$(*F) - -functable.h : Makefile - ./create $(FUNCALLFILES) > functable.h - +else +netlib : lapack_prebuild +ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN))) + @$(MAKE) -C $(NETLIB_LAPACK_DIR) lapacklib + @$(MAKE) -C $(NETLIB_LAPACK_DIR) tmglib endif +ifneq ($(NO_LAPACKE), 1) + @$(MAKE) -C $(NETLIB_LAPACK_DIR) lapackelib +endif +endif + +ifeq ($(NO_LAPACK), 1) +re_lapack : + +else +re_lapack : + @$(MAKE) -C relapack +endif + +prof_lapack : lapack_prebuild + @$(MAKE) -C $(NETLIB_LAPACK_DIR) lapack_prof + +lapack_prebuild : +ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN))) + -@echo "FC = $(FC)" > $(NETLIB_LAPACK_DIR)/make.inc + -@echo "FFLAGS = $(LAPACK_FFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc + -@echo "FFLAGS_DRV = $(LAPACK_FFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc + -@echo "POPTS = $(LAPACK_FPFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc + -@echo "FFLAGS_NOOPT = -O0 $(LAPACK_NOOPT)" >> $(NETLIB_LAPACK_DIR)/make.inc + -@echo "PNOOPT = $(LAPACK_FPFLAGS) -O0" >> $(NETLIB_LAPACK_DIR)/make.inc +ifeq ($(C_COMPILER)$(F_COMPILER)$(USE_OPENMP), CLANGGFORTRAN1) + -@echo "LDFLAGS = $(FFLAGS) $(EXTRALIB) -lomp" >> $(NETLIB_LAPACK_DIR)/make.inc +else + -@echo "LDFLAGS = $(FFLAGS) $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc +endif + -@echo "CC = $(CC)" >> $(NETLIB_LAPACK_DIR)/make.inc + -@echo "override CFLAGS = $(LAPACK_CFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc + -@echo "AR = $(AR)" >> $(NETLIB_LAPACK_DIR)/make.inc + -@echo "ARFLAGS = $(ARFLAGS) -ru" >> $(NETLIB_LAPACK_DIR)/make.inc + -@echo "RANLIB = $(RANLIB)" >> $(NETLIB_LAPACK_DIR)/make.inc + -@echo "LAPACKLIB = ../../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc + -@echo "TMGLIB = ../../../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc + -@echo "BLASLIB = ../../../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc + -@echo 
"LAPACKELIB = ../../../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc + -@echo "LAPACKLIB_P = ../$(LIBNAME_P)" >> $(NETLIB_LAPACK_DIR)/make.inc + -@echo "SUFFIX = $(SUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc + -@echo "PSUFFIX = $(PSUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc + -@echo "CEXTRALIB = $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc +ifeq ($(F_COMPILER), GFORTRAN) + -@echo "TIMER = INT_ETIME" >> $(NETLIB_LAPACK_DIR)/make.inc +ifdef SMP +ifeq ($(OSNAME), WINNT) + -@echo "LOADER = $(FC)" >> $(NETLIB_LAPACK_DIR)/make.inc +else ifeq ($(OSNAME), Haiku) + -@echo "LOADER = $(FC)" >> $(NETLIB_LAPACK_DIR)/make.inc +else + -@echo "LOADER = $(FC) -pthread" >> $(NETLIB_LAPACK_DIR)/make.inc +endif +else + -@echo "LOADER = $(FC)" >> $(NETLIB_LAPACK_DIR)/make.inc +endif +else + -@echo "TIMER = NONE" >> $(NETLIB_LAPACK_DIR)/make.inc + -@echo "LOADER = $(FC)" >> $(NETLIB_LAPACK_DIR)/make.inc +endif +ifeq ($(BUILD_LAPACK_DEPRECATED), 1) + -@echo "BUILD_DEPRECATED = 1" >> $(NETLIB_LAPACK_DIR)/make.inc +endif +ifeq ($(BUILD_SINGLE), 1) + -@echo "BUILD_SINGLE = 1" >> $(NETLIB_LAPACK_DIR)/make.inc +endif +ifeq ($(BUILD_DOUBLE), 1) + -@echo "BUILD_DOUBLE = 1" >> $(NETLIB_LAPACK_DIR)/make.inc +endif +ifeq ($(BUILD_COMPLEX), 1) + -@echo "BUILD_COMPLEX = 1" >> $(NETLIB_LAPACK_DIR)/make.inc +endif +ifeq ($(BUILD_COMPLEX16), 1) + -@echo "BUILD_COMPLEX16 = 1" >> $(NETLIB_LAPACK_DIR)/make.inc +endif + -@echo "LAPACKE_WITH_TMG = 1" >> $(NETLIB_LAPACK_DIR)/make.inc + -@cat make.inc >> $(NETLIB_LAPACK_DIR)/make.inc +endif + +large.tgz : +ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN))) + if [ ! -a $< ]; then + -wget http://www.netlib.org/lapack/timing/large.tgz; + fi +endif + +timing.tgz : +ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN))) + if [ ! 
-a $< ]; then + -wget http://www.netlib.org/lapack/timing/timing.tgz; + fi +endif + +lapack-timing : large.tgz timing.tgz +ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN))) + (cd $(NETLIB_LAPACK_DIR); $(TAR) zxf ../timing.tgz TIMING) + (cd $(NETLIB_LAPACK_DIR)/TIMING; $(TAR) zxf ../../large.tgz ) + $(MAKE) -C $(NETLIB_LAPACK_DIR)/TIMING +endif + + +lapack-test : + (cd $(NETLIB_LAPACK_DIR)/TESTING && rm -f x* *.out) + $(MAKE) -j 1 -C $(NETLIB_LAPACK_DIR)/TESTING/EIG xeigtstc xeigtstd xeigtsts xeigtstz + $(MAKE) -j 1 -C $(NETLIB_LAPACK_DIR)/TESTING/LIN xlintstc xlintstd xlintstds xlintstrfd xlintstrfz xlintsts xlintstz xlintstzc xlintstrfs xlintstrfc +ifneq ($(CROSS), 1) + ( cd $(NETLIB_LAPACK_DIR)/INSTALL; $(MAKE) all; ./testlsame; ./testslamch; ./testdlamch; \ + ./testsecond; ./testdsecnd; ./testieee; ./testversion ) + (cd $(NETLIB_LAPACK_DIR); ./lapack_testing.py -r -b TESTING) +endif + +lapack-runtest: + ( cd $(NETLIB_LAPACK_DIR)/INSTALL; ./testlsame; ./testslamch; ./testdlamch; \ + ./testsecond; ./testdsecnd; ./testieee; ./testversion ) + (cd $(NETLIB_LAPACK_DIR); ./lapack_testing.py -r ) + + +blas-test: + (cd $(NETLIB_LAPACK_DIR)/BLAS/TESTING && rm -f x* *.out) + $(MAKE) -j 1 -C $(NETLIB_LAPACK_DIR) blas_testing + (cd $(NETLIB_LAPACK_DIR)/BLAS/TESTING && cat *.out) + + +dummy : + +install : + $(MAKE) -f Makefile.install install clean :: - @rm -f functable.h - -level1 : $(SBEXTOBJS) $(SBBLAS1OBJS) $(SBLAS1OBJS) $(DBLAS1OBJS) $(QBLAS1OBJS) $(CBLAS1OBJS) $(ZBLAS1OBJS) $(XBLAS1OBJS) - $(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^ - -level2 : $(SBBLAS2OBJS) $(SBLAS2OBJS) $(DBLAS2OBJS) $(QBLAS2OBJS) $(CBLAS2OBJS) $(ZBLAS2OBJS) $(XBLAS2OBJS) - $(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^ - -level3 : $(SBBLAS3OBJS) $(SBLAS3OBJS) $(DBLAS3OBJS) $(QBLAS3OBJS) $(CBLAS3OBJS) $(ZBLAS3OBJS) $(XBLAS3OBJS) - $(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^ - -aux : $(CBAUXOBJS) - $(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^ - -$(CSBBLASOBJS) $(CSBBLASOBJS_P) $(CSBLASOBJS) 
$(CSBLASOBJS_P) $(CDBLASOBJS) $(CDBLASOBJS_P) $(CQBLASOBJS) $(CQBLASOBJS_P) \ -$(CCBLASOBJS) $(CCBLASOBJS_P) $(CZBLASOBJS) $(CZBLASOBJS_P) $(CXBLASOBJS) $(CXBLASOBJS_P) $(CBAUXOBJS_P) : override CFLAGS += -DCBLAS - -srot.$(SUFFIX) srot.$(PSUFFIX) : rot.c - $(CC) $(CFLAGS) -c $< -o $(@F) - -drot.$(SUFFIX) drot.$(PSUFFIX) : rot.c - $(CC) $(CFLAGS) -c $< -o $(@F) - -qrot.$(SUFFIX) qrot.$(PSUFFIX) : rot.c - $(CC) $(CFLAGS) -c $< -o $(@F) - -csrot.$(SUFFIX) csrot.$(PSUFFIX) : zrot.c - $(CC) $(CFLAGS) -c $< -o $(@F) - -zdrot.$(SUFFIX) zdrot.$(PSUFFIX) : zrot.c - $(CC) $(CFLAGS) -c $< -o $(@F) - -xqrot.$(SUFFIX) xqrot.$(PSUFFIX) : zrot.c - $(CC) $(CFLAGS) -c $< -o $(@F) - -srotm.$(SUFFIX) srotm.$(PSUFFIX): rotm.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -drotm.$(SUFFIX) drotm.$(PSUFFIX): rotm.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -qrotm.$(SUFFIX) qrotm.$(PSUFFIX): rotm.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -srotmg.$(SUFFIX) srotmg.$(PSUFFIX): rotmg.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -drotmg.$(SUFFIX) drotmg.$(PSUFFIX): rotmg.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -qrotmg.$(SUFFIX) qrotmg.$(PSUFFIX): rotmg.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -srotg.$(SUFFIX) srotg.$(PSUFFIX): rotg.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -drotg.$(SUFFIX) drotg.$(PSUFFIX): rotg.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -qrotg.$(SUFFIX) qrotg.$(PSUFFIX): rotg.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -crotg.$(SUFFIX) crotg.$(PSUFFIX): zrotg.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -zrotg.$(SUFFIX) zrotg.$(PSUFFIX): zrotg.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -xrotg.$(SUFFIX) xrotg.$(PSUFFIX): zrotg.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -sasum.$(SUFFIX) sasum.$(PSUFFIX) : asum.c - $(CC) $(CFLAGS) -c $< -o $(@F) - -dasum.$(SUFFIX) dasum.$(PSUFFIX) : asum.c - $(CC) $(CFLAGS) -c $< -o $(@F) - -qasum.$(SUFFIX) qasum.$(PSUFFIX) : asum.c - $(CC) $(CFLAGS) -c $< -o $(@F) - -scasum.$(SUFFIX) scasum.$(PSUFFIX) : asum.c - $(CC) $(CFLAGS) -c $< -o $(@F) - -dzasum.$(SUFFIX) dzasum.$(PSUFFIX) : asum.c - $(CC) $(CFLAGS) -c $< -o 
$(@F) - -qxasum.$(SUFFIX) qxasum.$(PSUFFIX) : asum.c - $(CC) $(CFLAGS) -c $< -o $(@F) - -ssum.$(SUFFIX) ssum.$(PSUFFIX) : sum.c - $(CC) $(CFLAGS) -c $< -o $(@F) - -dsum.$(SUFFIX) dsum.$(PSUFFIX) : sum.c - $(CC) $(CFLAGS) -c $< -o $(@F) - -qsum.$(SUFFIX) qsum.$(PSUFFIX) : sum.c - $(CC) $(CFLAGS) -c $< -o $(@F) - -scsum.$(SUFFIX) scsum.$(PSUFFIX) : sum.c - $(CC) $(CFLAGS) -c $< -o $(@F) - -dzsum.$(SUFFIX) dzsum.$(PSUFFIX) : sum.c - $(CC) $(CFLAGS) -c $< -o $(@F) - -qxsum.$(SUFFIX) qxsum.$(PSUFFIX) : sum.c - $(CC) $(CFLAGS) -c $< -o $(@F) - -snrm2.$(SUFFIX) snrm2.$(PSUFFIX) : nrm2.c - $(CC) $(CFLAGS) -c $< -o $(@F) - -dnrm2.$(SUFFIX) dnrm2.$(PSUFFIX) : nrm2.c - $(CC) $(CFLAGS) -c $< -o $(@F) - -qnrm2.$(SUFFIX) qnrm2.$(PSUFFIX) : nrm2.c - $(CC) $(CFLAGS) -c $< -o $(@F) - -scnrm2.$(SUFFIX) scnrm2.$(PSUFFIX) : nrm2.c - $(CC) $(CFLAGS) -c $< -o $(@F) - -dznrm2.$(SUFFIX) dznrm2.$(PSUFFIX) : nrm2.c - $(CC) $(CFLAGS) -c $< -o $(@F) - -qxnrm2.$(SUFFIX) qxnrm2.$(PSUFFIX) : nrm2.c - $(CC) $(CFLAGS) -c $< -o $(@F) - -samax.$(SUFFIX) samax.$(PSUFFIX) : max.c - $(CC) $(CFLAGS) -c -DUSE_ABS -UUSE_MIN $< -o $(@F) - -damax.$(SUFFIX) damax.$(PSUFFIX) : max.c - $(CC) $(CFLAGS) -c -DUSE_ABS -UUSE_MIN $< -o $(@F) - -qamax.$(SUFFIX) qamax.$(PSUFFIX) : max.c - $(CC) $(CFLAGS) -c -DUSE_ABS -UUSE_MIN $< -o $(@F) - -scamax.$(SUFFIX) scamax.$(PSUFFIX) : max.c - $(CC) $(CFLAGS) -c -DUSE_ABS -UUSE_MIN $< -o $(@F) - -dzamax.$(SUFFIX) dzamax.$(PSUFFIX) : max.c - $(CC) $(CFLAGS) -c -DUSE_ABS -UUSE_MIN $< -o $(@F) - -qxamax.$(SUFFIX) qxamax.$(PSUFFIX) : max.c - $(CC) $(CFLAGS) -c -DUSE_ABS -UUSE_MIN $< -o $(@F) - -samin.$(SUFFIX) samin.$(PSUFFIX) : max.c - $(CC) $(CFLAGS) -c -DUSE_ABS -DUSE_MIN $< -o $(@F) - -damin.$(SUFFIX) damin.$(PSUFFIX) : max.c - $(CC) $(CFLAGS) -c -DUSE_ABS -DUSE_MIN $< -o $(@F) - -qamin.$(SUFFIX) qamin.$(PSUFFIX) : max.c - $(CC) $(CFLAGS) -c -DUSE_ABS -DUSE_MIN $< -o $(@F) - -scamin.$(SUFFIX) scamin.$(PSUFFIX) : max.c - $(CC) $(CFLAGS) -c -DUSE_ABS -DUSE_MIN $< -o $(@F) - 
-dzamin.$(SUFFIX) dzamin.$(PSUFFIX) : max.c - $(CC) $(CFLAGS) -c -DUSE_ABS -DUSE_MIN $< -o $(@F) - -qxamin.$(SUFFIX) qxamin.$(PSUFFIX) : max.c - $(CC) $(CFLAGS) -c -DUSE_ABS -DUSE_MIN $< -o $(@F) - -smax.$(SUFFIX) smax.$(PSUFFIX) : max.c - $(CC) $(CFLAGS) -c -UUSE_ABS -UUSE_MIN $< -o $(@F) - -dmax.$(SUFFIX) dmax.$(PSUFFIX) : max.c - $(CC) $(CFLAGS) -c -UUSE_ABS -UUSE_MIN $< -o $(@F) - -qmax.$(SUFFIX) qmax.$(PSUFFIX) : max.c - $(CC) $(CFLAGS) -c -UUSE_ABS -UUSE_MIN $< -o $(@F) - -smin.$(SUFFIX) smin.$(PSUFFIX) : max.c - $(CC) $(CFLAGS) -c -UUSE_ABS -DUSE_MIN $< -o $(@F) - -dmin.$(SUFFIX) dmin.$(PSUFFIX) : max.c - $(CC) $(CFLAGS) -c -UUSE_ABS -DUSE_MIN $< -o $(@F) - -qmin.$(SUFFIX) qmin.$(PSUFFIX) : max.c - $(CC) $(CFLAGS) -c -UUSE_ABS -DUSE_MIN $< -o $(@F) - -isamax.$(SUFFIX) isamax.$(PSUFFIX) : imax.c - $(CC) $(CFLAGS) -c -DUSE_ABS -UUSE_MIN $< -o $(@F) - -idamax.$(SUFFIX) idamax.$(PSUFFIX) : imax.c - $(CC) $(CFLAGS) -c -DUSE_ABS -UUSE_MIN $< -o $(@F) - -iqamax.$(SUFFIX) iqamax.$(PSUFFIX) : imax.c - $(CC) $(CFLAGS) -c -DUSE_ABS -UUSE_MIN $< -o $(@F) - -icamax.$(SUFFIX) icamax.$(PSUFFIX) : imax.c - $(CC) $(CFLAGS) -c -DUSE_ABS -UUSE_MIN $< -o $(@F) - -izamax.$(SUFFIX) izamax.$(PSUFFIX) : imax.c - $(CC) $(CFLAGS) -c -DUSE_ABS -UUSE_MIN $< -o $(@F) - -ixamax.$(SUFFIX) ixamax.$(PSUFFIX) : imax.c - $(CC) $(CFLAGS) -c -DUSE_ABS -UUSE_MIN $< -o $(@F) - -isamin.$(SUFFIX) isamin.$(PSUFFIX) : imax.c - $(CC) $(CFLAGS) -c -DUSE_ABS -DUSE_MIN $< -o $(@F) - -idamin.$(SUFFIX) idamin.$(PSUFFIX) : imax.c - $(CC) $(CFLAGS) -c -DUSE_ABS -DUSE_MIN $< -o $(@F) - -iqamin.$(SUFFIX) iqamin.$(PSUFFIX) : imax.c - $(CC) $(CFLAGS) -c -DUSE_ABS -DUSE_MIN $< -o $(@F) - -icamin.$(SUFFIX) icamin.$(PSUFFIX) : imax.c - $(CC) $(CFLAGS) -c -DUSE_ABS -DUSE_MIN $< -o $(@F) - -izamin.$(SUFFIX) izamin.$(PSUFFIX) : imax.c - $(CC) $(CFLAGS) -c -DUSE_ABS -DUSE_MIN $< -o $(@F) - -ixamin.$(SUFFIX) ixamin.$(PSUFFIX) : imax.c - $(CC) $(CFLAGS) -c -DUSE_ABS -DUSE_MIN $< -o $(@F) - -ismax.$(SUFFIX) 
ismax.$(PSUFFIX) : imax.c - $(CC) $(CFLAGS) -c -UUSE_ABS -UUSE_MIN $< -o $(@F) - -idmax.$(SUFFIX) idmax.$(PSUFFIX) : imax.c - $(CC) $(CFLAGS) -c -UUSE_ABS -UUSE_MIN $< -o $(@F) - -iqmax.$(SUFFIX) iqmax.$(PSUFFIX) : imax.c - $(CC) $(CFLAGS) -c -UUSE_ABS -UUSE_MIN $< -o $(@F) - -ismin.$(SUFFIX) ismin.$(PSUFFIX) : imax.c - $(CC) $(CFLAGS) -c -UUSE_ABS -DUSE_MIN $< -o $(@F) - -idmin.$(SUFFIX) idmin.$(PSUFFIX) : imax.c - $(CC) $(CFLAGS) -c -UUSE_ABS -DUSE_MIN $< -o $(@F) - -iqmin.$(SUFFIX) iqmin.$(PSUFFIX) : imax.c - $(CC) $(CFLAGS) -c -UUSE_ABS -DUSE_MIN $< -o $(@F) - -sdsdot.$(SUFFIX) sdsdot.$(PSUFFIX) : sdsdot.c - $(CC) $(CFLAGS) -c $< -o $(@F) - -dsdot.$(SUFFIX) dsdot.$(PSUFFIX) : dsdot.c - $(CC) $(CFLAGS) -c $< -o $(@F) - -ifeq ($(BUILD_BFLOAT16),1) -sbdot.$(SUFFIX) sbdot.$(PSUFFIX) : bf16dot.c - $(CC) $(CFLAGS) -c $< -o $(@F) -sbstobf16.$(SUFFIX) sbstobf16.$(PSUFFIX) : tobf16.c - $(CC) $(CFLAGS) -DSINGLE_PREC -UDOUBLE_PREC -c $< -o $(@F) -sbdtobf16.$(SUFFIX) sbdtobf16.$(PSUFFIX) : tobf16.c - $(CC) $(CFLAGS) -USINGLE_PREC -DDOUBLE_PREC -c $< -o $(@F) -sbf16tos.$(SUFFIX) sbf16tos.$(PSUFFIX) : bf16to.c - $(CC) $(CFLAGS) -DSINGLE_PREC -UDOUBLE_PREC -c $< -o $(@F) -dbf16tod.$(SUFFIX) dbf16tod.$(PSUFFIX) : bf16to.c - $(CC) $(CFLAGS) -USINGLE_PREC -DDOUBLE_PREC -c $< -o $(@F) + @for d in $(SUBDIRS_ALL) ; \ + do if test -d $$d; then \ + $(MAKE) -C $$d $(@F) || exit 1 ; \ + fi; \ + done +#ifdef DYNAMIC_ARCH + @$(MAKE) -C kernel clean +#endif + @$(MAKE) -C reference clean + @rm -f *.$(LIBSUFFIX) *.so *~ *.exe getarch getarch_2nd *.dll *.lib *.$(SUFFIX) *.dwf $(LIBPREFIX).$(LIBSUFFIX) $(LIBPREFIX)_p.$(LIBSUFFIX) $(LIBPREFIX).so.$(MAJOR_VERSION) *.lnk myconfig.h *.so.renamed *.a.renamed *.so.0 +ifeq ($(OSNAME), Darwin) + @rm -rf getarch.dSYM getarch_2nd.dSYM endif - -sdot.$(SUFFIX) sdot.$(PSUFFIX) : dot.c - $(CC) $(CFLAGS) -c $< -o $(@F) - -ddot.$(SUFFIX) ddot.$(PSUFFIX) : dot.c - $(CC) $(CFLAGS) -c $< -o $(@F) - -qdot.$(SUFFIX) qdot.$(PSUFFIX) : dot.c - $(CC) $(CFLAGS) -c $< 
-o $(@F) - -cdotu.$(SUFFIX) cdotu.$(PSUFFIX) : zdot.c - $(CC) $(CFLAGS) -c -UCONJ $< -o $(@F) - -cdotc.$(SUFFIX) cdotc.$(PSUFFIX) : zdot.c - $(CC) $(CFLAGS) -c -DCONJ $< -o $(@F) - -zdotu.$(SUFFIX) zdotu.$(PSUFFIX) : zdot.c - $(CC) $(CFLAGS) -c -UCONJ $< -o $(@F) - -zdotc.$(SUFFIX) zdotc.$(PSUFFIX) : zdot.c - $(CC) $(CFLAGS) -c -DCONJ $< -o $(@F) - -xdotu.$(SUFFIX) xdotu.$(PSUFFIX) : zdot.c - $(CC) $(CFLAGS) -c -UCONJ $< -o $(@F) - -xdotc.$(SUFFIX) xdotc.$(PSUFFIX) : zdot.c - $(CC) $(CFLAGS) -c -DCONJ $< -o $(@F) - -saxpy.$(SUFFIX) saxpy.$(PSUFFIX) : axpy.c - $(CC) $(CFLAGS) -c $< -o $(@F) - -daxpy.$(SUFFIX) daxpy.$(PSUFFIX) : axpy.c - $(CC) $(CFLAGS) -c $< -o $(@F) - -qaxpy.$(SUFFIX) qaxpy.$(PSUFFIX) : axpy.c - $(CC) $(CFLAGS) -c $< -o $(@F) - -caxpy.$(SUFFIX) caxpy.$(PSUFFIX) : zaxpy.c - $(CC) $(CFLAGS) -c $< -o $(@F) - -zaxpy.$(SUFFIX) zaxpy.$(PSUFFIX) : zaxpy.c - $(CC) $(CFLAGS) -c $< -o $(@F) - -xaxpy.$(SUFFIX) xaxpy.$(PSUFFIX) : zaxpy.c - $(CC) $(CFLAGS) -c $< -o $(@F) - -caxpyc.$(SUFFIX) caxpyc.$(PSUFFIX) : zaxpy.c - $(CC) $(CFLAGS) -c -DCONJ $< -o $(@F) - -zaxpyc.$(SUFFIX) zaxpyc.$(PSUFFIX) : zaxpy.c - $(CC) $(CFLAGS) -c -DCONJ $< -o $(@F) - -xaxpyc.$(SUFFIX) xaxpyc.$(PSUFFIX) : zaxpy.c - $(CC) $(CFLAGS) -c -DCONJ $< -o $(@F) - -sscal.$(SUFFIX) sscal.$(PSUFFIX) : scal.c - $(CC) $(CFLAGS) -c $< -o $(@F) - -dscal.$(SUFFIX) dscal.$(PSUFFIX) : scal.c - $(CC) $(CFLAGS) -c $< -o $(@F) - -qscal.$(SUFFIX) qscal.$(PSUFFIX) : scal.c - $(CC) $(CFLAGS) -c $< -o $(@F) - -cscal.$(SUFFIX) cscal.$(PSUFFIX) : zscal.c - $(CC) $(CFLAGS) -c $< -o $(@F) - -zscal.$(SUFFIX) zscal.$(PSUFFIX) : zscal.c - $(CC) $(CFLAGS) -c $< -o $(@F) - -xscal.$(SUFFIX) xscal.$(PSUFFIX) : zscal.c - $(CC) $(CFLAGS) -c $< -o $(@F) - -csscal.$(SUFFIX) csscal.$(PSUFFIX) : zscal.c - $(CC) $(CFLAGS) -c -DSSCAL $< -o $(@F) - -zdscal.$(SUFFIX) zdscal.$(PSUFFIX) : zscal.c - $(CC) $(CFLAGS) -c -DSSCAL $< -o $(@F) - -xqscal.$(SUFFIX) xqscal.$(PSUFFIX) : zscal.c - $(CC) $(CFLAGS) -c -DSSCAL $< -o $(@F) - 
-scopy.$(SUFFIX) scopy.$(PSUFFIX) : copy.c - $(CC) $(CFLAGS) -c $< -o $(@F) - -dcopy.$(SUFFIX) dcopy.$(PSUFFIX) : copy.c - $(CC) $(CFLAGS) -c $< -o $(@F) - -qcopy.$(SUFFIX) qcopy.$(PSUFFIX) : copy.c - $(CC) $(CFLAGS) -c $< -o $(@F) - -ccopy.$(SUFFIX) ccopy.$(PSUFFIX) : copy.c - $(CC) $(CFLAGS) -c $< -o $(@F) - -zcopy.$(SUFFIX) zcopy.$(PSUFFIX) : copy.c - $(CC) $(CFLAGS) -c $< -o $(@F) - -xcopy.$(SUFFIX) xcopy.$(PSUFFIX) : copy.c - $(CC) $(CFLAGS) -c $< -o $(@F) - -sswap.$(SUFFIX) sswap.$(PSUFFIX) : swap.c - $(CC) $(CFLAGS) -c $< -o $(@F) - -dswap.$(SUFFIX) dswap.$(PSUFFIX) : swap.c - $(CC) $(CFLAGS) -c $< -o $(@F) - -qswap.$(SUFFIX) qswap.$(PSUFFIX) : swap.c - $(CC) $(CFLAGS) -c $< -o $(@F) - -cswap.$(SUFFIX) cswap.$(PSUFFIX) : zswap.c - $(CC) $(CFLAGS) -c $< -o $(@F) - -zswap.$(SUFFIX) zswap.$(PSUFFIX) : zswap.c - $(CC) $(CFLAGS) -c $< -o $(@F) - -xswap.$(SUFFIX) xswap.$(PSUFFIX) : zswap.c - $(CC) $(CFLAGS) -c $< -o $(@F) - -sger.$(SUFFIX) sger.$(PSUFFIX) : ger.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -dger.$(SUFFIX) dger.$(PSUFFIX) : ger.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -qger.$(SUFFIX) qger.$(PSUFFIX) : ger.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -cgeru.$(SUFFIX) cgeru.$(PSUFFIX) : zger.c - $(CC) -c $(CFLAGS) -UCONJ $< -o $(@F) - -cgerc.$(SUFFIX) cgerc.$(PSUFFIX) : zger.c - $(CC) -c $(CFLAGS) -DCONJ $< -o $(@F) - -zgeru.$(SUFFIX) zgeru.$(PSUFFIX) : zger.c - $(CC) -c $(CFLAGS) -UCONJ $< -o $(@F) - -zgerc.$(SUFFIX) zgerc.$(PSUFFIX) : zger.c - $(CC) -c $(CFLAGS) -DCONJ $< -o $(@F) - -xgeru.$(SUFFIX) xgeru.$(PSUFFIX) : zger.c - $(CC) -c $(CFLAGS) -UCONJ $< -o $(@F) - -xgerc.$(SUFFIX) xgerc.$(PSUFFIX) : zger.c - $(CC) -c $(CFLAGS) -DCONJ $< -o $(@F) - -ifeq ($(BUILD_BFLOAT16),1) -sbgemv.$(SUFFIX) sbgemv.$(PSUFFIX) : sbgemv.c - $(CC) $(CFLAGS) -c $< -o $(@F) -endif - -ifndef USE_NETLIB_GEMV -sgemv.$(SUFFIX) sgemv.$(PSUFFIX): gemv.c - $(CC) -c $(CFLAGS) -o $(@F) $< - -dgemv.$(SUFFIX) dgemv.$(PSUFFIX): gemv.c - $(CC) -c $(CFLAGS) -o $(@F) $< -else -sgemv.$(SUFFIX) 
sgemv.$(PSUFFIX): netlib/sgemv.f - $(FC) -c $(FFLAGS) -o $(@F) $< - -dgemv.$(SUFFIX) dgemv.$(PSUFFIX): netlib/dgemv.f - $(FC) -c $(FFLAGS) -o $(@F) $< -endif - -qgemv.$(SUFFIX) qgemv.$(PSUFFIX): gemv.c - $(CC) -c $(CFLAGS) -o $(@F) $< - -ifndef USE_NETLIB_GEMV -cgemv.$(SUFFIX) cgemv.$(PSUFFIX): zgemv.c - $(CC) -c $(CFLAGS) -o $(@F) $< - -zgemv.$(SUFFIX) zgemv.$(PSUFFIX): zgemv.c - $(CC) -c $(CFLAGS) -o $(@F) $< -else -cgemv.$(SUFFIX) cgemv.$(PSUFFIX): netlib/cgemv.f - $(FC) -c $(FFLAGS) -o $(@F) $< - -zgemv.$(SUFFIX) zgemv.$(PSUFFIX): netlib/zgemv.f - $(FC) -c $(FFLAGS) -o $(@F) $< -endif - -xgemv.$(SUFFIX) xgemv.$(PSUFFIX): zgemv.c - $(CC) -c $(CFLAGS) -o $(@F) $< - -strsv.$(SUFFIX) strsv.$(PSUFFIX) : trsv.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -dtrsv.$(SUFFIX) dtrsv.$(PSUFFIX) : trsv.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -qtrsv.$(SUFFIX) qtrsv.$(PSUFFIX) : trsv.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -ctrsv.$(SUFFIX) ctrsv.$(PSUFFIX) : ztrsv.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -ztrsv.$(SUFFIX) ztrsv.$(PSUFFIX) : ztrsv.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -xtrsv.$(SUFFIX) xtrsv.$(PSUFFIX) : ztrsv.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -strmv.$(SUFFIX) strmv.$(PSUFFIX) : trmv.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -dtrmv.$(SUFFIX) dtrmv.$(PSUFFIX) : trmv.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -qtrmv.$(SUFFIX) qtrmv.$(PSUFFIX) : trmv.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -ctrmv.$(SUFFIX) ctrmv.$(PSUFFIX) : ztrmv.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -ztrmv.$(SUFFIX) ztrmv.$(PSUFFIX) : ztrmv.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -xtrmv.$(SUFFIX) xtrmv.$(PSUFFIX) : ztrmv.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -ssymv.$(SUFFIX) ssymv.$(PSUFFIX) : symv.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -dsymv.$(SUFFIX) dsymv.$(PSUFFIX) : symv.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -qsymv.$(SUFFIX) qsymv.$(PSUFFIX) : symv.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -csymv.$(SUFFIX) csymv.$(PSUFFIX) : zsymv.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -zsymv.$(SUFFIX) zsymv.$(PSUFFIX) : zsymv.c - $(CC) -c $(CFLAGS) $< 
-o $(@F) - -xsymv.$(SUFFIX) xsymv.$(PSUFFIX) : zsymv.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -ssyr.$(SUFFIX) ssyr.$(PSUFFIX) : syr.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -dsyr.$(SUFFIX) dsyr.$(PSUFFIX) : syr.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -qsyr.$(SUFFIX) qsyr.$(PSUFFIX) : syr.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -csyr.$(SUFFIX) csyr.$(PSUFFIX) : zsyr.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -zsyr.$(SUFFIX) zsyr.$(PSUFFIX) : zsyr.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -xsyr.$(SUFFIX) xsyr.$(PSUFFIX) : zsyr.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -ssyr2.$(SUFFIX) ssyr2.$(PSUFFIX) : syr2.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -dsyr2.$(SUFFIX) dsyr2.$(PSUFFIX) : syr2.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -qsyr2.$(SUFFIX) qsyr2.$(PSUFFIX) : syr2.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -csyr2.$(SUFFIX) csyr2.$(PSUFFIX) : zsyr2.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -zsyr2.$(SUFFIX) zsyr2.$(PSUFFIX) : zsyr2.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -xsyr2.$(SUFFIX) xsyr2.$(PSUFFIX) : zsyr2.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -sgbmv.$(SUFFIX) sgbmv.$(PSUFFIX): gbmv.c - $(CC) -c $(CFLAGS) -o $(@F) $< - -dgbmv.$(SUFFIX) dgbmv.$(PSUFFIX): gbmv.c - $(CC) -c $(CFLAGS) -o $(@F) $< - -qgbmv.$(SUFFIX) qgbmv.$(PSUFFIX): gbmv.c - $(CC) -c $(CFLAGS) -o $(@F) $< - -cgbmv.$(SUFFIX) cgbmv.$(PSUFFIX): zgbmv.c - $(CC) -c $(CFLAGS) -o $(@F) $< - -zgbmv.$(SUFFIX) zgbmv.$(PSUFFIX): zgbmv.c - $(CC) -c $(CFLAGS) -o $(@F) $< - -xgbmv.$(SUFFIX) xgbmv.$(PSUFFIX): zgbmv.c - $(CC) -c $(CFLAGS) -o $(@F) $< - -ssbmv.$(SUFFIX) ssbmv.$(PSUFFIX) : sbmv.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -dsbmv.$(SUFFIX) dsbmv.$(PSUFFIX) : sbmv.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -qsbmv.$(SUFFIX) qsbmv.$(PSUFFIX) : sbmv.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -csbmv.$(SUFFIX) csbmv.$(PSUFFIX) : zsbmv.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -zsbmv.$(SUFFIX) zsbmv.$(PSUFFIX) : zsbmv.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -xsbmv.$(SUFFIX) xsbmv.$(PSUFFIX) : zsbmv.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -sspmv.$(SUFFIX) sspmv.$(PSUFFIX) : spmv.c - 
$(CC) -c $(CFLAGS) $< -o $(@F) - -dspmv.$(SUFFIX) dspmv.$(PSUFFIX) : spmv.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -qspmv.$(SUFFIX) qspmv.$(PSUFFIX) : spmv.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -cspmv.$(SUFFIX) cspmv.$(PSUFFIX) : zspmv.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -zspmv.$(SUFFIX) zspmv.$(PSUFFIX) : zspmv.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -xspmv.$(SUFFIX) xspmv.$(PSUFFIX) : zspmv.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -sspr.$(SUFFIX) sspr.$(PSUFFIX) : spr.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -dspr.$(SUFFIX) dspr.$(PSUFFIX) : spr.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -qspr.$(SUFFIX) qspr.$(PSUFFIX) : spr.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -cspr.$(SUFFIX) cspr.$(PSUFFIX) : zspr.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -zspr.$(SUFFIX) zspr.$(PSUFFIX) : zspr.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -xspr.$(SUFFIX) xspr.$(PSUFFIX) : zspr.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -sspr2.$(SUFFIX) sspr2.$(PSUFFIX) : spr2.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -dspr2.$(SUFFIX) dspr2.$(PSUFFIX) : spr2.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -qspr2.$(SUFFIX) qspr2.$(PSUFFIX) : spr2.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -cspr2.$(SUFFIX) cspr2.$(PSUFFIX) : zspr2.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -zspr2.$(SUFFIX) zspr2.$(PSUFFIX) : zspr2.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -xspr2.$(SUFFIX) xspr2.$(PSUFFIX) : zspr2.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -stbmv.$(SUFFIX) stbmv.$(PSUFFIX) : tbmv.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -dtbmv.$(SUFFIX) dtbmv.$(PSUFFIX) : tbmv.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -qtbmv.$(SUFFIX) qtbmv.$(PSUFFIX) : tbmv.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -ctbmv.$(SUFFIX) ctbmv.$(PSUFFIX) : ztbmv.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -ztbmv.$(SUFFIX) ztbmv.$(PSUFFIX) : ztbmv.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -xtbmv.$(SUFFIX) xtbmv.$(PSUFFIX) : ztbmv.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -stbsv.$(SUFFIX) stbsv.$(PSUFFIX) : tbsv.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -dtbsv.$(SUFFIX) dtbsv.$(PSUFFIX) : tbsv.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -qtbsv.$(SUFFIX) 
qtbsv.$(PSUFFIX) : tbsv.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -ctbsv.$(SUFFIX) ctbsv.$(PSUFFIX) : ztbsv.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -ztbsv.$(SUFFIX) ztbsv.$(PSUFFIX) : ztbsv.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -xtbsv.$(SUFFIX) xtbsv.$(PSUFFIX) : ztbsv.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -stpsv.$(SUFFIX) stpsv.$(PSUFFIX) : tpsv.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -dtpsv.$(SUFFIX) dtpsv.$(PSUFFIX) : tpsv.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -qtpsv.$(SUFFIX) qtpsv.$(PSUFFIX) : tpsv.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -ctpsv.$(SUFFIX) ctpsv.$(PSUFFIX) : ztpsv.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -ztpsv.$(SUFFIX) ztpsv.$(PSUFFIX) : ztpsv.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -xtpsv.$(SUFFIX) xtpsv.$(PSUFFIX) : ztpsv.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -stpmv.$(SUFFIX) stpmv.$(PSUFFIX) : tpmv.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -dtpmv.$(SUFFIX) dtpmv.$(PSUFFIX) : tpmv.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -qtpmv.$(SUFFIX) qtpmv.$(PSUFFIX) : tpmv.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -ctpmv.$(SUFFIX) ctpmv.$(PSUFFIX) : ztpmv.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -ztpmv.$(SUFFIX) ztpmv.$(PSUFFIX) : ztpmv.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -xtpmv.$(SUFFIX) xtpmv.$(PSUFFIX) : ztpmv.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -chemv.$(SUFFIX) chemv.$(PSUFFIX) : zhemv.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -zhemv.$(SUFFIX) zhemv.$(PSUFFIX) : zhemv.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -xhemv.$(SUFFIX) xhemv.$(PSUFFIX) : zhemv.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -chbmv.$(SUFFIX) chbmv.$(PSUFFIX) : zhbmv.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -zhbmv.$(SUFFIX) zhbmv.$(PSUFFIX) : zhbmv.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -xhbmv.$(SUFFIX) xhbmv.$(PSUFFIX) : zhbmv.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -cher.$(SUFFIX) cher.$(PSUFFIX) : zher.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -zher.$(SUFFIX) zher.$(PSUFFIX) : zher.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -xher.$(SUFFIX) xher.$(PSUFFIX) : zher.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -cher2.$(SUFFIX) cher2.$(PSUFFIX) : zher2.c - $(CC) -c 
$(CFLAGS) $< -o $(@F) - -zher2.$(SUFFIX) zher2.$(PSUFFIX) : zher2.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -xher2.$(SUFFIX) xher2.$(PSUFFIX) : zher2.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -chpmv.$(SUFFIX) chpmv.$(PSUFFIX) : zhpmv.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -zhpmv.$(SUFFIX) zhpmv.$(PSUFFIX) : zhpmv.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -xhpmv.$(SUFFIX) xhpmv.$(PSUFFIX) : zhpmv.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -chpr.$(SUFFIX) chpr.$(PSUFFIX) : zhpr.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -zhpr.$(SUFFIX) zhpr.$(PSUFFIX) : zhpr.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -xhpr.$(SUFFIX) xhpr.$(PSUFFIX) : zhpr.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -chpr2.$(SUFFIX) chpr2.$(PSUFFIX) : zhpr2.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -zhpr2.$(SUFFIX) zhpr2.$(PSUFFIX) : zhpr2.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -xhpr2.$(SUFFIX) xhpr2.$(PSUFFIX) : zhpr2.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -ifeq ($(BUILD_BFLOAT16),1) -sbgemm.$(SUFFIX) sbgemm.$(PSUFFIX) : gemm.c ../param.h - $(CC) -c $(CFLAGS) $< -o $(@F) -endif - -sgemm.$(SUFFIX) sgemm.$(PSUFFIX) : gemm.c ../param.h - $(CC) -c $(CFLAGS) $< -o $(@F) - -dgemm.$(SUFFIX) dgemm.$(PSUFFIX) : gemm.c ../param.h - $(CC) -c $(CFLAGS) $< -o $(@F) - -qgemm.$(SUFFIX) qgemm.$(PSUFFIX) : gemm.c ../param.h - $(CC) -c $(CFLAGS) $< -o $(@F) - -cgemm.$(SUFFIX) cgemm.$(PSUFFIX) : gemm.c ../param.h - $(CC) -c $(CFLAGS) $< -o $(@F) - -zgemm.$(SUFFIX) zgemm.$(PSUFFIX) : gemm.c ../param.h - $(CC) -c $(CFLAGS) $< -o $(@F) - -xgemm.$(SUFFIX) xgemm.$(PSUFFIX) : gemm.c ../param.h - $(CC) -c $(CFLAGS) $< -o $(@F) - -ssymm.$(SUFFIX) ssymm.$(PSUFFIX) : symm.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -dsymm.$(SUFFIX) dsymm.$(PSUFFIX) : symm.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -qsymm.$(SUFFIX) qsymm.$(PSUFFIX) : symm.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -csymm.$(SUFFIX) csymm.$(PSUFFIX) : symm.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -zsymm.$(SUFFIX) zsymm.$(PSUFFIX) : symm.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -xsymm.$(SUFFIX) xsymm.$(PSUFFIX) : symm.c - $(CC) -c $(CFLAGS) 
$< -o $(@F) - -strmm.$(SUFFIX) strmm.$(PSUFFIX) : trsm.c - $(CC) -c $(CFLAGS) -DTRMM $< -o $(@F) - -dtrmm.$(SUFFIX) dtrmm.$(PSUFFIX) : trsm.c - $(CC) -c $(CFLAGS) -DTRMM $< -o $(@F) - -qtrmm.$(SUFFIX) qtrmm.$(PSUFFIX) : trsm.c - $(CC) -c $(CFLAGS) -DTRMM $< -o $(@F) - -ctrmm.$(SUFFIX) ctrmm.$(PSUFFIX) : trsm.c - $(CC) -c $(CFLAGS) -DTRMM $< -o $(@F) - -ztrmm.$(SUFFIX) ztrmm.$(PSUFFIX) : trsm.c - $(CC) -c $(CFLAGS) -DTRMM $< -o $(@F) - -xtrmm.$(SUFFIX) xtrmm.$(PSUFFIX) : trsm.c - $(CC) -c $(CFLAGS) -DTRMM $< -o $(@F) - -strsm.$(SUFFIX) strsm.$(PSUFFIX) : trsm.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -dtrsm.$(SUFFIX) dtrsm.$(PSUFFIX) : trsm.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -qtrsm.$(SUFFIX) qtrsm.$(PSUFFIX) : trsm.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -ctrsm.$(SUFFIX) ctrsm.$(PSUFFIX) : trsm.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -ztrsm.$(SUFFIX) ztrsm.$(PSUFFIX) : trsm.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -xtrsm.$(SUFFIX) xtrsm.$(PSUFFIX) : trsm.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -ssyrk.$(SUFFIX) ssyrk.$(PSUFFIX) : syrk.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -dsyrk.$(SUFFIX) dsyrk.$(PSUFFIX) : syrk.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -qsyrk.$(SUFFIX) qsyrk.$(PSUFFIX) : syrk.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -csyrk.$(SUFFIX) csyrk.$(PSUFFIX) : syrk.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -zsyrk.$(SUFFIX) zsyrk.$(PSUFFIX) : syrk.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -xsyrk.$(SUFFIX) xsyrk.$(PSUFFIX) : syrk.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -ssyr2k.$(SUFFIX) ssyr2k.$(PSUFFIX) : syr2k.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -dsyr2k.$(SUFFIX) dsyr2k.$(PSUFFIX) : syr2k.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -qsyr2k.$(SUFFIX) qsyr2k.$(PSUFFIX) : syr2k.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -csyr2k.$(SUFFIX) csyr2k.$(PSUFFIX) : syr2k.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -zsyr2k.$(SUFFIX) zsyr2k.$(PSUFFIX) : syr2k.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -xsyr2k.$(SUFFIX) xsyr2k.$(PSUFFIX) : syr2k.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -chemm.$(SUFFIX) chemm.$(PSUFFIX) : symm.c - $(CC) -c 
$(CFLAGS) -DHEMM $< -o $(@F) - -zhemm.$(SUFFIX) zhemm.$(PSUFFIX) : symm.c - $(CC) -c $(CFLAGS) -DHEMM $< -o $(@F) - -xhemm.$(SUFFIX) xhemm.$(PSUFFIX) : symm.c - $(CC) -c $(CFLAGS) -DHEMM $< -o $(@F) - -cherk.$(SUFFIX) cherk.$(PSUFFIX) : syrk.c - $(CC) -c $(CFLAGS) -DHEMM $< -o $(@F) - -zherk.$(SUFFIX) zherk.$(PSUFFIX) : syrk.c - $(CC) -c $(CFLAGS) -DHEMM $< -o $(@F) - -xherk.$(SUFFIX) xherk.$(PSUFFIX) : syrk.c - $(CC) -c $(CFLAGS) -DHEMM $< -o $(@F) - -cher2k.$(SUFFIX) cher2k.$(PSUFFIX) : syr2k.c - $(CC) -c $(CFLAGS) -DHEMM $< -o $(@F) - -zher2k.$(SUFFIX) zher2k.$(PSUFFIX) : syr2k.c - $(CC) -c $(CFLAGS) -DHEMM $< -o $(@F) - -xher2k.$(SUFFIX) xher2k.$(PSUFFIX) : syr2k.c - $(CC) -c $(CFLAGS) -DHEMM $< -o $(@F) - -cgemm3m.$(SUFFIX) cgemm3m.$(PSUFFIX) : gemm.c - $(CC) -c $(CFLAGS) -DGEMM3M $< -o $(@F) - -zgemm3m.$(SUFFIX) zgemm3m.$(PSUFFIX) : gemm.c - $(CC) -c $(CFLAGS) -DGEMM3M $< -o $(@F) - -xgemm3m.$(SUFFIX) xgemm3m.$(PSUFFIX) : gemm.c - $(CC) -c $(CFLAGS) -DGEMM3M $< -o $(@F) - -csymm3m.$(SUFFIX) csymm3m.$(PSUFFIX) : symm.c - $(CC) -c $(CFLAGS) -DGEMM3M $< -o $(@F) - -zsymm3m.$(SUFFIX) zsymm3m.$(PSUFFIX) : symm.c - $(CC) -c $(CFLAGS) -DGEMM3M $< -o $(@F) - -xsymm3m.$(SUFFIX) xsymm3m.$(PSUFFIX) : symm.c - $(CC) -c $(CFLAGS) -DGEMM3M $< -o $(@F) - -chemm3m.$(SUFFIX) chemm3m.$(PSUFFIX) : symm.c - $(CC) -c $(CFLAGS) -DGEMM3M -DHEMM $< -o $(@F) - -zhemm3m.$(SUFFIX) zhemm3m.$(PSUFFIX) : symm.c - $(CC) -c $(CFLAGS) -DGEMM3M -DHEMM $< -o $(@F) - -xhemm3m.$(SUFFIX) xhemm3m.$(PSUFFIX) : symm.c - $(CC) -c $(CFLAGS) -DGEMM3M -DHEMM $< -o $(@F) - -cblas_isamax.$(SUFFIX) cblas_isamax.$(PSUFFIX) : imax.c - $(CC) $(CFLAGS) -DCBLAS -c -DUSE_ABS -UUSE_MIN $< -o $(@F) - -cblas_idamax.$(SUFFIX) cblas_idamax.$(PSUFFIX) : imax.c - $(CC) $(CFLAGS) -DCBLAS -c -DUSE_ABS -UUSE_MIN $< -o $(@F) - -cblas_icamax.$(SUFFIX) cblas_icamax.$(PSUFFIX) : imax.c - $(CC) $(CFLAGS) -DCBLAS -c -DUSE_ABS -UUSE_MIN $< -o $(@F) - -cblas_izamax.$(SUFFIX) cblas_izamax.$(PSUFFIX) : imax.c - $(CC) $(CFLAGS) 
-DCBLAS -c -DUSE_ABS -UUSE_MIN $< -o $(@F) - -cblas_isamin.$(SUFFIX) cblas_isamin.$(PSUFFIX) : imax.c - $(CC) $(CFLAGS) -DCBLAS -c -DUSE_ABS -DUSE_MIN $< -o $(@F) - -cblas_idamin.$(SUFFIX) cblas_idamin.$(PSUFFIX) : imax.c - $(CC) $(CFLAGS) -DCBLAS -c -DUSE_ABS -DUSE_MIN $< -o $(@F) - -cblas_icamin.$(SUFFIX) cblas_icamin.$(PSUFFIX) : imax.c - $(CC) $(CFLAGS) -DCBLAS -c -DUSE_ABS -DUSE_MIN $< -o $(@F) - -cblas_izamin.$(SUFFIX) cblas_izamin.$(PSUFFIX) : imax.c - $(CC) $(CFLAGS) -DCBLAS -c -DUSE_ABS -DUSE_MIN $< -o $(@F) - -cblas_ismax.$(SUFFIX) cblas_ismax.$(PSUFFIX) : imax.c - $(CC) $(CFLAGS) -DCBLAS -c -UUSE_ABS -UUSE_MIN $< -o $(@F) - -cblas_idmax.$(SUFFIX) cblas_idmax.$(PSUFFIX) : imax.c - $(CC) $(CFLAGS) -DCBLAS -c -UUSE_ABS -UUSE_MIN $< -o $(@F) - -cblas_ismin.$(SUFFIX) cblas_ismin.$(PSUFFIX) : imax.c - $(CC) $(CFLAGS) -DCBLAS -c -UUSE_ABS -DUSE_MIN $< -o $(@F) - -cblas_idmin.$(SUFFIX) cblas_idmin.$(PSUFFIX) : imax.c - $(CC) $(CFLAGS) -DCBLAS -c -UUSE_ABS -DUSE_MIN $< -o $(@F) - -cblas_icmax.$(SUFFIX) cblas_icmax.$(PSUFFIX) : imax.c - $(CC) $(CFLAGS) -DCBLAS -c -UUSE_ABS -UUSE_MIN $< -o $(@F) - -cblas_izmax.$(SUFFIX) cblas_izmax.$(PSUFFIX) : imax.c - $(CC) $(CFLAGS) -DCBLAS -c -UUSE_ABS -UUSE_MIN $< -o $(@F) - -cblas_icmin.$(SUFFIX) cblas_icmin.$(PSUFFIX) : imax.c - $(CC) $(CFLAGS) -DCBLAS -c -UUSE_ABS -DUSE_MIN $< -o $(@F) - -cblas_izmin.$(SUFFIX) cblas_izmin.$(PSUFFIX) : imax.c - $(CC) $(CFLAGS) -DCBLAS -c -UUSE_ABS -DUSE_MIN $< -o $(@F) - -cblas_sasum.$(SUFFIX) cblas_sasum.$(PSUFFIX) : asum.c - $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) - -cblas_dasum.$(SUFFIX) cblas_dasum.$(PSUFFIX) : asum.c - $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) - -cblas_scasum.$(SUFFIX) cblas_scasum.$(PSUFFIX) : asum.c - $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) - -cblas_dzasum.$(SUFFIX) cblas_dzasum.$(PSUFFIX) : asum.c - $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) - -cblas_ssum.$(SUFFIX) cblas_ssum.$(PSUFFIX) : sum.c - $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) - -cblas_dsum.$(SUFFIX) 
cblas_dsum.$(PSUFFIX) : sum.c - $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) - -cblas_scsum.$(SUFFIX) cblas_scsum.$(PSUFFIX) : sum.c - $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) - -cblas_dzsum.$(SUFFIX) cblas_dzsum.$(PSUFFIX) : sum.c - $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) - -cblas_sdsdot.$(SUFFIX) cblas_sdsdot.$(PSUFFIX) : sdsdot.c - $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) - -cblas_dsdot.$(SUFFIX) cblas_dsdot.$(PSUFFIX) : dsdot.c - $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) - -ifeq ($(BUILD_BFLOAT16),1) -cblas_sbdot.$(SUFFIX) cblas_sbdot.$(PSUFFIX) : bf16dot.c - $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) -cblas_sbstobf16.$(SUFFIX) cblas_sbstobf16.$(PSUFFIX) : tobf16.c - $(CC) $(CFLAGS) -DCBLAS -DSINGLE_PREC -UDOUBLE_PREC -c $< -o $(@F) -cblas_sbdtobf16.$(SUFFIX) cblas_sbdtobf16.$(PSUFFIX) : tobf16.c - $(CC) $(CFLAGS) -DCBLAS -USINGLE_PREC -DDOUBLE_PREC -c $< -o $(@F) -cblas_sbf16tos.$(SUFFIX) cblas_sbf16tos.$(PSUFFIX) : bf16to.c - $(CC) $(CFLAGS) -DCBLAS -DSINGLE_PREC -UDOUBLE_PREC -c $< -o $(@F) -cblas_dbf16tod.$(SUFFIX) cblas_dbf16tod.$(PSUFFIX) : bf16to.c - $(CC) $(CFLAGS) -DCBLAS -USINGLE_PREC -DDOUBLE_PREC -c $< -o $(@F) -endif - -cblas_sdot.$(SUFFIX) cblas_sdot.$(PSUFFIX) : dot.c - $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) - -cblas_ddot.$(SUFFIX) cblas_ddot.$(PSUFFIX) : dot.c - $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) - -cblas_cdotu.$(SUFFIX) cblas_cdotu.$(PSUFFIX) : zdot.c - $(CC) $(CFLAGS) -DCBLAS -c -UCONJ $< -o $(@F) - -cblas_cdotc.$(SUFFIX) cblas_cdotc.$(PSUFFIX) : zdot.c - $(CC) $(CFLAGS) -DCBLAS -c -DCONJ $< -o $(@F) - -cblas_zdotu.$(SUFFIX) cblas_zdotu.$(PSUFFIX) : zdot.c - $(CC) $(CFLAGS) -DCBLAS -c -UCONJ $< -o $(@F) - -cblas_zdotc.$(SUFFIX) cblas_zdotc.$(PSUFFIX) : zdot.c - $(CC) $(CFLAGS) -DCBLAS -c -DCONJ $< -o $(@F) - -cblas_cdotu_sub.$(SUFFIX) cblas_cdotu_sub.$(PSUFFIX) : zdot.c - $(CC) $(CFLAGS) -DCBLAS -DFORCE_USE_STACK -c -UCONJ $< -o $(@F) - -cblas_cdotc_sub.$(SUFFIX) cblas_cdotc_sub.$(PSUFFIX) : zdot.c - $(CC) $(CFLAGS) -DCBLAS -DFORCE_USE_STACK -c -DCONJ $< -o 
$(@F) - -cblas_zdotu_sub.$(SUFFIX) cblas_zdotu_sub.$(PSUFFIX) : zdot.c - $(CC) $(CFLAGS) -DCBLAS -DFORCE_USE_STACK -c -UCONJ $< -o $(@F) - -cblas_zdotc_sub.$(SUFFIX) cblas_zdotc_sub.$(PSUFFIX) : zdot.c - $(CC) $(CFLAGS) -DCBLAS -DFORCE_USE_STACK -c -DCONJ $< -o $(@F) - -cblas_snrm2.$(SUFFIX) cblas_snrm2.$(PSUFFIX) : nrm2.c - $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) - -cblas_dnrm2.$(SUFFIX) cblas_dnrm2.$(PSUFFIX) : nrm2.c - $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) - -cblas_scnrm2.$(SUFFIX) cblas_scnrm2.$(PSUFFIX) : nrm2.c - $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) - -cblas_dznrm2.$(SUFFIX) cblas_dznrm2.$(PSUFFIX) : nrm2.c - $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) - -cblas_saxpy.$(SUFFIX) cblas_saxpy.$(PSUFFIX) : axpy.c - $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) - -cblas_daxpy.$(SUFFIX) cblas_daxpy.$(PSUFFIX) : axpy.c - $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) - -cblas_caxpy.$(SUFFIX) cblas_caxpy.$(PSUFFIX) : zaxpy.c - $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) - -cblas_zaxpy.$(SUFFIX) cblas_zaxpy.$(PSUFFIX) : zaxpy.c - $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) - -cblas_scopy.$(SUFFIX) cblas_scopy.$(PSUFFIX) : copy.c - $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) - -cblas_dcopy.$(SUFFIX) cblas_dcopy.$(PSUFFIX) : copy.c - $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) - -cblas_ccopy.$(SUFFIX) cblas_ccopy.$(PSUFFIX) : copy.c - $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) - -cblas_zcopy.$(SUFFIX) cblas_zcopy.$(PSUFFIX) : copy.c - $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) - -cblas_sswap.$(SUFFIX) cblas_sswap.$(PSUFFIX) : swap.c - $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) - -cblas_dswap.$(SUFFIX) cblas_dswap.$(PSUFFIX) : swap.c - $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) - -cblas_cswap.$(SUFFIX) cblas_cswap.$(PSUFFIX) : zswap.c - $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) - -cblas_zswap.$(SUFFIX) cblas_zswap.$(PSUFFIX) : zswap.c - $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) - -cblas_srot.$(SUFFIX) cblas_srot.$(PSUFFIX) : rot.c - $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) - -cblas_drot.$(SUFFIX) cblas_drot.$(PSUFFIX) : rot.c - 
$(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) - -cblas_srotg.$(SUFFIX) cblas_srotg.$(PSUFFIX): rotg.c - $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) - -cblas_drotg.$(SUFFIX) cblas_drotg.$(PSUFFIX): rotg.c - $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) - -cblas_crotg.$(SUFFIX) crotg.$(PSUFFIX): zrotg.c - $(CC) -c $(CFLAGS) -DCBLAS $< -o $(@F) - -cblas_zrotg.$(SUFFIX) zrotg.$(PSUFFIX): zrotg.c - $(CC) -c $(CFLAGS) -DCBLAS $< -o $(@F) - -cblas_srotm.$(SUFFIX) cblas_srotm.$(PSUFFIX): rotm.c - $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) - -cblas_drotm.$(SUFFIX) cblas_drotm.$(PSUFFIX): rotm.c - $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) - -cblas_srotmg.$(SUFFIX) cblas_srotmg.$(PSUFFIX): rotmg.c - $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) - -cblas_drotmg.$(SUFFIX) cblas_drotmg.$(PSUFFIX): rotmg.c - $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) - -cblas_sscal.$(SUFFIX) cblas_sscal.$(PSUFFIX) : scal.c - $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) - -cblas_dscal.$(SUFFIX) cblas_dscal.$(PSUFFIX) : scal.c - $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) - -cblas_cscal.$(SUFFIX) cblas_cscal.$(PSUFFIX) : zscal.c - $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) - -cblas_zscal.$(SUFFIX) cblas_zscal.$(PSUFFIX) : zscal.c - $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) - -cblas_csscal.$(SUFFIX) cblas_csscal.$(PSUFFIX) : zscal.c - $(CC) $(CFLAGS) -DCBLAS -c -DSSCAL $< -o $(@F) - -cblas_zdscal.$(SUFFIX) cblas_zdscal.$(PSUFFIX) : zscal.c - $(CC) $(CFLAGS) -DCBLAS -c -DSSCAL $< -o $(@F) - -cblas_csrot.$(SUFFIX) cblas_csrot.$(PSUFFIX) : zrot.c - $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) - -cblas_zdrot.$(SUFFIX) cblas_zdrot.$(PSUFFIX) : zrot.c - $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) - -ifeq ($(BUILD_BFLOAT16),1) -cblas_sbgemv.$(SUFFIX) cblas_sbgemv.$(PSUFFIX) : sbgemv.c - $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) -endif - -cblas_sgemv.$(SUFFIX) cblas_sgemv.$(PSUFFIX): gemv.c - $(CC) -DCBLAS -c $(CFLAGS) -o $(@F) $< - -cblas_dgemv.$(SUFFIX) cblas_dgemv.$(PSUFFIX): gemv.c - $(CC) -DCBLAS -c $(CFLAGS) -o $(@F) $< - -cblas_cgemv.$(SUFFIX) cblas_cgemv.$(PSUFFIX): 
zgemv.c - $(CC) -DCBLAS -c $(CFLAGS) -o $(@F) $< - -cblas_zgemv.$(SUFFIX) cblas_zgemv.$(PSUFFIX): zgemv.c - $(CC) -DCBLAS -c $(CFLAGS) -o $(@F) $< - -cblas_sger.$(SUFFIX) cblas_sger.$(PSUFFIX) : ger.c - $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) - -cblas_dger.$(SUFFIX) cblas_dger.$(PSUFFIX) : ger.c - $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) - -cblas_cgeru.$(SUFFIX) cblas_cgeru.$(PSUFFIX) : zger.c - $(CC) -DCBLAS -c $(CFLAGS) -UCONJ $< -o $(@F) - -cblas_cgerc.$(SUFFIX) cblas_cgerc.$(PSUFFIX) : zger.c - $(CC) -DCBLAS -c $(CFLAGS) -DCONJ $< -o $(@F) - -cblas_zgeru.$(SUFFIX) cblas_zgeru.$(PSUFFIX) : zger.c - $(CC) -DCBLAS -c $(CFLAGS) -UCONJ $< -o $(@F) - -cblas_zgerc.$(SUFFIX) cblas_zgerc.$(PSUFFIX) : zger.c - $(CC) -DCBLAS -c $(CFLAGS) -DCONJ $< -o $(@F) - -cblas_strsv.$(SUFFIX) cblas_strsv.$(PSUFFIX) : trsv.c - $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) - -cblas_dtrsv.$(SUFFIX) cblas_dtrsv.$(PSUFFIX) : trsv.c - $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) - -cblas_ctrsv.$(SUFFIX) cblas_ctrsv.$(PSUFFIX) : ztrsv.c - $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) - -cblas_ztrsv.$(SUFFIX) cblas_ztrsv.$(PSUFFIX) : ztrsv.c - $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) - -cblas_strmv.$(SUFFIX) cblas_strmv.$(PSUFFIX) : trmv.c - $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) - -cblas_dtrmv.$(SUFFIX) cblas_dtrmv.$(PSUFFIX) : trmv.c - $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) - -cblas_ctrmv.$(SUFFIX) cblas_ctrmv.$(PSUFFIX) : ztrmv.c - $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) - -cblas_ztrmv.$(SUFFIX) cblas_ztrmv.$(PSUFFIX) : ztrmv.c - $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) - -cblas_ssyr.$(SUFFIX) cblas_ssyr.$(PSUFFIX) : syr.c - $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) - -cblas_dsyr.$(SUFFIX) cblas_dsyr.$(PSUFFIX) : syr.c - $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) - -cblas_cher.$(SUFFIX) cblas_cher.$(PSUFFIX) : zher.c - $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) - -cblas_zher.$(SUFFIX) cblas_zher.$(PSUFFIX) : zher.c - $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) - -cblas_ssyr2.$(SUFFIX) cblas_ssyr2.$(PSUFFIX) : syr2.c - $(CC) -DCBLAS 
-c $(CFLAGS) $< -o $(@F) - -cblas_dsyr2.$(SUFFIX) cblas_dsyr2.$(PSUFFIX) : syr2.c - $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) - -cblas_cher2.$(SUFFIX) cblas_cher2.$(PSUFFIX) : zher2.c - $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) - -cblas_zher2.$(SUFFIX) cblas_zher2.$(PSUFFIX) : zher2.c - $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) - -cblas_sgbmv.$(SUFFIX) cblas_sgbmv.$(PSUFFIX): gbmv.c - $(CC) -DCBLAS -c $(CFLAGS) -o $(@F) $< - -cblas_dgbmv.$(SUFFIX) cblas_dgbmv.$(PSUFFIX): gbmv.c - $(CC) -DCBLAS -c $(CFLAGS) -o $(@F) $< - -cblas_cgbmv.$(SUFFIX) cblas_cgbmv.$(PSUFFIX): zgbmv.c - $(CC) -DCBLAS -c $(CFLAGS) -o $(@F) $< - -cblas_zgbmv.$(SUFFIX) cblas_zgbmv.$(PSUFFIX): zgbmv.c - $(CC) -DCBLAS -c $(CFLAGS) -o $(@F) $< - -cblas_ssbmv.$(SUFFIX) cblas_ssbmv.$(PSUFFIX) : sbmv.c - $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) - -cblas_dsbmv.$(SUFFIX) cblas_dsbmv.$(PSUFFIX) : sbmv.c - $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) - -cblas_chbmv.$(SUFFIX) cblas_chbmv.$(PSUFFIX) : zhbmv.c - $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) - -cblas_zhbmv.$(SUFFIX) cblas_zhbmv.$(PSUFFIX) : zhbmv.c - $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) - -cblas_sspmv.$(SUFFIX) cblas_sspmv.$(PSUFFIX) : spmv.c - $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) - -cblas_dspmv.$(SUFFIX) cblas_dspmv.$(PSUFFIX) : spmv.c - $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) - -cblas_sspr.$(SUFFIX) cblas_sspr.$(PSUFFIX) : spr.c - $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) - -cblas_dspr.$(SUFFIX) cblas_dspr.$(PSUFFIX) : spr.c - $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) - -cblas_chpr.$(SUFFIX) cblas_chpr.$(PSUFFIX) : zhpr.c - $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) - -cblas_zhpr.$(SUFFIX) cblas_zhpr.$(PSUFFIX) : zhpr.c - $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) - -cblas_sspr2.$(SUFFIX) cblas_sspr2.$(PSUFFIX) : spr2.c - $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) - -cblas_dspr2.$(SUFFIX) cblas_dspr2.$(PSUFFIX) : spr2.c - $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) - -cblas_chpr2.$(SUFFIX) cblas_chpr2.$(PSUFFIX) : zhpr2.c - $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) - 
-cblas_zhpr2.$(SUFFIX) cblas_zhpr2.$(PSUFFIX) : zhpr2.c - $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) - -cblas_stbmv.$(SUFFIX) cblas_stbmv.$(PSUFFIX) : tbmv.c - $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) - -cblas_dtbmv.$(SUFFIX) cblas_dtbmv.$(PSUFFIX) : tbmv.c - $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) - -cblas_ctbmv.$(SUFFIX) cblas_ctbmv.$(PSUFFIX) : ztbmv.c - $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) - -cblas_ztbmv.$(SUFFIX) cblas_ztbmv.$(PSUFFIX) : ztbmv.c - $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) - -cblas_stbsv.$(SUFFIX) cblas_stbsv.$(PSUFFIX) : tbsv.c - $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) - -cblas_dtbsv.$(SUFFIX) cblas_dtbsv.$(PSUFFIX) : tbsv.c - $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) - -cblas_ctbsv.$(SUFFIX) cblas_ctbsv.$(PSUFFIX) : ztbsv.c - $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) - -cblas_ztbsv.$(SUFFIX) cblas_ztbsv.$(PSUFFIX) : ztbsv.c - $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) - -cblas_stpmv.$(SUFFIX) cblas_stpmv.$(PSUFFIX) : tpmv.c - $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) - -cblas_dtpmv.$(SUFFIX) cblas_dtpmv.$(PSUFFIX) : tpmv.c - $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) - -cblas_ctpmv.$(SUFFIX) cblas_ctpmv.$(PSUFFIX) : ztpmv.c - $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) - -cblas_ztpmv.$(SUFFIX) cblas_ztpmv.$(PSUFFIX) : ztpmv.c - $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) - -cblas_chpmv.$(SUFFIX) cblas_chpmv.$(PSUFFIX) : zhpmv.c - $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) - -cblas_zhpmv.$(SUFFIX) cblas_zhpmv.$(PSUFFIX) : zhpmv.c - $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) - -cblas_stpsv.$(SUFFIX) cblas_stpsv.$(PSUFFIX) : tpsv.c - $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) - -cblas_dtpsv.$(SUFFIX) cblas_dtpsv.$(PSUFFIX) : tpsv.c - $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) - -cblas_ctpsv.$(SUFFIX) cblas_ctpsv.$(PSUFFIX) : ztpsv.c - $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) - -cblas_ztpsv.$(SUFFIX) cblas_ztpsv.$(PSUFFIX) : ztpsv.c - $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) - -cblas_ssymv.$(SUFFIX) cblas_ssymv.$(PSUFFIX) : symv.c - $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) - -cblas_dsymv.$(SUFFIX) 
cblas_dsymv.$(PSUFFIX) : symv.c - $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) - -cblas_chemv.$(SUFFIX) cblas_chemv.$(PSUFFIX) : zhemv.c - $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) - -cblas_zhemv.$(SUFFIX) cblas_zhemv.$(PSUFFIX) : zhemv.c - $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) - -cblas_sgemm.$(SUFFIX) cblas_sgemm.$(PSUFFIX) : gemm.c ../param.h - $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) - -ifeq ($(BUILD_BFLOAT16),1) -cblas_sbgemm.$(SUFFIX) cblas_sbgemm.$(PSUFFIX) : gemm.c ../param.h - $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) -endif - -cblas_dgemm.$(SUFFIX) cblas_dgemm.$(PSUFFIX) : gemm.c ../param.h - $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) - -cblas_cgemm.$(SUFFIX) cblas_cgemm.$(PSUFFIX) : gemm.c ../param.h - $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) - -cblas_zgemm.$(SUFFIX) cblas_zgemm.$(PSUFFIX) : gemm.c ../param.h - $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) - -cblas_ssymm.$(SUFFIX) cblas_ssymm.$(PSUFFIX) : symm.c - $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) - -cblas_dsymm.$(SUFFIX) cblas_dsymm.$(PSUFFIX) : symm.c - $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) - -cblas_csymm.$(SUFFIX) cblas_csymm.$(PSUFFIX) : symm.c - $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) - -cblas_zsymm.$(SUFFIX) cblas_zsymm.$(PSUFFIX) : symm.c - $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) - -cblas_ssyrk.$(SUFFIX) cblas_ssyrk.$(PSUFFIX) : syrk.c - $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) - -cblas_dsyrk.$(SUFFIX) cblas_dsyrk.$(PSUFFIX) : syrk.c - $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) - -cblas_csyrk.$(SUFFIX) cblas_csyrk.$(PSUFFIX) : syrk.c - $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) - -cblas_zsyrk.$(SUFFIX) cblas_zsyrk.$(PSUFFIX) : syrk.c - $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) - -cblas_ssyr2k.$(SUFFIX) cblas_ssyr2k.$(PSUFFIX) : syr2k.c - $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) - -cblas_dsyr2k.$(SUFFIX) cblas_dsyr2k.$(PSUFFIX) : syr2k.c - $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) - -cblas_csyr2k.$(SUFFIX) cblas_csyr2k.$(PSUFFIX) : syr2k.c - $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) - -cblas_zsyr2k.$(SUFFIX) cblas_zsyr2k.$(PSUFFIX) : syr2k.c 
- $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) - -cblas_strmm.$(SUFFIX) cblas_strmm.$(PSUFFIX) : trsm.c - $(CC) -DCBLAS -c $(CFLAGS) -DTRMM $< -o $(@F) - -cblas_dtrmm.$(SUFFIX) cblas_dtrmm.$(PSUFFIX) : trsm.c - $(CC) -DCBLAS -c $(CFLAGS) -DTRMM $< -o $(@F) - -cblas_ctrmm.$(SUFFIX) cblas_ctrmm.$(PSUFFIX) : trsm.c - $(CC) -DCBLAS -c $(CFLAGS) -DTRMM $< -o $(@F) - -cblas_ztrmm.$(SUFFIX) cblas_ztrmm.$(PSUFFIX) : trsm.c - $(CC) -DCBLAS -c $(CFLAGS) -DTRMM $< -o $(@F) - -cblas_strsm.$(SUFFIX) cblas_strsm.$(PSUFFIX) : trsm.c - $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) - -cblas_dtrsm.$(SUFFIX) cblas_dtrsm.$(PSUFFIX) : trsm.c - $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) - -cblas_ctrsm.$(SUFFIX) cblas_ctrsm.$(PSUFFIX) : trsm.c - $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) - -cblas_ztrsm.$(SUFFIX) cblas_ztrsm.$(PSUFFIX) : trsm.c - $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) - -cblas_chemm.$(SUFFIX) cblas_chemm.$(PSUFFIX) : symm.c - $(CC) -DCBLAS -c $(CFLAGS) -DHEMM $< -o $(@F) - -cblas_zhemm.$(SUFFIX) cblas_zhemm.$(PSUFFIX) : symm.c - $(CC) -DCBLAS -c $(CFLAGS) -DHEMM $< -o $(@F) - -cblas_cherk.$(SUFFIX) cblas_cherk.$(PSUFFIX) : syrk.c - $(CC) -DCBLAS -c $(CFLAGS) -DHEMM $< -o $(@F) - -cblas_zherk.$(SUFFIX) cblas_zherk.$(PSUFFIX) : syrk.c - $(CC) -DCBLAS -c $(CFLAGS) -DHEMM $< -o $(@F) - -cblas_cher2k.$(SUFFIX) cblas_cher2k.$(PSUFFIX) : syr2k.c - $(CC) -DCBLAS -c $(CFLAGS) -DHEMM $< -o $(@F) - -cblas_zher2k.$(SUFFIX) cblas_zher2k.$(PSUFFIX) : syr2k.c - $(CC) -DCBLAS -c $(CFLAGS) -DHEMM $< -o $(@F) - -cblas_cgemm3m.$(SUFFIX) cblas_cgemm3m.$(PSUFFIX) : gemm.c - $(CC) -DCBLAS -c $(CFLAGS) -DGEMM3M $< -o $(@F) - -cblas_zgemm3m.$(SUFFIX) cblas_zgemm3m.$(PSUFFIX) : gemm.c - $(CC) -DCBLAS -c $(CFLAGS) -DGEMM3M $< -o $(@F) - - -sgetf2.$(SUFFIX) sgetf2.$(PSUFFIX) : lapack/getf2.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -dgetf2.$(SUFFIX) dgetf2.$(PSUFFIX) : lapack/getf2.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -qgetf2.$(SUFFIX) qgetf2.$(PSUFFIX) : getf2.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -cgetf2.$(SUFFIX) 
cgetf2.$(PSUFFIX) : lapack/zgetf2.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -zgetf2.$(SUFFIX) zgetf2.$(PSUFFIX) : lapack/zgetf2.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -xgetf2.$(SUFFIX) xgetf2.$(PSUFFIX) : zgetf2.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -sgetrf.$(SUFFIX) sgetrf.$(PSUFFIX) : lapack/getrf.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -dgetrf.$(SUFFIX) dgetrf.$(PSUFFIX) : lapack/getrf.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -qgetrf.$(SUFFIX) qgetrf.$(PSUFFIX) : getrf.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -cgetrf.$(SUFFIX) cgetrf.$(PSUFFIX) : lapack/zgetrf.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -zgetrf.$(SUFFIX) zgetrf.$(PSUFFIX) : lapack/zgetrf.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -xgetrf.$(SUFFIX) xgetrf.$(PSUFFIX) : zgetrf.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -slauu2.$(SUFFIX) slauu2.$(PSUFFIX) : lapack/lauu2.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -dlauu2.$(SUFFIX) dlauu2.$(PSUFFIX) : lapack/lauu2.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -qlauu2.$(SUFFIX) qlauu2.$(PSUFFIX) : lauu2.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -clauu2.$(SUFFIX) clauu2.$(PSUFFIX) : lapack/zlauu2.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -zlauu2.$(SUFFIX) zlauu2.$(PSUFFIX) : lapack/zlauu2.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -xlauu2.$(SUFFIX) xlauu2.$(PSUFFIX) : zlauu2.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -slauum.$(SUFFIX) slauum.$(PSUFFIX) : lapack/lauum.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -dlauum.$(SUFFIX) dlauum.$(PSUFFIX) : lapack/lauum.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -qlauum.$(SUFFIX) qlauum.$(PSUFFIX) : lauum.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -clauum.$(SUFFIX) clauum.$(PSUFFIX) : lapack/zlauum.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -zlauum.$(SUFFIX) zlauum.$(PSUFFIX) : lapack/zlauum.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -xlauum.$(SUFFIX) xlauum.$(PSUFFIX) : zlauum.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -spotf2.$(SUFFIX) spotf2.$(PSUFFIX) : lapack/potf2.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -dpotf2.$(SUFFIX) dpotf2.$(PSUFFIX) : lapack/potf2.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -qpotf2.$(SUFFIX) 
qpotf2.$(PSUFFIX) : potf2.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -cpotf2.$(SUFFIX) cpotf2.$(PSUFFIX) : lapack/zpotf2.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -zpotf2.$(SUFFIX) zpotf2.$(PSUFFIX) : lapack/zpotf2.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -xpotf2.$(SUFFIX) xpotf2.$(PSUFFIX) : zpotf2.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -spotrf.$(SUFFIX) spotrf.$(PSUFFIX) : lapack/potrf.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -dpotrf.$(SUFFIX) dpotrf.$(PSUFFIX) : lapack/potrf.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -qpotrf.$(SUFFIX) qpotrf.$(PSUFFIX) : potrf.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -cpotrf.$(SUFFIX) cpotrf.$(PSUFFIX) : lapack/zpotrf.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -zpotrf.$(SUFFIX) zpotrf.$(PSUFFIX) : lapack/zpotrf.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -xpotrf.$(SUFFIX) xpotrf.$(PSUFFIX) : zpotrf.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -strti2.$(SUFFIX) strti2.$(PSUFFIX) : lapack/trti2.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -dtrti2.$(SUFFIX) dtrti2.$(PSUFFIX) : lapack/trti2.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -qtrti2.$(SUFFIX) qtrti2.$(PSUFFIX) : trti2.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -ctrti2.$(SUFFIX) ctrti2.$(PSUFFIX) : lapack/ztrti2.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -ztrti2.$(SUFFIX) ztrti2.$(PSUFFIX) : lapack/ztrti2.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -xtrti2.$(SUFFIX) xtrti2.$(PSUFFIX) : ztrti2.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -strtri.$(SUFFIX) strtri.$(PSUFFIX) : lapack/trtri.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -dtrtri.$(SUFFIX) dtrtri.$(PSUFFIX) : lapack/trtri.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -qtrtri.$(SUFFIX) qtrtri.$(PSUFFIX) : trtri.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -ctrtri.$(SUFFIX) ctrtri.$(PSUFFIX) : lapack/ztrtri.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -ztrtri.$(SUFFIX) ztrtri.$(PSUFFIX) : lapack/ztrtri.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -xtrtri.$(SUFFIX) xtrtri.$(PSUFFIX) : ztrtri.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -slaswp.$(SUFFIX) slaswp.$(PSUFFIX) : lapack/laswp.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -dlaswp.$(SUFFIX) dlaswp.$(PSUFFIX) : 
lapack/laswp.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -qlaswp.$(SUFFIX) qlaswp.$(PSUFFIX) : laswp.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -claswp.$(SUFFIX) claswp.$(PSUFFIX) : lapack/zlaswp.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -zlaswp.$(SUFFIX) zlaswp.$(PSUFFIX) : lapack/zlaswp.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -xlaswp.$(SUFFIX) xlaswp.$(PSUFFIX) : zlaswp.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -sgetrs.$(SUFFIX) sgetrs.$(PSUFFIX) : lapack/getrs.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -dgetrs.$(SUFFIX) dgetrs.$(PSUFFIX) : lapack/getrs.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -qgetrs.$(SUFFIX) qgetrs.$(PSUFFIX) : lapack/getrs.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -cgetrs.$(SUFFIX) cgetrs.$(PSUFFIX) : lapack/zgetrs.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -zgetrs.$(SUFFIX) zgetrs.$(PSUFFIX) : lapack/zgetrs.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -xgetrs.$(SUFFIX) xgetrs.$(PSUFFIX) : lapack/zgetrs.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -strtrs.$(SUFFIX) strtrs.$(PSUFFIX) : lapack/trtrs.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -dtrtrs.$(SUFFIX) dtrtrs.$(PSUFFIX) : lapack/trtrs.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -qtrtrs.$(SUFFIX) qtrtrs.$(PSUFFIX) : lapack/trtrs.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -ctrtrs.$(SUFFIX) ctrtrs.$(PSUFFIX) : lapack/ztrtrs.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -ztrtrs.$(SUFFIX) ztrtrs.$(PSUFFIX) : lapack/ztrtrs.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -xtrtrs.$(SUFFIX) xtrtrs.$(PSUFFIX) : lapack/ztrtrs.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -sgesv.$(SUFFIX) sgesv.$(PSUFFIX) : lapack/gesv.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -dgesv.$(SUFFIX) dgesv.$(PSUFFIX) : lapack/gesv.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -qgesv.$(SUFFIX) qgesv.$(PSUFFIX) : gesv.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -cgesv.$(SUFFIX) cgesv.$(PSUFFIX) : lapack/gesv.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -zgesv.$(SUFFIX) zgesv.$(PSUFFIX) : lapack/gesv.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -xgesv.$(SUFFIX) xgesv.$(PSUFFIX) : gesv.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -spotri.$(SUFFIX) spotri.$(PSUFFIX) : 
lapack/potri.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -dpotri.$(SUFFIX) dpotri.$(PSUFFIX) : lapack/potri.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -qpotri.$(SUFFIX) qpotri.$(PSUFFIX) : potri.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -cpotri.$(SUFFIX) cpotri.$(PSUFFIX) : lapack/zpotri.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -zpotri.$(SUFFIX) zpotri.$(PSUFFIX) : lapack/zpotri.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -xpotri.$(SUFFIX) xpotri.$(PSUFFIX) : zpotri.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -slarf.$(SUFFIX) slarf.$(PSUFFIX) : larf.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -dlarf.$(SUFFIX) dlarf.$(PSUFFIX) : larf.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -qlarf.$(SUFFIX) qlarf.$(PSUFFIX) : larf.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -clarf.$(SUFFIX) clarf.$(PSUFFIX) : larf.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -zlarf.$(SUFFIX) zlarf.$(PSUFFIX) : larf.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -xlarf.$(SUFFIX) xlarf.$(PSUFFIX) : larf.c - $(CC) -c $(CFLAGS) $< -o $(@F) - - -############# BLAS EXTENSIONS ##################################### - -daxpby.$(SUFFIX) daxpby.$(PSUFFIX) : axpby.c - $(CC) $(CFLAGS) -c $< -o $(@F) - -cblas_daxpby.$(SUFFIX) cblas_daxpby.$(PSUFFIX) : axpby.c - $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) - -saxpby.$(SUFFIX) saxpby.$(PSUFFIX) : axpby.c - $(CC) $(CFLAGS) -c $< -o $(@F) - -cblas_saxpby.$(SUFFIX) cblas_saxpby.$(PSUFFIX) : axpby.c - $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) - -zaxpby.$(SUFFIX) zaxpby.$(PSUFFIX) : zaxpby.c - $(CC) $(CFLAGS) -c $< -o $(@F) - -cblas_zaxpby.$(SUFFIX) cblas_zaxpby.$(PSUFFIX) : zaxpby.c - $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) - -caxpby.$(SUFFIX) caxpby.$(PSUFFIX) : zaxpby.c - $(CC) $(CFLAGS) -c $< -o $(@F) - -cblas_caxpby.$(SUFFIX) cblas_caxpby.$(PSUFFIX) : zaxpby.c - $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) - -domatcopy.$(SUFFIX) domatcopy.$(PSUFFIX) : omatcopy.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -cblas_domatcopy.$(SUFFIX) cblas_domatcopy.$(PSUFFIX) : omatcopy.c - $(CC) -c $(CFLAGS) -DCBLAS $< -o $(@F) - -somatcopy.$(SUFFIX) somatcopy.$(PSUFFIX) : 
omatcopy.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -cblas_somatcopy.$(SUFFIX) cblas_somatcopy.$(PSUFFIX) : omatcopy.c - $(CC) -c $(CFLAGS) -DCBLAS $< -o $(@F) - -comatcopy.$(SUFFIX) comatcopy.$(PSUFFIX) : zomatcopy.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -cblas_comatcopy.$(SUFFIX) cblas_comatcopy.$(PSUFFIX) : zomatcopy.c - $(CC) -c $(CFLAGS) -DCBLAS $< -o $(@F) - -zomatcopy.$(SUFFIX) zomatcopy.$(PSUFFIX) : zomatcopy.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -cblas_zomatcopy.$(SUFFIX) cblas_zomatcopy.$(PSUFFIX) : zomatcopy.c - $(CC) -c $(CFLAGS) -DCBLAS $< -o $(@F) - -dimatcopy.$(SUFFIX) dimatcopy.$(PSUFFIX) : imatcopy.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -cblas_dimatcopy.$(SUFFIX) cblas_dimatcopy.$(PSUFFIX) : imatcopy.c - $(CC) -c $(CFLAGS) -DCBLAS $< -o $(@F) - -simatcopy.$(SUFFIX) simatcopy.$(PSUFFIX) : imatcopy.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -cblas_simatcopy.$(SUFFIX) cblas_simatcopy.$(PSUFFIX) : imatcopy.c - $(CC) -c $(CFLAGS) -DCBLAS $< -o $(@F) - -cimatcopy.$(SUFFIX) cimatcopy.$(PSUFFIX) : zimatcopy.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -cblas_cimatcopy.$(SUFFIX) cblas_cimatcopy.$(PSUFFIX) : zimatcopy.c - $(CC) -c $(CFLAGS) -DCBLAS $< -o $(@F) - -zimatcopy.$(SUFFIX) zimatcopy.$(PSUFFIX) : zimatcopy.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -cblas_zimatcopy.$(SUFFIX) cblas_zimatcopy.$(PSUFFIX) : zimatcopy.c - $(CC) -c $(CFLAGS) -DCBLAS $< -o $(@F) - -sgeadd.$(SUFFIX) sgeadd.$(PSUFFIX) : geadd.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -dgeadd.$(SUFFIX) dgeadd.$(PSUFFIX) : geadd.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -cgeadd.$(SUFFIX) cgeadd.$(PSUFFIX) : zgeadd.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -zgeadd.$(SUFFIX) zgeadd.$(PSUFFIX) : zgeadd.c - $(CC) -c $(CFLAGS) $< -o $(@F) - -cblas_sgeadd.$(SUFFIX) cblas_sgeadd.$(PSUFFIX) : geadd.c - $(CC) -c $(CFLAGS) -DCBLAS $< -o $(@F) - -cblas_dgeadd.$(SUFFIX) cblas_dgeadd.$(PSUFFIX) : geadd.c - $(CC) -c $(CFLAGS) -DCBLAS $< -o $(@F) - -cblas_cgeadd.$(SUFFIX) cblas_cgeadd.$(PSUFFIX) : zgeadd.c - $(CC) -c $(CFLAGS) -DCBLAS $< -o $(@F) - 
-cblas_zgeadd.$(SUFFIX) cblas_zgeadd.$(PSUFFIX) : zgeadd.c - $(CC) -c $(CFLAGS) -DCBLAS $< -o $(@F) - -cblas_xerbla.$(SUFFIX) cblas_xerbla.$(PSUFFIX) : xerbla.c - $(CC) -c $(CFLAGS) -DCBLAS $< -o $(@F) - + @rm -f Makefile.conf config.h Makefile_kernel.conf config_kernel.h st* *.dylib + @rm -f cblas.tmp cblas.tmp2 + @touch $(NETLIB_LAPACK_DIR)/make.inc + @$(MAKE) -C $(NETLIB_LAPACK_DIR) clean + @rm -f $(NETLIB_LAPACK_DIR)/make.inc $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke_mangling.h + @$(MAKE) -C relapack clean + @rm -f *.grd Makefile.conf_last config_last.h + @(cd $(NETLIB_LAPACK_DIR)/TESTING && rm -f x* *.out testing_results.txt) + @echo Done. From a8f249458de25e3dfcde1826a2a8c746270db398 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Wed, 13 Jan 2021 00:29:38 +0100 Subject: [PATCH 045/134] Build CBLAS interfaces for CROTG and ZROTG as well --- interface/Makefile | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/interface/Makefile b/interface/Makefile index 1a440c9c3..fab403c82 100644 --- a/interface/Makefile +++ b/interface/Makefile @@ -316,7 +316,7 @@ CCBLAS1OBJS = \ cblas_cscal.$(SUFFIX) cblas_csscal.$(SUFFIX) \ cblas_cswap.$(SUFFIX) cblas_scnrm2.$(SUFFIX) \ cblas_caxpby.$(SUFFIX) \ - cblas_icmin.$(SUFFIX) cblas_icmax.$(SUFFIX) cblas_scsum.$(SUFFIX) cblas_csrot.$(SUFFIX) + cblas_icmin.$(SUFFIX) cblas_icmax.$(SUFFIX) cblas_scsum.$(SUFFIX) cblas_csrot.$(SUFFIX) cblas_crotg.$(SUFFIX) CCBLAS2OBJS = \ cblas_cgemv.$(SUFFIX) cblas_cgerc.$(SUFFIX) cblas_cgeru.$(SUFFIX) \ @@ -346,7 +346,7 @@ CZBLAS1OBJS = \ cblas_zscal.$(SUFFIX) cblas_zdscal.$(SUFFIX) \ cblas_zswap.$(SUFFIX) cblas_dznrm2.$(SUFFIX) \ cblas_zaxpby.$(SUFFIX) \ - cblas_izmin.$(SUFFIX) cblas_izmax.$(SUFFIX) cblas_dzsum.$(SUFFIX) cblas_zdrot.$(SUFFIX) + cblas_izmin.$(SUFFIX) cblas_izmax.$(SUFFIX) cblas_dzsum.$(SUFFIX) cblas_zdrot.$(SUFFIX) cblas_zrotg.$(SUFFIX) CZBLAS2OBJS = \ @@ -1634,6 +1634,12 @@ cblas_srotg.$(SUFFIX) cblas_srotg.$(PSUFFIX): rotg.c cblas_drotg.$(SUFFIX) 
cblas_drotg.$(PSUFFIX): rotg.c $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) +cblas_crotg.$(SUFFIX) crotg.$(PSUFFIX): zrotg.c + $(CC) -c $(CFLAGS) -DCBLAS $< -o $(@F) + +cblas_zrotg.$(SUFFIX) zrotg.$(PSUFFIX): zrotg.c + $(CC) -c $(CFLAGS) -DCBLAS $< -o $(@F) + cblas_srotm.$(SUFFIX) cblas_srotm.$(PSUFFIX): rotm.c $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) From 25c986db5ac17cfacf5c12469545ab7ad64c5af9 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Wed, 13 Jan 2021 00:30:27 +0100 Subject: [PATCH 046/134] Add prototypes for CBLAS_CROTG and CBLAS_ZROTG --- cblas.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cblas.h b/cblas.h index 8aafdb186..f0220eb99 100644 --- a/cblas.h +++ b/cblas.h @@ -130,6 +130,9 @@ void cblas_zdrot(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONS void cblas_srotg(float *a, float *b, float *c, float *s); void cblas_drotg(double *a, double *b, double *c, double *s); +void cblas_crotg(void *a, void *b, float *c, void *s); +void cblas_zrotg(void *a, void *b, double *c, void *s); + void cblas_srotm(OPENBLAS_CONST blasint N, float *X, OPENBLAS_CONST blasint incX, float *Y, OPENBLAS_CONST blasint incY, OPENBLAS_CONST float *P); void cblas_drotm(OPENBLAS_CONST blasint N, double *X, OPENBLAS_CONST blasint incX, double *Y, OPENBLAS_CONST blasint incY, OPENBLAS_CONST double *P); From da8d7f09f18efc3101dfac4e1ef1c9413a15f71b Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Wed, 13 Jan 2021 09:46:53 +0100 Subject: [PATCH 047/134] try to work around gcc update problems --- .github/workflows/nightly-Homebrew-build.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/workflows/nightly-Homebrew-build.yml b/.github/workflows/nightly-Homebrew-build.yml index 8d7cfea2d..b025f8634 100644 --- a/.github/workflows/nightly-Homebrew-build.yml +++ b/.github/workflows/nightly-Homebrew-build.yml @@ -44,6 +44,11 @@ jobs: if: github.event_name != 'pull_request' run: brew update || true + - name: unlink installed gcc to allow updating + run: | 
+ brew unlink gcc@8 + brew unlink gcc@9 + - name: Install prerequisites run: brew install --fetch-HEAD --HEAD --only-dependencies --keep-tmp openblas From 89ae305e11dacb4622f58b03e48b4bb361acf94c Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Wed, 13 Jan 2021 12:30:26 +0100 Subject: [PATCH 048/134] Workaround for cmake having its own C_COMPILER variable --- cmake/utils.cmake | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cmake/utils.cmake b/cmake/utils.cmake index 8f25c1b27..29b5a067b 100644 --- a/cmake/utils.cmake +++ b/cmake/utils.cmake @@ -74,6 +74,9 @@ macro(ParseMakefileVars MAKEFILE_IN) string(REGEX MATCH "ifneq \\(\\$\\(([_A-Z]+)\\),[ \t]*([0-9_A-Z]+)\\)" line_match "${makefile_line}") if (NOT "${line_match}" STREQUAL "") # message(STATUS "IFNEQ: ${line_match} first: ${CMAKE_MATCH_1} second: ${CMAKE_MATCH_2}") + if ( ${CMAKE_MATCH_1} STREQUAL C_COMPILER) + set (CMAKE_MATCH_1 CMAKE_C_COMPILER) + endif () if (NOT ( ${${CMAKE_MATCH_1}} STREQUAL ${CMAKE_MATCH_2})) # message (STATUS "condition is true") set (IfElse 1) From 6bbe6d5b9203c92394463b8a96fd4995db73d9f4 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Wed, 13 Jan 2021 22:36:04 +0100 Subject: [PATCH 049/134] Make compile-time BUFFERSIZE setting actually reach the compiler/preprocessor --- Makefile.system | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Makefile.system b/Makefile.system index ca0879fe6..abc2c3dc5 100644 --- a/Makefile.system +++ b/Makefile.system @@ -1279,6 +1279,10 @@ CCOMMON_OPT += -DUSE_PAPI EXTRALIB += -lpapi -lperfctr endif +ifdef BUFFERSIZE +CCOMMON_OPT += -DBUFFERSIZE=$(BUFFERSIZE) +endif + ifdef DYNAMIC_THREADS CCOMMON_OPT += -DDYNAMIC_THREADS endif From e3f40636839eddd79fe1260010464dd7fe03e772 Mon Sep 17 00:00:00 2001 From: Albert Ziegenhagel Date: Thu, 14 Jan 2021 10:00:49 +0100 Subject: [PATCH 050/134] Fix building "generic" TRMM kernel with CMake The CMake "TARGET_CORE" variable stores the "generic" target name in all lowercase letters, but gets compared to 
an all uppercase string, which results in the wrong TRMM kernel being selected. This commit converts the TARGET_CORE to all uppercase before comparing its value to make sure case mismatches are not an issue in the future anymore. --- kernel/CMakeLists.txt | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kernel/CMakeLists.txt b/kernel/CMakeLists.txt index 6d8d759ad..f0793bdef 100644 --- a/kernel/CMakeLists.txt +++ b/kernel/CMakeLists.txt @@ -187,10 +187,11 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS) endif () # Makefile.L3 set(USE_TRMM false) - if (ARM OR ARM64 OR (TARGET_CORE MATCHES LONGSOON3B) OR (TARGET_CORE MATCHES GENERIC) OR (TARGET_CORE MATCHES HASWELL) OR (TARGET_CORE MATCHES ZEN) OR (TARGET_CORE MATCHES SKYLAKEX) OR (TARGET_CORE MATCHES COOPERLAKE)) + string(TOUPPER ${TARGET_CORE} UC_TARGET_CORE) + if (ARM OR ARM64 OR (UC_TARGET_CORE MATCHES LONGSOON3B) OR (UC_TARGET_CORE MATCHES GENERIC) OR (UC_TARGET_CORE MATCHES HASWELL) OR (UC_TARGET_CORE MATCHES ZEN) OR (UC_TARGET_CORE MATCHES SKYLAKEX) OR (UC_TARGET_CORE MATCHES COOPERLAKE)) set(USE_TRMM true) endif () - if (ZARCH OR (TARGET_CORE MATCHES POWER8) OR (TARGET_CORE MATCHES POWER9) OR (TARGET_CORE MATCHES POWER10)) + if (ZARCH OR (UC_TARGET_CORE MATCHES POWER8) OR (UC_TARGET_CORE MATCHES POWER9) OR (UC_TARGET_CORE MATCHES POWER10)) set(USE_TRMM true) endif () From 202fc9e8ed509224761e9c310e3ca0b5a3346134 Mon Sep 17 00:00:00 2001 From: Alex Henrie Date: Thu, 14 Jan 2021 19:40:31 -0700 Subject: [PATCH 051/134] Fix uninitialized argument value in dasum_k --- kernel/x86_64/dasum.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/kernel/x86_64/dasum.c b/kernel/x86_64/dasum.c index ddec21383..534f257d2 100644 --- a/kernel/x86_64/dasum.c +++ b/kernel/x86_64/dasum.c @@ -93,7 +93,6 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) #if defined(SMP) int nthreads; FLOAT dummy_alpha; - FLOAT * dummy_b; #endif FLOAT sumf = 0.0; @@ -115,7 +114,7 @@ 
FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) #else mode = BLAS_DOUBLE | BLAS_REAL; #endif - blas_level1_thread_with_return_value(mode, n, 0, 0, &dummy_alpha, x, inc_x, dummy_b, 0, result, 0, (void *)asum_thread_function, nthreads); + blas_level1_thread_with_return_value(mode, n, 0, 0, &dummy_alpha, x, inc_x, NULL, 0, result, 0, (void *)asum_thread_function, nthreads); ptr = (FLOAT *)result; for (i = 0; i < nthreads; i++) { sumf += (*ptr); From 6f32991eae430b37137f4635d7627b5fecbd24c7 Mon Sep 17 00:00:00 2001 From: Alex Henrie Date: Thu, 14 Jan 2021 19:40:31 -0700 Subject: [PATCH 052/134] Don't define the mode variable when not needed in gemm functions --- interface/gemm.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/interface/gemm.c b/interface/gemm.c index 860e588fe..6fde69049 100644 --- a/interface/gemm.c +++ b/interface/gemm.c @@ -246,6 +246,7 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANS #ifdef SMP double MNK; +#if defined(USE_SIMPLE_THREADED_LEVEL3) || !defined(NO_AFFINITY) #ifndef COMPLEX #ifdef XDOUBLE int mode = BLAS_XDOUBLE | BLAS_REAL; @@ -264,6 +265,7 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANS #endif #endif #endif +#endif #if defined(SMP) && !defined(NO_AFFINITY) && !defined(USE_SIMPLE_THREADED_LEVEL3) int nodes; @@ -417,8 +419,10 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANS sb = (XFLOAT *)(((BLASLONG)sa + ((GEMM_P * GEMM_Q * COMPSIZE * SIZE + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B); #ifdef SMP +#if defined(USE_SIMPLE_THREADED_LEVEL3) || !defined(NO_AFFINITY) mode |= (transa << BLAS_TRANSA_SHIFT); mode |= (transb << BLAS_TRANSB_SHIFT); +#endif MNK = (double) args.m * (double) args.n * (double) args.k; if ( MNK <= (SMP_THRESHOLD_MIN * (double) GEMM_MULTITHREAD_THRESHOLD) ) From f1bf2603e6435202b600fb8e7dab3e4d124acb61 Mon Sep 17 00:00:00 2001 From: Alex Henrie Date: Thu, 14 Jan 2021 19:40:32 -0700 Subject: [PATCH 
053/134] Remove dead assignment to dflag in rotmg functions --- interface/rotmg.c | 1 - 1 file changed, 1 deletion(-) diff --git a/interface/rotmg.c b/interface/rotmg.c index ce3b146c1..3a5ca8f95 100644 --- a/interface/rotmg.c +++ b/interface/rotmg.c @@ -107,7 +107,6 @@ void CNAME(FLOAT *dd1, FLOAT *dd2, FLOAT *dx1, FLOAT dy1, FLOAT *dparam){ dq1 = dp1 * *dx1; if(ABS(dq1) > ABS(dq2)) { - dflag = ZERO; dh11 = ONE; dh22 = ONE; dh21 = - dy1 / *dx1; From eff7c9166ecea213b99384ea8923ea08d7445398 Mon Sep 17 00:00:00 2001 From: Rajalakshmi Srinivasaraghavan Date: Fri, 15 Jan 2021 13:40:34 -0600 Subject: [PATCH 054/134] Optimize cdot function for POWER10 This patch makes use of new POWER10 vector pair instructions for loads and stores. --- kernel/power/KERNEL.POWER10 | 4 - kernel/power/cdot.c | 8 ++ kernel/power/cdot_microk_power10.c | 177 +++++++++++++++++++++++++++++ 3 files changed, 185 insertions(+), 4 deletions(-) create mode 100644 kernel/power/cdot_microk_power10.c diff --git a/kernel/power/KERNEL.POWER10 b/kernel/power/KERNEL.POWER10 index d61f5194a..1cf7b0b7c 100644 --- a/kernel/power/KERNEL.POWER10 +++ b/kernel/power/KERNEL.POWER10 @@ -154,11 +154,7 @@ ZCOPYKERNEL = zcopy_power10.c SDOTKERNEL = sdot_power10.c DDOTKERNEL = ddot_power10.c DSDOTKERNEL = sdot_power10.c -ifneq ($(GCCVERSIONGTEQ9),1) -CDOTKERNEL = cdot_power9.S -else CDOTKERNEL = cdot.c -endif ZDOTKERNEL = zdot.c # SNRM2KERNEL = ../arm/nrm2.c diff --git a/kernel/power/cdot.c b/kernel/power/cdot.c index ef5e4710f..c53fe0c02 100644 --- a/kernel/power/cdot.c +++ b/kernel/power/cdot.c @@ -28,6 +28,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#else #include "common.h" +#if defined(POWER10) +#include "cdot_microk_power10.c" +#else #ifndef HAVE_KERNEL_8 #include @@ -99,6 +102,7 @@ static void cdot_kernel_8(BLASLONG n, FLOAT *x, FLOAT *y, float *dot) } #endif +#endif OPENBLAS_COMPLEX_FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) { @@ -116,7 +120,11 @@ OPENBLAS_COMPLEX_FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLA if ((inc_x == 1) && (inc_y == 1)) { +#if defined(POWER10) + BLASLONG n1 = n & -16; +#else BLASLONG n1 = n & -8; +#endif BLASLONG j=0; if (n1){ diff --git a/kernel/power/cdot_microk_power10.c b/kernel/power/cdot_microk_power10.c new file mode 100644 index 000000000..399f2b180 --- /dev/null +++ b/kernel/power/cdot_microk_power10.c @@ -0,0 +1,177 @@ +/*************************************************************************** +Copyright (c) 2021, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*****************************************************************************/ + +#define HAVE_KERNEL_8 1 + +static void cdot_kernel_8 (long n, float *x, float *y, float *dot) +{ + __vector unsigned char mask = { 11,10,9,8,15,14,13,12,3,2,1,0,7,6,5,4}; + __asm__ + ( + "dcbt 0, %2 \n\t" + "dcbt 0, %3 \n\t" + + "xxlxor 32, 32, 32 \n\t" + "xxlxor 33, 33, 33 \n\t" + "xxlxor 34, 34, 34 \n\t" + "xxlxor 35, 35, 35 \n\t" + "xxlxor 36, 36, 36 \n\t" + "xxlxor 37, 37, 37 \n\t" + "xxlxor 38, 38, 38 \n\t" + "xxlxor 39, 39, 39 \n\t" + + "lxvp 40, 0(%2) \n\t" + "lxvp 42, 32(%2) \n\t" + "lxvp 44, 64(%2) \n\t" + "lxvp 46, 96(%2) \n\t" + "lxvp 48, 0(%3) \n\t" + "lxvp 50, 32(%3) \n\t" + "lxvp 52, 64(%3) \n\t" + "lxvp 54, 96(%3) \n\t" + + "xxperm 56, 48, %x7 \n\t" + "xxperm 57, 49, %x7 \n\t" + "xxperm 58, 50, %x7 \n\t" + "xxperm 59, 51, %x7 \n\t" + + "xxperm 60, 52, %x7 \n\t" + "xxperm 61, 53, %x7 \n\t" + "xxperm 62, 54, %x7 \n\t" + "xxperm 63, 55, %x7 \n\t" + + "addi %2, %2, 128 \n\t" + "addi %3, %3, 128 \n\t" + + "addic. 
%1, %1, -16 \n\t" + "ble two%= \n\t" + + ".align 5 \n" + "one%=: \n\t" + + "xvmaddasp 32, 40, 48 \n\t" // x0_r * y0_r , x0_i * y0_i + "xvmaddasp 34, 41, 49 \n\t" // x1_r * y1_r , x1_i * y1_i + "lxvp 48, 0(%3) \n\t" + + "xvmaddasp 36, 42, 50 \n\t" // x2_r * y2_r , x2_i * y2_i + "xvmaddasp 38, 43, 51 \n\t" // x3_r * y3_r , x3_i * y3_i + "lxvp 50, 32(%3) \n\t" + + "xvmaddasp 33, 40, 56 \n\t" // x0_r * y0_i , x0_i * y0_r + "xvmaddasp 35, 41, 57 \n\t" // x1_r * y1_i , x1_i * y1_r + "lxvp 40, 0(%2) \n\t" + + "xvmaddasp 37, 42, 58 \n\t" // x2_r * y2_i , x2_i * y2_r + "xvmaddasp 39, 43, 59 \n\t" // x3_r * y3_i , x3_i * y3_r + "lxvp 42, 32(%2) \n\t" + + "xxperm 56, 48, %x7 \n\t" + "xxperm 57, 49, %x7 \n\t" + "xxperm 58, 50, %x7 \n\t" + "xxperm 59, 51, %x7 \n\t" + + "xvmaddasp 32, 44, 52 \n\t" // x0_r * y0_r , x0_i * y0_i + "xvmaddasp 34, 45, 53 \n\t" // x1_r * y1_r , x1_i * y1_i + "lxvp 52, 64(%3) \n\t" + + "xvmaddasp 36, 46, 54 \n\t" // x2_r * y2_r , x2_i * y2_i + "xvmaddasp 38, 47, 55 \n\t" // x3_r * y3_r , x3_i * y3_i + "lxvp 54, 96(%3) \n\t" + + "xvmaddasp 33, 44, 60 \n\t" // x0_r * y0_i , x0_i * y0_r + "xvmaddasp 35, 45, 61 \n\t" // x1_r * y1_i , x1_i * y1_r + "lxvp 44, 64(%2) \n\t" + "xvmaddasp 37, 46, 62 \n\t" // x2_r * y2_i , x2_i * y2_r + "xvmaddasp 39, 47, 63 \n\t" // x3_r * y3_i , x3_i * y3_r + "lxvp 46, 96(%2) \n\t" + + "xxperm 60, 52, %x7 \n\t" + "xxperm 61, 53, %x7 \n\t" + "xxperm 62, 54, %x7 \n\t" + "xxperm 63, 55, %x7 \n\t" + + "addi %2, %2, 128 \n\t" + "addi %3, %3, 128 \n\t" + + "addic. 
%1, %1, -16 \n\t" + "bgt one%= \n" + + "two%=: \n\t" + + "xvmaddasp 32, 40, 48 \n\t" // x0_r * y0_r , x0_i * y0_i + "xvmaddasp 34, 41, 49 \n\t" // x1_r * y1_r , x1_i * y1_i + "xvmaddasp 36, 42, 50 \n\t" // x2_r * y2_r , x2_i * y2_i + "xvmaddasp 38, 43, 51 \n\t" // x3_r * y3_r , x3_i * y3_i + + "xvmaddasp 33, 40, 56 \n\t" // x0_r * y0_i , x0_i * y0_r + "xvmaddasp 35, 41, 57 \n\t" // x1_r * y1_i , x1_i * y1_r + "xvmaddasp 37, 42, 58 \n\t" // x2_r * y2_i , x2_i * y2_r + "xvmaddasp 39, 43, 59 \n\t" // x3_r * y3_i , x3_i * y3_r + + "xvmaddasp 32, 44, 52 \n\t" // x0_r * y0_r , x0_i * y0_i + "xvmaddasp 34, 45, 53 \n\t" // x1_r * y1_r , x1_i * y1_i + "xvmaddasp 36, 46, 54 \n\t" // x2_r * y2_r , x2_i * y2_i + "xvmaddasp 38, 47, 55 \n\t" // x3_r * y3_r , x3_i * y3_i + + "xvmaddasp 33, 44, 60 \n\t" // x0_r * y0_i , x0_i * y0_r + "xvmaddasp 35, 45, 61 \n\t" // x1_r * y1_i , x1_i * y1_r + "xvmaddasp 37, 46, 62 \n\t" // x2_r * y2_i , x2_i * y2_r + "xvmaddasp 39, 47, 63 \n\t" // x3_r * y3_i , x3_i * y3_r + + "xvaddsp 32, 32, 34 \n\t" + "xvaddsp 36, 36, 38 \n\t" + + "xvaddsp 33, 33, 35 \n\t" + "xvaddsp 37, 37, 39 \n\t" + + "xvaddsp 35, 32, 36 \n\t" + "xvaddsp 34, 33, 37 \n\t" + "xxswapd 32, 35 \n\t" + "xxswapd 33, 34 \n\t" + "xvaddsp 35, 35, 32 \n\t" + "xvaddsp 34, 34, 33 \n\t" + "xxpermdi 34, 34, 35, 2 \n\t" + "stxv 34, 0(%6) \n\t" + + "#n=%1 x=%4=%2 y=%5=%3 dot=%0=%6" + : + "=m" (*dot), + "+r" (n), // 1 + "+b" (x), // 2 + "+b" (y) // 3 + : + "m" (*x), + "m" (*y), + "b" (dot), // 6 + "wa" (mask) + : + "cr0", + "vs32","vs33","vs34","vs35","vs36","vs37","vs38","vs39", + "vs40","vs41","vs42","vs43","vs44","vs45","vs46","vs47", + "vs48","vs49","vs50","vs51","vs52","vs53","vs54","vs55", + "vs56","vs57","vs58","vs59","vs60","vs61","vs62","vs63" + ); +} From b60de4447a5dd279af79276ae42dd7fa9ae8578f Mon Sep 17 00:00:00 2001 From: xoviat Date: Tue, 19 Jan 2021 08:57:44 -0600 Subject: [PATCH 055/134] add cortex-m platform --- .gitignore | 1 + README.md | 3 ++- cmake/system.cmake | 5 +++++ 
common.h | 13 +++++++++++-- driver/others/memory.c | 6 ++++-- 5 files changed, 23 insertions(+), 5 deletions(-) diff --git a/.gitignore b/.gitignore index bca79f043..8674c4536 100644 --- a/.gitignore +++ b/.gitignore @@ -91,3 +91,4 @@ benchmark/*.goto benchmark/smallscaling CMakeCache.txt CMakeFiles/* +.vscode diff --git a/README.md b/README.md index fed3936ee..174f951f4 100644 --- a/README.md +++ b/README.md @@ -212,7 +212,8 @@ Please note that it is not possible to combine support for different architectur - **Android**: Supported by the community. Please read . - **AIX**: Supported on PPC up to POWER8 - **Haiku**: Supported by the community. We don't actively test the library on this OS. -- **SunOS**: Supported by the community. We don't actively test the library on this OS: +- **SunOS**: Supported by the community. We don't actively test the library on this OS. +- **Cortex-M**: Supported by the community. Please read . ## Usage diff --git a/cmake/system.cmake b/cmake/system.cmake index 66e95c6d3..869cc62da 100644 --- a/cmake/system.cmake +++ b/cmake/system.cmake @@ -233,6 +233,11 @@ if (BINARY64) endif () endif () +if(EMBEDDED) + set(CCOMMON_OPT "${CCOMMON_OPT} -DOS_EMBEDDED") + set(CCOMMON_OPT "${CCOMMON_OPT} -mthumb -mcpu=cortex-m4 -mfloat-abi=hard -mfpu=fpv4-sp-d16") +endif() + if (NEED_PIC) if (${CMAKE_C_COMPILER} STREQUAL "IBM") set(CCOMMON_OPT "${CCOMMON_OPT} -qpic=large") diff --git a/common.h b/common.h index 2825407cb..862e0b4db 100644 --- a/common.h +++ b/common.h @@ -122,7 +122,7 @@ extern "C" { #define ATOM GOTO_ATOM #undef GOTO_ATOM #endif -#else +#elif !defined(OS_EMBEDDED) #include #ifndef NO_SYSV_IPC #include @@ -134,6 +134,9 @@ extern "C" { #if defined(SMP) || defined(USE_LOCKING) #include #endif +#else +#include +#include #endif #if defined(OS_SUNOS) @@ -488,10 +491,12 @@ static inline unsigned long long rpcc(void){ struct timespec ts; clock_gettime(CLOCK_MONOTONIC, &ts); return (unsigned long long)ts.tv_sec * 1000000000ull + ts.tv_nsec; 
-#else +#elif !defined(OS_EMBEDDED) struct timeval tv; gettimeofday(&tv,NULL); return (unsigned long long)tv.tv_sec * 1000000000ull + tv.tv_usec * 1000; +#else + return 0; #endif } #define RPCC_DEFINED @@ -521,6 +526,10 @@ static void __inline blas_lock(volatile BLASULONG *address){ #include "common_linux.h" #endif +#ifdef OS_EMBEDDED +#define DTB_DEFAULT_ENTRIES 64 +#endif + #define MMAP_ACCESS (PROT_READ | PROT_WRITE) #ifdef __NetBSD__ diff --git a/driver/others/memory.c b/driver/others/memory.c index f0521ab2d..2fb1f1f73 100644 --- a/driver/others/memory.c +++ b/driver/others/memory.c @@ -1668,16 +1668,18 @@ void gotoblas_dummy_for_PGI(void) { #ifndef MEM_LARGE_PAGES #define MEM_LARGE_PAGES 0x20000000 #endif -#else +#elif !defined(OS_EMBEDDED) #define ALLOC_MMAP #define ALLOC_MALLOC +#else +#define ALLOC_MALLOC #endif #include #include #include -#if !defined(OS_WINDOWS) || defined(OS_CYGWIN_NT) +#if (!defined(OS_WINDOWS) || defined(OS_CYGWIN_NT)) && !defined(OS_EMBEDDED) #include #ifndef NO_SYSV_IPC #include From 63fa3c3f8f869c585d8c5aef6f580a967b64405c Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Wed, 20 Jan 2021 15:41:04 +0100 Subject: [PATCH 056/134] Require gcc 11 for builtin_cpu_is(power10) fixes #3074 --- driver/others/dynamic_power.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/driver/others/dynamic_power.c b/driver/others/dynamic_power.c index f9feeb6e8..18f16f835 100644 --- a/driver/others/dynamic_power.c +++ b/driver/others/dynamic_power.c @@ -202,7 +202,7 @@ static gotoblas_t *get_coretype(void) { return &gotoblas_POWER10; #endif /* Fall back to the POWER9 implementation if the toolchain is too old or the MMA feature is not set */ -#if (!defined __GNUC__) || ( __GNUC__ >= 6) +#if (!defined __GNUC__) || ( __GNUC__ >= 11) if (__builtin_cpu_is("power10")) return &gotoblas_POWER9; #endif From 0b9e4d127881d1efcd10ac64bf5bc2b3af9666f7 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Wed, 20 Jan 2021 18:30:05 +0100 Subject: 
[PATCH 057/134] Add gcc10/arm64 DYNAMIC_ARCH build --- .drone.yml | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/.drone.yml b/.drone.yml index b1c211d14..1bdeb8cdf 100644 --- a/.drone.yml +++ b/.drone.yml @@ -190,3 +190,25 @@ steps: - make -C ctest $COMMON_FLAGS - make -C utest $COMMON_FLAGS - make -C cpp_thread_test dgemm_tester +--- +kind: pipeline +name: arm64_gcc10 + +platform: + os: linux + arch: arm64 + +steps: +- name: Build and Test + image: ubuntu:20.04 + environment: + CC: gcc-10 + COMMON_FLAGS: 'TARGET=ARMV8 DYNAMIC_ARCH=1' + commands: + - echo "MAKE_FLAGS:= $COMMON_FLAGS" + - apt-get update -y + - apt-get install -y make $CC gfortran-10 perl python g++ + - $CC --version + - make QUIET_MAKE=1 $COMMON_FLAGS + - make -C test $COMMON_FLAGS + From 6178974cd9dfe5bde1c36c05ad87834a5c1a9ce9 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Wed, 20 Jan 2021 20:21:27 +0100 Subject: [PATCH 058/134] Update .drone.yml --- .drone.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.drone.yml b/.drone.yml index 1bdeb8cdf..38ded2015 100644 --- a/.drone.yml +++ b/.drone.yml @@ -203,6 +203,7 @@ steps: image: ubuntu:20.04 environment: CC: gcc-10 + FC: gfortran-10 COMMON_FLAGS: 'TARGET=ARMV8 DYNAMIC_ARCH=1' commands: - echo "MAKE_FLAGS:= $COMMON_FLAGS" @@ -210,5 +211,6 @@ steps: - apt-get install -y make $CC gfortran-10 perl python g++ - $CC --version - make QUIET_MAKE=1 $COMMON_FLAGS + - make -C utest $COMMON_FLAGS - make -C test $COMMON_FLAGS From b94dab5250469d4d30d1a21bf0e0b78eea3cf286 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Wed, 20 Jan 2021 21:34:36 +0100 Subject: [PATCH 059/134] patch to support power10 in builtin_cpu_is was backported to gcc 10.2, so allow that as well --- driver/others/dynamic_power.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/driver/others/dynamic_power.c b/driver/others/dynamic_power.c index 18f16f835..b8e5840a3 100644 --- a/driver/others/dynamic_power.c +++ 
b/driver/others/dynamic_power.c @@ -202,7 +202,7 @@ static gotoblas_t *get_coretype(void) { return &gotoblas_POWER10; #endif /* Fall back to the POWER9 implementation if the toolchain is too old or the MMA feature is not set */ -#if (!defined __GNUC__) || ( __GNUC__ >= 11) +#if (!defined __GNUC__) || ( __GNUC__ >= 11) || (__GNUC__ == 10 && __GNUC_MINOR__ >= 2) if (__builtin_cpu_is("power10")) return &gotoblas_POWER9; #endif From 439b93f6d285fa29dba71a61df7bb8cf32fe0971 Mon Sep 17 00:00:00 2001 From: Rajalakshmi Srinivasaraghavan Date: Thu, 21 Jan 2021 13:24:45 -0600 Subject: [PATCH 060/134] Optimize s/drot function for POWER10 This patch makes use of new POWER10 vector pair instructions for loads and stores. --- kernel/power/drot.c | 22 ++++- kernel/power/drot_microk_power10.c | 148 ++++++++++++++++++++++++++++ kernel/power/srot.c | 22 ++++- kernel/power/srot_microk_power10.c | 151 +++++++++++++++++++++++++++++ 4 files changed, 341 insertions(+), 2 deletions(-) create mode 100644 kernel/power/drot_microk_power10.c create mode 100644 kernel/power/srot_microk_power10.c diff --git a/kernel/power/drot.c b/kernel/power/drot.c index 951c2f9c9..94d9d95a3 100644 --- a/kernel/power/drot.c +++ b/kernel/power/drot.c @@ -39,9 +39,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#pragma GCC optimize "O1" -#if defined(POWER8) || defined(POWER9) || defined(POWER10) #if defined(__VEC__) || defined(__ALTIVEC__) +#if defined(POWER8) || defined(POWER9) #include "drot_microk_power8.c" +#elif defined(POWER10) +#include "drot_microk_power10.c" #endif #endif @@ -115,12 +117,30 @@ int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT if ( (inc_x == 1) && (inc_y == 1) ) { +#if defined(POWER10) + if ( n >= 16 ) + { + BLASLONG align = ((32 - ((uintptr_t)y & (uintptr_t)0x1F)) >> 3) & 0x3; + for (i = 0; i < align; i++) { + temp = c*x[i] + s*y[i] ; + y[i] = c*y[i] - s*x[i] ; + x[i] = temp ; + } + } + BLASLONG n1 = (n-i) & -16; + if ( n1 > 0 ) + { + drot_kernel_16(n1,&x[i], &y[i], c, s); + i+=n1; + } +#else BLASLONG n1 = n & -16; if ( n1 > 0 ) { drot_kernel_16(n1, x1, y1, c, s); i=n1; } +#endif while(i < n) { diff --git a/kernel/power/drot_microk_power10.c b/kernel/power/drot_microk_power10.c new file mode 100644 index 000000000..e34e745c7 --- /dev/null +++ b/kernel/power/drot_microk_power10.c @@ -0,0 +1,148 @@ +/*************************************************************************** +Copyright (c) 2021, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. 
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*****************************************************************************/ + +#define HAVE_KERNEL_16 1 + +static void drot_kernel_16 (long n, double *x, double *y, double c, double s) +{ + __asm__ + ( + XXSPLTD_S(36,%x5,0) // load c to both dwords + XXSPLTD_S(37,%x6,0) // load s to both dwords + "lxvp 32, 0(%3) \n\t" // load x + "lxvp 34, 32(%3) \n\t" + "lxvp 48, 0(%4) \n\t" // load y + "lxvp 50, 32(%4) \n\t" + + "addic. 
%2, %2, -8 \n\t" + "ble two%= \n\t" + + ".align 5 \n" + "one%=: \n\t" + + "xvmuldp 40, 32, 36 \n\t" // c * x + "xvmuldp 41, 33, 36 \n\t" + "xvmuldp 42, 34, 36 \n\t" + "xvmuldp 43, 35, 36 \n\t" + + "xvmuldp 44, 32, 37 \n\t" // s * x + "xvmuldp 45, 33, 37 \n\t" + "xvmuldp 46, 34, 37 \n\t" + "xvmuldp 47, 35, 37 \n\t" + + "lxvp 32, 64(%3) \n\t" // load x + "lxvp 34, 96(%3) \n\t" + "xvmuldp 52, 48, 36 \n\t" // c * y + "xvmuldp 53, 49, 36 \n\t" + "xvmuldp 54, 50, 36 \n\t" + "xvmuldp 55, 51, 36 \n\t" + + "xvmuldp 38, 48, 37 \n\t" // s * y + "xvmuldp 39, 49, 37 \n\t" + "xvmuldp 56, 50, 37 \n\t" + "xvmuldp 57, 51, 37 \n\t" + + "lxvp 48, 64(%4) \n\t" // load y + "lxvp 50, 96(%4) \n\t" + + "xvadddp 40, 40, 38 \n\t" // c * x + s * y + "xvadddp 41, 41, 39 \n\t" // c * x + s * y + "xvadddp 42, 42, 56 \n\t" // c * x + s * y + "xvadddp 43, 43, 57 \n\t" // c * x + s * y + + "stxvp 40, 0(%3) \n\t" // store x + "stxvp 42, 32(%3) \n\t" + + "xvsubdp 52, 52, 44 \n\t" // c * y - s * x + "xvsubdp 53, 53, 45 \n\t" // c * y - s * x + "xvsubdp 54, 54, 46 \n\t" // c * y - s * x + "xvsubdp 55, 55, 47 \n\t" // c * y - s * x + + "stxvp 52, 0(%4) \n\t" // store y + "stxvp 54, 32(%4) \n\t" + + "addi %3, %3, 64 \n\t" + "addi %4, %4, 64 \n\t" + + "addic. 
%2, %2, -8 \n\t" + "bgt one%= \n" + + "two%=: \n\t" + + "xvmuldp 40, 32, 36 \n\t" // c * x + "xvmuldp 41, 33, 36 \n\t" + "xvmuldp 42, 34, 36 \n\t" + "xvmuldp 43, 35, 36 \n\t" + + "xvmuldp 52, 48, 36 \n\t" // c * y + "xvmuldp 53, 49, 36 \n\t" + "xvmuldp 54, 50, 36 \n\t" + "xvmuldp 55, 51, 36 \n\t" + + "xvmuldp 44, 32, 37 \n\t" // s * x + "xvmuldp 45, 33, 37 \n\t" + "xvmuldp 46, 34, 37 \n\t" + "xvmuldp 47, 35, 37 \n\t" + + "xvmuldp 38, 48, 37 \n\t" // s * y + "xvmuldp 39, 49, 37 \n\t" + "xvmuldp 56, 50, 37 \n\t" + "xvmuldp 57, 51, 37 \n\t" + + "xvadddp 40, 40, 38 \n\t" // c * x + s * y + "xvadddp 41, 41, 39 \n\t" // c * x + s * y + "xvadddp 42, 42, 56 \n\t" // c * x + s * y + "xvadddp 43, 43, 57 \n\t" // c * x + s * y + + "stxvp 40, 0(%3) \n\t" // store x + "stxvp 42, 32(%3) \n\t" + "xvsubdp 52, 52, 44 \n\t" // c * y - s * x + "xvsubdp 53, 53, 45 \n\t" // c * y - s * x + "xvsubdp 54, 54, 46 \n\t" // c * y - s * x + "xvsubdp 55, 55, 47 \n\t" // c * y - s * x + + "stxvp 52, 0(%4) \n\t" // store y + "stxvp 54, 32(%4) \n\t" + + "#n=%2 x=%0=%3 y=%1=%4 c=%5 s=%6\n" + : + "+m" (*x), + "+m" (*y), + "+r" (n), // 2 + "+b" (x), // 3 + "+b" (y) // 4 + : + "d" (c), // 5 + "d" (s) // 6 + : + "cr0", + "vs32","vs33","vs34","vs35","vs36","vs37","vs38","vs39", + "vs40","vs41","vs42","vs43","vs44","vs45","vs46","vs47", + "vs48","vs49","vs50","vs51","vs52","vs53","vs54","vs55", + "vs56","vs57" + ); +} diff --git a/kernel/power/srot.c b/kernel/power/srot.c index a53342f61..3e4f93e2a 100644 --- a/kernel/power/srot.c +++ b/kernel/power/srot.c @@ -39,9 +39,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#pragma GCC optimize "O1" -#if defined(POWER8) || defined(POWER9) || defined(POWER10) #if defined(__VEC__) || defined(__ALTIVEC__) +#if defined(POWER8) || defined(POWER9) #include "srot_microk_power8.c" +#elif defined(POWER10) +#include "srot_microk_power10.c" #endif #endif @@ -115,6 +117,23 @@ int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT if ( (inc_x == 1) && (inc_y == 1) ) { +#if defined(POWER10) + if ( n >= 16 ) + { + BLASLONG align = ((32 - ((uintptr_t)y & (uintptr_t)0x1F)) >> 2) & 0x7; + for (i = 0; i < align; i++) { + temp = c*x[i] + s*y[i] ; + y[i] = c*y[i] - s*x[i] ; + x[i] = temp ; + } + } + BLASLONG n1 = (n-i) & -16; + if ( n1 > 0 ) + { + srot_kernel_16(n1, &x1[i], &y1[i], c, s); + i+=n1; + } +#else BLASLONG n1 = n & -16; if ( n1 > 0 ) { @@ -122,6 +141,7 @@ int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT i=n1; } +#endif while(i < n) { temp = c*x[i] + s*y[i] ; diff --git a/kernel/power/srot_microk_power10.c b/kernel/power/srot_microk_power10.c new file mode 100644 index 000000000..c54c30742 --- /dev/null +++ b/kernel/power/srot_microk_power10.c @@ -0,0 +1,151 @@ +/*************************************************************************** +Copyright (c) 2021, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. 
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*****************************************************************************/ + +#define HAVE_KERNEL_16 1 + +static void srot_kernel_16 (long n, float *x, float *y, float c, float s) +{ + __asm__ + ( + "xscvdpspn 36, %x5 \n\t" // load c to all words + "xxspltw 36, 36, 0 \n\t" + + "xscvdpspn 37, %x6 \n\t" // load s to all words + "xxspltw 37, 37, 0 \n\t" + "lxvp 32, 0(%3) \n\t" // load x + "lxvp 34, 32(%3) \n\t" + "lxvp 48, 0(%4) \n\t" // load y + "lxvp 50, 32(%4) \n\t" + + "addic. 
%2, %2, -16 \n\t" + "ble two%= \n\t" + + ".align 5 \n" + "one%=: \n\t" + + "xvmulsp 40, 32, 36 \n\t" // c * x + "xvmulsp 41, 33, 36 \n\t" + "xvmulsp 42, 34, 36 \n\t" + "xvmulsp 43, 35, 36 \n\t" + + "xvmulsp 44, 32, 37 \n\t" // s * x + "xvmulsp 45, 33, 37 \n\t" + "xvmulsp 46, 34, 37 \n\t" + "xvmulsp 47, 35, 37 \n\t" + + "lxvp 32, 64(%3) \n\t" // load x + "lxvp 34, 96(%3) \n\t" + "xvmulsp 52, 48, 36 \n\t" // c * y + "xvmulsp 53, 49, 36 \n\t" + "xvmulsp 54, 50, 36 \n\t" + "xvmulsp 55, 51, 36 \n\t" + + "xvmulsp 38, 48, 37 \n\t" // s * y + "xvmulsp 39, 49, 37 \n\t" + "xvmulsp 56, 50, 37 \n\t" + "xvmulsp 57, 51, 37 \n\t" + + "lxvp 48, 64(%4) \n\t" // load y + "lxvp 50, 96(%4) \n\t" + + "xvaddsp 40, 40, 38 \n\t" // c * x + s * y + "xvaddsp 41, 41, 39 \n\t" // c * x + s * y + "xvaddsp 42, 42, 56 \n\t" // c * x + s * y + "xvaddsp 43, 43, 57 \n\t" // c * x + s * y + + "stxvp 40, 0(%3) \n\t" // store x + "stxvp 42, 32(%3) \n\t" + + "xvsubsp 52, 52, 44 \n\t" // c * y - s * x + "xvsubsp 53, 53, 45 \n\t" // c * y - s * x + "xvsubsp 54, 54, 46 \n\t" // c * y - s * x + "xvsubsp 55, 55, 47 \n\t" // c * y - s * x + + "stxvp 52, 0(%4) \n\t" // store y + "stxvp 54, 32(%4) \n\t" + + "addi %3, %3, 64 \n\t" + "addi %4, %4, 64 \n\t" + + "addic. 
%2, %2, -16 \n\t" + "bgt one%= \n" + + "two%=: \n\t" + + "xvmulsp 40, 32, 36 \n\t" // c * x + "xvmulsp 41, 33, 36 \n\t" + "xvmulsp 42, 34, 36 \n\t" + "xvmulsp 43, 35, 36 \n\t" + + "xvmulsp 52, 48, 36 \n\t" // c * y + "xvmulsp 53, 49, 36 \n\t" + "xvmulsp 54, 50, 36 \n\t" + "xvmulsp 55, 51, 36 \n\t" + + "xvmulsp 44, 32, 37 \n\t" // s * x + "xvmulsp 45, 33, 37 \n\t" + "xvmulsp 46, 34, 37 \n\t" + "xvmulsp 47, 35, 37 \n\t" + + "xvmulsp 38, 48, 37 \n\t" // s * y + "xvmulsp 39, 49, 37 \n\t" + "xvmulsp 56, 50, 37 \n\t" + "xvmulsp 57, 51, 37 \n\t" + + "xvaddsp 40, 40, 38 \n\t" // c * x + s * y + "xvaddsp 41, 41, 39 \n\t" // c * x + s * y + "xvaddsp 42, 42, 56 \n\t" // c * x + s * y + "xvaddsp 43, 43, 57 \n\t" // c * x + s * y + + "stxvp 40, 0(%3) \n\t" // store x + "stxvp 42, 32(%3) \n\t" + "xvsubsp 52, 52, 44 \n\t" // c * y - s * x + "xvsubsp 53, 53, 45 \n\t" // c * y - s * x + "xvsubsp 54, 54, 46 \n\t" // c * y - s * x + "xvsubsp 55, 55, 47 \n\t" // c * y - s * x + + "stxvp 52, 0(%4) \n\t" // store y + "stxvp 54, 32(%4) \n\t" + + "#n=%2 x=%0=%3 y=%1=%4 c=%5 s=%6\n" + : + "+m" (*x), + "+m" (*y), + "+r" (n), // 2 + "+b" (x), // 3 + "+b" (y) // 4 + : + "f" (c), // 5 + "f" (s) // 6 + : + "cr0", + "vs32","vs33","vs34","vs35","vs36","vs37","vs38","vs39", + "vs40","vs41","vs42","vs43","vs44","vs45","vs46","vs47", + "vs48","vs49","vs50","vs51","vs52","vs53","vs54","vs55", + "vs56","vs57" + ); +} From 2e8d6e869030843fec421831ed6388d84915c7c7 Mon Sep 17 00:00:00 2001 From: xoviat Date: Sat, 23 Jan 2021 22:12:17 -0600 Subject: [PATCH 061/134] add functions for embedded --- driver/others/memory.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/driver/others/memory.c b/driver/others/memory.c index 2fb1f1f73..b430fd5c1 100644 --- a/driver/others/memory.c +++ b/driver/others/memory.c @@ -1673,6 +1673,11 @@ void gotoblas_dummy_for_PGI(void) { #define ALLOC_MALLOC #else #define ALLOC_MALLOC + +inline int puts(const char *str) { return 0; } +inline int printf(const char *format, 
...) { return 0; } +inline char *getenv(const char *name) { return ""; } +inline int atoi(const char *str) { return 0; } #endif #include From 3ede843d509a95b0d63f58484ab8977cb2ddc39f Mon Sep 17 00:00:00 2001 From: Rajalakshmi Srinivasaraghavan Date: Sun, 24 Jan 2021 07:48:28 -0600 Subject: [PATCH 062/134] Optimize s/dscal function for POWER10 This patch makes use of new POWER10 vector pair instructions for loads and stores. --- kernel/power/dscal.c | 36 +++++++- kernel/power/dscal_microk_power10.c | 134 +++++++++++++++++++++++++++ kernel/power/sscal.c | 36 +++++++- kernel/power/sscal_microk_power10.c | 135 ++++++++++++++++++++++++++++ 4 files changed, 339 insertions(+), 2 deletions(-) create mode 100644 kernel/power/dscal_microk_power10.c create mode 100644 kernel/power/sscal_microk_power10.c diff --git a/kernel/power/dscal.c b/kernel/power/dscal.c index 39293252b..96c4e51bc 100644 --- a/kernel/power/dscal.c +++ b/kernel/power/dscal.c @@ -35,9 +35,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "common.h" -#if defined(POWER8) || defined(POWER9) || defined(POWER10) #if defined(__VEC__) || defined(__ALTIVEC__) +#if defined(POWER8) || defined(POWER9) #include "dscal_microk_power8.c" +#elif defined(POWER10) +#include "dscal_microk_power10.c" #endif #endif @@ -100,12 +102,28 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLAS if ( da == 0.0 ) { +#if defined(POWER10) + if ( n >= 16 ) + { + BLASLONG align = ((32 - ((uintptr_t)x & (uintptr_t)0x1F)) >> 3) & 0x3; + for (j = 0; j < align; j++) { + x[j] = 0.0; + } + } + BLASLONG n1 = (n-j) & -16; + if ( n1 > 0 ) + { + dscal_kernel_8_zero(n1, &x[j]); + j+=n1; + } +#else BLASLONG n1 = n & -16; if ( n1 > 0 ) { dscal_kernel_8_zero(n1, x); j=n1; } +#endif while(j < n) { @@ -118,12 +136,28 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLAS else { +#if defined(POWER10) + if ( n >= 16 ) + { + BLASLONG align = ((32 - ((uintptr_t)x & (uintptr_t)0x1F)) >> 3) & 0x3; + for (j = 0; j < align; j++) { + x[j] = da * x[j]; + } + } + BLASLONG n1 = (n-j) & -16; + if ( n1 > 0 ) + { + dscal_kernel_8(n1, &x[j], da); + j+=n1; + } +#else BLASLONG n1 = n & -16; if ( n1 > 0 ) { dscal_kernel_8(n1, x, da); j=n1; } +#endif while(j < n) { diff --git a/kernel/power/dscal_microk_power10.c b/kernel/power/dscal_microk_power10.c new file mode 100644 index 000000000..d0d506f24 --- /dev/null +++ b/kernel/power/dscal_microk_power10.c @@ -0,0 +1,134 @@ +/*************************************************************************** +Copyright (c) 2021, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. 
Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*****************************************************************************/ + +#define HAVE_KERNEL_8 1 + +static void dscal_kernel_8 (long n, double *x, double alpha) +{ + __asm__ + ( + "dcbt 0, %2 \n\t" + + XXSPLTD_S(48,%x3,0) + + "lxvp 32, 0(%2) \n\t" + "lxvp 34, 32(%2) \n\t" + "lxvp 36, 64(%2) \n\t" + "lxvp 38, 96(%2) \n\t" + + "addic. 
%1, %1, -16 \n\t" + "ble two%= \n\t" + + ".align 5 \n" + "one%=: \n\t" + + "xvmuldp 40, 32, 48 \n\t" + "xvmuldp 41, 33, 48 \n\t" + "xvmuldp 42, 34, 48 \n\t" + "xvmuldp 43, 35, 48 \n\t" + "lxvp 32, 128(%2) \n\t" + "lxvp 34, 160(%2) \n\t" + "xvmuldp 44, 36, 48 \n\t" + "xvmuldp 45, 37, 48 \n\t" + "xvmuldp 46, 38, 48 \n\t" + "xvmuldp 47, 39, 48 \n\t" + "lxvp 36, 192(%2) \n\t" + "lxvp 38, 224(%2) \n\t" + + "stxvp 40, 0(%2) \n\t" + "stxvp 42, 32(%2) \n\t" + "stxvp 44, 64(%2) \n\t" + "stxvp 46, 96(%2) \n\t" + + "addi %2, %2, 128 \n\t" + + "addic. %1, %1, -16 \n\t" + "bgt one%= \n" + + "two%=: \n\t" + + "xvmuldp 40, 32, 48 \n\t" + "xvmuldp 41, 33, 48 \n\t" + "xvmuldp 42, 34, 48 \n\t" + "xvmuldp 43, 35, 48 \n\t" + + "xvmuldp 44, 36, 48 \n\t" + "xvmuldp 45, 37, 48 \n\t" + "xvmuldp 46, 38, 48 \n\t" + "xvmuldp 47, 39, 48 \n\t" + + "stxvp 40, 0(%2) \n\t" + "stxvp 42, 32(%2) \n\t" + "stxvp 44, 64(%2) \n\t" + "stxvp 46, 96(%2) \n\t" + + "#n=%1 alpha=%3 x=%0=%2" + : + "+m" (*x), + "+r" (n), // 1 + "+b" (x) // 2 + : + "d" (alpha) // 3 + : + "cr0", + "vs32","vs33","vs34","vs35","vs36","vs37","vs38","vs39", + "vs40","vs41","vs42","vs43","vs44","vs45","vs46","vs47","vs48" + ); +} + + +static void dscal_kernel_8_zero (long n, double *x) +{ + + __asm__ + ( + "xxlxor 32, 32, 32 \n\t" + "xxlxor 33, 33, 33 \n\t" + + ".align 5 \n" + "one%=: \n\t" + + "stxvp 32, 0(%2) \n\t" + "stxvp 32, 32(%2) \n\t" + "stxvp 32, 64(%2) \n\t" + "stxvp 32, 96(%2) \n\t" + + "addi %2, %2, 128 \n\t" + + "addic. %1, %1, -16 \n\t" + "bgt one%= \n" + + "#n=%1 x=%0=%2 " + : + "=m" (*x), + "+r" (n), // 1 + "+b" (x) // 2 + : + : + "cr0","vs32","vs33" + ); +} diff --git a/kernel/power/sscal.c b/kernel/power/sscal.c index de37e10a5..65572a8c1 100644 --- a/kernel/power/sscal.c +++ b/kernel/power/sscal.c @@ -35,9 +35,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "common.h" -#if defined(POWER8) || defined(POWER9) || defined(POWER10) #if defined(__VEC__) || defined(__ALTIVEC__) +#if defined(POWER8) || defined(POWER9) #include "sscal_microk_power8.c" +#elif defined(POWER10) +#include "sscal_microk_power10.c" #endif #endif @@ -102,12 +104,28 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLAS if ( da == 0.0 ) { +#if defined(POWER10) + if ( n >= 32 ) + { + BLASLONG align = ((32 - ((uintptr_t)x & (uintptr_t)0x1F)) >> 2) & 0x7; + for (j = 0; j < align; j++) { + x[j] = 0.0; + } + } + BLASLONG n1 = (n-j) & -32; + if ( n1 > 0 ) + { + sscal_kernel_16_zero(n1, &x[j]); + j+=n1; + } +#else BLASLONG n1 = n & -32; if ( n1 > 0 ) { sscal_kernel_16_zero(n1, x); j=n1; } +#endif while(j < n) { @@ -120,12 +138,28 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLAS else { +#if defined(POWER10) + if ( n >= 32 ) + { + BLASLONG align = ((32 - ((uintptr_t)x & (uintptr_t)0x1F)) >> 2) & 0x7; + for (j = 0; j < align; j++) { + x[j] = da * x[j]; + } + } + BLASLONG n1 = (n-j) & -32; + if ( n1 > 0 ) + { + sscal_kernel_16(n1, &x[j], da); + j+=n1; + } +#else BLASLONG n1 = n & -32; if ( n1 > 0 ) { sscal_kernel_16(n1, x, da); j=n1; } +#endif while(j < n) { diff --git a/kernel/power/sscal_microk_power10.c b/kernel/power/sscal_microk_power10.c new file mode 100644 index 000000000..a523a1675 --- /dev/null +++ b/kernel/power/sscal_microk_power10.c @@ -0,0 +1,135 @@ +/*************************************************************************** +Copyright (c) 2021, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. 
Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*****************************************************************************/ + +#define HAVE_KERNEL_16 1 + +static void sscal_kernel_16 (long n, float *x, float alpha) +{ + __asm__ + ( + "dcbt 0, %2 \n\t" + + "xscvdpspn 48, %x3 \n\t" + "xxspltw 48, 48, 0 \n\t" + + "lxvp 32, 0(%2) \n\t" + "lxvp 34, 32(%2) \n\t" + "lxvp 36, 64(%2) \n\t" + "lxvp 38, 96(%2) \n\t" + + "addic. 
%1, %1, -32 \n\t" + "ble two%= \n\t" + + ".align 5 \n" + "one%=: \n\t" + + "xvmulsp 40, 32, 48 \n\t" + "xvmulsp 41, 33, 48 \n\t" + "xvmulsp 42, 34, 48 \n\t" + "xvmulsp 43, 35, 48 \n\t" + "lxvp 32, 128(%2) \n\t" + "lxvp 34, 160(%2) \n\t" + "xvmulsp 44, 36, 48 \n\t" + "xvmulsp 45, 37, 48 \n\t" + "xvmulsp 46, 38, 48 \n\t" + "xvmulsp 47, 39, 48 \n\t" + "lxvp 36, 192(%2) \n\t" + "lxvp 38, 224(%2) \n\t" + + "stxvp 40, 0(%2) \n\t" + "stxvp 42, 32(%2) \n\t" + "stxvp 44, 64(%2) \n\t" + "stxvp 46, 96(%2) \n\t" + + "addi %2, %2, 128 \n\t" + + "addic. %1, %1, -32 \n\t" + "bgt one%= \n" + + "two%=: \n\t" + + "xvmulsp 40, 32, 48 \n\t" + "xvmulsp 41, 33, 48 \n\t" + "xvmulsp 42, 34, 48 \n\t" + "xvmulsp 43, 35, 48 \n\t" + + "xvmulsp 44, 36, 48 \n\t" + "xvmulsp 45, 37, 48 \n\t" + "xvmulsp 46, 38, 48 \n\t" + "xvmulsp 47, 39, 48 \n\t" + + "stxvp 40, 0(%2) \n\t" + "stxvp 42, 32(%2) \n\t" + "stxvp 44, 64(%2) \n\t" + "stxvp 46, 96(%2) \n\t" + + "#n=%1 alpha=%3 x=%0=%2" + : + "+m" (*x), + "+r" (n), // 1 + "+b" (x) // 2 + : + "f" (alpha) // 3 + : + "cr0", + "vs32","vs33","vs34","vs35","vs36","vs37","vs38","vs39", + "vs40","vs41","vs42","vs43","vs44","vs45","vs46","vs47","vs48" + ); +} + + +static void sscal_kernel_16_zero (long n, float *x) +{ + + __asm__ + ( + "xxlxor 32, 32, 32 \n\t" + "xxlxor 33, 33, 33 \n\t" + + ".align 5 \n" + "one%=: \n\t" + + "stxvp 32, 0(%2) \n\t" + "stxvp 32, 32(%2) \n\t" + "stxvp 32, 64(%2) \n\t" + "stxvp 32, 96(%2) \n\t" + + "addi %2, %2, 128 \n\t" + + "addic. 
%1, %1, -32 \n\t" + "bgt one%= \n" + + "#n=%1 x=%0=%2 " + : + "=m" (*x), + "+r" (n), // 1 + "+b" (x) // 2 + : + : + "cr0","vs32","vs33" + ); +} From 9b2d69aa80b72f9958860a5e8bcadb89f0e81045 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sun, 24 Jan 2021 23:18:01 +0100 Subject: [PATCH 063/134] Add DYNAMIC_LIST option for ARM64 --- Makefile.system | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Makefile.system b/Makefile.system index abc2c3dc5..848c38797 100644 --- a/Makefile.system +++ b/Makefile.system @@ -625,6 +625,11 @@ DYNAMIC_CORE += THUNDERX2T99 DYNAMIC_CORE += TSV110 DYNAMIC_CORE += EMAG8180 DYNAMIC_CORE += THUNDERX3T110 +ifdef DYNAMIC_LIST +override DYNAMIC_CORE = ARMV8 $(DYNAMIC_LIST) +XCCOMMON_OPT = -DDYNAMIC_LIST -DDYN_ARMV8 +XCCOMMON_OPT += $(foreach dcore,$(DYNAMIC_LIST),-DDYN_$(dcore)) +endif endif ifeq ($(ARCH), mips64) From deb2e66bcce70c64b1e1d82612b24191563dedb5 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sun, 24 Jan 2021 23:18:52 +0100 Subject: [PATCH 064/134] Add DYNAMIC_LIST support for ARM64 --- driver/others/dynamic_arm64.c | 80 ++++++++++++++++++++++++++++++++++- 1 file changed, 78 insertions(+), 2 deletions(-) diff --git a/driver/others/dynamic_arm64.c b/driver/others/dynamic_arm64.c index 37c0694b6..a86a95890 100644 --- a/driver/others/dynamic_arm64.c +++ b/driver/others/dynamic_arm64.c @@ -41,8 +41,75 @@ #include #include #endif +#ifdef OS_DARWIN +#include +int32_t value; +size_t length=sizeof(value); +#endif extern gotoblas_t gotoblas_ARMV8; +#ifdef DYNAMIC_LIST +#ifdef DYN_CORTEXA53 +extern gotoblas_t gotoblas_CORTEXA53; +#else +#define gotoblas_CORTEXA53 gotoblas_ARMV8 +#endif +#ifdef DYN_CORTEXA57 +extern gotoblas_t gotoblas_CORTEXA57; +#else +#define gotoblas_CORTEXA57 gotoblas_ARMV8 +#endif +#ifdef DYN_CORTEXA72 +extern gotoblas_t gotoblas_CORTEXA72; +#else +#define gotoblas_CORTEXA72 gotoblas_ARMV8 +#endif +#ifdef DYN_CORTEXA73 +extern gotoblas_t gotoblas_CORTEXA73; +#else +#define gotoblas_CORTEXA73 
gotoblas_ARMV8 +#endif +#ifdef DYN_FALKOR +extern gotoblas_t gotoblas_FALKOR; +#else +#define gotoblas_FALKOR gotoblas_ARMV8 +#endif +#ifdef DYN_TSV110 +extern gotoblas_t gotoblas_TSV110; +#else +#define gotoblas_TSV110 gotoblas_ARMV8 +#endif +#ifdef DYN_THUNDERX +extern gotoblas_t gotoblas_THUNDERX; +#else +#define gotoblas_THUNDERX gotoblas_ARMV8 +#endif +#ifdef DYN_THUNDERX2T99 +extern gotoblas_t gotoblas_THUNDERX2T99; +#else +#define gotoblas_THUNDERX2T99 gotoblas_ARMV8 +#endif +#ifdef DYN_THUNDERX3T110 +extern gotoblas_t gotoblas_THUNDERX3T110; +#else +#define gotoblas_THUNDERX3T110 gotoblas_ARMV8 +#endif +#ifdef DYN_EMAG8180 +extern gotoblas_t gotoblas_EMAG8180; +#else +#define gotoblas_EMAG8180 gotoblas_ARMV8 +#endif +#ifdef DYN_NEOVERSEN1 +extern gotoblas_t gotoblas_NEOVERSEN1; +#else +#define gotoblas_NEOVERSEN1 gotoblas_ARMV8 +#endif +#ifdef DYN_VORTEX +extern gotoblas_t gotoblas_VORTEX; +#else +#define gotoblas_VORTEX gotoblas_ARMV8 +#endif +#else extern gotoblas_t gotoblas_CORTEXA53; extern gotoblas_t gotoblas_CORTEXA57; extern gotoblas_t gotoblas_CORTEXA72; @@ -54,10 +121,12 @@ extern gotoblas_t gotoblas_TSV110; extern gotoblas_t gotoblas_EMAG8180; extern gotoblas_t gotoblas_NEOVERSEN1; extern gotoblas_t gotoblas_THUNDERX3T110; +extern gotoblas_t gotoblas_VORTEX; +#endif extern void openblas_warning(int verbose, const char * msg); -#define NUM_CORETYPES 12 +#define NUM_CORETYPES 13 /* * In case asm/hwcap.h is outdated on the build system, make sure @@ -68,7 +137,7 @@ extern void openblas_warning(int verbose, const char * msg); #endif #define get_cpu_ftr(id, var) ({ \ - __asm__ __volatile__("mrs %0, "#id : "=r" (var)); \ + __asm__ ("mrs %0, "#id : "=r" (var)); \ }) static char *corename[] = { @@ -84,6 +153,7 @@ static char *corename[] = { "emag8180", "neoversen1", "thunderx3t110", + "vortex", "unknown" }; @@ -100,6 +170,7 @@ char *gotoblas_corename(void) { if (gotoblas == &gotoblas_EMAG8180) return corename[ 9]; if (gotoblas == &gotoblas_NEOVERSEN1) 
return corename[10]; if (gotoblas == &gotoblas_THUNDERX3T110) return corename[11]; + if (gotoblas == &gotoblas_VORTEX) return corename[12]; return corename[NUM_CORETYPES]; } @@ -131,6 +202,7 @@ static gotoblas_t *force_coretype(char *coretype) { case 9: return (&gotoblas_EMAG8180); case 10: return (&gotoblas_NEOVERSEN1); case 11: return (&gotoblas_THUNDERX3T110); + case 12: return (&gotoblas_VORTEX); } snprintf(message, 128, "Core not found: %s\n", coretype); openblas_warning(1, message); @@ -142,6 +214,10 @@ static gotoblas_t *get_coretype(void) { char coremsg[128]; #if (!defined OS_LINUX && !defined OS_ANDROID) +#ifdef DARWIN + sysctlbyname("hw.cpufamily",&value,&length,NULL,0); + if (value ==131287967) return CPU_VORTEX; +#endif return NULL; #else From 113840da12828418dedeb1392d55e45ae6a2a674 Mon Sep 17 00:00:00 2001 From: Alex Henrie Date: Sun, 24 Jan 2021 22:20:44 -0700 Subject: [PATCH 065/134] Fix null pointer check in blas_memory_alloc --- driver/others/memory.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/driver/others/memory.c b/driver/others/memory.c index f0521ab2d..91d21a88e 100644 --- a/driver/others/memory.c +++ b/driver/others/memory.c @@ -1241,7 +1241,7 @@ UNLOCK_COMMAND(&alloc_lock); func = &memoryalloc[0]; - while ((func != NULL) && (map_address == (void *) -1)) { + while ((*func != NULL) && (map_address == (void *) -1)) { map_address = (*func)((void *)base_address); From cb61d3b46bb65787bff8452cd384e047c2f5687d Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Mon, 25 Jan 2021 13:13:20 +0100 Subject: [PATCH 066/134] Add DYNAMIC_LIST support for ARM64 --- cmake/arch.cmake | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cmake/arch.cmake b/cmake/arch.cmake index 5457bfb07..4451f9eaa 100644 --- a/cmake/arch.cmake +++ b/cmake/arch.cmake @@ -45,6 +45,9 @@ endif () if (DYNAMIC_ARCH) if (ARM64) set(DYNAMIC_CORE ARMV8 CORTEXA53 CORTEXA57 CORTEXA72 CORTEXA73 FALKOR THUNDERX THUNDERX2T99 TSV110 EMAG8180 NEOVERSEN1 THUNDERX3T110) 
+ if (DYNAMIC_LIST) + set(DYNAMIC_CORE ARMV8 ${DYNAMIC_LIST}) + endif () endif () if (POWER) From 0cb9e9fc8d5b56eb0db42136dd8268671438ad27 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Mon, 25 Jan 2021 19:02:21 +0100 Subject: [PATCH 067/134] Remove the VORTEX support bits again for now --- driver/others/dynamic_arm64.c | 20 +------------------- 1 file changed, 1 insertion(+), 19 deletions(-) diff --git a/driver/others/dynamic_arm64.c b/driver/others/dynamic_arm64.c index a86a95890..6c68ba98a 100644 --- a/driver/others/dynamic_arm64.c +++ b/driver/others/dynamic_arm64.c @@ -41,11 +41,6 @@ #include #include #endif -#ifdef OS_DARWIN -#include -int32_t value; -size_t length=sizeof(value); -#endif extern gotoblas_t gotoblas_ARMV8; #ifdef DYNAMIC_LIST @@ -104,11 +99,6 @@ extern gotoblas_t gotoblas_NEOVERSEN1; #else #define gotoblas_NEOVERSEN1 gotoblas_ARMV8 #endif -#ifdef DYN_VORTEX -extern gotoblas_t gotoblas_VORTEX; -#else -#define gotoblas_VORTEX gotoblas_ARMV8 -#endif #else extern gotoblas_t gotoblas_CORTEXA53; extern gotoblas_t gotoblas_CORTEXA57; @@ -121,12 +111,11 @@ extern gotoblas_t gotoblas_TSV110; extern gotoblas_t gotoblas_EMAG8180; extern gotoblas_t gotoblas_NEOVERSEN1; extern gotoblas_t gotoblas_THUNDERX3T110; -extern gotoblas_t gotoblas_VORTEX; #endif extern void openblas_warning(int verbose, const char * msg); -#define NUM_CORETYPES 13 +#define NUM_CORETYPES 12 /* * In case asm/hwcap.h is outdated on the build system, make sure @@ -153,7 +142,6 @@ static char *corename[] = { "emag8180", "neoversen1", "thunderx3t110", - "vortex", "unknown" }; @@ -170,7 +158,6 @@ char *gotoblas_corename(void) { if (gotoblas == &gotoblas_EMAG8180) return corename[ 9]; if (gotoblas == &gotoblas_NEOVERSEN1) return corename[10]; if (gotoblas == &gotoblas_THUNDERX3T110) return corename[11]; - if (gotoblas == &gotoblas_VORTEX) return corename[12]; return corename[NUM_CORETYPES]; } @@ -202,7 +189,6 @@ static gotoblas_t *force_coretype(char *coretype) { case 9: return 
(&gotoblas_EMAG8180); case 10: return (&gotoblas_NEOVERSEN1); case 11: return (&gotoblas_THUNDERX3T110); - case 12: return (&gotoblas_VORTEX); } snprintf(message, 128, "Core not found: %s\n", coretype); openblas_warning(1, message); @@ -214,10 +200,6 @@ static gotoblas_t *get_coretype(void) { char coremsg[128]; #if (!defined OS_LINUX && !defined OS_ANDROID) -#ifdef DARWIN - sysctlbyname("hw.cpufamily",&value,&length,NULL,0); - if (value ==131287967) return CPU_VORTEX; -#endif return NULL; #else From 856bc365338f7559639f341d76ca8746d1628ee5 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Wed, 27 Jan 2021 13:41:45 +0100 Subject: [PATCH 068/134] Add exceptional shift to fix rare convergence problems --- lapack-netlib/SRC/chgeqz.f | 10 ++++++++-- lapack-netlib/SRC/zhgeqz.f | 10 ++++++++-- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/lapack-netlib/SRC/chgeqz.f b/lapack-netlib/SRC/chgeqz.f index 73d35621c..1616840ec 100644 --- a/lapack-netlib/SRC/chgeqz.f +++ b/lapack-netlib/SRC/chgeqz.f @@ -743,8 +743,14 @@ * * Exceptional shift. Chosen for no particularly good reason. * - ESHIFT = ESHIFT + (ASCALE*H(ILAST,ILAST-1))/ - $ (BSCALE*T(ILAST-1,ILAST-1)) + IF( ( IITER / 20 )*20.EQ.IITER .AND. + $ BSCALE*ABS1(T( ILAST, ILAST )).GT.SAFMIN ) THEN + ESHIFT = ESHIFT + ( ASCALE*H( ILAST, + $ ILAST ) )/( BSCALE*T( ILAST, ILAST ) ) + ELSE + ESHIFT = ESHIFT + ( ASCALE*H( ILAST, + $ ILAST-1 ) )/( BSCALE*T( ILAST-1, ILAST-1 ) ) + END IF SHIFT = ESHIFT END IF * diff --git a/lapack-netlib/SRC/zhgeqz.f b/lapack-netlib/SRC/zhgeqz.f index b51cba4f7..b21199e9e 100644 --- a/lapack-netlib/SRC/zhgeqz.f +++ b/lapack-netlib/SRC/zhgeqz.f @@ -744,8 +744,14 @@ * * Exceptional shift. Chosen for no particularly good reason. * - ESHIFT = ESHIFT + (ASCALE*H(ILAST,ILAST-1))/ - $ (BSCALE*T(ILAST-1,ILAST-1)) + IF( ( IITER / 20 )*20.EQ.IITER .AND. 
+ $ BSCALE*ABS1(T( ILAST, ILAST )).GT.SAFMIN ) THEN + ESHIFT = ESHIFT + ( ASCALE*H( ILAST, + $ ILAST ) )/( BSCALE*T( ILAST, ILAST ) ) + ELSE + ESHIFT = ESHIFT + ( ASCALE*H( ILAST, + $ ILAST-1 ) )/( BSCALE*T( ILAST-1, ILAST-1 ) ) + END IF SHIFT = ESHIFT END IF * From 3165c915b6a0cd8f5104cd012d3189bdef206d63 Mon Sep 17 00:00:00 2001 From: xoviat Date: Wed, 27 Jan 2021 15:24:49 -0600 Subject: [PATCH 069/134] fix test helpers --- CMakeLists.txt | 2 +- lapack-netlib/TESTING/CMakeLists.txt | 198 ++++++++++++++------------- test/CMakeLists.txt | 4 +- 3 files changed, 109 insertions(+), 95 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index c5ba3ceed..9c992a08b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -229,7 +229,7 @@ if (NOT NO_CBLAS) add_subdirectory(utest) endif() -if (NOT MSVC AND NOT NOFORTRAN) +if (NOT NOFORTRAN) # Build test and ctest add_subdirectory(test) if(NOT NO_CBLAS) diff --git a/lapack-netlib/TESTING/CMakeLists.txt b/lapack-netlib/TESTING/CMakeLists.txt index 80e6b3232..b4e2223f7 100644 --- a/lapack-netlib/TESTING/CMakeLists.txt +++ b/lapack-netlib/TESTING/CMakeLists.txt @@ -174,7 +174,20 @@ if(PYTHONINTERP_FOUND) endif() - +if(WIN32) +FILE(WRITE ${CMAKE_CURRENT_BINARY_DIR}/test_helper.ps1 +"if (Test-Path $args[2]) { Remove-Item -Force $args[2] } \n" +"$ErrorActionPreference = \"Stop\"\n" +"Get-Content $args[1] | & \"$($args[0]).exe\" | Out-File $args[2]\n" +"If ((Get-Content $args[2] | %{$_ -match \"FATAL\"}) -contains $true) {\n" +"echo Error\n" +"exit 1\n" +"} else {\n" +"exit 0\n" +"}\n" +) +set(helper_prefix powershell -ExecutionPolicy Bypass "${CMAKE_CURRENT_BINARY_DIR}/test_helper.ps1") +else() # $1 exec, $2 input, $3 output_result FILE(WRITE ${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh "rm -f $3\n" @@ -187,51 +200,52 @@ FILE(WRITE ${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh "exit 0\n" "fi\n" ) - +set(helper_prefix sh "${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh") +endif() add_test(NAME "REAL_LAPACK_linear_equation_routines" - 
COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/LIN/xlintsts" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/stest.in" "${CMAKE_CURRENT_BINARY_DIR}/stest.out" + COMMAND ${helper_prefix} "${CMAKE_CURRENT_BINARY_DIR}/LIN/xlintsts" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/stest.in" "${CMAKE_CURRENT_BINARY_DIR}/stest.out" ) add_test(NAME "COMPLEX_LAPACK_linear_equation_routines" - COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/LIN/xlintstc" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/ctest.in" "${CMAKE_CURRENT_BINARY_DIR}/ctest.out" + COMMAND ${helper_prefix} "${CMAKE_CURRENT_BINARY_DIR}/LIN/xlintstc" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/ctest.in" "${CMAKE_CURRENT_BINARY_DIR}/ctest.out" ) add_test(NAME "DOUBLE_PRECISION_LAPACK_linear_equation_routines" - COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/LIN//xlintstd" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/dtest.in" "${CMAKE_CURRENT_BINARY_DIR}/dtest.out" + COMMAND ${helper_prefix} "${CMAKE_CURRENT_BINARY_DIR}/LIN//xlintstd" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/dtest.in" "${CMAKE_CURRENT_BINARY_DIR}/dtest.out" ) add_test(NAME "COMPLEX16_LAPACK_linear_equation_routines" - COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/LIN//xlintstz" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/ztest.in" "${CMAKE_CURRENT_BINARY_DIR}/ztest.out" + COMMAND ${helper_prefix} "${CMAKE_CURRENT_BINARY_DIR}/LIN//xlintstz" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/ztest.in" "${CMAKE_CURRENT_BINARY_DIR}/ztest.out" ) add_test(NAME "SINGLE-DOUBLE_PRECISION_LAPACK_prototype_linear_equation_routines" - COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/LIN/xlintstds" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/dstest.in" " ${CMAKE_CURRENT_BINARY_DIR}/dstest.out" + COMMAND ${helper_prefix} "${CMAKE_CURRENT_BINARY_DIR}/LIN/xlintstds" 
"${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/dstest.in" " ${CMAKE_CURRENT_BINARY_DIR}/dstest.out" ) # ======== COMPLEX-COMPLEX16 LIN TESTS ======================== add_test(NAME "Testing_COMPLEX-COMPLEX16_LAPACK_prototype_linear_equation_routines" - COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/LIN/xlintstzc" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/zctest.in" " ${CMAKE_CURRENT_BINARY_DIR}/zctest.out" + COMMAND ${helper_prefix} "${CMAKE_CURRENT_BINARY_DIR}/LIN/xlintstzc" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/zctest.in" " ${CMAKE_CURRENT_BINARY_DIR}/zctest.out" ) # ======== SINGLE RFP LIN TESTS ======================== add_test(NAME "Testing_REAL_LAPACK_RFP_prototype_linear_equation_routines" - COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/LIN/xlintstrfs" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/stest_rfp.in" "${CMAKE_CURRENT_BINARY_DIR}/stest_rfp.out" + COMMAND ${helper_prefix} "${CMAKE_CURRENT_BINARY_DIR}/LIN/xlintstrfs" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/stest_rfp.in" "${CMAKE_CURRENT_BINARY_DIR}/stest_rfp.out" ) # ======== COMPLEX16 RFP LIN TESTS ======================== add_test(NAME "Testing_DOUBLE_PRECISION_LAPACK_RFP_prototype_linear_equation_routines" - COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/LIN/xlintstrfd" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/dtest_rfp.in" " ${CMAKE_CURRENT_BINARY_DIR}/dtest_rfp.out" + COMMAND ${helper_prefix} "${CMAKE_CURRENT_BINARY_DIR}/LIN/xlintstrfd" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/dtest_rfp.in" " ${CMAKE_CURRENT_BINARY_DIR}/dtest_rfp.out" ) # ======== COMPLEX16 RFP LIN TESTS ======================== add_test(NAME "Testing_COMPLEX_LAPACK_RFP_prototype_linear_equation_routines" - COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/LIN/xlintstrfc" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/ctest_rfp.in" " 
${CMAKE_CURRENT_BINARY_DIR}/ctest_rfp.out" + COMMAND ${helper_prefix} "${CMAKE_CURRENT_BINARY_DIR}/LIN/xlintstrfc" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/ctest_rfp.in" " ${CMAKE_CURRENT_BINARY_DIR}/ctest_rfp.out" ) # ======== COMPLEX16 RFP LIN TESTS ======================== add_test(NAME "Testing_COMPLEX16_LAPACK_RFP_prototype_linear_equation_routines" - COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/LIN/xlintstrfz" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/ztest_rfp.in" " ${CMAKE_CURRENT_BINARY_DIR}/ztest_rfp.out" + COMMAND ${helper_prefix} "${CMAKE_CURRENT_BINARY_DIR}/LIN/xlintstrfz" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/ztest_rfp.in" " ${CMAKE_CURRENT_BINARY_DIR}/ztest_rfp.out" ) # # @@ -239,327 +253,327 @@ add_test(NAME "Testing_COMPLEX16_LAPACK_RFP_prototype_linear_equation_routines" # add_test(NAME "SNEP:_Testing_Nonsymmetric_Eigenvalue_Problem_routines" - COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtsts" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/nep.in" " ${CMAKE_CURRENT_BINARY_DIR}/snep.out" + COMMAND ${helper_prefix} "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtsts" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/nep.in" " ${CMAKE_CURRENT_BINARY_DIR}/snep.out" ) add_test(NAME "SSEP:_Testing_Symmetric_Eigenvalue_Problem_routines" - COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtsts" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/sep.in" " ${CMAKE_CURRENT_BINARY_DIR}/ssep.out" + COMMAND ${helper_prefix} "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtsts" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/sep.in" " ${CMAKE_CURRENT_BINARY_DIR}/ssep.out" ) add_test(NAME "SSE2:_Testing_Symmetric_Eigenvalue_Problem_routines" - COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtsts" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/se2.in" " ${CMAKE_CURRENT_BINARY_DIR}/sse2.out" + COMMAND 
${helper_prefix} "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtsts" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/se2.in" " ${CMAKE_CURRENT_BINARY_DIR}/sse2.out" ) add_test(NAME "SSVD:_Testing_Singular_Value_Decomposition_routines" - COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtsts" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/svd.in" " ${CMAKE_CURRENT_BINARY_DIR}/ssvd.out" + COMMAND ${helper_prefix} "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtsts" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/svd.in" " ${CMAKE_CURRENT_BINARY_DIR}/ssvd.out" ) add_test(NAME "SSEC:_Testing_REAL_Eigen_Condition_Routines" - COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtsts" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/sec.in" " ${CMAKE_CURRENT_BINARY_DIR}/sec.out" + COMMAND ${helper_prefix} "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtsts" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/sec.in" " ${CMAKE_CURRENT_BINARY_DIR}/sec.out" ) add_test(NAME "SSEV:_Testing_REAL_Nonsymmetric_Eigenvalue_Driver" - COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtsts" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/sed.in" " ${CMAKE_CURRENT_BINARY_DIR}/sed.out" + COMMAND ${helper_prefix} "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtsts" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/sed.in" " ${CMAKE_CURRENT_BINARY_DIR}/sed.out" ) add_test(NAME "SGG:_Testing_REAL_Nonsymmetric_Generalized_Eigenvalue_Problem_routines" - COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtsts" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/sgg.in" " ${CMAKE_CURRENT_BINARY_DIR}/sgg.out" + COMMAND ${helper_prefix} "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtsts" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/sgg.in" " ${CMAKE_CURRENT_BINARY_DIR}/sgg.out" ) add_test(NAME "SGD:_Testing_REAL_Nonsymmetric_Generalized_Eigenvalue_Problem_driver_routines" - COMMAND sh 
"${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtsts" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/sgd.in" " ${CMAKE_CURRENT_BINARY_DIR}/sgd.out" + COMMAND ${helper_prefix} "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtsts" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/sgd.in" " ${CMAKE_CURRENT_BINARY_DIR}/sgd.out" ) add_test(NAME "SSB:_Testing_REAL_Symmetric_Eigenvalue_Problem_routines" - COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtsts" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/ssb.in" " ${CMAKE_CURRENT_BINARY_DIR}/ssb.out" + COMMAND ${helper_prefix} "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtsts" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/ssb.in" " ${CMAKE_CURRENT_BINARY_DIR}/ssb.out" ) add_test(NAME "SSG:_Testing_REAL_Symmetric_Generalized_Eigenvalue_Problem_routines" - COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtsts" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/ssg.in" " ${CMAKE_CURRENT_BINARY_DIR}/ssg.out" + COMMAND ${helper_prefix} "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtsts" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/ssg.in" " ${CMAKE_CURRENT_BINARY_DIR}/ssg.out" ) add_test(NAME "SGEBAL:_Testing_the_balancing_of_a_REAL_general_matrix" - COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtsts" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/sbal.in" " ${CMAKE_CURRENT_BINARY_DIR}/sbal.out" + COMMAND ${helper_prefix} "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtsts" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/sbal.in" " ${CMAKE_CURRENT_BINARY_DIR}/sbal.out" ) add_test(NAME "SGEBAK:_Testing_the_back_transformation_of_a_REAL_balanced_matrix" - COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtsts" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/sbak.in" " ${CMAKE_CURRENT_BINARY_DIR}/sbak.out" + COMMAND ${helper_prefix} "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtsts" 
"${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/sbak.in" " ${CMAKE_CURRENT_BINARY_DIR}/sbak.out" ) add_test(NAME "SGGBAL:_Testing_the_balancing_of_a_pair_of_REAL_general_matrices" - COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtsts" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/sgbal.in" " ${CMAKE_CURRENT_BINARY_DIR}/sgbal.out" + COMMAND ${helper_prefix} "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtsts" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/sgbal.in" " ${CMAKE_CURRENT_BINARY_DIR}/sgbal.out" ) add_test(NAME "SGGBAK:_Testing_the_back_transformation_of_a_pair_of_REAL_balanced_matrices" - COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtsts" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/sgbak.in" " ${CMAKE_CURRENT_BINARY_DIR}/sgbak.out" + COMMAND ${helper_prefix} "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtsts" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/sgbak.in" " ${CMAKE_CURRENT_BINARY_DIR}/sgbak.out" ) add_test(NAME "SBB:_Testing_banded_Singular_Value_Decomposition_routines" - COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtsts" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/sbb.in" " ${CMAKE_CURRENT_BINARY_DIR}/sbb.out" + COMMAND ${helper_prefix} "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtsts" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/sbb.in" " ${CMAKE_CURRENT_BINARY_DIR}/sbb.out" ) add_test(NAME "SGLM:_Testing_Generalized_Linear_Regression_Model_routines" - COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtsts" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/glm.in" " ${CMAKE_CURRENT_BINARY_DIR}/sglm.out" + COMMAND ${helper_prefix} "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtsts" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/glm.in" " ${CMAKE_CURRENT_BINARY_DIR}/sglm.out" ) add_test(NAME "SGQR:_Testing_Generalized_QR_and_RQ_factorization_routines" - COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh" 
"${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtsts" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/gqr.in" " ${CMAKE_CURRENT_BINARY_DIR}/sgqr.out" + COMMAND ${helper_prefix} "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtsts" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/gqr.in" " ${CMAKE_CURRENT_BINARY_DIR}/sgqr.out" ) add_test(NAME "SGSV:_Testing_Generalized_Singular_Value_Decomposition_routines" - COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtsts" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/gsv.in" "${CMAKE_CURRENT_BINARY_DIR}/sgsv.out" + COMMAND ${helper_prefix} "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtsts" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/gsv.in" "${CMAKE_CURRENT_BINARY_DIR}/sgsv.out" ) add_test(NAME "SCSD:_Testing_CS_Decomposition_routines" - COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtsts" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/csd.in" " ${CMAKE_CURRENT_BINARY_DIR}/scsd.out" + COMMAND ${helper_prefix} "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtsts" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/csd.in" " ${CMAKE_CURRENT_BINARY_DIR}/scsd.out" ) add_test(NAME "SLSE:_Testing_Constrained_Linear_Least_Squares_routines" - COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtsts" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/lse.in" " ${CMAKE_CURRENT_BINARY_DIR}/slse.out" + COMMAND ${helper_prefix} "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtsts" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/lse.in" " ${CMAKE_CURRENT_BINARY_DIR}/slse.out" ) # ======== COMPLEX EIG TESTS =========================== add_test(NAME "CNEP:_Testing_Nonsymmetric_Eigenvalue_Problem_routines" - COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstc" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/nep.in" " ${CMAKE_CURRENT_BINARY_DIR}/cnep.out" + COMMAND ${helper_prefix} "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstc" 
"${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/nep.in" " ${CMAKE_CURRENT_BINARY_DIR}/cnep.out" ) add_test(NAME "CSEP:_Testing_Symmetric_Eigenvalue_Problem_routines" - COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstc" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/sep.in" " ${CMAKE_CURRENT_BINARY_DIR}/csep.out" + COMMAND ${helper_prefix} "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstc" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/sep.in" " ${CMAKE_CURRENT_BINARY_DIR}/csep.out" ) add_test(NAME "CSE2:_Testing_Symmetric_Eigenvalue_Problem_routines" - COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstc" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/se2.in" " ${CMAKE_CURRENT_BINARY_DIR}/cse2.out" + COMMAND ${helper_prefix} "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstc" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/se2.in" " ${CMAKE_CURRENT_BINARY_DIR}/cse2.out" ) add_test(NAME "CSVD:_Testing_Singular_Value_Decomposition_routines" - COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstc" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/svd.in" " ${CMAKE_CURRENT_BINARY_DIR}/csvd.out" + COMMAND ${helper_prefix} "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstc" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/svd.in" " ${CMAKE_CURRENT_BINARY_DIR}/csvd.out" ) add_test(NAME "CEC:_Testing_COMPLEX_Eigen_Condition_Routines" - COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstc" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/cec.in" " ${CMAKE_CURRENT_BINARY_DIR}/cec.out" + COMMAND ${helper_prefix} "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstc" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/cec.in" " ${CMAKE_CURRENT_BINARY_DIR}/cec.out" ) add_test(NAME "CES:_Testing_COMPLEX_Nonsymmetric_Schur_Form_Driver" - COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstc" 
"${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/ced.in" " ${CMAKE_CURRENT_BINARY_DIR}/ced.out" + COMMAND ${helper_prefix} "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstc" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/ced.in" " ${CMAKE_CURRENT_BINARY_DIR}/ced.out" ) add_test(NAME "CGG:_Testing_COMPLEX_Nonsymmetric_Generalized_Eigenvalue_Problem_routines" - COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstc" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/cgg.in" " ${CMAKE_CURRENT_BINARY_DIR}/cgg.out" + COMMAND ${helper_prefix} "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstc" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/cgg.in" " ${CMAKE_CURRENT_BINARY_DIR}/cgg.out" ) add_test(NAME "CGD:_Testing_COMPLEX_Nonsymmetric_Generalized_Eigenvalue_Problem_driver_routines" - COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstc" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/cgd.in" " ${CMAKE_CURRENT_BINARY_DIR}/cgd.out" + COMMAND ${helper_prefix} "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstc" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/cgd.in" " ${CMAKE_CURRENT_BINARY_DIR}/cgd.out" ) add_test(NAME "CHB:_Testing_Hermitian_Eigenvalue_Problem_routines" - COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstc" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/csb.in" " ${CMAKE_CURRENT_BINARY_DIR}/csb.out" + COMMAND ${helper_prefix} "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstc" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/csb.in" " ${CMAKE_CURRENT_BINARY_DIR}/csb.out" ) add_test(NAME "CSG:_Testing_Symmetric_Generalized_Eigenvalue_Problem_routines" - COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstc" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/csg.in" " ${CMAKE_CURRENT_BINARY_DIR}/csg.out" + COMMAND ${helper_prefix} "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstc" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/csg.in" " 
${CMAKE_CURRENT_BINARY_DIR}/csg.out" ) add_test(NAME "CGEBAL:_Testing_the_balancing_of_a_COMPLEX_general_matrix" - COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstc" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/cbal.in" " ${CMAKE_CURRENT_BINARY_DIR}/cbal.out" + COMMAND ${helper_prefix} "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstc" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/cbal.in" " ${CMAKE_CURRENT_BINARY_DIR}/cbal.out" ) add_test(NAME "CGEBAK:_Testing_the_back_transformation_of_a_COMPLEX_balanced_matrix" - COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstc" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/cbak.in" " ${CMAKE_CURRENT_BINARY_DIR}/cbak.out" + COMMAND ${helper_prefix} "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstc" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/cbak.in" " ${CMAKE_CURRENT_BINARY_DIR}/cbak.out" ) add_test(NAME "CGGBAL:_Testing_the_balancing_of_a_pair_of_COMPLEX_general_matrices" - COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstc" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/cgbal.in" " ${CMAKE_CURRENT_BINARY_DIR}/cgbal.out" + COMMAND ${helper_prefix} "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstc" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/cgbal.in" " ${CMAKE_CURRENT_BINARY_DIR}/cgbal.out" ) add_test(NAME "CGGBAK:_Testing_the_back_transformation_of_a_pair_of_COMPLEX_balanced_matrices" - COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstc" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/cgbak.in" " ${CMAKE_CURRENT_BINARY_DIR}/cgbak.out" + COMMAND ${helper_prefix} "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstc" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/cgbak.in" " ${CMAKE_CURRENT_BINARY_DIR}/cgbak.out" ) add_test(NAME "CBB:_Testing_banded_Singular_Value_Decomposition_routines" - COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh" 
"${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstc" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/cbb.in" " ${CMAKE_CURRENT_BINARY_DIR}/cbb.out" + COMMAND ${helper_prefix} "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstc" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/cbb.in" " ${CMAKE_CURRENT_BINARY_DIR}/cbb.out" ) add_test(NAME "CGLM:_Testing_Generalized_Linear_Regression_Model_routines" - COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstc" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/glm.in" " ${CMAKE_CURRENT_BINARY_DIR}/cglm.out" + COMMAND ${helper_prefix} "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstc" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/glm.in" " ${CMAKE_CURRENT_BINARY_DIR}/cglm.out" ) add_test(NAME "CGQR:_Testing_Generalized_QR_and_RQ_factorization_routines" - COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstc" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/gqr.in" " ${CMAKE_CURRENT_BINARY_DIR}/cgqr.out" + COMMAND ${helper_prefix} "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstc" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/gqr.in" " ${CMAKE_CURRENT_BINARY_DIR}/cgqr.out" ) add_test(NAME "CGSV:_Testing_Generalized_Singular_Value_Decomposition_routines" - COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstc" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/gsv.in" " ${CMAKE_CURRENT_BINARY_DIR}/cgsv.out" + COMMAND ${helper_prefix} "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstc" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/gsv.in" " ${CMAKE_CURRENT_BINARY_DIR}/cgsv.out" ) add_test(NAME "CCSD:_Testing_CS_Decomposition_routines" - COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstc" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/csd.in" " ${CMAKE_CURRENT_BINARY_DIR}/ccsd.out" + COMMAND ${helper_prefix} "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstc" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/csd.in" " 
${CMAKE_CURRENT_BINARY_DIR}/ccsd.out" ) add_test(NAME "CLSE:_Testing_Constrained_Linear_Least_Squares_routines" - COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstc" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/lse.in" " ${CMAKE_CURRENT_BINARY_DIR}/clse.out" + COMMAND ${helper_prefix} "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstc" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/lse.in" " ${CMAKE_CURRENT_BINARY_DIR}/clse.out" ) # ======== DOUBLE EIG TESTS =========================== add_test(NAME "DNEP:_Testing_Nonsymmetric_Eigenvalue_Problem_routines" - COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstd" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/nep.in" " ${CMAKE_CURRENT_BINARY_DIR}/dnep.out" + COMMAND ${helper_prefix} "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstd" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/nep.in" " ${CMAKE_CURRENT_BINARY_DIR}/dnep.out" ) add_test(NAME "DSEP:_Testing_Symmetric_Eigenvalue_Problem_routines" - COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstd" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/sep.in" " ${CMAKE_CURRENT_BINARY_DIR}/dsep.out" + COMMAND ${helper_prefix} "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstd" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/sep.in" " ${CMAKE_CURRENT_BINARY_DIR}/dsep.out" ) add_test(NAME "DSE2:_Testing_Symmetric_Eigenvalue_Problem_routines" - COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstd" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/se2.in" " ${CMAKE_CURRENT_BINARY_DIR}/dse2.out" + COMMAND ${helper_prefix} "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstd" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/se2.in" " ${CMAKE_CURRENT_BINARY_DIR}/dse2.out" ) add_test(NAME "DSVD:_Testing_Singular_Value_Decomposition_routines" - COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstd" 
"${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/svd.in" " ${CMAKE_CURRENT_BINARY_DIR}/dsvd.out" + COMMAND ${helper_prefix} "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstd" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/svd.in" " ${CMAKE_CURRENT_BINARY_DIR}/dsvd.out" ) add_test(NAME "DEC:_Testing_DOUBLE_PRECISION_Eigen_Condition_Routines" - COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstd" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/dec.in" " ${CMAKE_CURRENT_BINARY_DIR}/dec.out" + COMMAND ${helper_prefix} "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstd" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/dec.in" " ${CMAKE_CURRENT_BINARY_DIR}/dec.out" ) add_test(NAME "DEV:_Testing_DOUBLE_PRECISION_Nonsymmetric_Eigenvalue_Driver" - COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstd" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/ded.in" " ${CMAKE_CURRENT_BINARY_DIR}/ded.out" + COMMAND ${helper_prefix} "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstd" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/ded.in" " ${CMAKE_CURRENT_BINARY_DIR}/ded.out" ) add_test(NAME "DGG:_Testing_DOUBLE_PRECISION_Nonsymmetric_Generalized_Eigenvalue_Problem_routines" - COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstd" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/dgg.in" " ${CMAKE_CURRENT_BINARY_DIR}/dgg.out" + COMMAND ${helper_prefix} "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstd" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/dgg.in" " ${CMAKE_CURRENT_BINARY_DIR}/dgg.out" ) add_test(NAME "DGD:_Testing_DOUBLE_PRECISION_Nonsymmetric_Generalized_Eigenvalue_Problem_driver_routines" - COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstd" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/dgd.in" " ${CMAKE_CURRENT_BINARY_DIR}/dgd.out" + COMMAND ${helper_prefix} "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstd" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/dgd.in" " 
${CMAKE_CURRENT_BINARY_DIR}/dgd.out" ) add_test(NAME "DSB:_Testing_DOUBLE_PRECISION_Symmetric_Eigenvalue_Problem_routines" - COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstd" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/dsb.in" " ${CMAKE_CURRENT_BINARY_DIR}/dsb.out" + COMMAND ${helper_prefix} "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstd" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/dsb.in" " ${CMAKE_CURRENT_BINARY_DIR}/dsb.out" ) add_test(NAME "DSG:_Testing_DOUBLE_PRECISION_Symmetric_Generalized_Eigenvalue_Problem_routines" - COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstd" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/dsg.in" " ${CMAKE_CURRENT_BINARY_DIR}/dsg.out" + COMMAND ${helper_prefix} "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstd" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/dsg.in" " ${CMAKE_CURRENT_BINARY_DIR}/dsg.out" ) add_test(NAME "DGEBAL:_Testing_the_balancing_of_a_DOUBLE_PRECISION_general_matrix" - COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstd" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/dbal.in" " ${CMAKE_CURRENT_BINARY_DIR}/dbal.out" + COMMAND ${helper_prefix} "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstd" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/dbal.in" " ${CMAKE_CURRENT_BINARY_DIR}/dbal.out" ) add_test(NAME "DGEBAK:_Testing_the_back_transformation_of_a_DOUBLE_PRECISION_balanced_matrix" - COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstd" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/dbak.in" " ${CMAKE_CURRENT_BINARY_DIR}/dbak.out" + COMMAND ${helper_prefix} "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstd" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/dbak.in" " ${CMAKE_CURRENT_BINARY_DIR}/dbak.out" ) add_test(NAME "DGGBAL:_Testing_the_balancing_of_a_pair_of_DOUBLE_PRECISION_general_matrices" - COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh" 
"${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstd" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/dgbal.in" " ${CMAKE_CURRENT_BINARY_DIR}/dgbal.out" + COMMAND ${helper_prefix} "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstd" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/dgbal.in" " ${CMAKE_CURRENT_BINARY_DIR}/dgbal.out" ) add_test(NAME "DGGBAK:_Testing_the_back_transformation_of_a_pair_of_DOUBLE_PRECISION_balanced_matrices" - COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstd" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/dgbak.in" " ${CMAKE_CURRENT_BINARY_DIR}/dgbak.out" + COMMAND ${helper_prefix} "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstd" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/dgbak.in" " ${CMAKE_CURRENT_BINARY_DIR}/dgbak.out" ) add_test(NAME "DBB:_Testing_banded_Singular_Value_Decomposition_routines" - COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstd" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/dbb.in" " ${CMAKE_CURRENT_BINARY_DIR}/dbb.out" + COMMAND ${helper_prefix} "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstd" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/dbb.in" " ${CMAKE_CURRENT_BINARY_DIR}/dbb.out" ) add_test(NAME "DGLM:_Testing_Generalized_Linear_Regression_Model_routines" - COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstd" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/glm.in" " ${CMAKE_CURRENT_BINARY_DIR}/dglm.out" + COMMAND ${helper_prefix} "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstd" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/glm.in" " ${CMAKE_CURRENT_BINARY_DIR}/dglm.out" ) add_test(NAME "DGQR:_Testing_Generalized_QR_and_RQ_factorization_routines" - COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstd" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/gqr.in" " ${CMAKE_CURRENT_BINARY_DIR}/dgqr.out" + COMMAND ${helper_prefix} "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstd" 
"${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/gqr.in" " ${CMAKE_CURRENT_BINARY_DIR}/dgqr.out" ) add_test(NAME "DGSV:_Testing_Generalized_Singular_Value_Decomposition_routines" - COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstd" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/gsv.in" " ${CMAKE_CURRENT_BINARY_DIR}/dgsv.out" + COMMAND ${helper_prefix} "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstd" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/gsv.in" " ${CMAKE_CURRENT_BINARY_DIR}/dgsv.out" ) add_test(NAME "DCSD:_Testing_CS_Decomposition_routines" - COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstd" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/csd.in" " ${CMAKE_CURRENT_BINARY_DIR}/dcsd.out" + COMMAND ${helper_prefix} "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstd" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/csd.in" " ${CMAKE_CURRENT_BINARY_DIR}/dcsd.out" ) add_test(NAME "DLSE:_Testing_Constrained_Linear_Least_Squares_routines" - COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstd" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/lse.in" " ${CMAKE_CURRENT_BINARY_DIR}/dlse.out" + COMMAND ${helper_prefix} "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstd" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/lse.in" " ${CMAKE_CURRENT_BINARY_DIR}/dlse.out" ) # ======== COMPLEX16 EIG TESTS =========================== add_test(NAME "ZNEP:_Testing_Nonsymmetric_Eigenvalue_Problem_routines" - COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstz" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/nep.in" " ${CMAKE_CURRENT_BINARY_DIR}/znep.out" + COMMAND ${helper_prefix} "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstz" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/nep.in" " ${CMAKE_CURRENT_BINARY_DIR}/znep.out" ) add_test(NAME "ZSEP:_Testing_Symmetric_Eigenvalue_Problem_routines" - COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh" 
"${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstz" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/sep.in" " ${CMAKE_CURRENT_BINARY_DIR}/zsep.out" + COMMAND ${helper_prefix} "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstz" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/sep.in" " ${CMAKE_CURRENT_BINARY_DIR}/zsep.out" ) add_test(NAME "ZSE2:_Testing_Symmetric_Eigenvalue_Problem_routines" - COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstz" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/se2.in" " ${CMAKE_CURRENT_BINARY_DIR}/zse2.out" + COMMAND ${helper_prefix} "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstz" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/se2.in" " ${CMAKE_CURRENT_BINARY_DIR}/zse2.out" ) add_test(NAME "ZSVD:_Testing_Singular_Value_Decomposition_routines" - COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstz" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/svd.in" " ${CMAKE_CURRENT_BINARY_DIR}/zsvd.out" + COMMAND ${helper_prefix} "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstz" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/svd.in" " ${CMAKE_CURRENT_BINARY_DIR}/zsvd.out" ) add_test(NAME "ZEC:_Testing_COMPLEX16_Eigen_Condition_Routines" - COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstz" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/zec.in" " ${CMAKE_CURRENT_BINARY_DIR}/zec.out" + COMMAND ${helper_prefix} "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstz" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/zec.in" " ${CMAKE_CURRENT_BINARY_DIR}/zec.out" ) add_test(NAME "ZES:_Testing_COMPLEX16_Nonsymmetric_Schur_Form_Driver" - COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstz" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/zed.in" " ${CMAKE_CURRENT_BINARY_DIR}/zed.out" + COMMAND ${helper_prefix} "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstz" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/zed.in" " 
${CMAKE_CURRENT_BINARY_DIR}/zed.out" ) add_test(NAME "ZGG:_Testing_COMPLEX16_Nonsymmetric_Generalized_Eigenvalue_Problem_routines" - COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstz" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/zgg.in" " ${CMAKE_CURRENT_BINARY_DIR}/zgg.out" + COMMAND ${helper_prefix} "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstz" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/zgg.in" " ${CMAKE_CURRENT_BINARY_DIR}/zgg.out" ) add_test(NAME "ZGD:_Testing_COMPLEX16_Nonsymmetric_Generalized_Eigenvalue_Problem_driver_routines" - COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstz" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/zgd.in" " ${CMAKE_CURRENT_BINARY_DIR}/zgd.out" + COMMAND ${helper_prefix} "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstz" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/zgd.in" " ${CMAKE_CURRENT_BINARY_DIR}/zgd.out" ) add_test(NAME "ZHB:_Testing_Hermitian_Eigenvalue_Problem_routines" - COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstz" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/zsb.in" " ${CMAKE_CURRENT_BINARY_DIR}/zsb.out" + COMMAND ${helper_prefix} "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstz" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/zsb.in" " ${CMAKE_CURRENT_BINARY_DIR}/zsb.out" ) add_test(NAME "ZSG:_Testing_Symmetric_Generalized_Eigenvalue_Problem_routines" - COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstz" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/zsg.in" " ${CMAKE_CURRENT_BINARY_DIR}/zsg.out" + COMMAND ${helper_prefix} "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstz" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/zsg.in" " ${CMAKE_CURRENT_BINARY_DIR}/zsg.out" ) add_test(NAME "ZGEBAL:_Testing_the_balancing_of_a_COMPLEX16_general_matrix" - COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstz" 
"${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/zbal.in" " ${CMAKE_CURRENT_BINARY_DIR}/zbal.out" + COMMAND ${helper_prefix} "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstz" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/zbal.in" " ${CMAKE_CURRENT_BINARY_DIR}/zbal.out" ) add_test(NAME "ZGEBAK:_Testing_the_back_transformation_of_a_COMPLEX16_balanced_matrix" - COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstz" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/zbak.in" " ${CMAKE_CURRENT_BINARY_DIR}/zbak.out" + COMMAND ${helper_prefix} "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstz" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/zbak.in" " ${CMAKE_CURRENT_BINARY_DIR}/zbak.out" ) add_test(NAME "ZGGBAL:_Testing_the_balancing_of_a_pair_of_COMPLEX_general_matrices" - COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstz" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/zgbal.in" " ${CMAKE_CURRENT_BINARY_DIR}/zgbal.out" + COMMAND ${helper_prefix} "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstz" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/zgbal.in" " ${CMAKE_CURRENT_BINARY_DIR}/zgbal.out" ) add_test(NAME "ZGGBAK:_Testing_the_back_transformation_of_a_pair_of_COMPLEX16_balanced_matrices" - COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstz" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/zgbak.in" " ${CMAKE_CURRENT_BINARY_DIR}/zgbak.out" + COMMAND ${helper_prefix} "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstz" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/zgbak.in" " ${CMAKE_CURRENT_BINARY_DIR}/zgbak.out" ) add_test(NAME "ZBB:_Testing_banded_Singular_Value_Decomposition_routines" - COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstz" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/zbb.in" " ${CMAKE_CURRENT_BINARY_DIR}/zbb.out" + COMMAND ${helper_prefix} "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstz" 
"${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/zbb.in" " ${CMAKE_CURRENT_BINARY_DIR}/zbb.out" ) add_test(NAME "ZGLM:_Testing_Generalized_Linear_Regression_Model_routines" - COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstz" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/glm.in" " ${CMAKE_CURRENT_BINARY_DIR}/zglm.out" + COMMAND ${helper_prefix} "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstz" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/glm.in" " ${CMAKE_CURRENT_BINARY_DIR}/zglm.out" ) add_test(NAME "ZGQR:_Testing_Generalized_QR_and_RQ_factorization_routines" - COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstz" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/gqr.in" " ${CMAKE_CURRENT_BINARY_DIR}/zgqr.out" + COMMAND ${helper_prefix} "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstz" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/gqr.in" " ${CMAKE_CURRENT_BINARY_DIR}/zgqr.out" ) add_test(NAME "ZGSV:_Testing_Generalized_Singular_Value_Decomposition_routines" - COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstz" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/gsv.in" " ${CMAKE_CURRENT_BINARY_DIR}/zgsv.out" + COMMAND ${helper_prefix} "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstz" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/gsv.in" " ${CMAKE_CURRENT_BINARY_DIR}/zgsv.out" ) add_test(NAME "ZCSD:_Testing_CS_Decomposition_routines" - COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstz" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/csd.in" " ${CMAKE_CURRENT_BINARY_DIR}/zcsd.out" + COMMAND ${helper_prefix} "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstz" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/csd.in" " ${CMAKE_CURRENT_BINARY_DIR}/zcsd.out" ) add_test(NAME "Constrained_Linear_Least_Squares_routines" - COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstz" 
"${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/lse.in" " ${CMAKE_CURRENT_BINARY_DIR}/zlse.out" + COMMAND ${helper_prefix} "${CMAKE_CURRENT_BINARY_DIR}/EIG/xeigtstz" "${PROJECT_SOURCE_DIR}/lapack-netlib/TESTING/lse.in" " ${CMAKE_CURRENT_BINARY_DIR}/zlse.out" ) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index ccd1175a3..d338242ff 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -24,10 +24,10 @@ endforeach() # $1 exec, $2 input, $3 output_result if(WIN32) FILE(WRITE ${CMAKE_CURRENT_BINARY_DIR}/test_helper.ps1 -"Remove-Item -Force $args[2]\n" +"if (Test-Path $args[2]) { Remove-Item -Force $args[2] } \n" "$ErrorActionPreference = \"Stop\"\n" "Get-Content $args[1] | & $args[0]\n" -"If (Get-Content $args[2] | %{$_ -match \"FATAL\"}) {\n" +"If ((Get-Content $args[2] | %{$_ -match \"FATAL\"}) -contains $true) {\n" "echo Error\n" "exit 1\n" "} else {\n" From 3dfecaaf7cd86932117870dc6764a38c4006ed85 Mon Sep 17 00:00:00 2001 From: xoviat Date: Wed, 27 Jan 2021 16:39:15 -0600 Subject: [PATCH 070/134] require nofortran to be set on msvc --- CMakeLists.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 9c992a08b..4f34d5337 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -14,6 +14,9 @@ include(GNUInstallDirs) include(CMakePackageConfigHelpers) +if(MSVC AND NOT DEFINED NOFORTRAN) + set(NOFORTRAN ON) +endif() ####### if(MSVC) From 609ea8027632bbe878be8e5db08be08996062732 Mon Sep 17 00:00:00 2001 From: xoviat Date: Wed, 27 Jan 2021 16:39:52 -0600 Subject: [PATCH 071/134] enable testing --- appveyor.yml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/appveyor.yml b/appveyor.yml index 1936059d5..a18a41960 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -76,7 +76,5 @@ build_script: - cmake --build . 
test_script: - - echo Running Test - - cd utest - - openblas_utest + - ctest -j2 From f87842483eee9d158f44d51d4c09662c3cff7526 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Fri, 29 Jan 2021 09:56:12 +0100 Subject: [PATCH 072/134] fix calculation of non-exceptional shift (from Reference-LAPACK PR 477) --- lapack-netlib/SRC/chgeqz.f | 27 +++++++++++++++++---------- lapack-netlib/SRC/zhgeqz.f | 27 +++++++++++++++++---------- 2 files changed, 34 insertions(+), 20 deletions(-) diff --git a/lapack-netlib/SRC/chgeqz.f b/lapack-netlib/SRC/chgeqz.f index 1616840ec..0d3787915 100644 --- a/lapack-netlib/SRC/chgeqz.f +++ b/lapack-netlib/SRC/chgeqz.f @@ -320,12 +320,13 @@ $ C, SAFMIN, TEMP, TEMP2, TEMPR, ULP COMPLEX ABI22, AD11, AD12, AD21, AD22, CTEMP, CTEMP2, $ CTEMP3, ESHIFT, RTDISC, S, SHIFT, SIGNBC, T1, - $ U12, X + $ U12, X, ABI12, Y * .. * .. External Functions .. + COMPLEX CLADIV LOGICAL LSAME REAL CLANHS, SLAMCH - EXTERNAL LSAME, CLANHS, SLAMCH + EXTERNAL CLADIV, LSAME, CLANHS, SLAMCH * .. * .. External Subroutines .. 
EXTERNAL CLARTG, CLASET, CROT, CSCAL, XERBLA @@ -729,15 +730,21 @@ AD22 = ( ASCALE*H( ILAST, ILAST ) ) / $ ( BSCALE*T( ILAST, ILAST ) ) ABI22 = AD22 - U12*AD21 + ABI12 = AD12 - U12*AD11 * - T1 = HALF*( AD11+ABI22 ) - RTDISC = SQRT( T1**2+AD12*AD21-AD11*AD22 ) - TEMP = REAL( T1-ABI22 )*REAL( RTDISC ) + - $ AIMAG( T1-ABI22 )*AIMAG( RTDISC ) - IF( TEMP.LE.ZERO ) THEN - SHIFT = T1 + RTDISC - ELSE - SHIFT = T1 - RTDISC + SHIFT = ABI22 + CTEMP = SQRT( ABI12 )*SQRT( AD21 ) + TEMP = ABS1( CTEMP ) + IF( CTEMP.NE.ZERO ) THEN + X = HALF*( AD11-SHIFT ) + TEMP2 = ABS1( X ) + TEMP = MAX( TEMP, ABS1( X ) ) + Y = TEMP*SQRT( ( X / TEMP )**2+( CTEMP / TEMP )**2 ) + IF( TEMP2.GT.ZERO ) THEN + IF( DBLE( X / TEMP2 )*DBLE( Y )+ + $ DIMAG( X / TEMP2 )*DIMAG( Y ).LT.ZERO )Y = -Y + END IF + SHIFT = SHIFT - CTEMP*CLADIV( CTEMP, ( X+Y ) ) END IF ELSE * diff --git a/lapack-netlib/SRC/zhgeqz.f b/lapack-netlib/SRC/zhgeqz.f index b21199e9e..b28ae47a4 100644 --- a/lapack-netlib/SRC/zhgeqz.f +++ b/lapack-netlib/SRC/zhgeqz.f @@ -320,12 +320,13 @@ $ C, SAFMIN, TEMP, TEMP2, TEMPR, ULP COMPLEX*16 ABI22, AD11, AD12, AD21, AD22, CTEMP, CTEMP2, $ CTEMP3, ESHIFT, RTDISC, S, SHIFT, SIGNBC, T1, - $ U12, X + $ U12, X, ABI12, Y * .. * .. External Functions .. + COMPLEX*16 ZLADIV LOGICAL LSAME DOUBLE PRECISION DLAMCH, ZLANHS - EXTERNAL LSAME, DLAMCH, ZLANHS + EXTERNAL ZLADIV, LSAME, DLAMCH, ZLANHS * .. * .. External Subroutines .. 
EXTERNAL XERBLA, ZLARTG, ZLASET, ZROT, ZSCAL @@ -730,15 +731,21 @@ AD22 = ( ASCALE*H( ILAST, ILAST ) ) / $ ( BSCALE*T( ILAST, ILAST ) ) ABI22 = AD22 - U12*AD21 + ABI12 = AD12 - U12*AD11 * - T1 = HALF*( AD11+ABI22 ) - RTDISC = SQRT( T1**2+AD12*AD21-AD11*AD22 ) - TEMP = DBLE( T1-ABI22 )*DBLE( RTDISC ) + - $ DIMAG( T1-ABI22 )*DIMAG( RTDISC ) - IF( TEMP.LE.ZERO ) THEN - SHIFT = T1 + RTDISC - ELSE - SHIFT = T1 - RTDISC + SHIFT = ABI22 + CTEMP = SQRT( ABI12 )*SQRT( AD21 ) + TEMP = ABS1( CTEMP ) + IF( CTEMP.NE.ZERO ) THEN + X = HALF*( AD11-SHIFT ) + TEMP2 = ABS1( X ) + TEMP = MAX( TEMP, ABS1( X ) ) + Y = TEMP*SQRT( ( X / TEMP )**2+( CTEMP / TEMP )**2 ) + IF( TEMP2.GT.ZERO ) THEN + IF( DBLE( X / TEMP2 )*DBLE( Y )+ + $ DIMAG( X / TEMP2 )*DIMAG( Y ).LT.ZERO )Y = -Y + END IF + SHIFT = SHIFT - CTEMP*ZLADIV( CTEMP, ( X+Y ) ) END IF ELSE * From c4b5abbe43d7c22215ef36ef4f7c1413c975678c Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Fri, 29 Jan 2021 10:45:36 +0100 Subject: [PATCH 073/134] fix data type --- lapack-netlib/SRC/chgeqz.f | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lapack-netlib/SRC/chgeqz.f b/lapack-netlib/SRC/chgeqz.f index 0d3787915..4725e7169 100644 --- a/lapack-netlib/SRC/chgeqz.f +++ b/lapack-netlib/SRC/chgeqz.f @@ -741,8 +741,8 @@ TEMP = MAX( TEMP, ABS1( X ) ) Y = TEMP*SQRT( ( X / TEMP )**2+( CTEMP / TEMP )**2 ) IF( TEMP2.GT.ZERO ) THEN - IF( DBLE( X / TEMP2 )*DBLE( Y )+ - $ DIMAG( X / TEMP2 )*DIMAG( Y ).LT.ZERO )Y = -Y + IF( REAL( X / TEMP2 )*REAL( Y )+ + $ AIMAG( X / TEMP2 )*AIMAG( Y ).LT.ZERO )Y = -Y END IF SHIFT = SHIFT - CTEMP*CLADIV( CTEMP, ( X+Y ) ) END IF From 2056ffc227d85c5a72622baae26427493c5b0bbc Mon Sep 17 00:00:00 2001 From: Rajalakshmi Srinivasaraghavan Date: Fri, 29 Jan 2021 13:51:43 -0600 Subject: [PATCH 074/134] Optimize cscal function for POWER10 This patch makes use of new POWER10 vector pair instructions for loads and stores. 
--- kernel/power/cscal_microk_power10.c | 176 ++++++++++++++++++++++++++++ kernel/power/zscal.c | 12 +- 2 files changed, 187 insertions(+), 1 deletion(-) create mode 100644 kernel/power/cscal_microk_power10.c diff --git a/kernel/power/cscal_microk_power10.c b/kernel/power/cscal_microk_power10.c new file mode 100644 index 000000000..70b50809e --- /dev/null +++ b/kernel/power/cscal_microk_power10.c @@ -0,0 +1,176 @@ +/*************************************************************************** +Copyright (c) 2021, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*****************************************************************************/ + +#define HAVE_KERNEL_8 1 + +static void zscal_kernel_8 (long n, float *x, float alpha_r, float alpha_i) +{ + __vector float t0 = {-alpha_i, alpha_i, -alpha_i, alpha_i}; + __vector unsigned char mask = { 11,10,9,8,15,14,13,12,3,2,1,0,7,6,5,4}; + __asm__ + ( + "dcbt 0, %2 \n\t" + "xscvdpspn 32, %x3 \n\t" + "xxspltw 32, 32, 0 \n\t" + + "lxvp 40, 0(%2) \n\t" + "lxvp 42, 32(%2) \n\t" + "lxvp 44, 64(%2) \n\t" + "lxvp 46, 96(%2) \n\t" + + "addic. 
%1, %1, -16 \n\t" + "ble two%= \n\t" + + ".align 5 \n" + "one%=: \n\t" + + "xvmulsp 48, 40, 32 \n\t" // x0_r * alpha_r, x0_i * alpha_r + "xvmulsp 49, 41, 32 \n\t" + "xvmulsp 50, 42, 32 \n\t" + "xvmulsp 51, 43, 32 \n\t" + "xvmulsp 52, 44, 32 \n\t" + "xvmulsp 53, 45, 32 \n\t" + "xvmulsp 54, 46, 32 \n\t" + "xvmulsp 55, 47, 32 \n\t" + + "xxperm 34, 40, %x5 \n\t" + "xxperm 35, 41, %x5 \n\t" + "xxperm 36, 42, %x5 \n\t" + "xxperm 37, 43, %x5 \n\t" + "xxperm 38, 44, %x5 \n\t" + "xxperm 39, 45, %x5 \n\t" + "xxperm 56, 46, %x5 \n\t" + "xxperm 57, 47, %x5 \n\t" + + "xvmulsp 34, 34, %x4 \n\t" // x0_i * -alpha_i, x0_r * alpha_i + "xvmulsp 35, 35, %x4 \n\t" + + "lxvp 40, 128(%2) \n\t" + + "xvmulsp 36, 36, %x4 \n\t" + "xvmulsp 37, 37, %x4 \n\t" + + "lxvp 42, 160(%2) \n\t" + + "xvmulsp 38, 38, %x4 \n\t" + "xvmulsp 39, 39, %x4 \n\t" + + "lxvp 44, 192(%2) \n\t" + + "xvmulsp 56, 56, %x4 \n\t" + "xvmulsp 57, 57, %x4 \n\t" + + "lxvp 46, 224(%2) \n\t" + + "xvaddsp 48, 48, 34 \n\t" + "xvaddsp 49, 49, 35 \n\t" + "xvaddsp 50, 50, 36 \n\t" + "xvaddsp 51, 51, 37 \n\t" + + "stxvp 48, 0(%2) \n\t" + + "xvaddsp 52, 52, 38 \n\t" + "xvaddsp 53, 53, 39 \n\t" + + "stxvp 50, 32(%2) \n\t" + + "xvaddsp 54, 54, 56 \n\t" + "xvaddsp 55, 55, 57 \n\t" + + "stxvp 52, 64(%2) \n\t" + "stxvp 54, 96(%2) \n\t" + + "addi %2, %2, 128 \n\t" + + "addic. 
%1, %1, -16 \n\t" + "bgt one%= \n" + + "two%=: \n\t" + + "xvmulsp 48, 40, 32 \n\t" // x0_r * alpha_r, x0_i * alpha_r + "xvmulsp 49, 41, 32 \n\t" + "xvmulsp 50, 42, 32 \n\t" + "xvmulsp 51, 43, 32 \n\t" + "xvmulsp 52, 44, 32 \n\t" + "xvmulsp 53, 45, 32 \n\t" + "xvmulsp 54, 46, 32 \n\t" + "xvmulsp 55, 47, 32 \n\t" + + "xxperm 34, 40, %x5 \n\t" + "xxperm 35, 41, %x5 \n\t" + "xxperm 36, 42, %x5 \n\t" + "xxperm 37, 43, %x5 \n\t" + "xxperm 38, 44, %x5 \n\t" + "xxperm 39, 45, %x5 \n\t" + "xxperm 56, 46, %x5 \n\t" + "xxperm 57, 47, %x5 \n\t" + + + "xvmulsp 34, 34, %x4 \n\t" // x0_i * -alpha_i, x0_r * alpha_i + "xvmulsp 35, 35, %x4 \n\t" + "xvmulsp 36, 36, %x4 \n\t" + "xvmulsp 37, 37, %x4 \n\t" + "xvmulsp 38, 38, %x4 \n\t" + "xvmulsp 39, 39, %x4 \n\t" + "xvmulsp 56, 56, %x4 \n\t" + "xvmulsp 57, 57, %x4 \n\t" + + "xvaddsp 48, 48, 34 \n\t" + "xvaddsp 49, 49, 35 \n\t" + "xvaddsp 50, 50, 36 \n\t" + "xvaddsp 51, 51, 37 \n\t" + + "stxvp 48, 0(%2) \n\t" + + "xvaddsp 52, 52, 38 \n\t" + "xvaddsp 53, 53, 39 \n\t" + + "stxvp 50, 32(%2) \n\t" + + "xvaddsp 54, 54, 56 \n\t" + "xvaddsp 55, 55, 57 \n\t" + + "stxvp 52, 64(%2) \n\t" + "stxvp 54, 96(%2) \n\t" + + "#n=%1 x=%0=%2 alpha=(%3,%4)\n" + : + "+m" (*x), + "+r" (n), // 1 + "+b" (x) // 2 + : + "f" (alpha_r), // 3 + "wa" (t0), // 4 + "wa" (mask) // 5 + : + "cr0", + "vs32","vs33","vs34","vs35","vs36","vs37","vs38","vs39", + "vs40","vs41","vs42","vs43","vs44","vs45","vs46","vs47", + "vs48","vs49","vs50","vs51","vs52","vs53","vs54","vs55", + "vs56","vs57" + ); +} diff --git a/kernel/power/zscal.c b/kernel/power/zscal.c index 5526f4d67..31b3682b9 100644 --- a/kernel/power/zscal.c +++ b/kernel/power/zscal.c @@ -38,11 +38,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#pragma GCC optimize "O1" -#if defined(POWER8) || defined(POWER9) || defined(POWER10) #if defined(__VEC__) || defined(__ALTIVEC__) +#if defined(POWER8) || defined(POWER9) #if defined(DOUBLE) #include "zscal_microk_power8.c" #endif +#elif defined(POWER10) +#if defined(DOUBLE) +#include "zscal_microk_power8.c" +#else +#include "cscal_microk_power10.c" +#endif #endif #endif @@ -145,7 +151,11 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r,FLOAT da_i, F { +#if defined(DOUBLE) n1 = n & -8; +#else + n1 = n & -16; +#endif if ( n1 > 0 ) { zscal_kernel_8(n1, x, da_r, da_i); From bd906e341005fc0bf460ebcf3f6d31433ecef0be Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sat, 30 Jan 2021 16:46:25 +0100 Subject: [PATCH 075/134] fix copy-paste error in build rules for cblas_crotg and cblas_zrotg --- interface/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/interface/Makefile b/interface/Makefile index fab403c82..3252601d2 100644 --- a/interface/Makefile +++ b/interface/Makefile @@ -1634,10 +1634,10 @@ cblas_srotg.$(SUFFIX) cblas_srotg.$(PSUFFIX): rotg.c cblas_drotg.$(SUFFIX) cblas_drotg.$(PSUFFIX): rotg.c $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) -cblas_crotg.$(SUFFIX) crotg.$(PSUFFIX): zrotg.c +cblas_crotg.$(SUFFIX) cblas_crotg.$(PSUFFIX): zrotg.c $(CC) -c $(CFLAGS) -DCBLAS $< -o $(@F) -cblas_zrotg.$(SUFFIX) zrotg.$(PSUFFIX): zrotg.c +cblas_zrotg.$(SUFFIX) cblas_zrotg.$(PSUFFIX): zrotg.c $(CC) -c $(CFLAGS) -DCBLAS $< -o $(@F) cblas_srotm.$(SUFFIX) cblas_srotm.$(PSUFFIX): rotm.c From 6fa9860dbe39757cd42c472dc0f2e00a552355b7 Mon Sep 17 00:00:00 2001 From: xoviat <49173759+xoviat@users.noreply.github.com> Date: Sat, 30 Jan 2021 21:28:12 -0600 Subject: [PATCH 076/134] appveyor: cleanup and add openmp run --- appveyor.yml | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/appveyor.yml b/appveyor.yml index 1936059d5..1db95d220 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -30,10 +30,11 @@ 
environment: CONDA_INSTALL_LOCN: C:\\Miniconda36-x64 matrix: - COMPILER: clang-cl - WITH_FORTRAN: yes + WITH_FORTRAN: ON + USE_OPENMP: ON - COMPILER: clang-cl DYNAMIC_ARCH: ON - WITH_FORTRAN: no + WITH_FORTRAN: OFF - COMPILER: cl - COMPILER: MinGW64-gcc-7.2.0-mingw DYNAMIC_ARCH: OFF @@ -47,12 +48,7 @@ environment: install: - if [%COMPILER%]==[clang-cl] call %CONDA_INSTALL_LOCN%\Scripts\activate.bat - if [%COMPILER%]==[clang-cl] conda config --add channels conda-forge --force - - if [%COMPILER%]==[clang-cl] conda install --yes --quiet clangdev cmake - - - if [%WITH_FORTRAN%]==[no] conda install --yes --quiet ninja - - if [%WITH_FORTRAN%]==[yes] conda install --yes --quiet -c isuruf kitware-ninja - - if [%WITH_FORTRAN%]==[yes] conda install --yes --quiet flang - + - if [%COMPILER%]==[clang-cl] conda install --yes --quiet clangdev cmake ninja flang=11.0.1 - if [%COMPILER%]==[clang-cl] call "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Auxiliary\Build\vcvarsall.bat" x64 - if [%COMPILER%]==[clang-cl] set "LIB=%CONDA_INSTALL_LOCN%\Library\lib;%LIB%" - if [%COMPILER%]==[clang-cl] set "CPATH=%CONDA_INSTALL_LOCN%\Library\include;%CPATH%" @@ -68,8 +64,9 @@ before_build: - if [%COMPILER%]==[MinGW64-gcc-7.2.0-mingw] cmake -G "MinGW Makefiles" -DNOFORTRAN=1 .. - if [%COMPILER%]==[MinGW-gcc-6.3.0-32] cmake -G "MSYS Makefiles" -DNOFORTRAN=1 .. - if [%COMPILER%]==[MinGW-gcc-5.3.0] cmake -G "MSYS Makefiles" -DNOFORTRAN=1 .. - - if [%WITH_FORTRAN%]==[no] cmake -G "Ninja" -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER=clang-cl -DMSVC_STATIC_CRT=ON .. - - if [%WITH_FORTRAN%]==[yes] cmake -G "Ninja" -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER=clang-cl -DCMAKE_Fortran_COMPILER=flang -DBUILD_WITHOUT_LAPACK=no -DNOFORTRAN=0 .. + - if [%WITH_FORTRAN%]==[OFF] cmake -G "Ninja" -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER=clang-cl -DMSVC_STATIC_CRT=ON .. 
+ - if [%WITH_FORTRAN%]==[ON] cmake -G "Ninja" -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER=clang-cl -DCMAKE_Fortran_COMPILER=flang -DBUILD_WITHOUT_LAPACK=no -DNOFORTRAN=0 .. + - if [%USE_OPENMP%]==[ON] cmake -DUSE_OPENMP=ON .. - if [%DYNAMIC_ARCH%]==[ON] cmake -DDYNAMIC_ARCH=ON -DDYNAMIC_LIST='CORE2;NEHALEM;SANDYBRIDGE;BULLDOZER;HASWELL' .. build_script: From eb1d2344f7809c63a9cb5ae4ce05e255b15ec2c7 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Mon, 1 Feb 2021 19:45:25 +0100 Subject: [PATCH 077/134] Fix compiler version check for Intel Cooperlake support (clang-cl does not accept -dumpversion) --- cmake/system.cmake | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/cmake/system.cmake b/cmake/system.cmake index 66e95c6d3..1d4e62463 100644 --- a/cmake/system.cmake +++ b/cmake/system.cmake @@ -148,16 +148,20 @@ endif () include("${PROJECT_SOURCE_DIR}/cmake/prebuild.cmake") if (DEFINED TARGET) if (${TARGET} STREQUAL COOPERLAKE AND NOT NO_AVX512) -# if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU") + if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU") execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION) - if (${GCC_VERSION} VERSION_GREATER 10.1 OR ${GCC_VERSION} VERSION_EQUAL 10.1) + if (${CMAKE_C_COMPILER_VERSION} VERSION_GREATER 10.1 OR ${CMAKE_C_COMPILER_VERSION} VERSION_EQUAL 10.1) set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=cooperlake") else() set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=skylake-avx512") endif() -# elseif (${CMAKE_C_COMPILER_ID} STREQUAL "CLANG") -# set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx2") -# endif() + elseif (${CMAKE_C_COMPILER_ID} STREQUAL "CLANG") + if (${CMAKE_C_COMPILER_VERSION} VERSION_GREATER 8.99) + set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=cooperlake") + else() + set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=skylake-avx512") + endif() + endif() endif() if (${TARGET} STREQUAL SKYLAKEX AND NOT NO_AVX512) set (KERNEL_DEFINITIONS 
"${KERNEL_DEFINITIONS} -march=skylake-avx512") From 774b9f86534c74403cfb417cde906dd034cd707e Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Mon, 1 Feb 2021 20:18:53 +0100 Subject: [PATCH 078/134] handle AppleClang in Cooperlake support condition --- cmake/system.cmake | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/cmake/system.cmake b/cmake/system.cmake index 1d4e62463..1336e19a2 100644 --- a/cmake/system.cmake +++ b/cmake/system.cmake @@ -4,6 +4,13 @@ ## set(NETLIB_LAPACK_DIR "${PROJECT_SOURCE_DIR}/lapack-netlib") +1 + +## + +2 + +## Author: Hank Anderson # System detection, via CMake. include("${PROJECT_SOURCE_DIR}/cmake/system_check.cmake") @@ -150,12 +157,12 @@ if (DEFINED TARGET) if (${TARGET} STREQUAL COOPERLAKE AND NOT NO_AVX512) if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU") execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION) - if (${CMAKE_C_COMPILER_VERSION} VERSION_GREATER 10.1 OR ${CMAKE_C_COMPILER_VERSION} VERSION_EQUAL 10.1) + if (${CMAKE_C_COMPILER_VERSION} VERSION_GREATER 10.09) set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=cooperlake") else() set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=skylake-avx512") endif() - elseif (${CMAKE_C_COMPILER_ID} STREQUAL "CLANG") + elseif (${CMAKE_C_COMPILER_ID} STREQUAL "CLANG" OR ${CMAKE_C_COMPILER_ID} STREQUAL "AppleClang") if (${CMAKE_C_COMPILER_VERSION} VERSION_GREATER 8.99) set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=cooperlake") else() From 99ac042702da18bcf7627c410c4d7eb36213bd6f Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Mon, 1 Feb 2021 21:02:53 +0100 Subject: [PATCH 079/134] remove spurious lines (probably editor malfunction) --- cmake/system.cmake | 7 ------- 1 file changed, 7 deletions(-) diff --git a/cmake/system.cmake b/cmake/system.cmake index 1336e19a2..a5996b9be 100644 --- a/cmake/system.cmake +++ b/cmake/system.cmake @@ -4,13 +4,6 @@ ## set(NETLIB_LAPACK_DIR "${PROJECT_SOURCE_DIR}/lapack-netlib") -1 
- -## - -2 - -## Author: Hank Anderson # System detection, via CMake. include("${PROJECT_SOURCE_DIR}/cmake/system_check.cmake") From 95e19e2e231b01f104e0acc68bfd7589c39c4213 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Tue, 2 Feb 2021 10:53:46 +0100 Subject: [PATCH 080/134] fix case in compiler name check Co-authored-by: xoviat <49173759+xoviat@users.noreply.github.com> --- cmake/system.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/system.cmake b/cmake/system.cmake index a5996b9be..d52af3aa1 100644 --- a/cmake/system.cmake +++ b/cmake/system.cmake @@ -155,7 +155,7 @@ if (DEFINED TARGET) else() set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=skylake-avx512") endif() - elseif (${CMAKE_C_COMPILER_ID} STREQUAL "CLANG" OR ${CMAKE_C_COMPILER_ID} STREQUAL "AppleClang") + elseif (${CMAKE_C_COMPILER_ID} STREQUAL "Clang" OR ${CMAKE_C_COMPILER_ID} STREQUAL "AppleClang") if (${CMAKE_C_COMPILER_VERSION} VERSION_GREATER 8.99) set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=cooperlake") else() From d7a77091a3468354ac57ee76a682c04ac9c5ad03 Mon Sep 17 00:00:00 2001 From: Jake Arkinstall <65358059+jake-arkinstall@users.noreply.github.com> Date: Wed, 10 Feb 2021 12:11:17 +0000 Subject: [PATCH 081/134] Addressed issue #3100, removing an unnecessary write to the include directory --- cmake/lapacke.cmake | 1 - 1 file changed, 1 deletion(-) diff --git a/cmake/lapacke.cmake b/cmake/lapacke.cmake index f10905c4d..54a583887 100644 --- a/cmake/lapacke.cmake +++ b/cmake/lapacke.cmake @@ -2499,6 +2499,5 @@ foreach (Utils_FILE ${Utils_SRC}) endforeach () set(lapacke_include_dir "${NETLIB_LAPACK_DIR}/LAPACKE/include") -configure_file("${lapacke_include_dir}/lapacke_mangling_with_flags.h.in" "${lapacke_include_dir}/lapacke_mangling.h" COPYONLY) include_directories(${lapacke_include_dir}) set_source_files_properties(${LAPACKE_SOURCES} PROPERTIES COMPILE_FLAGS "${LAPACK_CFLAGS}") From ece3ce581e3ec530eaccfe7f284c52e115ec7aa9 Mon Sep 17 00:00:00 
2001 From: Martin Kroeker Date: Wed, 10 Feb 2021 14:22:59 +0100 Subject: [PATCH 082/134] Strip parenthesized (pkgversion) data from GCC version string to avoid misinterpretation --- f_check | 1 + 1 file changed, 1 insertion(+) diff --git a/f_check b/f_check index e9aca4ff9..ffe9c6b46 100644 --- a/f_check +++ b/f_check @@ -75,6 +75,7 @@ if ($compiler eq "") { } elsif ($data =~ /GNU/ || $data =~ /GCC/ ) { + $data =~ s/\(+.*?\)+//g; $data =~ /(\d+)\.(\d+).(\d+)/; $major = $1; $minor = $2; From db348dcff2b3267e40de634bda9173370dd6b001 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Thu, 11 Feb 2021 09:23:05 +0100 Subject: [PATCH 083/134] Enable optimized srot/drot kernels from Haswell --- kernel/x86_64/KERNEL.ZEN | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/x86_64/KERNEL.ZEN b/kernel/x86_64/KERNEL.ZEN index 7bb308fea..a66394be3 100644 --- a/kernel/x86_64/KERNEL.ZEN +++ b/kernel/x86_64/KERNEL.ZEN @@ -97,3 +97,5 @@ ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c CGEMM3MKERNEL = cgemm3m_kernel_8x4_haswell.c ZGEMM3MKERNEL = zgemm3m_kernel_4x4_haswell.c +SROTKERNEL = srot.c +DROTKERNEL = drot.c From 46509953a9dd1907f05465e2212d4477cb26b14c Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Thu, 11 Feb 2021 09:24:16 +0100 Subject: [PATCH 084/134] Use Haswell optimizations for Zen as well --- kernel/x86_64/drot.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/x86_64/drot.c b/kernel/x86_64/drot.c index 66e9ff907..ab5048bd1 100644 --- a/kernel/x86_64/drot.c +++ b/kernel/x86_64/drot.c @@ -2,7 +2,7 @@ #if defined(SKYLAKEX) #include "drot_microk_skylakex-2.c" -#elif defined(HASWELL) +#elif defined(HASWELL) || defined(ZEN) #include "drot_microk_haswell-2.c" #endif From 950c047b49c159fd8a8804ecae351cccc2865d02 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Thu, 11 Feb 2021 09:24:51 +0100 Subject: [PATCH 085/134] Use Haswell optimizations for Zen as well --- kernel/x86_64/srot.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff 
--git a/kernel/x86_64/srot.c b/kernel/x86_64/srot.c index 3264d251a..587cf8e40 100644 --- a/kernel/x86_64/srot.c +++ b/kernel/x86_64/srot.c @@ -2,7 +2,7 @@ #if defined(SKYLAKEX) #include "srot_microk_skylakex-2.c" -#elif defined(HASWELL) +#elif defined(HASWELL) || defined(ZEN) #include "srot_microk_haswell-2.c" #endif From ce7ddd8921fa784079face668eab93c778623cac Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Thu, 11 Feb 2021 09:25:36 +0100 Subject: [PATCH 086/134] Use Haswell optimizations for Zen as well --- kernel/x86_64/sasum.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/x86_64/sasum.c b/kernel/x86_64/sasum.c index d0cea9bee..a021741c7 100644 --- a/kernel/x86_64/sasum.c +++ b/kernel/x86_64/sasum.c @@ -11,7 +11,7 @@ #if defined(SKYLAKEX) #include "sasum_microk_skylakex-2.c" -#elif defined(HASWELL) +#elif defined(HASWELL) || defined(ZEN) #include "sasum_microk_haswell-2.c" #endif From 47691c031fa128ed65f630dd009a943465a2d92f Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Thu, 11 Feb 2021 09:26:15 +0100 Subject: [PATCH 087/134] Use Haswell optimizations for Zen as well --- kernel/x86_64/dasum.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/x86_64/dasum.c b/kernel/x86_64/dasum.c index 534f257d2..8af9e798b 100644 --- a/kernel/x86_64/dasum.c +++ b/kernel/x86_64/dasum.c @@ -6,7 +6,7 @@ #if defined(SKYLAKEX) #include "dasum_microk_skylakex-2.c" -#elif defined(HASWELL) +#elif defined(HASWELL) || defined(ZEN) #include "dasum_microk_haswell-2.c" #endif From ae53e3e23343739e61439e39cbcac1f0d684b134 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Thu, 11 Feb 2021 20:16:27 +0100 Subject: [PATCH 088/134] Recognize Intel Tiger Lake as SkylakeX --- cpuid_x86.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/cpuid_x86.c b/cpuid_x86.c index aca37da45..44704fcd9 100644 --- a/cpuid_x86.c +++ b/cpuid_x86.c @@ -1418,6 +1418,15 @@ int get_cpuname(void){ case 9: case 8: switch (model) { + case 
12: // Tiger Lake + if(support_avx512()) + return CPUTYPE_SKYLAKEX; + if(support_avx2()) + return CPUTYPE_HASWELL; + if(support_avx()) + return CPUTYPE_SANDYBRIDGE; + else + return CPUTYPE_NEHALEM; case 14: // Kaby Lake and refreshes if(support_avx2()) return CPUTYPE_HASWELL; @@ -2124,6 +2133,16 @@ int get_coretype(void){ break; case 9: case 8: + if (model == 12) { // Tiger Lake + if(support_avx512()) + return CPUTYPE_SKYLAKEX; + if(support_avx2()) + return CPUTYPE_HASWELL; + if(support_avx()) + return CPUTYPE_SANDYBRIDGE; + else + return CPUTYPE_NEHALEM; + } if (model == 14) { // Kaby Lake if(support_avx()) #ifndef NO_AVX2 From e4e5042e3859583387eb43c143c57bab671002a9 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Thu, 11 Feb 2021 20:17:11 +0100 Subject: [PATCH 089/134] Recognize Intel Tiger Lake as SkylakeX --- driver/others/dynamic.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/driver/others/dynamic.c b/driver/others/dynamic.c index 7845d6951..158e1b3da 100644 --- a/driver/others/dynamic.c +++ b/driver/others/dynamic.c @@ -644,6 +644,21 @@ static gotoblas_t *get_coretype(void){ return NULL; case 9: case 8: + if (model == 12) { // Tiger Lake + if (support_avx512()) + return &gotoblas_SKYLAKEX; + if(support_avx2()){ + openblas_warning(FALLBACK_VERBOSE, HASWELL_FALLBACK); + return &gotoblas_HASWELL; + } + if(support_avx()) { + openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK); + return &gotoblas_SANDYBRIDGE; + } else { + openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK); + return &gotoblas_NEHALEM; + } + } if (model == 14 ) { // Kaby Lake, Coffee Lake if(support_avx2()) return &gotoblas_HASWELL; From 63fa6c832ea142ecac3c61e2ce542949ae8ccdcb Mon Sep 17 00:00:00 2001 From: Rajalakshmi Srinivasaraghavan Date: Thu, 11 Feb 2021 21:28:03 -0600 Subject: [PATCH 090/134] Fix build issue on POWER8 with DYNAMIC_ARCH Running make DYNAMIC_ARCH=1 on POWER 8 BE with gcc10.2 version, gives the following error due to the difference in 
UNROLL_M/N. 'No rule to make target 'dgemm_incopy_POWER10.o', needed by kernel' --- param.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/param.h b/param.h index 6a790ab61..9ba25de6a 100644 --- a/param.h +++ b/param.h @@ -2443,8 +2443,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define SGEMM_DEFAULT_UNROLL_M 16 #define SGEMM_DEFAULT_UNROLL_N 8 +#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) +#define DGEMM_DEFAULT_UNROLL_M 16 +#define DGEMM_DEFAULT_UNROLL_N 4 +#else #define DGEMM_DEFAULT_UNROLL_M 8 #define DGEMM_DEFAULT_UNROLL_N 8 +#endif #define CGEMM_DEFAULT_UNROLL_M 8 #define CGEMM_DEFAULT_UNROLL_N 4 #define ZGEMM_DEFAULT_UNROLL_M 8 From b0bded3f2f3da67a1e8ac1ab10a04a73838a13cd Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Thu, 18 Feb 2021 11:14:05 +0100 Subject: [PATCH 091/134] Fix get_num_procs() in the USE_TLS branch for non-glibc systems --- driver/others/memory.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/driver/others/memory.c b/driver/others/memory.c index 0d4b2ff31..75203a7b0 100644 --- a/driver/others/memory.c +++ b/driver/others/memory.c @@ -222,11 +222,11 @@ int get_num_procs(void); #else int get_num_procs(void) { static int nums = 0; + +#if defined(__GLIBC_PREREQ) cpu_set_t cpuset,*cpusetp; size_t size; int ret; - -#if defined(__GLIBC_PREREQ) #if !__GLIBC_PREREQ(2, 7) int i; #if !__GLIBC_PREREQ(2, 6) From dbbf92c1d120c22c0ce7d5b8e1d7ec35f9bace34 Mon Sep 17 00:00:00 2001 From: Peter Hawkins Date: Thu, 18 Feb 2021 13:46:50 -0500 Subject: [PATCH 092/134] Fix race in blas_thread_shutdown. blas_server_avail was read without holding server_lock. If multiple threads call blas_thread_shutdown simultaneously, for example, by calling fork(), then they can attempt to shut down multiple times. This can lead to a segmentation fault. 
--- driver/others/blas_server.c | 37 +++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/driver/others/blas_server.c b/driver/others/blas_server.c index 5e0943c2e..fa07a1ea4 100644 --- a/driver/others/blas_server.c +++ b/driver/others/blas_server.c @@ -1024,38 +1024,39 @@ int BLASFUNC(blas_thread_shutdown)(void){ int i; - if (!blas_server_avail) return 0; - LOCK_COMMAND(&server_lock); - for (i = 0; i < blas_num_threads - 1; i++) { + if (blas_server_avail) { + + for (i = 0; i < blas_num_threads - 1; i++) { - pthread_mutex_lock (&thread_status[i].lock); + pthread_mutex_lock (&thread_status[i].lock); - atomic_store_queue(&thread_status[i].queue, (blas_queue_t *)-1); - thread_status[i].status = THREAD_STATUS_WAKEUP; - pthread_cond_signal (&thread_status[i].wakeup); + atomic_store_queue(&thread_status[i].queue, (blas_queue_t *)-1); + thread_status[i].status = THREAD_STATUS_WAKEUP; + pthread_cond_signal (&thread_status[i].wakeup); - pthread_mutex_unlock(&thread_status[i].lock); + pthread_mutex_unlock(&thread_status[i].lock); - } + } - for(i = 0; i < blas_num_threads - 1; i++){ - pthread_join(blas_threads[i], NULL); - } + for(i = 0; i < blas_num_threads - 1; i++){ + pthread_join(blas_threads[i], NULL); + } - for(i = 0; i < blas_num_threads - 1; i++){ - pthread_mutex_destroy(&thread_status[i].lock); - pthread_cond_destroy (&thread_status[i].wakeup); - } + for(i = 0; i < blas_num_threads - 1; i++){ + pthread_mutex_destroy(&thread_status[i].lock); + pthread_cond_destroy (&thread_status[i].wakeup); + } #ifdef NEED_STACKATTR - pthread_attr_destory(&attr); + pthread_attr_destroy(&attr); #endif - blas_server_avail = 0; + blas_server_avail = 0; + } UNLOCK_COMMAND(&server_lock); return 0; From 1a3ad4b670e2d8b28ce8616202970c3b6359e407 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Mon, 22 Feb 2021 19:40:36 +0100 Subject: [PATCH 093/134] Fix signatures of the TLS-mode dll_callback and p_process_term functions for Win64 --- 
driver/others/memory.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/driver/others/memory.c b/driver/others/memory.c index 75203a7b0..63fa6a566 100644 --- a/driver/others/memory.c +++ b/driver/others/memory.c @@ -1619,10 +1619,12 @@ static int on_process_term(void) #else #pragma data_seg(".CRT$XLB") #endif -static void (APIENTRY *dll_callback)(HINSTANCE h, DWORD ul_reason_for_call, PVOID pv) = DllMain; + #ifdef _WIN64 +static const PIMAGE_TLS_CALLBACK dll_callback(HINSTANCE h, DWORD ul_reason_for_call, PVOID pv) = DllMain; #pragma const_seg() #else +static void (APIENTRY *dll_callback)(HINSTANCE h, DWORD ul_reason_for_call, PVOID pv) = DllMain; #pragma data_seg() #endif @@ -1631,10 +1633,12 @@ static void (APIENTRY *dll_callback)(HINSTANCE h, DWORD ul_reason_for_call, PVOI #else #pragma data_seg(".CRT$XTU") #endif -static int(*p_process_term)(void) = on_process_term; + #ifdef _WIN64 +static const int(*p_process_term)(void) = on_process_term; #pragma const_seg() #else +static int(*p_process_term)(void) = on_process_term; #pragma data_seg() #endif #endif From b1eed27a542019a102f97647aa77a219a5124783 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Mon, 22 Feb 2021 21:35:42 +0100 Subject: [PATCH 094/134] Replace naive omatcopy_rt with 4x4 blocked implementation as suggested by MigMuc in issue 2532 --- kernel/arm/omatcopy_rt.c | 224 ++++++++++++++++++++++++++++++++++----- 1 file changed, 198 insertions(+), 26 deletions(-) diff --git a/kernel/arm/omatcopy_rt.c b/kernel/arm/omatcopy_rt.c index 9d58350d5..d6a3df619 100644 --- a/kernel/arm/omatcopy_rt.c +++ b/kernel/arm/omatcopy_rt.c @@ -1,5 +1,5 @@ /*************************************************************************** -Copyright (c) 2013, The OpenBLAS Project +Copyright (c) 2021, The OpenBLAS Project All rights reserved. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -27,36 +27,208 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "common.h" -/***************************************************** - * 2014/06/09 Saar - * - * Order rowMajor - * Trans - * -******************************************************/ - int CNAME(BLASLONG rows, BLASLONG cols, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG ldb) -{ - BLASLONG i,j; - FLOAT *aptr,*bptr; - if ( rows <= 0 ) return(0); - if ( cols <= 0 ) return(0); + BLASLONG i, j; + FLOAT *a_offset, *a_offset1, *a_offset2, *a_offset3, *a_offset4; + FLOAT *b_offset, *b_offset1, *b_offset2, *b_offset3, *b_offset4; - aptr = a; + if (rows <= 0) return 0; + if (cols <= 0) return 0; - for ( i=0; i> 2); + if (i > 0) { + do { + a_offset1 = a_offset; + a_offset2 = a_offset1 + lda; + a_offset3 = a_offset2 + lda; + a_offset4 = a_offset3 + lda; + a_offset += 4 * lda; + b_offset1 = b_offset; + b_offset2 = b_offset1 + ldb; + b_offset3 = b_offset2 + ldb; + b_offset4 = b_offset3 + ldb; + b_offset += 4; + + j = (cols >> 2); + if (j > 0) { + do { + /* Column 1 of MAT_B */ + *(b_offset1 + 0) = *(a_offset1 + 0)*alpha; // Row 1 of MAT_A + *(b_offset2 + 0) = *(a_offset1 + 1)*alpha; + *(b_offset3 + 0) = *(a_offset1 + 2)*alpha; + *(b_offset4 + 0) = *(a_offset1 + 3)*alpha; + + /* Column 2 of MAT_B */ + *(b_offset1 + 1) = *(a_offset2 + 0)*alpha; // Row 2 of MAT_A + *(b_offset2 + 1) = *(a_offset2 + 1)*alpha; + *(b_offset3 + 1) = *(a_offset2 + 2)*alpha; + *(b_offset4 + 1) = *(a_offset2 + 3)*alpha; + + /* Column 3 of MAT_B */ + *(b_offset1 + 2) = *(a_offset3 + 0)*alpha; // Row 3 of MAT_A + *(b_offset2 + 2) = *(a_offset3 + 1)*alpha; + *(b_offset3 + 2) = *(a_offset3 + 2)*alpha; + *(b_offset4 + 2) = *(a_offset3 + 3)*alpha; + + /* Column 4 of MAT_B */ + *(b_offset1 + 3) = *(a_offset4 + 0)*alpha; // Row 4 of MAT_A + *(b_offset2 + 3) = 
*(a_offset4 + 1)*alpha; + *(b_offset3 + 3) = *(a_offset4 + 2)*alpha; + *(b_offset4 + 3) = *(a_offset4 + 3)*alpha; + + a_offset1 += 4; + a_offset2 += 4; + a_offset3 += 4; + a_offset4 += 4; + b_offset1 += ldb * 4; + b_offset2 += ldb * 4; + b_offset3 += ldb * 4; + b_offset4 += ldb * 4; + + j--; + } while (j > 0); + } // if(j > 0) + + + if (cols & 2) { + *(b_offset1 + 0) = *(a_offset1 + 0)*alpha; + *(b_offset2 + 0) = *(a_offset1 + 1)*alpha; + + *(b_offset1 + 1) = *(a_offset2 + 0)*alpha; + *(b_offset2 + 1) = *(a_offset2 + 1)*alpha; + + *(b_offset1 + 2) = *(a_offset3 + 0)*alpha; + *(b_offset2 + 2) = *(a_offset3 + 1)*alpha; + + *(b_offset1 + 3) = *(a_offset4 + 0)*alpha; + *(b_offset2 + 3) = *(a_offset4 + 1)*alpha; + + a_offset1 += 2; + a_offset2 += 2; + a_offset3 += 2; + a_offset4 += 2; + + b_offset1 += ldb*2; + + } + + if (cols & 1) { + *(b_offset1 + 0) = *(a_offset1 + 0)*alpha; + + *(b_offset1 + 1) = *(a_offset2 + 0)*alpha; + + *(b_offset1 + 2) = *(a_offset3 + 0)*alpha; + + *(b_offset1 + 3) = *(a_offset4 + 0)*alpha; + } + + i--; + } while (i > 0); + } + + + if (rows & 2) { + a_offset1 = a_offset; + a_offset2 = a_offset1 + lda; + a_offset += 2 * lda; + + b_offset1 = b_offset; + b_offset2 = b_offset1 + ldb; + b_offset3 = b_offset2 + ldb; + b_offset4 = b_offset3 + ldb; + b_offset += 2; + + j = (cols >> 2); + if (j > 0){ + do { + *(b_offset1 + 0) = *(a_offset1 + 0)*alpha; + *(b_offset2 + 0) = *(a_offset1 + 1)*alpha; + *(b_offset3 + 0) = *(a_offset1 + 2)*alpha; + *(b_offset4 + 0) = *(a_offset1 + 3)*alpha; + + *(b_offset1 + 1) = *(a_offset2 + 0)*alpha; + *(b_offset2 + 1) = *(a_offset2 + 1)*alpha; + *(b_offset3 + 1) = *(a_offset2 + 2)*alpha; + *(b_offset4 + 1) = *(a_offset2 + 3)*alpha; + + a_offset1 += 4; + a_offset2 += 4; + b_offset1 += ldb * 4; + b_offset2 += ldb * 4; + b_offset3 += ldb * 4; + b_offset4 += ldb * 4; + + j--; + } while (j > 0); + } + + + if (cols & 2){ + *(b_offset1 + 0) = *(a_offset1 + 0)*alpha; + *(b_offset2 + 0) = *(a_offset1 + 1)*alpha; + + *(b_offset1 + 
1) = *(a_offset2 + 0)*alpha; + *(b_offset2 + 1) = *(a_offset2 + 1)*alpha; + + a_offset1 += 2; + a_offset2 += 2; + b_offset1 += ldb*2; + + } + + + if (cols & 1){ + *(b_offset1 + 0) = *(a_offset1 + 0)*alpha; + *(b_offset1 + 1) = *(a_offset2 + 0)*alpha; + } + } // if (rows & 2) + + + if (rows & 1) { + a_offset1 = a_offset; + a_offset += lda; + + b_offset1 = b_offset; + b_offset2 = b_offset1 + ldb; + b_offset3 = b_offset2 + ldb; + b_offset4 = b_offset3 + ldb; + + j = (cols >> 2); + if (j > 0){ + do { + *(b_offset1 + 0) = *(a_offset1 + 0)*alpha; + *(b_offset2 + 0) = *(a_offset1 + 1)*alpha; + *(b_offset3 + 0) = *(a_offset1 + 2)*alpha; + *(b_offset4 + 0) = *(a_offset1 + 3)*alpha; + + a_offset1 += 4; + b_offset1 += ldb * 4; + b_offset2 += ldb * 4; + b_offset3 += ldb * 4; + b_offset4 += ldb * 4; + + j--; + } while (j > 0); + } + + if (cols & 2){ + *(b_offset1 + 0) = *(a_offset1 + 0)*alpha; + *(b_offset2 + 0) = *(a_offset1 + 1)*alpha; + + a_offset1 += 2; + b_offset1 += ldb * 2; + } + + if (cols & 1){ + *(b_offset1 + 0) = *(a_offset1 + 0)*alpha; + } + } + + return 0; } - From 0a4546b742104580cee77fe8f01d9cbb20d4161b Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Tue, 23 Feb 2021 13:14:35 +0100 Subject: [PATCH 095/134] Typo fix --- kernel/arm/omatcopy_rt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/arm/omatcopy_rt.c b/kernel/arm/omatcopy_rt.c index d6a3df619..3d90ac6e4 100644 --- a/kernel/arm/omatcopy_rt.c +++ b/kernel/arm/omatcopy_rt.c @@ -28,7 +28,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "common.h" int CNAME(BLASLONG rows, BLASLONG cols, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG ldb) - +{ BLASLONG i, j; FLOAT *a_offset, *a_offset1, *a_offset2, *a_offset3, *a_offset4; FLOAT *b_offset, *b_offset1, *b_offset2, *b_offset3, *b_offset4; From cceeee7806a6647ef06044fd74c4349565eeb1f5 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Wed, 24 Feb 2021 09:00:54 +0100 Subject: [PATCH 096/134] Add optimized omatcopy_rt --- kernel/x86_64/omatcopy_rt.c | 371 ++++++++++++++++++++++++++++++++++++ 1 file changed, 371 insertions(+) create mode 100644 kernel/x86_64/omatcopy_rt.c diff --git a/kernel/x86_64/omatcopy_rt.c b/kernel/x86_64/omatcopy_rt.c new file mode 100644 index 000000000..ac25ea74b --- /dev/null +++ b/kernel/x86_64/omatcopy_rt.c @@ -0,0 +1,371 @@ +/*************************************************************************** +Copyright (c) 2021, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*****************************************************************************/ + +#include "common.h" + +#ifdef HAVE_AVX + +/* +r: %0 = src, %1 = dst, %2 = src_ld, %3 = dst_ld, %4 = dst_tmp */ +/* m: %5 = num_rows, %6 = alpha */ +/* xmm15 = alpha */ +#define TRANS_4x4(a1_no,a2_no,a3_no,a4_no,t1_no,t2_no,t3_no,t4_no)\ + "vunpcklps %%xmm"#a2_no",%%xmm"#a1_no",%%xmm"#t1_no"; vunpckhps %%xmm"#a2_no",%%xmm"#a1_no",%%xmm"#t2_no";"\ + "vunpcklps %%xmm"#a4_no",%%xmm"#a3_no",%%xmm"#t3_no"; vunpckhps %%xmm"#a4_no",%%xmm"#a3_no",%%xmm"#t4_no";"\ + "vunpcklpd %%xmm"#t3_no",%%xmm"#t1_no",%%xmm"#a1_no"; vunpckhpd %%xmm"#t3_no",%%xmm"#t1_no",%%xmm"#a2_no";"\ + "vunpcklpd %%xmm"#t4_no",%%xmm"#t2_no",%%xmm"#a3_no"; vunpckhpd %%xmm"#t4_no",%%xmm"#t2_no",%%xmm"#a4_no";" + +#define TRANS_4x8(a1_no,a2_no,a3_no,a4_no,t1_no,t2_no,t3_no,t4_no)\ + "vunpcklps %%ymm"#a2_no",%%ymm"#a1_no",%%ymm"#t1_no"; vunpckhps %%ymm"#a2_no",%%ymm"#a1_no",%%ymm"#t2_no";"\ + "vunpcklps %%ymm"#a4_no",%%ymm"#a3_no",%%ymm"#t3_no"; vunpckhps %%ymm"#a4_no",%%ymm"#a3_no",%%ymm"#t4_no";"\ + "vunpcklpd %%ymm"#t3_no",%%ymm"#t1_no",%%ymm"#a1_no"; vunpckhpd %%ymm"#t3_no",%%ymm"#t1_no",%%ymm"#a2_no";"\ + "vunpcklpd %%ymm"#t4_no",%%ymm"#t2_no",%%ymm"#a3_no"; vunpckhpd %%ymm"#t4_no",%%ymm"#t2_no",%%ymm"#a4_no";" + +#define SAVE_4x4(b1_no,b2_no,b3_no,b4_no)\ + "vmovups %%xmm"#b1_no",(%4); vmovups %%xmm"#b2_no",(%4,%3,1); leaq (%4,%3,2),%4;"\ + "vmovups %%xmm"#b3_no",(%4); vmovups %%xmm"#b4_no",(%4,%3,1); leaq 
(%4,%3,2),%4;" + +#define SAVE_4x8(b1_no,b2_no,b3_no,b4_no) SAVE_4x4(b1_no,b2_no,b3_no,b4_no)\ + "vextractf128 $1,%%ymm"#b1_no",(%4); vextractf128 $1,%%ymm"#b2_no",(%4,%3,1); leaq (%4,%3,2),%4;"\ + "vextractf128 $1,%%ymm"#b3_no",(%4); vextractf128 $1,%%ymm"#b4_no",(%4,%3,1); leaq (%4,%3,2),%4;" + +#define COPY_4x16 "movq %1,%4; addq $16,%1;"\ + "vmulps (%0),%%ymm15,%%ymm0; vmulps 32(%0),%%ymm15,%%ymm4; vmulps (%0,%2,1),%%ymm15,%%ymm1; vmulps 32(%0,%2,1),%%ymm15,%%ymm5; leaq (%0,%2,2),%0;"\ + "vmulps (%0),%%ymm15,%%ymm2; vmulps 32(%0),%%ymm15,%%ymm6; vmulps (%0,%2,1),%%ymm15,%%ymm3; vmulps 32(%0,%2,1),%%ymm15,%%ymm7; leaq (%0,%2,2),%0;"\ + TRANS_4x8(0,1,2,3,8,9,10,11) SAVE_4x8(0,1,2,3)\ + TRANS_4x8(4,5,6,7,8,9,10,11) SAVE_4x8(4,5,6,7) + +#define COPY_4x8 "movq %1,%4; addq $16,%1;"\ + "vmulps (%0),%%ymm15,%%ymm0; vmulps (%0,%2,1),%%ymm15,%%ymm1; leaq (%0,%2,2),%0;"\ + "vmulps (%0),%%ymm15,%%ymm2; vmulps (%0,%2,1),%%ymm15,%%ymm3; leaq (%0,%2,2),%0;"\ + TRANS_4x8(0,1,2,3,8,9,10,11) SAVE_4x8(0,1,2,3) + +#define COPY_4x4 "movq %1,%4; addq $16,%1;"\ + "vmulps (%0),%%xmm15,%%xmm0; vmulps (%0,%2,1),%%xmm15,%%xmm1; leaq (%0,%2,2),%0;"\ + "vmulps (%0),%%xmm15,%%xmm2; vmulps (%0,%2,1),%%xmm15,%%xmm3; leaq (%0,%2,2),%0;"\ + TRANS_4x4(0,1,2,3,8,9,10,11) SAVE_4x4(0,1,2,3) + +#define COPY_4x2 \ + "vmovsd (%0),%%xmm0; vmovhpd (%0,%2,1),%%xmm0,%%xmm0; vmulps %%xmm15,%%xmm0,%%xmm0; leaq (%0,%2,2),%0;"\ + "vmovsd (%0),%%xmm1; vmovhpd (%0,%2,1),%%xmm1,%%xmm1; vmulps %%xmm15,%%xmm1,%%xmm1; leaq (%0,%2,2),%0;"\ + "vpermilps $216,%%xmm0,%%xmm0; vpermilps $216,%%xmm1,%%xmm1; vunpcklpd %%xmm1,%%xmm0,%%xmm2; vunpckhpd %%xmm1,%%xmm0,%%xmm3;"\ + "vmovups %%xmm2,(%1); vmovups %%xmm3,(%1,%3,1); addq $16,%1;" + +#define COPY_4x1 \ + "vmovss (%0),%%xmm0; vinsertps $16,(%0,%2,1),%%xmm0,%%xmm0; leaq (%0,%2,2),%0;"\ + "vinsertps $32,(%0),%%xmm0,%%xmm0; vinsertps $48,(%0,%2,1),%%xmm0,%%xmm0; leaq (%0,%2,2),%0;"\ + "vmulps %%xmm15,%%xmm0,%%xmm0; vmovups %%xmm0,(%1); addq $16,%1;" + +#define 
SAVE_2x4(c1_no,c2_no,t1_no,t2_no) \ + "vunpcklps %%xmm"#c2_no",%%xmm"#c1_no",%%xmm"#t1_no"; vmulps %%xmm15,%%xmm"#t1_no",%%xmm"#t1_no";"\ + "vmovsd %%xmm"#t1_no",(%4); vmovhpd %%xmm"#t1_no",(%4,%3,1); leaq (%4,%3,2),%4;"\ + "vunpckhps %%xmm"#c2_no",%%xmm"#c1_no",%%xmm"#t2_no"; vmulps %%xmm15,%%xmm"#t2_no",%%xmm"#t2_no";"\ + "vmovsd %%xmm"#t2_no",(%4); vmovhpd %%xmm"#t2_no",(%4,%3,1); leaq (%4,%3,2),%4;" + +#define COPY_2x16 "movq %1,%4; addq $8,%1;"\ + "vmovups (%0),%%ymm0; vmovups 32(%0),%%ymm2; vmovups (%0,%2,1),%%ymm1; vmovups 32(%0,%2,1),%%ymm3; leaq (%0,%2,2),%0;"\ + "vextractf128 $1,%%ymm0,%%xmm4; vextractf128 $1,%%ymm2,%%xmm6; vextractf128 $1,%%ymm1,%%xmm5; vextractf128 $1,%%ymm3,%%xmm7;"\ + SAVE_2x4(0,1,8,9) SAVE_2x4(4,5,8,9) SAVE_2x4(2,3,8,9) SAVE_2x4(6,7,8,9) + +#define COPY_2x8 "movq %1,%4; addq $8,%1;"\ + "vmovups (%0),%%ymm0; vmovups (%0,%2,1),%%ymm1; leaq (%0,%2,2),%0;"\ + "vextractf128 $1,%%ymm0,%%xmm2; vextractf128 $1,%%ymm1,%%xmm3;"\ + SAVE_2x4(0,1,4,5) SAVE_2x4(2,3,4,5) + +#define COPY_2x4 "movq %1,%4; addq $8,%1;"\ + "vmovups (%0),%%xmm0; vmovups (%0,%2,1),%%xmm1; leaq (%0,%2,2),%0;"\ + SAVE_2x4(0,1,4,5) + +#define COPY_2x2 \ + "vmovsd (%0),%%xmm0; vmovhpd (%0,%2,1),%%xmm0,%%xmm0; vmulps %%xmm15,%%xmm0,%%xmm0; leaq (%0,%2,2),%0; vpermilps $216,%%xmm0,%%xmm0;"\ + "vmovsd %%xmm0,(%1); vmovhpd %%xmm0,(%1,%3,1); addq $8,%1;" + +#define COPY_2x1 \ + "vmovss (%0),%%xmm0; vinsertps $16,(%0,%2,1),%%xmm0,%%xmm0; vmulps %%xmm15,%%xmm0,%%xmm0; leaq (%0,%2,2),%0; vmovsd %%xmm0,(%1); addq $8,%1;" + +#define SAVE_1x4(c1_no)\ + "vmulps %%xmm15,%%xmm"#c1_no",%%xmm"#c1_no"; vmovss %%xmm"#c1_no",(%4); vextractps $1,%%xmm"#c1_no",(%4,%3,1); leaq (%4,%3,2),%4;"\ + "vextractps $2,%%xmm"#c1_no",(%4); vextractps $3,%%xmm"#c1_no",(%4,%3,1); leaq (%4,%3,2),%4;" + +#define COPY_1x16 "movq %1,%4; addq $4,%1;"\ + "vmovups (%0),%%xmm1;" SAVE_1x4(1) "vmovups 16(%0),%%xmm2;" SAVE_1x4(2)\ + "vmovups 32(%0),%%xmm1;" SAVE_1x4(1) "vmovups 48(%0),%%xmm2;" SAVE_1x4(2) "addq %2,%0;" 
+ +#define COPY_1x8 "movq %1,%4; addq $4,%1;"\ + "vmovups (%0),%%xmm1;" SAVE_1x4(1) "vmovups 16(%0),%%xmm2;" SAVE_1x4(2) "addq %2,%0;" + +#define COPY_1x4 "movq %1,%4; addq $4,%1; vmovups (%0),%%xmm1;" SAVE_1x4(1) "addq %2,%0;" + +#define COPY_1x2 "vmovsd (%0),%%xmm1; addq %2,%0; vmulps %%xmm15,%%xmm1,%%xmm1; vmovss %%xmm1,(%1); vextractps $1,%%xmm1,(%1,%3,1); addq $4,%1;" + +#define COPY_1x1 "vmulss (%0),%%xmm15,%%xmm1; vmovss %%xmm1,(%1); addq %2,%0; addq $4,%1;" + +#define COMPUTE(ndim){\ + src = src_base; dst = dst_base;\ + __asm__ __volatile__(\ + "vbroadcastss %6,%%ymm15; movq %5,%%r11; cmpq $4,%%r11; jb "#ndim"32f;"\ + #ndim"31:\n\t"\ + COPY_4x##ndim "subq $4,%%r11; cmpq $4,%%r11; jnb "#ndim"31b;"\ + #ndim"32:\n\t"\ + "cmpq $2,%%r11; jb "#ndim"33f;"\ + COPY_2x##ndim "subq $2,%%r11;"\ + #ndim"33:\n\t"\ + "testq %%r11,%%r11; jz "#ndim"34f;"\ + COPY_1x##ndim "subq $1,%%r11;"\ + #ndim"34:\n\t"\ + :"+r"(src),"+r"(dst),"+r"(src_ld_bytes),"+r"(dst_ld_bytes),"+r"(dst_tmp):"m"(num_rows),"m"(ALPHA):"r11","cc","memory"\ + ,"xmm0","xmm1","xmm2","xmm3","xmm4","xmm5","xmm6","xmm7","xmm8","xmm9","xmm10","xmm11","xmm12","xmm13","xmm14","xmm15");\ +} +int CNAME(BLASLONG rows, BLASLONG cols, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG ldb){ + float *src, *dst, *dst_tmp, *src_base, *dst_base; + uint64_t src_ld_bytes = (uint64_t)lda * sizeof(float), dst_ld_bytes = (uint64_t)ldb * sizeof(float), num_rows = 0; + BLASLONG cols_left, rows_done; float ALPHA = alpha; + if(ALPHA==0.0){ + dst_base = b; + for(cols_left=cols;cols_left>0;cols_left--) {memset(dst_base,0,rows*sizeof(float)); dst_base += ldb;} + return 0; + } + for(rows_done=0;rows_done<rows;rows_done+=num_rows){ + num_rows = rows-rows_done; + if(num_rows > ROWS_OF_BLOCK) num_rows = ROWS_OF_BLOCK; + cols_left = cols; src_base = a + (int64_t)lda * (int64_t)rows_done; dst_base = b + rows_done; + if(ldb%1024>3 && ldb%1024<1021) for(;cols_left>15;cols_left-=16){COMPUTE(16) src_base += 16; dst_base += 16 * ldb;} + for(;cols_left>7;cols_left-=8){COMPUTE(8) src_base += 8; dst_base += 8 * 
ldb;} + for(;cols_left>3;cols_left-=4){COMPUTE(4) src_base += 4; dst_base += 4 * ldb;} + for(;cols_left>1;cols_left-=2){COMPUTE(2) src_base += 2; dst_base += 2 * ldb;} + if(cols_left>0){COMPUTE(1) src_base ++; dst_base += ldb;} + } + return 0; +} + +#else + +int CNAME(BLASLONG rows, BLASLONG cols, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG ldb) + + BLASLONG i, j; + FLOAT *a_offset, *a_offset1, *a_offset2, *a_offset3, *a_offset4; + FLOAT *b_offset, *b_offset1, *b_offset2, *b_offset3, *b_offset4; + + if (rows <= 0) return 0; + if (cols <= 0) return 0; + + a_offset = a; + b_offset = b; + + i = (rows >> 2); + if (i > 0) { + do { + a_offset1 = a_offset; + a_offset2 = a_offset1 + lda; + a_offset3 = a_offset2 + lda; + a_offset4 = a_offset3 + lda; + a_offset += 4 * lda; + + b_offset1 = b_offset; + b_offset2 = b_offset1 + ldb; + b_offset3 = b_offset2 + ldb; + b_offset4 = b_offset3 + ldb; + b_offset += 4; + + j = (cols >> 2); + if (j > 0) { + do { + /* Column 1 of MAT_B */ + *(b_offset1 + 0) = *(a_offset1 + 0)*alpha; // Row 1 of MAT_A + *(b_offset2 + 0) = *(a_offset1 + 1)*alpha; + *(b_offset3 + 0) = *(a_offset1 + 2)*alpha; + *(b_offset4 + 0) = *(a_offset1 + 3)*alpha; + + /* Column 2 of MAT_B */ + *(b_offset1 + 1) = *(a_offset2 + 0)*alpha; // Row 2 of MAT_A + *(b_offset2 + 1) = *(a_offset2 + 1)*alpha; + *(b_offset3 + 1) = *(a_offset2 + 2)*alpha; + *(b_offset4 + 1) = *(a_offset2 + 3)*alpha; + + /* Column 3 of MAT_B */ + *(b_offset1 + 2) = *(a_offset3 + 0)*alpha; // Row 3 of MAT_A + *(b_offset2 + 2) = *(a_offset3 + 1)*alpha; + *(b_offset3 + 2) = *(a_offset3 + 2)*alpha; + *(b_offset4 + 2) = *(a_offset3 + 3)*alpha; + + /* Column 4 of MAT_B */ + *(b_offset1 + 3) = *(a_offset4 + 0)*alpha; // Row 4 of MAT_A + *(b_offset2 + 3) = *(a_offset4 + 1)*alpha; + *(b_offset3 + 3) = *(a_offset4 + 2)*alpha; + *(b_offset4 + 3) = *(a_offset4 + 3)*alpha; + + a_offset1 += 4; + a_offset2 += 4; + a_offset3 += 4; + a_offset4 += 4; + b_offset1 += ldb * 4; + b_offset2 += ldb * 4; + 
b_offset3 += ldb * 4; + b_offset4 += ldb * 4; + + j--; + } while (j > 0); + } // if(j > 0) + + + if (cols & 2) { + *(b_offset1 + 0) = *(a_offset1 + 0)*alpha; + *(b_offset2 + 0) = *(a_offset1 + 1)*alpha; + + *(b_offset1 + 1) = *(a_offset2 + 0)*alpha; + *(b_offset2 + 1) = *(a_offset2 + 1)*alpha; + + *(b_offset1 + 2) = *(a_offset3 + 0)*alpha; + *(b_offset2 + 2) = *(a_offset3 + 1)*alpha; + + *(b_offset1 + 3) = *(a_offset4 + 0)*alpha; + *(b_offset2 + 3) = *(a_offset4 + 1)*alpha; + + a_offset1 += 2; + a_offset2 += 2; + a_offset3 += 2; + a_offset4 += 2; + + b_offset1 += ldb*2; + + } + + if (cols & 1) { + *(b_offset1 + 0) = *(a_offset1 + 0)*alpha; + + *(b_offset1 + 1) = *(a_offset2 + 0)*alpha; + + *(b_offset1 + 2) = *(a_offset3 + 0)*alpha; + + *(b_offset1 + 3) = *(a_offset4 + 0)*alpha; + } + + i--; + } while (i > 0); + } + + + if (rows & 2) { + a_offset1 = a_offset; + a_offset2 = a_offset1 + lda; + a_offset += 2 * lda; + + b_offset1 = b_offset; + b_offset2 = b_offset1 + ldb; + b_offset3 = b_offset2 + ldb; + b_offset4 = b_offset3 + ldb; + b_offset += 2; + + j = (cols >> 2); + if (j > 0){ + do { + *(b_offset1 + 0) = *(a_offset1 + 0)*alpha; + *(b_offset2 + 0) = *(a_offset1 + 1)*alpha; + *(b_offset3 + 0) = *(a_offset1 + 2)*alpha; + *(b_offset4 + 0) = *(a_offset1 + 3)*alpha; + + *(b_offset1 + 1) = *(a_offset2 + 0)*alpha; + *(b_offset2 + 1) = *(a_offset2 + 1)*alpha; + *(b_offset3 + 1) = *(a_offset2 + 2)*alpha; + *(b_offset4 + 1) = *(a_offset2 + 3)*alpha; + + a_offset1 += 4; + a_offset2 += 4; + b_offset1 += ldb * 4; + b_offset2 += ldb * 4; + b_offset3 += ldb * 4; + b_offset4 += ldb * 4; + + j--; + } while (j > 0); + } + + + if (cols & 2){ + *(b_offset1 + 0) = *(a_offset1 + 0)*alpha; + *(b_offset2 + 0) = *(a_offset1 + 1)*alpha; + + *(b_offset1 + 1) = *(a_offset2 + 0)*alpha; + *(b_offset2 + 1) = *(a_offset2 + 1)*alpha; + + a_offset1 += 2; + a_offset2 += 2; + b_offset1 += ldb*2; + + } + + + if (cols & 1){ + *(b_offset1 + 0) = *(a_offset1 + 0)*alpha; + *(b_offset1 + 1) = *(a_offset2 
+ 0)*alpha; + } + } // if (rows & 2) + + + if (rows & 1) { + a_offset1 = a_offset; + a_offset += lda; + + b_offset1 = b_offset; + b_offset2 = b_offset1 + ldb; + b_offset3 = b_offset2 + ldb; + b_offset4 = b_offset3 + ldb; + + j = (cols >> 2); + if (j > 0){ + do { + *(b_offset1 + 0) = *(a_offset1 + 0)*alpha; + *(b_offset2 + 0) = *(a_offset1 + 1)*alpha; + *(b_offset3 + 0) = *(a_offset1 + 2)*alpha; + *(b_offset4 + 0) = *(a_offset1 + 3)*alpha; + + a_offset1 += 4; + b_offset1 += ldb * 4; + b_offset2 += ldb * 4; + b_offset3 += ldb * 4; + b_offset4 += ldb * 4; + + j--; + } while (j > 0); + } + + if (cols & 2){ + *(b_offset1 + 0) = *(a_offset1 + 0)*alpha; + *(b_offset2 + 0) = *(a_offset1 + 1)*alpha; + + a_offset1 += 2; + b_offset1 += ldb * 2; + } + + if (cols & 1){ + *(b_offset1 + 0) = *(a_offset1 + 0)*alpha; + } + } + + return 0; +} + +#endif From 6f5667b4d4f395a5ccc8458abd053a35c7744f1d Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Wed, 24 Feb 2021 09:03:41 +0100 Subject: [PATCH 097/134] Enable optimized S/D OMATCOPY_RT --- kernel/x86_64/KERNEL | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/x86_64/KERNEL b/kernel/x86_64/KERNEL index b92f480e9..5da79cc3f 100644 --- a/kernel/x86_64/KERNEL +++ b/kernel/x86_64/KERNEL @@ -489,3 +489,6 @@ XGEMM3MKERNEL = xgemm3m_kernel_2x2.S SSUMKERNEL = ../arm/sum.c DSUMKERNEL = ../arm/sum.c + +SOMATCOPY_RT = omatcopy_rt.c +DOMATCOPY_RT = omatcopy_rt.c From 325b398e3cefa8d04e6cb7e949d047e41e417271 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Wed, 24 Feb 2021 09:13:12 +0100 Subject: [PATCH 098/134] Update omatcopy_rt.c --- kernel/x86_64/omatcopy_rt.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/x86_64/omatcopy_rt.c b/kernel/x86_64/omatcopy_rt.c index ac25ea74b..e8cef22c1 100644 --- a/kernel/x86_64/omatcopy_rt.c +++ b/kernel/x86_64/omatcopy_rt.c @@ -29,7 +29,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#ifdef HAVE_AVX -/* +r: %0 = src, %1 = dst, %2 = src_ld, %3 = dst_ld, %4 = dst_tmp */ +#define ROWS_OF_BLOCK 384 + + /* +r: %0 = src, %1 = dst, %2 = src_ld, %3 = dst_ld, %4 = dst_tmp */ /* m: %5 = num_rows, %6 = alpha */ /* xmm15 = alpha */ #define TRANS_4x4(a1_no,a2_no,a3_no,a4_no,t1_no,t2_no,t3_no,t4_no)\ From 292d1af1a04c60a24219dcb5db25de003171a97f Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Wed, 24 Feb 2021 09:34:14 +0100 Subject: [PATCH 099/134] Update omatcopy_rt.c --- kernel/x86_64/omatcopy_rt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/x86_64/omatcopy_rt.c b/kernel/x86_64/omatcopy_rt.c index e8cef22c1..e695f00c5 100644 --- a/kernel/x86_64/omatcopy_rt.c +++ b/kernel/x86_64/omatcopy_rt.c @@ -166,7 +166,7 @@ int CNAME(BLASLONG rows, BLASLONG cols, FLOAT alpha, FLOAT *a, BLASLONG lda, FLO #else int CNAME(BLASLONG rows, BLASLONG cols, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG ldb) - +{ BLASLONG i, j; FLOAT *a_offset, *a_offset1, *a_offset2, *a_offset3, *a_offset4; FLOAT *b_offset, *b_offset1, *b_offset2, *b_offset3, *b_offset4; From ec6b354c32e939605331e2081590815a86413ca8 Mon Sep 17 00:00:00 2001 From: Harmen Stoppels Date: Wed, 24 Feb 2021 14:07:20 +0100 Subject: [PATCH 100/134] use /usr/bin/env perl --- c_check | 2 +- exports/gensymbol | 2 +- f_check | 2 +- interface/create | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/c_check b/c_check index 9c8b1abac..e24943a29 100644 --- a/c_check +++ b/c_check @@ -1,4 +1,4 @@ -#!/usr/bin/perl +#!/usr/bin/env perl #use File::Basename; # use File::Temp qw(tempfile); diff --git a/exports/gensymbol b/exports/gensymbol index 857a17a9e..e7210a030 100644 --- a/exports/gensymbol +++ b/exports/gensymbol @@ -1,4 +1,4 @@ -#!/usr/bin/perl +#!/usr/bin/env perl # Changelog # 2017/09/03 staticfloat diff --git a/f_check b/f_check index ffe9c6b46..d044f2547 100644 --- a/f_check +++ b/f_check @@ -1,4 +1,4 @@ -#!/usr/bin/perl +#!/usr/bin/env perl $hostos = `uname 
-s | sed -e s/\-.*//`; chop($hostos); diff --git a/interface/create b/interface/create index b7be8ab6e..0b9cefa2b 100755 --- a/interface/create +++ b/interface/create @@ -1,4 +1,4 @@ -#!/usr/bin/perl +#!/usr/bin/env perl $count = 0; From 441c1161058feaa7119e84b86eb2d2a69929cc5c Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Thu, 25 Feb 2021 13:47:34 +0100 Subject: [PATCH 101/134] fix undefined CC again --- f_check | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/f_check b/f_check index d044f2547..2664e2d4a 100644 --- a/f_check +++ b/f_check @@ -330,7 +330,7 @@ if ($link ne "") { $flags =~ s/\@/\,/g; $linker_L .= "-Wl,". $flags . " " ; } - if ($flags =~ /-lgomp/ && $CC =~ /clang/) { + if ($flags =~ /-lgomp/ && $ENV("CC") =~ /clang/) { $flags = "-lomp"; } From 736f0146c33cbfa8e256ba85a28c304e1af7c620 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Fri, 26 Feb 2021 04:18:04 +0100 Subject: [PATCH 102/134] Revert "Fix undefined CC in f_check (again)" --- f_check | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/f_check b/f_check index 2664e2d4a..d044f2547 100644 --- a/f_check +++ b/f_check @@ -330,7 +330,7 @@ if ($link ne "") { $flags =~ s/\@/\,/g; $linker_L .= "-Wl,". $flags . " " ; } - if ($flags =~ /-lgomp/ && $ENV("CC") =~ /clang/) { + if ($flags =~ /-lgomp/ && $CC =~ /clang/) { $flags = "-lomp"; } From 2d369bd916355e1b2c9612d962554948b6c5bb5f Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Fri, 26 Feb 2021 09:09:43 +0100 Subject: [PATCH 103/134] fix undefined CC variable --- f_check | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/f_check b/f_check index d044f2547..fe947bf66 100644 --- a/f_check +++ b/f_check @@ -330,7 +330,7 @@ if ($link ne "") { $flags =~ s/\@/\,/g; $linker_L .= "-Wl,". $flags . 
" " ; } - if ($flags =~ /-lgomp/ && $CC =~ /clang/) { + if ($flags =~ /-lgomp/ && $ENV{"CC"} =~ /clang/) { $flags = "-lomp"; } From 0571c3187b12afd7e55dfdd482743bf3134edc82 Mon Sep 17 00:00:00 2001 From: Rajalakshmi Srinivasaraghavan Date: Fri, 26 Feb 2021 20:56:34 -0600 Subject: [PATCH 104/134] POWER10: Rename mma builtins The LLVM and GCC teams agreed to rename the __builtin_mma_assemble_pair and __builtin_mma_disassemble_pair built-ins to __builtin_vsx_assemble_pair and __builtin_vsx_disassemble_pair respectively. This patch is to make corresponding changes in dgemm kernel. Also made changes in inputs to those builtins to avoid some potential typecasting issues. Reference gcc commit id:77ef995c1fbcab76a2a69b9f4700bcfd005d8e62 --- kernel/power/dgemm_kernel_power10.c | 77 ++++++++++++++--------------- 1 file changed, 37 insertions(+), 40 deletions(-) diff --git a/kernel/power/dgemm_kernel_power10.c b/kernel/power/dgemm_kernel_power10.c index b531799a6..e918e61c3 100644 --- a/kernel/power/dgemm_kernel_power10.c +++ b/kernel/power/dgemm_kernel_power10.c @@ -29,7 +29,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
typedef __vector unsigned char vec_t; typedef FLOAT v4sf_t __attribute__ ((vector_size (16))); -typedef FLOAT v2sf_t __attribute__ ((vector_size (8))); +#if !__has_builtin(__builtin_vsx_assemble_pair) +#define __builtin_vsx_assemble_pair __builtin_mma_assemble_pair +#endif + +#if !__has_builtin(__builtin_vsx_disassemble_pair) +#define __builtin_vsx_disassemble_pair __builtin_mma_disassemble_pair +#endif #ifdef TRMMKERNEL #define SAVE_ACC(ACC, J) \ @@ -186,8 +192,8 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B, vec_t *rowA = (vec_t *) & AO[0]; vec_t *rb = (vec_t *) & BO[0]; __vector_pair rowB, rowB1; - __builtin_mma_assemble_pair (&rowB, rb[1], rb[0]); - __builtin_mma_assemble_pair (&rowB1, rb[3], rb[2]); + __builtin_vsx_assemble_pair (&rowB, rb[1], rb[0]); + __builtin_vsx_assemble_pair (&rowB1, rb[3], rb[2]); __builtin_mma_xvf64ger (&acc0, rowB, rowA[0]); __builtin_mma_xvf64ger (&acc1, rowB1, rowA[0]); __builtin_mma_xvf64ger (&acc2, rowB, rowA[1]); @@ -200,8 +206,8 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B, { rowA = (vec_t *) & AO[l << 3]; rb = (vec_t *) & BO[l << 3]; - __builtin_mma_assemble_pair (&rowB, rb[1], rb[0]); - __builtin_mma_assemble_pair (&rowB1, rb[3], rb[2]); + __builtin_vsx_assemble_pair (&rowB, rb[1], rb[0]); + __builtin_vsx_assemble_pair (&rowB1, rb[3], rb[2]); __builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]); __builtin_mma_xvf64gerpp (&acc1, rowB1, rowA[0]); __builtin_mma_xvf64gerpp (&acc2, rowB, rowA[1]); @@ -242,8 +248,8 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B, vec_t *rowA = (vec_t *) & AO[0]; __vector_pair rowB, rowB1; vec_t *rb = (vec_t *) & BO[0]; - __builtin_mma_assemble_pair (&rowB, rb[1], rb[0]); - __builtin_mma_assemble_pair (&rowB1, rb[3], rb[2]); + __builtin_vsx_assemble_pair (&rowB, rb[1], rb[0]); + __builtin_vsx_assemble_pair (&rowB1, rb[3], rb[2]); __builtin_mma_xvf64ger (&acc0, rowB, rowA[0]); __builtin_mma_xvf64ger (&acc1, 
rowB1, rowA[0]); __builtin_mma_xvf64ger (&acc2, rowB, rowA[1]); @@ -252,8 +258,8 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B, { rowA = (vec_t *) & AO[l << 2]; rb = (vec_t *) & BO[l << 3]; - __builtin_mma_assemble_pair (&rowB, rb[1], rb[0]); - __builtin_mma_assemble_pair (&rowB1, rb[3], rb[2]); + __builtin_vsx_assemble_pair (&rowB, rb[1], rb[0]); + __builtin_vsx_assemble_pair (&rowB1, rb[3], rb[2]); __builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]); __builtin_mma_xvf64gerpp (&acc1, rowB1, rowA[0]); __builtin_mma_xvf64gerpp (&acc2, rowB, rowA[1]); @@ -286,16 +292,16 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B, vec_t *rowA = (vec_t *) & AO[0]; __vector_pair rowB, rowB1; vec_t *rb = (vec_t *) & BO[0]; - __builtin_mma_assemble_pair (&rowB, rb[1], rb[0]); - __builtin_mma_assemble_pair (&rowB1, rb[3], rb[2]); + __builtin_vsx_assemble_pair (&rowB, rb[1], rb[0]); + __builtin_vsx_assemble_pair (&rowB1, rb[3], rb[2]); __builtin_mma_xvf64ger (&acc0, rowB, rowA[0]); __builtin_mma_xvf64ger (&acc1, rowB1, rowA[0]); for (l = 1; l < temp; l++) { rowA = (vec_t *) & AO[l << 1]; rb = (vec_t *) & BO[l << 3]; - __builtin_mma_assemble_pair (&rowB, rb[1], rb[0]); - __builtin_mma_assemble_pair (&rowB1, rb[3], rb[2]); + __builtin_vsx_assemble_pair (&rowB, rb[1], rb[0]); + __builtin_vsx_assemble_pair (&rowB1, rb[3], rb[2]); __builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]); __builtin_mma_xvf64gerpp (&acc1, rowB1, rowA[0]); } @@ -398,7 +404,7 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B, vec_t *rowA = (vec_t *) & AO[0]; __vector_pair rowB; vec_t *rb = (vec_t *) & BO[0]; - __builtin_mma_assemble_pair (&rowB, rb[1], rb[0]); + __builtin_vsx_assemble_pair (&rowB, rb[1], rb[0]); __builtin_mma_xvf64ger (&acc0, rowB, rowA[0]); __builtin_mma_xvf64ger (&acc1, rowB, rowA[1]); __builtin_mma_xvf64ger (&acc2, rowB, rowA[2]); @@ -407,7 +413,7 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, 
FLOAT * A, FLOAT * B, { rowA = (vec_t *) & AO[l << 3]; rb = (vec_t *) & BO[l << 2]; - __builtin_mma_assemble_pair (&rowB, rb[1], rb[0]); + __builtin_vsx_assemble_pair (&rowB, rb[1], rb[0]); __builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]); __builtin_mma_xvf64gerpp (&acc1, rowB, rowA[1]); __builtin_mma_xvf64gerpp (&acc2, rowB, rowA[2]); @@ -440,14 +446,14 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B, vec_t *rowA = (vec_t *) & AO[0]; __vector_pair rowB; vec_t *rb = (vec_t *) & BO[0]; - __builtin_mma_assemble_pair (&rowB, rb[1], rb[0]); + __builtin_vsx_assemble_pair (&rowB, rb[1], rb[0]); __builtin_mma_xvf64ger (&acc0, rowB, rowA[0]); __builtin_mma_xvf64ger (&acc1, rowB, rowA[1]); for (l = 1; l < temp; l++) { rowA = (vec_t *) & AO[l << 2]; rb = (vec_t *) & BO[l << 2]; - __builtin_mma_assemble_pair (&rowB, rb[1], rb[0]); + __builtin_vsx_assemble_pair (&rowB, rb[1], rb[0]); __builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]); __builtin_mma_xvf64gerpp (&acc1, rowB, rowA[1]); } @@ -476,13 +482,13 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B, vec_t *rowA = (vec_t *) & AO[0]; __vector_pair rowB; vec_t *rb = (vec_t *) & BO[0]; - __builtin_mma_assemble_pair (&rowB, rb[1], rb[0]); + __builtin_vsx_assemble_pair (&rowB, rb[1], rb[0]); __builtin_mma_xvf64ger (&acc0, rowB, rowA[0]); for (l = 1; l < temp; l++) { rowA = (vec_t *) & AO[l << 1]; rb = (vec_t *) & BO[l << 2]; - __builtin_mma_assemble_pair (&rowB, rb[1], rb[0]); + __builtin_vsx_assemble_pair (&rowB, rb[1], rb[0]); __builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]); } SAVE_ACC (&acc0, 0); @@ -562,11 +568,9 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B, v4sf_t result[4]; __vector_quad acc0, acc1, acc2, acc3; BLASLONG l = 0; - FLOAT t[4] = { 0, 0, 0, 0 }; - t[0] = BO[0], t[1] = BO[1]; __vector_pair rowB; - vec_t *rb = (vec_t *) & t[0]; - __builtin_mma_assemble_pair (&rowB, rb[1], rb[0]); + vec_t *rb = (vec_t *) & BO[0]; + 
__builtin_vsx_assemble_pair (&rowB, rb[0], rb[0]); vec_t *rowA = (vec_t *) & AO[0]; __builtin_mma_xvf64ger (&acc0, rowB, rowA[0]); __builtin_mma_xvf64ger (&acc1, rowB, rowA[1]); @@ -574,9 +578,8 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B, __builtin_mma_xvf64ger (&acc3, rowB, rowA[3]); for (l = 1; l < temp; l++) { - t[0] = BO[l << 1], t[1] = BO[(l << 1) + 1]; - rb = (vec_t *) & t[0]; - __builtin_mma_assemble_pair (&rowB, rb[1], rb[0]); + rb = (vec_t *) & BO[l << 1]; + __builtin_vsx_assemble_pair (&rowB, rb[0], rb[0]); rowA = (vec_t *) & AO[l << 3]; __builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]); __builtin_mma_xvf64gerpp (&acc1, rowB, rowA[1]); @@ -607,19 +610,16 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B, v4sf_t result[4]; __vector_quad acc0, acc1; BLASLONG l = 0; - FLOAT t[4] = { 0, 0, 0, 0 }; - t[0] = BO[0], t[1] = BO[1]; __vector_pair rowB; - vec_t *rb = (vec_t *) & t[0]; - __builtin_mma_assemble_pair (&rowB, rb[1], rb[0]); + vec_t *rb = (vec_t *) & BO[0]; + __builtin_vsx_assemble_pair (&rowB, rb[0], rb[0]); vec_t *rowA = (vec_t *) & AO[0]; __builtin_mma_xvf64ger (&acc0, rowB, rowA[0]); __builtin_mma_xvf64ger (&acc1, rowB, rowA[1]); for (l = 1; l < temp; l++) { - t[0] = BO[l << 1], t[1] = BO[(l << 1) + 1]; - rb = (vec_t *) & t[0]; - __builtin_mma_assemble_pair (&rowB, rb[1], rb[0]); + rb = (vec_t *) & BO[l << 1]; + __builtin_vsx_assemble_pair (&rowB, rb[0], rb[0]); rowA = (vec_t *) & AO[l << 2]; __builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]); __builtin_mma_xvf64gerpp (&acc1, rowB, rowA[1]); @@ -646,18 +646,15 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B, v4sf_t result[4]; __vector_quad acc0; BLASLONG l = 0; - FLOAT t[4] = { 0, 0, 0, 0 }; - t[0] = BO[0], t[1] = BO[1]; __vector_pair rowB; - vec_t *rb = (vec_t *) & t[0]; - __builtin_mma_assemble_pair (&rowB, rb[1], rb[0]); + vec_t *rb = (vec_t *) & BO[0]; + __builtin_vsx_assemble_pair (&rowB, rb[0], rb[0]); 
vec_t *rowA = (vec_t *) & AO[0]; __builtin_mma_xvf64ger (&acc0, rowB, rowA[0]); for (l = 1; l < temp; l++) { - t[0] = BO[l << 1], t[1] = BO[(l << 1) + 1]; - rb = (vec_t *) & t[0]; - __builtin_mma_assemble_pair (&rowB, rb[1], rb[0]); + rb = (vec_t *) & BO[l << 1]; + __builtin_vsx_assemble_pair (&rowB, rb[0], rb[0]); rowA = (vec_t *) & AO[l << 1]; __builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]); } From 0197519dd71eb894e8ce02b78383242032b8c207 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sun, 28 Feb 2021 18:46:08 +0100 Subject: [PATCH 105/134] Delete cchkee.f --- lapack-netlib/TESTING/EIG/cchkee.f | 2507 ---------------------------- 1 file changed, 2507 deletions(-) delete mode 100644 lapack-netlib/TESTING/EIG/cchkee.f diff --git a/lapack-netlib/TESTING/EIG/cchkee.f b/lapack-netlib/TESTING/EIG/cchkee.f deleted file mode 100644 index f2a5f8d41..000000000 --- a/lapack-netlib/TESTING/EIG/cchkee.f +++ /dev/null @@ -1,2507 +0,0 @@ -*> \brief \b CCHKEE -* -* =========== DOCUMENTATION =========== -* -* Online html documentation available at -* http://www.netlib.org/lapack/explore-html/ -* -* Definition: -* =========== -* -* PROGRAM CCHKEE -* -* -*> \par Purpose: -* ============= -*> -*> \verbatim -*> -*> CCHKEE tests the COMPLEX LAPACK subroutines for the matrix -*> eigenvalue problem. 
The test paths in this version are -*> -*> NEP (Nonsymmetric Eigenvalue Problem): -*> Test CGEHRD, CUNGHR, CHSEQR, CTREVC, CHSEIN, and CUNMHR -*> -*> SEP (Hermitian Eigenvalue Problem): -*> Test CHETRD, CUNGTR, CSTEQR, CSTERF, CSTEIN, CSTEDC, -*> and drivers CHEEV(X), CHBEV(X), CHPEV(X), -*> CHEEVD, CHBEVD, CHPEVD -*> -*> SVD (Singular Value Decomposition): -*> Test CGEBRD, CUNGBR, and CBDSQR -*> and the drivers CGESVD, CGESDD -*> -*> CEV (Nonsymmetric Eigenvalue/eigenvector Driver): -*> Test CGEEV -*> -*> CES (Nonsymmetric Schur form Driver): -*> Test CGEES -*> -*> CVX (Nonsymmetric Eigenvalue/eigenvector Expert Driver): -*> Test CGEEVX -*> -*> CSX (Nonsymmetric Schur form Expert Driver): -*> Test CGEESX -*> -*> CGG (Generalized Nonsymmetric Eigenvalue Problem): -*> Test CGGHD3, CGGBAL, CGGBAK, CHGEQZ, and CTGEVC -*> -*> CGS (Generalized Nonsymmetric Schur form Driver): -*> Test CGGES -*> -*> CGV (Generalized Nonsymmetric Eigenvalue/eigenvector Driver): -*> Test CGGEV -*> -*> CGX (Generalized Nonsymmetric Schur form Expert Driver): -*> Test CGGESX -*> -*> CXV (Generalized Nonsymmetric Eigenvalue/eigenvector Expert Driver): -*> Test CGGEVX -*> -*> CSG (Hermitian Generalized Eigenvalue Problem): -*> Test CHEGST, CHEGV, CHEGVD, CHEGVX, CHPGST, CHPGV, CHPGVD, -*> CHPGVX, CHBGST, CHBGV, CHBGVD, and CHBGVX -*> -*> CHB (Hermitian Band Eigenvalue Problem): -*> Test CHBTRD -*> -*> CBB (Band Singular Value Decomposition): -*> Test CGBBRD -*> -*> CEC (Eigencondition estimation): -*> Test CTRSYL, CTREXC, CTRSNA, and CTRSEN -*> -*> CBL (Balancing a general matrix) -*> Test CGEBAL -*> -*> CBK (Back transformation on a balanced matrix) -*> Test CGEBAK -*> -*> CGL (Balancing a matrix pair) -*> Test CGGBAL -*> -*> CGK (Back transformation on a matrix pair) -*> Test CGGBAK -*> -*> GLM (Generalized Linear Regression Model): -*> Tests CGGGLM -*> -*> GQR (Generalized QR and RQ factorizations): -*> Tests CGGQRF and CGGRQF -*> -*> GSV (Generalized Singular Value Decomposition): -*> 
Tests CGGSVD, CGGSVP, CTGSJA, CLAGS2, CLAPLL, and CLAPMT -*> -*> CSD (CS decomposition): -*> Tests CUNCSD -*> -*> LSE (Constrained Linear Least Squares): -*> Tests CGGLSE -*> -*> Each test path has a different set of inputs, but the data sets for -*> the driver routines xEV, xES, xVX, and xSX can be concatenated in a -*> single input file. The first line of input should contain one of the -*> 3-character path names in columns 1-3. The number of remaining lines -*> depends on what is found on the first line. -*> -*> The number of matrix types used in testing is often controllable from -*> the input file. The number of matrix types for each path, and the -*> test routine that describes them, is as follows: -*> -*> Path name(s) Types Test routine -*> -*> CHS or NEP 21 CCHKHS -*> CST or SEP 21 CCHKST (routines) -*> 18 CDRVST (drivers) -*> CBD or SVD 16 CCHKBD (routines) -*> 5 CDRVBD (drivers) -*> CEV 21 CDRVEV -*> CES 21 CDRVES -*> CVX 21 CDRVVX -*> CSX 21 CDRVSX -*> CGG 26 CCHKGG (routines) -*> CGS 26 CDRGES -*> CGX 5 CDRGSX -*> CGV 26 CDRGEV -*> CXV 2 CDRGVX -*> CSG 21 CDRVSG -*> CHB 15 CCHKHB -*> CBB 15 CCHKBB -*> CEC - CCHKEC -*> CBL - CCHKBL -*> CBK - CCHKBK -*> CGL - CCHKGL -*> CGK - CCHKGK -*> GLM 8 CCKGLM -*> GQR 8 CCKGQR -*> GSV 8 CCKGSV -*> CSD 3 CCKCSD -*> LSE 8 CCKLSE -*> -*>----------------------------------------------------------------------- -*> -*> NEP input file: -*> -*> line 2: NN, INTEGER -*> Number of values of N. -*> -*> line 3: NVAL, INTEGER array, dimension (NN) -*> The values for the matrix dimension N. -*> -*> line 4: NPARMS, INTEGER -*> Number of values of the parameters NB, NBMIN, NX, NS, and -*> MAXB. -*> -*> line 5: NBVAL, INTEGER array, dimension (NPARMS) -*> The values for the blocksize NB. -*> -*> line 6: NBMIN, INTEGER array, dimension (NPARMS) -*> The values for the minimum blocksize NBMIN. -*> -*> line 7: NXVAL, INTEGER array, dimension (NPARMS) -*> The values for the crossover point NX. 
-*> -*> line 8: INMIN, INTEGER array, dimension (NPARMS) -*> LAHQR vs TTQRE crossover point, >= 11 -*> -*> line 9: INWIN, INTEGER array, dimension (NPARMS) -*> recommended deflation window size -*> -*> line 10: INIBL, INTEGER array, dimension (NPARMS) -*> nibble crossover point -*> -*> line 11: ISHFTS, INTEGER array, dimension (NPARMS) -*> number of simultaneous shifts) -*> -*> line 12: IACC22, INTEGER array, dimension (NPARMS) -*> select structured matrix multiply: 0, 1 or 2) -*> -*> line 13: THRESH -*> Threshold value for the test ratios. Information will be -*> printed about each test for which the test ratio is greater -*> than or equal to the threshold. To have all of the test -*> ratios printed, use THRESH = 0.0 . -*> -*> line 14: NEWSD, INTEGER -*> A code indicating how to set the random number seed. -*> = 0: Set the seed to a default value before each run -*> = 1: Initialize the seed to a default value only before the -*> first run -*> = 2: Like 1, but use the seed values on the next line -*> -*> If line 14 was 2: -*> -*> line 15: INTEGER array, dimension (4) -*> Four integer values for the random number seed. -*> -*> lines 15-EOF: The remaining lines occur in sets of 1 or 2 and allow -*> the user to specify the matrix types. Each line contains -*> a 3-character path name in columns 1-3, and the number -*> of matrix types must be the first nonblank item in columns -*> 4-80. If the number of matrix types is at least 1 but is -*> less than the maximum number of possible types, a second -*> line will be read to get the numbers of the matrix types to -*> be used. For example, -*> NEP 21 -*> requests all of the matrix types for the nonsymmetric -*> eigenvalue problem, while -*> NEP 4 -*> 9 10 11 12 -*> requests only matrices of type 9, 10, 11, and 12. -*> -*> The valid 3-character path names are 'NEP' or 'CHS' for the -*> nonsymmetric eigenvalue routines. 
-*> -*>----------------------------------------------------------------------- -*> -*> SEP or CSG input file: -*> -*> line 2: NN, INTEGER -*> Number of values of N. -*> -*> line 3: NVAL, INTEGER array, dimension (NN) -*> The values for the matrix dimension N. -*> -*> line 4: NPARMS, INTEGER -*> Number of values of the parameters NB, NBMIN, and NX. -*> -*> line 5: NBVAL, INTEGER array, dimension (NPARMS) -*> The values for the blocksize NB. -*> -*> line 6: NBMIN, INTEGER array, dimension (NPARMS) -*> The values for the minimum blocksize NBMIN. -*> -*> line 7: NXVAL, INTEGER array, dimension (NPARMS) -*> The values for the crossover point NX. -*> -*> line 8: THRESH -*> Threshold value for the test ratios. Information will be -*> printed about each test for which the test ratio is greater -*> than or equal to the threshold. -*> -*> line 9: TSTCHK, LOGICAL -*> Flag indicating whether or not to test the LAPACK routines. -*> -*> line 10: TSTDRV, LOGICAL -*> Flag indicating whether or not to test the driver routines. -*> -*> line 11: TSTERR, LOGICAL -*> Flag indicating whether or not to test the error exits for -*> the LAPACK routines and driver routines. -*> -*> line 12: NEWSD, INTEGER -*> A code indicating how to set the random number seed. -*> = 0: Set the seed to a default value before each run -*> = 1: Initialize the seed to a default value only before the -*> first run -*> = 2: Like 1, but use the seed values on the next line -*> -*> If line 12 was 2: -*> -*> line 13: INTEGER array, dimension (4) -*> Four integer values for the random number seed. -*> -*> lines 13-EOF: Lines specifying matrix types, as for NEP. -*> The valid 3-character path names are 'SEP' or 'CST' for the -*> Hermitian eigenvalue routines and driver routines, and -*> 'CSG' for the routines for the Hermitian generalized -*> eigenvalue problem. 
-*> -*>----------------------------------------------------------------------- -*> -*> SVD input file: -*> -*> line 2: NN, INTEGER -*> Number of values of M and N. -*> -*> line 3: MVAL, INTEGER array, dimension (NN) -*> The values for the matrix row dimension M. -*> -*> line 4: NVAL, INTEGER array, dimension (NN) -*> The values for the matrix column dimension N. -*> -*> line 5: NPARMS, INTEGER -*> Number of values of the parameter NB, NBMIN, NX, and NRHS. -*> -*> line 6: NBVAL, INTEGER array, dimension (NPARMS) -*> The values for the blocksize NB. -*> -*> line 7: NBMIN, INTEGER array, dimension (NPARMS) -*> The values for the minimum blocksize NBMIN. -*> -*> line 8: NXVAL, INTEGER array, dimension (NPARMS) -*> The values for the crossover point NX. -*> -*> line 9: NSVAL, INTEGER array, dimension (NPARMS) -*> The values for the number of right hand sides NRHS. -*> -*> line 10: THRESH -*> Threshold value for the test ratios. Information will be -*> printed about each test for which the test ratio is greater -*> than or equal to the threshold. -*> -*> line 11: TSTCHK, LOGICAL -*> Flag indicating whether or not to test the LAPACK routines. -*> -*> line 12: TSTDRV, LOGICAL -*> Flag indicating whether or not to test the driver routines. -*> -*> line 13: TSTERR, LOGICAL -*> Flag indicating whether or not to test the error exits for -*> the LAPACK routines and driver routines. -*> -*> line 14: NEWSD, INTEGER -*> A code indicating how to set the random number seed. -*> = 0: Set the seed to a default value before each run -*> = 1: Initialize the seed to a default value only before the -*> first run -*> = 2: Like 1, but use the seed values on the next line -*> -*> If line 14 was 2: -*> -*> line 15: INTEGER array, dimension (4) -*> Four integer values for the random number seed. -*> -*> lines 15-EOF: Lines specifying matrix types, as for NEP. -*> The 3-character path names are 'SVD' or 'CBD' for both the -*> SVD routines and the SVD driver routines. 
-*> -*>----------------------------------------------------------------------- -*> -*> CEV and CES data files: -*> -*> line 1: 'CEV' or 'CES' in columns 1 to 3. -*> -*> line 2: NSIZES, INTEGER -*> Number of sizes of matrices to use. Should be at least 0 -*> and at most 20. If NSIZES = 0, no testing is done -*> (although the remaining 3 lines are still read). -*> -*> line 3: NN, INTEGER array, dimension(NSIZES) -*> Dimensions of matrices to be tested. -*> -*> line 4: NB, NBMIN, NX, NS, NBCOL, INTEGERs -*> These integer parameters determine how blocking is done -*> (see ILAENV for details) -*> NB : block size -*> NBMIN : minimum block size -*> NX : minimum dimension for blocking -*> NS : number of shifts in xHSEQR -*> NBCOL : minimum column dimension for blocking -*> -*> line 5: THRESH, REAL -*> The test threshold against which computed residuals are -*> compared. Should generally be in the range from 10. to 20. -*> If it is 0., all test case data will be printed. -*> -*> line 6: NEWSD, INTEGER -*> A code indicating how to set the random number seed. -*> = 0: Set the seed to a default value before each run -*> = 1: Initialize the seed to a default value only before the -*> first run -*> = 2: Like 1, but use the seed values on the next line -*> -*> If line 6 was 2: -*> -*> line 7: INTEGER array, dimension (4) -*> Four integer values for the random number seed. -*> -*> lines 8 and following: Lines specifying matrix types, as for NEP. -*> The 3-character path name is 'CEV' to test CGEEV, or -*> 'CES' to test CGEES. -*> -*>----------------------------------------------------------------------- -*> -*> The CVX data has two parts. The first part is identical to CEV, -*> and the second part consists of test matrices with precomputed -*> solutions. -*> -*> line 1: 'CVX' in columns 1-3. -*> -*> line 2: NSIZES, INTEGER -*> If NSIZES = 0, no testing of randomly generated examples -*> is done, but any precomputed examples are tested. 
-*> -*> line 3: NN, INTEGER array, dimension(NSIZES) -*> -*> line 4: NB, NBMIN, NX, NS, NBCOL, INTEGERs -*> -*> line 5: THRESH, REAL -*> -*> line 6: NEWSD, INTEGER -*> -*> If line 6 was 2: -*> -*> line 7: INTEGER array, dimension (4) -*> -*> lines 8 and following: The first line contains 'CVX' in columns 1-3 -*> followed by the number of matrix types, possibly with -*> a second line to specify certain matrix types. -*> If the number of matrix types = 0, no testing of randomly -*> generated examples is done, but any precomputed examples -*> are tested. -*> -*> remaining lines : Each matrix is stored on 1+N+N**2 lines, where N is -*> its dimension. The first line contains the dimension N and -*> ISRT (two integers). ISRT indicates whether the last N lines -*> are sorted by increasing real part of the eigenvalue -*> (ISRT=0) or by increasing imaginary part (ISRT=1). The next -*> N**2 lines contain the matrix rowwise, one entry per line. -*> The last N lines correspond to each eigenvalue. Each of -*> these last N lines contains 4 real values: the real part of -*> the eigenvalues, the imaginary part of the eigenvalue, the -*> reciprocal condition number of the eigenvalues, and the -*> reciprocal condition number of the vector eigenvector. The -*> end of data is indicated by dimension N=0. Even if no data -*> is to be tested, there must be at least one line containing -*> N=0. -*> -*>----------------------------------------------------------------------- -*> -*> The CSX data is like CVX. The first part is identical to CEV, and the -*> second part consists of test matrices with precomputed solutions. -*> -*> line 1: 'CSX' in columns 1-3. -*> -*> line 2: NSIZES, INTEGER -*> If NSIZES = 0, no testing of randomly generated examples -*> is done, but any precomputed examples are tested. 
-*> -*> line 3: NN, INTEGER array, dimension(NSIZES) -*> -*> line 4: NB, NBMIN, NX, NS, NBCOL, INTEGERs -*> -*> line 5: THRESH, REAL -*> -*> line 6: NEWSD, INTEGER -*> -*> If line 6 was 2: -*> -*> line 7: INTEGER array, dimension (4) -*> -*> lines 8 and following: The first line contains 'CSX' in columns 1-3 -*> followed by the number of matrix types, possibly with -*> a second line to specify certain matrix types. -*> If the number of matrix types = 0, no testing of randomly -*> generated examples is done, but any precomputed examples -*> are tested. -*> -*> remaining lines : Each matrix is stored on 3+N**2 lines, where N is -*> its dimension. The first line contains the dimension N, the -*> dimension M of an invariant subspace, and ISRT. The second -*> line contains M integers, identifying the eigenvalues in the -*> invariant subspace (by their position in a list of -*> eigenvalues ordered by increasing real part (if ISRT=0) or -*> by increasing imaginary part (if ISRT=1)). The next N**2 -*> lines contain the matrix rowwise. The last line contains the -*> reciprocal condition number for the average of the selected -*> eigenvalues, and the reciprocal condition number for the -*> corresponding right invariant subspace. The end of data in -*> indicated by a line containing N=0, M=0, and ISRT = 0. Even -*> if no data is to be tested, there must be at least one line -*> containing N=0, M=0 and ISRT=0. -*> -*>----------------------------------------------------------------------- -*> -*> CGG input file: -*> -*> line 2: NN, INTEGER -*> Number of values of N. -*> -*> line 3: NVAL, INTEGER array, dimension (NN) -*> The values for the matrix dimension N. -*> -*> line 4: NPARMS, INTEGER -*> Number of values of the parameters NB, NBMIN, NBCOL, NS, and -*> MAXB. -*> -*> line 5: NBVAL, INTEGER array, dimension (NPARMS) -*> The values for the blocksize NB. -*> -*> line 6: NBMIN, INTEGER array, dimension (NPARMS) -*> The values for NBMIN, the minimum row dimension for blocks. 
-*> -*> line 7: NSVAL, INTEGER array, dimension (NPARMS) -*> The values for the number of shifts. -*> -*> line 8: MXBVAL, INTEGER array, dimension (NPARMS) -*> The values for MAXB, used in determining minimum blocksize. -*> -*> line 9: IACC22, INTEGER array, dimension (NPARMS) -*> select structured matrix multiply: 1 or 2) -*> -*> line 10: NBCOL, INTEGER array, dimension (NPARMS) -*> The values for NBCOL, the minimum column dimension for -*> blocks. -*> -*> line 11: THRESH -*> Threshold value for the test ratios. Information will be -*> printed about each test for which the test ratio is greater -*> than or equal to the threshold. -*> -*> line 12: TSTCHK, LOGICAL -*> Flag indicating whether or not to test the LAPACK routines. -*> -*> line 13: TSTDRV, LOGICAL -*> Flag indicating whether or not to test the driver routines. -*> -*> line 14: TSTERR, LOGICAL -*> Flag indicating whether or not to test the error exits for -*> the LAPACK routines and driver routines. -*> -*> line 15: NEWSD, INTEGER -*> A code indicating how to set the random number seed. -*> = 0: Set the seed to a default value before each run -*> = 1: Initialize the seed to a default value only before the -*> first run -*> = 2: Like 1, but use the seed values on the next line -*> -*> If line 15 was 2: -*> -*> line 16: INTEGER array, dimension (4) -*> Four integer values for the random number seed. -*> -*> lines 17-EOF: Lines specifying matrix types, as for NEP. -*> The 3-character path name is 'CGG' for the generalized -*> eigenvalue problem routines and driver routines. -*> -*>----------------------------------------------------------------------- -*> -*> CGS and CGV input files: -*> -*> line 1: 'CGS' or 'CGV' in columns 1 to 3. -*> -*> line 2: NN, INTEGER -*> Number of values of N. -*> -*> line 3: NVAL, INTEGER array, dimension(NN) -*> Dimensions of matrices to be tested. 
-*> -*> line 4: NB, NBMIN, NX, NS, NBCOL, INTEGERs -*> These integer parameters determine how blocking is done -*> (see ILAENV for details) -*> NB : block size -*> NBMIN : minimum block size -*> NX : minimum dimension for blocking -*> NS : number of shifts in xHGEQR -*> NBCOL : minimum column dimension for blocking -*> -*> line 5: THRESH, REAL -*> The test threshold against which computed residuals are -*> compared. Should generally be in the range from 10. to 20. -*> If it is 0., all test case data will be printed. -*> -*> line 6: TSTERR, LOGICAL -*> Flag indicating whether or not to test the error exits. -*> -*> line 7: NEWSD, INTEGER -*> A code indicating how to set the random number seed. -*> = 0: Set the seed to a default value before each run -*> = 1: Initialize the seed to a default value only before the -*> first run -*> = 2: Like 1, but use the seed values on the next line -*> -*> If line 17 was 2: -*> -*> line 7: INTEGER array, dimension (4) -*> Four integer values for the random number seed. -*> -*> lines 7-EOF: Lines specifying matrix types, as for NEP. -*> The 3-character path name is 'CGS' for the generalized -*> eigenvalue problem routines and driver routines. -*> -*>----------------------------------------------------------------------- -*> -*> CGX input file: -*> line 1: 'CGX' in columns 1 to 3. -*> -*> line 2: N, INTEGER -*> Value of N. -*> -*> line 3: NB, NBMIN, NX, NS, NBCOL, INTEGERs -*> These integer parameters determine how blocking is done -*> (see ILAENV for details) -*> NB : block size -*> NBMIN : minimum block size -*> NX : minimum dimension for blocking -*> NS : number of shifts in xHGEQR -*> NBCOL : minimum column dimension for blocking -*> -*> line 4: THRESH, REAL -*> The test threshold against which computed residuals are -*> compared. Should generally be in the range from 10. to 20. -*> Information will be printed about each test for which the -*> test ratio is greater than or equal to the threshold. 
-*> -*> line 5: TSTERR, LOGICAL -*> Flag indicating whether or not to test the error exits for -*> the LAPACK routines and driver routines. -*> -*> line 6: NEWSD, INTEGER -*> A code indicating how to set the random number seed. -*> = 0: Set the seed to a default value before each run -*> = 1: Initialize the seed to a default value only before the -*> first run -*> = 2: Like 1, but use the seed values on the next line -*> -*> If line 6 was 2: -*> -*> line 7: INTEGER array, dimension (4) -*> Four integer values for the random number seed. -*> -*> If line 2 was 0: -*> -*> line 7-EOF: Precomputed examples are tested. -*> -*> remaining lines : Each example is stored on 3+2*N*N lines, where N is -*> its dimension. The first line contains the dimension (a -*> single integer). The next line contains an integer k such -*> that only the last k eigenvalues will be selected and appear -*> in the leading diagonal blocks of $A$ and $B$. The next N*N -*> lines contain the matrix A, one element per line. The next N*N -*> lines contain the matrix B. The last line contains the -*> reciprocal of the eigenvalue cluster condition number and the -*> reciprocal of the deflating subspace (associated with the -*> selected eigencluster) condition number. The end of data is -*> indicated by dimension N=0. Even if no data is to be tested, -*> there must be at least one line containing N=0. -*> -*>----------------------------------------------------------------------- -*> -*> CXV input files: -*> line 1: 'CXV' in columns 1 to 3. -*> -*> line 2: N, INTEGER -*> Value of N. -*> -*> line 3: NB, NBMIN, NX, NS, NBCOL, INTEGERs -*> These integer parameters determine how blocking is done -*> (see ILAENV for details) -*> NB : block size -*> NBMIN : minimum block size -*> NX : minimum dimension for blocking -*> NS : number of shifts in xHGEQR -*> NBCOL : minimum column dimension for blocking -*> -*> line 4: THRESH, REAL -*> The test threshold against which computed residuals are -*> compared. 
Should generally be in the range from 10. to 20. -*> Information will be printed about each test for which the -*> test ratio is greater than or equal to the threshold. -*> -*> line 5: TSTERR, LOGICAL -*> Flag indicating whether or not to test the error exits for -*> the LAPACK routines and driver routines. -*> -*> line 6: NEWSD, INTEGER -*> A code indicating how to set the random number seed. -*> = 0: Set the seed to a default value before each run -*> = 1: Initialize the seed to a default value only before the -*> first run -*> = 2: Like 1, but use the seed values on the next line -*> -*> If line 6 was 2: -*> -*> line 7: INTEGER array, dimension (4) -*> Four integer values for the random number seed. -*> -*> If line 2 was 0: -*> -*> line 7-EOF: Precomputed examples are tested. -*> -*> remaining lines : Each example is stored on 3+2*N*N lines, where N is -*> its dimension. The first line contains the dimension (a -*> single integer). The next N*N lines contain the matrix A, one -*> element per line. The next N*N lines contain the matrix B. -*> The next line contains the reciprocals of the eigenvalue -*> condition numbers. The last line contains the reciprocals of -*> the eigenvector condition numbers. The end of data is -*> indicated by dimension N=0. Even if no data is to be tested, -*> there must be at least one line containing N=0. -*> -*>----------------------------------------------------------------------- -*> -*> CHB input file: -*> -*> line 2: NN, INTEGER -*> Number of values of N. -*> -*> line 3: NVAL, INTEGER array, dimension (NN) -*> The values for the matrix dimension N. -*> -*> line 4: NK, INTEGER -*> Number of values of K. -*> -*> line 5: KVAL, INTEGER array, dimension (NK) -*> The values for the matrix dimension K. -*> -*> line 6: THRESH -*> Threshold value for the test ratios. Information will be -*> printed about each test for which the test ratio is greater -*> than or equal to the threshold. 
-*> -*> line 7: NEWSD, INTEGER -*> A code indicating how to set the random number seed. -*> = 0: Set the seed to a default value before each run -*> = 1: Initialize the seed to a default value only before the -*> first run -*> = 2: Like 1, but use the seed values on the next line -*> -*> If line 7 was 2: -*> -*> line 8: INTEGER array, dimension (4) -*> Four integer values for the random number seed. -*> -*> lines 8-EOF: Lines specifying matrix types, as for NEP. -*> The 3-character path name is 'CHB'. -*> -*>----------------------------------------------------------------------- -*> -*> CBB input file: -*> -*> line 2: NN, INTEGER -*> Number of values of M and N. -*> -*> line 3: MVAL, INTEGER array, dimension (NN) -*> The values for the matrix row dimension M. -*> -*> line 4: NVAL, INTEGER array, dimension (NN) -*> The values for the matrix column dimension N. -*> -*> line 4: NK, INTEGER -*> Number of values of K. -*> -*> line 5: KVAL, INTEGER array, dimension (NK) -*> The values for the matrix bandwidth K. -*> -*> line 6: NPARMS, INTEGER -*> Number of values of the parameter NRHS -*> -*> line 7: NSVAL, INTEGER array, dimension (NPARMS) -*> The values for the number of right hand sides NRHS. -*> -*> line 8: THRESH -*> Threshold value for the test ratios. Information will be -*> printed about each test for which the test ratio is greater -*> than or equal to the threshold. -*> -*> line 9: NEWSD, INTEGER -*> A code indicating how to set the random number seed. -*> = 0: Set the seed to a default value before each run -*> = 1: Initialize the seed to a default value only before the -*> first run -*> = 2: Like 1, but use the seed values on the next line -*> -*> If line 9 was 2: -*> -*> line 10: INTEGER array, dimension (4) -*> Four integer values for the random number seed. -*> -*> lines 10-EOF: Lines specifying matrix types, as for SVD. -*> The 3-character path name is 'CBB'. 
-*> -*>----------------------------------------------------------------------- -*> -*> CEC input file: -*> -*> line 2: THRESH, REAL -*> Threshold value for the test ratios. Information will be -*> printed about each test for which the test ratio is greater -*> than or equal to the threshold. -*> -*> lines 3-EOF: -*> -*> Input for testing the eigencondition routines consists of a set of -*> specially constructed test cases and their solutions. The data -*> format is not intended to be modified by the user. -*> -*>----------------------------------------------------------------------- -*> -*> CBL and CBK input files: -*> -*> line 1: 'CBL' in columns 1-3 to test CGEBAL, or 'CBK' in -*> columns 1-3 to test CGEBAK. -*> -*> The remaining lines consist of specially constructed test cases. -*> -*>----------------------------------------------------------------------- -*> -*> CGL and CGK input files: -*> -*> line 1: 'CGL' in columns 1-3 to test CGGBAL, or 'CGK' in -*> columns 1-3 to test CGGBAK. -*> -*> The remaining lines consist of specially constructed test cases. -*> -*>----------------------------------------------------------------------- -*> -*> GLM data file: -*> -*> line 1: 'GLM' in columns 1 to 3. -*> -*> line 2: NN, INTEGER -*> Number of values of M, P, and N. -*> -*> line 3: MVAL, INTEGER array, dimension(NN) -*> Values of M (row dimension). -*> -*> line 4: PVAL, INTEGER array, dimension(NN) -*> Values of P (row dimension). -*> -*> line 5: NVAL, INTEGER array, dimension(NN) -*> Values of N (column dimension), note M <= N <= M+P. -*> -*> line 6: THRESH, REAL -*> Threshold value for the test ratios. Information will be -*> printed about each test for which the test ratio is greater -*> than or equal to the threshold. -*> -*> line 7: TSTERR, LOGICAL -*> Flag indicating whether or not to test the error exits for -*> the LAPACK routines and driver routines. -*> -*> line 8: NEWSD, INTEGER -*> A code indicating how to set the random number seed. 
-*> = 0: Set the seed to a default value before each run -*> = 1: Initialize the seed to a default value only before the -*> first run -*> = 2: Like 1, but use the seed values on the next line -*> -*> If line 8 was 2: -*> -*> line 9: INTEGER array, dimension (4) -*> Four integer values for the random number seed. -*> -*> lines 9-EOF: Lines specifying matrix types, as for NEP. -*> The 3-character path name is 'GLM' for the generalized -*> linear regression model routines. -*> -*>----------------------------------------------------------------------- -*> -*> GQR data file: -*> -*> line 1: 'GQR' in columns 1 to 3. -*> -*> line 2: NN, INTEGER -*> Number of values of M, P, and N. -*> -*> line 3: MVAL, INTEGER array, dimension(NN) -*> Values of M. -*> -*> line 4: PVAL, INTEGER array, dimension(NN) -*> Values of P. -*> -*> line 5: NVAL, INTEGER array, dimension(NN) -*> Values of N. -*> -*> line 6: THRESH, REAL -*> Threshold value for the test ratios. Information will be -*> printed about each test for which the test ratio is greater -*> than or equal to the threshold. -*> -*> line 7: TSTERR, LOGICAL -*> Flag indicating whether or not to test the error exits for -*> the LAPACK routines and driver routines. -*> -*> line 8: NEWSD, INTEGER -*> A code indicating how to set the random number seed. -*> = 0: Set the seed to a default value before each run -*> = 1: Initialize the seed to a default value only before the -*> first run -*> = 2: Like 1, but use the seed values on the next line -*> -*> If line 8 was 2: -*> -*> line 9: INTEGER array, dimension (4) -*> Four integer values for the random number seed. -*> -*> lines 9-EOF: Lines specifying matrix types, as for NEP. -*> The 3-character path name is 'GQR' for the generalized -*> QR and RQ routines. -*> -*>----------------------------------------------------------------------- -*> -*> GSV data file: -*> -*> line 1: 'GSV' in columns 1 to 3. -*> -*> line 2: NN, INTEGER -*> Number of values of M, P, and N. 
-*> -*> line 3: MVAL, INTEGER array, dimension(NN) -*> Values of M (row dimension). -*> -*> line 4: PVAL, INTEGER array, dimension(NN) -*> Values of P (row dimension). -*> -*> line 5: NVAL, INTEGER array, dimension(NN) -*> Values of N (column dimension). -*> -*> line 6: THRESH, REAL -*> Threshold value for the test ratios. Information will be -*> printed about each test for which the test ratio is greater -*> than or equal to the threshold. -*> -*> line 7: TSTERR, LOGICAL -*> Flag indicating whether or not to test the error exits for -*> the LAPACK routines and driver routines. -*> -*> line 8: NEWSD, INTEGER -*> A code indicating how to set the random number seed. -*> = 0: Set the seed to a default value before each run -*> = 1: Initialize the seed to a default value only before the -*> first run -*> = 2: Like 1, but use the seed values on the next line -*> -*> If line 8 was 2: -*> -*> line 9: INTEGER array, dimension (4) -*> Four integer values for the random number seed. -*> -*> lines 9-EOF: Lines specifying matrix types, as for NEP. -*> The 3-character path name is 'GSV' for the generalized -*> SVD routines. -*> -*>----------------------------------------------------------------------- -*> -*> CSD data file: -*> -*> line 1: 'CSD' in columns 1 to 3. -*> -*> line 2: NM, INTEGER -*> Number of values of M, P, and N. -*> -*> line 3: MVAL, INTEGER array, dimension(NM) -*> Values of M (row and column dimension of orthogonal matrix). -*> -*> line 4: PVAL, INTEGER array, dimension(NM) -*> Values of P (row dimension of top-left block). -*> -*> line 5: NVAL, INTEGER array, dimension(NM) -*> Values of N (column dimension of top-left block). -*> -*> line 6: THRESH, REAL -*> Threshold value for the test ratios. Information will be -*> printed about each test for which the test ratio is greater -*> than or equal to the threshold. -*> -*> line 7: TSTERR, LOGICAL -*> Flag indicating whether or not to test the error exits for -*> the LAPACK routines and driver routines. 
-*> -*> line 8: NEWSD, INTEGER -*> A code indicating how to set the random number seed. -*> = 0: Set the seed to a default value before each run -*> = 1: Initialize the seed to a default value only before the -*> first run -*> = 2: Like 1, but use the seed values on the next line -*> -*> If line 8 was 2: -*> -*> line 9: INTEGER array, dimension (4) -*> Four integer values for the random number seed. -*> -*> lines 9-EOF: Lines specifying matrix types, as for NEP. -*> The 3-character path name is 'CSD' for the CSD routine. -*> -*>----------------------------------------------------------------------- -*> -*> LSE data file: -*> -*> line 1: 'LSE' in columns 1 to 3. -*> -*> line 2: NN, INTEGER -*> Number of values of M, P, and N. -*> -*> line 3: MVAL, INTEGER array, dimension(NN) -*> Values of M. -*> -*> line 4: PVAL, INTEGER array, dimension(NN) -*> Values of P. -*> -*> line 5: NVAL, INTEGER array, dimension(NN) -*> Values of N, note P <= N <= P+M. -*> -*> line 6: THRESH, REAL -*> Threshold value for the test ratios. Information will be -*> printed about each test for which the test ratio is greater -*> than or equal to the threshold. -*> -*> line 7: TSTERR, LOGICAL -*> Flag indicating whether or not to test the error exits for -*> the LAPACK routines and driver routines. -*> -*> line 8: NEWSD, INTEGER -*> A code indicating how to set the random number seed. -*> = 0: Set the seed to a default value before each run -*> = 1: Initialize the seed to a default value only before the -*> first run -*> = 2: Like 1, but use the seed values on the next line -*> -*> If line 8 was 2: -*> -*> line 9: INTEGER array, dimension (4) -*> Four integer values for the random number seed. -*> -*> lines 9-EOF: Lines specifying matrix types, as for NEP. -*> The 3-character path name is 'GSV' for the generalized -*> SVD routines. 
-*> -*>----------------------------------------------------------------------- -*> -*> NMAX is currently set to 132 and must be at least 12 for some of the -*> precomputed examples, and LWORK = NMAX*(5*NMAX+20) in the parameter -*> statements below. For SVD, we assume NRHS may be as big as N. The -*> parameter NEED is set to 14 to allow for 14 N-by-N matrices for CGG. -*> \endverbatim -* -* Arguments: -* ========== -* -* -* Authors: -* ======== -* -*> \author Univ. of Tennessee -*> \author Univ. of California Berkeley -*> \author Univ. of Colorado Denver -*> \author NAG Ltd. -* -*> \date June 2016 -* -*> \ingroup complex_eig -* -* ===================================================================== - PROGRAM CCHKEE -* -* -- LAPACK test routine (version 3.7.0) -- -* -- LAPACK is a software package provided by Univ. of Tennessee, -- -* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* June 2016 -* -* ===================================================================== -* -* .. Parameters .. - INTEGER NMAX - PARAMETER ( NMAX = 132 ) - INTEGER NCMAX - PARAMETER ( NCMAX = 20 ) - INTEGER NEED - PARAMETER ( NEED = 14 ) - INTEGER LWORK - PARAMETER ( LWORK = NMAX*( 5*NMAX+20 ) ) - INTEGER LIWORK - PARAMETER ( LIWORK = NMAX*( NMAX+20 ) ) - INTEGER MAXIN - PARAMETER ( MAXIN = 20 ) - INTEGER MAXT - PARAMETER ( MAXT = 30 ) - INTEGER NIN, NOUT - PARAMETER ( NIN = 5, NOUT = 6 ) -* .. -* .. Local Scalars .. - LOGICAL CBB, CBK, CBL, CES, CEV, CGG, CGK, CGL, CGS, - $ CGV, CGX, CHB, CSD, CSX, CVX, CXV, FATAL, GLM, - $ GQR, GSV, LSE, NEP, SEP, SVD, TSTCHK, TSTDIF, - $ TSTDRV, TSTERR - CHARACTER C1 - CHARACTER*3 C3, PATH - CHARACTER*32 VNAME - CHARACTER*10 INTSTR - CHARACTER*80 LINE - INTEGER I, I1, IC, INFO, ITMP, K, LENP, MAXTYP, NEWSD, - $ NK, NN, NPARMS, NRHS, NTYPES, - $ VERS_MAJOR, VERS_MINOR, VERS_PATCH - REAL EPS, S1, S2, THRESH, THRSHN -* .. -* .. Local Arrays .. 
- LOGICAL DOTYPE( MAXT ), LOGWRK( NMAX ) - INTEGER IOLDSD( 4 ), ISEED( 4 ), IWORK( LIWORK ), - $ KVAL( MAXIN ), MVAL( MAXIN ), MXBVAL( MAXIN ), - $ NBCOL( MAXIN ), NBMIN( MAXIN ), NBVAL( MAXIN ), - $ NSVAL( MAXIN ), NVAL( MAXIN ), NXVAL( MAXIN ), - $ PVAL( MAXIN ) - INTEGER INMIN( MAXIN ), INWIN( MAXIN ), INIBL( MAXIN ), - $ ISHFTS( MAXIN ), IACC22( MAXIN ) - REAL ALPHA( NMAX ), BETA( NMAX ), DR( NMAX, 12 ), - $ RESULT( 500 ), RWORK( LWORK ), S( NMAX*NMAX ) - COMPLEX A( NMAX*NMAX, NEED ), B( NMAX*NMAX, 5 ), - $ C( NCMAX*NCMAX, NCMAX*NCMAX ), DC( NMAX, 6 ), - $ TAUA( NMAX ), TAUB( NMAX ), WORK( LWORK ), - $ X( 5*NMAX ) -* .. -* .. External Functions .. - LOGICAL LSAMEN - REAL SECOND, SLAMCH - EXTERNAL LSAMEN, SECOND, SLAMCH -* .. -* .. External Subroutines .. - EXTERNAL ALAREQ, CCHKBB, CCHKBD, CCHKBK, CCHKBL, CCHKEC, - $ CCHKGG, CCHKGK, CCHKGL, CCHKHB, CCHKHS, CCHKST, - $ CCKCSD, CCKGLM, CCKGQR, CCKGSV, CCKLSE, CDRGES, - $ CDRGEV, CDRGSX, CDRGVX, CDRVBD, CDRVES, CDRVEV, - $ CDRVSG, CDRVST, CDRVSX, CDRVVX, CERRBD, - $ CERRED, CERRGG, CERRHS, CERRST, ILAVER, XLAENV, - $ CDRGES3, CDRGEV3, - $ CCHKST2STG, CDRVST2STG, CCHKHB2STG -* .. -* .. Intrinsic Functions .. - INTRINSIC LEN, MIN -* .. -* .. Scalars in Common .. - LOGICAL LERR, OK - CHARACTER*32 SRNAMT - INTEGER INFOT, MAXB, NPROC, NSHIFT, NUNIT, SELDIM, - $ SELOPT -* .. -* .. Arrays in Common .. - LOGICAL SELVAL( 20 ) - INTEGER IPARMS( 100 ) - REAL SELWI( 20 ), SELWR( 20 ) -* .. -* .. Common blocks .. - COMMON / CENVIR / NPROC, NSHIFT, MAXB - COMMON / CLAENV / IPARMS - COMMON / INFOC / INFOT, NUNIT, OK, LERR - COMMON / SRNAMC / SRNAMT - COMMON / SSLCT / SELOPT, SELDIM, SELVAL, SELWR, SELWI -* .. -* .. Data statements .. - DATA INTSTR / '0123456789' / - DATA IOLDSD / 0, 0, 0, 1 / -* .. -* .. Executable Statements .. -* - A = 0.0 - B = 0.0 - C = 0.0 - DC = 0.0 - S1 = SECOND( ) - FATAL = .FALSE. 
- NUNIT = NOUT -* -* Return to here to read multiple sets of data -* - 10 CONTINUE -* -* Read the first line and set the 3-character test path -* - READ( NIN, FMT = '(A80)', END = 380 )LINE - PATH = LINE( 1: 3 ) - NEP = LSAMEN( 3, PATH, 'NEP' ) .OR. LSAMEN( 3, PATH, 'CHS' ) - SEP = LSAMEN( 3, PATH, 'SEP' ) .OR. LSAMEN( 3, PATH, 'CST' ) .OR. - $ LSAMEN( 3, PATH, 'CSG' ) .OR. LSAMEN( 3, PATH, 'SE2' ) - SVD = LSAMEN( 3, PATH, 'SVD' ) .OR. LSAMEN( 3, PATH, 'CBD' ) - CEV = LSAMEN( 3, PATH, 'CEV' ) - CES = LSAMEN( 3, PATH, 'CES' ) - CVX = LSAMEN( 3, PATH, 'CVX' ) - CSX = LSAMEN( 3, PATH, 'CSX' ) - CGG = LSAMEN( 3, PATH, 'CGG' ) - CGS = LSAMEN( 3, PATH, 'CGS' ) - CGX = LSAMEN( 3, PATH, 'CGX' ) - CGV = LSAMEN( 3, PATH, 'CGV' ) - CXV = LSAMEN( 3, PATH, 'CXV' ) - CHB = LSAMEN( 3, PATH, 'CHB' ) - CBB = LSAMEN( 3, PATH, 'CBB' ) - GLM = LSAMEN( 3, PATH, 'GLM' ) - GQR = LSAMEN( 3, PATH, 'GQR' ) .OR. LSAMEN( 3, PATH, 'GRQ' ) - GSV = LSAMEN( 3, PATH, 'GSV' ) - CSD = LSAMEN( 3, PATH, 'CSD' ) - LSE = LSAMEN( 3, PATH, 'LSE' ) - CBL = LSAMEN( 3, PATH, 'CBL' ) - CBK = LSAMEN( 3, PATH, 'CBK' ) - CGL = LSAMEN( 3, PATH, 'CGL' ) - CGK = LSAMEN( 3, PATH, 'CGK' ) -* -* Report values of parameters. -* - IF( PATH.EQ.' 
' ) THEN - GO TO 10 - ELSE IF( NEP ) THEN - WRITE( NOUT, FMT = 9987 ) - ELSE IF( SEP ) THEN - WRITE( NOUT, FMT = 9986 ) - ELSE IF( SVD ) THEN - WRITE( NOUT, FMT = 9985 ) - ELSE IF( CEV ) THEN - WRITE( NOUT, FMT = 9979 ) - ELSE IF( CES ) THEN - WRITE( NOUT, FMT = 9978 ) - ELSE IF( CVX ) THEN - WRITE( NOUT, FMT = 9977 ) - ELSE IF( CSX ) THEN - WRITE( NOUT, FMT = 9976 ) - ELSE IF( CGG ) THEN - WRITE( NOUT, FMT = 9975 ) - ELSE IF( CGS ) THEN - WRITE( NOUT, FMT = 9964 ) - ELSE IF( CGX ) THEN - WRITE( NOUT, FMT = 9965 ) - ELSE IF( CGV ) THEN - WRITE( NOUT, FMT = 9963 ) - ELSE IF( CXV ) THEN - WRITE( NOUT, FMT = 9962 ) - ELSE IF( CHB ) THEN - WRITE( NOUT, FMT = 9974 ) - ELSE IF( CBB ) THEN - WRITE( NOUT, FMT = 9967 ) - ELSE IF( GLM ) THEN - WRITE( NOUT, FMT = 9971 ) - ELSE IF( GQR ) THEN - WRITE( NOUT, FMT = 9970 ) - ELSE IF( GSV ) THEN - WRITE( NOUT, FMT = 9969 ) - ELSE IF( CSD ) THEN - WRITE( NOUT, FMT = 9960 ) - ELSE IF( LSE ) THEN - WRITE( NOUT, FMT = 9968 ) - ELSE IF( CBL ) THEN -* -* CGEBAL: Balancing -* - CALL CCHKBL( NIN, NOUT ) - GO TO 380 - ELSE IF( CBK ) THEN -* -* CGEBAK: Back transformation -* - CALL CCHKBK( NIN, NOUT ) - GO TO 380 - ELSE IF( CGL ) THEN -* -* CGGBAL: Balancing -* - CALL CCHKGL( NIN, NOUT ) - GO TO 380 - ELSE IF( CGK ) THEN -* -* CGGBAK: Back transformation -* - CALL CCHKGK( NIN, NOUT ) - GO TO 380 - ELSE IF( LSAMEN( 3, PATH, 'CEC' ) ) THEN -* -* CEC: Eigencondition estimation -* - READ( NIN, FMT = * )THRESH - CALL XLAENV( 1, 1 ) - CALL XLAENV( 12, 1 ) - TSTERR = .TRUE. - CALL CCHKEC( THRESH, TSTERR, NIN, NOUT ) - GO TO 380 - ELSE - WRITE( NOUT, FMT = 9992 )PATH - GO TO 380 - END IF - CALL ILAVER( VERS_MAJOR, VERS_MINOR, VERS_PATCH ) - WRITE( NOUT, FMT = 9972 ) VERS_MAJOR, VERS_MINOR, VERS_PATCH - WRITE( NOUT, FMT = 9984 ) -* -* Read the number of values of M, P, and N. -* - READ( NIN, FMT = * )NN - IF( NN.LT.0 ) THEN - WRITE( NOUT, FMT = 9989 )' NN ', NN, 1 - NN = 0 - FATAL = .TRUE. 
- ELSE IF( NN.GT.MAXIN ) THEN - WRITE( NOUT, FMT = 9988 )' NN ', NN, MAXIN - NN = 0 - FATAL = .TRUE. - END IF -* -* Read the values of M -* - IF( .NOT.( CGX .OR. CXV ) ) THEN - READ( NIN, FMT = * )( MVAL( I ), I = 1, NN ) - IF( SVD ) THEN - VNAME = ' M ' - ELSE - VNAME = ' N ' - END IF - DO 20 I = 1, NN - IF( MVAL( I ).LT.0 ) THEN - WRITE( NOUT, FMT = 9989 )VNAME, MVAL( I ), 0 - FATAL = .TRUE. - ELSE IF( MVAL( I ).GT.NMAX ) THEN - WRITE( NOUT, FMT = 9988 )VNAME, MVAL( I ), NMAX - FATAL = .TRUE. - END IF - 20 CONTINUE - WRITE( NOUT, FMT = 9983 )'M: ', ( MVAL( I ), I = 1, NN ) - END IF -* -* Read the values of P -* - IF( GLM .OR. GQR .OR. GSV .OR. CSD .OR. LSE ) THEN - READ( NIN, FMT = * )( PVAL( I ), I = 1, NN ) - DO 30 I = 1, NN - IF( PVAL( I ).LT.0 ) THEN - WRITE( NOUT, FMT = 9989 )' P ', PVAL( I ), 0 - FATAL = .TRUE. - ELSE IF( PVAL( I ).GT.NMAX ) THEN - WRITE( NOUT, FMT = 9988 )' P ', PVAL( I ), NMAX - FATAL = .TRUE. - END IF - 30 CONTINUE - WRITE( NOUT, FMT = 9983 )'P: ', ( PVAL( I ), I = 1, NN ) - END IF -* -* Read the values of N -* - IF( SVD .OR. CBB .OR. GLM .OR. GQR .OR. GSV .OR. CSD .OR. - $ LSE ) THEN - READ( NIN, FMT = * )( NVAL( I ), I = 1, NN ) - DO 40 I = 1, NN - IF( NVAL( I ).LT.0 ) THEN - WRITE( NOUT, FMT = 9989 )' N ', NVAL( I ), 0 - FATAL = .TRUE. - ELSE IF( NVAL( I ).GT.NMAX ) THEN - WRITE( NOUT, FMT = 9988 )' N ', NVAL( I ), NMAX - FATAL = .TRUE. - END IF - 40 CONTINUE - ELSE - DO 50 I = 1, NN - NVAL( I ) = MVAL( I ) - 50 CONTINUE - END IF - IF( .NOT.( CGX .OR. CXV ) ) THEN - WRITE( NOUT, FMT = 9983 )'N: ', ( NVAL( I ), I = 1, NN ) - ELSE - WRITE( NOUT, FMT = 9983 )'N: ', NN - END IF -* -* Read the number of values of K, followed by the values of K -* - IF( CHB .OR. CBB ) THEN - READ( NIN, FMT = * )NK - READ( NIN, FMT = * )( KVAL( I ), I = 1, NK ) - DO 60 I = 1, NK - IF( KVAL( I ).LT.0 ) THEN - WRITE( NOUT, FMT = 9989 )' K ', KVAL( I ), 0 - FATAL = .TRUE. 
- ELSE IF( KVAL( I ).GT.NMAX ) THEN - WRITE( NOUT, FMT = 9988 )' K ', KVAL( I ), NMAX - FATAL = .TRUE. - END IF - 60 CONTINUE - WRITE( NOUT, FMT = 9983 )'K: ', ( KVAL( I ), I = 1, NK ) - END IF -* - IF( CEV .OR. CES .OR. CVX .OR. CSX ) THEN -* -* For the nonsymmetric QR driver routines, only one set of -* parameters is allowed. -* - READ( NIN, FMT = * )NBVAL( 1 ), NBMIN( 1 ), NXVAL( 1 ), - $ INMIN( 1 ), INWIN( 1 ), INIBL(1), ISHFTS(1), IACC22(1) - IF( NBVAL( 1 ).LT.1 ) THEN - WRITE( NOUT, FMT = 9989 )' NB ', NBVAL( 1 ), 1 - FATAL = .TRUE. - ELSE IF( NBMIN( 1 ).LT.1 ) THEN - WRITE( NOUT, FMT = 9989 )'NBMIN ', NBMIN( 1 ), 1 - FATAL = .TRUE. - ELSE IF( NXVAL( 1 ).LT.1 ) THEN - WRITE( NOUT, FMT = 9989 )' NX ', NXVAL( 1 ), 1 - FATAL = .TRUE. - ELSE IF( INMIN( 1 ).LT.1 ) THEN - WRITE( NOUT, FMT = 9989 )' INMIN ', INMIN( 1 ), 1 - FATAL = .TRUE. - ELSE IF( INWIN( 1 ).LT.1 ) THEN - WRITE( NOUT, FMT = 9989 )' INWIN ', INWIN( 1 ), 1 - FATAL = .TRUE. - ELSE IF( INIBL( 1 ).LT.1 ) THEN - WRITE( NOUT, FMT = 9989 )' INIBL ', INIBL( 1 ), 1 - FATAL = .TRUE. - ELSE IF( ISHFTS( 1 ).LT.1 ) THEN - WRITE( NOUT, FMT = 9989 )' ISHFTS ', ISHFTS( 1 ), 1 - FATAL = .TRUE. - ELSE IF( IACC22( 1 ).LT.0 ) THEN - WRITE( NOUT, FMT = 9989 )' IACC22 ', IACC22( 1 ), 0 - FATAL = .TRUE. - END IF - CALL XLAENV( 1, NBVAL( 1 ) ) - CALL XLAENV( 2, NBMIN( 1 ) ) - CALL XLAENV( 3, NXVAL( 1 ) ) - CALL XLAENV(12, MAX( 11, INMIN( 1 ) ) ) - CALL XLAENV(13, INWIN( 1 ) ) - CALL XLAENV(14, INIBL( 1 ) ) - CALL XLAENV(15, ISHFTS( 1 ) ) - CALL XLAENV(16, IACC22( 1 ) ) - WRITE( NOUT, FMT = 9983 )'NB: ', NBVAL( 1 ) - WRITE( NOUT, FMT = 9983 )'NBMIN:', NBMIN( 1 ) - WRITE( NOUT, FMT = 9983 )'NX: ', NXVAL( 1 ) - WRITE( NOUT, FMT = 9983 )'INMIN: ', INMIN( 1 ) - WRITE( NOUT, FMT = 9983 )'INWIN: ', INWIN( 1 ) - WRITE( NOUT, FMT = 9983 )'INIBL: ', INIBL( 1 ) - WRITE( NOUT, FMT = 9983 )'ISHFTS: ', ISHFTS( 1 ) - WRITE( NOUT, FMT = 9983 )'IACC22: ', IACC22( 1 ) -* - ELSE IF( CGS .OR. CGX .OR. CGV .OR. 
CXV ) THEN -* -* For the nonsymmetric generalized driver routines, only one set of -* parameters is allowed. -* - READ( NIN, FMT = * )NBVAL( 1 ), NBMIN( 1 ), NXVAL( 1 ), - $ NSVAL( 1 ), MXBVAL( 1 ) - IF( NBVAL( 1 ).LT.1 ) THEN - WRITE( NOUT, FMT = 9989 )' NB ', NBVAL( 1 ), 1 - FATAL = .TRUE. - ELSE IF( NBMIN( 1 ).LT.1 ) THEN - WRITE( NOUT, FMT = 9989 )'NBMIN ', NBMIN( 1 ), 1 - FATAL = .TRUE. - ELSE IF( NXVAL( 1 ).LT.1 ) THEN - WRITE( NOUT, FMT = 9989 )' NX ', NXVAL( 1 ), 1 - FATAL = .TRUE. - ELSE IF( NSVAL( 1 ).LT.2 ) THEN - WRITE( NOUT, FMT = 9989 )' NS ', NSVAL( 1 ), 2 - FATAL = .TRUE. - ELSE IF( MXBVAL( 1 ).LT.1 ) THEN - WRITE( NOUT, FMT = 9989 )' MAXB ', MXBVAL( 1 ), 1 - FATAL = .TRUE. - END IF - CALL XLAENV( 1, NBVAL( 1 ) ) - CALL XLAENV( 2, NBMIN( 1 ) ) - CALL XLAENV( 3, NXVAL( 1 ) ) - CALL XLAENV( 4, NSVAL( 1 ) ) - CALL XLAENV( 8, MXBVAL( 1 ) ) - WRITE( NOUT, FMT = 9983 )'NB: ', NBVAL( 1 ) - WRITE( NOUT, FMT = 9983 )'NBMIN:', NBMIN( 1 ) - WRITE( NOUT, FMT = 9983 )'NX: ', NXVAL( 1 ) - WRITE( NOUT, FMT = 9983 )'NS: ', NSVAL( 1 ) - WRITE( NOUT, FMT = 9983 )'MAXB: ', MXBVAL( 1 ) - ELSE IF( .NOT.CHB .AND. .NOT.GLM .AND. .NOT.GQR .AND. .NOT. - $ GSV .AND. .NOT.CSD .AND. .NOT.LSE ) THEN -* -* For the other paths, the number of parameters can be varied -* from the input file. Read the number of parameter values. -* - READ( NIN, FMT = * )NPARMS - IF( NPARMS.LT.1 ) THEN - WRITE( NOUT, FMT = 9989 )'NPARMS', NPARMS, 1 - NPARMS = 0 - FATAL = .TRUE. - ELSE IF( NPARMS.GT.MAXIN ) THEN - WRITE( NOUT, FMT = 9988 )'NPARMS', NPARMS, MAXIN - NPARMS = 0 - FATAL = .TRUE. - END IF -* -* Read the values of NB -* - IF( .NOT.CBB ) THEN - READ( NIN, FMT = * )( NBVAL( I ), I = 1, NPARMS ) - DO 70 I = 1, NPARMS - IF( NBVAL( I ).LT.0 ) THEN - WRITE( NOUT, FMT = 9989 )' NB ', NBVAL( I ), 0 - FATAL = .TRUE. - ELSE IF( NBVAL( I ).GT.NMAX ) THEN - WRITE( NOUT, FMT = 9988 )' NB ', NBVAL( I ), NMAX - FATAL = .TRUE. 
- END IF - 70 CONTINUE - WRITE( NOUT, FMT = 9983 )'NB: ', - $ ( NBVAL( I ), I = 1, NPARMS ) - END IF -* -* Read the values of NBMIN -* - IF( NEP .OR. SEP .OR. SVD .OR. CGG ) THEN - READ( NIN, FMT = * )( NBMIN( I ), I = 1, NPARMS ) - DO 80 I = 1, NPARMS - IF( NBMIN( I ).LT.0 ) THEN - WRITE( NOUT, FMT = 9989 )'NBMIN ', NBMIN( I ), 0 - FATAL = .TRUE. - ELSE IF( NBMIN( I ).GT.NMAX ) THEN - WRITE( NOUT, FMT = 9988 )'NBMIN ', NBMIN( I ), NMAX - FATAL = .TRUE. - END IF - 80 CONTINUE - WRITE( NOUT, FMT = 9983 )'NBMIN:', - $ ( NBMIN( I ), I = 1, NPARMS ) - ELSE - DO 90 I = 1, NPARMS - NBMIN( I ) = 1 - 90 CONTINUE - END IF -* -* Read the values of NX -* - IF( NEP .OR. SEP .OR. SVD ) THEN - READ( NIN, FMT = * )( NXVAL( I ), I = 1, NPARMS ) - DO 100 I = 1, NPARMS - IF( NXVAL( I ).LT.0 ) THEN - WRITE( NOUT, FMT = 9989 )' NX ', NXVAL( I ), 0 - FATAL = .TRUE. - ELSE IF( NXVAL( I ).GT.NMAX ) THEN - WRITE( NOUT, FMT = 9988 )' NX ', NXVAL( I ), NMAX - FATAL = .TRUE. - END IF - 100 CONTINUE - WRITE( NOUT, FMT = 9983 )'NX: ', - $ ( NXVAL( I ), I = 1, NPARMS ) - ELSE - DO 110 I = 1, NPARMS - NXVAL( I ) = 1 - 110 CONTINUE - END IF -* -* Read the values of NSHIFT (if CGG) or NRHS (if SVD -* or CBB). -* - IF( SVD .OR. CBB .OR. CGG ) THEN - READ( NIN, FMT = * )( NSVAL( I ), I = 1, NPARMS ) - DO 120 I = 1, NPARMS - IF( NSVAL( I ).LT.0 ) THEN - WRITE( NOUT, FMT = 9989 )' NS ', NSVAL( I ), 0 - FATAL = .TRUE. - ELSE IF( NSVAL( I ).GT.NMAX ) THEN - WRITE( NOUT, FMT = 9988 )' NS ', NSVAL( I ), NMAX - FATAL = .TRUE. - END IF - 120 CONTINUE - WRITE( NOUT, FMT = 9983 )'NS: ', - $ ( NSVAL( I ), I = 1, NPARMS ) - ELSE - DO 130 I = 1, NPARMS - NSVAL( I ) = 1 - 130 CONTINUE - END IF -* -* Read the values for MAXB. -* - IF( CGG ) THEN - READ( NIN, FMT = * )( MXBVAL( I ), I = 1, NPARMS ) - DO 140 I = 1, NPARMS - IF( MXBVAL( I ).LT.0 ) THEN - WRITE( NOUT, FMT = 9989 )' MAXB ', MXBVAL( I ), 0 - FATAL = .TRUE. 
- ELSE IF( MXBVAL( I ).GT.NMAX ) THEN - WRITE( NOUT, FMT = 9988 )' MAXB ', MXBVAL( I ), NMAX - FATAL = .TRUE. - END IF - 140 CONTINUE - WRITE( NOUT, FMT = 9983 )'MAXB: ', - $ ( MXBVAL( I ), I = 1, NPARMS ) - ELSE - DO 150 I = 1, NPARMS - MXBVAL( I ) = 1 - 150 CONTINUE - END IF -* -* Read the values for INMIN. -* - IF( NEP ) THEN - READ( NIN, FMT = * )( INMIN( I ), I = 1, NPARMS ) - DO 540 I = 1, NPARMS - IF( INMIN( I ).LT.0 ) THEN - WRITE( NOUT, FMT = 9989 )' INMIN ', INMIN( I ), 0 - FATAL = .TRUE. - END IF - 540 CONTINUE - WRITE( NOUT, FMT = 9983 )'INMIN: ', - $ ( INMIN( I ), I = 1, NPARMS ) - ELSE - DO 550 I = 1, NPARMS - INMIN( I ) = 1 - 550 CONTINUE - END IF -* -* Read the values for INWIN. -* - IF( NEP ) THEN - READ( NIN, FMT = * )( INWIN( I ), I = 1, NPARMS ) - DO 560 I = 1, NPARMS - IF( INWIN( I ).LT.0 ) THEN - WRITE( NOUT, FMT = 9989 )' INWIN ', INWIN( I ), 0 - FATAL = .TRUE. - END IF - 560 CONTINUE - WRITE( NOUT, FMT = 9983 )'INWIN: ', - $ ( INWIN( I ), I = 1, NPARMS ) - ELSE - DO 570 I = 1, NPARMS - INWIN( I ) = 1 - 570 CONTINUE - END IF -* -* Read the values for INIBL. -* - IF( NEP ) THEN - READ( NIN, FMT = * )( INIBL( I ), I = 1, NPARMS ) - DO 580 I = 1, NPARMS - IF( INIBL( I ).LT.0 ) THEN - WRITE( NOUT, FMT = 9989 )' INIBL ', INIBL( I ), 0 - FATAL = .TRUE. - END IF - 580 CONTINUE - WRITE( NOUT, FMT = 9983 )'INIBL: ', - $ ( INIBL( I ), I = 1, NPARMS ) - ELSE - DO 590 I = 1, NPARMS - INIBL( I ) = 1 - 590 CONTINUE - END IF -* -* Read the values for ISHFTS. -* - IF( NEP ) THEN - READ( NIN, FMT = * )( ISHFTS( I ), I = 1, NPARMS ) - DO 600 I = 1, NPARMS - IF( ISHFTS( I ).LT.0 ) THEN - WRITE( NOUT, FMT = 9989 )' ISHFTS ', ISHFTS( I ), 0 - FATAL = .TRUE. - END IF - 600 CONTINUE - WRITE( NOUT, FMT = 9983 )'ISHFTS: ', - $ ( ISHFTS( I ), I = 1, NPARMS ) - ELSE - DO 610 I = 1, NPARMS - ISHFTS( I ) = 1 - 610 CONTINUE - END IF -* -* Read the values for IACC22. -* - IF( NEP .OR. 
CGG ) THEN - READ( NIN, FMT = * )( IACC22( I ), I = 1, NPARMS ) - DO 620 I = 1, NPARMS - IF( IACC22( I ).LT.0 ) THEN - WRITE( NOUT, FMT = 9989 )' IACC22 ', IACC22( I ), 0 - FATAL = .TRUE. - END IF - 620 CONTINUE - WRITE( NOUT, FMT = 9983 )'IACC22: ', - $ ( IACC22( I ), I = 1, NPARMS ) - ELSE - DO 630 I = 1, NPARMS - IACC22( I ) = 1 - 630 CONTINUE - END IF -* -* Read the values for NBCOL. -* - IF( CGG ) THEN - READ( NIN, FMT = * )( NBCOL( I ), I = 1, NPARMS ) - DO 160 I = 1, NPARMS - IF( NBCOL( I ).LT.0 ) THEN - WRITE( NOUT, FMT = 9989 )'NBCOL ', NBCOL( I ), 0 - FATAL = .TRUE. - ELSE IF( NBCOL( I ).GT.NMAX ) THEN - WRITE( NOUT, FMT = 9988 )'NBCOL ', NBCOL( I ), NMAX - FATAL = .TRUE. - END IF - 160 CONTINUE - WRITE( NOUT, FMT = 9983 )'NBCOL:', - $ ( NBCOL( I ), I = 1, NPARMS ) - ELSE - DO 170 I = 1, NPARMS - NBCOL( I ) = 1 - 170 CONTINUE - END IF - END IF -* -* Calculate and print the machine dependent constants. -* - WRITE( NOUT, FMT = * ) - EPS = SLAMCH( 'Underflow threshold' ) - WRITE( NOUT, FMT = 9981 )'underflow', EPS - EPS = SLAMCH( 'Overflow threshold' ) - WRITE( NOUT, FMT = 9981 )'overflow ', EPS - EPS = SLAMCH( 'Epsilon' ) - WRITE( NOUT, FMT = 9981 )'precision', EPS -* -* Read the threshold value for the test ratios. -* - READ( NIN, FMT = * )THRESH - WRITE( NOUT, FMT = 9982 )THRESH - IF( SEP .OR. SVD .OR. CGG ) THEN -* -* Read the flag that indicates whether to test LAPACK routines. -* - READ( NIN, FMT = * )TSTCHK -* -* Read the flag that indicates whether to test driver routines. -* - READ( NIN, FMT = * )TSTDRV - END IF -* -* Read the flag that indicates whether to test the error exits. -* - READ( NIN, FMT = * )TSTERR -* -* Read the code describing how to set the random number seed. -* - READ( NIN, FMT = * )NEWSD -* -* If NEWSD = 2, read another line with 4 integers for the seed. 
-* - IF( NEWSD.EQ.2 ) - $ READ( NIN, FMT = * )( IOLDSD( I ), I = 1, 4 ) -* - DO 180 I = 1, 4 - ISEED( I ) = IOLDSD( I ) - 180 CONTINUE -* - IF( FATAL ) THEN - WRITE( NOUT, FMT = 9999 ) - STOP - END IF -* -* Read the input lines indicating the test path and its parameters. -* The first three characters indicate the test path, and the number -* of test matrix types must be the first nonblank item in columns -* 4-80. -* - 190 CONTINUE -* - IF( .NOT.( CGX .OR. CXV ) ) THEN -* - 200 CONTINUE - READ( NIN, FMT = '(A80)', END = 380 )LINE - C3 = LINE( 1: 3 ) - LENP = LEN( LINE ) - I = 3 - ITMP = 0 - I1 = 0 - 210 CONTINUE - I = I + 1 - IF( I.GT.LENP ) THEN - IF( I1.GT.0 ) THEN - GO TO 240 - ELSE - NTYPES = MAXT - GO TO 240 - END IF - END IF - IF( LINE( I: I ).NE.' ' .AND. LINE( I: I ).NE.',' ) THEN - I1 = I - C1 = LINE( I1: I1 ) -* -* Check that a valid integer was read -* - DO 220 K = 1, 10 - IF( C1.EQ.INTSTR( K: K ) ) THEN - IC = K - 1 - GO TO 230 - END IF - 220 CONTINUE - WRITE( NOUT, FMT = 9991 )I, LINE - GO TO 200 - 230 CONTINUE - ITMP = 10*ITMP + IC - GO TO 210 - ELSE IF( I1.GT.0 ) THEN - GO TO 240 - ELSE - GO TO 210 - END IF - 240 CONTINUE - NTYPES = ITMP -* -* Skip the tests if NTYPES is <= 0. -* - IF( .NOT.( CEV .OR. CES .OR. CVX .OR. CSX .OR. CGV .OR. - $ CGS ) .AND. NTYPES.LE.0 ) THEN - WRITE( NOUT, FMT = 9990 )C3 - GO TO 200 - END IF -* - ELSE - IF( CGX ) - $ C3 = 'CGX' - IF( CXV ) - $ C3 = 'CXV' - END IF -* -* Reset the random number seed. -* - IF( NEWSD.EQ.0 ) THEN - DO 250 K = 1, 4 - ISEED( K ) = IOLDSD( K ) - 250 CONTINUE - END IF -* - IF( LSAMEN( 3, C3, 'CHS' ) .OR. 
LSAMEN( 3, C3, 'NEP' ) ) THEN -* -* ------------------------------------- -* NEP: Nonsymmetric Eigenvalue Problem -* ------------------------------------- -* Vary the parameters -* NB = block size -* NBMIN = minimum block size -* NX = crossover point -* NS = number of shifts -* MAXB = minimum submatrix size -* - MAXTYP = 21 - NTYPES = MIN( MAXTYP, NTYPES ) - CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) - CALL XLAENV( 1, 1 ) - IF( TSTERR ) - $ CALL CERRHS( 'CHSEQR', NOUT ) - DO 270 I = 1, NPARMS - CALL XLAENV( 1, NBVAL( I ) ) - CALL XLAENV( 2, NBMIN( I ) ) - CALL XLAENV( 3, NXVAL( I ) ) - CALL XLAENV(12, MAX( 11, INMIN( I ) ) ) - CALL XLAENV(13, INWIN( I ) ) - CALL XLAENV(14, INIBL( I ) ) - CALL XLAENV(15, ISHFTS( I ) ) - CALL XLAENV(16, IACC22( I ) ) -* - IF( NEWSD.EQ.0 ) THEN - DO 260 K = 1, 4 - ISEED( K ) = IOLDSD( K ) - 260 CONTINUE - END IF - WRITE( NOUT, FMT = 9961 )C3, NBVAL( I ), NBMIN( I ), - $ NXVAL( I ), MAX( 11, INMIN(I)), - $ INWIN( I ), INIBL( I ), ISHFTS( I ), IACC22( I ) - CALL CCHKHS( NN, NVAL, MAXTYP, DOTYPE, ISEED, THRESH, NOUT, - $ A( 1, 1 ), NMAX, A( 1, 2 ), A( 1, 3 ), - $ A( 1, 4 ), A( 1, 5 ), NMAX, A( 1, 6 ), - $ A( 1, 7 ), DC( 1, 1 ), DC( 1, 2 ), A( 1, 8 ), - $ A( 1, 9 ), A( 1, 10 ), A( 1, 11 ), A( 1, 12 ), - $ DC( 1, 3 ), WORK, LWORK, RWORK, IWORK, LOGWRK, - $ RESULT, INFO ) - IF( INFO.NE.0 ) - $ WRITE( NOUT, FMT = 9980 )'CCHKHS', INFO - 270 CONTINUE -* - ELSE IF( LSAMEN( 3, C3, 'CST' ) .OR. LSAMEN( 3, C3, 'SEP' ) - $ .OR. 
LSAMEN( 3, C3, 'SE2' ) ) THEN -* -* ---------------------------------- -* SEP: Symmetric Eigenvalue Problem -* ---------------------------------- -* Vary the parameters -* NB = block size -* NBMIN = minimum block size -* NX = crossover point -* - MAXTYP = 21 - NTYPES = MIN( MAXTYP, NTYPES ) - CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) - CALL XLAENV( 1, 1 ) - CALL XLAENV( 9, 25 ) - IF( TSTERR ) - $ CALL CERRST( 'CST', NOUT ) - DO 290 I = 1, NPARMS - CALL XLAENV( 1, NBVAL( I ) ) - CALL XLAENV( 2, NBMIN( I ) ) - CALL XLAENV( 3, NXVAL( I ) ) -* - IF( NEWSD.EQ.0 ) THEN - DO 280 K = 1, 4 - ISEED( K ) = IOLDSD( K ) - 280 CONTINUE - END IF - WRITE( NOUT, FMT = 9997 )C3, NBVAL( I ), NBMIN( I ), - $ NXVAL( I ) - IF( TSTCHK ) THEN - IF( LSAMEN( 3, C3, 'SE2' ) ) THEN - CALL CCHKST2STG( NN, NVAL, MAXTYP, DOTYPE, ISEED, THRESH, - $ NOUT, A( 1, 1 ), NMAX, A( 1, 2 ), - $ DR( 1, 1 ), DR( 1, 2 ), DR( 1, 3 ), - $ DR( 1, 4 ), DR( 1, 5 ), DR( 1, 6 ), - $ DR( 1, 7 ), DR( 1, 8 ), DR( 1, 9 ), - $ DR( 1, 10 ), DR( 1, 11 ), A( 1, 3 ), NMAX, - $ A( 1, 4 ), A( 1, 5 ), DC( 1, 1 ), A( 1, 6 ), - $ WORK, LWORK, RWORK, LWORK, IWORK, LIWORK, - $ RESULT, INFO ) - ELSE - CALL CCHKST( NN, NVAL, MAXTYP, DOTYPE, ISEED, THRESH, - $ NOUT, A( 1, 1 ), NMAX, A( 1, 2 ), - $ DR( 1, 1 ), DR( 1, 2 ), DR( 1, 3 ), - $ DR( 1, 4 ), DR( 1, 5 ), DR( 1, 6 ), - $ DR( 1, 7 ), DR( 1, 8 ), DR( 1, 9 ), - $ DR( 1, 10 ), DR( 1, 11 ), A( 1, 3 ), NMAX, - $ A( 1, 4 ), A( 1, 5 ), DC( 1, 1 ), A( 1, 6 ), - $ WORK, LWORK, RWORK, LWORK, IWORK, LIWORK, - $ RESULT, INFO ) - ENDIF - IF( INFO.NE.0 ) - $ WRITE( NOUT, FMT = 9980 )'CCHKST', INFO - END IF - IF( TSTDRV ) THEN - IF( LSAMEN( 3, C3, 'SE2' ) ) THEN - CALL CDRVST2STG( NN, NVAL, 18, DOTYPE, ISEED, THRESH, - $ NOUT, A( 1, 1 ), NMAX, DR( 1, 3 ), DR( 1, 4 ), - $ DR( 1, 5 ), DR( 1, 8 ), DR( 1, 9 ), - $ DR( 1, 10 ), A( 1, 2 ), NMAX, A( 1, 3 ), - $ DC( 1, 1 ), A( 1, 4 ), WORK, LWORK, RWORK, - $ LWORK, IWORK, LIWORK, RESULT, INFO ) - ELSE - CALL CDRVST( NN, NVAL, 18, DOTYPE, 
ISEED, THRESH, NOUT, - $ A( 1, 1 ), NMAX, DR( 1, 3 ), DR( 1, 4 ), - $ DR( 1, 5 ), DR( 1, 8 ), DR( 1, 9 ), - $ DR( 1, 10 ), A( 1, 2 ), NMAX, A( 1, 3 ), - $ DC( 1, 1 ), A( 1, 4 ), WORK, LWORK, RWORK, - $ LWORK, IWORK, LIWORK, RESULT, INFO ) - ENDIF - IF( INFO.NE.0 ) - $ WRITE( NOUT, FMT = 9980 )'CDRVST', INFO - END IF - 290 CONTINUE -* - ELSE IF( LSAMEN( 3, C3, 'CSG' ) ) THEN -* -* ---------------------------------------------- -* CSG: Hermitian Generalized Eigenvalue Problem -* ---------------------------------------------- -* Vary the parameters -* NB = block size -* NBMIN = minimum block size -* NX = crossover point -* - MAXTYP = 21 - NTYPES = MIN( MAXTYP, NTYPES ) - CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) - CALL XLAENV( 9, 25 ) - DO 310 I = 1, NPARMS - CALL XLAENV( 1, NBVAL( I ) ) - CALL XLAENV( 2, NBMIN( I ) ) - CALL XLAENV( 3, NXVAL( I ) ) -* - IF( NEWSD.EQ.0 ) THEN - DO 300 K = 1, 4 - ISEED( K ) = IOLDSD( K ) - 300 CONTINUE - END IF - WRITE( NOUT, FMT = 9997 )C3, NBVAL( I ), NBMIN( I ), - $ NXVAL( I ) - IF( TSTCHK ) THEN -* CALL CDRVSG( NN, NVAL, MAXTYP, DOTYPE, ISEED, THRESH, -* $ NOUT, A( 1, 1 ), NMAX, A( 1, 2 ), NMAX, -* $ DR( 1, 3 ), A( 1, 3 ), NMAX, A( 1, 4 ), -* $ A( 1, 5 ), A( 1, 6 ), A( 1, 7 ), WORK, -* $ LWORK, RWORK, LWORK, IWORK, LIWORK, RESULT, -* $ INFO ) - CALL CDRVSG2STG( NN, NVAL, MAXTYP, DOTYPE, ISEED, THRESH, - $ NOUT, A( 1, 1 ), NMAX, A( 1, 2 ), NMAX, - $ DR( 1, 3 ), DR( 1, 4 ), A( 1, 3 ), NMAX, - $ A( 1, 4 ), A( 1, 5 ), A( 1, 6 ), - $ A( 1, 7 ), WORK, LWORK, RWORK, LWORK, - $ IWORK, LIWORK, RESULT, INFO ) - IF( INFO.NE.0 ) - $ WRITE( NOUT, FMT = 9980 )'CDRVSG', INFO - END IF - 310 CONTINUE -* - ELSE IF( LSAMEN( 3, C3, 'CBD' ) .OR. 
LSAMEN( 3, C3, 'SVD' ) ) THEN -* -* ---------------------------------- -* SVD: Singular Value Decomposition -* ---------------------------------- -* Vary the parameters -* NB = block size -* NBMIN = minimum block size -* NX = crossover point -* NRHS = number of right hand sides -* - MAXTYP = 16 - NTYPES = MIN( MAXTYP, NTYPES ) - CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) - CALL XLAENV( 9, 25 ) -* -* Test the error exits -* - CALL XLAENV( 1, 1 ) - IF( TSTERR .AND. TSTCHK ) - $ CALL CERRBD( 'CBD', NOUT ) - IF( TSTERR .AND. TSTDRV ) - $ CALL CERRED( 'CBD', NOUT ) -* - DO 330 I = 1, NPARMS - NRHS = NSVAL( I ) - CALL XLAENV( 1, NBVAL( I ) ) - CALL XLAENV( 2, NBMIN( I ) ) - CALL XLAENV( 3, NXVAL( I ) ) - IF( NEWSD.EQ.0 ) THEN - DO 320 K = 1, 4 - ISEED( K ) = IOLDSD( K ) - 320 CONTINUE - END IF - WRITE( NOUT, FMT = 9995 )C3, NBVAL( I ), NBMIN( I ), - $ NXVAL( I ), NRHS - IF( TSTCHK ) THEN - CALL CCHKBD( NN, MVAL, NVAL, MAXTYP, DOTYPE, NRHS, ISEED, - $ THRESH, A( 1, 1 ), NMAX, DR( 1, 1 ), - $ DR( 1, 2 ), DR( 1, 3 ), DR( 1, 4 ), - $ A( 1, 2 ), NMAX, A( 1, 3 ), A( 1, 4 ), - $ A( 1, 5 ), NMAX, A( 1, 6 ), NMAX, A( 1, 7 ), - $ A( 1, 8 ), WORK, LWORK, RWORK, NOUT, INFO ) - IF( INFO.NE.0 ) - $ WRITE( NOUT, FMT = 9980 )'CCHKBD', INFO - END IF - IF( TSTDRV ) - $ CALL CDRVBD( NN, MVAL, NVAL, MAXTYP, DOTYPE, ISEED, - $ THRESH, A( 1, 1 ), NMAX, A( 1, 2 ), NMAX, - $ A( 1, 3 ), NMAX, A( 1, 4 ), A( 1, 5 ), - $ A( 1, 6 ), DR( 1, 1 ), DR( 1, 2 ), - $ DR( 1, 3 ), WORK, LWORK, RWORK, IWORK, NOUT, - $ INFO ) - 330 CONTINUE -* - ELSE IF( LSAMEN( 3, C3, 'CEV' ) ) THEN -* -* -------------------------------------------- -* CEV: Nonsymmetric Eigenvalue Problem Driver -* CGEEV (eigenvalues and eigenvectors) -* -------------------------------------------- -* - MAXTYP = 21 - NTYPES = MIN( MAXTYP, NTYPES ) - IF( NTYPES.LE.0 ) THEN - WRITE( NOUT, FMT = 9990 )C3 - ELSE - IF( TSTERR ) - $ CALL CERRED( C3, NOUT ) - CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) - CALL CDRVEV( NN, NVAL, 
NTYPES, DOTYPE, ISEED, THRESH, NOUT, - $ A( 1, 1 ), NMAX, A( 1, 2 ), DC( 1, 1 ), - $ DC( 1, 2 ), A( 1, 3 ), NMAX, A( 1, 4 ), NMAX, - $ A( 1, 5 ), NMAX, RESULT, WORK, LWORK, RWORK, - $ IWORK, INFO ) - IF( INFO.NE.0 ) - $ WRITE( NOUT, FMT = 9980 )'CGEEV', INFO - END IF - WRITE( NOUT, FMT = 9973 ) - GO TO 10 -* - ELSE IF( LSAMEN( 3, C3, 'CES' ) ) THEN -* -* -------------------------------------------- -* CES: Nonsymmetric Eigenvalue Problem Driver -* CGEES (Schur form) -* -------------------------------------------- -* - MAXTYP = 21 - NTYPES = MIN( MAXTYP, NTYPES ) - IF( NTYPES.LE.0 ) THEN - WRITE( NOUT, FMT = 9990 )C3 - ELSE - IF( TSTERR ) - $ CALL CERRED( C3, NOUT ) - CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) - CALL CDRVES( NN, NVAL, NTYPES, DOTYPE, ISEED, THRESH, NOUT, - $ A( 1, 1 ), NMAX, A( 1, 2 ), A( 1, 3 ), - $ DC( 1, 1 ), DC( 1, 2 ), A( 1, 4 ), NMAX, - $ RESULT, WORK, LWORK, RWORK, IWORK, LOGWRK, - $ INFO ) - IF( INFO.NE.0 ) - $ WRITE( NOUT, FMT = 9980 )'CGEES', INFO - END IF - WRITE( NOUT, FMT = 9973 ) - GO TO 10 -* - ELSE IF( LSAMEN( 3, C3, 'CVX' ) ) THEN -* -* -------------------------------------------------------------- -* CVX: Nonsymmetric Eigenvalue Problem Expert Driver -* CGEEVX (eigenvalues, eigenvectors and condition numbers) -* -------------------------------------------------------------- -* - MAXTYP = 21 - NTYPES = MIN( MAXTYP, NTYPES ) - IF( NTYPES.LT.0 ) THEN - WRITE( NOUT, FMT = 9990 )C3 - ELSE - IF( TSTERR ) - $ CALL CERRED( C3, NOUT ) - CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) - CALL CDRVVX( NN, NVAL, NTYPES, DOTYPE, ISEED, THRESH, NIN, - $ NOUT, A( 1, 1 ), NMAX, A( 1, 2 ), DC( 1, 1 ), - $ DC( 1, 2 ), A( 1, 3 ), NMAX, A( 1, 4 ), NMAX, - $ A( 1, 5 ), NMAX, DR( 1, 1 ), DR( 1, 2 ), - $ DR( 1, 3 ), DR( 1, 4 ), DR( 1, 5 ), DR( 1, 6 ), - $ DR( 1, 7 ), DR( 1, 8 ), RESULT, WORK, LWORK, - $ RWORK, INFO ) - IF( INFO.NE.0 ) - $ WRITE( NOUT, FMT = 9980 )'CGEEVX', INFO - END IF - WRITE( NOUT, FMT = 9973 ) - GO TO 10 -* - ELSE IF( 
LSAMEN( 3, C3, 'CSX' ) ) THEN -* -* --------------------------------------------------- -* CSX: Nonsymmetric Eigenvalue Problem Expert Driver -* CGEESX (Schur form and condition numbers) -* --------------------------------------------------- -* - MAXTYP = 21 - NTYPES = MIN( MAXTYP, NTYPES ) - IF( NTYPES.LT.0 ) THEN - WRITE( NOUT, FMT = 9990 )C3 - ELSE - IF( TSTERR ) - $ CALL CERRED( C3, NOUT ) - CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) - CALL CDRVSX( NN, NVAL, NTYPES, DOTYPE, ISEED, THRESH, NIN, - $ NOUT, A( 1, 1 ), NMAX, A( 1, 2 ), A( 1, 3 ), - $ DC( 1, 1 ), DC( 1, 2 ), DC( 1, 3 ), A( 1, 4 ), - $ NMAX, A( 1, 5 ), RESULT, WORK, LWORK, RWORK, - $ LOGWRK, INFO ) - IF( INFO.NE.0 ) - $ WRITE( NOUT, FMT = 9980 )'CGEESX', INFO - END IF - WRITE( NOUT, FMT = 9973 ) - GO TO 10 -* - ELSE IF( LSAMEN( 3, C3, 'CGG' ) ) THEN -* -* ------------------------------------------------- -* CGG: Generalized Nonsymmetric Eigenvalue Problem -* ------------------------------------------------- -* Vary the parameters -* NB = block size -* NBMIN = minimum block size -* NS = number of shifts -* MAXB = minimum submatrix size -* IACC22: structured matrix multiply -* NBCOL = minimum column dimension for blocks -* - MAXTYP = 26 - NTYPES = MIN( MAXTYP, NTYPES ) - CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) - CALL XLAENV(1,1) - IF( TSTCHK .AND. TSTERR ) - $ CALL CERRGG( C3, NOUT ) - DO 350 I = 1, NPARMS - CALL XLAENV( 1, NBVAL( I ) ) - CALL XLAENV( 2, NBMIN( I ) ) - CALL XLAENV( 4, NSVAL( I ) ) - CALL XLAENV( 8, MXBVAL( I ) ) - CALL XLAENV( 16, IACC22( I ) ) - CALL XLAENV( 5, NBCOL( I ) ) -* - IF( NEWSD.EQ.0 ) THEN - DO 340 K = 1, 4 - ISEED( K ) = IOLDSD( K ) - 340 CONTINUE - END IF - WRITE( NOUT, FMT = 9996 )C3, NBVAL( I ), NBMIN( I ), - $ NSVAL( I ), MXBVAL( I ), IACC22( I ), NBCOL( I ) - TSTDIF = .FALSE. - THRSHN = 10. 
- IF( TSTCHK ) THEN - CALL CCHKGG( NN, NVAL, MAXTYP, DOTYPE, ISEED, THRESH, - $ TSTDIF, THRSHN, NOUT, A( 1, 1 ), NMAX, - $ A( 1, 2 ), A( 1, 3 ), A( 1, 4 ), A( 1, 5 ), - $ A( 1, 6 ), A( 1, 7 ), A( 1, 8 ), A( 1, 9 ), - $ NMAX, A( 1, 10 ), A( 1, 11 ), A( 1, 12 ), - $ DC( 1, 1 ), DC( 1, 2 ), DC( 1, 3 ), - $ DC( 1, 4 ), A( 1, 13 ), A( 1, 14 ), WORK, - $ LWORK, RWORK, LOGWRK, RESULT, INFO ) - IF( INFO.NE.0 ) - $ WRITE( NOUT, FMT = 9980 )'CCHKGG', INFO - END IF - 350 CONTINUE -* - ELSE IF( LSAMEN( 3, C3, 'CGS' ) ) THEN -* -* ------------------------------------------------- -* CGS: Generalized Nonsymmetric Eigenvalue Problem -* CGGES (Schur form) -* ------------------------------------------------- -* - MAXTYP = 26 - NTYPES = MIN( MAXTYP, NTYPES ) - IF( NTYPES.LE.0 ) THEN - WRITE( NOUT, FMT = 9990 )C3 - ELSE - IF( TSTERR ) - $ CALL CERRGG( C3, NOUT ) - CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) - CALL CDRGES( NN, NVAL, MAXTYP, DOTYPE, ISEED, THRESH, NOUT, - $ A( 1, 1 ), NMAX, A( 1, 2 ), A( 1, 3 ), - $ A( 1, 4 ), A( 1, 7 ), NMAX, A( 1, 8 ), - $ DC( 1, 1 ), DC( 1, 2 ), WORK, LWORK, RWORK, - $ RESULT, LOGWRK, INFO ) -* - IF( INFO.NE.0 ) - $ WRITE( NOUT, FMT = 9980 )'CDRGES', INFO -* -* Blocked version -* - CALL XLAENV(16,2) - CALL CDRGES3( NN, NVAL, MAXTYP, DOTYPE, ISEED, THRESH, NOUT, - $ A( 1, 1 ), NMAX, A( 1, 2 ), A( 1, 3 ), - $ A( 1, 4 ), A( 1, 7 ), NMAX, A( 1, 8 ), - $ DC( 1, 1 ), DC( 1, 2 ), WORK, LWORK, RWORK, - $ RESULT, LOGWRK, INFO ) -* - IF( INFO.NE.0 ) - $ WRITE( NOUT, FMT = 9980 )'CDRGES3', INFO - END IF - WRITE( NOUT, FMT = 9973 ) - - GO TO 10 -* - ELSE IF( CGX ) THEN -* -* ------------------------------------------------- -* CGX Generalized Nonsymmetric Eigenvalue Problem -* CGGESX (Schur form and condition numbers) -* ------------------------------------------------- -* - MAXTYP = 5 - NTYPES = MAXTYP - IF( NN.LT.0 ) THEN - WRITE( NOUT, FMT = 9990 )C3 - ELSE - IF( TSTERR ) - $ CALL CERRGG( C3, NOUT ) - CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, 
NOUT ) - CALL XLAENV( 5, 2 ) - CALL CDRGSX( NN, NCMAX, THRESH, NIN, NOUT, A( 1, 1 ), NMAX, - $ A( 1, 2 ), A( 1, 3 ), A( 1, 4 ), A( 1, 5 ), - $ A( 1, 6 ), DC( 1, 1 ), DC( 1, 2 ), C, - $ NCMAX*NCMAX, S, WORK, LWORK, RWORK, IWORK, - $ LIWORK, LOGWRK, INFO ) - IF( INFO.NE.0 ) - $ WRITE( NOUT, FMT = 9980 )'CDRGSX', INFO - END IF - WRITE( NOUT, FMT = 9973 ) - GO TO 10 -* - ELSE IF( LSAMEN( 3, C3, 'CGV' ) ) THEN -* -* ------------------------------------------------- -* CGV: Generalized Nonsymmetric Eigenvalue Problem -* CGGEV (Eigenvalue/vector form) -* ------------------------------------------------- -* - MAXTYP = 26 - NTYPES = MIN( MAXTYP, NTYPES ) - IF( NTYPES.LE.0 ) THEN - WRITE( NOUT, FMT = 9990 )C3 - ELSE - IF( TSTERR ) - $ CALL CERRGG( C3, NOUT ) - CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) - CALL CDRGEV( NN, NVAL, MAXTYP, DOTYPE, ISEED, THRESH, NOUT, - $ A( 1, 1 ), NMAX, A( 1, 2 ), A( 1, 3 ), - $ A( 1, 4 ), A( 1, 7 ), NMAX, A( 1, 8 ), - $ A( 1, 9 ), NMAX, DC( 1, 1 ), DC( 1, 2 ), - $ DC( 1, 3 ), DC( 1, 4 ), WORK, LWORK, RWORK, - $ RESULT, INFO ) - IF( INFO.NE.0 ) - $ WRITE( NOUT, FMT = 9980 )'CDRGEV', INFO -* -* Blocked version -* - CALL XLAENV(16,2) - CALL CDRGEV3( NN, NVAL, MAXTYP, DOTYPE, ISEED, THRESH, NOUT, - $ A( 1, 1 ), NMAX, A( 1, 2 ), A( 1, 3 ), - $ A( 1, 4 ), A( 1, 7 ), NMAX, A( 1, 8 ), - $ A( 1, 9 ), NMAX, DC( 1, 1 ), DC( 1, 2 ), - $ DC( 1, 3 ), DC( 1, 4 ), WORK, LWORK, RWORK, - $ RESULT, INFO ) - IF( INFO.NE.0 ) - $ WRITE( NOUT, FMT = 9980 )'CDRGEV3', INFO - END IF - WRITE( NOUT, FMT = 9973 ) - GO TO 10 -* - ELSE IF( CXV ) THEN -* -* ------------------------------------------------- -* CXV: Generalized Nonsymmetric Eigenvalue Problem -* CGGEVX (eigenvalue/vector with condition numbers) -* ------------------------------------------------- -* - MAXTYP = 2 - NTYPES = MAXTYP - IF( NN.LT.0 ) THEN - WRITE( NOUT, FMT = 9990 )C3 - ELSE - IF( TSTERR ) - $ CALL CERRGG( C3, NOUT ) - CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) - CALL CDRGVX( 
NN, THRESH, NIN, NOUT, A( 1, 1 ), NMAX, - $ A( 1, 2 ), A( 1, 3 ), A( 1, 4 ), DC( 1, 1 ), - $ DC( 1, 2 ), A( 1, 5 ), A( 1, 6 ), IWORK( 1 ), - $ IWORK( 2 ), DR( 1, 1 ), DR( 1, 2 ), DR( 1, 3 ), - $ DR( 1, 4 ), DR( 1, 5 ), DR( 1, 6 ), WORK, - $ LWORK, RWORK, IWORK( 3 ), LIWORK-2, RESULT, - $ LOGWRK, INFO ) -* - IF( INFO.NE.0 ) - $ WRITE( NOUT, FMT = 9980 )'CDRGVX', INFO - END IF - WRITE( NOUT, FMT = 9973 ) - GO TO 10 -* - ELSE IF( LSAMEN( 3, C3, 'CHB' ) ) THEN -* -* ------------------------------ -* CHB: Hermitian Band Reduction -* ------------------------------ -* - MAXTYP = 15 - NTYPES = MIN( MAXTYP, NTYPES ) - CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) - IF( TSTERR ) - $ CALL CERRST( 'CHB', NOUT ) -* CALL CCHKHB( NN, NVAL, NK, KVAL, MAXTYP, DOTYPE, ISEED, THRESH, -* $ NOUT, A( 1, 1 ), NMAX, DR( 1, 1 ), DR( 1, 2 ), -* $ A( 1, 2 ), NMAX, WORK, LWORK, RWORK, RESULT, -* $ INFO ) - CALL CCHKHB2STG( NN, NVAL, NK, KVAL, MAXTYP, DOTYPE, ISEED, - $ THRESH, NOUT, A( 1, 1 ), NMAX, DR( 1, 1 ), - $ DR( 1, 2 ), DR( 1, 3 ), DR( 1, 4 ), DR( 1, 5 ), - $ A( 1, 2 ), NMAX, WORK, LWORK, RWORK, RESULT, - $ INFO ) - IF( INFO.NE.0 ) - $ WRITE( NOUT, FMT = 9980 )'CCHKHB', INFO -* - ELSE IF( LSAMEN( 3, C3, 'CBB' ) ) THEN -* -* ------------------------------ -* CBB: General Band Reduction -* ------------------------------ -* - MAXTYP = 15 - NTYPES = MIN( MAXTYP, NTYPES ) - CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) - DO 370 I = 1, NPARMS - NRHS = NSVAL( I ) -* - IF( NEWSD.EQ.0 ) THEN - DO 360 K = 1, 4 - ISEED( K ) = IOLDSD( K ) - 360 CONTINUE - END IF - WRITE( NOUT, FMT = 9966 )C3, NRHS - CALL CCHKBB( NN, MVAL, NVAL, NK, KVAL, MAXTYP, DOTYPE, NRHS, - $ ISEED, THRESH, NOUT, A( 1, 1 ), NMAX, - $ A( 1, 2 ), 2*NMAX, DR( 1, 1 ), DR( 1, 2 ), - $ A( 1, 4 ), NMAX, A( 1, 5 ), NMAX, A( 1, 6 ), - $ NMAX, A( 1, 7 ), WORK, LWORK, RWORK, RESULT, - $ INFO ) - IF( INFO.NE.0 ) - $ WRITE( NOUT, FMT = 9980 )'CCHKBB', INFO - 370 CONTINUE -* - ELSE IF( LSAMEN( 3, C3, 'GLM' ) ) THEN -* -* 
----------------------------------------- -* GLM: Generalized Linear Regression Model -* ----------------------------------------- -* - CALL XLAENV( 1, 1 ) - IF( TSTERR ) - $ CALL CERRGG( 'GLM', NOUT ) - CALL CCKGLM( NN, NVAL, MVAL, PVAL, NTYPES, ISEED, THRESH, NMAX, - $ A( 1, 1 ), A( 1, 2 ), B( 1, 1 ), B( 1, 2 ), X, - $ WORK, DR( 1, 1 ), NIN, NOUT, INFO ) - IF( INFO.NE.0 ) - $ WRITE( NOUT, FMT = 9980 )'CCKGLM', INFO -* - ELSE IF( LSAMEN( 3, C3, 'GQR' ) ) THEN -* -* ------------------------------------------ -* GQR: Generalized QR and RQ factorizations -* ------------------------------------------ -* - CALL XLAENV( 1, 1 ) - IF( TSTERR ) - $ CALL CERRGG( 'GQR', NOUT ) - CALL CCKGQR( NN, MVAL, NN, PVAL, NN, NVAL, NTYPES, ISEED, - $ THRESH, NMAX, A( 1, 1 ), A( 1, 2 ), A( 1, 3 ), - $ A( 1, 4 ), TAUA, B( 1, 1 ), B( 1, 2 ), B( 1, 3 ), - $ B( 1, 4 ), B( 1, 5 ), TAUB, WORK, DR( 1, 1 ), NIN, - $ NOUT, INFO ) - IF( INFO.NE.0 ) - $ WRITE( NOUT, FMT = 9980 )'CCKGQR', INFO -* - ELSE IF( LSAMEN( 3, C3, 'GSV' ) ) THEN -* -* ---------------------------------------------- -* GSV: Generalized Singular Value Decomposition -* ---------------------------------------------- -* - CALL XLAENV(1,1) - IF( TSTERR ) - $ CALL CERRGG( 'GSV', NOUT ) - CALL CCKGSV( NN, MVAL, PVAL, NVAL, NTYPES, ISEED, THRESH, NMAX, - $ A( 1, 1 ), A( 1, 2 ), B( 1, 1 ), B( 1, 2 ), - $ A( 1, 3 ), B( 1, 3 ), A( 1, 4 ), ALPHA, BETA, - $ B( 1, 4 ), IWORK, WORK, DR( 1, 1 ), NIN, NOUT, - $ INFO ) - IF( INFO.NE.0 ) - $ WRITE( NOUT, FMT = 9980 )'CCKGSV', INFO -* - ELSE IF( LSAMEN( 3, C3, 'CSD' ) ) THEN -* -* ---------------------------------------------- -* CSD: CS Decomposition -* ---------------------------------------------- -* - CALL XLAENV(1,1) - IF( TSTERR ) - $ CALL CERRGG( 'CSD', NOUT ) - CALL CCKCSD( NN, MVAL, PVAL, NVAL, NTYPES, ISEED, THRESH, NMAX, - $ A( 1, 1 ), A( 1, 2 ), A( 1, 3 ), A( 1, 4 ), - $ A( 1, 5 ), A( 1, 6 ), RWORK, IWORK, WORK, - $ DR( 1, 1 ), NIN, NOUT, INFO ) - IF( INFO.NE.0 ) - $ WRITE( NOUT, FMT 
= 9980 )'CCKCSD', INFO -* - ELSE IF( LSAMEN( 3, C3, 'LSE' ) ) THEN -* -* -------------------------------------- -* LSE: Constrained Linear Least Squares -* -------------------------------------- -* - CALL XLAENV( 1, 1 ) - IF( TSTERR ) - $ CALL CERRGG( 'LSE', NOUT ) - CALL CCKLSE( NN, MVAL, PVAL, NVAL, NTYPES, ISEED, THRESH, NMAX, - $ A( 1, 1 ), A( 1, 2 ), B( 1, 1 ), B( 1, 2 ), X, - $ WORK, DR( 1, 1 ), NIN, NOUT, INFO ) - IF( INFO.NE.0 ) - $ WRITE( NOUT, FMT = 9980 )'CCKLSE', INFO - ELSE - WRITE( NOUT, FMT = * ) - WRITE( NOUT, FMT = * ) - WRITE( NOUT, FMT = 9992 )C3 - END IF - IF( .NOT.( CGX .OR. CXV ) ) - $ GO TO 190 - 380 CONTINUE - WRITE( NOUT, FMT = 9994 ) - S2 = SECOND( ) - WRITE( NOUT, FMT = 9993 )S2 - S1 -* - 9999 FORMAT( / ' Execution not attempted due to input errors' ) - 9997 FORMAT( / / 1X, A3, ': NB =', I4, ', NBMIN =', I4, ', NX =', I4 ) - 9996 FORMAT( / / 1X, A3, ': NB =', I4, ', NBMIN =', I4, ', NS =', I4, - $ ', MAXB =', I4, ', IACC22 =', I4, ', NBCOL =', I4 ) - 9995 FORMAT( / / 1X, A3, ': NB =', I4, ', NBMIN =', I4, ', NX =', I4, - $ ', NRHS =', I4 ) - 9994 FORMAT( / / ' End of tests' ) - 9993 FORMAT( ' Total time used = ', F12.2, ' seconds', / ) - 9992 FORMAT( 1X, A3, ': Unrecognized path name' ) - 9991 FORMAT( / / ' *** Invalid integer value in column ', I2, - $ ' of input', ' line:', / A79 ) - 9990 FORMAT( / / 1X, A3, ' routines were not tested' ) - 9989 FORMAT( ' Invalid input value: ', A, '=', I6, '; must be >=', - $ I6 ) - 9988 FORMAT( ' Invalid input value: ', A, '=', I6, '; must be <=', - $ I6 ) - 9987 FORMAT( ' Tests of the Nonsymmetric Eigenvalue Problem routines' ) - 9986 FORMAT( ' Tests of the Hermitian Eigenvalue Problem routines' ) - 9985 FORMAT( ' Tests of the Singular Value Decomposition routines' ) - 9984 FORMAT( / ' The following parameter values will be used:' ) - 9983 FORMAT( 4X, A, 10I6, / 10X, 10I6 ) - 9982 FORMAT( / ' Routines pass computational tests if test ratio is ', - $ 'less than', F8.2, / ) - 9981 FORMAT( ' Relative 
machine ', A, ' is taken to be', E16.6 ) - 9980 FORMAT( ' *** Error code from ', A, ' = ', I4 ) - 9979 FORMAT( / ' Tests of the Nonsymmetric Eigenvalue Problem Driver', - $ / ' CGEEV (eigenvalues and eigevectors)' ) - 9978 FORMAT( / ' Tests of the Nonsymmetric Eigenvalue Problem Driver', - $ / ' CGEES (Schur form)' ) - 9977 FORMAT( / ' Tests of the Nonsymmetric Eigenvalue Problem Expert', - $ ' Driver', / ' CGEEVX (eigenvalues, eigenvectors and', - $ ' condition numbers)' ) - 9976 FORMAT( / ' Tests of the Nonsymmetric Eigenvalue Problem Expert', - $ ' Driver', / ' CGEESX (Schur form and condition', - $ ' numbers)' ) - 9975 FORMAT( / ' Tests of the Generalized Nonsymmetric Eigenvalue ', - $ 'Problem routines' ) - 9974 FORMAT( ' Tests of CHBTRD', / ' (reduction of a Hermitian band ', - $ 'matrix to real tridiagonal form)' ) - 9973 FORMAT( / 1X, 71( '-' ) ) - 9972 FORMAT( / ' LAPACK VERSION ', I1, '.', I1, '.', I1 ) - 9971 FORMAT( / ' Tests of the Generalized Linear Regression Model ', - $ 'routines' ) - 9970 FORMAT( / ' Tests of the Generalized QR and RQ routines' ) - 9969 FORMAT( / ' Tests of the Generalized Singular Value', - $ ' Decomposition routines' ) - 9968 FORMAT( / ' Tests of the Linear Least Squares routines' ) - 9967 FORMAT( ' Tests of CGBBRD', / ' (reduction of a general band ', - $ 'matrix to real bidiagonal form)' ) - 9966 FORMAT( / / 1X, A3, ': NRHS =', I4 ) - 9965 FORMAT( / ' Tests of the Generalized Nonsymmetric Eigenvalue ', - $ 'Problem Expert Driver CGGESX' ) - 9964 FORMAT( / ' Tests of the Generalized Nonsymmetric Eigenvalue ', - $ 'Problem Driver CGGES' ) - 9963 FORMAT( / ' Tests of the Generalized Nonsymmetric Eigenvalue ', - $ 'Problem Driver CGGEV' ) - 9962 FORMAT( / ' Tests of the Generalized Nonsymmetric Eigenvalue ', - $ 'Problem Expert Driver CGGEVX' ) - 9961 FORMAT( / / 1X, A3, ': NB =', I4, ', NBMIN =', I4, ', NX =', I4, - $ ', INMIN=', I4, - $ ', INWIN =', I4, ', INIBL =', I4, ', ISHFTS =', I4, - $ ', IACC22 =', I4) - 9960 FORMAT( / ' 
Tests of the CS Decomposition routines' ) -* -* End of CCHKEE -* - END From ee16efff3cd5a4ee7b6c0efcc263964f1304a3a8 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sun, 28 Feb 2021 18:46:38 +0100 Subject: [PATCH 106/134] Delete dchkee.f --- lapack-netlib/TESTING/EIG/dchkee.f | 2507 ---------------------------- 1 file changed, 2507 deletions(-) delete mode 100644 lapack-netlib/TESTING/EIG/dchkee.f diff --git a/lapack-netlib/TESTING/EIG/dchkee.f b/lapack-netlib/TESTING/EIG/dchkee.f deleted file mode 100644 index dc6f3205a..000000000 --- a/lapack-netlib/TESTING/EIG/dchkee.f +++ /dev/null @@ -1,2507 +0,0 @@ -*> \brief \b DCHKEE -* -* =========== DOCUMENTATION =========== -* -* Online html documentation available at -* http://www.netlib.org/lapack/explore-html/ -* -* Definition: -* =========== -* -* PROGRAM DCHKEE -* -* -*> \par Purpose: -* ============= -*> -*> \verbatim -*> -*> DCHKEE tests the DOUBLE PRECISION LAPACK subroutines for the matrix -*> eigenvalue problem. The test paths in this version are -*> -*> NEP (Nonsymmetric Eigenvalue Problem): -*> Test DGEHRD, DORGHR, DHSEQR, DTREVC, DHSEIN, and DORMHR -*> -*> SEP (Symmetric Eigenvalue Problem): -*> Test DSYTRD, DORGTR, DSTEQR, DSTERF, DSTEIN, DSTEDC, -*> and drivers DSYEV(X), DSBEV(X), DSPEV(X), DSTEV(X), -*> DSYEVD, DSBEVD, DSPEVD, DSTEVD -*> -*> SVD (Singular Value Decomposition): -*> Test DGEBRD, DORGBR, DBDSQR, DBDSDC -*> and the drivers DGESVD, DGESDD -*> -*> DEV (Nonsymmetric Eigenvalue/eigenvector Driver): -*> Test DGEEV -*> -*> DES (Nonsymmetric Schur form Driver): -*> Test DGEES -*> -*> DVX (Nonsymmetric Eigenvalue/eigenvector Expert Driver): -*> Test DGEEVX -*> -*> DSX (Nonsymmetric Schur form Expert Driver): -*> Test DGEESX -*> -*> DGG (Generalized Nonsymmetric Eigenvalue Problem): -*> Test DGGHD3, DGGBAL, DGGBAK, DHGEQZ, and DTGEVC -*> -*> DGS (Generalized Nonsymmetric Schur form Driver): -*> Test DGGES -*> -*> DGV (Generalized Nonsymmetric Eigenvalue/eigenvector Driver): -*> Test DGGEV -*> -*> 
DGX (Generalized Nonsymmetric Schur form Expert Driver): -*> Test DGGESX -*> -*> DXV (Generalized Nonsymmetric Eigenvalue/eigenvector Expert Driver): -*> Test DGGEVX -*> -*> DSG (Symmetric Generalized Eigenvalue Problem): -*> Test DSYGST, DSYGV, DSYGVD, DSYGVX, DSPGST, DSPGV, DSPGVD, -*> DSPGVX, DSBGST, DSBGV, DSBGVD, and DSBGVX -*> -*> DSB (Symmetric Band Eigenvalue Problem): -*> Test DSBTRD -*> -*> DBB (Band Singular Value Decomposition): -*> Test DGBBRD -*> -*> DEC (Eigencondition estimation): -*> Test DLALN2, DLASY2, DLAEQU, DLAEXC, DTRSYL, DTREXC, DTRSNA, -*> DTRSEN, and DLAQTR -*> -*> DBL (Balancing a general matrix) -*> Test DGEBAL -*> -*> DBK (Back transformation on a balanced matrix) -*> Test DGEBAK -*> -*> DGL (Balancing a matrix pair) -*> Test DGGBAL -*> -*> DGK (Back transformation on a matrix pair) -*> Test DGGBAK -*> -*> GLM (Generalized Linear Regression Model): -*> Tests DGGGLM -*> -*> GQR (Generalized QR and RQ factorizations): -*> Tests DGGQRF and DGGRQF -*> -*> GSV (Generalized Singular Value Decomposition): -*> Tests DGGSVD, DGGSVP, DTGSJA, DLAGS2, DLAPLL, and DLAPMT -*> -*> CSD (CS decomposition): -*> Tests DORCSD -*> -*> LSE (Constrained Linear Least Squares): -*> Tests DGGLSE -*> -*> Each test path has a different set of inputs, but the data sets for -*> the driver routines xEV, xES, xVX, and xSX can be concatenated in a -*> single input file. The first line of input should contain one of the -*> 3-character path names in columns 1-3. The number of remaining lines -*> depends on what is found on the first line. -*> -*> The number of matrix types used in testing is often controllable from -*> the input file. 
The number of matrix types for each path, and the -*> test routine that describes them, is as follows: -*> -*> Path name(s) Types Test routine -*> -*> DHS or NEP 21 DCHKHS -*> DST or SEP 21 DCHKST (routines) -*> 18 DDRVST (drivers) -*> DBD or SVD 16 DCHKBD (routines) -*> 5 DDRVBD (drivers) -*> DEV 21 DDRVEV -*> DES 21 DDRVES -*> DVX 21 DDRVVX -*> DSX 21 DDRVSX -*> DGG 26 DCHKGG (routines) -*> DGS 26 DDRGES -*> DGX 5 DDRGSX -*> DGV 26 DDRGEV -*> DXV 2 DDRGVX -*> DSG 21 DDRVSG -*> DSB 15 DCHKSB -*> DBB 15 DCHKBB -*> DEC - DCHKEC -*> DBL - DCHKBL -*> DBK - DCHKBK -*> DGL - DCHKGL -*> DGK - DCHKGK -*> GLM 8 DCKGLM -*> GQR 8 DCKGQR -*> GSV 8 DCKGSV -*> CSD 3 DCKCSD -*> LSE 8 DCKLSE -*> -*>----------------------------------------------------------------------- -*> -*> NEP input file: -*> -*> line 2: NN, INTEGER -*> Number of values of N. -*> -*> line 3: NVAL, INTEGER array, dimension (NN) -*> The values for the matrix dimension N. -*> -*> line 4: NPARMS, INTEGER -*> Number of values of the parameters NB, NBMIN, NX, NS, and -*> MAXB. -*> -*> line 5: NBVAL, INTEGER array, dimension (NPARMS) -*> The values for the blocksize NB. -*> -*> line 6: NBMIN, INTEGER array, dimension (NPARMS) -*> The values for the minimum blocksize NBMIN. -*> -*> line 7: NXVAL, INTEGER array, dimension (NPARMS) -*> The values for the crossover point NX. -*> -*> line 8: INMIN, INTEGER array, dimension (NPARMS) -*> LAHQR vs TTQRE crossover point, >= 11 -*> -*> line 9: INWIN, INTEGER array, dimension (NPARMS) -*> recommended deflation window size -*> -*> line 10: INIBL, INTEGER array, dimension (NPARMS) -*> nibble crossover point -*> -*> line 11: ISHFTS, INTEGER array, dimension (NPARMS) -*> number of simultaneous shifts) -*> -*> line 12: IACC22, INTEGER array, dimension (NPARMS) -*> select structured matrix multiply: 0, 1 or 2) -*> -*> line 13: THRESH -*> Threshold value for the test ratios. 
Information will be -*> printed about each test for which the test ratio is greater -*> than or equal to the threshold. To have all of the test -*> ratios printed, use THRESH = 0.0 . -*> -*> line 14: NEWSD, INTEGER -*> A code indicating how to set the random number seed. -*> = 0: Set the seed to a default value before each run -*> = 1: Initialize the seed to a default value only before the -*> first run -*> = 2: Like 1, but use the seed values on the next line -*> -*> If line 14 was 2: -*> -*> line 15: INTEGER array, dimension (4) -*> Four integer values for the random number seed. -*> -*> lines 15-EOF: The remaining lines occur in sets of 1 or 2 and allow -*> the user to specify the matrix types. Each line contains -*> a 3-character path name in columns 1-3, and the number -*> of matrix types must be the first nonblank item in columns -*> 4-80. If the number of matrix types is at least 1 but is -*> less than the maximum number of possible types, a second -*> line will be read to get the numbers of the matrix types to -*> be used. For example, -*> NEP 21 -*> requests all of the matrix types for the nonsymmetric -*> eigenvalue problem, while -*> NEP 4 -*> 9 10 11 12 -*> requests only matrices of type 9, 10, 11, and 12. -*> -*> The valid 3-character path names are 'NEP' or 'SHS' for the -*> nonsymmetric eigenvalue routines. -*> -*>----------------------------------------------------------------------- -*> -*> SEP or DSG input file: -*> -*> line 2: NN, INTEGER -*> Number of values of N. -*> -*> line 3: NVAL, INTEGER array, dimension (NN) -*> The values for the matrix dimension N. -*> -*> line 4: NPARMS, INTEGER -*> Number of values of the parameters NB, NBMIN, and NX. -*> -*> line 5: NBVAL, INTEGER array, dimension (NPARMS) -*> The values for the blocksize NB. -*> -*> line 6: NBMIN, INTEGER array, dimension (NPARMS) -*> The values for the minimum blocksize NBMIN. -*> -*> line 7: NXVAL, INTEGER array, dimension (NPARMS) -*> The values for the crossover point NX. 
-*> -*> line 8: THRESH -*> Threshold value for the test ratios. Information will be -*> printed about each test for which the test ratio is greater -*> than or equal to the threshold. -*> -*> line 9: TSTCHK, LOGICAL -*> Flag indicating whether or not to test the LAPACK routines. -*> -*> line 10: TSTDRV, LOGICAL -*> Flag indicating whether or not to test the driver routines. -*> -*> line 11: TSTERR, LOGICAL -*> Flag indicating whether or not to test the error exits for -*> the LAPACK routines and driver routines. -*> -*> line 12: NEWSD, INTEGER -*> A code indicating how to set the random number seed. -*> = 0: Set the seed to a default value before each run -*> = 1: Initialize the seed to a default value only before the -*> first run -*> = 2: Like 1, but use the seed values on the next line -*> -*> If line 12 was 2: -*> -*> line 13: INTEGER array, dimension (4) -*> Four integer values for the random number seed. -*> -*> lines 13-EOF: Lines specifying matrix types, as for NEP. -*> The 3-character path names are 'SEP' or 'SST' for the -*> symmetric eigenvalue routines and driver routines, and -*> 'DSG' for the routines for the symmetric generalized -*> eigenvalue problem. -*> -*>----------------------------------------------------------------------- -*> -*> SVD input file: -*> -*> line 2: NN, INTEGER -*> Number of values of M and N. -*> -*> line 3: MVAL, INTEGER array, dimension (NN) -*> The values for the matrix row dimension M. -*> -*> line 4: NVAL, INTEGER array, dimension (NN) -*> The values for the matrix column dimension N. -*> -*> line 5: NPARMS, INTEGER -*> Number of values of the parameter NB, NBMIN, NX, and NRHS. -*> -*> line 6: NBVAL, INTEGER array, dimension (NPARMS) -*> The values for the blocksize NB. -*> -*> line 7: NBMIN, INTEGER array, dimension (NPARMS) -*> The values for the minimum blocksize NBMIN. -*> -*> line 8: NXVAL, INTEGER array, dimension (NPARMS) -*> The values for the crossover point NX. 
-*> -*> line 9: NSVAL, INTEGER array, dimension (NPARMS) -*> The values for the number of right hand sides NRHS. -*> -*> line 10: THRESH -*> Threshold value for the test ratios. Information will be -*> printed about each test for which the test ratio is greater -*> than or equal to the threshold. -*> -*> line 11: TSTCHK, LOGICAL -*> Flag indicating whether or not to test the LAPACK routines. -*> -*> line 12: TSTDRV, LOGICAL -*> Flag indicating whether or not to test the driver routines. -*> -*> line 13: TSTERR, LOGICAL -*> Flag indicating whether or not to test the error exits for -*> the LAPACK routines and driver routines. -*> -*> line 14: NEWSD, INTEGER -*> A code indicating how to set the random number seed. -*> = 0: Set the seed to a default value before each run -*> = 1: Initialize the seed to a default value only before the -*> first run -*> = 2: Like 1, but use the seed values on the next line -*> -*> If line 14 was 2: -*> -*> line 15: INTEGER array, dimension (4) -*> Four integer values for the random number seed. -*> -*> lines 15-EOF: Lines specifying matrix types, as for NEP. -*> The 3-character path names are 'SVD' or 'SBD' for both the -*> SVD routines and the SVD driver routines. -*> -*>----------------------------------------------------------------------- -*> -*> DEV and DES data files: -*> -*> line 1: 'DEV' or 'DES' in columns 1 to 3. -*> -*> line 2: NSIZES, INTEGER -*> Number of sizes of matrices to use. Should be at least 0 -*> and at most 20. If NSIZES = 0, no testing is done -*> (although the remaining 3 lines are still read). -*> -*> line 3: NN, INTEGER array, dimension(NSIZES) -*> Dimensions of matrices to be tested. 
-*> -*> line 4: NB, NBMIN, NX, NS, NBCOL, INTEGERs -*> These integer parameters determine how blocking is done -*> (see ILAENV for details) -*> NB : block size -*> NBMIN : minimum block size -*> NX : minimum dimension for blocking -*> NS : number of shifts in xHSEQR -*> NBCOL : minimum column dimension for blocking -*> -*> line 5: THRESH, REAL -*> The test threshold against which computed residuals are -*> compared. Should generally be in the range from 10. to 20. -*> If it is 0., all test case data will be printed. -*> -*> line 6: TSTERR, LOGICAL -*> Flag indicating whether or not to test the error exits. -*> -*> line 7: NEWSD, INTEGER -*> A code indicating how to set the random number seed. -*> = 0: Set the seed to a default value before each run -*> = 1: Initialize the seed to a default value only before the -*> first run -*> = 2: Like 1, but use the seed values on the next line -*> -*> If line 7 was 2: -*> -*> line 8: INTEGER array, dimension (4) -*> Four integer values for the random number seed. -*> -*> lines 9 and following: Lines specifying matrix types, as for NEP. -*> The 3-character path name is 'DEV' to test SGEEV, or -*> 'DES' to test SGEES. -*> -*>----------------------------------------------------------------------- -*> -*> The DVX data has two parts. The first part is identical to DEV, -*> and the second part consists of test matrices with precomputed -*> solutions. -*> -*> line 1: 'DVX' in columns 1-3. -*> -*> line 2: NSIZES, INTEGER -*> If NSIZES = 0, no testing of randomly generated examples -*> is done, but any precomputed examples are tested. 
-*> -*> line 3: NN, INTEGER array, dimension(NSIZES) -*> -*> line 4: NB, NBMIN, NX, NS, NBCOL, INTEGERs -*> -*> line 5: THRESH, REAL -*> -*> line 6: TSTERR, LOGICAL -*> -*> line 7: NEWSD, INTEGER -*> -*> If line 7 was 2: -*> -*> line 8: INTEGER array, dimension (4) -*> -*> lines 9 and following: The first line contains 'DVX' in columns 1-3 -*> followed by the number of matrix types, possibly with -*> a second line to specify certain matrix types. -*> If the number of matrix types = 0, no testing of randomly -*> generated examples is done, but any precomputed examples -*> are tested. -*> -*> remaining lines : Each matrix is stored on 1+2*N lines, where N is -*> its dimension. The first line contains the dimension (a -*> single integer). The next N lines contain the matrix, one -*> row per line. The last N lines correspond to each -*> eigenvalue. Each of these last N lines contains 4 real -*> values: the real part of the eigenvalue, the imaginary -*> part of the eigenvalue, the reciprocal condition number of -*> the eigenvalues, and the reciprocal condition number of the -*> eigenvector. The end of data is indicated by dimension N=0. -*> Even if no data is to be tested, there must be at least one -*> line containing N=0. -*> -*>----------------------------------------------------------------------- -*> -*> The DSX data is like DVX. The first part is identical to DEV, and the -*> second part consists of test matrices with precomputed solutions. -*> -*> line 1: 'DSX' in columns 1-3. -*> -*> line 2: NSIZES, INTEGER -*> If NSIZES = 0, no testing of randomly generated examples -*> is done, but any precomputed examples are tested. 
-*> -*> line 3: NN, INTEGER array, dimension(NSIZES) -*> -*> line 4: NB, NBMIN, NX, NS, NBCOL, INTEGERs -*> -*> line 5: THRESH, REAL -*> -*> line 6: TSTERR, LOGICAL -*> -*> line 7: NEWSD, INTEGER -*> -*> If line 7 was 2: -*> -*> line 8: INTEGER array, dimension (4) -*> -*> lines 9 and following: The first line contains 'DSX' in columns 1-3 -*> followed by the number of matrix types, possibly with -*> a second line to specify certain matrix types. -*> If the number of matrix types = 0, no testing of randomly -*> generated examples is done, but any precomputed examples -*> are tested. -*> -*> remaining lines : Each matrix is stored on 3+N lines, where N is its -*> dimension. The first line contains the dimension N and the -*> dimension M of an invariant subspace. The second line -*> contains M integers, identifying the eigenvalues in the -*> invariant subspace (by their position in a list of -*> eigenvalues ordered by increasing real part). The next N -*> lines contain the matrix. The last line contains the -*> reciprocal condition number for the average of the selected -*> eigenvalues, and the reciprocal condition number for the -*> corresponding right invariant subspace. The end of data is -*> indicated by a line containing N=0 and M=0. Even if no data -*> is to be tested, there must be at least one line containing -*> N=0 and M=0. -*> -*>----------------------------------------------------------------------- -*> -*> DGG input file: -*> -*> line 2: NN, INTEGER -*> Number of values of N. -*> -*> line 3: NVAL, INTEGER array, dimension (NN) -*> The values for the matrix dimension N. -*> -*> line 4: NPARMS, INTEGER -*> Number of values of the parameters NB, NBMIN, NS, MAXB, and -*> NBCOL. -*> -*> line 5: NBVAL, INTEGER array, dimension (NPARMS) -*> The values for the blocksize NB. -*> -*> line 6: NBMIN, INTEGER array, dimension (NPARMS) -*> The values for NBMIN, the minimum row dimension for blocks. 
-*> -*> line 7: NSVAL, INTEGER array, dimension (NPARMS) -*> The values for the number of shifts. -*> -*> line 8: MXBVAL, INTEGER array, dimension (NPARMS) -*> The values for MAXB, used in determining minimum blocksize. -*> -*> line 9: IACC22, INTEGER array, dimension (NPARMS) -*> select structured matrix multiply: 1 or 2) -*> -*> line 10: NBCOL, INTEGER array, dimension (NPARMS) -*> The values for NBCOL, the minimum column dimension for -*> blocks. -*> -*> line 11: THRESH -*> Threshold value for the test ratios. Information will be -*> printed about each test for which the test ratio is greater -*> than or equal to the threshold. -*> -*> line 12: TSTCHK, LOGICAL -*> Flag indicating whether or not to test the LAPACK routines. -*> -*> line 13: TSTDRV, LOGICAL -*> Flag indicating whether or not to test the driver routines. -*> -*> line 14: TSTERR, LOGICAL -*> Flag indicating whether or not to test the error exits for -*> the LAPACK routines and driver routines. -*> -*> line 15: NEWSD, INTEGER -*> A code indicating how to set the random number seed. -*> = 0: Set the seed to a default value before each run -*> = 1: Initialize the seed to a default value only before the -*> first run -*> = 2: Like 1, but use the seed values on the next line -*> -*> If line 15 was 2: -*> -*> line 16: INTEGER array, dimension (4) -*> Four integer values for the random number seed. -*> -*> lines 17-EOF: Lines specifying matrix types, as for NEP. -*> The 3-character path name is 'DGG' for the generalized -*> eigenvalue problem routines and driver routines. -*> -*>----------------------------------------------------------------------- -*> -*> DGS and DGV input files: -*> -*> line 1: 'DGS' or 'DGV' in columns 1 to 3. -*> -*> line 2: NN, INTEGER -*> Number of values of N. -*> -*> line 3: NVAL, INTEGER array, dimension(NN) -*> Dimensions of matrices to be tested. 
-*> -*> line 4: NB, NBMIN, NX, NS, NBCOL, INTEGERs -*> These integer parameters determine how blocking is done -*> (see ILAENV for details) -*> NB : block size -*> NBMIN : minimum block size -*> NX : minimum dimension for blocking -*> NS : number of shifts in xHGEQR -*> NBCOL : minimum column dimension for blocking -*> -*> line 5: THRESH, REAL -*> The test threshold against which computed residuals are -*> compared. Should generally be in the range from 10. to 20. -*> If it is 0., all test case data will be printed. -*> -*> line 6: TSTERR, LOGICAL -*> Flag indicating whether or not to test the error exits. -*> -*> line 7: NEWSD, INTEGER -*> A code indicating how to set the random number seed. -*> = 0: Set the seed to a default value before each run -*> = 1: Initialize the seed to a default value only before the -*> first run -*> = 2: Like 1, but use the seed values on the next line -*> -*> If line 17 was 2: -*> -*> line 7: INTEGER array, dimension (4) -*> Four integer values for the random number seed. -*> -*> lines 7-EOF: Lines specifying matrix types, as for NEP. -*> The 3-character path name is 'DGS' for the generalized -*> eigenvalue problem routines and driver routines. -*> -*>----------------------------------------------------------------------- -*> -*> DXV input files: -*> -*> line 1: 'DXV' in columns 1 to 3. -*> -*> line 2: N, INTEGER -*> Value of N. -*> -*> line 3: NB, NBMIN, NX, NS, NBCOL, INTEGERs -*> These integer parameters determine how blocking is done -*> (see ILAENV for details) -*> NB : block size -*> NBMIN : minimum block size -*> NX : minimum dimension for blocking -*> NS : number of shifts in xHGEQR -*> NBCOL : minimum column dimension for blocking -*> -*> line 4: THRESH, REAL -*> The test threshold against which computed residuals are -*> compared. Should generally be in the range from 10. to 20. -*> Information will be printed about each test for which the -*> test ratio is greater than or equal to the threshold. 
-*> -*> line 5: TSTERR, LOGICAL -*> Flag indicating whether or not to test the error exits for -*> the LAPACK routines and driver routines. -*> -*> line 6: NEWSD, INTEGER -*> A code indicating how to set the random number seed. -*> = 0: Set the seed to a default value before each run -*> = 1: Initialize the seed to a default value only before the -*> first run -*> = 2: Like 1, but use the seed values on the next line -*> -*> If line 6 was 2: -*> -*> line 7: INTEGER array, dimension (4) -*> Four integer values for the random number seed. -*> -*> If line 2 was 0: -*> -*> line 7-EOF: Precomputed examples are tested. -*> -*> remaining lines : Each example is stored on 3+2*N lines, where N is -*> its dimension. The first line contains the dimension (a -*> single integer). The next N lines contain the matrix A, one -*> row per line. The next N lines contain the matrix B. The -*> next line contains the reciprocals of the eigenvalue -*> condition numbers. The last line contains the reciprocals of -*> the eigenvector condition numbers. The end of data is -*> indicated by dimension N=0. Even if no data is to be tested, -*> there must be at least one line containing N=0. -*> -*>----------------------------------------------------------------------- -*> -*> DGX input files: -*> -*> line 1: 'DGX' in columns 1 to 3. -*> -*> line 2: N, INTEGER -*> Value of N. -*> -*> line 3: NB, NBMIN, NX, NS, NBCOL, INTEGERs -*> These integer parameters determine how blocking is done -*> (see ILAENV for details) -*> NB : block size -*> NBMIN : minimum block size -*> NX : minimum dimension for blocking -*> NS : number of shifts in xHGEQR -*> NBCOL : minimum column dimension for blocking -*> -*> line 4: THRESH, REAL -*> The test threshold against which computed residuals are -*> compared. Should generally be in the range from 10. to 20. -*> Information will be printed about each test for which the -*> test ratio is greater than or equal to the threshold. 
-*> -*> line 5: TSTERR, LOGICAL -*> Flag indicating whether or not to test the error exits for -*> the LAPACK routines and driver routines. -*> -*> line 6: NEWSD, INTEGER -*> A code indicating how to set the random number seed. -*> = 0: Set the seed to a default value before each run -*> = 1: Initialize the seed to a default value only before the -*> first run -*> = 2: Like 1, but use the seed values on the next line -*> -*> If line 6 was 2: -*> -*> line 7: INTEGER array, dimension (4) -*> Four integer values for the random number seed. -*> -*> If line 2 was 0: -*> -*> line 7-EOF: Precomputed examples are tested. -*> -*> remaining lines : Each example is stored on 3+2*N lines, where N is -*> its dimension. The first line contains the dimension (a -*> single integer). The next line contains an integer k such -*> that only the last k eigenvalues will be selected and appear -*> in the leading diagonal blocks of $A$ and $B$. The next N -*> lines contain the matrix A, one row per line. The next N -*> lines contain the matrix B. The last line contains the -*> reciprocal of the eigenvalue cluster condition number and the -*> reciprocal of the deflating subspace (associated with the -*> selected eigencluster) condition number. The end of data is -*> indicated by dimension N=0. Even if no data is to be tested, -*> there must be at least one line containing N=0. -*> -*>----------------------------------------------------------------------- -*> -*> DSB input file: -*> -*> line 2: NN, INTEGER -*> Number of values of N. -*> -*> line 3: NVAL, INTEGER array, dimension (NN) -*> The values for the matrix dimension N. -*> -*> line 4: NK, INTEGER -*> Number of values of K. -*> -*> line 5: KVAL, INTEGER array, dimension (NK) -*> The values for the matrix dimension K. -*> -*> line 6: THRESH -*> Threshold value for the test ratios. Information will be -*> printed about each test for which the test ratio is greater -*> than or equal to the threshold. 
-*> -*> line 7: NEWSD, INTEGER -*> A code indicating how to set the random number seed. -*> = 0: Set the seed to a default value before each run -*> = 1: Initialize the seed to a default value only before the -*> first run -*> = 2: Like 1, but use the seed values on the next line -*> -*> If line 7 was 2: -*> -*> line 8: INTEGER array, dimension (4) -*> Four integer values for the random number seed. -*> -*> lines 8-EOF: Lines specifying matrix types, as for NEP. -*> The 3-character path name is 'DSB'. -*> -*>----------------------------------------------------------------------- -*> -*> DBB input file: -*> -*> line 2: NN, INTEGER -*> Number of values of M and N. -*> -*> line 3: MVAL, INTEGER array, dimension (NN) -*> The values for the matrix row dimension M. -*> -*> line 4: NVAL, INTEGER array, dimension (NN) -*> The values for the matrix column dimension N. -*> -*> line 4: NK, INTEGER -*> Number of values of K. -*> -*> line 5: KVAL, INTEGER array, dimension (NK) -*> The values for the matrix bandwidth K. -*> -*> line 6: NPARMS, INTEGER -*> Number of values of the parameter NRHS -*> -*> line 7: NSVAL, INTEGER array, dimension (NPARMS) -*> The values for the number of right hand sides NRHS. -*> -*> line 8: THRESH -*> Threshold value for the test ratios. Information will be -*> printed about each test for which the test ratio is greater -*> than or equal to the threshold. -*> -*> line 9: NEWSD, INTEGER -*> A code indicating how to set the random number seed. -*> = 0: Set the seed to a default value before each run -*> = 1: Initialize the seed to a default value only before the -*> first run -*> = 2: Like 1, but use the seed values on the next line -*> -*> If line 9 was 2: -*> -*> line 10: INTEGER array, dimension (4) -*> Four integer values for the random number seed. -*> -*> lines 10-EOF: Lines specifying matrix types, as for SVD. -*> The 3-character path name is 'DBB'. 
-*> -*>----------------------------------------------------------------------- -*> -*> DEC input file: -*> -*> line 2: THRESH, REAL -*> Threshold value for the test ratios. Information will be -*> printed about each test for which the test ratio is greater -*> than or equal to the threshold. -*> -*> lines 3-EOF: -*> -*> Input for testing the eigencondition routines consists of a set of -*> specially constructed test cases and their solutions. The data -*> format is not intended to be modified by the user. -*> -*>----------------------------------------------------------------------- -*> -*> DBL and DBK input files: -*> -*> line 1: 'DBL' in columns 1-3 to test SGEBAL, or 'DBK' in -*> columns 1-3 to test SGEBAK. -*> -*> The remaining lines consist of specially constructed test cases. -*> -*>----------------------------------------------------------------------- -*> -*> DGL and DGK input files: -*> -*> line 1: 'DGL' in columns 1-3 to test DGGBAL, or 'DGK' in -*> columns 1-3 to test DGGBAK. -*> -*> The remaining lines consist of specially constructed test cases. -*> -*>----------------------------------------------------------------------- -*> -*> GLM data file: -*> -*> line 1: 'GLM' in columns 1 to 3. -*> -*> line 2: NN, INTEGER -*> Number of values of M, P, and N. -*> -*> line 3: MVAL, INTEGER array, dimension(NN) -*> Values of M (row dimension). -*> -*> line 4: PVAL, INTEGER array, dimension(NN) -*> Values of P (row dimension). -*> -*> line 5: NVAL, INTEGER array, dimension(NN) -*> Values of N (column dimension), note M <= N <= M+P. -*> -*> line 6: THRESH, REAL -*> Threshold value for the test ratios. Information will be -*> printed about each test for which the test ratio is greater -*> than or equal to the threshold. -*> -*> line 7: TSTERR, LOGICAL -*> Flag indicating whether or not to test the error exits for -*> the LAPACK routines and driver routines. -*> -*> line 8: NEWSD, INTEGER -*> A code indicating how to set the random number seed. 
-*> = 0: Set the seed to a default value before each run -*> = 1: Initialize the seed to a default value only before the -*> first run -*> = 2: Like 1, but use the seed values on the next line -*> -*> If line 8 was 2: -*> -*> line 9: INTEGER array, dimension (4) -*> Four integer values for the random number seed. -*> -*> lines 9-EOF: Lines specifying matrix types, as for NEP. -*> The 3-character path name is 'GLM' for the generalized -*> linear regression model routines. -*> -*>----------------------------------------------------------------------- -*> -*> GQR data file: -*> -*> line 1: 'GQR' in columns 1 to 3. -*> -*> line 2: NN, INTEGER -*> Number of values of M, P, and N. -*> -*> line 3: MVAL, INTEGER array, dimension(NN) -*> Values of M. -*> -*> line 4: PVAL, INTEGER array, dimension(NN) -*> Values of P. -*> -*> line 5: NVAL, INTEGER array, dimension(NN) -*> Values of N. -*> -*> line 6: THRESH, REAL -*> Threshold value for the test ratios. Information will be -*> printed about each test for which the test ratio is greater -*> than or equal to the threshold. -*> -*> line 7: TSTERR, LOGICAL -*> Flag indicating whether or not to test the error exits for -*> the LAPACK routines and driver routines. -*> -*> line 8: NEWSD, INTEGER -*> A code indicating how to set the random number seed. -*> = 0: Set the seed to a default value before each run -*> = 1: Initialize the seed to a default value only before the -*> first run -*> = 2: Like 1, but use the seed values on the next line -*> -*> If line 8 was 2: -*> -*> line 9: INTEGER array, dimension (4) -*> Four integer values for the random number seed. -*> -*> lines 9-EOF: Lines specifying matrix types, as for NEP. -*> The 3-character path name is 'GQR' for the generalized -*> QR and RQ routines. -*> -*>----------------------------------------------------------------------- -*> -*> GSV data file: -*> -*> line 1: 'GSV' in columns 1 to 3. -*> -*> line 2: NN, INTEGER -*> Number of values of M, P, and N. 
-*> -*> line 3: MVAL, INTEGER array, dimension(NN) -*> Values of M (row dimension). -*> -*> line 4: PVAL, INTEGER array, dimension(NN) -*> Values of P (row dimension). -*> -*> line 5: NVAL, INTEGER array, dimension(NN) -*> Values of N (column dimension). -*> -*> line 6: THRESH, REAL -*> Threshold value for the test ratios. Information will be -*> printed about each test for which the test ratio is greater -*> than or equal to the threshold. -*> -*> line 7: TSTERR, LOGICAL -*> Flag indicating whether or not to test the error exits for -*> the LAPACK routines and driver routines. -*> -*> line 8: NEWSD, INTEGER -*> A code indicating how to set the random number seed. -*> = 0: Set the seed to a default value before each run -*> = 1: Initialize the seed to a default value only before the -*> first run -*> = 2: Like 1, but use the seed values on the next line -*> -*> If line 8 was 2: -*> -*> line 9: INTEGER array, dimension (4) -*> Four integer values for the random number seed. -*> -*> lines 9-EOF: Lines specifying matrix types, as for NEP. -*> The 3-character path name is 'GSV' for the generalized -*> SVD routines. -*> -*>----------------------------------------------------------------------- -*> -*> CSD data file: -*> -*> line 1: 'CSD' in columns 1 to 3. -*> -*> line 2: NM, INTEGER -*> Number of values of M, P, and N. -*> -*> line 3: MVAL, INTEGER array, dimension(NM) -*> Values of M (row and column dimension of orthogonal matrix). -*> -*> line 4: PVAL, INTEGER array, dimension(NM) -*> Values of P (row dimension of top-left block). -*> -*> line 5: NVAL, INTEGER array, dimension(NM) -*> Values of N (column dimension of top-left block). -*> -*> line 6: THRESH, REAL -*> Threshold value for the test ratios. Information will be -*> printed about each test for which the test ratio is greater -*> than or equal to the threshold. -*> -*> line 7: TSTERR, LOGICAL -*> Flag indicating whether or not to test the error exits for -*> the LAPACK routines and driver routines. 
-*> -*> line 8: NEWSD, INTEGER -*> A code indicating how to set the random number seed. -*> = 0: Set the seed to a default value before each run -*> = 1: Initialize the seed to a default value only before the -*> first run -*> = 2: Like 1, but use the seed values on the next line -*> -*> If line 8 was 2: -*> -*> line 9: INTEGER array, dimension (4) -*> Four integer values for the random number seed. -*> -*> lines 9-EOF: Lines specifying matrix types, as for NEP. -*> The 3-character path name is 'CSD' for the CSD routine. -*> -*>----------------------------------------------------------------------- -*> -*> LSE data file: -*> -*> line 1: 'LSE' in columns 1 to 3. -*> -*> line 2: NN, INTEGER -*> Number of values of M, P, and N. -*> -*> line 3: MVAL, INTEGER array, dimension(NN) -*> Values of M. -*> -*> line 4: PVAL, INTEGER array, dimension(NN) -*> Values of P. -*> -*> line 5: NVAL, INTEGER array, dimension(NN) -*> Values of N, note P <= N <= P+M. -*> -*> line 6: THRESH, REAL -*> Threshold value for the test ratios. Information will be -*> printed about each test for which the test ratio is greater -*> than or equal to the threshold. -*> -*> line 7: TSTERR, LOGICAL -*> Flag indicating whether or not to test the error exits for -*> the LAPACK routines and driver routines. -*> -*> line 8: NEWSD, INTEGER -*> A code indicating how to set the random number seed. -*> = 0: Set the seed to a default value before each run -*> = 1: Initialize the seed to a default value only before the -*> first run -*> = 2: Like 1, but use the seed values on the next line -*> -*> If line 8 was 2: -*> -*> line 9: INTEGER array, dimension (4) -*> Four integer values for the random number seed. -*> -*> lines 9-EOF: Lines specifying matrix types, as for NEP. -*> The 3-character path name is 'GSV' for the generalized -*> SVD routines. 
-*> -*>----------------------------------------------------------------------- -*> -*> NMAX is currently set to 132 and must be at least 12 for some of the -*> precomputed examples, and LWORK = NMAX*(5*NMAX+5)+1 in the parameter -*> statements below. For SVD, we assume NRHS may be as big as N. The -*> parameter NEED is set to 14 to allow for 14 N-by-N matrices for DGG. -*> \endverbatim -* -* Arguments: -* ========== -* -* -* Authors: -* ======== -* -*> \author Univ. of Tennessee -*> \author Univ. of California Berkeley -*> \author Univ. of Colorado Denver -*> \author NAG Ltd. -* -*> \date June 2016 -* -*> \ingroup double_eig -* -* ===================================================================== - PROGRAM DCHKEE -* -* -- LAPACK test routine (version 3.7.0) -- -* -- LAPACK is a software package provided by Univ. of Tennessee, -- -* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* June 2016 -* -* ===================================================================== -* -* .. Parameters .. - INTEGER NMAX - PARAMETER ( NMAX = 132 ) - INTEGER NCMAX - PARAMETER ( NCMAX = 20 ) - INTEGER NEED - PARAMETER ( NEED = 14 ) - INTEGER LWORK - PARAMETER ( LWORK = NMAX*( 5*NMAX+5 )+1 ) - INTEGER LIWORK - PARAMETER ( LIWORK = NMAX*( 5*NMAX+20 ) ) - INTEGER MAXIN - PARAMETER ( MAXIN = 20 ) - INTEGER MAXT - PARAMETER ( MAXT = 30 ) - INTEGER NIN, NOUT - PARAMETER ( NIN = 5, NOUT = 6 ) -* .. -* .. Local Scalars .. - LOGICAL CSD, DBB, DGG, DSB, FATAL, GLM, GQR, GSV, LSE, - $ NEP, DBK, DBL, SEP, DES, DEV, DGK, DGL, DGS, - $ DGV, DGX, DSX, SVD, DVX, DXV, TSTCHK, TSTDIF, - $ TSTDRV, TSTERR - CHARACTER C1 - CHARACTER*3 C3, PATH - CHARACTER*32 VNAME - CHARACTER*10 INTSTR - CHARACTER*80 LINE - INTEGER I, I1, IC, INFO, ITMP, K, LENP, MAXTYP, NEWSD, - $ NK, NN, NPARMS, NRHS, NTYPES, - $ VERS_MAJOR, VERS_MINOR, VERS_PATCH - DOUBLE PRECISION EPS, S1, S2, THRESH, THRSHN -* .. -* .. Local Arrays .. 
- LOGICAL DOTYPE( MAXT ), LOGWRK( NMAX ) - INTEGER IOLDSD( 4 ), ISEED( 4 ), IWORK( LIWORK ), - $ KVAL( MAXIN ), MVAL( MAXIN ), MXBVAL( MAXIN ), - $ NBCOL( MAXIN ), NBMIN( MAXIN ), NBVAL( MAXIN ), - $ NSVAL( MAXIN ), NVAL( MAXIN ), NXVAL( MAXIN ), - $ PVAL( MAXIN ) - INTEGER INMIN( MAXIN ), INWIN( MAXIN ), INIBL( MAXIN ), - $ ISHFTS( MAXIN ), IACC22( MAXIN ) - DOUBLE PRECISION A( NMAX*NMAX, NEED ), B( NMAX*NMAX, 5 ), - $ C( NCMAX*NCMAX, NCMAX*NCMAX ), D( NMAX, 12 ), - $ RESULT( 500 ), TAUA( NMAX ), TAUB( NMAX ), - $ WORK( LWORK ), X( 5*NMAX ) -* .. -* .. External Functions .. - LOGICAL LSAMEN - DOUBLE PRECISION DLAMCH, DSECND - EXTERNAL LSAMEN, DLAMCH, DSECND -* .. -* .. External Subroutines .. - EXTERNAL ALAREQ, DCHKBB, DCHKBD, DCHKBK, DCHKBL, DCHKEC, - $ DCHKGG, DCHKGK, DCHKGL, DCHKHS, DCHKSB, DCHKST, - $ DCKCSD, DCKGLM, DCKGQR, DCKGSV, DCKLSE, DDRGES, - $ DDRGEV, DDRGSX, DDRGVX, DDRVBD, DDRVES, DDRVEV, - $ DDRVSG, DDRVST, DDRVSX, DDRVVX, DERRBD, - $ DERRED, DERRGG, DERRHS, DERRST, ILAVER, XLAENV, - $ DDRGES3, DDRGEV3, - $ DCHKST2STG, DDRVST2STG, DCHKSB2STG, DDRVSG2STG -* .. -* .. Intrinsic Functions .. - INTRINSIC LEN, MIN -* .. -* .. Scalars in Common .. - LOGICAL LERR, OK - CHARACTER*32 SRNAMT - INTEGER INFOT, MAXB, NPROC, NSHIFT, NUNIT, SELDIM, - $ SELOPT -* .. -* .. Arrays in Common .. - LOGICAL SELVAL( 20 ) - INTEGER IPARMS( 100 ) - DOUBLE PRECISION SELWI( 20 ), SELWR( 20 ) -* .. -* .. Common blocks .. - COMMON / CENVIR / NPROC, NSHIFT, MAXB - COMMON / INFOC / INFOT, NUNIT, OK, LERR - COMMON / SRNAMC / SRNAMT - COMMON / SSLCT / SELOPT, SELDIM, SELVAL, SELWR, SELWI - COMMON / CLAENV / IPARMS -* .. -* .. Data statements .. - DATA INTSTR / '0123456789' / - DATA IOLDSD / 0, 0, 0, 1 / -* .. -* .. Executable Statements .. -* - A = 0.0 - B = 0.0 - C = 0.0 - D = 0.0 - S1 = DSECND( ) - FATAL = .FALSE. 
- NUNIT = NOUT -* -* Return to here to read multiple sets of data -* - 10 CONTINUE -* -* Read the first line and set the 3-character test path -* - READ( NIN, FMT = '(A80)', END = 380 )LINE - PATH = LINE( 1: 3 ) - NEP = LSAMEN( 3, PATH, 'NEP' ) .OR. LSAMEN( 3, PATH, 'DHS' ) - SEP = LSAMEN( 3, PATH, 'SEP' ) .OR. LSAMEN( 3, PATH, 'DST' ) .OR. - $ LSAMEN( 3, PATH, 'DSG' ) .OR. LSAMEN( 3, PATH, 'SE2' ) - SVD = LSAMEN( 3, PATH, 'SVD' ) .OR. LSAMEN( 3, PATH, 'DBD' ) - DEV = LSAMEN( 3, PATH, 'DEV' ) - DES = LSAMEN( 3, PATH, 'DES' ) - DVX = LSAMEN( 3, PATH, 'DVX' ) - DSX = LSAMEN( 3, PATH, 'DSX' ) - DGG = LSAMEN( 3, PATH, 'DGG' ) - DGS = LSAMEN( 3, PATH, 'DGS' ) - DGX = LSAMEN( 3, PATH, 'DGX' ) - DGV = LSAMEN( 3, PATH, 'DGV' ) - DXV = LSAMEN( 3, PATH, 'DXV' ) - DSB = LSAMEN( 3, PATH, 'DSB' ) - DBB = LSAMEN( 3, PATH, 'DBB' ) - GLM = LSAMEN( 3, PATH, 'GLM' ) - GQR = LSAMEN( 3, PATH, 'GQR' ) .OR. LSAMEN( 3, PATH, 'GRQ' ) - GSV = LSAMEN( 3, PATH, 'GSV' ) - CSD = LSAMEN( 3, PATH, 'CSD' ) - LSE = LSAMEN( 3, PATH, 'LSE' ) - DBL = LSAMEN( 3, PATH, 'DBL' ) - DBK = LSAMEN( 3, PATH, 'DBK' ) - DGL = LSAMEN( 3, PATH, 'DGL' ) - DGK = LSAMEN( 3, PATH, 'DGK' ) -* -* Report values of parameters. -* - IF( PATH.EQ.' 
' ) THEN - GO TO 10 - ELSE IF( NEP ) THEN - WRITE( NOUT, FMT = 9987 ) - ELSE IF( SEP ) THEN - WRITE( NOUT, FMT = 9986 ) - ELSE IF( SVD ) THEN - WRITE( NOUT, FMT = 9985 ) - ELSE IF( DEV ) THEN - WRITE( NOUT, FMT = 9979 ) - ELSE IF( DES ) THEN - WRITE( NOUT, FMT = 9978 ) - ELSE IF( DVX ) THEN - WRITE( NOUT, FMT = 9977 ) - ELSE IF( DSX ) THEN - WRITE( NOUT, FMT = 9976 ) - ELSE IF( DGG ) THEN - WRITE( NOUT, FMT = 9975 ) - ELSE IF( DGS ) THEN - WRITE( NOUT, FMT = 9964 ) - ELSE IF( DGX ) THEN - WRITE( NOUT, FMT = 9965 ) - ELSE IF( DGV ) THEN - WRITE( NOUT, FMT = 9963 ) - ELSE IF( DXV ) THEN - WRITE( NOUT, FMT = 9962 ) - ELSE IF( DSB ) THEN - WRITE( NOUT, FMT = 9974 ) - ELSE IF( DBB ) THEN - WRITE( NOUT, FMT = 9967 ) - ELSE IF( GLM ) THEN - WRITE( NOUT, FMT = 9971 ) - ELSE IF( GQR ) THEN - WRITE( NOUT, FMT = 9970 ) - ELSE IF( GSV ) THEN - WRITE( NOUT, FMT = 9969 ) - ELSE IF( CSD ) THEN - WRITE( NOUT, FMT = 9960 ) - ELSE IF( LSE ) THEN - WRITE( NOUT, FMT = 9968 ) - ELSE IF( DBL ) THEN -* -* DGEBAL: Balancing -* - CALL DCHKBL( NIN, NOUT ) - GO TO 10 - ELSE IF( DBK ) THEN -* -* DGEBAK: Back transformation -* - CALL DCHKBK( NIN, NOUT ) - GO TO 10 - ELSE IF( DGL ) THEN -* -* DGGBAL: Balancing -* - CALL DCHKGL( NIN, NOUT ) - GO TO 10 - ELSE IF( DGK ) THEN -* -* DGGBAK: Back transformation -* - CALL DCHKGK( NIN, NOUT ) - GO TO 10 - ELSE IF( LSAMEN( 3, PATH, 'DEC' ) ) THEN -* -* DEC: Eigencondition estimation -* - READ( NIN, FMT = * )THRESH - CALL XLAENV( 1, 1 ) - CALL XLAENV( 12, 11 ) - CALL XLAENV( 13, 2 ) - CALL XLAENV( 14, 0 ) - CALL XLAENV( 15, 2 ) - CALL XLAENV( 16, 2 ) - TSTERR = .TRUE. - CALL DCHKEC( THRESH, TSTERR, NIN, NOUT ) - GO TO 10 - ELSE - WRITE( NOUT, FMT = 9992 )PATH - GO TO 10 - END IF - CALL ILAVER( VERS_MAJOR, VERS_MINOR, VERS_PATCH ) - WRITE( NOUT, FMT = 9972 ) VERS_MAJOR, VERS_MINOR, VERS_PATCH - WRITE( NOUT, FMT = 9984 ) -* -* Read the number of values of M, P, and N. 
-* - READ( NIN, FMT = * )NN - IF( NN.LT.0 ) THEN - WRITE( NOUT, FMT = 9989 )' NN ', NN, 1 - NN = 0 - FATAL = .TRUE. - ELSE IF( NN.GT.MAXIN ) THEN - WRITE( NOUT, FMT = 9988 )' NN ', NN, MAXIN - NN = 0 - FATAL = .TRUE. - END IF -* -* Read the values of M -* - IF( .NOT.( DGX .OR. DXV ) ) THEN - READ( NIN, FMT = * )( MVAL( I ), I = 1, NN ) - IF( SVD ) THEN - VNAME = ' M ' - ELSE - VNAME = ' N ' - END IF - DO 20 I = 1, NN - IF( MVAL( I ).LT.0 ) THEN - WRITE( NOUT, FMT = 9989 )VNAME, MVAL( I ), 0 - FATAL = .TRUE. - ELSE IF( MVAL( I ).GT.NMAX ) THEN - WRITE( NOUT, FMT = 9988 )VNAME, MVAL( I ), NMAX - FATAL = .TRUE. - END IF - 20 CONTINUE - WRITE( NOUT, FMT = 9983 )'M: ', ( MVAL( I ), I = 1, NN ) - END IF -* -* Read the values of P -* - IF( GLM .OR. GQR .OR. GSV .OR. CSD .OR. LSE ) THEN - READ( NIN, FMT = * )( PVAL( I ), I = 1, NN ) - DO 30 I = 1, NN - IF( PVAL( I ).LT.0 ) THEN - WRITE( NOUT, FMT = 9989 )' P ', PVAL( I ), 0 - FATAL = .TRUE. - ELSE IF( PVAL( I ).GT.NMAX ) THEN - WRITE( NOUT, FMT = 9988 )' P ', PVAL( I ), NMAX - FATAL = .TRUE. - END IF - 30 CONTINUE - WRITE( NOUT, FMT = 9983 )'P: ', ( PVAL( I ), I = 1, NN ) - END IF -* -* Read the values of N -* - IF( SVD .OR. DBB .OR. GLM .OR. GQR .OR. GSV .OR. CSD .OR. - $ LSE ) THEN - READ( NIN, FMT = * )( NVAL( I ), I = 1, NN ) - DO 40 I = 1, NN - IF( NVAL( I ).LT.0 ) THEN - WRITE( NOUT, FMT = 9989 )' N ', NVAL( I ), 0 - FATAL = .TRUE. - ELSE IF( NVAL( I ).GT.NMAX ) THEN - WRITE( NOUT, FMT = 9988 )' N ', NVAL( I ), NMAX - FATAL = .TRUE. - END IF - 40 CONTINUE - ELSE - DO 50 I = 1, NN - NVAL( I ) = MVAL( I ) - 50 CONTINUE - END IF - IF( .NOT.( DGX .OR. DXV ) ) THEN - WRITE( NOUT, FMT = 9983 )'N: ', ( NVAL( I ), I = 1, NN ) - ELSE - WRITE( NOUT, FMT = 9983 )'N: ', NN - END IF -* -* Read the number of values of K, followed by the values of K -* - IF( DSB .OR. 
DBB ) THEN - READ( NIN, FMT = * )NK - READ( NIN, FMT = * )( KVAL( I ), I = 1, NK ) - DO 60 I = 1, NK - IF( KVAL( I ).LT.0 ) THEN - WRITE( NOUT, FMT = 9989 )' K ', KVAL( I ), 0 - FATAL = .TRUE. - ELSE IF( KVAL( I ).GT.NMAX ) THEN - WRITE( NOUT, FMT = 9988 )' K ', KVAL( I ), NMAX - FATAL = .TRUE. - END IF - 60 CONTINUE - WRITE( NOUT, FMT = 9983 )'K: ', ( KVAL( I ), I = 1, NK ) - END IF -* - IF( DEV .OR. DES .OR. DVX .OR. DSX ) THEN -* -* For the nonsymmetric QR driver routines, only one set of -* parameters is allowed. -* - READ( NIN, FMT = * )NBVAL( 1 ), NBMIN( 1 ), NXVAL( 1 ), - $ INMIN( 1 ), INWIN( 1 ), INIBL(1), ISHFTS(1), IACC22(1) - IF( NBVAL( 1 ).LT.1 ) THEN - WRITE( NOUT, FMT = 9989 )' NB ', NBVAL( 1 ), 1 - FATAL = .TRUE. - ELSE IF( NBMIN( 1 ).LT.1 ) THEN - WRITE( NOUT, FMT = 9989 )'NBMIN ', NBMIN( 1 ), 1 - FATAL = .TRUE. - ELSE IF( NXVAL( 1 ).LT.1 ) THEN - WRITE( NOUT, FMT = 9989 )' NX ', NXVAL( 1 ), 1 - FATAL = .TRUE. - ELSE IF( INMIN( 1 ).LT.1 ) THEN - WRITE( NOUT, FMT = 9989 )' INMIN ', INMIN( 1 ), 1 - FATAL = .TRUE. - ELSE IF( INWIN( 1 ).LT.1 ) THEN - WRITE( NOUT, FMT = 9989 )' INWIN ', INWIN( 1 ), 1 - FATAL = .TRUE. - ELSE IF( INIBL( 1 ).LT.1 ) THEN - WRITE( NOUT, FMT = 9989 )' INIBL ', INIBL( 1 ), 1 - FATAL = .TRUE. - ELSE IF( ISHFTS( 1 ).LT.1 ) THEN - WRITE( NOUT, FMT = 9989 )' ISHFTS ', ISHFTS( 1 ), 1 - FATAL = .TRUE. - ELSE IF( IACC22( 1 ).LT.0 ) THEN - WRITE( NOUT, FMT = 9989 )' IACC22 ', IACC22( 1 ), 0 - FATAL = .TRUE. 
- END IF - CALL XLAENV( 1, NBVAL( 1 ) ) - CALL XLAENV( 2, NBMIN( 1 ) ) - CALL XLAENV( 3, NXVAL( 1 ) ) - CALL XLAENV(12, MAX( 11, INMIN( 1 ) ) ) - CALL XLAENV(13, INWIN( 1 ) ) - CALL XLAENV(14, INIBL( 1 ) ) - CALL XLAENV(15, ISHFTS( 1 ) ) - CALL XLAENV(16, IACC22( 1 ) ) - WRITE( NOUT, FMT = 9983 )'NB: ', NBVAL( 1 ) - WRITE( NOUT, FMT = 9983 )'NBMIN:', NBMIN( 1 ) - WRITE( NOUT, FMT = 9983 )'NX: ', NXVAL( 1 ) - WRITE( NOUT, FMT = 9983 )'INMIN: ', INMIN( 1 ) - WRITE( NOUT, FMT = 9983 )'INWIN: ', INWIN( 1 ) - WRITE( NOUT, FMT = 9983 )'INIBL: ', INIBL( 1 ) - WRITE( NOUT, FMT = 9983 )'ISHFTS: ', ISHFTS( 1 ) - WRITE( NOUT, FMT = 9983 )'IACC22: ', IACC22( 1 ) -* - ELSEIF( DGS .OR. DGX .OR. DGV .OR. DXV ) THEN -* -* For the nonsymmetric generalized driver routines, only one set -* of parameters is allowed. -* - READ( NIN, FMT = * )NBVAL( 1 ), NBMIN( 1 ), NXVAL( 1 ), - $ NSVAL( 1 ), MXBVAL( 1 ) - IF( NBVAL( 1 ).LT.1 ) THEN - WRITE( NOUT, FMT = 9989 )' NB ', NBVAL( 1 ), 1 - FATAL = .TRUE. - ELSE IF( NBMIN( 1 ).LT.1 ) THEN - WRITE( NOUT, FMT = 9989 )'NBMIN ', NBMIN( 1 ), 1 - FATAL = .TRUE. - ELSE IF( NXVAL( 1 ).LT.1 ) THEN - WRITE( NOUT, FMT = 9989 )' NX ', NXVAL( 1 ), 1 - FATAL = .TRUE. - ELSE IF( NSVAL( 1 ).LT.2 ) THEN - WRITE( NOUT, FMT = 9989 )' NS ', NSVAL( 1 ), 2 - FATAL = .TRUE. - ELSE IF( MXBVAL( 1 ).LT.1 ) THEN - WRITE( NOUT, FMT = 9989 )' MAXB ', MXBVAL( 1 ), 1 - FATAL = .TRUE. - END IF - CALL XLAENV( 1, NBVAL( 1 ) ) - CALL XLAENV( 2, NBMIN( 1 ) ) - CALL XLAENV( 3, NXVAL( 1 ) ) - CALL XLAENV( 4, NSVAL( 1 ) ) - CALL XLAENV( 8, MXBVAL( 1 ) ) - WRITE( NOUT, FMT = 9983 )'NB: ', NBVAL( 1 ) - WRITE( NOUT, FMT = 9983 )'NBMIN:', NBMIN( 1 ) - WRITE( NOUT, FMT = 9983 )'NX: ', NXVAL( 1 ) - WRITE( NOUT, FMT = 9983 )'NS: ', NSVAL( 1 ) - WRITE( NOUT, FMT = 9983 )'MAXB: ', MXBVAL( 1 ) -* - ELSE IF( .NOT.DSB .AND. .NOT.GLM .AND. .NOT.GQR .AND. .NOT. - $ GSV .AND. .NOT.CSD .AND. .NOT.LSE ) THEN -* -* For the other paths, the number of parameters can be varied -* from the input file. 
Read the number of parameter values. -* - READ( NIN, FMT = * )NPARMS - IF( NPARMS.LT.1 ) THEN - WRITE( NOUT, FMT = 9989 )'NPARMS', NPARMS, 1 - NPARMS = 0 - FATAL = .TRUE. - ELSE IF( NPARMS.GT.MAXIN ) THEN - WRITE( NOUT, FMT = 9988 )'NPARMS', NPARMS, MAXIN - NPARMS = 0 - FATAL = .TRUE. - END IF -* -* Read the values of NB -* - IF( .NOT.DBB ) THEN - READ( NIN, FMT = * )( NBVAL( I ), I = 1, NPARMS ) - DO 70 I = 1, NPARMS - IF( NBVAL( I ).LT.0 ) THEN - WRITE( NOUT, FMT = 9989 )' NB ', NBVAL( I ), 0 - FATAL = .TRUE. - ELSE IF( NBVAL( I ).GT.NMAX ) THEN - WRITE( NOUT, FMT = 9988 )' NB ', NBVAL( I ), NMAX - FATAL = .TRUE. - END IF - 70 CONTINUE - WRITE( NOUT, FMT = 9983 )'NB: ', - $ ( NBVAL( I ), I = 1, NPARMS ) - END IF -* -* Read the values of NBMIN -* - IF( NEP .OR. SEP .OR. SVD .OR. DGG ) THEN - READ( NIN, FMT = * )( NBMIN( I ), I = 1, NPARMS ) - DO 80 I = 1, NPARMS - IF( NBMIN( I ).LT.0 ) THEN - WRITE( NOUT, FMT = 9989 )'NBMIN ', NBMIN( I ), 0 - FATAL = .TRUE. - ELSE IF( NBMIN( I ).GT.NMAX ) THEN - WRITE( NOUT, FMT = 9988 )'NBMIN ', NBMIN( I ), NMAX - FATAL = .TRUE. - END IF - 80 CONTINUE - WRITE( NOUT, FMT = 9983 )'NBMIN:', - $ ( NBMIN( I ), I = 1, NPARMS ) - ELSE - DO 90 I = 1, NPARMS - NBMIN( I ) = 1 - 90 CONTINUE - END IF -* -* Read the values of NX -* - IF( NEP .OR. SEP .OR. SVD ) THEN - READ( NIN, FMT = * )( NXVAL( I ), I = 1, NPARMS ) - DO 100 I = 1, NPARMS - IF( NXVAL( I ).LT.0 ) THEN - WRITE( NOUT, FMT = 9989 )' NX ', NXVAL( I ), 0 - FATAL = .TRUE. - ELSE IF( NXVAL( I ).GT.NMAX ) THEN - WRITE( NOUT, FMT = 9988 )' NX ', NXVAL( I ), NMAX - FATAL = .TRUE. - END IF - 100 CONTINUE - WRITE( NOUT, FMT = 9983 )'NX: ', - $ ( NXVAL( I ), I = 1, NPARMS ) - ELSE - DO 110 I = 1, NPARMS - NXVAL( I ) = 1 - 110 CONTINUE - END IF -* -* Read the values of NSHIFT (if DGG) or NRHS (if SVD -* or DBB). -* - IF( SVD .OR. DBB .OR. 
DGG ) THEN - READ( NIN, FMT = * )( NSVAL( I ), I = 1, NPARMS ) - DO 120 I = 1, NPARMS - IF( NSVAL( I ).LT.0 ) THEN - WRITE( NOUT, FMT = 9989 )' NS ', NSVAL( I ), 0 - FATAL = .TRUE. - ELSE IF( NSVAL( I ).GT.NMAX ) THEN - WRITE( NOUT, FMT = 9988 )' NS ', NSVAL( I ), NMAX - FATAL = .TRUE. - END IF - 120 CONTINUE - WRITE( NOUT, FMT = 9983 )'NS: ', - $ ( NSVAL( I ), I = 1, NPARMS ) - ELSE - DO 130 I = 1, NPARMS - NSVAL( I ) = 1 - 130 CONTINUE - END IF -* -* Read the values for MAXB. -* - IF( DGG ) THEN - READ( NIN, FMT = * )( MXBVAL( I ), I = 1, NPARMS ) - DO 140 I = 1, NPARMS - IF( MXBVAL( I ).LT.0 ) THEN - WRITE( NOUT, FMT = 9989 )' MAXB ', MXBVAL( I ), 0 - FATAL = .TRUE. - ELSE IF( MXBVAL( I ).GT.NMAX ) THEN - WRITE( NOUT, FMT = 9988 )' MAXB ', MXBVAL( I ), NMAX - FATAL = .TRUE. - END IF - 140 CONTINUE - WRITE( NOUT, FMT = 9983 )'MAXB: ', - $ ( MXBVAL( I ), I = 1, NPARMS ) - ELSE - DO 150 I = 1, NPARMS - MXBVAL( I ) = 1 - 150 CONTINUE - END IF -* -* Read the values for INMIN. -* - IF( NEP ) THEN - READ( NIN, FMT = * )( INMIN( I ), I = 1, NPARMS ) - DO 540 I = 1, NPARMS - IF( INMIN( I ).LT.0 ) THEN - WRITE( NOUT, FMT = 9989 )' INMIN ', INMIN( I ), 0 - FATAL = .TRUE. - END IF - 540 CONTINUE - WRITE( NOUT, FMT = 9983 )'INMIN: ', - $ ( INMIN( I ), I = 1, NPARMS ) - ELSE - DO 550 I = 1, NPARMS - INMIN( I ) = 1 - 550 CONTINUE - END IF -* -* Read the values for INWIN. -* - IF( NEP ) THEN - READ( NIN, FMT = * )( INWIN( I ), I = 1, NPARMS ) - DO 560 I = 1, NPARMS - IF( INWIN( I ).LT.0 ) THEN - WRITE( NOUT, FMT = 9989 )' INWIN ', INWIN( I ), 0 - FATAL = .TRUE. - END IF - 560 CONTINUE - WRITE( NOUT, FMT = 9983 )'INWIN: ', - $ ( INWIN( I ), I = 1, NPARMS ) - ELSE - DO 570 I = 1, NPARMS - INWIN( I ) = 1 - 570 CONTINUE - END IF -* -* Read the values for INIBL. -* - IF( NEP ) THEN - READ( NIN, FMT = * )( INIBL( I ), I = 1, NPARMS ) - DO 580 I = 1, NPARMS - IF( INIBL( I ).LT.0 ) THEN - WRITE( NOUT, FMT = 9989 )' INIBL ', INIBL( I ), 0 - FATAL = .TRUE. 
- END IF - 580 CONTINUE - WRITE( NOUT, FMT = 9983 )'INIBL: ', - $ ( INIBL( I ), I = 1, NPARMS ) - ELSE - DO 590 I = 1, NPARMS - INIBL( I ) = 1 - 590 CONTINUE - END IF -* -* Read the values for ISHFTS. -* - IF( NEP ) THEN - READ( NIN, FMT = * )( ISHFTS( I ), I = 1, NPARMS ) - DO 600 I = 1, NPARMS - IF( ISHFTS( I ).LT.0 ) THEN - WRITE( NOUT, FMT = 9989 )' ISHFTS ', ISHFTS( I ), 0 - FATAL = .TRUE. - END IF - 600 CONTINUE - WRITE( NOUT, FMT = 9983 )'ISHFTS: ', - $ ( ISHFTS( I ), I = 1, NPARMS ) - ELSE - DO 610 I = 1, NPARMS - ISHFTS( I ) = 1 - 610 CONTINUE - END IF -* -* Read the values for IACC22. -* - IF( NEP .OR. DGG ) THEN - READ( NIN, FMT = * )( IACC22( I ), I = 1, NPARMS ) - DO 620 I = 1, NPARMS - IF( IACC22( I ).LT.0 ) THEN - WRITE( NOUT, FMT = 9989 )' IACC22 ', IACC22( I ), 0 - FATAL = .TRUE. - END IF - 620 CONTINUE - WRITE( NOUT, FMT = 9983 )'IACC22: ', - $ ( IACC22( I ), I = 1, NPARMS ) - ELSE - DO 630 I = 1, NPARMS - IACC22( I ) = 1 - 630 CONTINUE - END IF -* -* Read the values for NBCOL. -* - IF( DGG ) THEN - READ( NIN, FMT = * )( NBCOL( I ), I = 1, NPARMS ) - DO 160 I = 1, NPARMS - IF( NBCOL( I ).LT.0 ) THEN - WRITE( NOUT, FMT = 9989 )'NBCOL ', NBCOL( I ), 0 - FATAL = .TRUE. - ELSE IF( NBCOL( I ).GT.NMAX ) THEN - WRITE( NOUT, FMT = 9988 )'NBCOL ', NBCOL( I ), NMAX - FATAL = .TRUE. - END IF - 160 CONTINUE - WRITE( NOUT, FMT = 9983 )'NBCOL:', - $ ( NBCOL( I ), I = 1, NPARMS ) - ELSE - DO 170 I = 1, NPARMS - NBCOL( I ) = 1 - 170 CONTINUE - END IF - END IF -* -* Calculate and print the machine dependent constants. -* - WRITE( NOUT, FMT = * ) - EPS = DLAMCH( 'Underflow threshold' ) - WRITE( NOUT, FMT = 9981 )'underflow', EPS - EPS = DLAMCH( 'Overflow threshold' ) - WRITE( NOUT, FMT = 9981 )'overflow ', EPS - EPS = DLAMCH( 'Epsilon' ) - WRITE( NOUT, FMT = 9981 )'precision', EPS -* -* Read the threshold value for the test ratios. -* - READ( NIN, FMT = * )THRESH - WRITE( NOUT, FMT = 9982 )THRESH - IF( SEP .OR. SVD .OR. 
DGG ) THEN -* -* Read the flag that indicates whether to test LAPACK routines. -* - READ( NIN, FMT = * )TSTCHK -* -* Read the flag that indicates whether to test driver routines. -* - READ( NIN, FMT = * )TSTDRV - END IF -* -* Read the flag that indicates whether to test the error exits. -* - READ( NIN, FMT = * )TSTERR -* -* Read the code describing how to set the random number seed. -* - READ( NIN, FMT = * )NEWSD -* -* If NEWSD = 2, read another line with 4 integers for the seed. -* - IF( NEWSD.EQ.2 ) - $ READ( NIN, FMT = * )( IOLDSD( I ), I = 1, 4 ) -* - DO 180 I = 1, 4 - ISEED( I ) = IOLDSD( I ) - 180 CONTINUE -* - IF( FATAL ) THEN - WRITE( NOUT, FMT = 9999 ) - STOP - END IF -* -* Read the input lines indicating the test path and its parameters. -* The first three characters indicate the test path, and the number -* of test matrix types must be the first nonblank item in columns -* 4-80. -* - 190 CONTINUE -* - IF( .NOT.( DGX .OR. DXV ) ) THEN -* - 200 CONTINUE - READ( NIN, FMT = '(A80)', END = 380 )LINE - C3 = LINE( 1: 3 ) - LENP = LEN( LINE ) - I = 3 - ITMP = 0 - I1 = 0 - 210 CONTINUE - I = I + 1 - IF( I.GT.LENP ) THEN - IF( I1.GT.0 ) THEN - GO TO 240 - ELSE - NTYPES = MAXT - GO TO 240 - END IF - END IF - IF( LINE( I: I ).NE.' ' .AND. LINE( I: I ).NE.',' ) THEN - I1 = I - C1 = LINE( I1: I1 ) -* -* Check that a valid integer was read -* - DO 220 K = 1, 10 - IF( C1.EQ.INTSTR( K: K ) ) THEN - IC = K - 1 - GO TO 230 - END IF - 220 CONTINUE - WRITE( NOUT, FMT = 9991 )I, LINE - GO TO 200 - 230 CONTINUE - ITMP = 10*ITMP + IC - GO TO 210 - ELSE IF( I1.GT.0 ) THEN - GO TO 240 - ELSE - GO TO 210 - END IF - 240 CONTINUE - NTYPES = ITMP -* -* Skip the tests if NTYPES is <= 0. -* - IF( .NOT.( DEV .OR. DES .OR. DVX .OR. DSX .OR. DGV .OR. - $ DGS ) .AND. NTYPES.LE.0 ) THEN - WRITE( NOUT, FMT = 9990 )C3 - GO TO 200 - END IF -* - ELSE - IF( DXV ) - $ C3 = 'DXV' - IF( DGX ) - $ C3 = 'DGX' - END IF -* -* Reset the random number seed. 
-* - IF( NEWSD.EQ.0 ) THEN - DO 250 K = 1, 4 - ISEED( K ) = IOLDSD( K ) - 250 CONTINUE - END IF -* - IF( LSAMEN( 3, C3, 'DHS' ) .OR. LSAMEN( 3, C3, 'NEP' ) ) THEN -* -* ------------------------------------- -* NEP: Nonsymmetric Eigenvalue Problem -* ------------------------------------- -* Vary the parameters -* NB = block size -* NBMIN = minimum block size -* NX = crossover point -* NS = number of shifts -* MAXB = minimum submatrix size -* - MAXTYP = 21 - NTYPES = MIN( MAXTYP, NTYPES ) - CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) - CALL XLAENV( 1, 1 ) - IF( TSTERR ) - $ CALL DERRHS( 'DHSEQR', NOUT ) - DO 270 I = 1, NPARMS - CALL XLAENV( 1, NBVAL( I ) ) - CALL XLAENV( 2, NBMIN( I ) ) - CALL XLAENV( 3, NXVAL( I ) ) - CALL XLAENV(12, MAX( 11, INMIN( I ) ) ) - CALL XLAENV(13, INWIN( I ) ) - CALL XLAENV(14, INIBL( I ) ) - CALL XLAENV(15, ISHFTS( I ) ) - CALL XLAENV(16, IACC22( I ) ) -* - IF( NEWSD.EQ.0 ) THEN - DO 260 K = 1, 4 - ISEED( K ) = IOLDSD( K ) - 260 CONTINUE - END IF - WRITE( NOUT, FMT = 9961 )C3, NBVAL( I ), NBMIN( I ), - $ NXVAL( I ), MAX( 11, INMIN(I)), - $ INWIN( I ), INIBL( I ), ISHFTS( I ), IACC22( I ) - CALL DCHKHS( NN, NVAL, MAXTYP, DOTYPE, ISEED, THRESH, NOUT, - $ A( 1, 1 ), NMAX, A( 1, 2 ), A( 1, 3 ), - $ A( 1, 4 ), A( 1, 5 ), NMAX, A( 1, 6 ), - $ A( 1, 7 ), D( 1, 1 ), D( 1, 2 ), D( 1, 3 ), - $ D( 1, 4 ), D( 1, 5 ), D( 1, 6 ), A( 1, 8 ), - $ A( 1, 9 ), A( 1, 10 ), A( 1, 11 ), A( 1, 12 ), - $ D( 1, 7 ), WORK, LWORK, IWORK, LOGWRK, RESULT, - $ INFO ) - IF( INFO.NE.0 ) - $ WRITE( NOUT, FMT = 9980 )'DCHKHS', INFO - 270 CONTINUE -* - ELSE IF( LSAMEN( 3, C3, 'DST' ) .OR. LSAMEN( 3, C3, 'SEP' ) - $ .OR. 
LSAMEN( 3, C3, 'SE2' ) ) THEN -* -* ---------------------------------- -* SEP: Symmetric Eigenvalue Problem -* ---------------------------------- -* Vary the parameters -* NB = block size -* NBMIN = minimum block size -* NX = crossover point -* - MAXTYP = 21 - NTYPES = MIN( MAXTYP, NTYPES ) - CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) - CALL XLAENV( 1, 1 ) - CALL XLAENV( 9, 25 ) - IF( TSTERR ) - $ CALL DERRST( 'DST', NOUT ) - DO 290 I = 1, NPARMS - CALL XLAENV( 1, NBVAL( I ) ) - CALL XLAENV( 2, NBMIN( I ) ) - CALL XLAENV( 3, NXVAL( I ) ) -* - IF( NEWSD.EQ.0 ) THEN - DO 280 K = 1, 4 - ISEED( K ) = IOLDSD( K ) - 280 CONTINUE - END IF - WRITE( NOUT, FMT = 9997 )C3, NBVAL( I ), NBMIN( I ), - $ NXVAL( I ) - IF( TSTCHK ) THEN - IF( LSAMEN( 3, C3, 'SE2' ) ) THEN - CALL DCHKST2STG( NN, NVAL, MAXTYP, DOTYPE, ISEED, THRESH, - $ NOUT, A( 1, 1 ), NMAX, A( 1, 2 ), D( 1, 1 ), - $ D( 1, 2 ), D( 1, 3 ), D( 1, 4 ), D( 1, 5 ), - $ D( 1, 6 ), D( 1, 7 ), D( 1, 8 ), D( 1, 9 ), - $ D( 1, 10 ), D( 1, 11 ), A( 1, 3 ), NMAX, - $ A( 1, 4 ), A( 1, 5 ), D( 1, 12 ), A( 1, 6 ), - $ WORK, LWORK, IWORK, LIWORK, RESULT, INFO ) - ELSE - CALL DCHKST( NN, NVAL, MAXTYP, DOTYPE, ISEED, THRESH, - $ NOUT, A( 1, 1 ), NMAX, A( 1, 2 ), D( 1, 1 ), - $ D( 1, 2 ), D( 1, 3 ), D( 1, 4 ), D( 1, 5 ), - $ D( 1, 6 ), D( 1, 7 ), D( 1, 8 ), D( 1, 9 ), - $ D( 1, 10 ), D( 1, 11 ), A( 1, 3 ), NMAX, - $ A( 1, 4 ), A( 1, 5 ), D( 1, 12 ), A( 1, 6 ), - $ WORK, LWORK, IWORK, LIWORK, RESULT, INFO ) - ENDIF - IF( INFO.NE.0 ) - $ WRITE( NOUT, FMT = 9980 )'DCHKST', INFO - END IF - IF( TSTDRV ) THEN - IF( LSAMEN( 3, C3, 'SE2' ) ) THEN - CALL DDRVST2STG( NN, NVAL, 18, DOTYPE, ISEED, THRESH, - $ NOUT, A( 1, 1 ), NMAX, D( 1, 3 ), D( 1, 4 ), - $ D( 1, 5 ), D( 1, 6 ), D( 1, 8 ), D( 1, 9 ), - $ D( 1, 10 ), D( 1, 11 ), A( 1, 2 ), NMAX, - $ A( 1, 3 ), D( 1, 12 ), A( 1, 4 ), WORK, - $ LWORK, IWORK, LIWORK, RESULT, INFO ) - ELSE - CALL DDRVST( NN, NVAL, 18, DOTYPE, ISEED, THRESH, NOUT, - $ A( 1, 1 ), NMAX, D( 1, 3 ), D( 1, 4 ), - 
$ D( 1, 5 ), D( 1, 6 ), D( 1, 8 ), D( 1, 9 ), - $ D( 1, 10 ), D( 1, 11 ), A( 1, 2 ), NMAX, - $ A( 1, 3 ), D( 1, 12 ), A( 1, 4 ), WORK, - $ LWORK, IWORK, LIWORK, RESULT, INFO ) - ENDIF - IF( INFO.NE.0 ) - $ WRITE( NOUT, FMT = 9980 )'DDRVST', INFO - END IF - 290 CONTINUE -* - ELSE IF( LSAMEN( 3, C3, 'DSG' ) ) THEN -* -* ---------------------------------------------- -* DSG: Symmetric Generalized Eigenvalue Problem -* ---------------------------------------------- -* Vary the parameters -* NB = block size -* NBMIN = minimum block size -* NX = crossover point -* - MAXTYP = 21 - NTYPES = MIN( MAXTYP, NTYPES ) - CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) - CALL XLAENV( 9, 25 ) - DO 310 I = 1, NPARMS - CALL XLAENV( 1, NBVAL( I ) ) - CALL XLAENV( 2, NBMIN( I ) ) - CALL XLAENV( 3, NXVAL( I ) ) -* - IF( NEWSD.EQ.0 ) THEN - DO 300 K = 1, 4 - ISEED( K ) = IOLDSD( K ) - 300 CONTINUE - END IF - WRITE( NOUT, FMT = 9997 )C3, NBVAL( I ), NBMIN( I ), - $ NXVAL( I ) - IF( TSTCHK ) THEN -* CALL DDRVSG( NN, NVAL, MAXTYP, DOTYPE, ISEED, THRESH, -* $ NOUT, A( 1, 1 ), NMAX, A( 1, 2 ), NMAX, -* $ D( 1, 3 ), A( 1, 3 ), NMAX, A( 1, 4 ), -* $ A( 1, 5 ), A( 1, 6 ), A( 1, 7 ), WORK, -* $ LWORK, IWORK, LIWORK, RESULT, INFO ) - CALL DDRVSG2STG( NN, NVAL, MAXTYP, DOTYPE, ISEED, THRESH, - $ NOUT, A( 1, 1 ), NMAX, A( 1, 2 ), NMAX, - $ D( 1, 3 ), D( 1, 3 ), A( 1, 3 ), NMAX, - $ A( 1, 4 ), A( 1, 5 ), A( 1, 6 ), - $ A( 1, 7 ), WORK, LWORK, IWORK, LIWORK, - $ RESULT, INFO ) - IF( INFO.NE.0 ) - $ WRITE( NOUT, FMT = 9980 )'DDRVSG', INFO - END IF - 310 CONTINUE -* - ELSE IF( LSAMEN( 3, C3, 'DBD' ) .OR. 
LSAMEN( 3, C3, 'SVD' ) ) THEN -* -* ---------------------------------- -* SVD: Singular Value Decomposition -* ---------------------------------- -* Vary the parameters -* NB = block size -* NBMIN = minimum block size -* NX = crossover point -* NRHS = number of right hand sides -* - MAXTYP = 16 - NTYPES = MIN( MAXTYP, NTYPES ) - CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) - CALL XLAENV( 1, 1 ) - CALL XLAENV( 9, 25 ) -* -* Test the error exits -* - IF( TSTERR .AND. TSTCHK ) - $ CALL DERRBD( 'DBD', NOUT ) - IF( TSTERR .AND. TSTDRV ) - $ CALL DERRED( 'DBD', NOUT ) -* - DO 330 I = 1, NPARMS - NRHS = NSVAL( I ) - CALL XLAENV( 1, NBVAL( I ) ) - CALL XLAENV( 2, NBMIN( I ) ) - CALL XLAENV( 3, NXVAL( I ) ) - IF( NEWSD.EQ.0 ) THEN - DO 320 K = 1, 4 - ISEED( K ) = IOLDSD( K ) - 320 CONTINUE - END IF - WRITE( NOUT, FMT = 9995 )C3, NBVAL( I ), NBMIN( I ), - $ NXVAL( I ), NRHS - IF( TSTCHK ) THEN - CALL DCHKBD( NN, MVAL, NVAL, MAXTYP, DOTYPE, NRHS, ISEED, - $ THRESH, A( 1, 1 ), NMAX, D( 1, 1 ), - $ D( 1, 2 ), D( 1, 3 ), D( 1, 4 ), A( 1, 2 ), - $ NMAX, A( 1, 3 ), A( 1, 4 ), A( 1, 5 ), NMAX, - $ A( 1, 6 ), NMAX, A( 1, 7 ), A( 1, 8 ), WORK, - $ LWORK, IWORK, NOUT, INFO ) - IF( INFO.NE.0 ) - $ WRITE( NOUT, FMT = 9980 )'DCHKBD', INFO - END IF - IF( TSTDRV ) - $ CALL DDRVBD( NN, MVAL, NVAL, MAXTYP, DOTYPE, ISEED, - $ THRESH, A( 1, 1 ), NMAX, A( 1, 2 ), NMAX, - $ A( 1, 3 ), NMAX, A( 1, 4 ), A( 1, 5 ), - $ A( 1, 6 ), D( 1, 1 ), D( 1, 2 ), D( 1, 3 ), - $ WORK, LWORK, IWORK, NOUT, INFO ) - 330 CONTINUE -* - ELSE IF( LSAMEN( 3, C3, 'DEV' ) ) THEN -* -* -------------------------------------------- -* DEV: Nonsymmetric Eigenvalue Problem Driver -* DGEEV (eigenvalues and eigenvectors) -* -------------------------------------------- -* - MAXTYP = 21 - NTYPES = MIN( MAXTYP, NTYPES ) - IF( NTYPES.LE.0 ) THEN - WRITE( NOUT, FMT = 9990 )C3 - ELSE - IF( TSTERR ) - $ CALL DERRED( C3, NOUT ) - CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) - CALL DDRVEV( NN, NVAL, NTYPES, DOTYPE, 
ISEED, THRESH, NOUT, - $ A( 1, 1 ), NMAX, A( 1, 2 ), D( 1, 1 ), - $ D( 1, 2 ), D( 1, 3 ), D( 1, 4 ), A( 1, 3 ), - $ NMAX, A( 1, 4 ), NMAX, A( 1, 5 ), NMAX, RESULT, - $ WORK, LWORK, IWORK, INFO ) - IF( INFO.NE.0 ) - $ WRITE( NOUT, FMT = 9980 )'DGEEV', INFO - END IF - WRITE( NOUT, FMT = 9973 ) - GO TO 10 -* - ELSE IF( LSAMEN( 3, C3, 'DES' ) ) THEN -* -* -------------------------------------------- -* DES: Nonsymmetric Eigenvalue Problem Driver -* DGEES (Schur form) -* -------------------------------------------- -* - MAXTYP = 21 - NTYPES = MIN( MAXTYP, NTYPES ) - IF( NTYPES.LE.0 ) THEN - WRITE( NOUT, FMT = 9990 )C3 - ELSE - IF( TSTERR ) - $ CALL DERRED( C3, NOUT ) - CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) - CALL DDRVES( NN, NVAL, NTYPES, DOTYPE, ISEED, THRESH, NOUT, - $ A( 1, 1 ), NMAX, A( 1, 2 ), A( 1, 3 ), - $ D( 1, 1 ), D( 1, 2 ), D( 1, 3 ), D( 1, 4 ), - $ A( 1, 4 ), NMAX, RESULT, WORK, LWORK, IWORK, - $ LOGWRK, INFO ) - IF( INFO.NE.0 ) - $ WRITE( NOUT, FMT = 9980 )'DGEES', INFO - END IF - WRITE( NOUT, FMT = 9973 ) - GO TO 10 -* - ELSE IF( LSAMEN( 3, C3, 'DVX' ) ) THEN -* -* -------------------------------------------------------------- -* DVX: Nonsymmetric Eigenvalue Problem Expert Driver -* DGEEVX (eigenvalues, eigenvectors and condition numbers) -* -------------------------------------------------------------- -* - MAXTYP = 21 - NTYPES = MIN( MAXTYP, NTYPES ) - IF( NTYPES.LT.0 ) THEN - WRITE( NOUT, FMT = 9990 )C3 - ELSE - IF( TSTERR ) - $ CALL DERRED( C3, NOUT ) - CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) - CALL DDRVVX( NN, NVAL, NTYPES, DOTYPE, ISEED, THRESH, NIN, - $ NOUT, A( 1, 1 ), NMAX, A( 1, 2 ), D( 1, 1 ), - $ D( 1, 2 ), D( 1, 3 ), D( 1, 4 ), A( 1, 3 ), - $ NMAX, A( 1, 4 ), NMAX, A( 1, 5 ), NMAX, - $ D( 1, 5 ), D( 1, 6 ), D( 1, 7 ), D( 1, 8 ), - $ D( 1, 9 ), D( 1, 10 ), D( 1, 11 ), D( 1, 12 ), - $ RESULT, WORK, LWORK, IWORK, INFO ) - IF( INFO.NE.0 ) - $ WRITE( NOUT, FMT = 9980 )'DGEEVX', INFO - END IF - WRITE( NOUT, FMT = 9973 ) - 
GO TO 10 -* - ELSE IF( LSAMEN( 3, C3, 'DSX' ) ) THEN -* -* --------------------------------------------------- -* DSX: Nonsymmetric Eigenvalue Problem Expert Driver -* DGEESX (Schur form and condition numbers) -* --------------------------------------------------- -* - MAXTYP = 21 - NTYPES = MIN( MAXTYP, NTYPES ) - IF( NTYPES.LT.0 ) THEN - WRITE( NOUT, FMT = 9990 )C3 - ELSE - IF( TSTERR ) - $ CALL DERRED( C3, NOUT ) - CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) - CALL DDRVSX( NN, NVAL, NTYPES, DOTYPE, ISEED, THRESH, NIN, - $ NOUT, A( 1, 1 ), NMAX, A( 1, 2 ), A( 1, 3 ), - $ D( 1, 1 ), D( 1, 2 ), D( 1, 3 ), D( 1, 4 ), - $ D( 1, 5 ), D( 1, 6 ), A( 1, 4 ), NMAX, - $ A( 1, 5 ), RESULT, WORK, LWORK, IWORK, LOGWRK, - $ INFO ) - IF( INFO.NE.0 ) - $ WRITE( NOUT, FMT = 9980 )'DGEESX', INFO - END IF - WRITE( NOUT, FMT = 9973 ) - GO TO 10 -* - ELSE IF( LSAMEN( 3, C3, 'DGG' ) ) THEN -* -* ------------------------------------------------- -* DGG: Generalized Nonsymmetric Eigenvalue Problem -* ------------------------------------------------- -* Vary the parameters -* NB = block size -* NBMIN = minimum block size -* NS = number of shifts -* MAXB = minimum submatrix size -* IACC22: structured matrix multiply -* NBCOL = minimum column dimension for blocks -* - MAXTYP = 26 - NTYPES = MIN( MAXTYP, NTYPES ) - CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) - CALL XLAENV(1,1) - IF( TSTCHK .AND. TSTERR ) - $ CALL DERRGG( C3, NOUT ) - DO 350 I = 1, NPARMS - CALL XLAENV( 1, NBVAL( I ) ) - CALL XLAENV( 2, NBMIN( I ) ) - CALL XLAENV( 4, NSVAL( I ) ) - CALL XLAENV( 8, MXBVAL( I ) ) - CALL XLAENV( 16, IACC22( I ) ) - CALL XLAENV( 5, NBCOL( I ) ) -* - IF( NEWSD.EQ.0 ) THEN - DO 340 K = 1, 4 - ISEED( K ) = IOLDSD( K ) - 340 CONTINUE - END IF - WRITE( NOUT, FMT = 9996 )C3, NBVAL( I ), NBMIN( I ), - $ NSVAL( I ), MXBVAL( I ), IACC22( I ), NBCOL( I ) - TSTDIF = .FALSE. 
- THRSHN = 10.D0 - IF( TSTCHK ) THEN - CALL DCHKGG( NN, NVAL, MAXTYP, DOTYPE, ISEED, THRESH, - $ TSTDIF, THRSHN, NOUT, A( 1, 1 ), NMAX, - $ A( 1, 2 ), A( 1, 3 ), A( 1, 4 ), A( 1, 5 ), - $ A( 1, 6 ), A( 1, 7 ), A( 1, 8 ), A( 1, 9 ), - $ NMAX, A( 1, 10 ), A( 1, 11 ), A( 1, 12 ), - $ D( 1, 1 ), D( 1, 2 ), D( 1, 3 ), D( 1, 4 ), - $ D( 1, 5 ), D( 1, 6 ), A( 1, 13 ), - $ A( 1, 14 ), WORK, LWORK, LOGWRK, RESULT, - $ INFO ) - IF( INFO.NE.0 ) - $ WRITE( NOUT, FMT = 9980 )'DCHKGG', INFO - END IF - 350 CONTINUE -* - ELSE IF( LSAMEN( 3, C3, 'DGS' ) ) THEN -* -* ------------------------------------------------- -* DGS: Generalized Nonsymmetric Eigenvalue Problem -* DGGES (Schur form) -* ------------------------------------------------- -* - MAXTYP = 26 - NTYPES = MIN( MAXTYP, NTYPES ) - IF( NTYPES.LE.0 ) THEN - WRITE( NOUT, FMT = 9990 )C3 - ELSE - IF( TSTERR ) - $ CALL DERRGG( C3, NOUT ) - CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) - CALL DDRGES( NN, NVAL, MAXTYP, DOTYPE, ISEED, THRESH, NOUT, - $ A( 1, 1 ), NMAX, A( 1, 2 ), A( 1, 3 ), - $ A( 1, 4 ), A( 1, 7 ), NMAX, A( 1, 8 ), - $ D( 1, 1 ), D( 1, 2 ), D( 1, 3 ), WORK, LWORK, - $ RESULT, LOGWRK, INFO ) - IF( INFO.NE.0 ) - $ WRITE( NOUT, FMT = 9980 )'DDRGES', INFO -* -* Blocked version -* - CALL XLAENV(16, 2) - CALL DDRGES3( NN, NVAL, MAXTYP, DOTYPE, ISEED, THRESH, NOUT, - $ A( 1, 1 ), NMAX, A( 1, 2 ), A( 1, 3 ), - $ A( 1, 4 ), A( 1, 7 ), NMAX, A( 1, 8 ), - $ D( 1, 1 ), D( 1, 2 ), D( 1, 3 ), WORK, LWORK, - $ RESULT, LOGWRK, INFO ) - IF( INFO.NE.0 ) - $ WRITE( NOUT, FMT = 9980 )'DDRGES3', INFO - END IF - WRITE( NOUT, FMT = 9973 ) - GO TO 10 -* - ELSE IF( DGX ) THEN -* -* ------------------------------------------------- -* DGX: Generalized Nonsymmetric Eigenvalue Problem -* DGGESX (Schur form and condition numbers) -* ------------------------------------------------- -* - MAXTYP = 5 - NTYPES = MAXTYP - IF( NN.LT.0 ) THEN - WRITE( NOUT, FMT = 9990 )C3 - ELSE - IF( TSTERR ) - $ CALL DERRGG( C3, NOUT ) - CALL ALAREQ( C3, 
NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) - CALL XLAENV( 5, 2 ) - CALL DDRGSX( NN, NCMAX, THRESH, NIN, NOUT, A( 1, 1 ), NMAX, - $ A( 1, 2 ), A( 1, 3 ), A( 1, 4 ), A( 1, 5 ), - $ A( 1, 6 ), D( 1, 1 ), D( 1, 2 ), D( 1, 3 ), - $ C( 1, 1 ), NCMAX*NCMAX, A( 1, 12 ), WORK, - $ LWORK, IWORK, LIWORK, LOGWRK, INFO ) - IF( INFO.NE.0 ) - $ WRITE( NOUT, FMT = 9980 )'DDRGSX', INFO - END IF - WRITE( NOUT, FMT = 9973 ) - GO TO 10 -* - ELSE IF( LSAMEN( 3, C3, 'DGV' ) ) THEN -* -* ------------------------------------------------- -* DGV: Generalized Nonsymmetric Eigenvalue Problem -* DGGEV (Eigenvalue/vector form) -* ------------------------------------------------- -* - MAXTYP = 26 - NTYPES = MIN( MAXTYP, NTYPES ) - IF( NTYPES.LE.0 ) THEN - WRITE( NOUT, FMT = 9990 )C3 - ELSE - IF( TSTERR ) - $ CALL DERRGG( C3, NOUT ) - CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) - CALL DDRGEV( NN, NVAL, MAXTYP, DOTYPE, ISEED, THRESH, NOUT, - $ A( 1, 1 ), NMAX, A( 1, 2 ), A( 1, 3 ), - $ A( 1, 4 ), A( 1, 7 ), NMAX, A( 1, 8 ), - $ A( 1, 9 ), NMAX, D( 1, 1 ), D( 1, 2 ), - $ D( 1, 3 ), D( 1, 4 ), D( 1, 5 ), D( 1, 6 ), - $ WORK, LWORK, RESULT, INFO ) - IF( INFO.NE.0 ) - $ WRITE( NOUT, FMT = 9980 )'DDRGEV', INFO -* -* Blocked version -* - CALL DDRGEV3( NN, NVAL, MAXTYP, DOTYPE, ISEED, THRESH, NOUT, - $ A( 1, 1 ), NMAX, A( 1, 2 ), A( 1, 3 ), - $ A( 1, 4 ), A( 1, 7 ), NMAX, A( 1, 8 ), - $ A( 1, 9 ), NMAX, D( 1, 1 ), D( 1, 2 ), - $ D( 1, 3 ), D( 1, 4 ), D( 1, 5 ), D( 1, 6 ), - $ WORK, LWORK, RESULT, INFO ) - IF( INFO.NE.0 ) - $ WRITE( NOUT, FMT = 9980 )'DDRGEV3', INFO - END IF - WRITE( NOUT, FMT = 9973 ) - GO TO 10 -* - ELSE IF( DXV ) THEN -* -* ------------------------------------------------- -* DXV: Generalized Nonsymmetric Eigenvalue Problem -* DGGEVX (eigenvalue/vector with condition numbers) -* ------------------------------------------------- -* - MAXTYP = 2 - NTYPES = MAXTYP - IF( NN.LT.0 ) THEN - WRITE( NOUT, FMT = 9990 )C3 - ELSE - IF( TSTERR ) - $ CALL DERRGG( C3, NOUT ) - CALL ALAREQ( C3, 
NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) - CALL DDRGVX( NN, THRESH, NIN, NOUT, A( 1, 1 ), NMAX, - $ A( 1, 2 ), A( 1, 3 ), A( 1, 4 ), D( 1, 1 ), - $ D( 1, 2 ), D( 1, 3 ), A( 1, 5 ), A( 1, 6 ), - $ IWORK( 1 ), IWORK( 2 ), D( 1, 4 ), D( 1, 5 ), - $ D( 1, 6 ), D( 1, 7 ), D( 1, 8 ), D( 1, 9 ), - $ WORK, LWORK, IWORK( 3 ), LIWORK-2, RESULT, - $ LOGWRK, INFO ) -* - IF( INFO.NE.0 ) - $ WRITE( NOUT, FMT = 9980 )'DDRGVX', INFO - END IF - WRITE( NOUT, FMT = 9973 ) - GO TO 10 -* - ELSE IF( LSAMEN( 3, C3, 'DSB' ) ) THEN -* -* ------------------------------ -* DSB: Symmetric Band Reduction -* ------------------------------ -* - MAXTYP = 15 - NTYPES = MIN( MAXTYP, NTYPES ) - CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) - IF( TSTERR ) - $ CALL DERRST( 'DSB', NOUT ) -* CALL DCHKSB( NN, NVAL, NK, KVAL, MAXTYP, DOTYPE, ISEED, THRESH, -* $ NOUT, A( 1, 1 ), NMAX, D( 1, 1 ), D( 1, 2 ), -* $ A( 1, 2 ), NMAX, WORK, LWORK, RESULT, INFO ) - CALL DCHKSB2STG( NN, NVAL, NK, KVAL, MAXTYP, DOTYPE, ISEED, - $ THRESH, NOUT, A( 1, 1 ), NMAX, D( 1, 1 ), - $ D( 1, 2 ), D( 1, 3 ), D( 1, 4 ), D( 1, 5 ), - $ A( 1, 2 ), NMAX, WORK, LWORK, RESULT, INFO ) - IF( INFO.NE.0 ) - $ WRITE( NOUT, FMT = 9980 )'DCHKSB', INFO -* - ELSE IF( LSAMEN( 3, C3, 'DBB' ) ) THEN -* -* ------------------------------ -* DBB: General Band Reduction -* ------------------------------ -* - MAXTYP = 15 - NTYPES = MIN( MAXTYP, NTYPES ) - CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) - DO 370 I = 1, NPARMS - NRHS = NSVAL( I ) -* - IF( NEWSD.EQ.0 ) THEN - DO 360 K = 1, 4 - ISEED( K ) = IOLDSD( K ) - 360 CONTINUE - END IF - WRITE( NOUT, FMT = 9966 )C3, NRHS - CALL DCHKBB( NN, MVAL, NVAL, NK, KVAL, MAXTYP, DOTYPE, NRHS, - $ ISEED, THRESH, NOUT, A( 1, 1 ), NMAX, - $ A( 1, 2 ), 2*NMAX, D( 1, 1 ), D( 1, 2 ), - $ A( 1, 4 ), NMAX, A( 1, 5 ), NMAX, A( 1, 6 ), - $ NMAX, A( 1, 7 ), WORK, LWORK, RESULT, INFO ) - IF( INFO.NE.0 ) - $ WRITE( NOUT, FMT = 9980 )'DCHKBB', INFO - 370 CONTINUE -* - ELSE IF( LSAMEN( 3, C3, 'GLM' ) ) THEN -* -* 
----------------------------------------- -* GLM: Generalized Linear Regression Model -* ----------------------------------------- -* - CALL XLAENV( 1, 1 ) - IF( TSTERR ) - $ CALL DERRGG( 'GLM', NOUT ) - CALL DCKGLM( NN, MVAL, PVAL, NVAL, NTYPES, ISEED, THRESH, NMAX, - $ A( 1, 1 ), A( 1, 2 ), B( 1, 1 ), B( 1, 2 ), X, - $ WORK, D( 1, 1 ), NIN, NOUT, INFO ) - IF( INFO.NE.0 ) - $ WRITE( NOUT, FMT = 9980 )'DCKGLM', INFO -* - ELSE IF( LSAMEN( 3, C3, 'GQR' ) ) THEN -* -* ------------------------------------------ -* GQR: Generalized QR and RQ factorizations -* ------------------------------------------ -* - CALL XLAENV( 1, 1 ) - IF( TSTERR ) - $ CALL DERRGG( 'GQR', NOUT ) - CALL DCKGQR( NN, MVAL, NN, PVAL, NN, NVAL, NTYPES, ISEED, - $ THRESH, NMAX, A( 1, 1 ), A( 1, 2 ), A( 1, 3 ), - $ A( 1, 4 ), TAUA, B( 1, 1 ), B( 1, 2 ), B( 1, 3 ), - $ B( 1, 4 ), B( 1, 5 ), TAUB, WORK, D( 1, 1 ), NIN, - $ NOUT, INFO ) - IF( INFO.NE.0 ) - $ WRITE( NOUT, FMT = 9980 )'DCKGQR', INFO -* - ELSE IF( LSAMEN( 3, C3, 'GSV' ) ) THEN -* -* ---------------------------------------------- -* GSV: Generalized Singular Value Decomposition -* ---------------------------------------------- -* - CALL XLAENV(1,1) - IF( TSTERR ) - $ CALL DERRGG( 'GSV', NOUT ) - CALL DCKGSV( NN, MVAL, PVAL, NVAL, NTYPES, ISEED, THRESH, NMAX, - $ A( 1, 1 ), A( 1, 2 ), B( 1, 1 ), B( 1, 2 ), - $ A( 1, 3 ), B( 1, 3 ), A( 1, 4 ), TAUA, TAUB, - $ B( 1, 4 ), IWORK, WORK, D( 1, 1 ), NIN, NOUT, - $ INFO ) - IF( INFO.NE.0 ) - $ WRITE( NOUT, FMT = 9980 )'DCKGSV', INFO -* - ELSE IF( LSAMEN( 3, C3, 'CSD' ) ) THEN -* -* ---------------------------------------------- -* CSD: CS Decomposition -* ---------------------------------------------- -* - CALL XLAENV(1,1) - IF( TSTERR ) - $ CALL DERRGG( 'CSD', NOUT ) - CALL DCKCSD( NN, MVAL, PVAL, NVAL, NTYPES, ISEED, THRESH, NMAX, - $ A( 1, 1 ), A( 1, 2 ), A( 1, 3 ), A( 1, 4 ), - $ A( 1, 5 ), A( 1, 6 ), A( 1, 7 ), IWORK, WORK, - $ D( 1, 1 ), NIN, NOUT, INFO ) - IF( INFO.NE.0 ) - $ WRITE( NOUT, FMT 
= 9980 )'DCKCSD', INFO -* - ELSE IF( LSAMEN( 3, C3, 'LSE' ) ) THEN -* -* -------------------------------------- -* LSE: Constrained Linear Least Squares -* -------------------------------------- -* - CALL XLAENV( 1, 1 ) - IF( TSTERR ) - $ CALL DERRGG( 'LSE', NOUT ) - CALL DCKLSE( NN, MVAL, PVAL, NVAL, NTYPES, ISEED, THRESH, NMAX, - $ A( 1, 1 ), A( 1, 2 ), B( 1, 1 ), B( 1, 2 ), X, - $ WORK, D( 1, 1 ), NIN, NOUT, INFO ) - IF( INFO.NE.0 ) - $ WRITE( NOUT, FMT = 9980 )'DCKLSE', INFO -* - ELSE - WRITE( NOUT, FMT = * ) - WRITE( NOUT, FMT = * ) - WRITE( NOUT, FMT = 9992 )C3 - END IF - IF( .NOT.( DGX .OR. DXV ) ) - $ GO TO 190 - 380 CONTINUE - WRITE( NOUT, FMT = 9994 ) - S2 = DSECND( ) - WRITE( NOUT, FMT = 9993 )S2 - S1 -* - 9999 FORMAT( / ' Execution not attempted due to input errors' ) - 9997 FORMAT( / / 1X, A3, ': NB =', I4, ', NBMIN =', I4, ', NX =', I4 ) - 9996 FORMAT( / / 1X, A3, ': NB =', I4, ', NBMIN =', I4, ', NS =', I4, - $ ', MAXB =', I4, ', IACC22 =', I4, ', NBCOL =', I4 ) - 9995 FORMAT( / / 1X, A3, ': NB =', I4, ', NBMIN =', I4, ', NX =', I4, - $ ', NRHS =', I4 ) - 9994 FORMAT( / / ' End of tests' ) - 9993 FORMAT( ' Total time used = ', F12.2, ' seconds', / ) - 9992 FORMAT( 1X, A3, ': Unrecognized path name' ) - 9991 FORMAT( / / ' *** Invalid integer value in column ', I2, - $ ' of input', ' line:', / A79 ) - 9990 FORMAT( / / 1X, A3, ' routines were not tested' ) - 9989 FORMAT( ' Invalid input value: ', A, '=', I6, '; must be >=', - $ I6 ) - 9988 FORMAT( ' Invalid input value: ', A, '=', I6, '; must be <=', - $ I6 ) - 9987 FORMAT( ' Tests of the Nonsymmetric Eigenvalue Problem routines' ) - 9986 FORMAT( ' Tests of the Symmetric Eigenvalue Problem routines' ) - 9985 FORMAT( ' Tests of the Singular Value Decomposition routines' ) - 9984 FORMAT( / ' The following parameter values will be used:' ) - 9983 FORMAT( 4X, A, 10I6, / 10X, 10I6 ) - 9982 FORMAT( / ' Routines pass computational tests if test ratio is ', - $ 'less than', F8.2, / ) - 9981 FORMAT( ' Relative 
machine ', A, ' is taken to be', D16.6 ) - 9980 FORMAT( ' *** Error code from ', A, ' = ', I4 ) - 9979 FORMAT( / ' Tests of the Nonsymmetric Eigenvalue Problem Driver', - $ / ' DGEEV (eigenvalues and eigevectors)' ) - 9978 FORMAT( / ' Tests of the Nonsymmetric Eigenvalue Problem Driver', - $ / ' DGEES (Schur form)' ) - 9977 FORMAT( / ' Tests of the Nonsymmetric Eigenvalue Problem Expert', - $ ' Driver', / ' DGEEVX (eigenvalues, eigenvectors and', - $ ' condition numbers)' ) - 9976 FORMAT( / ' Tests of the Nonsymmetric Eigenvalue Problem Expert', - $ ' Driver', / ' DGEESX (Schur form and condition', - $ ' numbers)' ) - 9975 FORMAT( / ' Tests of the Generalized Nonsymmetric Eigenvalue ', - $ 'Problem routines' ) - 9974 FORMAT( ' Tests of DSBTRD', / ' (reduction of a symmetric band ', - $ 'matrix to tridiagonal form)' ) - 9973 FORMAT( / 1X, 71( '-' ) ) - 9972 FORMAT( / ' LAPACK VERSION ', I1, '.', I1, '.', I1 ) - 9971 FORMAT( / ' Tests of the Generalized Linear Regression Model ', - $ 'routines' ) - 9970 FORMAT( / ' Tests of the Generalized QR and RQ routines' ) - 9969 FORMAT( / ' Tests of the Generalized Singular Value', - $ ' Decomposition routines' ) - 9968 FORMAT( / ' Tests of the Linear Least Squares routines' ) - 9967 FORMAT( ' Tests of DGBBRD', / ' (reduction of a general band ', - $ 'matrix to real bidiagonal form)' ) - 9966 FORMAT( / / 1X, A3, ': NRHS =', I4 ) - 9965 FORMAT( / ' Tests of the Generalized Nonsymmetric Eigenvalue ', - $ 'Problem Expert Driver DGGESX' ) - 9964 FORMAT( / ' Tests of the Generalized Nonsymmetric Eigenvalue ', - $ 'Problem Driver DGGES' ) - 9963 FORMAT( / ' Tests of the Generalized Nonsymmetric Eigenvalue ', - $ 'Problem Driver DGGEV' ) - 9962 FORMAT( / ' Tests of the Generalized Nonsymmetric Eigenvalue ', - $ 'Problem Expert Driver DGGEVX' ) - 9961 FORMAT( / / 1X, A3, ': NB =', I4, ', NBMIN =', I4, ', NX =', I4, - $ ', INMIN=', I4, - $ ', INWIN =', I4, ', INIBL =', I4, ', ISHFTS =', I4, - $ ', IACC22 =', I4) - 9960 FORMAT( / ' Tests 
of the CS Decomposition routines' ) -* -* End of DCHKEE -* - END From 0e96c378fde1e9587dcfec35af221ee8cc3c90cb Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sun, 28 Feb 2021 18:46:52 +0100 Subject: [PATCH 107/134] Delete schkee.f --- lapack-netlib/TESTING/EIG/schkee.f | 2510 ---------------------------- 1 file changed, 2510 deletions(-) delete mode 100644 lapack-netlib/TESTING/EIG/schkee.f diff --git a/lapack-netlib/TESTING/EIG/schkee.f b/lapack-netlib/TESTING/EIG/schkee.f deleted file mode 100644 index 3757e0655..000000000 --- a/lapack-netlib/TESTING/EIG/schkee.f +++ /dev/null @@ -1,2510 +0,0 @@ -*> \brief \b SCHKEE -* -* =========== DOCUMENTATION =========== -* -* Online html documentation available at -* http://www.netlib.org/lapack/explore-html/ -* -* Definition: -* =========== -* -* PROGRAM SCHKEE -* -* -*> \par Purpose: -* ============= -*> -*> \verbatim -*> -*> SCHKEE tests the REAL LAPACK subroutines for the matrix -*> eigenvalue problem. The test paths in this version are -*> -*> NEP (Nonsymmetric Eigenvalue Problem): -*> Test SGEHRD, SORGHR, SHSEQR, STREVC, SHSEIN, and SORMHR -*> -*> SEP (Symmetric Eigenvalue Problem): -*> Test SSYTRD, SORGTR, SSTEQR, SSTERF, SSTEIN, SSTEDC, -*> and drivers SSYEV(X), SSBEV(X), SSPEV(X), SSTEV(X), -*> SSYEVD, SSBEVD, SSPEVD, SSTEVD -*> -*> SVD (Singular Value Decomposition): -*> Test SGEBRD, SORGBR, SBDSQR, SBDSDC -*> and the drivers SGESVD, SGESDD -*> -*> SEV (Nonsymmetric Eigenvalue/eigenvector Driver): -*> Test SGEEV -*> -*> SES (Nonsymmetric Schur form Driver): -*> Test SGEES -*> -*> SVX (Nonsymmetric Eigenvalue/eigenvector Expert Driver): -*> Test SGEEVX -*> -*> SSX (Nonsymmetric Schur form Expert Driver): -*> Test SGEESX -*> -*> SGG (Generalized Nonsymmetric Eigenvalue Problem): -*> Test SGGHD3, SGGBAL, SGGBAK, SHGEQZ, and STGEVC -*> -*> SGS (Generalized Nonsymmetric Schur form Driver): -*> Test SGGES -*> -*> SGV (Generalized Nonsymmetric Eigenvalue/eigenvector Driver): -*> Test SGGEV -*> -*> SGX (Generalized 
Nonsymmetric Schur form Expert Driver): -*> Test SGGESX -*> -*> SXV (Generalized Nonsymmetric Eigenvalue/eigenvector Expert Driver): -*> Test SGGEVX -*> -*> SSG (Symmetric Generalized Eigenvalue Problem): -*> Test SSYGST, SSYGV, SSYGVD, SSYGVX, SSPGST, SSPGV, SSPGVD, -*> SSPGVX, SSBGST, SSBGV, SSBGVD, and SSBGVX -*> -*> SSB (Symmetric Band Eigenvalue Problem): -*> Test SSBTRD -*> -*> SBB (Band Singular Value Decomposition): -*> Test SGBBRD -*> -*> SEC (Eigencondition estimation): -*> Test SLALN2, SLASY2, SLAEQU, SLAEXC, STRSYL, STREXC, STRSNA, -*> STRSEN, and SLAQTR -*> -*> SBL (Balancing a general matrix) -*> Test SGEBAL -*> -*> SBK (Back transformation on a balanced matrix) -*> Test SGEBAK -*> -*> SGL (Balancing a matrix pair) -*> Test SGGBAL -*> -*> SGK (Back transformation on a matrix pair) -*> Test SGGBAK -*> -*> GLM (Generalized Linear Regression Model): -*> Tests SGGGLM -*> -*> GQR (Generalized QR and RQ factorizations): -*> Tests SGGQRF and SGGRQF -*> -*> GSV (Generalized Singular Value Decomposition): -*> Tests SGGSVD, SGGSVP, STGSJA, SLAGS2, SLAPLL, and SLAPMT -*> -*> CSD (CS decomposition): -*> Tests SORCSD -*> -*> LSE (Constrained Linear Least Squares): -*> Tests SGGLSE -*> -*> Each test path has a different set of inputs, but the data sets for -*> the driver routines xEV, xES, xVX, and xSX can be concatenated in a -*> single input file. The first line of input should contain one of the -*> 3-character path names in columns 1-3. The number of remaining lines -*> depends on what is found on the first line. -*> -*> The number of matrix types used in testing is often controllable from -*> the input file. 
The number of matrix types for each path, and the -*> test routine that describes them, is as follows: -*> -*> Path name(s) Types Test routine -*> -*> SHS or NEP 21 SCHKHS -*> SST or SEP 21 SCHKST (routines) -*> 18 SDRVST (drivers) -*> SBD or SVD 16 SCHKBD (routines) -*> 5 SDRVBD (drivers) -*> SEV 21 SDRVEV -*> SES 21 SDRVES -*> SVX 21 SDRVVX -*> SSX 21 SDRVSX -*> SGG 26 SCHKGG (routines) -*> SGS 26 SDRGES -*> SGX 5 SDRGSX -*> SGV 26 SDRGEV -*> SXV 2 SDRGVX -*> SSG 21 SDRVSG -*> SSB 15 SCHKSB -*> SBB 15 SCHKBB -*> SEC - SCHKEC -*> SBL - SCHKBL -*> SBK - SCHKBK -*> SGL - SCHKGL -*> SGK - SCHKGK -*> GLM 8 SCKGLM -*> GQR 8 SCKGQR -*> GSV 8 SCKGSV -*> CSD 3 SCKCSD -*> LSE 8 SCKLSE -*> -*>----------------------------------------------------------------------- -*> -*> NEP input file: -*> -*> line 2: NN, INTEGER -*> Number of values of N. -*> -*> line 3: NVAL, INTEGER array, dimension (NN) -*> The values for the matrix dimension N. -*> -*> line 4: NPARMS, INTEGER -*> Number of values of the parameters NB, NBMIN, NX, NS, and -*> MAXB. -*> -*> line 5: NBVAL, INTEGER array, dimension (NPARMS) -*> The values for the blocksize NB. -*> -*> line 6: NBMIN, INTEGER array, dimension (NPARMS) -*> The values for the minimum blocksize NBMIN. -*> -*> line 7: NXVAL, INTEGER array, dimension (NPARMS) -*> The values for the crossover point NX. -*> -*> line 8: INMIN, INTEGER array, dimension (NPARMS) -*> LAHQR vs TTQRE crossover point, >= 11 -*> -*> line 9: INWIN, INTEGER array, dimension (NPARMS) -*> recommended deflation window size -*> -*> line 10: INIBL, INTEGER array, dimension (NPARMS) -*> nibble crossover point -*> -*> line 11: ISHFTS, INTEGER array, dimension (NPARMS) -*> number of simultaneous shifts) -*> -*> line 12: IACC22, INTEGER array, dimension (NPARMS) -*> select structured matrix multiply: 0, 1 or 2) -*> -*> line 13: THRESH -*> Threshold value for the test ratios. 
Information will be -*> printed about each test for which the test ratio is greater -*> than or equal to the threshold. To have all of the test -*> ratios printed, use THRESH = 0.0 . -*> -*> line 14: NEWSD, INTEGER -*> A code indicating how to set the random number seed. -*> = 0: Set the seed to a default value before each run -*> = 1: Initialize the seed to a default value only before the -*> first run -*> = 2: Like 1, but use the seed values on the next line -*> -*> If line 14 was 2: -*> -*> line 15: INTEGER array, dimension (4) -*> Four integer values for the random number seed. -*> -*> lines 15-EOF: The remaining lines occur in sets of 1 or 2 and allow -*> the user to specify the matrix types. Each line contains -*> a 3-character path name in columns 1-3, and the number -*> of matrix types must be the first nonblank item in columns -*> 4-80. If the number of matrix types is at least 1 but is -*> less than the maximum number of possible types, a second -*> line will be read to get the numbers of the matrix types to -*> be used. For example, -*> NEP 21 -*> requests all of the matrix types for the nonsymmetric -*> eigenvalue problem, while -*> NEP 4 -*> 9 10 11 12 -*> requests only matrices of type 9, 10, 11, and 12. -*> -*> The valid 3-character path names are 'NEP' or 'SHS' for the -*> nonsymmetric eigenvalue routines. -*> -*>----------------------------------------------------------------------- -*> -*> SEP or SSG input file: -*> -*> line 2: NN, INTEGER -*> Number of values of N. -*> -*> line 3: NVAL, INTEGER array, dimension (NN) -*> The values for the matrix dimension N. -*> -*> line 4: NPARMS, INTEGER -*> Number of values of the parameters NB, NBMIN, and NX. -*> -*> line 5: NBVAL, INTEGER array, dimension (NPARMS) -*> The values for the blocksize NB. -*> -*> line 6: NBMIN, INTEGER array, dimension (NPARMS) -*> The values for the minimum blocksize NBMIN. -*> -*> line 7: NXVAL, INTEGER array, dimension (NPARMS) -*> The values for the crossover point NX. 
-*> -*> line 8: THRESH -*> Threshold value for the test ratios. Information will be -*> printed about each test for which the test ratio is greater -*> than or equal to the threshold. -*> -*> line 9: TSTCHK, LOGICAL -*> Flag indicating whether or not to test the LAPACK routines. -*> -*> line 10: TSTDRV, LOGICAL -*> Flag indicating whether or not to test the driver routines. -*> -*> line 11: TSTERR, LOGICAL -*> Flag indicating whether or not to test the error exits for -*> the LAPACK routines and driver routines. -*> -*> line 12: NEWSD, INTEGER -*> A code indicating how to set the random number seed. -*> = 0: Set the seed to a default value before each run -*> = 1: Initialize the seed to a default value only before the -*> first run -*> = 2: Like 1, but use the seed values on the next line -*> -*> If line 12 was 2: -*> -*> line 13: INTEGER array, dimension (4) -*> Four integer values for the random number seed. -*> -*> lines 13-EOF: Lines specifying matrix types, as for NEP. -*> The 3-character path names are 'SEP' or 'SST' for the -*> symmetric eigenvalue routines and driver routines, and -*> 'SSG' for the routines for the symmetric generalized -*> eigenvalue problem. -*> -*>----------------------------------------------------------------------- -*> -*> SVD input file: -*> -*> line 2: NN, INTEGER -*> Number of values of M and N. -*> -*> line 3: MVAL, INTEGER array, dimension (NN) -*> The values for the matrix row dimension M. -*> -*> line 4: NVAL, INTEGER array, dimension (NN) -*> The values for the matrix column dimension N. -*> -*> line 5: NPARMS, INTEGER -*> Number of values of the parameter NB, NBMIN, NX, and NRHS. -*> -*> line 6: NBVAL, INTEGER array, dimension (NPARMS) -*> The values for the blocksize NB. -*> -*> line 7: NBMIN, INTEGER array, dimension (NPARMS) -*> The values for the minimum blocksize NBMIN. -*> -*> line 8: NXVAL, INTEGER array, dimension (NPARMS) -*> The values for the crossover point NX. 
-*> -*> line 9: NSVAL, INTEGER array, dimension (NPARMS) -*> The values for the number of right hand sides NRHS. -*> -*> line 10: THRESH -*> Threshold value for the test ratios. Information will be -*> printed about each test for which the test ratio is greater -*> than or equal to the threshold. -*> -*> line 11: TSTCHK, LOGICAL -*> Flag indicating whether or not to test the LAPACK routines. -*> -*> line 12: TSTDRV, LOGICAL -*> Flag indicating whether or not to test the driver routines. -*> -*> line 13: TSTERR, LOGICAL -*> Flag indicating whether or not to test the error exits for -*> the LAPACK routines and driver routines. -*> -*> line 14: NEWSD, INTEGER -*> A code indicating how to set the random number seed. -*> = 0: Set the seed to a default value before each run -*> = 1: Initialize the seed to a default value only before the -*> first run -*> = 2: Like 1, but use the seed values on the next line -*> -*> If line 14 was 2: -*> -*> line 15: INTEGER array, dimension (4) -*> Four integer values for the random number seed. -*> -*> lines 15-EOF: Lines specifying matrix types, as for NEP. -*> The 3-character path names are 'SVD' or 'SBD' for both the -*> SVD routines and the SVD driver routines. -*> -*>----------------------------------------------------------------------- -*> -*> SEV and SES data files: -*> -*> line 1: 'SEV' or 'SES' in columns 1 to 3. -*> -*> line 2: NSIZES, INTEGER -*> Number of sizes of matrices to use. Should be at least 0 -*> and at most 20. If NSIZES = 0, no testing is done -*> (although the remaining 3 lines are still read). -*> -*> line 3: NN, INTEGER array, dimension(NSIZES) -*> Dimensions of matrices to be tested. 
-*> -*> line 4: NB, NBMIN, NX, NS, NBCOL, INTEGERs -*> These integer parameters determine how blocking is done -*> (see ILAENV for details) -*> NB : block size -*> NBMIN : minimum block size -*> NX : minimum dimension for blocking -*> NS : number of shifts in xHSEQR -*> NBCOL : minimum column dimension for blocking -*> -*> line 5: THRESH, REAL -*> The test threshold against which computed residuals are -*> compared. Should generally be in the range from 10. to 20. -*> If it is 0., all test case data will be printed. -*> -*> line 6: TSTERR, LOGICAL -*> Flag indicating whether or not to test the error exits. -*> -*> line 7: NEWSD, INTEGER -*> A code indicating how to set the random number seed. -*> = 0: Set the seed to a default value before each run -*> = 1: Initialize the seed to a default value only before the -*> first run -*> = 2: Like 1, but use the seed values on the next line -*> -*> If line 7 was 2: -*> -*> line 8: INTEGER array, dimension (4) -*> Four integer values for the random number seed. -*> -*> lines 9 and following: Lines specifying matrix types, as for NEP. -*> The 3-character path name is 'SEV' to test SGEEV, or -*> 'SES' to test SGEES. -*> -*>----------------------------------------------------------------------- -*> -*> The SVX data has two parts. The first part is identical to SEV, -*> and the second part consists of test matrices with precomputed -*> solutions. -*> -*> line 1: 'SVX' in columns 1-3. -*> -*> line 2: NSIZES, INTEGER -*> If NSIZES = 0, no testing of randomly generated examples -*> is done, but any precomputed examples are tested. 
-*> -*> line 3: NN, INTEGER array, dimension(NSIZES) -*> -*> line 4: NB, NBMIN, NX, NS, NBCOL, INTEGERs -*> -*> line 5: THRESH, REAL -*> -*> line 6: TSTERR, LOGICAL -*> -*> line 7: NEWSD, INTEGER -*> -*> If line 7 was 2: -*> -*> line 8: INTEGER array, dimension (4) -*> -*> lines 9 and following: The first line contains 'SVX' in columns 1-3 -*> followed by the number of matrix types, possibly with -*> a second line to specify certain matrix types. -*> If the number of matrix types = 0, no testing of randomly -*> generated examples is done, but any precomputed examples -*> are tested. -*> -*> remaining lines : Each matrix is stored on 1+2*N lines, where N is -*> its dimension. The first line contains the dimension (a -*> single integer). The next N lines contain the matrix, one -*> row per line. The last N lines correspond to each -*> eigenvalue. Each of these last N lines contains 4 real -*> values: the real part of the eigenvalue, the imaginary -*> part of the eigenvalue, the reciprocal condition number of -*> the eigenvalues, and the reciprocal condition number of the -*> eigenvector. The end of data is indicated by dimension N=0. -*> Even if no data is to be tested, there must be at least one -*> line containing N=0. -*> -*>----------------------------------------------------------------------- -*> -*> The SSX data is like SVX. The first part is identical to SEV, and the -*> second part consists of test matrices with precomputed solutions. -*> -*> line 1: 'SSX' in columns 1-3. -*> -*> line 2: NSIZES, INTEGER -*> If NSIZES = 0, no testing of randomly generated examples -*> is done, but any precomputed examples are tested. 
-*> -*> line 3: NN, INTEGER array, dimension(NSIZES) -*> -*> line 4: NB, NBMIN, NX, NS, NBCOL, INTEGERs -*> -*> line 5: THRESH, REAL -*> -*> line 6: TSTERR, LOGICAL -*> -*> line 7: NEWSD, INTEGER -*> -*> If line 7 was 2: -*> -*> line 8: INTEGER array, dimension (4) -*> -*> lines 9 and following: The first line contains 'SSX' in columns 1-3 -*> followed by the number of matrix types, possibly with -*> a second line to specify certain matrix types. -*> If the number of matrix types = 0, no testing of randomly -*> generated examples is done, but any precomputed examples -*> are tested. -*> -*> remaining lines : Each matrix is stored on 3+N lines, where N is its -*> dimension. The first line contains the dimension N and the -*> dimension M of an invariant subspace. The second line -*> contains M integers, identifying the eigenvalues in the -*> invariant subspace (by their position in a list of -*> eigenvalues ordered by increasing real part). The next N -*> lines contain the matrix. The last line contains the -*> reciprocal condition number for the average of the selected -*> eigenvalues, and the reciprocal condition number for the -*> corresponding right invariant subspace. The end of data is -*> indicated by a line containing N=0 and M=0. Even if no data -*> is to be tested, there must be at least one line containing -*> N=0 and M=0. -*> -*>----------------------------------------------------------------------- -*> -*> SGG input file: -*> -*> line 2: NN, INTEGER -*> Number of values of N. -*> -*> line 3: NVAL, INTEGER array, dimension (NN) -*> The values for the matrix dimension N. -*> -*> line 4: NPARMS, INTEGER -*> Number of values of the parameters NB, NBMIN, NS, MAXB, and -*> NBCOL. -*> -*> line 5: NBVAL, INTEGER array, dimension (NPARMS) -*> The values for the blocksize NB. -*> -*> line 6: NBMIN, INTEGER array, dimension (NPARMS) -*> The values for NBMIN, the minimum row dimension for blocks. 
-*> -*> line 7: NSVAL, INTEGER array, dimension (NPARMS) -*> The values for the number of shifts. -*> -*> line 8: MXBVAL, INTEGER array, dimension (NPARMS) -*> The values for MAXB, used in determining minimum blocksize. -*> -*> line 9: IACC22, INTEGER array, dimension (NPARMS) -*> select structured matrix multiply: 1 or 2) -*> -*> line 10: NBCOL, INTEGER array, dimension (NPARMS) -*> The values for NBCOL, the minimum column dimension for -*> blocks. -*> -*> line 11: THRESH -*> Threshold value for the test ratios. Information will be -*> printed about each test for which the test ratio is greater -*> than or equal to the threshold. -*> -*> line 12: TSTCHK, LOGICAL -*> Flag indicating whether or not to test the LAPACK routines. -*> -*> line 13: TSTDRV, LOGICAL -*> Flag indicating whether or not to test the driver routines. -*> -*> line 14: TSTERR, LOGICAL -*> Flag indicating whether or not to test the error exits for -*> the LAPACK routines and driver routines. -*> -*> line 15: NEWSD, INTEGER -*> A code indicating how to set the random number seed. -*> = 0: Set the seed to a default value before each run -*> = 1: Initialize the seed to a default value only before the -*> first run -*> = 2: Like 1, but use the seed values on the next line -*> -*> If line 15 was 2: -*> -*> line 16: INTEGER array, dimension (4) -*> Four integer values for the random number seed. -*> -*> lines 17-EOF: Lines specifying matrix types, as for NEP. -*> The 3-character path name is 'SGG' for the generalized -*> eigenvalue problem routines and driver routines. -*> -*>----------------------------------------------------------------------- -*> -*> SGS and SGV input files: -*> -*> line 1: 'SGS' or 'SGV' in columns 1 to 3. -*> -*> line 2: NN, INTEGER -*> Number of values of N. -*> -*> line 3: NVAL, INTEGER array, dimension(NN) -*> Dimensions of matrices to be tested. 
-*> -*> line 4: NB, NBMIN, NX, NS, NBCOL, INTEGERs -*> These integer parameters determine how blocking is done -*> (see ILAENV for details) -*> NB : block size -*> NBMIN : minimum block size -*> NX : minimum dimension for blocking -*> NS : number of shifts in xHGEQR -*> NBCOL : minimum column dimension for blocking -*> -*> line 5: THRESH, REAL -*> The test threshold against which computed residuals are -*> compared. Should generally be in the range from 10. to 20. -*> If it is 0., all test case data will be printed. -*> -*> line 6: TSTERR, LOGICAL -*> Flag indicating whether or not to test the error exits. -*> -*> line 7: NEWSD, INTEGER -*> A code indicating how to set the random number seed. -*> = 0: Set the seed to a default value before each run -*> = 1: Initialize the seed to a default value only before the -*> first run -*> = 2: Like 1, but use the seed values on the next line -*> -*> If line 17 was 2: -*> -*> line 7: INTEGER array, dimension (4) -*> Four integer values for the random number seed. -*> -*> lines 7-EOF: Lines specifying matrix types, as for NEP. -*> The 3-character path name is 'SGS' for the generalized -*> eigenvalue problem routines and driver routines. -*> -*>----------------------------------------------------------------------- -*> -*> SXV input files: -*> -*> line 1: 'SXV' in columns 1 to 3. -*> -*> line 2: N, INTEGER -*> Value of N. -*> -*> line 3: NB, NBMIN, NX, NS, NBCOL, INTEGERs -*> These integer parameters determine how blocking is done -*> (see ILAENV for details) -*> NB : block size -*> NBMIN : minimum block size -*> NX : minimum dimension for blocking -*> NS : number of shifts in xHGEQR -*> NBCOL : minimum column dimension for blocking -*> -*> line 4: THRESH, REAL -*> The test threshold against which computed residuals are -*> compared. Should generally be in the range from 10. to 20. -*> Information will be printed about each test for which the -*> test ratio is greater than or equal to the threshold. 
-*> -*> line 5: TSTERR, LOGICAL -*> Flag indicating whether or not to test the error exits for -*> the LAPACK routines and driver routines. -*> -*> line 6: NEWSD, INTEGER -*> A code indicating how to set the random number seed. -*> = 0: Set the seed to a default value before each run -*> = 1: Initialize the seed to a default value only before the -*> first run -*> = 2: Like 1, but use the seed values on the next line -*> -*> If line 6 was 2: -*> -*> line 7: INTEGER array, dimension (4) -*> Four integer values for the random number seed. -*> -*> If line 2 was 0: -*> -*> line 7-EOF: Precomputed examples are tested. -*> -*> remaining lines : Each example is stored on 3+2*N lines, where N is -*> its dimension. The first line contains the dimension (a -*> single integer). The next N lines contain the matrix A, one -*> row per line. The next N lines contain the matrix B. The -*> next line contains the reciprocals of the eigenvalue -*> condition numbers. The last line contains the reciprocals of -*> the eigenvector condition numbers. The end of data is -*> indicated by dimension N=0. Even if no data is to be tested, -*> there must be at least one line containing N=0. -*> -*>----------------------------------------------------------------------- -*> -*> SGX input files: -*> -*> line 1: 'SGX' in columns 1 to 3. -*> -*> line 2: N, INTEGER -*> Value of N. -*> -*> line 3: NB, NBMIN, NX, NS, NBCOL, INTEGERs -*> These integer parameters determine how blocking is done -*> (see ILAENV for details) -*> NB : block size -*> NBMIN : minimum block size -*> NX : minimum dimension for blocking -*> NS : number of shifts in xHGEQR -*> NBCOL : minimum column dimension for blocking -*> -*> line 4: THRESH, REAL -*> The test threshold against which computed residuals are -*> compared. Should generally be in the range from 10. to 20. -*> Information will be printed about each test for which the -*> test ratio is greater than or equal to the threshold. 
-*> -*> line 5: TSTERR, LOGICAL -*> Flag indicating whether or not to test the error exits for -*> the LAPACK routines and driver routines. -*> -*> line 6: NEWSD, INTEGER -*> A code indicating how to set the random number seed. -*> = 0: Set the seed to a default value before each run -*> = 1: Initialize the seed to a default value only before the -*> first run -*> = 2: Like 1, but use the seed values on the next line -*> -*> If line 6 was 2: -*> -*> line 7: INTEGER array, dimension (4) -*> Four integer values for the random number seed. -*> -*> If line 2 was 0: -*> -*> line 7-EOF: Precomputed examples are tested. -*> -*> remaining lines : Each example is stored on 3+2*N lines, where N is -*> its dimension. The first line contains the dimension (a -*> single integer). The next line contains an integer k such -*> that only the last k eigenvalues will be selected and appear -*> in the leading diagonal blocks of $A$ and $B$. The next N -*> lines contain the matrix A, one row per line. The next N -*> lines contain the matrix B. The last line contains the -*> reciprocal of the eigenvalue cluster condition number and the -*> reciprocal of the deflating subspace (associated with the -*> selected eigencluster) condition number. The end of data is -*> indicated by dimension N=0. Even if no data is to be tested, -*> there must be at least one line containing N=0. -*> -*>----------------------------------------------------------------------- -*> -*> SSB input file: -*> -*> line 2: NN, INTEGER -*> Number of values of N. -*> -*> line 3: NVAL, INTEGER array, dimension (NN) -*> The values for the matrix dimension N. -*> -*> line 4: NK, INTEGER -*> Number of values of K. -*> -*> line 5: KVAL, INTEGER array, dimension (NK) -*> The values for the matrix dimension K. -*> -*> line 6: THRESH -*> Threshold value for the test ratios. Information will be -*> printed about each test for which the test ratio is greater -*> than or equal to the threshold. 
-*> -*> line 7: NEWSD, INTEGER -*> A code indicating how to set the random number seed. -*> = 0: Set the seed to a default value before each run -*> = 1: Initialize the seed to a default value only before the -*> first run -*> = 2: Like 1, but use the seed values on the next line -*> -*> If line 7 was 2: -*> -*> line 8: INTEGER array, dimension (4) -*> Four integer values for the random number seed. -*> -*> lines 8-EOF: Lines specifying matrix types, as for NEP. -*> The 3-character path name is 'SSB'. -*> -*>----------------------------------------------------------------------- -*> -*> SBB input file: -*> -*> line 2: NN, INTEGER -*> Number of values of M and N. -*> -*> line 3: MVAL, INTEGER array, dimension (NN) -*> The values for the matrix row dimension M. -*> -*> line 4: NVAL, INTEGER array, dimension (NN) -*> The values for the matrix column dimension N. -*> -*> line 4: NK, INTEGER -*> Number of values of K. -*> -*> line 5: KVAL, INTEGER array, dimension (NK) -*> The values for the matrix bandwidth K. -*> -*> line 6: NPARMS, INTEGER -*> Number of values of the parameter NRHS -*> -*> line 7: NSVAL, INTEGER array, dimension (NPARMS) -*> The values for the number of right hand sides NRHS. -*> -*> line 8: THRESH -*> Threshold value for the test ratios. Information will be -*> printed about each test for which the test ratio is greater -*> than or equal to the threshold. -*> -*> line 9: NEWSD, INTEGER -*> A code indicating how to set the random number seed. -*> = 0: Set the seed to a default value before each run -*> = 1: Initialize the seed to a default value only before the -*> first run -*> = 2: Like 1, but use the seed values on the next line -*> -*> If line 9 was 2: -*> -*> line 10: INTEGER array, dimension (4) -*> Four integer values for the random number seed. -*> -*> lines 10-EOF: Lines specifying matrix types, as for SVD. -*> The 3-character path name is 'SBB'. 
-*> -*>----------------------------------------------------------------------- -*> -*> SEC input file: -*> -*> line 2: THRESH, REAL -*> Threshold value for the test ratios. Information will be -*> printed about each test for which the test ratio is greater -*> than or equal to the threshold. -*> -*> lines 3-EOF: -*> -*> Input for testing the eigencondition routines consists of a set of -*> specially constructed test cases and their solutions. The data -*> format is not intended to be modified by the user. -*> -*>----------------------------------------------------------------------- -*> -*> SBL and SBK input files: -*> -*> line 1: 'SBL' in columns 1-3 to test SGEBAL, or 'SBK' in -*> columns 1-3 to test SGEBAK. -*> -*> The remaining lines consist of specially constructed test cases. -*> -*>----------------------------------------------------------------------- -*> -*> SGL and SGK input files: -*> -*> line 1: 'SGL' in columns 1-3 to test SGGBAL, or 'SGK' in -*> columns 1-3 to test SGGBAK. -*> -*> The remaining lines consist of specially constructed test cases. -*> -*>----------------------------------------------------------------------- -*> -*> GLM data file: -*> -*> line 1: 'GLM' in columns 1 to 3. -*> -*> line 2: NN, INTEGER -*> Number of values of M, P, and N. -*> -*> line 3: MVAL, INTEGER array, dimension(NN) -*> Values of M (row dimension). -*> -*> line 4: PVAL, INTEGER array, dimension(NN) -*> Values of P (row dimension). -*> -*> line 5: NVAL, INTEGER array, dimension(NN) -*> Values of N (column dimension), note M <= N <= M+P. -*> -*> line 6: THRESH, REAL -*> Threshold value for the test ratios. Information will be -*> printed about each test for which the test ratio is greater -*> than or equal to the threshold. -*> -*> line 7: TSTERR, LOGICAL -*> Flag indicating whether or not to test the error exits for -*> the LAPACK routines and driver routines. -*> -*> line 8: NEWSD, INTEGER -*> A code indicating how to set the random number seed. 
-*> = 0: Set the seed to a default value before each run -*> = 1: Initialize the seed to a default value only before the -*> first run -*> = 2: Like 1, but use the seed values on the next line -*> -*> If line 8 was 2: -*> -*> line 9: INTEGER array, dimension (4) -*> Four integer values for the random number seed. -*> -*> lines 9-EOF: Lines specifying matrix types, as for NEP. -*> The 3-character path name is 'GLM' for the generalized -*> linear regression model routines. -*> -*>----------------------------------------------------------------------- -*> -*> GQR data file: -*> -*> line 1: 'GQR' in columns 1 to 3. -*> -*> line 2: NN, INTEGER -*> Number of values of M, P, and N. -*> -*> line 3: MVAL, INTEGER array, dimension(NN) -*> Values of M. -*> -*> line 4: PVAL, INTEGER array, dimension(NN) -*> Values of P. -*> -*> line 5: NVAL, INTEGER array, dimension(NN) -*> Values of N. -*> -*> line 6: THRESH, REAL -*> Threshold value for the test ratios. Information will be -*> printed about each test for which the test ratio is greater -*> than or equal to the threshold. -*> -*> line 7: TSTERR, LOGICAL -*> Flag indicating whether or not to test the error exits for -*> the LAPACK routines and driver routines. -*> -*> line 8: NEWSD, INTEGER -*> A code indicating how to set the random number seed. -*> = 0: Set the seed to a default value before each run -*> = 1: Initialize the seed to a default value only before the -*> first run -*> = 2: Like 1, but use the seed values on the next line -*> -*> If line 8 was 2: -*> -*> line 9: INTEGER array, dimension (4) -*> Four integer values for the random number seed. -*> -*> lines 9-EOF: Lines specifying matrix types, as for NEP. -*> The 3-character path name is 'GQR' for the generalized -*> QR and RQ routines. -*> -*>----------------------------------------------------------------------- -*> -*> GSV data file: -*> -*> line 1: 'GSV' in columns 1 to 3. -*> -*> line 2: NN, INTEGER -*> Number of values of M, P, and N. 
-*> -*> line 3: MVAL, INTEGER array, dimension(NN) -*> Values of M (row dimension). -*> -*> line 4: PVAL, INTEGER array, dimension(NN) -*> Values of P (row dimension). -*> -*> line 5: NVAL, INTEGER array, dimension(NN) -*> Values of N (column dimension). -*> -*> line 6: THRESH, REAL -*> Threshold value for the test ratios. Information will be -*> printed about each test for which the test ratio is greater -*> than or equal to the threshold. -*> -*> line 7: TSTERR, LOGICAL -*> Flag indicating whether or not to test the error exits for -*> the LAPACK routines and driver routines. -*> -*> line 8: NEWSD, INTEGER -*> A code indicating how to set the random number seed. -*> = 0: Set the seed to a default value before each run -*> = 1: Initialize the seed to a default value only before the -*> first run -*> = 2: Like 1, but use the seed values on the next line -*> -*> If line 8 was 2: -*> -*> line 9: INTEGER array, dimension (4) -*> Four integer values for the random number seed. -*> -*> lines 9-EOF: Lines specifying matrix types, as for NEP. -*> The 3-character path name is 'GSV' for the generalized -*> SVD routines. -*> -*>----------------------------------------------------------------------- -*> -*> CSD data file: -*> -*> line 1: 'CSD' in columns 1 to 3. -*> -*> line 2: NM, INTEGER -*> Number of values of M, P, and N. -*> -*> line 3: MVAL, INTEGER array, dimension(NM) -*> Values of M (row and column dimension of orthogonal matrix). -*> -*> line 4: PVAL, INTEGER array, dimension(NM) -*> Values of P (row dimension of top-left block). -*> -*> line 5: NVAL, INTEGER array, dimension(NM) -*> Values of N (column dimension of top-left block). -*> -*> line 6: THRESH, REAL -*> Threshold value for the test ratios. Information will be -*> printed about each test for which the test ratio is greater -*> than or equal to the threshold. -*> -*> line 7: TSTERR, LOGICAL -*> Flag indicating whether or not to test the error exits for -*> the LAPACK routines and driver routines. 
-*> -*> line 8: NEWSD, INTEGER -*> A code indicating how to set the random number seed. -*> = 0: Set the seed to a default value before each run -*> = 1: Initialize the seed to a default value only before the -*> first run -*> = 2: Like 1, but use the seed values on the next line -*> -*> If line 8 was 2: -*> -*> line 9: INTEGER array, dimension (4) -*> Four integer values for the random number seed. -*> -*> lines 9-EOF: Lines specifying matrix types, as for NEP. -*> The 3-character path name is 'CSD' for the CSD routine. -*> -*>----------------------------------------------------------------------- -*> -*> LSE data file: -*> -*> line 1: 'LSE' in columns 1 to 3. -*> -*> line 2: NN, INTEGER -*> Number of values of M, P, and N. -*> -*> line 3: MVAL, INTEGER array, dimension(NN) -*> Values of M. -*> -*> line 4: PVAL, INTEGER array, dimension(NN) -*> Values of P. -*> -*> line 5: NVAL, INTEGER array, dimension(NN) -*> Values of N, note P <= N <= P+M. -*> -*> line 6: THRESH, REAL -*> Threshold value for the test ratios. Information will be -*> printed about each test for which the test ratio is greater -*> than or equal to the threshold. -*> -*> line 7: TSTERR, LOGICAL -*> Flag indicating whether or not to test the error exits for -*> the LAPACK routines and driver routines. -*> -*> line 8: NEWSD, INTEGER -*> A code indicating how to set the random number seed. -*> = 0: Set the seed to a default value before each run -*> = 1: Initialize the seed to a default value only before the -*> first run -*> = 2: Like 1, but use the seed values on the next line -*> -*> If line 8 was 2: -*> -*> line 9: INTEGER array, dimension (4) -*> Four integer values for the random number seed. -*> -*> lines 9-EOF: Lines specifying matrix types, as for NEP. -*> The 3-character path name is 'GSV' for the generalized -*> SVD routines. 
-*> -*>----------------------------------------------------------------------- -*> -*> NMAX is currently set to 132 and must be at least 12 for some of the -*> precomputed examples, and LWORK = NMAX*(5*NMAX+5)+1 in the parameter -*> statements below. For SVD, we assume NRHS may be as big as N. The -*> parameter NEED is set to 14 to allow for 14 N-by-N matrices for SGG. -*> \endverbatim -* -* Arguments: -* ========== -* -* -* Authors: -* ======== -* -*> \author Univ. of Tennessee -*> \author Univ. of California Berkeley -*> \author Univ. of Colorado Denver -*> \author NAG Ltd. -* -*> \date June 2016 -* -*> \ingroup single_eig -* -* ===================================================================== - PROGRAM SCHKEE -* -* -- LAPACK test routine (version 3.7.0) -- -* -- LAPACK is a software package provided by Univ. of Tennessee, -- -* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* June 2016 -* -* ===================================================================== -* -* .. Parameters .. - INTEGER NMAX - PARAMETER ( NMAX = 132 ) - INTEGER NCMAX - PARAMETER ( NCMAX = 20 ) - INTEGER NEED - PARAMETER ( NEED = 14 ) - INTEGER LWORK - PARAMETER ( LWORK = NMAX*( 5*NMAX+5 )+1 ) - INTEGER LIWORK - PARAMETER ( LIWORK = NMAX*( 5*NMAX+20 ) ) - INTEGER MAXIN - PARAMETER ( MAXIN = 20 ) - INTEGER MAXT - PARAMETER ( MAXT = 30 ) - INTEGER NIN, NOUT - PARAMETER ( NIN = 5, NOUT = 6 ) -* .. -* .. Local Scalars .. - LOGICAL CSD, FATAL, GLM, GQR, GSV, LSE, NEP, SBB, SBK, - $ SBL, SEP, SES, SEV, SGG, SGK, SGL, SGS, SGV, - $ SGX, SSB, SSX, SVD, SVX, SXV, TSTCHK, TSTDIF, - $ TSTDRV, TSTERR - CHARACTER C1 - CHARACTER*3 C3, PATH - CHARACTER*32 VNAME - CHARACTER*10 INTSTR - CHARACTER*80 LINE - INTEGER I, I1, IC, INFO, ITMP, K, LENP, MAXTYP, NEWSD, - $ NK, NN, NPARMS, NRHS, NTYPES, - $ VERS_MAJOR, VERS_MINOR, VERS_PATCH - REAL EPS, S1, S2, THRESH, THRSHN -* .. -* .. Local Arrays .. 
- LOGICAL DOTYPE( MAXT ), LOGWRK( NMAX ) - INTEGER IOLDSD( 4 ), ISEED( 4 ), IWORK( LIWORK ), - $ KVAL( MAXIN ), MVAL( MAXIN ), MXBVAL( MAXIN ), - $ NBCOL( MAXIN ), NBMIN( MAXIN ), NBVAL( MAXIN ), - $ NSVAL( MAXIN ), NVAL( MAXIN ), NXVAL( MAXIN ), - $ PVAL( MAXIN ) - INTEGER INMIN( MAXIN ), INWIN( MAXIN ), INIBL( MAXIN ), - $ ISHFTS( MAXIN ), IACC22( MAXIN ) - REAL A( NMAX*NMAX, NEED ), B( NMAX*NMAX, 5 ), - $ C( NCMAX*NCMAX, NCMAX*NCMAX ), D( NMAX, 12 ), - $ RESULT( 500 ), TAUA( NMAX ), TAUB( NMAX ), - $ WORK( LWORK ), X( 5*NMAX ) -* .. -* .. External Functions .. - LOGICAL LSAMEN - REAL SECOND, SLAMCH - EXTERNAL LSAMEN, SECOND, SLAMCH -* .. -* .. External Subroutines .. - EXTERNAL ALAREQ, SCHKBB, SCHKBD, SCHKBK, SCHKBL, SCHKEC, - $ SCHKGG, SCHKGK, SCHKGL, SCHKHS, SCHKSB, SCHKST, - $ SCKCSD, SCKGLM, SCKGQR, SCKGSV, SCKLSE, SDRGES, - $ SDRGEV, SDRGSX, SDRGVX, SDRVBD, SDRVES, SDRVEV, - $ SDRVSG, SDRVST, SDRVSX, SDRVVX, SERRBD, - $ SERRED, SERRGG, SERRHS, SERRST, ILAVER, XLAENV, - $ SDRGES3, SDRGEV3, - $ SCHKST2STG, SDRVST2STG, SCHKSB2STG, SDRVSG2STG -* .. -* .. Intrinsic Functions .. - INTRINSIC LEN, MIN -* .. -* .. Scalars in Common .. - LOGICAL LERR, OK - CHARACTER*32 SRNAMT - INTEGER INFOT, MAXB, NPROC, NSHIFT, NUNIT, SELDIM, - $ SELOPT -* .. -* .. Arrays in Common .. - LOGICAL SELVAL( 20 ) - INTEGER IPARMS( 100 ) - REAL SELWI( 20 ), SELWR( 20 ) -* .. -* .. Common blocks .. - COMMON / CENVIR / NPROC, NSHIFT, MAXB - COMMON / CLAENV / IPARMS - COMMON / INFOC / INFOT, NUNIT, OK, LERR - COMMON / SRNAMC / SRNAMT - COMMON / SSLCT / SELOPT, SELDIM, SELVAL, SELWR, SELWI -* .. -* .. Data statements .. - DATA INTSTR / '0123456789' / - DATA IOLDSD / 0, 0, 0, 1 / -* .. -* .. Executable Statements .. -* - A = 0.0 - B = 0.0 - C = 0.0 - D = 0.0 - S1 = SECOND( ) - FATAL = .FALSE. 
- NUNIT = NOUT -* -* Return to here to read multiple sets of data -* - 10 CONTINUE -* -* Read the first line and set the 3-character test path -* - READ( NIN, FMT = '(A80)', END = 380 )LINE - PATH = LINE( 1: 3 ) - NEP = LSAMEN( 3, PATH, 'NEP' ) .OR. LSAMEN( 3, PATH, 'SHS' ) - SEP = LSAMEN( 3, PATH, 'SEP' ) .OR. LSAMEN( 3, PATH, 'SST' ) .OR. - $ LSAMEN( 3, PATH, 'SSG' ) .OR. LSAMEN( 3, PATH, 'SE2' ) - SVD = LSAMEN( 3, PATH, 'SVD' ) .OR. LSAMEN( 3, PATH, 'DBD' ) - SVD = LSAMEN( 3, PATH, 'SVD' ) .OR. LSAMEN( 3, PATH, 'SBD' ) - SEV = LSAMEN( 3, PATH, 'SEV' ) - SES = LSAMEN( 3, PATH, 'SES' ) - SVX = LSAMEN( 3, PATH, 'SVX' ) - SSX = LSAMEN( 3, PATH, 'SSX' ) - SGG = LSAMEN( 3, PATH, 'SGG' ) - SGS = LSAMEN( 3, PATH, 'SGS' ) - SGX = LSAMEN( 3, PATH, 'SGX' ) - SGV = LSAMEN( 3, PATH, 'SGV' ) - SXV = LSAMEN( 3, PATH, 'SXV' ) - SSB = LSAMEN( 3, PATH, 'SSB' ) - SBB = LSAMEN( 3, PATH, 'SBB' ) - GLM = LSAMEN( 3, PATH, 'GLM' ) - GQR = LSAMEN( 3, PATH, 'GQR' ) .OR. LSAMEN( 3, PATH, 'GRQ' ) - GSV = LSAMEN( 3, PATH, 'GSV' ) - CSD = LSAMEN( 3, PATH, 'CSD' ) - LSE = LSAMEN( 3, PATH, 'LSE' ) - SBL = LSAMEN( 3, PATH, 'SBL' ) - SBK = LSAMEN( 3, PATH, 'SBK' ) - SGL = LSAMEN( 3, PATH, 'SGL' ) - SGK = LSAMEN( 3, PATH, 'SGK' ) -* -* Report values of parameters. -* - IF( PATH.EQ.' 
' ) THEN - GO TO 10 - ELSE IF( NEP ) THEN - WRITE( NOUT, FMT = 9987 ) - ELSE IF( SEP ) THEN - WRITE( NOUT, FMT = 9986 ) - ELSE IF( SVD ) THEN - WRITE( NOUT, FMT = 9985 ) - ELSE IF( SEV ) THEN - WRITE( NOUT, FMT = 9979 ) - ELSE IF( SES ) THEN - WRITE( NOUT, FMT = 9978 ) - ELSE IF( SVX ) THEN - WRITE( NOUT, FMT = 9977 ) - ELSE IF( SSX ) THEN - WRITE( NOUT, FMT = 9976 ) - ELSE IF( SGG ) THEN - WRITE( NOUT, FMT = 9975 ) - ELSE IF( SGS ) THEN - WRITE( NOUT, FMT = 9964 ) - ELSE IF( SGX ) THEN - WRITE( NOUT, FMT = 9965 ) - ELSE IF( SGV ) THEN - WRITE( NOUT, FMT = 9963 ) - ELSE IF( SXV ) THEN - WRITE( NOUT, FMT = 9962 ) - ELSE IF( SSB ) THEN - WRITE( NOUT, FMT = 9974 ) - ELSE IF( SBB ) THEN - WRITE( NOUT, FMT = 9967 ) - ELSE IF( GLM ) THEN - WRITE( NOUT, FMT = 9971 ) - ELSE IF( GQR ) THEN - WRITE( NOUT, FMT = 9970 ) - ELSE IF( GSV ) THEN - WRITE( NOUT, FMT = 9969 ) - ELSE IF( CSD ) THEN - WRITE( NOUT, FMT = 9960 ) - ELSE IF( LSE ) THEN - WRITE( NOUT, FMT = 9968 ) - ELSE IF( SBL ) THEN -* -* SGEBAL: Balancing -* - CALL SCHKBL( NIN, NOUT ) - GO TO 10 - ELSE IF( SBK ) THEN -* -* SGEBAK: Back transformation -* - CALL SCHKBK( NIN, NOUT ) - GO TO 10 - ELSE IF( SGL ) THEN -* -* SGGBAL: Balancing -* - CALL SCHKGL( NIN, NOUT ) - GO TO 10 - ELSE IF( SGK ) THEN -* -* SGGBAK: Back transformation -* - CALL SCHKGK( NIN, NOUT ) - GO TO 10 - ELSE IF( LSAMEN( 3, PATH, 'SEC' ) ) THEN -* -* SEC: Eigencondition estimation -* - READ( NIN, FMT = * )THRESH - CALL XLAENV( 1, 1 ) - CALL XLAENV( 12, 11 ) - CALL XLAENV( 13, 2 ) - CALL XLAENV( 14, 0 ) - CALL XLAENV( 15, 2 ) - CALL XLAENV( 16, 2 ) - TSTERR = .TRUE. - CALL SCHKEC( THRESH, TSTERR, NIN, NOUT ) - GO TO 10 - ELSE - WRITE( NOUT, FMT = 9992 )PATH - GO TO 10 - END IF - CALL ILAVER( VERS_MAJOR, VERS_MINOR, VERS_PATCH ) - WRITE( NOUT, FMT = 9972 ) VERS_MAJOR, VERS_MINOR, VERS_PATCH - WRITE( NOUT, FMT = 9984 ) -* -* Read the number of values of M, P, and N. 
-* - READ( NIN, FMT = * )NN - IF( NN.LT.0 ) THEN - WRITE( NOUT, FMT = 9989 )' NN ', NN, 1 - NN = 0 - FATAL = .TRUE. - ELSE IF( NN.GT.MAXIN ) THEN - WRITE( NOUT, FMT = 9988 )' NN ', NN, MAXIN - NN = 0 - FATAL = .TRUE. - END IF -* -* Read the values of M -* - IF( .NOT.( SGX .OR. SXV ) ) THEN - READ( NIN, FMT = * )( MVAL( I ), I = 1, NN ) - IF( SVD ) THEN - VNAME = ' M ' - ELSE - VNAME = ' N ' - END IF - DO 20 I = 1, NN - IF( MVAL( I ).LT.0 ) THEN - WRITE( NOUT, FMT = 9989 )VNAME, MVAL( I ), 0 - FATAL = .TRUE. - ELSE IF( MVAL( I ).GT.NMAX ) THEN - WRITE( NOUT, FMT = 9988 )VNAME, MVAL( I ), NMAX - FATAL = .TRUE. - END IF - 20 CONTINUE - WRITE( NOUT, FMT = 9983 )'M: ', ( MVAL( I ), I = 1, NN ) - END IF -* -* Read the values of P -* - IF( GLM .OR. GQR .OR. GSV .OR. CSD .OR. LSE ) THEN - READ( NIN, FMT = * )( PVAL( I ), I = 1, NN ) - DO 30 I = 1, NN - IF( PVAL( I ).LT.0 ) THEN - WRITE( NOUT, FMT = 9989 )' P ', PVAL( I ), 0 - FATAL = .TRUE. - ELSE IF( PVAL( I ).GT.NMAX ) THEN - WRITE( NOUT, FMT = 9988 )' P ', PVAL( I ), NMAX - FATAL = .TRUE. - END IF - 30 CONTINUE - WRITE( NOUT, FMT = 9983 )'P: ', ( PVAL( I ), I = 1, NN ) - END IF -* -* Read the values of N -* - IF( SVD .OR. SBB .OR. GLM .OR. GQR .OR. GSV .OR. CSD .OR. - $ LSE ) THEN - READ( NIN, FMT = * )( NVAL( I ), I = 1, NN ) - DO 40 I = 1, NN - IF( NVAL( I ).LT.0 ) THEN - WRITE( NOUT, FMT = 9989 )' N ', NVAL( I ), 0 - FATAL = .TRUE. - ELSE IF( NVAL( I ).GT.NMAX ) THEN - WRITE( NOUT, FMT = 9988 )' N ', NVAL( I ), NMAX - FATAL = .TRUE. - END IF - 40 CONTINUE - ELSE - DO 50 I = 1, NN - NVAL( I ) = MVAL( I ) - 50 CONTINUE - END IF - IF( .NOT.( SGX .OR. SXV ) ) THEN - WRITE( NOUT, FMT = 9983 )'N: ', ( NVAL( I ), I = 1, NN ) - ELSE - WRITE( NOUT, FMT = 9983 )'N: ', NN - END IF -* -* Read the number of values of K, followed by the values of K -* - IF( SSB .OR. 
SBB ) THEN - READ( NIN, FMT = * )NK - READ( NIN, FMT = * )( KVAL( I ), I = 1, NK ) - DO 60 I = 1, NK - IF( KVAL( I ).LT.0 ) THEN - WRITE( NOUT, FMT = 9989 )' K ', KVAL( I ), 0 - FATAL = .TRUE. - ELSE IF( KVAL( I ).GT.NMAX ) THEN - WRITE( NOUT, FMT = 9988 )' K ', KVAL( I ), NMAX - FATAL = .TRUE. - END IF - 60 CONTINUE - WRITE( NOUT, FMT = 9983 )'K: ', ( KVAL( I ), I = 1, NK ) - END IF -* - IF( SEV .OR. SES .OR. SVX .OR. SSX ) THEN -* -* For the nonsymmetric QR driver routines, only one set of -* parameters is allowed. -* - READ( NIN, FMT = * )NBVAL( 1 ), NBMIN( 1 ), NXVAL( 1 ), - $ INMIN( 1 ), INWIN( 1 ), INIBL(1), ISHFTS(1), IACC22(1) - IF( NBVAL( 1 ).LT.1 ) THEN - WRITE( NOUT, FMT = 9989 )' NB ', NBVAL( 1 ), 1 - FATAL = .TRUE. - ELSE IF( NBMIN( 1 ).LT.1 ) THEN - WRITE( NOUT, FMT = 9989 )'NBMIN ', NBMIN( 1 ), 1 - FATAL = .TRUE. - ELSE IF( NXVAL( 1 ).LT.1 ) THEN - WRITE( NOUT, FMT = 9989 )' NX ', NXVAL( 1 ), 1 - FATAL = .TRUE. - ELSE IF( INMIN( 1 ).LT.1 ) THEN - WRITE( NOUT, FMT = 9989 )' INMIN ', INMIN( 1 ), 1 - FATAL = .TRUE. - ELSE IF( INWIN( 1 ).LT.1 ) THEN - WRITE( NOUT, FMT = 9989 )' INWIN ', INWIN( 1 ), 1 - FATAL = .TRUE. - ELSE IF( INIBL( 1 ).LT.1 ) THEN - WRITE( NOUT, FMT = 9989 )' INIBL ', INIBL( 1 ), 1 - FATAL = .TRUE. - ELSE IF( ISHFTS( 1 ).LT.1 ) THEN - WRITE( NOUT, FMT = 9989 )' ISHFTS ', ISHFTS( 1 ), 1 - FATAL = .TRUE. - ELSE IF( IACC22( 1 ).LT.0 ) THEN - WRITE( NOUT, FMT = 9989 )' IACC22 ', IACC22( 1 ), 0 - FATAL = .TRUE. 
- END IF - CALL XLAENV( 1, NBVAL( 1 ) ) - CALL XLAENV( 2, NBMIN( 1 ) ) - CALL XLAENV( 3, NXVAL( 1 ) ) - CALL XLAENV(12, MAX( 11, INMIN( 1 ) ) ) - CALL XLAENV(13, INWIN( 1 ) ) - CALL XLAENV(14, INIBL( 1 ) ) - CALL XLAENV(15, ISHFTS( 1 ) ) - CALL XLAENV(16, IACC22( 1 ) ) - WRITE( NOUT, FMT = 9983 )'NB: ', NBVAL( 1 ) - WRITE( NOUT, FMT = 9983 )'NBMIN:', NBMIN( 1 ) - WRITE( NOUT, FMT = 9983 )'NX: ', NXVAL( 1 ) - WRITE( NOUT, FMT = 9983 )'INMIN: ', INMIN( 1 ) - WRITE( NOUT, FMT = 9983 )'INWIN: ', INWIN( 1 ) - WRITE( NOUT, FMT = 9983 )'INIBL: ', INIBL( 1 ) - WRITE( NOUT, FMT = 9983 )'ISHFTS: ', ISHFTS( 1 ) - WRITE( NOUT, FMT = 9983 )'IACC22: ', IACC22( 1 ) -* - ELSE IF( SGS .OR. SGX .OR. SGV .OR. SXV ) THEN -* -* For the nonsymmetric generalized driver routines, only one set -* of parameters is allowed. -* - READ( NIN, FMT = * )NBVAL( 1 ), NBMIN( 1 ), NXVAL( 1 ), - $ NSVAL( 1 ), MXBVAL( 1 ) - IF( NBVAL( 1 ).LT.1 ) THEN - WRITE( NOUT, FMT = 9989 )' NB ', NBVAL( 1 ), 1 - FATAL = .TRUE. - ELSE IF( NBMIN( 1 ).LT.1 ) THEN - WRITE( NOUT, FMT = 9989 )'NBMIN ', NBMIN( 1 ), 1 - FATAL = .TRUE. - ELSE IF( NXVAL( 1 ).LT.1 ) THEN - WRITE( NOUT, FMT = 9989 )' NX ', NXVAL( 1 ), 1 - FATAL = .TRUE. - ELSE IF( NSVAL( 1 ).LT.2 ) THEN - WRITE( NOUT, FMT = 9989 )' NS ', NSVAL( 1 ), 2 - FATAL = .TRUE. - ELSE IF( MXBVAL( 1 ).LT.1 ) THEN - WRITE( NOUT, FMT = 9989 )' MAXB ', MXBVAL( 1 ), 1 - FATAL = .TRUE. - END IF - CALL XLAENV( 1, NBVAL( 1 ) ) - CALL XLAENV( 2, NBMIN( 1 ) ) - CALL XLAENV( 3, NXVAL( 1 ) ) - CALL XLAENV( 4, NSVAL( 1 ) ) - CALL XLAENV( 8, MXBVAL( 1 ) ) - WRITE( NOUT, FMT = 9983 )'NB: ', NBVAL( 1 ) - WRITE( NOUT, FMT = 9983 )'NBMIN:', NBMIN( 1 ) - WRITE( NOUT, FMT = 9983 )'NX: ', NXVAL( 1 ) - WRITE( NOUT, FMT = 9983 )'NS: ', NSVAL( 1 ) - WRITE( NOUT, FMT = 9983 )'MAXB: ', MXBVAL( 1 ) -* - ELSE IF( .NOT.SSB .AND. .NOT.GLM .AND. .NOT.GQR .AND. .NOT. - $ GSV .AND. .NOT.CSD .AND. .NOT.LSE ) THEN -* -* For the other paths, the number of parameters can be varied -* from the input file. 
Read the number of parameter values. -* - READ( NIN, FMT = * )NPARMS - IF( NPARMS.LT.1 ) THEN - WRITE( NOUT, FMT = 9989 )'NPARMS', NPARMS, 1 - NPARMS = 0 - FATAL = .TRUE. - ELSE IF( NPARMS.GT.MAXIN ) THEN - WRITE( NOUT, FMT = 9988 )'NPARMS', NPARMS, MAXIN - NPARMS = 0 - FATAL = .TRUE. - END IF -* -* Read the values of NB -* - IF( .NOT.SBB ) THEN - READ( NIN, FMT = * )( NBVAL( I ), I = 1, NPARMS ) - DO 70 I = 1, NPARMS - IF( NBVAL( I ).LT.0 ) THEN - WRITE( NOUT, FMT = 9989 )' NB ', NBVAL( I ), 0 - FATAL = .TRUE. - ELSE IF( NBVAL( I ).GT.NMAX ) THEN - WRITE( NOUT, FMT = 9988 )' NB ', NBVAL( I ), NMAX - FATAL = .TRUE. - END IF - 70 CONTINUE - WRITE( NOUT, FMT = 9983 )'NB: ', - $ ( NBVAL( I ), I = 1, NPARMS ) - END IF -* -* Read the values of NBMIN -* - IF( NEP .OR. SEP .OR. SVD .OR. SGG ) THEN - READ( NIN, FMT = * )( NBMIN( I ), I = 1, NPARMS ) - DO 80 I = 1, NPARMS - IF( NBMIN( I ).LT.0 ) THEN - WRITE( NOUT, FMT = 9989 )'NBMIN ', NBMIN( I ), 0 - FATAL = .TRUE. - ELSE IF( NBMIN( I ).GT.NMAX ) THEN - WRITE( NOUT, FMT = 9988 )'NBMIN ', NBMIN( I ), NMAX - FATAL = .TRUE. - END IF - 80 CONTINUE - WRITE( NOUT, FMT = 9983 )'NBMIN:', - $ ( NBMIN( I ), I = 1, NPARMS ) - ELSE - DO 90 I = 1, NPARMS - NBMIN( I ) = 1 - 90 CONTINUE - END IF -* -* Read the values of NX -* - IF( NEP .OR. SEP .OR. SVD ) THEN - READ( NIN, FMT = * )( NXVAL( I ), I = 1, NPARMS ) - DO 100 I = 1, NPARMS - IF( NXVAL( I ).LT.0 ) THEN - WRITE( NOUT, FMT = 9989 )' NX ', NXVAL( I ), 0 - FATAL = .TRUE. - ELSE IF( NXVAL( I ).GT.NMAX ) THEN - WRITE( NOUT, FMT = 9988 )' NX ', NXVAL( I ), NMAX - FATAL = .TRUE. - END IF - 100 CONTINUE - WRITE( NOUT, FMT = 9983 )'NX: ', - $ ( NXVAL( I ), I = 1, NPARMS ) - ELSE - DO 110 I = 1, NPARMS - NXVAL( I ) = 1 - 110 CONTINUE - END IF -* -* Read the values of NSHIFT (if SGG) or NRHS (if SVD -* or SBB). -* - IF( SVD .OR. SBB .OR. 
SGG ) THEN - READ( NIN, FMT = * )( NSVAL( I ), I = 1, NPARMS ) - DO 120 I = 1, NPARMS - IF( NSVAL( I ).LT.0 ) THEN - WRITE( NOUT, FMT = 9989 )' NS ', NSVAL( I ), 0 - FATAL = .TRUE. - ELSE IF( NSVAL( I ).GT.NMAX ) THEN - WRITE( NOUT, FMT = 9988 )' NS ', NSVAL( I ), NMAX - FATAL = .TRUE. - END IF - 120 CONTINUE - WRITE( NOUT, FMT = 9983 )'NS: ', - $ ( NSVAL( I ), I = 1, NPARMS ) - ELSE - DO 130 I = 1, NPARMS - NSVAL( I ) = 1 - 130 CONTINUE - END IF -* -* Read the values for MAXB. -* - IF( SGG ) THEN - READ( NIN, FMT = * )( MXBVAL( I ), I = 1, NPARMS ) - DO 140 I = 1, NPARMS - IF( MXBVAL( I ).LT.0 ) THEN - WRITE( NOUT, FMT = 9989 )' MAXB ', MXBVAL( I ), 0 - FATAL = .TRUE. - ELSE IF( MXBVAL( I ).GT.NMAX ) THEN - WRITE( NOUT, FMT = 9988 )' MAXB ', MXBVAL( I ), NMAX - FATAL = .TRUE. - END IF - 140 CONTINUE - WRITE( NOUT, FMT = 9983 )'MAXB: ', - $ ( MXBVAL( I ), I = 1, NPARMS ) - ELSE - DO 150 I = 1, NPARMS - MXBVAL( I ) = 1 - 150 CONTINUE - END IF -* -* Read the values for INMIN. -* - IF( NEP ) THEN - READ( NIN, FMT = * )( INMIN( I ), I = 1, NPARMS ) - DO 540 I = 1, NPARMS - IF( INMIN( I ).LT.0 ) THEN - WRITE( NOUT, FMT = 9989 )' INMIN ', INMIN( I ), 0 - FATAL = .TRUE. - END IF - 540 CONTINUE - WRITE( NOUT, FMT = 9983 )'INMIN: ', - $ ( INMIN( I ), I = 1, NPARMS ) - ELSE - DO 550 I = 1, NPARMS - INMIN( I ) = 1 - 550 CONTINUE - END IF -* -* Read the values for INWIN. -* - IF( NEP ) THEN - READ( NIN, FMT = * )( INWIN( I ), I = 1, NPARMS ) - DO 560 I = 1, NPARMS - IF( INWIN( I ).LT.0 ) THEN - WRITE( NOUT, FMT = 9989 )' INWIN ', INWIN( I ), 0 - FATAL = .TRUE. - END IF - 560 CONTINUE - WRITE( NOUT, FMT = 9983 )'INWIN: ', - $ ( INWIN( I ), I = 1, NPARMS ) - ELSE - DO 570 I = 1, NPARMS - INWIN( I ) = 1 - 570 CONTINUE - END IF -* -* Read the values for INIBL. -* - IF( NEP ) THEN - READ( NIN, FMT = * )( INIBL( I ), I = 1, NPARMS ) - DO 580 I = 1, NPARMS - IF( INIBL( I ).LT.0 ) THEN - WRITE( NOUT, FMT = 9989 )' INIBL ', INIBL( I ), 0 - FATAL = .TRUE. 
- END IF - 580 CONTINUE - WRITE( NOUT, FMT = 9983 )'INIBL: ', - $ ( INIBL( I ), I = 1, NPARMS ) - ELSE - DO 590 I = 1, NPARMS - INIBL( I ) = 1 - 590 CONTINUE - END IF -* -* Read the values for ISHFTS. -* - IF( NEP ) THEN - READ( NIN, FMT = * )( ISHFTS( I ), I = 1, NPARMS ) - DO 600 I = 1, NPARMS - IF( ISHFTS( I ).LT.0 ) THEN - WRITE( NOUT, FMT = 9989 )' ISHFTS ', ISHFTS( I ), 0 - FATAL = .TRUE. - END IF - 600 CONTINUE - WRITE( NOUT, FMT = 9983 )'ISHFTS: ', - $ ( ISHFTS( I ), I = 1, NPARMS ) - ELSE - DO 610 I = 1, NPARMS - ISHFTS( I ) = 1 - 610 CONTINUE - END IF -* -* Read the values for IACC22. -* - IF( NEP .OR. SGG ) THEN - READ( NIN, FMT = * )( IACC22( I ), I = 1, NPARMS ) - DO 620 I = 1, NPARMS - IF( IACC22( I ).LT.0 ) THEN - WRITE( NOUT, FMT = 9989 )' IACC22 ', IACC22( I ), 0 - FATAL = .TRUE. - END IF - 620 CONTINUE - WRITE( NOUT, FMT = 9983 )'IACC22: ', - $ ( IACC22( I ), I = 1, NPARMS ) - ELSE - DO 630 I = 1, NPARMS - IACC22( I ) = 1 - 630 CONTINUE - END IF -* -* Read the values for NBCOL. -* - IF( SGG ) THEN - READ( NIN, FMT = * )( NBCOL( I ), I = 1, NPARMS ) - DO 160 I = 1, NPARMS - IF( NBCOL( I ).LT.0 ) THEN - WRITE( NOUT, FMT = 9989 )'NBCOL ', NBCOL( I ), 0 - FATAL = .TRUE. - ELSE IF( NBCOL( I ).GT.NMAX ) THEN - WRITE( NOUT, FMT = 9988 )'NBCOL ', NBCOL( I ), NMAX - FATAL = .TRUE. - END IF - 160 CONTINUE - WRITE( NOUT, FMT = 9983 )'NBCOL:', - $ ( NBCOL( I ), I = 1, NPARMS ) - ELSE - DO 170 I = 1, NPARMS - NBCOL( I ) = 1 - 170 CONTINUE - END IF - END IF -* -* Calculate and print the machine dependent constants. -* - WRITE( NOUT, FMT = * ) - EPS = SLAMCH( 'Underflow threshold' ) - WRITE( NOUT, FMT = 9981 )'underflow', EPS - EPS = SLAMCH( 'Overflow threshold' ) - WRITE( NOUT, FMT = 9981 )'overflow ', EPS - EPS = SLAMCH( 'Epsilon' ) - WRITE( NOUT, FMT = 9981 )'precision', EPS -* -* Read the threshold value for the test ratios. -* - READ( NIN, FMT = * )THRESH - WRITE( NOUT, FMT = 9982 )THRESH - IF( SEP .OR. SVD .OR. 
SGG ) THEN -* -* Read the flag that indicates whether to test LAPACK routines. -* - READ( NIN, FMT = * )TSTCHK -* -* Read the flag that indicates whether to test driver routines. -* - READ( NIN, FMT = * )TSTDRV - END IF -* -* Read the flag that indicates whether to test the error exits. -* - READ( NIN, FMT = * )TSTERR -* -* Read the code describing how to set the random number seed. -* - READ( NIN, FMT = * )NEWSD -* -* If NEWSD = 2, read another line with 4 integers for the seed. -* - IF( NEWSD.EQ.2 ) - $ READ( NIN, FMT = * )( IOLDSD( I ), I = 1, 4 ) -* - DO 180 I = 1, 4 - ISEED( I ) = IOLDSD( I ) - 180 CONTINUE -* - IF( FATAL ) THEN - WRITE( NOUT, FMT = 9999 ) - STOP - END IF -* -* Read the input lines indicating the test path and its parameters. -* The first three characters indicate the test path, and the number -* of test matrix types must be the first nonblank item in columns -* 4-80. -* - 190 CONTINUE -* - IF( .NOT.( SGX .OR. SXV ) ) THEN -* - 200 CONTINUE - READ( NIN, FMT = '(A80)', END = 380 )LINE - C3 = LINE( 1: 3 ) - LENP = LEN( LINE ) - I = 3 - ITMP = 0 - I1 = 0 - 210 CONTINUE - I = I + 1 - IF( I.GT.LENP ) THEN - IF( I1.GT.0 ) THEN - GO TO 240 - ELSE - NTYPES = MAXT - GO TO 240 - END IF - END IF - IF( LINE( I: I ).NE.' ' .AND. LINE( I: I ).NE.',' ) THEN - I1 = I - C1 = LINE( I1: I1 ) -* -* Check that a valid integer was read -* - DO 220 K = 1, 10 - IF( C1.EQ.INTSTR( K: K ) ) THEN - IC = K - 1 - GO TO 230 - END IF - 220 CONTINUE - WRITE( NOUT, FMT = 9991 )I, LINE - GO TO 200 - 230 CONTINUE - ITMP = 10*ITMP + IC - GO TO 210 - ELSE IF( I1.GT.0 ) THEN - GO TO 240 - ELSE - GO TO 210 - END IF - 240 CONTINUE - NTYPES = ITMP -* -* Skip the tests if NTYPES is <= 0. -* - IF( .NOT.( SEV .OR. SES .OR. SVX .OR. SSX .OR. SGV .OR. - $ SGS ) .AND. NTYPES.LE.0 ) THEN - WRITE( NOUT, FMT = 9990 )C3 - GO TO 200 - END IF -* - ELSE - IF( SXV ) - $ C3 = 'SXV' - IF( SGX ) - $ C3 = 'SGX' - END IF -* -* Reset the random number seed. 
-* - IF( NEWSD.EQ.0 ) THEN - DO 250 K = 1, 4 - ISEED( K ) = IOLDSD( K ) - 250 CONTINUE - END IF -* - IF( LSAMEN( 3, C3, 'SHS' ) .OR. LSAMEN( 3, C3, 'NEP' ) ) THEN -* -* ------------------------------------- -* NEP: Nonsymmetric Eigenvalue Problem -* ------------------------------------- -* Vary the parameters -* NB = block size -* NBMIN = minimum block size -* NX = crossover point -* NS = number of shifts -* MAXB = minimum submatrix size -* - MAXTYP = 21 - NTYPES = MIN( MAXTYP, NTYPES ) - CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) - CALL XLAENV( 1, 1 ) - IF( TSTERR ) - $ CALL SERRHS( 'SHSEQR', NOUT ) - DO 270 I = 1, NPARMS - CALL XLAENV( 1, NBVAL( I ) ) - CALL XLAENV( 2, NBMIN( I ) ) - CALL XLAENV( 3, NXVAL( I ) ) - CALL XLAENV(12, MAX( 11, INMIN( I ) ) ) - CALL XLAENV(13, INWIN( I ) ) - CALL XLAENV(14, INIBL( I ) ) - CALL XLAENV(15, ISHFTS( I ) ) - CALL XLAENV(16, IACC22( I ) ) -* - IF( NEWSD.EQ.0 ) THEN - DO 260 K = 1, 4 - ISEED( K ) = IOLDSD( K ) - 260 CONTINUE - END IF - WRITE( NOUT, FMT = 9961 )C3, NBVAL( I ), NBMIN( I ), - $ NXVAL( I ), MAX( 11, INMIN(I)), - $ INWIN( I ), INIBL( I ), ISHFTS( I ), IACC22( I ) - CALL SCHKHS( NN, NVAL, MAXTYP, DOTYPE, ISEED, THRESH, NOUT, - $ A( 1, 1 ), NMAX, A( 1, 2 ), A( 1, 3 ), - $ A( 1, 4 ), A( 1, 5 ), NMAX, A( 1, 6 ), - $ A( 1, 7 ), D( 1, 1 ), D( 1, 2 ), D( 1, 3 ), - $ D( 1, 4 ), D( 1, 5 ), D( 1, 6 ), A( 1, 8 ), - $ A( 1, 9 ), A( 1, 10 ), A( 1, 11 ), A( 1, 12 ), - $ D( 1, 7 ), WORK, LWORK, IWORK, LOGWRK, RESULT, - $ INFO ) - IF( INFO.NE.0 ) - $ WRITE( NOUT, FMT = 9980 )'SCHKHS', INFO - 270 CONTINUE -* - ELSE IF( LSAMEN( 3, C3, 'SST' ) .OR. LSAMEN( 3, C3, 'SEP' ) - $ .OR. 
LSAMEN( 3, C3, 'SE2' ) ) THEN -* -* ---------------------------------- -* SEP: Symmetric Eigenvalue Problem -* ---------------------------------- -* Vary the parameters -* NB = block size -* NBMIN = minimum block size -* NX = crossover point -* - MAXTYP = 21 - NTYPES = MIN( MAXTYP, NTYPES ) - CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) - CALL XLAENV( 1, 1 ) - CALL XLAENV( 9, 25 ) - IF( TSTERR ) - $ CALL SERRST( 'SST', NOUT ) - DO 290 I = 1, NPARMS - CALL XLAENV( 1, NBVAL( I ) ) - CALL XLAENV( 2, NBMIN( I ) ) - CALL XLAENV( 3, NXVAL( I ) ) -* - IF( NEWSD.EQ.0 ) THEN - DO 280 K = 1, 4 - ISEED( K ) = IOLDSD( K ) - 280 CONTINUE - END IF - WRITE( NOUT, FMT = 9997 )C3, NBVAL( I ), NBMIN( I ), - $ NXVAL( I ) - IF( TSTCHK ) THEN - IF( LSAMEN( 3, C3, 'SE2' ) ) THEN - CALL SCHKST2STG( NN, NVAL, MAXTYP, DOTYPE, ISEED, THRESH, - $ NOUT, A( 1, 1 ), NMAX, A( 1, 2 ), D( 1, 1 ), - $ D( 1, 2 ), D( 1, 3 ), D( 1, 4 ), D( 1, 5 ), - $ D( 1, 6 ), D( 1, 7 ), D( 1, 8 ), D( 1, 9 ), - $ D( 1, 10 ), D( 1, 11 ), A( 1, 3 ), NMAX, - $ A( 1, 4 ), A( 1, 5 ), D( 1, 12 ), A( 1, 6 ), - $ WORK, LWORK, IWORK, LIWORK, RESULT, INFO ) - ELSE - CALL SCHKST( NN, NVAL, MAXTYP, DOTYPE, ISEED, THRESH, - $ NOUT, A( 1, 1 ), NMAX, A( 1, 2 ), D( 1, 1 ), - $ D( 1, 2 ), D( 1, 3 ), D( 1, 4 ), D( 1, 5 ), - $ D( 1, 6 ), D( 1, 7 ), D( 1, 8 ), D( 1, 9 ), - $ D( 1, 10 ), D( 1, 11 ), A( 1, 3 ), NMAX, - $ A( 1, 4 ), A( 1, 5 ), D( 1, 12 ), A( 1, 6 ), - $ WORK, LWORK, IWORK, LIWORK, RESULT, INFO ) - ENDIF - IF( INFO.NE.0 ) - $ WRITE( NOUT, FMT = 9980 )'SCHKST', INFO - END IF - IF( TSTDRV ) THEN - IF( LSAMEN( 3, C3, 'SE2' ) ) THEN - CALL SDRVST2STG( NN, NVAL, 18, DOTYPE, ISEED, THRESH, - $ NOUT, A( 1, 1 ), NMAX, D( 1, 3 ), D( 1, 4 ), - $ D( 1, 5 ), D( 1, 6 ), D( 1, 8 ), D( 1, 9 ), - $ D( 1, 10 ), D( 1, 11), A( 1, 2 ), NMAX, - $ A( 1, 3 ), D( 1, 12 ), A( 1, 4 ), WORK, - $ LWORK, IWORK, LIWORK, RESULT, INFO ) - ELSE - CALL SDRVST( NN, NVAL, 18, DOTYPE, ISEED, THRESH, - $ NOUT, A( 1, 1 ), NMAX, D( 1, 3 ), D( 1, 4 ), - $ 
D( 1, 5 ), D( 1, 6 ), D( 1, 8 ), D( 1, 9 ), - $ D( 1, 10 ), D( 1, 11), A( 1, 2 ), NMAX, - $ A( 1, 3 ), D( 1, 12 ), A( 1, 4 ), WORK, - $ LWORK, IWORK, LIWORK, RESULT, INFO ) - ENDIF - IF( INFO.NE.0 ) - $ WRITE( NOUT, FMT = 9980 )'SDRVST', INFO - END IF - 290 CONTINUE -* - ELSE IF( LSAMEN( 3, C3, 'SSG' ) ) THEN -* -* ---------------------------------------------- -* SSG: Symmetric Generalized Eigenvalue Problem -* ---------------------------------------------- -* Vary the parameters -* NB = block size -* NBMIN = minimum block size -* NX = crossover point -* - MAXTYP = 21 - NTYPES = MIN( MAXTYP, NTYPES ) - CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) - CALL XLAENV( 9, 25 ) - DO 310 I = 1, NPARMS - CALL XLAENV( 1, NBVAL( I ) ) - CALL XLAENV( 2, NBMIN( I ) ) - CALL XLAENV( 3, NXVAL( I ) ) -* - IF( NEWSD.EQ.0 ) THEN - DO 300 K = 1, 4 - ISEED( K ) = IOLDSD( K ) - 300 CONTINUE - END IF - WRITE( NOUT, FMT = 9997 )C3, NBVAL( I ), NBMIN( I ), - $ NXVAL( I ) - IF( TSTCHK ) THEN -* CALL SDRVSG( NN, NVAL, MAXTYP, DOTYPE, ISEED, THRESH, -* $ NOUT, A( 1, 1 ), NMAX, A( 1, 2 ), NMAX, -* $ D( 1, 3 ), A( 1, 3 ), NMAX, A( 1, 4 ), -* $ A( 1, 5 ), A( 1, 6 ), A( 1, 7 ), WORK, -* $ LWORK, IWORK, LIWORK, RESULT, INFO ) - CALL SDRVSG2STG( NN, NVAL, MAXTYP, DOTYPE, ISEED, THRESH, - $ NOUT, A( 1, 1 ), NMAX, A( 1, 2 ), NMAX, - $ D( 1, 3 ), D( 1, 3 ), A( 1, 3 ), NMAX, - $ A( 1, 4 ), A( 1, 5 ), A( 1, 6 ), - $ A( 1, 7 ), WORK, LWORK, IWORK, LIWORK, - $ RESULT, INFO ) - IF( INFO.NE.0 ) - $ WRITE( NOUT, FMT = 9980 )'SDRVSG', INFO - END IF - 310 CONTINUE -* - ELSE IF( LSAMEN( 3, C3, 'SBD' ) .OR. 
LSAMEN( 3, C3, 'SVD' ) ) THEN -* -* ---------------------------------- -* SVD: Singular Value Decomposition -* ---------------------------------- -* Vary the parameters -* NB = block size -* NBMIN = minimum block size -* NX = crossover point -* NRHS = number of right hand sides -* - MAXTYP = 16 - NTYPES = MIN( MAXTYP, NTYPES ) - CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) - CALL XLAENV( 1, 1 ) - CALL XLAENV( 9, 25 ) -* -* Test the error exits -* - IF( TSTERR .AND. TSTCHK ) - $ CALL SERRBD( 'SBD', NOUT ) - IF( TSTERR .AND. TSTDRV ) - $ CALL SERRED( 'SBD', NOUT ) -* - DO 330 I = 1, NPARMS - NRHS = NSVAL( I ) - CALL XLAENV( 1, NBVAL( I ) ) - CALL XLAENV( 2, NBMIN( I ) ) - CALL XLAENV( 3, NXVAL( I ) ) - IF( NEWSD.EQ.0 ) THEN - DO 320 K = 1, 4 - ISEED( K ) = IOLDSD( K ) - 320 CONTINUE - END IF - WRITE( NOUT, FMT = 9995 )C3, NBVAL( I ), NBMIN( I ), - $ NXVAL( I ), NRHS - IF( TSTCHK ) THEN - CALL SCHKBD( NN, MVAL, NVAL, MAXTYP, DOTYPE, NRHS, ISEED, - $ THRESH, A( 1, 1 ), NMAX, D( 1, 1 ), - $ D( 1, 2 ), D( 1, 3 ), D( 1, 4 ), A( 1, 2 ), - $ NMAX, A( 1, 3 ), A( 1, 4 ), A( 1, 5 ), NMAX, - $ A( 1, 6 ), NMAX, A( 1, 7 ), A( 1, 8 ), WORK, - $ LWORK, IWORK, NOUT, INFO ) - IF( INFO.NE.0 ) - $ WRITE( NOUT, FMT = 9980 )'SCHKBD', INFO - END IF - IF( TSTDRV ) - $ CALL SDRVBD( NN, MVAL, NVAL, MAXTYP, DOTYPE, ISEED, - $ THRESH, A( 1, 1 ), NMAX, A( 1, 2 ), NMAX, - $ A( 1, 3 ), NMAX, A( 1, 4 ), A( 1, 5 ), - $ A( 1, 6 ), D( 1, 1 ), D( 1, 2 ), D( 1, 3 ), - $ WORK, LWORK, IWORK, NOUT, INFO ) - 330 CONTINUE -* - ELSE IF( LSAMEN( 3, C3, 'SEV' ) ) THEN -* -* -------------------------------------------- -* SEV: Nonsymmetric Eigenvalue Problem Driver -* SGEEV (eigenvalues and eigenvectors) -* -------------------------------------------- -* - MAXTYP = 21 - NTYPES = MIN( MAXTYP, NTYPES ) - IF( NTYPES.LE.0 ) THEN - WRITE( NOUT, FMT = 9990 )C3 - ELSE - IF( TSTERR ) - $ CALL SERRED( C3, NOUT ) - CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) - CALL SDRVEV( NN, NVAL, NTYPES, DOTYPE, 
ISEED, THRESH, NOUT, - $ A( 1, 1 ), NMAX, A( 1, 2 ), D( 1, 1 ), - $ D( 1, 2 ), D( 1, 3 ), D( 1, 4 ), A( 1, 3 ), - $ NMAX, A( 1, 4 ), NMAX, A( 1, 5 ), NMAX, RESULT, - $ WORK, LWORK, IWORK, INFO ) - IF( INFO.NE.0 ) - $ WRITE( NOUT, FMT = 9980 )'SGEEV', INFO - END IF - WRITE( NOUT, FMT = 9973 ) - GO TO 10 -* - ELSE IF( LSAMEN( 3, C3, 'SES' ) ) THEN -* -* -------------------------------------------- -* SES: Nonsymmetric Eigenvalue Problem Driver -* SGEES (Schur form) -* -------------------------------------------- -* - MAXTYP = 21 - NTYPES = MIN( MAXTYP, NTYPES ) - IF( NTYPES.LE.0 ) THEN - WRITE( NOUT, FMT = 9990 )C3 - ELSE - IF( TSTERR ) - $ CALL SERRED( C3, NOUT ) - CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) - CALL SDRVES( NN, NVAL, NTYPES, DOTYPE, ISEED, THRESH, NOUT, - $ A( 1, 1 ), NMAX, A( 1, 2 ), A( 1, 3 ), - $ D( 1, 1 ), D( 1, 2 ), D( 1, 3 ), D( 1, 4 ), - $ A( 1, 4 ), NMAX, RESULT, WORK, LWORK, IWORK, - $ LOGWRK, INFO ) - IF( INFO.NE.0 ) - $ WRITE( NOUT, FMT = 9980 )'SGEES', INFO - END IF - WRITE( NOUT, FMT = 9973 ) - GO TO 10 -* - ELSE IF( LSAMEN( 3, C3, 'SVX' ) ) THEN -* -* -------------------------------------------------------------- -* SVX: Nonsymmetric Eigenvalue Problem Expert Driver -* SGEEVX (eigenvalues, eigenvectors and condition numbers) -* -------------------------------------------------------------- -* - MAXTYP = 21 - NTYPES = MIN( MAXTYP, NTYPES ) - IF( NTYPES.LT.0 ) THEN - WRITE( NOUT, FMT = 9990 )C3 - ELSE - IF( TSTERR ) - $ CALL SERRED( C3, NOUT ) - CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) - CALL SDRVVX( NN, NVAL, NTYPES, DOTYPE, ISEED, THRESH, NIN, - $ NOUT, A( 1, 1 ), NMAX, A( 1, 2 ), D( 1, 1 ), - $ D( 1, 2 ), D( 1, 3 ), D( 1, 4 ), A( 1, 3 ), - $ NMAX, A( 1, 4 ), NMAX, A( 1, 5 ), NMAX, - $ D( 1, 5 ), D( 1, 6 ), D( 1, 7 ), D( 1, 8 ), - $ D( 1, 9 ), D( 1, 10 ), D( 1, 11 ), D( 1, 12 ), - $ RESULT, WORK, LWORK, IWORK, INFO ) - IF( INFO.NE.0 ) - $ WRITE( NOUT, FMT = 9980 )'SGEEVX', INFO - END IF - WRITE( NOUT, FMT = 9973 ) - 
GO TO 10 -* - ELSE IF( LSAMEN( 3, C3, 'SSX' ) ) THEN -* -* --------------------------------------------------- -* SSX: Nonsymmetric Eigenvalue Problem Expert Driver -* SGEESX (Schur form and condition numbers) -* --------------------------------------------------- -* - MAXTYP = 21 - NTYPES = MIN( MAXTYP, NTYPES ) - IF( NTYPES.LT.0 ) THEN - WRITE( NOUT, FMT = 9990 )C3 - ELSE - IF( TSTERR ) - $ CALL SERRED( C3, NOUT ) - CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) - CALL SDRVSX( NN, NVAL, NTYPES, DOTYPE, ISEED, THRESH, NIN, - $ NOUT, A( 1, 1 ), NMAX, A( 1, 2 ), A( 1, 3 ), - $ D( 1, 1 ), D( 1, 2 ), D( 1, 3 ), D( 1, 4 ), - $ D( 1, 5 ), D( 1, 6 ), A( 1, 4 ), NMAX, - $ A( 1, 5 ), RESULT, WORK, LWORK, IWORK, LOGWRK, - $ INFO ) - IF( INFO.NE.0 ) - $ WRITE( NOUT, FMT = 9980 )'SGEESX', INFO - END IF - WRITE( NOUT, FMT = 9973 ) - GO TO 10 -* - ELSE IF( LSAMEN( 3, C3, 'SGG' ) ) THEN -* -* ------------------------------------------------- -* SGG: Generalized Nonsymmetric Eigenvalue Problem -* ------------------------------------------------- -* Vary the parameters -* NB = block size -* NBMIN = minimum block size -* NS = number of shifts -* MAXB = minimum submatrix size -* IACC22: structured matrix multiply -* NBCOL = minimum column dimension for blocks -* - MAXTYP = 26 - NTYPES = MIN( MAXTYP, NTYPES ) - CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) - CALL XLAENV(1,1) - IF( TSTCHK .AND. TSTERR ) - & CALL SERRGG( C3, NOUT ) - DO 350 I = 1, NPARMS - CALL XLAENV( 1, NBVAL( I ) ) - CALL XLAENV( 2, NBMIN( I ) ) - CALL XLAENV( 4, NSVAL( I ) ) - CALL XLAENV( 8, MXBVAL( I ) ) - CALL XLAENV( 16, IACC22( I ) ) - CALL XLAENV( 5, NBCOL( I ) ) -* - IF( NEWSD.EQ.0 ) THEN - DO 340 K = 1, 4 - ISEED( K ) = IOLDSD( K ) - 340 CONTINUE - END IF - WRITE( NOUT, FMT = 9996 )C3, NBVAL( I ), NBMIN( I ), - $ NSVAL( I ), MXBVAL( I ), IACC22( I ), NBCOL( I ) - TSTDIF = .FALSE. - THRSHN = 10. 
- IF( TSTCHK ) THEN - CALL SCHKGG( NN, NVAL, MAXTYP, DOTYPE, ISEED, THRESH, - $ TSTDIF, THRSHN, NOUT, A( 1, 1 ), NMAX, - $ A( 1, 2 ), A( 1, 3 ), A( 1, 4 ), A( 1, 5 ), - $ A( 1, 6 ), A( 1, 7 ), A( 1, 8 ), A( 1, 9 ), - $ NMAX, A( 1, 10 ), A( 1, 11 ), A( 1, 12 ), - $ D( 1, 1 ), D( 1, 2 ), D( 1, 3 ), D( 1, 4 ), - $ D( 1, 5 ), D( 1, 6 ), A( 1, 13 ), - $ A( 1, 14 ), WORK, LWORK, LOGWRK, RESULT, - $ INFO ) - IF( INFO.NE.0 ) - $ WRITE( NOUT, FMT = 9980 )'SCHKGG', INFO - END IF - 350 CONTINUE -* - ELSE IF( LSAMEN( 3, C3, 'SGS' ) ) THEN -* -* ------------------------------------------------- -* SGS: Generalized Nonsymmetric Eigenvalue Problem -* SGGES (Schur form) -* ------------------------------------------------- -* - MAXTYP = 26 - NTYPES = MIN( MAXTYP, NTYPES ) - IF( NTYPES.LE.0 ) THEN - WRITE( NOUT, FMT = 9990 )C3 - ELSE - IF( TSTERR ) - $ CALL SERRGG( C3, NOUT ) - CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) - CALL SDRGES( NN, NVAL, MAXTYP, DOTYPE, ISEED, THRESH, NOUT, - $ A( 1, 1 ), NMAX, A( 1, 2 ), A( 1, 3 ), - $ A( 1, 4 ), A( 1, 7 ), NMAX, A( 1, 8 ), - $ D( 1, 1 ), D( 1, 2 ), D( 1, 3 ), WORK, LWORK, - $ RESULT, LOGWRK, INFO ) -* - IF( INFO.NE.0 ) - $ WRITE( NOUT, FMT = 9980 )'SDRGES', INFO -* -* Blocked version -* - CALL XLAENV(16,1) - CALL SDRGES3( NN, NVAL, MAXTYP, DOTYPE, ISEED, THRESH, NOUT, - $ A( 1, 1 ), NMAX, A( 1, 2 ), A( 1, 3 ), - $ A( 1, 4 ), A( 1, 7 ), NMAX, A( 1, 8 ), - $ D( 1, 1 ), D( 1, 2 ), D( 1, 3 ), WORK, LWORK, - $ RESULT, LOGWRK, INFO ) -* - IF( INFO.NE.0 ) - $ WRITE( NOUT, FMT = 9980 )'SDRGES3', INFO - END IF - WRITE( NOUT, FMT = 9973 ) - GO TO 10 -* - ELSE IF( SGX ) THEN -* -* ------------------------------------------------- -* SGX: Generalized Nonsymmetric Eigenvalue Problem -* SGGESX (Schur form and condition numbers) -* ------------------------------------------------- -* - MAXTYP = 5 - NTYPES = MAXTYP - IF( NN.LT.0 ) THEN - WRITE( NOUT, FMT = 9990 )C3 - ELSE - IF( TSTERR ) - $ CALL SERRGG( C3, NOUT ) - CALL ALAREQ( C3, NTYPES, 
DOTYPE, MAXTYP, NIN, NOUT ) - CALL XLAENV( 5, 2 ) - CALL SDRGSX( NN, NCMAX, THRESH, NIN, NOUT, A( 1, 1 ), NMAX, - $ A( 1, 2 ), A( 1, 3 ), A( 1, 4 ), A( 1, 5 ), - $ A( 1, 6 ), D( 1, 1 ), D( 1, 2 ), D( 1, 3 ), - $ C( 1, 1 ), NCMAX*NCMAX, A( 1, 12 ), WORK, - $ LWORK, IWORK, LIWORK, LOGWRK, INFO ) - IF( INFO.NE.0 ) - $ WRITE( NOUT, FMT = 9980 )'SDRGSX', INFO - END IF - WRITE( NOUT, FMT = 9973 ) - GO TO 10 -* - ELSE IF( LSAMEN( 3, C3, 'SGV' ) ) THEN -* -* ------------------------------------------------- -* SGV: Generalized Nonsymmetric Eigenvalue Problem -* SGGEV (Eigenvalue/vector form) -* ------------------------------------------------- -* - MAXTYP = 26 - NTYPES = MIN( MAXTYP, NTYPES ) - IF( NTYPES.LE.0 ) THEN - WRITE( NOUT, FMT = 9990 )C3 - ELSE - IF( TSTERR ) - $ CALL SERRGG( C3, NOUT ) - CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) - CALL SDRGEV( NN, NVAL, MAXTYP, DOTYPE, ISEED, THRESH, NOUT, - $ A( 1, 1 ), NMAX, A( 1, 2 ), A( 1, 3 ), - $ A( 1, 4 ), A( 1, 7 ), NMAX, A( 1, 8 ), - $ A( 1, 9 ), NMAX, D( 1, 1 ), D( 1, 2 ), - $ D( 1, 3 ), D( 1, 4 ), D( 1, 5 ), D( 1, 6 ), - $ WORK, LWORK, RESULT, INFO ) - IF( INFO.NE.0 ) - $ WRITE( NOUT, FMT = 9980 )'SDRGEV', INFO -* -* Blocked version -* - CALL SDRGEV3( NN, NVAL, MAXTYP, DOTYPE, ISEED, THRESH, NOUT, - $ A( 1, 1 ), NMAX, A( 1, 2 ), A( 1, 3 ), - $ A( 1, 4 ), A( 1, 7 ), NMAX, A( 1, 8 ), - $ A( 1, 9 ), NMAX, D( 1, 1 ), D( 1, 2 ), - $ D( 1, 3 ), D( 1, 4 ), D( 1, 5 ), D( 1, 6 ), - $ WORK, LWORK, RESULT, INFO ) - IF( INFO.NE.0 ) - $ WRITE( NOUT, FMT = 9980 )'SDRGEV3', INFO - END IF - WRITE( NOUT, FMT = 9973 ) - GO TO 10 -* - ELSE IF( SXV ) THEN -* -* ------------------------------------------------- -* SXV: Generalized Nonsymmetric Eigenvalue Problem -* SGGEVX (eigenvalue/vector with condition numbers) -* ------------------------------------------------- -* - MAXTYP = 2 - NTYPES = MAXTYP - IF( NN.LT.0 ) THEN - WRITE( NOUT, FMT = 9990 )C3 - ELSE - IF( TSTERR ) - $ CALL SERRGG( C3, NOUT ) - CALL ALAREQ( C3, NTYPES, 
DOTYPE, MAXTYP, NIN, NOUT ) - CALL SDRGVX( NN, THRESH, NIN, NOUT, A( 1, 1 ), NMAX, - $ A( 1, 2 ), A( 1, 3 ), A( 1, 4 ), D( 1, 1 ), - $ D( 1, 2 ), D( 1, 3 ), A( 1, 5 ), A( 1, 6 ), - $ IWORK( 1 ), IWORK( 2 ), D( 1, 4 ), D( 1, 5 ), - $ D( 1, 6 ), D( 1, 7 ), D( 1, 8 ), D( 1, 9 ), - $ WORK, LWORK, IWORK( 3 ), LIWORK-2, RESULT, - $ LOGWRK, INFO ) -* - IF( INFO.NE.0 ) - $ WRITE( NOUT, FMT = 9980 )'SDRGVX', INFO - END IF - WRITE( NOUT, FMT = 9973 ) - GO TO 10 -* - ELSE IF( LSAMEN( 3, C3, 'SSB' ) ) THEN -* -* ------------------------------ -* SSB: Symmetric Band Reduction -* ------------------------------ -* - MAXTYP = 15 - NTYPES = MIN( MAXTYP, NTYPES ) - CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) - IF( TSTERR ) - $ CALL SERRST( 'SSB', NOUT ) -* CALL SCHKSB( NN, NVAL, NK, KVAL, MAXTYP, DOTYPE, ISEED, THRESH, -* $ NOUT, A( 1, 1 ), NMAX, D( 1, 1 ), D( 1, 2 ), -* $ A( 1, 2 ), NMAX, WORK, LWORK, RESULT, INFO ) - CALL SCHKSB2STG( NN, NVAL, NK, KVAL, MAXTYP, DOTYPE, ISEED, - $ THRESH, NOUT, A( 1, 1 ), NMAX, D( 1, 1 ), - $ D( 1, 2 ), D( 1, 3 ), D( 1, 4 ), D( 1, 5 ), - $ A( 1, 2 ), NMAX, WORK, LWORK, RESULT, INFO ) - IF( INFO.NE.0 ) - $ WRITE( NOUT, FMT = 9980 )'SCHKSB', INFO -* - ELSE IF( LSAMEN( 3, C3, 'SBB' ) ) THEN -* -* ------------------------------ -* SBB: General Band Reduction -* ------------------------------ -* - MAXTYP = 15 - NTYPES = MIN( MAXTYP, NTYPES ) - CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) - DO 370 I = 1, NPARMS - NRHS = NSVAL( I ) -* - IF( NEWSD.EQ.0 ) THEN - DO 360 K = 1, 4 - ISEED( K ) = IOLDSD( K ) - 360 CONTINUE - END IF - WRITE( NOUT, FMT = 9966 )C3, NRHS - CALL SCHKBB( NN, MVAL, NVAL, NK, KVAL, MAXTYP, DOTYPE, NRHS, - $ ISEED, THRESH, NOUT, A( 1, 1 ), NMAX, - $ A( 1, 2 ), 2*NMAX, D( 1, 1 ), D( 1, 2 ), - $ A( 1, 4 ), NMAX, A( 1, 5 ), NMAX, A( 1, 6 ), - $ NMAX, A( 1, 7 ), WORK, LWORK, RESULT, INFO ) - IF( INFO.NE.0 ) - $ WRITE( NOUT, FMT = 9980 )'SCHKBB', INFO - 370 CONTINUE -* - ELSE IF( LSAMEN( 3, C3, 'GLM' ) ) THEN -* -* 
----------------------------------------- -* GLM: Generalized Linear Regression Model -* ----------------------------------------- -* - CALL XLAENV( 1, 1 ) - IF( TSTERR ) - $ CALL SERRGG( 'GLM', NOUT ) - CALL SCKGLM( NN, MVAL, PVAL, NVAL, NTYPES, ISEED, THRESH, NMAX, - $ A( 1, 1 ), A( 1, 2 ), B( 1, 1 ), B( 1, 2 ), X, - $ WORK, D( 1, 1 ), NIN, NOUT, INFO ) - IF( INFO.NE.0 ) - $ WRITE( NOUT, FMT = 9980 )'SCKGLM', INFO -* - ELSE IF( LSAMEN( 3, C3, 'GQR' ) ) THEN -* -* ------------------------------------------ -* GQR: Generalized QR and RQ factorizations -* ------------------------------------------ -* - CALL XLAENV( 1, 1 ) - IF( TSTERR ) - $ CALL SERRGG( 'GQR', NOUT ) - CALL SCKGQR( NN, MVAL, NN, PVAL, NN, NVAL, NTYPES, ISEED, - $ THRESH, NMAX, A( 1, 1 ), A( 1, 2 ), A( 1, 3 ), - $ A( 1, 4 ), TAUA, B( 1, 1 ), B( 1, 2 ), B( 1, 3 ), - $ B( 1, 4 ), B( 1, 5 ), TAUB, WORK, D( 1, 1 ), NIN, - $ NOUT, INFO ) - IF( INFO.NE.0 ) - $ WRITE( NOUT, FMT = 9980 )'SCKGQR', INFO -* - ELSE IF( LSAMEN( 3, C3, 'GSV' ) ) THEN -* -* ---------------------------------------------- -* GSV: Generalized Singular Value Decomposition -* ---------------------------------------------- -* - CALL XLAENV( 1, 1 ) - IF( TSTERR ) - $ CALL SERRGG( 'GSV', NOUT ) - CALL SCKGSV( NN, MVAL, PVAL, NVAL, NTYPES, ISEED, THRESH, NMAX, - $ A( 1, 1 ), A( 1, 2 ), B( 1, 1 ), B( 1, 2 ), - $ A( 1, 3 ), B( 1, 3 ), A( 1, 4 ), TAUA, TAUB, - $ B( 1, 4 ), IWORK, WORK, D( 1, 1 ), NIN, NOUT, - $ INFO ) - IF( INFO.NE.0 ) - $ WRITE( NOUT, FMT = 9980 )'SCKGSV', INFO -* - ELSE IF( LSAMEN( 3, C3, 'CSD' ) ) THEN -* -* ---------------------------------------------- -* CSD: CS Decomposition -* ---------------------------------------------- -* - CALL XLAENV(1,1) - IF( TSTERR ) - $ CALL SERRGG( 'CSD', NOUT ) - CALL SCKCSD( NN, MVAL, PVAL, NVAL, NTYPES, ISEED, THRESH, NMAX, - $ A( 1, 1 ), A( 1, 2 ), A( 1, 3 ), A( 1, 4 ), - $ A( 1, 5 ), A( 1, 6 ), A( 1, 7 ), IWORK, WORK, - $ D( 1, 1 ), NIN, NOUT, INFO ) - IF( INFO.NE.0 ) - $ WRITE( NOUT, 
FMT = 9980 )'SCKCSD', INFO -* - ELSE IF( LSAMEN( 3, C3, 'LSE' ) ) THEN -* -* -------------------------------------- -* LSE: Constrained Linear Least Squares -* -------------------------------------- -* - CALL XLAENV( 1, 1 ) - IF( TSTERR ) - $ CALL SERRGG( 'LSE', NOUT ) - CALL SCKLSE( NN, MVAL, PVAL, NVAL, NTYPES, ISEED, THRESH, NMAX, - $ A( 1, 1 ), A( 1, 2 ), B( 1, 1 ), B( 1, 2 ), X, - $ WORK, D( 1, 1 ), NIN, NOUT, INFO ) - IF( INFO.NE.0 ) - $ WRITE( NOUT, FMT = 9980 )'SCKLSE', INFO -* - ELSE - WRITE( NOUT, FMT = * ) - WRITE( NOUT, FMT = * ) - WRITE( NOUT, FMT = 9992 )C3 - END IF - IF( .NOT.( SGX .OR. SXV ) ) - $ GO TO 190 - 380 CONTINUE - WRITE( NOUT, FMT = 9994 ) - S2 = SECOND( ) - WRITE( NOUT, FMT = 9993 )S2 - S1 -* - 9999 FORMAT( / ' Execution not attempted due to input errors' ) - 9997 FORMAT( / / 1X, A3, ': NB =', I4, ', NBMIN =', I4, ', NX =', I4 ) - 9996 FORMAT( / / 1X, A3, ': NB =', I4, ', NBMIN =', I4, ', NS =', I4, - $ ', MAXB =', I4, ', IACC22 =', I4, ', NBCOL =', I4 ) - 9995 FORMAT( / / 1X, A3, ': NB =', I4, ', NBMIN =', I4, ', NX =', I4, - $ ', NRHS =', I4 ) - 9994 FORMAT( / / ' End of tests' ) - 9993 FORMAT( ' Total time used = ', F12.2, ' seconds', / ) - 9992 FORMAT( 1X, A3, ': Unrecognized path name' ) - 9991 FORMAT( / / ' *** Invalid integer value in column ', I2, - $ ' of input', ' line:', / A79 ) - 9990 FORMAT( / / 1X, A3, ' routines were not tested' ) - 9989 FORMAT( ' Invalid input value: ', A, '=', I6, '; must be >=', - $ I6 ) - 9988 FORMAT( ' Invalid input value: ', A, '=', I6, '; must be <=', - $ I6 ) - 9987 FORMAT( ' Tests of the Nonsymmetric Eigenvalue Problem routines' ) - 9986 FORMAT( ' Tests of the Symmetric Eigenvalue Problem routines' ) - 9985 FORMAT( ' Tests of the Singular Value Decomposition routines' ) - 9984 FORMAT( / ' The following parameter values will be used:' ) - 9983 FORMAT( 4X, A, 10I6, / 10X, 10I6 ) - 9982 FORMAT( / ' Routines pass computational tests if test ratio is ', - $ 'less than', F8.2, / ) - 9981 FORMAT( ' 
Relative machine ', A, ' is taken to be', E16.6 ) - 9980 FORMAT( ' *** Error code from ', A, ' = ', I4 ) - 9979 FORMAT( / ' Tests of the Nonsymmetric Eigenvalue Problem Driver', - $ / ' SGEEV (eigenvalues and eigevectors)' ) - 9978 FORMAT( / ' Tests of the Nonsymmetric Eigenvalue Problem Driver', - $ / ' SGEES (Schur form)' ) - 9977 FORMAT( / ' Tests of the Nonsymmetric Eigenvalue Problem Expert', - $ ' Driver', / ' SGEEVX (eigenvalues, eigenvectors and', - $ ' condition numbers)' ) - 9976 FORMAT( / ' Tests of the Nonsymmetric Eigenvalue Problem Expert', - $ ' Driver', / ' SGEESX (Schur form and condition', - $ ' numbers)' ) - 9975 FORMAT( / ' Tests of the Generalized Nonsymmetric Eigenvalue ', - $ 'Problem routines' ) - 9974 FORMAT( ' Tests of SSBTRD', / ' (reduction of a symmetric band ', - $ 'matrix to tridiagonal form)' ) - 9973 FORMAT( / 1X, 71( '-' ) ) - 9972 FORMAT( / ' LAPACK VERSION ', I1, '.', I1, '.', I1 ) - 9971 FORMAT( / ' Tests of the Generalized Linear Regression Model ', - $ 'routines' ) - 9970 FORMAT( / ' Tests of the Generalized QR and RQ routines' ) - 9969 FORMAT( / ' Tests of the Generalized Singular Value', - $ ' Decomposition routines' ) - 9968 FORMAT( / ' Tests of the Linear Least Squares routines' ) - 9967 FORMAT( ' Tests of SGBBRD', / ' (reduction of a general band ', - $ 'matrix to real bidiagonal form)' ) - 9966 FORMAT( / / 1X, A3, ': NRHS =', I4 ) - 9965 FORMAT( / ' Tests of the Generalized Nonsymmetric Eigenvalue ', - $ 'Problem Expert Driver SGGESX' ) - 9964 FORMAT( / ' Tests of the Generalized Nonsymmetric Eigenvalue ', - $ 'Problem Driver SGGES' ) - 9963 FORMAT( / ' Tests of the Generalized Nonsymmetric Eigenvalue ', - $ 'Problem Driver SGGEV' ) - 9962 FORMAT( / ' Tests of the Generalized Nonsymmetric Eigenvalue ', - $ 'Problem Expert Driver SGGEVX' ) - 9961 FORMAT( / / 1X, A3, ': NB =', I4, ', NBMIN =', I4, ', NX =', I4, - $ ', INMIN=', I4, - $ ', INWIN =', I4, ', INIBL =', I4, ', ISHFTS =', I4, - $ ', IACC22 =', I4) - 9960 FORMAT( 
/ ' Tests of the CS Decomposition routines' ) -* -* End of SCHKEE -* - END From 23a0d1bc1fb11a48a97c5d292730c752823f41de Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sun, 28 Feb 2021 18:47:06 +0100 Subject: [PATCH 108/134] Delete zchkee.f --- lapack-netlib/TESTING/EIG/zchkee.f | 2505 ---------------------------- 1 file changed, 2505 deletions(-) delete mode 100644 lapack-netlib/TESTING/EIG/zchkee.f diff --git a/lapack-netlib/TESTING/EIG/zchkee.f b/lapack-netlib/TESTING/EIG/zchkee.f deleted file mode 100644 index 6807ef7e4..000000000 --- a/lapack-netlib/TESTING/EIG/zchkee.f +++ /dev/null @@ -1,2505 +0,0 @@ -*> \brief \b ZCHKEE -* -* =========== DOCUMENTATION =========== -* -* Online html documentation available at -* http://www.netlib.org/lapack/explore-html/ -* -* Definition: -* =========== -* -* PROGRAM ZCHKEE -* -* -*> \par Purpose: -* ============= -*> -*> \verbatim -*> -*> ZCHKEE tests the COMPLEX*16 LAPACK subroutines for the matrix -*> eigenvalue problem. The test paths in this version are -*> -*> NEP (Nonsymmetric Eigenvalue Problem): -*> Test ZGEHRD, ZUNGHR, ZHSEQR, ZTREVC, ZHSEIN, and ZUNMHR -*> -*> SEP (Hermitian Eigenvalue Problem): -*> Test ZHETRD, ZUNGTR, ZSTEQR, ZSTERF, ZSTEIN, ZSTEDC, -*> and drivers ZHEEV(X), ZHBEV(X), ZHPEV(X), -*> ZHEEVD, ZHBEVD, ZHPEVD -*> -*> SVD (Singular Value Decomposition): -*> Test ZGEBRD, ZUNGBR, and ZBDSQR -*> and the drivers ZGESVD, ZGESDD -*> -*> ZEV (Nonsymmetric Eigenvalue/eigenvector Driver): -*> Test ZGEEV -*> -*> ZES (Nonsymmetric Schur form Driver): -*> Test ZGEES -*> -*> ZVX (Nonsymmetric Eigenvalue/eigenvector Expert Driver): -*> Test ZGEEVX -*> -*> ZSX (Nonsymmetric Schur form Expert Driver): -*> Test ZGEESX -*> -*> ZGG (Generalized Nonsymmetric Eigenvalue Problem): -*> Test ZGGHD3, ZGGBAL, ZGGBAK, ZHGEQZ, and ZTGEVC -*> -*> ZGS (Generalized Nonsymmetric Schur form Driver): -*> Test ZGGES -*> -*> ZGV (Generalized Nonsymmetric Eigenvalue/eigenvector Driver): -*> Test ZGGEV -*> -*> ZGX (Generalized 
Nonsymmetric Schur form Expert Driver): -*> Test ZGGESX -*> -*> ZXV (Generalized Nonsymmetric Eigenvalue/eigenvector Expert Driver): -*> Test ZGGEVX -*> -*> ZSG (Hermitian Generalized Eigenvalue Problem): -*> Test ZHEGST, ZHEGV, ZHEGVD, ZHEGVX, ZHPGST, ZHPGV, ZHPGVD, -*> ZHPGVX, ZHBGST, ZHBGV, ZHBGVD, and ZHBGVX -*> -*> ZHB (Hermitian Band Eigenvalue Problem): -*> Test ZHBTRD -*> -*> ZBB (Band Singular Value Decomposition): -*> Test ZGBBRD -*> -*> ZEC (Eigencondition estimation): -*> Test ZTRSYL, ZTREXC, ZTRSNA, and ZTRSEN -*> -*> ZBL (Balancing a general matrix) -*> Test ZGEBAL -*> -*> ZBK (Back transformation on a balanced matrix) -*> Test ZGEBAK -*> -*> ZGL (Balancing a matrix pair) -*> Test ZGGBAL -*> -*> ZGK (Back transformation on a matrix pair) -*> Test ZGGBAK -*> -*> GLM (Generalized Linear Regression Model): -*> Tests ZGGGLM -*> -*> GQR (Generalized QR and RQ factorizations): -*> Tests ZGGQRF and ZGGRQF -*> -*> GSV (Generalized Singular Value Decomposition): -*> Tests ZGGSVD, ZGGSVP, ZTGSJA, ZLAGS2, ZLAPLL, and ZLAPMT -*> -*> CSD (CS decomposition): -*> Tests ZUNCSD -*> -*> LSE (Constrained Linear Least Squares): -*> Tests ZGGLSE -*> -*> Each test path has a different set of inputs, but the data sets for -*> the driver routines xEV, xES, xVX, and xSX can be concatenated in a -*> single input file. The first line of input should contain one of the -*> 3-character path names in columns 1-3. The number of remaining lines -*> depends on what is found on the first line. -*> -*> The number of matrix types used in testing is often controllable from -*> the input file. 
The number of matrix types for each path, and the -*> test routine that describes them, is as follows: -*> -*> Path name(s) Types Test routine -*> -*> ZHS or NEP 21 ZCHKHS -*> ZST or SEP 21 ZCHKST (routines) -*> 18 ZDRVST (drivers) -*> ZBD or SVD 16 ZCHKBD (routines) -*> 5 ZDRVBD (drivers) -*> ZEV 21 ZDRVEV -*> ZES 21 ZDRVES -*> ZVX 21 ZDRVVX -*> ZSX 21 ZDRVSX -*> ZGG 26 ZCHKGG (routines) -*> ZGS 26 ZDRGES -*> ZGX 5 ZDRGSX -*> ZGV 26 ZDRGEV -*> ZXV 2 ZDRGVX -*> ZSG 21 ZDRVSG -*> ZHB 15 ZCHKHB -*> ZBB 15 ZCHKBB -*> ZEC - ZCHKEC -*> ZBL - ZCHKBL -*> ZBK - ZCHKBK -*> ZGL - ZCHKGL -*> ZGK - ZCHKGK -*> GLM 8 ZCKGLM -*> GQR 8 ZCKGQR -*> GSV 8 ZCKGSV -*> CSD 3 ZCKCSD -*> LSE 8 ZCKLSE -*> -*>----------------------------------------------------------------------- -*> -*> NEP input file: -*> -*> line 2: NN, INTEGER -*> Number of values of N. -*> -*> line 3: NVAL, INTEGER array, dimension (NN) -*> The values for the matrix dimension N. -*> -*> line 4: NPARMS, INTEGER -*> Number of values of the parameters NB, NBMIN, NX, NS, and -*> MAXB. -*> -*> line 5: NBVAL, INTEGER array, dimension (NPARMS) -*> The values for the blocksize NB. -*> -*> line 6: NBMIN, INTEGER array, dimension (NPARMS) -*> The values for the minimum blocksize NBMIN. -*> -*> line 7: NXVAL, INTEGER array, dimension (NPARMS) -*> The values for the crossover point NX. -*> -*> line 8: INMIN, INTEGER array, dimension (NPARMS) -*> LAHQR vs TTQRE crossover point, >= 11 -*> -*> line 9: INWIN, INTEGER array, dimension (NPARMS) -*> recommended deflation window size -*> -*> line 10: INIBL, INTEGER array, dimension (NPARMS) -*> nibble crossover point -*> -*> line 11: ISHFTS, INTEGER array, dimension (NPARMS) -*> number of simultaneous shifts) -*> -*> line 12: IACC22, INTEGER array, dimension (NPARMS) -*> select structured matrix multiply: 0, 1 or 2) -*> -*> line 13: THRESH -*> Threshold value for the test ratios. 
Information will be -*> printed about each test for which the test ratio is greater -*> than or equal to the threshold. To have all of the test -*> ratios printed, use THRESH = 0.0 . -*> -*> line 14: NEWSD, INTEGER -*> A code indicating how to set the random number seed. -*> = 0: Set the seed to a default value before each run -*> = 1: Initialize the seed to a default value only before the -*> first run -*> = 2: Like 1, but use the seed values on the next line -*> -*> If line 14 was 2: -*> -*> line 15: INTEGER array, dimension (4) -*> Four integer values for the random number seed. -*> -*> lines 15-EOF: The remaining lines occur in sets of 1 or 2 and allow -*> the user to specify the matrix types. Each line contains -*> a 3-character path name in columns 1-3, and the number -*> of matrix types must be the first nonblank item in columns -*> 4-80. If the number of matrix types is at least 1 but is -*> less than the maximum number of possible types, a second -*> line will be read to get the numbers of the matrix types to -*> be used. For example, -*> NEP 21 -*> requests all of the matrix types for the nonsymmetric -*> eigenvalue problem, while -*> NEP 4 -*> 9 10 11 12 -*> requests only matrices of type 9, 10, 11, and 12. -*> -*> The valid 3-character path names are 'NEP' or 'ZHS' for the -*> nonsymmetric eigenvalue routines. -*> -*>----------------------------------------------------------------------- -*> -*> SEP or ZSG input file: -*> -*> line 2: NN, INTEGER -*> Number of values of N. -*> -*> line 3: NVAL, INTEGER array, dimension (NN) -*> The values for the matrix dimension N. -*> -*> line 4: NPARMS, INTEGER -*> Number of values of the parameters NB, NBMIN, and NX. -*> -*> line 5: NBVAL, INTEGER array, dimension (NPARMS) -*> The values for the blocksize NB. -*> -*> line 6: NBMIN, INTEGER array, dimension (NPARMS) -*> The values for the minimum blocksize NBMIN. -*> -*> line 7: NXVAL, INTEGER array, dimension (NPARMS) -*> The values for the crossover point NX. 
-*> -*> line 8: THRESH -*> Threshold value for the test ratios. Information will be -*> printed about each test for which the test ratio is greater -*> than or equal to the threshold. -*> -*> line 9: TSTCHK, LOGICAL -*> Flag indicating whether or not to test the LAPACK routines. -*> -*> line 10: TSTDRV, LOGICAL -*> Flag indicating whether or not to test the driver routines. -*> -*> line 11: TSTERR, LOGICAL -*> Flag indicating whether or not to test the error exits for -*> the LAPACK routines and driver routines. -*> -*> line 12: NEWSD, INTEGER -*> A code indicating how to set the random number seed. -*> = 0: Set the seed to a default value before each run -*> = 1: Initialize the seed to a default value only before the -*> first run -*> = 2: Like 1, but use the seed values on the next line -*> -*> If line 12 was 2: -*> -*> line 13: INTEGER array, dimension (4) -*> Four integer values for the random number seed. -*> -*> lines 13-EOF: Lines specifying matrix types, as for NEP. -*> The valid 3-character path names are 'SEP' or 'ZST' for the -*> Hermitian eigenvalue routines and driver routines, and -*> 'ZSG' for the routines for the Hermitian generalized -*> eigenvalue problem. -*> -*>----------------------------------------------------------------------- -*> -*> SVD input file: -*> -*> line 2: NN, INTEGER -*> Number of values of M and N. -*> -*> line 3: MVAL, INTEGER array, dimension (NN) -*> The values for the matrix row dimension M. -*> -*> line 4: NVAL, INTEGER array, dimension (NN) -*> The values for the matrix column dimension N. -*> -*> line 5: NPARMS, INTEGER -*> Number of values of the parameter NB, NBMIN, NX, and NRHS. -*> -*> line 6: NBVAL, INTEGER array, dimension (NPARMS) -*> The values for the blocksize NB. -*> -*> line 7: NBMIN, INTEGER array, dimension (NPARMS) -*> The values for the minimum blocksize NBMIN. -*> -*> line 8: NXVAL, INTEGER array, dimension (NPARMS) -*> The values for the crossover point NX. 
-*> -*> line 9: NSVAL, INTEGER array, dimension (NPARMS) -*> The values for the number of right hand sides NRHS. -*> -*> line 10: THRESH -*> Threshold value for the test ratios. Information will be -*> printed about each test for which the test ratio is greater -*> than or equal to the threshold. -*> -*> line 11: TSTCHK, LOGICAL -*> Flag indicating whether or not to test the LAPACK routines. -*> -*> line 12: TSTDRV, LOGICAL -*> Flag indicating whether or not to test the driver routines. -*> -*> line 13: TSTERR, LOGICAL -*> Flag indicating whether or not to test the error exits for -*> the LAPACK routines and driver routines. -*> -*> line 14: NEWSD, INTEGER -*> A code indicating how to set the random number seed. -*> = 0: Set the seed to a default value before each run -*> = 1: Initialize the seed to a default value only before the -*> first run -*> = 2: Like 1, but use the seed values on the next line -*> -*> If line 14 was 2: -*> -*> line 15: INTEGER array, dimension (4) -*> Four integer values for the random number seed. -*> -*> lines 15-EOF: Lines specifying matrix types, as for NEP. -*> The 3-character path names are 'SVD' or 'ZBD' for both the -*> SVD routines and the SVD driver routines. -*> -*>----------------------------------------------------------------------- -*> -*> ZEV and ZES data files: -*> -*> line 1: 'ZEV' or 'ZES' in columns 1 to 3. -*> -*> line 2: NSIZES, INTEGER -*> Number of sizes of matrices to use. Should be at least 0 -*> and at most 20. If NSIZES = 0, no testing is done -*> (although the remaining 3 lines are still read). -*> -*> line 3: NN, INTEGER array, dimension(NSIZES) -*> Dimensions of matrices to be tested. 
-*> -*> line 4: NB, NBMIN, NX, NS, NBCOL, INTEGERs -*> These integer parameters determine how blocking is done -*> (see ILAENV for details) -*> NB : block size -*> NBMIN : minimum block size -*> NX : minimum dimension for blocking -*> NS : number of shifts in xHSEQR -*> NBCOL : minimum column dimension for blocking -*> -*> line 5: THRESH, REAL -*> The test threshold against which computed residuals are -*> compared. Should generally be in the range from 10. to 20. -*> If it is 0., all test case data will be printed. -*> -*> line 6: NEWSD, INTEGER -*> A code indicating how to set the random number seed. -*> = 0: Set the seed to a default value before each run -*> = 1: Initialize the seed to a default value only before the -*> first run -*> = 2: Like 1, but use the seed values on the next line -*> -*> If line 6 was 2: -*> -*> line 7: INTEGER array, dimension (4) -*> Four integer values for the random number seed. -*> -*> lines 8 and following: Lines specifying matrix types, as for NEP. -*> The 3-character path name is 'ZEV' to test CGEEV, or -*> 'ZES' to test CGEES. -*> -*>----------------------------------------------------------------------- -*> -*> The ZVX data has two parts. The first part is identical to ZEV, -*> and the second part consists of test matrices with precomputed -*> solutions. -*> -*> line 1: 'ZVX' in columns 1-3. -*> -*> line 2: NSIZES, INTEGER -*> If NSIZES = 0, no testing of randomly generated examples -*> is done, but any precomputed examples are tested. -*> -*> line 3: NN, INTEGER array, dimension(NSIZES) -*> -*> line 4: NB, NBMIN, NX, NS, NBCOL, INTEGERs -*> -*> line 5: THRESH, REAL -*> -*> line 6: NEWSD, INTEGER -*> -*> If line 6 was 2: -*> -*> line 7: INTEGER array, dimension (4) -*> -*> lines 8 and following: The first line contains 'ZVX' in columns 1-3 -*> followed by the number of matrix types, possibly with -*> a second line to specify certain matrix types. 
-*> If the number of matrix types = 0, no testing of randomly -*> generated examples is done, but any precomputed examples -*> are tested. -*> -*> remaining lines : Each matrix is stored on 1+N+N**2 lines, where N is -*> its dimension. The first line contains the dimension N and -*> ISRT (two integers). ISRT indicates whether the last N lines -*> are sorted by increasing real part of the eigenvalue -*> (ISRT=0) or by increasing imaginary part (ISRT=1). The next -*> N**2 lines contain the matrix rowwise, one entry per line. -*> The last N lines correspond to each eigenvalue. Each of -*> these last N lines contains 4 real values: the real part of -*> the eigenvalues, the imaginary part of the eigenvalue, the -*> reciprocal condition number of the eigenvalues, and the -*> reciprocal condition number of the vector eigenvector. The -*> end of data is indicated by dimension N=0. Even if no data -*> is to be tested, there must be at least one line containing -*> N=0. -*> -*>----------------------------------------------------------------------- -*> -*> The ZSX data is like ZVX. The first part is identical to ZEV, and the -*> second part consists of test matrices with precomputed solutions. -*> -*> line 1: 'ZSX' in columns 1-3. -*> -*> line 2: NSIZES, INTEGER -*> If NSIZES = 0, no testing of randomly generated examples -*> is done, but any precomputed examples are tested. -*> -*> line 3: NN, INTEGER array, dimension(NSIZES) -*> -*> line 4: NB, NBMIN, NX, NS, NBCOL, INTEGERs -*> -*> line 5: THRESH, REAL -*> -*> line 6: NEWSD, INTEGER -*> -*> If line 6 was 2: -*> -*> line 7: INTEGER array, dimension (4) -*> -*> lines 8 and following: The first line contains 'ZSX' in columns 1-3 -*> followed by the number of matrix types, possibly with -*> a second line to specify certain matrix types. -*> If the number of matrix types = 0, no testing of randomly -*> generated examples is done, but any precomputed examples -*> are tested. 
-*> -*> remaining lines : Each matrix is stored on 3+N**2 lines, where N is -*> its dimension. The first line contains the dimension N, the -*> dimension M of an invariant subspace, and ISRT. The second -*> line contains M integers, identifying the eigenvalues in the -*> invariant subspace (by their position in a list of -*> eigenvalues ordered by increasing real part (if ISRT=0) or -*> by increasing imaginary part (if ISRT=1)). The next N**2 -*> lines contain the matrix rowwise. The last line contains the -*> reciprocal condition number for the average of the selected -*> eigenvalues, and the reciprocal condition number for the -*> corresponding right invariant subspace. The end of data in -*> indicated by a line containing N=0, M=0, and ISRT = 0. Even -*> if no data is to be tested, there must be at least one line -*> containing N=0, M=0 and ISRT=0. -*> -*>----------------------------------------------------------------------- -*> -*> ZGG input file: -*> -*> line 2: NN, INTEGER -*> Number of values of N. -*> -*> line 3: NVAL, INTEGER array, dimension (NN) -*> The values for the matrix dimension N. -*> -*> line 4: NPARMS, INTEGER -*> Number of values of the parameters NB, NBMIN, NBCOL, NS, and -*> MAXB. -*> -*> line 5: NBVAL, INTEGER array, dimension (NPARMS) -*> The values for the blocksize NB. -*> -*> line 6: NBMIN, INTEGER array, dimension (NPARMS) -*> The values for NBMIN, the minimum row dimension for blocks. -*> -*> line 7: NSVAL, INTEGER array, dimension (NPARMS) -*> The values for the number of shifts. -*> -*> line 8: MXBVAL, INTEGER array, dimension (NPARMS) -*> The values for MAXB, used in determining minimum blocksize. -*> -*> line 9: IACC22, INTEGER array, dimension (NPARMS) -*> select structured matrix multiply: 1 or 2) -*> -*> line 10: NBCOL, INTEGER array, dimension (NPARMS) -*> The values for NBCOL, the minimum column dimension for -*> blocks. -*> -*> line 11: THRESH -*> Threshold value for the test ratios. 
Information will be -*> printed about each test for which the test ratio is greater -*> than or equal to the threshold. -*> -*> line 12: TSTCHK, LOGICAL -*> Flag indicating whether or not to test the LAPACK routines. -*> -*> line 13: TSTDRV, LOGICAL -*> Flag indicating whether or not to test the driver routines. -*> -*> line 14: TSTERR, LOGICAL -*> Flag indicating whether or not to test the error exits for -*> the LAPACK routines and driver routines. -*> -*> line 15: NEWSD, INTEGER -*> A code indicating how to set the random number seed. -*> = 0: Set the seed to a default value before each run -*> = 1: Initialize the seed to a default value only before the -*> first run -*> = 2: Like 1, but use the seed values on the next line -*> -*> If line 15 was 2: -*> -*> line 16: INTEGER array, dimension (4) -*> Four integer values for the random number seed. -*> -*> lines 17-EOF: Lines specifying matrix types, as for NEP. -*> The 3-character path name is 'ZGG' for the generalized -*> eigenvalue problem routines and driver routines. -*> -*>----------------------------------------------------------------------- -*> -*> ZGS and ZGV input files: -*> -*> line 1: 'ZGS' or 'ZGV' in columns 1 to 3. -*> -*> line 2: NN, INTEGER -*> Number of values of N. -*> -*> line 3: NVAL, INTEGER array, dimension(NN) -*> Dimensions of matrices to be tested. -*> -*> line 4: NB, NBMIN, NX, NS, NBCOL, INTEGERs -*> These integer parameters determine how blocking is done -*> (see ILAENV for details) -*> NB : block size -*> NBMIN : minimum block size -*> NX : minimum dimension for blocking -*> NS : number of shifts in xHGEQR -*> NBCOL : minimum column dimension for blocking -*> -*> line 5: THRESH, REAL -*> The test threshold against which computed residuals are -*> compared. Should generally be in the range from 10. to 20. -*> If it is 0., all test case data will be printed. -*> -*> line 6: TSTERR, LOGICAL -*> Flag indicating whether or not to test the error exits. 
-*> -*> line 7: NEWSD, INTEGER -*> A code indicating how to set the random number seed. -*> = 0: Set the seed to a default value before each run -*> = 1: Initialize the seed to a default value only before the -*> first run -*> = 2: Like 1, but use the seed values on the next line -*> -*> If line 17 was 2: -*> -*> line 7: INTEGER array, dimension (4) -*> Four integer values for the random number seed. -*> -*> lines 7-EOF: Lines specifying matrix types, as for NEP. -*> The 3-character path name is 'ZGS' for the generalized -*> eigenvalue problem routines and driver routines. -*> -*>----------------------------------------------------------------------- -*> -*> ZGX input file: -*> line 1: 'ZGX' in columns 1 to 3. -*> -*> line 2: N, INTEGER -*> Value of N. -*> -*> line 3: NB, NBMIN, NX, NS, NBCOL, INTEGERs -*> These integer parameters determine how blocking is done -*> (see ILAENV for details) -*> NB : block size -*> NBMIN : minimum block size -*> NX : minimum dimension for blocking -*> NS : number of shifts in xHGEQR -*> NBCOL : minimum column dimension for blocking -*> -*> line 4: THRESH, REAL -*> The test threshold against which computed residuals are -*> compared. Should generally be in the range from 10. to 20. -*> Information will be printed about each test for which the -*> test ratio is greater than or equal to the threshold. -*> -*> line 5: TSTERR, LOGICAL -*> Flag indicating whether or not to test the error exits for -*> the LAPACK routines and driver routines. -*> -*> line 6: NEWSD, INTEGER -*> A code indicating how to set the random number seed. -*> = 0: Set the seed to a default value before each run -*> = 1: Initialize the seed to a default value only before the -*> first run -*> = 2: Like 1, but use the seed values on the next line -*> -*> If line 6 was 2: -*> -*> line 7: INTEGER array, dimension (4) -*> Four integer values for the random number seed. -*> -*> If line 2 was 0: -*> -*> line 7-EOF: Precomputed examples are tested. 
-*> -*> remaining lines : Each example is stored on 3+2*N*N lines, where N is -*> its dimension. The first line contains the dimension (a -*> single integer). The next line contains an integer k such -*> that only the last k eigenvalues will be selected and appear -*> in the leading diagonal blocks of $A$ and $B$. The next N*N -*> lines contain the matrix A, one element per line. The next N*N -*> lines contain the matrix B. The last line contains the -*> reciprocal of the eigenvalue cluster condition number and the -*> reciprocal of the deflating subspace (associated with the -*> selected eigencluster) condition number. The end of data is -*> indicated by dimension N=0. Even if no data is to be tested, -*> there must be at least one line containing N=0. -*> -*>----------------------------------------------------------------------- -*> -*> ZXV input files: -*> line 1: 'ZXV' in columns 1 to 3. -*> -*> line 2: N, INTEGER -*> Value of N. -*> -*> line 3: NB, NBMIN, NX, NS, NBCOL, INTEGERs -*> These integer parameters determine how blocking is done -*> (see ILAENV for details) -*> NB : block size -*> NBMIN : minimum block size -*> NX : minimum dimension for blocking -*> NS : number of shifts in xHGEQR -*> NBCOL : minimum column dimension for blocking -*> -*> line 4: THRESH, REAL -*> The test threshold against which computed residuals are -*> compared. Should generally be in the range from 10. to 20. -*> Information will be printed about each test for which the -*> test ratio is greater than or equal to the threshold. -*> -*> line 5: TSTERR, LOGICAL -*> Flag indicating whether or not to test the error exits for -*> the LAPACK routines and driver routines. -*> -*> line 6: NEWSD, INTEGER -*> A code indicating how to set the random number seed. 
-*> = 0: Set the seed to a default value before each run -*> = 1: Initialize the seed to a default value only before the -*> first run -*> = 2: Like 1, but use the seed values on the next line -*> -*> If line 6 was 2: -*> -*> line 7: INTEGER array, dimension (4) -*> Four integer values for the random number seed. -*> -*> If line 2 was 0: -*> -*> line 7-EOF: Precomputed examples are tested. -*> -*> remaining lines : Each example is stored on 3+2*N*N lines, where N is -*> its dimension. The first line contains the dimension (a -*> single integer). The next N*N lines contain the matrix A, one -*> element per line. The next N*N lines contain the matrix B. -*> The next line contains the reciprocals of the eigenvalue -*> condition numbers. The last line contains the reciprocals of -*> the eigenvector condition numbers. The end of data is -*> indicated by dimension N=0. Even if no data is to be tested, -*> there must be at least one line containing N=0. -*> -*>----------------------------------------------------------------------- -*> -*> ZHB input file: -*> -*> line 2: NN, INTEGER -*> Number of values of N. -*> -*> line 3: NVAL, INTEGER array, dimension (NN) -*> The values for the matrix dimension N. -*> -*> line 4: NK, INTEGER -*> Number of values of K. -*> -*> line 5: KVAL, INTEGER array, dimension (NK) -*> The values for the matrix dimension K. -*> -*> line 6: THRESH -*> Threshold value for the test ratios. Information will be -*> printed about each test for which the test ratio is greater -*> than or equal to the threshold. -*> -*> line 7: NEWSD, INTEGER -*> A code indicating how to set the random number seed. -*> = 0: Set the seed to a default value before each run -*> = 1: Initialize the seed to a default value only before the -*> first run -*> = 2: Like 1, but use the seed values on the next line -*> -*> If line 7 was 2: -*> -*> line 8: INTEGER array, dimension (4) -*> Four integer values for the random number seed. 
-*> -*> lines 8-EOF: Lines specifying matrix types, as for NEP. -*> The 3-character path name is 'ZHB'. -*> -*>----------------------------------------------------------------------- -*> -*> ZBB input file: -*> -*> line 2: NN, INTEGER -*> Number of values of M and N. -*> -*> line 3: MVAL, INTEGER array, dimension (NN) -*> The values for the matrix row dimension M. -*> -*> line 4: NVAL, INTEGER array, dimension (NN) -*> The values for the matrix column dimension N. -*> -*> line 4: NK, INTEGER -*> Number of values of K. -*> -*> line 5: KVAL, INTEGER array, dimension (NK) -*> The values for the matrix bandwidth K. -*> -*> line 6: NPARMS, INTEGER -*> Number of values of the parameter NRHS -*> -*> line 7: NSVAL, INTEGER array, dimension (NPARMS) -*> The values for the number of right hand sides NRHS. -*> -*> line 8: THRESH -*> Threshold value for the test ratios. Information will be -*> printed about each test for which the test ratio is greater -*> than or equal to the threshold. -*> -*> line 9: NEWSD, INTEGER -*> A code indicating how to set the random number seed. -*> = 0: Set the seed to a default value before each run -*> = 1: Initialize the seed to a default value only before the -*> first run -*> = 2: Like 1, but use the seed values on the next line -*> -*> If line 9 was 2: -*> -*> line 10: INTEGER array, dimension (4) -*> Four integer values for the random number seed. -*> -*> lines 10-EOF: Lines specifying matrix types, as for SVD. -*> The 3-character path name is 'ZBB'. -*> -*>----------------------------------------------------------------------- -*> -*> ZEC input file: -*> -*> line 2: THRESH, REAL -*> Threshold value for the test ratios. Information will be -*> printed about each test for which the test ratio is greater -*> than or equal to the threshold. -*> -*> lines 3-EOF: -*> -*> Input for testing the eigencondition routines consists of a set of -*> specially constructed test cases and their solutions. 
The data -*> format is not intended to be modified by the user. -*> -*>----------------------------------------------------------------------- -*> -*> ZBL and ZBK input files: -*> -*> line 1: 'ZBL' in columns 1-3 to test CGEBAL, or 'ZBK' in -*> columns 1-3 to test CGEBAK. -*> -*> The remaining lines consist of specially constructed test cases. -*> -*>----------------------------------------------------------------------- -*> -*> ZGL and ZGK input files: -*> -*> line 1: 'ZGL' in columns 1-3 to test ZGGBAL, or 'ZGK' in -*> columns 1-3 to test ZGGBAK. -*> -*> The remaining lines consist of specially constructed test cases. -*> -*>----------------------------------------------------------------------- -*> -*> GLM data file: -*> -*> line 1: 'GLM' in columns 1 to 3. -*> -*> line 2: NN, INTEGER -*> Number of values of M, P, and N. -*> -*> line 3: MVAL, INTEGER array, dimension(NN) -*> Values of M (row dimension). -*> -*> line 4: PVAL, INTEGER array, dimension(NN) -*> Values of P (row dimension). -*> -*> line 5: NVAL, INTEGER array, dimension(NN) -*> Values of N (column dimension), note M <= N <= M+P. -*> -*> line 6: THRESH, REAL -*> Threshold value for the test ratios. Information will be -*> printed about each test for which the test ratio is greater -*> than or equal to the threshold. -*> -*> line 7: TSTERR, LOGICAL -*> Flag indicating whether or not to test the error exits for -*> the LAPACK routines and driver routines. -*> -*> line 8: NEWSD, INTEGER -*> A code indicating how to set the random number seed. -*> = 0: Set the seed to a default value before each run -*> = 1: Initialize the seed to a default value only before the -*> first run -*> = 2: Like 1, but use the seed values on the next line -*> -*> If line 8 was 2: -*> -*> line 9: INTEGER array, dimension (4) -*> Four integer values for the random number seed. -*> -*> lines 9-EOF: Lines specifying matrix types, as for NEP. 
-*> The 3-character path name is 'GLM' for the generalized -*> linear regression model routines. -*> -*>----------------------------------------------------------------------- -*> -*> GQR data file: -*> -*> line 1: 'GQR' in columns 1 to 3. -*> -*> line 2: NN, INTEGER -*> Number of values of M, P, and N. -*> -*> line 3: MVAL, INTEGER array, dimension(NN) -*> Values of M. -*> -*> line 4: PVAL, INTEGER array, dimension(NN) -*> Values of P. -*> -*> line 5: NVAL, INTEGER array, dimension(NN) -*> Values of N. -*> -*> line 6: THRESH, REAL -*> Threshold value for the test ratios. Information will be -*> printed about each test for which the test ratio is greater -*> than or equal to the threshold. -*> -*> line 7: TSTERR, LOGICAL -*> Flag indicating whether or not to test the error exits for -*> the LAPACK routines and driver routines. -*> -*> line 8: NEWSD, INTEGER -*> A code indicating how to set the random number seed. -*> = 0: Set the seed to a default value before each run -*> = 1: Initialize the seed to a default value only before the -*> first run -*> = 2: Like 1, but use the seed values on the next line -*> -*> If line 8 was 2: -*> -*> line 9: INTEGER array, dimension (4) -*> Four integer values for the random number seed. -*> -*> lines 9-EOF: Lines specifying matrix types, as for NEP. -*> The 3-character path name is 'GQR' for the generalized -*> QR and RQ routines. -*> -*>----------------------------------------------------------------------- -*> -*> GSV data file: -*> -*> line 1: 'GSV' in columns 1 to 3. -*> -*> line 2: NN, INTEGER -*> Number of values of M, P, and N. -*> -*> line 3: MVAL, INTEGER array, dimension(NN) -*> Values of M (row dimension). -*> -*> line 4: PVAL, INTEGER array, dimension(NN) -*> Values of P (row dimension). -*> -*> line 5: NVAL, INTEGER array, dimension(NN) -*> Values of N (column dimension). -*> -*> line 6: THRESH, REAL -*> Threshold value for the test ratios. 
Information will be -*> printed about each test for which the test ratio is greater -*> than or equal to the threshold. -*> -*> line 7: TSTERR, LOGICAL -*> Flag indicating whether or not to test the error exits for -*> the LAPACK routines and driver routines. -*> -*> line 8: NEWSD, INTEGER -*> A code indicating how to set the random number seed. -*> = 0: Set the seed to a default value before each run -*> = 1: Initialize the seed to a default value only before the -*> first run -*> = 2: Like 1, but use the seed values on the next line -*> -*> If line 8 was 2: -*> -*> line 9: INTEGER array, dimension (4) -*> Four integer values for the random number seed. -*> -*> lines 9-EOF: Lines specifying matrix types, as for NEP. -*> The 3-character path name is 'GSV' for the generalized -*> SVD routines. -*> -*>----------------------------------------------------------------------- -*> -*> CSD data file: -*> -*> line 1: 'CSD' in columns 1 to 3. -*> -*> line 2: NM, INTEGER -*> Number of values of M, P, and N. -*> -*> line 3: MVAL, INTEGER array, dimension(NM) -*> Values of M (row and column dimension of orthogonal matrix). -*> -*> line 4: PVAL, INTEGER array, dimension(NM) -*> Values of P (row dimension of top-left block). -*> -*> line 5: NVAL, INTEGER array, dimension(NM) -*> Values of N (column dimension of top-left block). -*> -*> line 6: THRESH, REAL -*> Threshold value for the test ratios. Information will be -*> printed about each test for which the test ratio is greater -*> than or equal to the threshold. -*> -*> line 7: TSTERR, LOGICAL -*> Flag indicating whether or not to test the error exits for -*> the LAPACK routines and driver routines. -*> -*> line 8: NEWSD, INTEGER -*> A code indicating how to set the random number seed. 
-*> = 0: Set the seed to a default value before each run -*> = 1: Initialize the seed to a default value only before the -*> first run -*> = 2: Like 1, but use the seed values on the next line -*> -*> If line 8 was 2: -*> -*> line 9: INTEGER array, dimension (4) -*> Four integer values for the random number seed. -*> -*> lines 9-EOF: Lines specifying matrix types, as for NEP. -*> The 3-character path name is 'CSD' for the CSD routine. -*> -*>----------------------------------------------------------------------- -*> -*> LSE data file: -*> -*> line 1: 'LSE' in columns 1 to 3. -*> -*> line 2: NN, INTEGER -*> Number of values of M, P, and N. -*> -*> line 3: MVAL, INTEGER array, dimension(NN) -*> Values of M. -*> -*> line 4: PVAL, INTEGER array, dimension(NN) -*> Values of P. -*> -*> line 5: NVAL, INTEGER array, dimension(NN) -*> Values of N, note P <= N <= P+M. -*> -*> line 6: THRESH, REAL -*> Threshold value for the test ratios. Information will be -*> printed about each test for which the test ratio is greater -*> than or equal to the threshold. -*> -*> line 7: TSTERR, LOGICAL -*> Flag indicating whether or not to test the error exits for -*> the LAPACK routines and driver routines. -*> -*> line 8: NEWSD, INTEGER -*> A code indicating how to set the random number seed. -*> = 0: Set the seed to a default value before each run -*> = 1: Initialize the seed to a default value only before the -*> first run -*> = 2: Like 1, but use the seed values on the next line -*> -*> If line 8 was 2: -*> -*> line 9: INTEGER array, dimension (4) -*> Four integer values for the random number seed. -*> -*> lines 9-EOF: Lines specifying matrix types, as for NEP. -*> The 3-character path name is 'GSV' for the generalized -*> SVD routines. -*> -*>----------------------------------------------------------------------- -*> -*> NMAX is currently set to 132 and must be at least 12 for some of the -*> precomputed examples, and LWORK = NMAX*(5*NMAX+20) in the parameter -*> statements below. 
For SVD, we assume NRHS may be as big as N. The -*> parameter NEED is set to 14 to allow for 14 N-by-N matrices for ZGG. -*> \endverbatim -* -* Arguments: -* ========== -* -* -* Authors: -* ======== -* -*> \author Univ. of Tennessee -*> \author Univ. of California Berkeley -*> \author Univ. of Colorado Denver -*> \author NAG Ltd. -* -*> \date June 2016 -* -*> \ingroup complex16_eig -* -* ===================================================================== - PROGRAM ZCHKEE -* -* -- LAPACK test routine (version 3.7.0) -- -* -- LAPACK is a software package provided by Univ. of Tennessee, -- -* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* June 2016 -* -* ===================================================================== -* -* .. Parameters .. - INTEGER NMAX - PARAMETER ( NMAX = 132 ) - INTEGER NCMAX - PARAMETER ( NCMAX = 20 ) - INTEGER NEED - PARAMETER ( NEED = 14 ) - INTEGER LWORK - PARAMETER ( LWORK = NMAX*( 5*NMAX+20 ) ) - INTEGER LIWORK - PARAMETER ( LIWORK = NMAX*( NMAX+20 ) ) - INTEGER MAXIN - PARAMETER ( MAXIN = 20 ) - INTEGER MAXT - PARAMETER ( MAXT = 30 ) - INTEGER NIN, NOUT - PARAMETER ( NIN = 5, NOUT = 6 ) -* .. -* .. Local Scalars .. - LOGICAL ZBK, ZBL, ZES, ZEV, ZGK, ZGL, ZGS, ZGV, ZGX, - $ ZSX, ZVX, ZXV, CSD, FATAL, GLM, GQR, GSV, LSE, - $ NEP, SEP, SVD, TSTCHK, TSTDIF, TSTDRV, TSTERR, - $ ZBB, ZGG, ZHB - CHARACTER C1 - CHARACTER*3 C3, PATH - CHARACTER*32 VNAME - CHARACTER*10 INTSTR - CHARACTER*80 LINE - INTEGER I, I1, IC, INFO, ITMP, K, LENP, MAXTYP, NEWSD, - $ NK, NN, NPARMS, NRHS, NTYPES, - $ VERS_MAJOR, VERS_MINOR, VERS_PATCH - DOUBLE PRECISION EPS, S1, S2, THRESH, THRSHN -* .. -* .. Local Arrays .. 
- LOGICAL DOTYPE( MAXT ), LOGWRK( NMAX ) - INTEGER IOLDSD( 4 ), ISEED( 4 ), IWORK( LIWORK ), - $ KVAL( MAXIN ), MVAL( MAXIN ), MXBVAL( MAXIN ), - $ NBCOL( MAXIN ), NBMIN( MAXIN ), NBVAL( MAXIN ), - $ NSVAL( MAXIN ), NVAL( MAXIN ), NXVAL( MAXIN ), - $ PVAL( MAXIN ) - INTEGER INMIN( MAXIN ), INWIN( MAXIN ), INIBL( MAXIN ), - $ ISHFTS( MAXIN ), IACC22( MAXIN ) - DOUBLE PRECISION ALPHA( NMAX ), BETA( NMAX ), DR( NMAX, 12 ), - $ RESULT( 500 ), RWORK( LWORK ), S( NMAX*NMAX ) - COMPLEX*16 A( NMAX*NMAX, NEED ), B( NMAX*NMAX, 5 ), - $ C( NCMAX*NCMAX, NCMAX*NCMAX ), DC( NMAX, 6 ), - $ TAUA( NMAX ), TAUB( NMAX ), WORK( LWORK ), - $ X( 5*NMAX ) -* .. -* .. External Functions .. - LOGICAL LSAMEN - DOUBLE PRECISION DLAMCH, DSECND - EXTERNAL LSAMEN, DLAMCH, DSECND -* .. -* .. External Subroutines .. - EXTERNAL ALAREQ, XLAENV, ZCHKBB, ZCHKBD, ZCHKBK, ZCHKBL, - $ ZCHKEC, ZCHKGG, ZCHKGK, ZCHKGL, ZCHKHB, ZCHKHS, - $ ZCHKST, ZCKCSD, ZCKGLM, ZCKGQR, ZCKGSV, ZCKLSE, - $ ZDRGES, ZDRGEV, ZDRGSX, ZDRGVX, ZDRVBD, ZDRVES, - $ ZDRVEV, ZDRVSG, ZDRVST, ZDRVSX, ZDRVVX, - $ ZERRBD, ZERRED, ZERRGG, ZERRHS, ZERRST, ILAVER, - $ ZDRGES3, ZDRGEV3, - $ ZCHKST2STG, ZDRVST2STG, ZCHKHB2STG -* .. -* .. Intrinsic Functions .. - INTRINSIC LEN, MIN -* .. -* .. Scalars in Common .. - LOGICAL LERR, OK - CHARACTER*32 SRNAMT - INTEGER INFOT, MAXB, NPROC, NSHIFT, NUNIT, SELDIM, - $ SELOPT -* .. -* .. Arrays in Common .. - LOGICAL SELVAL( 20 ) - INTEGER IPARMS( 100 ) - DOUBLE PRECISION SELWI( 20 ), SELWR( 20 ) -* .. -* .. Common blocks .. - COMMON / CENVIR / NPROC, NSHIFT, MAXB - COMMON / INFOC / INFOT, NUNIT, OK, LERR - COMMON / SRNAMC / SRNAMT - COMMON / SSLCT / SELOPT, SELDIM, SELVAL, SELWR, SELWI - COMMON / CLAENV / IPARMS -* .. -* .. Data statements .. - DATA INTSTR / '0123456789' / - DATA IOLDSD / 0, 0, 0, 1 / -* .. -* .. Executable Statements .. -* - A = 0.0 - B = 0.0 - C = 0.0 - DC = 0.0 - S1 = DSECND( ) - FATAL = .FALSE. 
- NUNIT = NOUT -* -* Return to here to read multiple sets of data -* - 10 CONTINUE -* -* Read the first line and set the 3-character test path -* - READ( NIN, FMT = '(A80)', END = 380 )LINE - PATH = LINE( 1: 3 ) - NEP = LSAMEN( 3, PATH, 'NEP' ) .OR. LSAMEN( 3, PATH, 'ZHS' ) - SEP = LSAMEN( 3, PATH, 'SEP' ) .OR. LSAMEN( 3, PATH, 'ZST' ) .OR. - $ LSAMEN( 3, PATH, 'ZSG' ) .OR. LSAMEN( 3, PATH, 'SE2' ) - SVD = LSAMEN( 3, PATH, 'SVD' ) .OR. LSAMEN( 3, PATH, 'ZBD' ) - ZEV = LSAMEN( 3, PATH, 'ZEV' ) - ZES = LSAMEN( 3, PATH, 'ZES' ) - ZVX = LSAMEN( 3, PATH, 'ZVX' ) - ZSX = LSAMEN( 3, PATH, 'ZSX' ) - ZGG = LSAMEN( 3, PATH, 'ZGG' ) - ZGS = LSAMEN( 3, PATH, 'ZGS' ) - ZGX = LSAMEN( 3, PATH, 'ZGX' ) - ZGV = LSAMEN( 3, PATH, 'ZGV' ) - ZXV = LSAMEN( 3, PATH, 'ZXV' ) - ZHB = LSAMEN( 3, PATH, 'ZHB' ) - ZBB = LSAMEN( 3, PATH, 'ZBB' ) - GLM = LSAMEN( 3, PATH, 'GLM' ) - GQR = LSAMEN( 3, PATH, 'GQR' ) .OR. LSAMEN( 3, PATH, 'GRQ' ) - GSV = LSAMEN( 3, PATH, 'GSV' ) - CSD = LSAMEN( 3, PATH, 'CSD' ) - LSE = LSAMEN( 3, PATH, 'LSE' ) - ZBL = LSAMEN( 3, PATH, 'ZBL' ) - ZBK = LSAMEN( 3, PATH, 'ZBK' ) - ZGL = LSAMEN( 3, PATH, 'ZGL' ) - ZGK = LSAMEN( 3, PATH, 'ZGK' ) -* -* Report values of parameters. -* - IF( PATH.EQ.' 
' ) THEN - GO TO 10 - ELSE IF( NEP ) THEN - WRITE( NOUT, FMT = 9987 ) - ELSE IF( SEP ) THEN - WRITE( NOUT, FMT = 9986 ) - ELSE IF( SVD ) THEN - WRITE( NOUT, FMT = 9985 ) - ELSE IF( ZEV ) THEN - WRITE( NOUT, FMT = 9979 ) - ELSE IF( ZES ) THEN - WRITE( NOUT, FMT = 9978 ) - ELSE IF( ZVX ) THEN - WRITE( NOUT, FMT = 9977 ) - ELSE IF( ZSX ) THEN - WRITE( NOUT, FMT = 9976 ) - ELSE IF( ZGG ) THEN - WRITE( NOUT, FMT = 9975 ) - ELSE IF( ZGS ) THEN - WRITE( NOUT, FMT = 9964 ) - ELSE IF( ZGX ) THEN - WRITE( NOUT, FMT = 9965 ) - ELSE IF( ZGV ) THEN - WRITE( NOUT, FMT = 9963 ) - ELSE IF( ZXV ) THEN - WRITE( NOUT, FMT = 9962 ) - ELSE IF( ZHB ) THEN - WRITE( NOUT, FMT = 9974 ) - ELSE IF( ZBB ) THEN - WRITE( NOUT, FMT = 9967 ) - ELSE IF( GLM ) THEN - WRITE( NOUT, FMT = 9971 ) - ELSE IF( GQR ) THEN - WRITE( NOUT, FMT = 9970 ) - ELSE IF( GSV ) THEN - WRITE( NOUT, FMT = 9969 ) - ELSE IF( CSD ) THEN - WRITE( NOUT, FMT = 9960 ) - ELSE IF( LSE ) THEN - WRITE( NOUT, FMT = 9968 ) - ELSE IF( ZBL ) THEN -* -* ZGEBAL: Balancing -* - CALL ZCHKBL( NIN, NOUT ) - GO TO 380 - ELSE IF( ZBK ) THEN -* -* ZGEBAK: Back transformation -* - CALL ZCHKBK( NIN, NOUT ) - GO TO 380 - ELSE IF( ZGL ) THEN -* -* ZGGBAL: Balancing -* - CALL ZCHKGL( NIN, NOUT ) - GO TO 380 - ELSE IF( ZGK ) THEN -* -* ZGGBAK: Back transformation -* - CALL ZCHKGK( NIN, NOUT ) - GO TO 380 - ELSE IF( LSAMEN( 3, PATH, 'ZEC' ) ) THEN -* -* ZEC: Eigencondition estimation -* - READ( NIN, FMT = * )THRESH - CALL XLAENV( 1, 1 ) - CALL XLAENV( 12, 1 ) - TSTERR = .TRUE. - CALL ZCHKEC( THRESH, TSTERR, NIN, NOUT ) - GO TO 380 - ELSE - WRITE( NOUT, FMT = 9992 )PATH - GO TO 380 - END IF - CALL ILAVER( VERS_MAJOR, VERS_MINOR, VERS_PATCH ) - WRITE( NOUT, FMT = 9972 ) VERS_MAJOR, VERS_MINOR, VERS_PATCH - WRITE( NOUT, FMT = 9984 ) -* -* Read the number of values of M, P, and N. -* - READ( NIN, FMT = * )NN - IF( NN.LT.0 ) THEN - WRITE( NOUT, FMT = 9989 )' NN ', NN, 1 - NN = 0 - FATAL = .TRUE. 
- ELSE IF( NN.GT.MAXIN ) THEN - WRITE( NOUT, FMT = 9988 )' NN ', NN, MAXIN - NN = 0 - FATAL = .TRUE. - END IF -* -* Read the values of M -* - IF( .NOT.( ZGX .OR. ZXV ) ) THEN - READ( NIN, FMT = * )( MVAL( I ), I = 1, NN ) - IF( SVD ) THEN - VNAME = ' M ' - ELSE - VNAME = ' N ' - END IF - DO 20 I = 1, NN - IF( MVAL( I ).LT.0 ) THEN - WRITE( NOUT, FMT = 9989 )VNAME, MVAL( I ), 0 - FATAL = .TRUE. - ELSE IF( MVAL( I ).GT.NMAX ) THEN - WRITE( NOUT, FMT = 9988 )VNAME, MVAL( I ), NMAX - FATAL = .TRUE. - END IF - 20 CONTINUE - WRITE( NOUT, FMT = 9983 )'M: ', ( MVAL( I ), I = 1, NN ) - END IF -* -* Read the values of P -* - IF( GLM .OR. GQR .OR. GSV .OR. CSD .OR. LSE ) THEN - READ( NIN, FMT = * )( PVAL( I ), I = 1, NN ) - DO 30 I = 1, NN - IF( PVAL( I ).LT.0 ) THEN - WRITE( NOUT, FMT = 9989 )' P ', PVAL( I ), 0 - FATAL = .TRUE. - ELSE IF( PVAL( I ).GT.NMAX ) THEN - WRITE( NOUT, FMT = 9988 )' P ', PVAL( I ), NMAX - FATAL = .TRUE. - END IF - 30 CONTINUE - WRITE( NOUT, FMT = 9983 )'P: ', ( PVAL( I ), I = 1, NN ) - END IF -* -* Read the values of N -* - IF( SVD .OR. ZBB .OR. GLM .OR. GQR .OR. GSV .OR. CSD .OR. - $ LSE ) THEN - READ( NIN, FMT = * )( NVAL( I ), I = 1, NN ) - DO 40 I = 1, NN - IF( NVAL( I ).LT.0 ) THEN - WRITE( NOUT, FMT = 9989 )' N ', NVAL( I ), 0 - FATAL = .TRUE. - ELSE IF( NVAL( I ).GT.NMAX ) THEN - WRITE( NOUT, FMT = 9988 )' N ', NVAL( I ), NMAX - FATAL = .TRUE. - END IF - 40 CONTINUE - ELSE - DO 50 I = 1, NN - NVAL( I ) = MVAL( I ) - 50 CONTINUE - END IF - IF( .NOT.( ZGX .OR. ZXV ) ) THEN - WRITE( NOUT, FMT = 9983 )'N: ', ( NVAL( I ), I = 1, NN ) - ELSE - WRITE( NOUT, FMT = 9983 )'N: ', NN - END IF -* -* Read the number of values of K, followed by the values of K -* - IF( ZHB .OR. ZBB ) THEN - READ( NIN, FMT = * )NK - READ( NIN, FMT = * )( KVAL( I ), I = 1, NK ) - DO 60 I = 1, NK - IF( KVAL( I ).LT.0 ) THEN - WRITE( NOUT, FMT = 9989 )' K ', KVAL( I ), 0 - FATAL = .TRUE. 
- ELSE IF( KVAL( I ).GT.NMAX ) THEN - WRITE( NOUT, FMT = 9988 )' K ', KVAL( I ), NMAX - FATAL = .TRUE. - END IF - 60 CONTINUE - WRITE( NOUT, FMT = 9983 )'K: ', ( KVAL( I ), I = 1, NK ) - END IF -* - IF( ZEV .OR. ZES .OR. ZVX .OR. ZSX ) THEN -* -* For the nonsymmetric QR driver routines, only one set of -* parameters is allowed. -* - READ( NIN, FMT = * )NBVAL( 1 ), NBMIN( 1 ), NXVAL( 1 ), - $ INMIN( 1 ), INWIN( 1 ), INIBL(1), ISHFTS(1), IACC22(1) - IF( NBVAL( 1 ).LT.1 ) THEN - WRITE( NOUT, FMT = 9989 )' NB ', NBVAL( 1 ), 1 - FATAL = .TRUE. - ELSE IF( NBMIN( 1 ).LT.1 ) THEN - WRITE( NOUT, FMT = 9989 )'NBMIN ', NBMIN( 1 ), 1 - FATAL = .TRUE. - ELSE IF( NXVAL( 1 ).LT.1 ) THEN - WRITE( NOUT, FMT = 9989 )' NX ', NXVAL( 1 ), 1 - FATAL = .TRUE. - ELSE IF( INMIN( 1 ).LT.1 ) THEN - WRITE( NOUT, FMT = 9989 )' INMIN ', INMIN( 1 ), 1 - FATAL = .TRUE. - ELSE IF( INWIN( 1 ).LT.1 ) THEN - WRITE( NOUT, FMT = 9989 )' INWIN ', INWIN( 1 ), 1 - FATAL = .TRUE. - ELSE IF( INIBL( 1 ).LT.1 ) THEN - WRITE( NOUT, FMT = 9989 )' INIBL ', INIBL( 1 ), 1 - FATAL = .TRUE. - ELSE IF( ISHFTS( 1 ).LT.1 ) THEN - WRITE( NOUT, FMT = 9989 )' ISHFTS ', ISHFTS( 1 ), 1 - FATAL = .TRUE. - ELSE IF( IACC22( 1 ).LT.0 ) THEN - WRITE( NOUT, FMT = 9989 )' IACC22 ', IACC22( 1 ), 0 - FATAL = .TRUE. - END IF - CALL XLAENV( 1, NBVAL( 1 ) ) - CALL XLAENV( 2, NBMIN( 1 ) ) - CALL XLAENV( 3, NXVAL( 1 ) ) - CALL XLAENV(12, MAX( 11, INMIN( 1 ) ) ) - CALL XLAENV(13, INWIN( 1 ) ) - CALL XLAENV(14, INIBL( 1 ) ) - CALL XLAENV(15, ISHFTS( 1 ) ) - CALL XLAENV(16, IACC22( 1 ) ) - WRITE( NOUT, FMT = 9983 )'NB: ', NBVAL( 1 ) - WRITE( NOUT, FMT = 9983 )'NBMIN:', NBMIN( 1 ) - WRITE( NOUT, FMT = 9983 )'NX: ', NXVAL( 1 ) - WRITE( NOUT, FMT = 9983 )'INMIN: ', INMIN( 1 ) - WRITE( NOUT, FMT = 9983 )'INWIN: ', INWIN( 1 ) - WRITE( NOUT, FMT = 9983 )'INIBL: ', INIBL( 1 ) - WRITE( NOUT, FMT = 9983 )'ISHFTS: ', ISHFTS( 1 ) - WRITE( NOUT, FMT = 9983 )'IACC22: ', IACC22( 1 ) -* - ELSE IF( ZGS .OR. ZGX .OR. ZGV .OR. 
ZXV ) THEN -* -* For the nonsymmetric generalized driver routines, only one set of -* parameters is allowed. -* - READ( NIN, FMT = * )NBVAL( 1 ), NBMIN( 1 ), NXVAL( 1 ), - $ NSVAL( 1 ), MXBVAL( 1 ) - IF( NBVAL( 1 ).LT.1 ) THEN - WRITE( NOUT, FMT = 9989 )' NB ', NBVAL( 1 ), 1 - FATAL = .TRUE. - ELSE IF( NBMIN( 1 ).LT.1 ) THEN - WRITE( NOUT, FMT = 9989 )'NBMIN ', NBMIN( 1 ), 1 - FATAL = .TRUE. - ELSE IF( NXVAL( 1 ).LT.1 ) THEN - WRITE( NOUT, FMT = 9989 )' NX ', NXVAL( 1 ), 1 - FATAL = .TRUE. - ELSE IF( NSVAL( 1 ).LT.2 ) THEN - WRITE( NOUT, FMT = 9989 )' NS ', NSVAL( 1 ), 2 - FATAL = .TRUE. - ELSE IF( MXBVAL( 1 ).LT.1 ) THEN - WRITE( NOUT, FMT = 9989 )' MAXB ', MXBVAL( 1 ), 1 - FATAL = .TRUE. - END IF - CALL XLAENV( 1, NBVAL( 1 ) ) - CALL XLAENV( 2, NBMIN( 1 ) ) - CALL XLAENV( 3, NXVAL( 1 ) ) - CALL XLAENV( 4, NSVAL( 1 ) ) - CALL XLAENV( 8, MXBVAL( 1 ) ) - WRITE( NOUT, FMT = 9983 )'NB: ', NBVAL( 1 ) - WRITE( NOUT, FMT = 9983 )'NBMIN:', NBMIN( 1 ) - WRITE( NOUT, FMT = 9983 )'NX: ', NXVAL( 1 ) - WRITE( NOUT, FMT = 9983 )'NS: ', NSVAL( 1 ) - WRITE( NOUT, FMT = 9983 )'MAXB: ', MXBVAL( 1 ) - ELSE IF( .NOT.ZHB .AND. .NOT.GLM .AND. .NOT.GQR .AND. .NOT. - $ GSV .AND. .NOT.CSD .AND. .NOT.LSE ) THEN -* -* For the other paths, the number of parameters can be varied -* from the input file. Read the number of parameter values. -* - READ( NIN, FMT = * )NPARMS - IF( NPARMS.LT.1 ) THEN - WRITE( NOUT, FMT = 9989 )'NPARMS', NPARMS, 1 - NPARMS = 0 - FATAL = .TRUE. - ELSE IF( NPARMS.GT.MAXIN ) THEN - WRITE( NOUT, FMT = 9988 )'NPARMS', NPARMS, MAXIN - NPARMS = 0 - FATAL = .TRUE. - END IF -* -* Read the values of NB -* - IF( .NOT.ZBB ) THEN - READ( NIN, FMT = * )( NBVAL( I ), I = 1, NPARMS ) - DO 70 I = 1, NPARMS - IF( NBVAL( I ).LT.0 ) THEN - WRITE( NOUT, FMT = 9989 )' NB ', NBVAL( I ), 0 - FATAL = .TRUE. - ELSE IF( NBVAL( I ).GT.NMAX ) THEN - WRITE( NOUT, FMT = 9988 )' NB ', NBVAL( I ), NMAX - FATAL = .TRUE. 
- END IF - 70 CONTINUE - WRITE( NOUT, FMT = 9983 )'NB: ', - $ ( NBVAL( I ), I = 1, NPARMS ) - END IF -* -* Read the values of NBMIN -* - IF( NEP .OR. SEP .OR. SVD .OR. ZGG ) THEN - READ( NIN, FMT = * )( NBMIN( I ), I = 1, NPARMS ) - DO 80 I = 1, NPARMS - IF( NBMIN( I ).LT.0 ) THEN - WRITE( NOUT, FMT = 9989 )'NBMIN ', NBMIN( I ), 0 - FATAL = .TRUE. - ELSE IF( NBMIN( I ).GT.NMAX ) THEN - WRITE( NOUT, FMT = 9988 )'NBMIN ', NBMIN( I ), NMAX - FATAL = .TRUE. - END IF - 80 CONTINUE - WRITE( NOUT, FMT = 9983 )'NBMIN:', - $ ( NBMIN( I ), I = 1, NPARMS ) - ELSE - DO 90 I = 1, NPARMS - NBMIN( I ) = 1 - 90 CONTINUE - END IF -* -* Read the values of NX -* - IF( NEP .OR. SEP .OR. SVD ) THEN - READ( NIN, FMT = * )( NXVAL( I ), I = 1, NPARMS ) - DO 100 I = 1, NPARMS - IF( NXVAL( I ).LT.0 ) THEN - WRITE( NOUT, FMT = 9989 )' NX ', NXVAL( I ), 0 - FATAL = .TRUE. - ELSE IF( NXVAL( I ).GT.NMAX ) THEN - WRITE( NOUT, FMT = 9988 )' NX ', NXVAL( I ), NMAX - FATAL = .TRUE. - END IF - 100 CONTINUE - WRITE( NOUT, FMT = 9983 )'NX: ', - $ ( NXVAL( I ), I = 1, NPARMS ) - ELSE - DO 110 I = 1, NPARMS - NXVAL( I ) = 1 - 110 CONTINUE - END IF -* -* Read the values of NSHIFT (if ZGG) or NRHS (if SVD -* or ZBB). -* - IF( SVD .OR. ZBB .OR. ZGG ) THEN - READ( NIN, FMT = * )( NSVAL( I ), I = 1, NPARMS ) - DO 120 I = 1, NPARMS - IF( NSVAL( I ).LT.0 ) THEN - WRITE( NOUT, FMT = 9989 )' NS ', NSVAL( I ), 0 - FATAL = .TRUE. - ELSE IF( NSVAL( I ).GT.NMAX ) THEN - WRITE( NOUT, FMT = 9988 )' NS ', NSVAL( I ), NMAX - FATAL = .TRUE. - END IF - 120 CONTINUE - WRITE( NOUT, FMT = 9983 )'NS: ', - $ ( NSVAL( I ), I = 1, NPARMS ) - ELSE - DO 130 I = 1, NPARMS - NSVAL( I ) = 1 - 130 CONTINUE - END IF -* -* Read the values for MAXB. -* - IF( ZGG ) THEN - READ( NIN, FMT = * )( MXBVAL( I ), I = 1, NPARMS ) - DO 140 I = 1, NPARMS - IF( MXBVAL( I ).LT.0 ) THEN - WRITE( NOUT, FMT = 9989 )' MAXB ', MXBVAL( I ), 0 - FATAL = .TRUE. 
- ELSE IF( MXBVAL( I ).GT.NMAX ) THEN - WRITE( NOUT, FMT = 9988 )' MAXB ', MXBVAL( I ), NMAX - FATAL = .TRUE. - END IF - 140 CONTINUE - WRITE( NOUT, FMT = 9983 )'MAXB: ', - $ ( MXBVAL( I ), I = 1, NPARMS ) - ELSE - DO 150 I = 1, NPARMS - MXBVAL( I ) = 1 - 150 CONTINUE - END IF -* -* Read the values for INMIN. -* - IF( NEP ) THEN - READ( NIN, FMT = * )( INMIN( I ), I = 1, NPARMS ) - DO 540 I = 1, NPARMS - IF( INMIN( I ).LT.0 ) THEN - WRITE( NOUT, FMT = 9989 )' INMIN ', INMIN( I ), 0 - FATAL = .TRUE. - END IF - 540 CONTINUE - WRITE( NOUT, FMT = 9983 )'INMIN: ', - $ ( INMIN( I ), I = 1, NPARMS ) - ELSE - DO 550 I = 1, NPARMS - INMIN( I ) = 1 - 550 CONTINUE - END IF -* -* Read the values for INWIN. -* - IF( NEP ) THEN - READ( NIN, FMT = * )( INWIN( I ), I = 1, NPARMS ) - DO 560 I = 1, NPARMS - IF( INWIN( I ).LT.0 ) THEN - WRITE( NOUT, FMT = 9989 )' INWIN ', INWIN( I ), 0 - FATAL = .TRUE. - END IF - 560 CONTINUE - WRITE( NOUT, FMT = 9983 )'INWIN: ', - $ ( INWIN( I ), I = 1, NPARMS ) - ELSE - DO 570 I = 1, NPARMS - INWIN( I ) = 1 - 570 CONTINUE - END IF -* -* Read the values for INIBL. -* - IF( NEP ) THEN - READ( NIN, FMT = * )( INIBL( I ), I = 1, NPARMS ) - DO 580 I = 1, NPARMS - IF( INIBL( I ).LT.0 ) THEN - WRITE( NOUT, FMT = 9989 )' INIBL ', INIBL( I ), 0 - FATAL = .TRUE. - END IF - 580 CONTINUE - WRITE( NOUT, FMT = 9983 )'INIBL: ', - $ ( INIBL( I ), I = 1, NPARMS ) - ELSE - DO 590 I = 1, NPARMS - INIBL( I ) = 1 - 590 CONTINUE - END IF -* -* Read the values for ISHFTS. -* - IF( NEP ) THEN - READ( NIN, FMT = * )( ISHFTS( I ), I = 1, NPARMS ) - DO 600 I = 1, NPARMS - IF( ISHFTS( I ).LT.0 ) THEN - WRITE( NOUT, FMT = 9989 )' ISHFTS ', ISHFTS( I ), 0 - FATAL = .TRUE. - END IF - 600 CONTINUE - WRITE( NOUT, FMT = 9983 )'ISHFTS: ', - $ ( ISHFTS( I ), I = 1, NPARMS ) - ELSE - DO 610 I = 1, NPARMS - ISHFTS( I ) = 1 - 610 CONTINUE - END IF -* -* Read the values for IACC22. -* - IF( NEP .OR. 
ZGG ) THEN - READ( NIN, FMT = * )( IACC22( I ), I = 1, NPARMS ) - DO 620 I = 1, NPARMS - IF( IACC22( I ).LT.0 ) THEN - WRITE( NOUT, FMT = 9989 )' IACC22 ', IACC22( I ), 0 - FATAL = .TRUE. - END IF - 620 CONTINUE - WRITE( NOUT, FMT = 9983 )'IACC22: ', - $ ( IACC22( I ), I = 1, NPARMS ) - ELSE - DO 630 I = 1, NPARMS - IACC22( I ) = 1 - 630 CONTINUE - END IF -* -* Read the values for NBCOL. -* - IF( ZGG ) THEN - READ( NIN, FMT = * )( NBCOL( I ), I = 1, NPARMS ) - DO 160 I = 1, NPARMS - IF( NBCOL( I ).LT.0 ) THEN - WRITE( NOUT, FMT = 9989 )'NBCOL ', NBCOL( I ), 0 - FATAL = .TRUE. - ELSE IF( NBCOL( I ).GT.NMAX ) THEN - WRITE( NOUT, FMT = 9988 )'NBCOL ', NBCOL( I ), NMAX - FATAL = .TRUE. - END IF - 160 CONTINUE - WRITE( NOUT, FMT = 9983 )'NBCOL:', - $ ( NBCOL( I ), I = 1, NPARMS ) - ELSE - DO 170 I = 1, NPARMS - NBCOL( I ) = 1 - 170 CONTINUE - END IF - END IF -* -* Calculate and print the machine dependent constants. -* - WRITE( NOUT, FMT = * ) - EPS = DLAMCH( 'Underflow threshold' ) - WRITE( NOUT, FMT = 9981 )'underflow', EPS - EPS = DLAMCH( 'Overflow threshold' ) - WRITE( NOUT, FMT = 9981 )'overflow ', EPS - EPS = DLAMCH( 'Epsilon' ) - WRITE( NOUT, FMT = 9981 )'precision', EPS -* -* Read the threshold value for the test ratios. -* - READ( NIN, FMT = * )THRESH - WRITE( NOUT, FMT = 9982 )THRESH - IF( SEP .OR. SVD .OR. ZGG ) THEN -* -* Read the flag that indicates whether to test LAPACK routines. -* - READ( NIN, FMT = * )TSTCHK -* -* Read the flag that indicates whether to test driver routines. -* - READ( NIN, FMT = * )TSTDRV - END IF -* -* Read the flag that indicates whether to test the error exits. -* - READ( NIN, FMT = * )TSTERR -* -* Read the code describing how to set the random number seed. -* - READ( NIN, FMT = * )NEWSD -* -* If NEWSD = 2, read another line with 4 integers for the seed. 
-* - IF( NEWSD.EQ.2 ) - $ READ( NIN, FMT = * )( IOLDSD( I ), I = 1, 4 ) -* - DO 180 I = 1, 4 - ISEED( I ) = IOLDSD( I ) - 180 CONTINUE -* - IF( FATAL ) THEN - WRITE( NOUT, FMT = 9999 ) - STOP - END IF -* -* Read the input lines indicating the test path and its parameters. -* The first three characters indicate the test path, and the number -* of test matrix types must be the first nonblank item in columns -* 4-80. -* - 190 CONTINUE -* - IF( .NOT.( ZGX .OR. ZXV ) ) THEN -* - 200 CONTINUE - READ( NIN, FMT = '(A80)', END = 380 )LINE - C3 = LINE( 1: 3 ) - LENP = LEN( LINE ) - I = 3 - ITMP = 0 - I1 = 0 - 210 CONTINUE - I = I + 1 - IF( I.GT.LENP ) THEN - IF( I1.GT.0 ) THEN - GO TO 240 - ELSE - NTYPES = MAXT - GO TO 240 - END IF - END IF - IF( LINE( I: I ).NE.' ' .AND. LINE( I: I ).NE.',' ) THEN - I1 = I - C1 = LINE( I1: I1 ) -* -* Check that a valid integer was read -* - DO 220 K = 1, 10 - IF( C1.EQ.INTSTR( K: K ) ) THEN - IC = K - 1 - GO TO 230 - END IF - 220 CONTINUE - WRITE( NOUT, FMT = 9991 )I, LINE - GO TO 200 - 230 CONTINUE - ITMP = 10*ITMP + IC - GO TO 210 - ELSE IF( I1.GT.0 ) THEN - GO TO 240 - ELSE - GO TO 210 - END IF - 240 CONTINUE - NTYPES = ITMP -* -* Skip the tests if NTYPES is <= 0. -* - IF( .NOT.( ZEV .OR. ZES .OR. ZVX .OR. ZSX .OR. ZGV .OR. - $ ZGS ) .AND. NTYPES.LE.0 ) THEN - WRITE( NOUT, FMT = 9990 )C3 - GO TO 200 - END IF -* - ELSE - IF( ZGX ) - $ C3 = 'ZGX' - IF( ZXV ) - $ C3 = 'ZXV' - END IF -* -* Reset the random number seed. -* - IF( NEWSD.EQ.0 ) THEN - DO 250 K = 1, 4 - ISEED( K ) = IOLDSD( K ) - 250 CONTINUE - END IF -* - IF( LSAMEN( 3, C3, 'ZHS' ) .OR. 
LSAMEN( 3, C3, 'NEP' ) ) THEN -* -* ------------------------------------- -* NEP: Nonsymmetric Eigenvalue Problem -* ------------------------------------- -* Vary the parameters -* NB = block size -* NBMIN = minimum block size -* NX = crossover point -* NS = number of shifts -* MAXB = minimum submatrix size -* - MAXTYP = 21 - NTYPES = MIN( MAXTYP, NTYPES ) - CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) - CALL XLAENV( 1, 1 ) - IF( TSTERR ) - $ CALL ZERRHS( 'ZHSEQR', NOUT ) - DO 270 I = 1, NPARMS - CALL XLAENV( 1, NBVAL( I ) ) - CALL XLAENV( 2, NBMIN( I ) ) - CALL XLAENV( 3, NXVAL( I ) ) - CALL XLAENV(12, MAX( 11, INMIN( I ) ) ) - CALL XLAENV(13, INWIN( I ) ) - CALL XLAENV(14, INIBL( I ) ) - CALL XLAENV(15, ISHFTS( I ) ) - CALL XLAENV(16, IACC22( I ) ) -* - IF( NEWSD.EQ.0 ) THEN - DO 260 K = 1, 4 - ISEED( K ) = IOLDSD( K ) - 260 CONTINUE - END IF - WRITE( NOUT, FMT = 9961 )C3, NBVAL( I ), NBMIN( I ), - $ NXVAL( I ), MAX( 11, INMIN(I)), - $ INWIN( I ), INIBL( I ), ISHFTS( I ), IACC22( I ) - CALL ZCHKHS( NN, NVAL, MAXTYP, DOTYPE, ISEED, THRESH, NOUT, - $ A( 1, 1 ), NMAX, A( 1, 2 ), A( 1, 3 ), - $ A( 1, 4 ), A( 1, 5 ), NMAX, A( 1, 6 ), - $ A( 1, 7 ), DC( 1, 1 ), DC( 1, 2 ), A( 1, 8 ), - $ A( 1, 9 ), A( 1, 10 ), A( 1, 11 ), A( 1, 12 ), - $ DC( 1, 3 ), WORK, LWORK, RWORK, IWORK, LOGWRK, - $ RESULT, INFO ) - IF( INFO.NE.0 ) - $ WRITE( NOUT, FMT = 9980 )'ZCHKHS', INFO - 270 CONTINUE -* - ELSE IF( LSAMEN( 3, C3, 'ZST' ) .OR. LSAMEN( 3, C3, 'SEP' ) - $ .OR. 
LSAMEN( 3, C3, 'SE2' ) ) THEN -* -* ---------------------------------- -* SEP: Symmetric Eigenvalue Problem -* ---------------------------------- -* Vary the parameters -* NB = block size -* NBMIN = minimum block size -* NX = crossover point -* - MAXTYP = 21 - NTYPES = MIN( MAXTYP, NTYPES ) - CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) - CALL XLAENV( 1, 1 ) - CALL XLAENV( 9, 25 ) - IF( TSTERR ) - $ CALL ZERRST( 'ZST', NOUT ) - DO 290 I = 1, NPARMS - CALL XLAENV( 1, NBVAL( I ) ) - CALL XLAENV( 2, NBMIN( I ) ) - CALL XLAENV( 3, NXVAL( I ) ) -* - IF( NEWSD.EQ.0 ) THEN - DO 280 K = 1, 4 - ISEED( K ) = IOLDSD( K ) - 280 CONTINUE - END IF - WRITE( NOUT, FMT = 9997 )C3, NBVAL( I ), NBMIN( I ), - $ NXVAL( I ) - IF( TSTCHK ) THEN - IF( LSAMEN( 3, C3, 'SE2' ) ) THEN - CALL ZCHKST2STG( NN, NVAL, MAXTYP, DOTYPE, ISEED, THRESH, - $ NOUT, A( 1, 1 ), NMAX, A( 1, 2 ), - $ DR( 1, 1 ), DR( 1, 2 ), DR( 1, 3 ), - $ DR( 1, 4 ), DR( 1, 5 ), DR( 1, 6 ), - $ DR( 1, 7 ), DR( 1, 8 ), DR( 1, 9 ), - $ DR( 1, 10 ), DR( 1, 11 ), A( 1, 3 ), NMAX, - $ A( 1, 4 ), A( 1, 5 ), DC( 1, 1 ), A( 1, 6 ), - $ WORK, LWORK, RWORK, LWORK, IWORK, LIWORK, - $ RESULT, INFO ) - ELSE - CALL ZCHKST( NN, NVAL, MAXTYP, DOTYPE, ISEED, THRESH, - $ NOUT, A( 1, 1 ), NMAX, A( 1, 2 ), - $ DR( 1, 1 ), DR( 1, 2 ), DR( 1, 3 ), - $ DR( 1, 4 ), DR( 1, 5 ), DR( 1, 6 ), - $ DR( 1, 7 ), DR( 1, 8 ), DR( 1, 9 ), - $ DR( 1, 10 ), DR( 1, 11 ), A( 1, 3 ), NMAX, - $ A( 1, 4 ), A( 1, 5 ), DC( 1, 1 ), A( 1, 6 ), - $ WORK, LWORK, RWORK, LWORK, IWORK, LIWORK, - $ RESULT, INFO ) - ENDIF - IF( INFO.NE.0 ) - $ WRITE( NOUT, FMT = 9980 )'ZCHKST', INFO - END IF - IF( TSTDRV ) THEN - IF( LSAMEN( 3, C3, 'SE2' ) ) THEN - CALL ZDRVST2STG( NN, NVAL, 18, DOTYPE, ISEED, THRESH, - $ NOUT, A( 1, 1 ), NMAX, DR( 1, 3 ), DR( 1, 4 ), - $ DR( 1, 5 ), DR( 1, 8 ), DR( 1, 9 ), - $ DR( 1, 10 ), A( 1, 2 ), NMAX, A( 1, 3 ), - $ DC( 1, 1 ), A( 1, 4 ), WORK, LWORK, RWORK, - $ LWORK, IWORK, LIWORK, RESULT, INFO ) - ELSE - CALL ZDRVST( NN, NVAL, 18, DOTYPE, 
ISEED, THRESH, NOUT, - $ A( 1, 1 ), NMAX, DR( 1, 3 ), DR( 1, 4 ), - $ DR( 1, 5 ), DR( 1, 8 ), DR( 1, 9 ), - $ DR( 1, 10 ), A( 1, 2 ), NMAX, A( 1, 3 ), - $ DC( 1, 1 ), A( 1, 4 ), WORK, LWORK, RWORK, - $ LWORK, IWORK, LIWORK, RESULT, INFO ) - ENDIF - IF( INFO.NE.0 ) - $ WRITE( NOUT, FMT = 9980 )'ZDRVST', INFO - END IF - 290 CONTINUE -* - ELSE IF( LSAMEN( 3, C3, 'ZSG' ) ) THEN -* -* ---------------------------------------------- -* ZSG: Hermitian Generalized Eigenvalue Problem -* ---------------------------------------------- -* Vary the parameters -* NB = block size -* NBMIN = minimum block size -* NX = crossover point -* - MAXTYP = 21 - NTYPES = MIN( MAXTYP, NTYPES ) - CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) - CALL XLAENV( 9, 25 ) - DO 310 I = 1, NPARMS - CALL XLAENV( 1, NBVAL( I ) ) - CALL XLAENV( 2, NBMIN( I ) ) - CALL XLAENV( 3, NXVAL( I ) ) -* - IF( NEWSD.EQ.0 ) THEN - DO 300 K = 1, 4 - ISEED( K ) = IOLDSD( K ) - 300 CONTINUE - END IF - WRITE( NOUT, FMT = 9997 )C3, NBVAL( I ), NBMIN( I ), - $ NXVAL( I ) - IF( TSTCHK ) THEN -* CALL ZDRVSG( NN, NVAL, MAXTYP, DOTYPE, ISEED, THRESH, -* $ NOUT, A( 1, 1 ), NMAX, A( 1, 2 ), NMAX, -* $ DR( 1, 3 ), A( 1, 3 ), NMAX, A( 1, 4 ), -* $ A( 1, 5 ), A( 1, 6 ), A( 1, 7 ), WORK, -* $ LWORK, RWORK, LWORK, IWORK, LIWORK, RESULT, -* $ INFO ) - CALL ZDRVSG2STG( NN, NVAL, MAXTYP, DOTYPE, ISEED, THRESH, - $ NOUT, A( 1, 1 ), NMAX, A( 1, 2 ), NMAX, - $ DR( 1, 3 ), DR( 1, 4 ), A( 1, 3 ), NMAX, - $ A( 1, 4 ), A( 1, 5 ), A( 1, 6 ), - $ A( 1, 7 ), WORK, LWORK, RWORK, LWORK, - $ IWORK, LIWORK, RESULT, INFO ) - IF( INFO.NE.0 ) - $ WRITE( NOUT, FMT = 9980 )'ZDRVSG', INFO - END IF - 310 CONTINUE -* - ELSE IF( LSAMEN( 3, C3, 'ZBD' ) .OR. 
LSAMEN( 3, C3, 'SVD' ) ) THEN -* -* ---------------------------------- -* SVD: Singular Value Decomposition -* ---------------------------------- -* Vary the parameters -* NB = block size -* NBMIN = minimum block size -* NX = crossover point -* NRHS = number of right hand sides -* - MAXTYP = 16 - NTYPES = MIN( MAXTYP, NTYPES ) - CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) - CALL XLAENV( 9, 25 ) -* -* Test the error exits -* - CALL XLAENV( 1, 1 ) - IF( TSTERR .AND. TSTCHK ) - $ CALL ZERRBD( 'ZBD', NOUT ) - IF( TSTERR .AND. TSTDRV ) - $ CALL ZERRED( 'ZBD', NOUT ) -* - DO 330 I = 1, NPARMS - NRHS = NSVAL( I ) - CALL XLAENV( 1, NBVAL( I ) ) - CALL XLAENV( 2, NBMIN( I ) ) - CALL XLAENV( 3, NXVAL( I ) ) - IF( NEWSD.EQ.0 ) THEN - DO 320 K = 1, 4 - ISEED( K ) = IOLDSD( K ) - 320 CONTINUE - END IF - WRITE( NOUT, FMT = 9995 )C3, NBVAL( I ), NBMIN( I ), - $ NXVAL( I ), NRHS - IF( TSTCHK ) THEN - CALL ZCHKBD( NN, MVAL, NVAL, MAXTYP, DOTYPE, NRHS, ISEED, - $ THRESH, A( 1, 1 ), NMAX, DR( 1, 1 ), - $ DR( 1, 2 ), DR( 1, 3 ), DR( 1, 4 ), - $ A( 1, 2 ), NMAX, A( 1, 3 ), A( 1, 4 ), - $ A( 1, 5 ), NMAX, A( 1, 6 ), NMAX, A( 1, 7 ), - $ A( 1, 8 ), WORK, LWORK, RWORK, NOUT, INFO ) - IF( INFO.NE.0 ) - $ WRITE( NOUT, FMT = 9980 )'ZCHKBD', INFO - END IF - IF( TSTDRV ) - $ CALL ZDRVBD( NN, MVAL, NVAL, MAXTYP, DOTYPE, ISEED, - $ THRESH, A( 1, 1 ), NMAX, A( 1, 2 ), NMAX, - $ A( 1, 3 ), NMAX, A( 1, 4 ), A( 1, 5 ), - $ A( 1, 6 ), DR( 1, 1 ), DR( 1, 2 ), - $ DR( 1, 3 ), WORK, LWORK, RWORK, IWORK, NOUT, - $ INFO ) - 330 CONTINUE -* - ELSE IF( LSAMEN( 3, C3, 'ZEV' ) ) THEN -* -* -------------------------------------------- -* ZEV: Nonsymmetric Eigenvalue Problem Driver -* ZGEEV (eigenvalues and eigenvectors) -* -------------------------------------------- -* - MAXTYP = 21 - NTYPES = MIN( MAXTYP, NTYPES ) - IF( NTYPES.LE.0 ) THEN - WRITE( NOUT, FMT = 9990 )C3 - ELSE - IF( TSTERR ) - $ CALL ZERRED( C3, NOUT ) - CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) - CALL ZDRVEV( NN, NVAL, 
NTYPES, DOTYPE, ISEED, THRESH, NOUT, - $ A( 1, 1 ), NMAX, A( 1, 2 ), DC( 1, 1 ), - $ DC( 1, 2 ), A( 1, 3 ), NMAX, A( 1, 4 ), NMAX, - $ A( 1, 5 ), NMAX, RESULT, WORK, LWORK, RWORK, - $ IWORK, INFO ) - IF( INFO.NE.0 ) - $ WRITE( NOUT, FMT = 9980 )'ZGEEV', INFO - END IF - WRITE( NOUT, FMT = 9973 ) - GO TO 10 -* - ELSE IF( LSAMEN( 3, C3, 'ZES' ) ) THEN -* -* -------------------------------------------- -* ZES: Nonsymmetric Eigenvalue Problem Driver -* ZGEES (Schur form) -* -------------------------------------------- -* - MAXTYP = 21 - NTYPES = MIN( MAXTYP, NTYPES ) - IF( NTYPES.LE.0 ) THEN - WRITE( NOUT, FMT = 9990 )C3 - ELSE - IF( TSTERR ) - $ CALL ZERRED( C3, NOUT ) - CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) - CALL ZDRVES( NN, NVAL, NTYPES, DOTYPE, ISEED, THRESH, NOUT, - $ A( 1, 1 ), NMAX, A( 1, 2 ), A( 1, 3 ), - $ DC( 1, 1 ), DC( 1, 2 ), A( 1, 4 ), NMAX, - $ RESULT, WORK, LWORK, RWORK, IWORK, LOGWRK, - $ INFO ) - IF( INFO.NE.0 ) - $ WRITE( NOUT, FMT = 9980 )'ZGEES', INFO - END IF - WRITE( NOUT, FMT = 9973 ) - GO TO 10 -* - ELSE IF( LSAMEN( 3, C3, 'ZVX' ) ) THEN -* -* -------------------------------------------------------------- -* ZVX: Nonsymmetric Eigenvalue Problem Expert Driver -* ZGEEVX (eigenvalues, eigenvectors and condition numbers) -* -------------------------------------------------------------- -* - MAXTYP = 21 - NTYPES = MIN( MAXTYP, NTYPES ) - IF( NTYPES.LT.0 ) THEN - WRITE( NOUT, FMT = 9990 )C3 - ELSE - IF( TSTERR ) - $ CALL ZERRED( C3, NOUT ) - CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) - CALL ZDRVVX( NN, NVAL, NTYPES, DOTYPE, ISEED, THRESH, NIN, - $ NOUT, A( 1, 1 ), NMAX, A( 1, 2 ), DC( 1, 1 ), - $ DC( 1, 2 ), A( 1, 3 ), NMAX, A( 1, 4 ), NMAX, - $ A( 1, 5 ), NMAX, DR( 1, 1 ), DR( 1, 2 ), - $ DR( 1, 3 ), DR( 1, 4 ), DR( 1, 5 ), DR( 1, 6 ), - $ DR( 1, 7 ), DR( 1, 8 ), RESULT, WORK, LWORK, - $ RWORK, INFO ) - IF( INFO.NE.0 ) - $ WRITE( NOUT, FMT = 9980 )'ZGEEVX', INFO - END IF - WRITE( NOUT, FMT = 9973 ) - GO TO 10 -* - ELSE IF( 
LSAMEN( 3, C3, 'ZSX' ) ) THEN -* -* --------------------------------------------------- -* ZSX: Nonsymmetric Eigenvalue Problem Expert Driver -* ZGEESX (Schur form and condition numbers) -* --------------------------------------------------- -* - MAXTYP = 21 - NTYPES = MIN( MAXTYP, NTYPES ) - IF( NTYPES.LT.0 ) THEN - WRITE( NOUT, FMT = 9990 )C3 - ELSE - IF( TSTERR ) - $ CALL ZERRED( C3, NOUT ) - CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) - CALL ZDRVSX( NN, NVAL, NTYPES, DOTYPE, ISEED, THRESH, NIN, - $ NOUT, A( 1, 1 ), NMAX, A( 1, 2 ), A( 1, 3 ), - $ DC( 1, 1 ), DC( 1, 2 ), DC( 1, 3 ), A( 1, 4 ), - $ NMAX, A( 1, 5 ), RESULT, WORK, LWORK, RWORK, - $ LOGWRK, INFO ) - IF( INFO.NE.0 ) - $ WRITE( NOUT, FMT = 9980 )'ZGEESX', INFO - END IF - WRITE( NOUT, FMT = 9973 ) - GO TO 10 -* - ELSE IF( LSAMEN( 3, C3, 'ZGG' ) ) THEN -* -* ------------------------------------------------- -* ZGG: Generalized Nonsymmetric Eigenvalue Problem -* ------------------------------------------------- -* Vary the parameters -* NB = block size -* NBMIN = minimum block size -* NS = number of shifts -* MAXB = minimum submatrix size -* IACC22: structured matrix multiply -* NBCOL = minimum column dimension for blocks -* - MAXTYP = 26 - NTYPES = MIN( MAXTYP, NTYPES ) - CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) - CALL XLAENV(1,1) - IF( TSTCHK .AND. TSTERR ) - $ CALL ZERRGG( C3, NOUT ) - DO 350 I = 1, NPARMS - CALL XLAENV( 1, NBVAL( I ) ) - CALL XLAENV( 2, NBMIN( I ) ) - CALL XLAENV( 4, NSVAL( I ) ) - CALL XLAENV( 8, MXBVAL( I ) ) - CALL XLAENV( 16, IACC22( I ) ) - CALL XLAENV( 5, NBCOL( I ) ) -* - IF( NEWSD.EQ.0 ) THEN - DO 340 K = 1, 4 - ISEED( K ) = IOLDSD( K ) - 340 CONTINUE - END IF - WRITE( NOUT, FMT = 9996 )C3, NBVAL( I ), NBMIN( I ), - $ NSVAL( I ), MXBVAL( I ), IACC22( I ), NBCOL( I ) - TSTDIF = .FALSE. 
- THRSHN = 10.D0 - IF( TSTCHK ) THEN - CALL ZCHKGG( NN, NVAL, MAXTYP, DOTYPE, ISEED, THRESH, - $ TSTDIF, THRSHN, NOUT, A( 1, 1 ), NMAX, - $ A( 1, 2 ), A( 1, 3 ), A( 1, 4 ), A( 1, 5 ), - $ A( 1, 6 ), A( 1, 7 ), A( 1, 8 ), A( 1, 9 ), - $ NMAX, A( 1, 10 ), A( 1, 11 ), A( 1, 12 ), - $ DC( 1, 1 ), DC( 1, 2 ), DC( 1, 3 ), - $ DC( 1, 4 ), A( 1, 13 ), A( 1, 14 ), WORK, - $ LWORK, RWORK, LOGWRK, RESULT, INFO ) - IF( INFO.NE.0 ) - $ WRITE( NOUT, FMT = 9980 )'ZCHKGG', INFO - END IF - 350 CONTINUE -* - ELSE IF( LSAMEN( 3, C3, 'ZGS' ) ) THEN -* -* ------------------------------------------------- -* ZGS: Generalized Nonsymmetric Eigenvalue Problem -* ZGGES (Schur form) -* ------------------------------------------------- -* - MAXTYP = 26 - NTYPES = MIN( MAXTYP, NTYPES ) - IF( NTYPES.LE.0 ) THEN - WRITE( NOUT, FMT = 9990 )C3 - ELSE - IF( TSTERR ) - $ CALL ZERRGG( C3, NOUT ) - CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) - CALL ZDRGES( NN, NVAL, MAXTYP, DOTYPE, ISEED, THRESH, NOUT, - $ A( 1, 1 ), NMAX, A( 1, 2 ), A( 1, 3 ), - $ A( 1, 4 ), A( 1, 7 ), NMAX, A( 1, 8 ), - $ DC( 1, 1 ), DC( 1, 2 ), WORK, LWORK, RWORK, - $ RESULT, LOGWRK, INFO ) -* - IF( INFO.NE.0 ) - $ WRITE( NOUT, FMT = 9980 )'ZDRGES', INFO -* -* Blocked version -* - CALL ZDRGES3( NN, NVAL, MAXTYP, DOTYPE, ISEED, THRESH, NOUT, - $ A( 1, 1 ), NMAX, A( 1, 2 ), A( 1, 3 ), - $ A( 1, 4 ), A( 1, 7 ), NMAX, A( 1, 8 ), - $ DC( 1, 1 ), DC( 1, 2 ), WORK, LWORK, RWORK, - $ RESULT, LOGWRK, INFO ) -* - IF( INFO.NE.0 ) - $ WRITE( NOUT, FMT = 9980 )'ZDRGES3', INFO - END IF - WRITE( NOUT, FMT = 9973 ) - GO TO 10 -* - ELSE IF( ZGX ) THEN -* -* ------------------------------------------------- -* ZGX Generalized Nonsymmetric Eigenvalue Problem -* ZGGESX (Schur form and condition numbers) -* ------------------------------------------------- -* - MAXTYP = 5 - NTYPES = MAXTYP - IF( NN.LT.0 ) THEN - WRITE( NOUT, FMT = 9990 )C3 - ELSE - IF( TSTERR ) - $ CALL ZERRGG( C3, NOUT ) - CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) 
- CALL XLAENV( 5, 2 ) - CALL ZDRGSX( NN, NCMAX, THRESH, NIN, NOUT, A( 1, 1 ), NMAX, - $ A( 1, 2 ), A( 1, 3 ), A( 1, 4 ), A( 1, 5 ), - $ A( 1, 6 ), DC( 1, 1 ), DC( 1, 2 ), C, - $ NCMAX*NCMAX, S, WORK, LWORK, RWORK, IWORK, - $ LIWORK, LOGWRK, INFO ) - IF( INFO.NE.0 ) - $ WRITE( NOUT, FMT = 9980 )'ZDRGSX', INFO - END IF - WRITE( NOUT, FMT = 9973 ) - GO TO 10 -* - ELSE IF( LSAMEN( 3, C3, 'ZGV' ) ) THEN -* -* ------------------------------------------------- -* ZGV: Generalized Nonsymmetric Eigenvalue Problem -* ZGGEV (Eigenvalue/vector form) -* ------------------------------------------------- -* - MAXTYP = 26 - NTYPES = MIN( MAXTYP, NTYPES ) - IF( NTYPES.LE.0 ) THEN - WRITE( NOUT, FMT = 9990 )C3 - ELSE - IF( TSTERR ) - $ CALL ZERRGG( C3, NOUT ) - CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) - CALL ZDRGEV( NN, NVAL, MAXTYP, DOTYPE, ISEED, THRESH, NOUT, - $ A( 1, 1 ), NMAX, A( 1, 2 ), A( 1, 3 ), - $ A( 1, 4 ), A( 1, 7 ), NMAX, A( 1, 8 ), - $ A( 1, 9 ), NMAX, DC( 1, 1 ), DC( 1, 2 ), - $ DC( 1, 3 ), DC( 1, 4 ), WORK, LWORK, RWORK, - $ RESULT, INFO ) - IF( INFO.NE.0 ) - $ WRITE( NOUT, FMT = 9980 )'ZDRGEV', INFO -* -* Blocked version -* - CALL XLAENV(16,2) - CALL ZDRGEV3( NN, NVAL, MAXTYP, DOTYPE, ISEED, THRESH, NOUT, - $ A( 1, 1 ), NMAX, A( 1, 2 ), A( 1, 3 ), - $ A( 1, 4 ), A( 1, 7 ), NMAX, A( 1, 8 ), - $ A( 1, 9 ), NMAX, DC( 1, 1 ), DC( 1, 2 ), - $ DC( 1, 3 ), DC( 1, 4 ), WORK, LWORK, RWORK, - $ RESULT, INFO ) - IF( INFO.NE.0 ) - $ WRITE( NOUT, FMT = 9980 )'ZDRGEV3', INFO - END IF - WRITE( NOUT, FMT = 9973 ) - GO TO 10 -* - ELSE IF( ZXV ) THEN -* -* ------------------------------------------------- -* ZXV: Generalized Nonsymmetric Eigenvalue Problem -* ZGGEVX (eigenvalue/vector with condition numbers) -* ------------------------------------------------- -* - MAXTYP = 2 - NTYPES = MAXTYP - IF( NN.LT.0 ) THEN - WRITE( NOUT, FMT = 9990 )C3 - ELSE - IF( TSTERR ) - $ CALL ZERRGG( C3, NOUT ) - CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) - CALL ZDRGVX( NN, 
THRESH, NIN, NOUT, A( 1, 1 ), NMAX, - $ A( 1, 2 ), A( 1, 3 ), A( 1, 4 ), DC( 1, 1 ), - $ DC( 1, 2 ), A( 1, 5 ), A( 1, 6 ), IWORK( 1 ), - $ IWORK( 2 ), DR( 1, 1 ), DR( 1, 2 ), DR( 1, 3 ), - $ DR( 1, 4 ), DR( 1, 5 ), DR( 1, 6 ), WORK, - $ LWORK, RWORK, IWORK( 3 ), LIWORK-2, RESULT, - $ LOGWRK, INFO ) -* - IF( INFO.NE.0 ) - $ WRITE( NOUT, FMT = 9980 )'ZDRGVX', INFO - END IF - WRITE( NOUT, FMT = 9973 ) - GO TO 10 -* - ELSE IF( LSAMEN( 3, C3, 'ZHB' ) ) THEN -* -* ------------------------------ -* ZHB: Hermitian Band Reduction -* ------------------------------ -* - MAXTYP = 15 - NTYPES = MIN( MAXTYP, NTYPES ) - CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) - IF( TSTERR ) - $ CALL ZERRST( 'ZHB', NOUT ) -* CALL ZCHKHB( NN, NVAL, NK, KVAL, MAXTYP, DOTYPE, ISEED, THRESH, -* $ NOUT, A( 1, 1 ), NMAX, DR( 1, 1 ), DR( 1, 2 ), -* $ A( 1, 2 ), NMAX, WORK, LWORK, RWORK, RESULT, -* $ INFO ) - CALL ZCHKHB2STG( NN, NVAL, NK, KVAL, MAXTYP, DOTYPE, ISEED, - $ THRESH, NOUT, A( 1, 1 ), NMAX, DR( 1, 1 ), - $ DR( 1, 2 ), DR( 1, 3 ), DR( 1, 4 ), DR( 1, 5 ), - $ A( 1, 2 ), NMAX, WORK, LWORK, RWORK, RESULT, - $ INFO ) - IF( INFO.NE.0 ) - $ WRITE( NOUT, FMT = 9980 )'ZCHKHB', INFO -* - ELSE IF( LSAMEN( 3, C3, 'ZBB' ) ) THEN -* -* ------------------------------ -* ZBB: General Band Reduction -* ------------------------------ -* - MAXTYP = 15 - NTYPES = MIN( MAXTYP, NTYPES ) - CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) - DO 370 I = 1, NPARMS - NRHS = NSVAL( I ) -* - IF( NEWSD.EQ.0 ) THEN - DO 360 K = 1, 4 - ISEED( K ) = IOLDSD( K ) - 360 CONTINUE - END IF - WRITE( NOUT, FMT = 9966 )C3, NRHS - CALL ZCHKBB( NN, MVAL, NVAL, NK, KVAL, MAXTYP, DOTYPE, NRHS, - $ ISEED, THRESH, NOUT, A( 1, 1 ), NMAX, - $ A( 1, 2 ), 2*NMAX, DR( 1, 1 ), DR( 1, 2 ), - $ A( 1, 4 ), NMAX, A( 1, 5 ), NMAX, A( 1, 6 ), - $ NMAX, A( 1, 7 ), WORK, LWORK, RWORK, RESULT, - $ INFO ) - IF( INFO.NE.0 ) - $ WRITE( NOUT, FMT = 9980 )'ZCHKBB', INFO - 370 CONTINUE -* - ELSE IF( LSAMEN( 3, C3, 'GLM' ) ) THEN -* -* 
----------------------------------------- -* GLM: Generalized Linear Regression Model -* ----------------------------------------- -* - CALL XLAENV( 1, 1 ) - IF( TSTERR ) - $ CALL ZERRGG( 'GLM', NOUT ) - CALL ZCKGLM( NN, NVAL, MVAL, PVAL, NTYPES, ISEED, THRESH, NMAX, - $ A( 1, 1 ), A( 1, 2 ), B( 1, 1 ), B( 1, 2 ), X, - $ WORK, DR( 1, 1 ), NIN, NOUT, INFO ) - IF( INFO.NE.0 ) - $ WRITE( NOUT, FMT = 9980 )'ZCKGLM', INFO -* - ELSE IF( LSAMEN( 3, C3, 'GQR' ) ) THEN -* -* ------------------------------------------ -* GQR: Generalized QR and RQ factorizations -* ------------------------------------------ -* - CALL XLAENV( 1, 1 ) - IF( TSTERR ) - $ CALL ZERRGG( 'GQR', NOUT ) - CALL ZCKGQR( NN, MVAL, NN, PVAL, NN, NVAL, NTYPES, ISEED, - $ THRESH, NMAX, A( 1, 1 ), A( 1, 2 ), A( 1, 3 ), - $ A( 1, 4 ), TAUA, B( 1, 1 ), B( 1, 2 ), B( 1, 3 ), - $ B( 1, 4 ), B( 1, 5 ), TAUB, WORK, DR( 1, 1 ), NIN, - $ NOUT, INFO ) - IF( INFO.NE.0 ) - $ WRITE( NOUT, FMT = 9980 )'ZCKGQR', INFO -* - ELSE IF( LSAMEN( 3, C3, 'GSV' ) ) THEN -* -* ---------------------------------------------- -* GSV: Generalized Singular Value Decomposition -* ---------------------------------------------- -* - CALL XLAENV(1,1) - IF( TSTERR ) - $ CALL ZERRGG( 'GSV', NOUT ) - CALL ZCKGSV( NN, MVAL, PVAL, NVAL, NTYPES, ISEED, THRESH, NMAX, - $ A( 1, 1 ), A( 1, 2 ), B( 1, 1 ), B( 1, 2 ), - $ A( 1, 3 ), B( 1, 3 ), A( 1, 4 ), ALPHA, BETA, - $ B( 1, 4 ), IWORK, WORK, DR( 1, 1 ), NIN, NOUT, - $ INFO ) - IF( INFO.NE.0 ) - $ WRITE( NOUT, FMT = 9980 )'ZCKGSV', INFO -* - ELSE IF( LSAMEN( 3, C3, 'CSD' ) ) THEN -* -* ---------------------------------------------- -* CSD: CS Decomposition -* ---------------------------------------------- -* - CALL XLAENV(1,1) - IF( TSTERR ) - $ CALL ZERRGG( 'CSD', NOUT ) - CALL ZCKCSD( NN, MVAL, PVAL, NVAL, NTYPES, ISEED, THRESH, NMAX, - $ A( 1, 1 ), A( 1, 2 ), A( 1, 3 ), A( 1, 4 ), - $ A( 1, 5 ), A( 1, 6 ), RWORK, IWORK, WORK, - $ DR( 1, 1 ), NIN, NOUT, INFO ) - IF( INFO.NE.0 ) - $ WRITE( NOUT, FMT 
= 9980 )'ZCKCSD', INFO -* - ELSE IF( LSAMEN( 3, C3, 'LSE' ) ) THEN -* -* -------------------------------------- -* LSE: Constrained Linear Least Squares -* -------------------------------------- -* - CALL XLAENV( 1, 1 ) - IF( TSTERR ) - $ CALL ZERRGG( 'LSE', NOUT ) - CALL ZCKLSE( NN, MVAL, PVAL, NVAL, NTYPES, ISEED, THRESH, NMAX, - $ A( 1, 1 ), A( 1, 2 ), B( 1, 1 ), B( 1, 2 ), X, - $ WORK, DR( 1, 1 ), NIN, NOUT, INFO ) - IF( INFO.NE.0 ) - $ WRITE( NOUT, FMT = 9980 )'ZCKLSE', INFO - ELSE - WRITE( NOUT, FMT = * ) - WRITE( NOUT, FMT = * ) - WRITE( NOUT, FMT = 9992 )C3 - END IF - IF( .NOT.( ZGX .OR. ZXV ) ) - $ GO TO 190 - 380 CONTINUE - WRITE( NOUT, FMT = 9994 ) - S2 = DSECND( ) - WRITE( NOUT, FMT = 9993 )S2 - S1 -* - 9999 FORMAT( / ' Execution not attempted due to input errors' ) - 9997 FORMAT( / / 1X, A3, ': NB =', I4, ', NBMIN =', I4, ', NX =', I4 ) - 9996 FORMAT( / / 1X, A3, ': NB =', I4, ', NBMIN =', I4, ', NS =', I4, - $ ', MAXB =', I4, ', IACC22 =', I4, ', NBCOL =', I4 ) - 9995 FORMAT( / / 1X, A3, ': NB =', I4, ', NBMIN =', I4, ', NX =', I4, - $ ', NRHS =', I4 ) - 9994 FORMAT( / / ' End of tests' ) - 9993 FORMAT( ' Total time used = ', F12.2, ' seconds', / ) - 9992 FORMAT( 1X, A3, ': Unrecognized path name' ) - 9991 FORMAT( / / ' *** Invalid integer value in column ', I2, - $ ' of input', ' line:', / A79 ) - 9990 FORMAT( / / 1X, A3, ' routines were not tested' ) - 9989 FORMAT( ' Invalid input value: ', A, '=', I6, '; must be >=', - $ I6 ) - 9988 FORMAT( ' Invalid input value: ', A, '=', I6, '; must be <=', - $ I6 ) - 9987 FORMAT( ' Tests of the Nonsymmetric Eigenvalue Problem routines' ) - 9986 FORMAT( ' Tests of the Hermitian Eigenvalue Problem routines' ) - 9985 FORMAT( ' Tests of the Singular Value Decomposition routines' ) - 9984 FORMAT( / ' The following parameter values will be used:' ) - 9983 FORMAT( 4X, A, 10I6, / 10X, 10I6 ) - 9982 FORMAT( / ' Routines pass computational tests if test ratio is ', - $ 'less than', F8.2, / ) - 9981 FORMAT( ' Relative 
machine ', A, ' is taken to be', D16.6 ) - 9980 FORMAT( ' *** Error code from ', A, ' = ', I4 ) - 9979 FORMAT( / ' Tests of the Nonsymmetric Eigenvalue Problem Driver', - $ / ' ZGEEV (eigenvalues and eigevectors)' ) - 9978 FORMAT( / ' Tests of the Nonsymmetric Eigenvalue Problem Driver', - $ / ' ZGEES (Schur form)' ) - 9977 FORMAT( / ' Tests of the Nonsymmetric Eigenvalue Problem Expert', - $ ' Driver', / ' ZGEEVX (eigenvalues, eigenvectors and', - $ ' condition numbers)' ) - 9976 FORMAT( / ' Tests of the Nonsymmetric Eigenvalue Problem Expert', - $ ' Driver', / ' ZGEESX (Schur form and condition', - $ ' numbers)' ) - 9975 FORMAT( / ' Tests of the Generalized Nonsymmetric Eigenvalue ', - $ 'Problem routines' ) - 9974 FORMAT( ' Tests of ZHBTRD', / ' (reduction of a Hermitian band ', - $ 'matrix to real tridiagonal form)' ) - 9973 FORMAT( / 1X, 71( '-' ) ) - 9972 FORMAT( / ' LAPACK VERSION ', I1, '.', I1, '.', I1 ) - 9971 FORMAT( / ' Tests of the Generalized Linear Regression Model ', - $ 'routines' ) - 9970 FORMAT( / ' Tests of the Generalized QR and RQ routines' ) - 9969 FORMAT( / ' Tests of the Generalized Singular Value', - $ ' Decomposition routines' ) - 9968 FORMAT( / ' Tests of the Linear Least Squares routines' ) - 9967 FORMAT( ' Tests of ZGBBRD', / ' (reduction of a general band ', - $ 'matrix to real bidiagonal form)' ) - 9966 FORMAT( / / 1X, A3, ': NRHS =', I4 ) - 9965 FORMAT( / ' Tests of the Generalized Nonsymmetric Eigenvalue ', - $ 'Problem Expert Driver ZGGESX' ) - 9964 FORMAT( / ' Tests of the Generalized Nonsymmetric Eigenvalue ', - $ 'Problem Driver ZGGES' ) - 9963 FORMAT( / ' Tests of the Generalized Nonsymmetric Eigenvalue ', - $ 'Problem Driver ZGGEV' ) - 9962 FORMAT( / ' Tests of the Generalized Nonsymmetric Eigenvalue ', - $ 'Problem Expert Driver ZGGEVX' ) - 9961 FORMAT( / / 1X, A3, ': NB =', I4, ', NBMIN =', I4, ', NX =', I4, - $ ', INMIN=', I4, - $ ', INWIN =', I4, ', INIBL =', I4, ', ISHFTS =', I4, - $ ', IACC22 =', I4) - 9960 FORMAT( / ' 
Tests of the CS Decomposition routines' ) -* -* End of ZCHKEE -* - END From 90bb4ac82100639ea5acf0ac48c409f081eceb48 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sun, 28 Feb 2021 18:49:10 +0100 Subject: [PATCH 109/134] Add rewritten zchkee.F from Reference-LAPACK PR335 --- lapack-netlib/TESTING/EIG/zchkee.F | 2551 ++++++++++++++++++++++++++++ 1 file changed, 2551 insertions(+) create mode 100644 lapack-netlib/TESTING/EIG/zchkee.F diff --git a/lapack-netlib/TESTING/EIG/zchkee.F b/lapack-netlib/TESTING/EIG/zchkee.F new file mode 100644 index 000000000..29604956d --- /dev/null +++ b/lapack-netlib/TESTING/EIG/zchkee.F @@ -0,0 +1,2551 @@ +*> \brief \b ZCHKEE +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* PROGRAM ZCHKEE +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZCHKEE tests the COMPLEX*16 LAPACK subroutines for the matrix +*> eigenvalue problem. 
The test paths in this version are +*> +*> NEP (Nonsymmetric Eigenvalue Problem): +*> Test ZGEHRD, ZUNGHR, ZHSEQR, ZTREVC, ZHSEIN, and ZUNMHR +*> +*> SEP (Hermitian Eigenvalue Problem): +*> Test ZHETRD, ZUNGTR, ZSTEQR, ZSTERF, ZSTEIN, ZSTEDC, +*> and drivers ZHEEV(X), ZHBEV(X), ZHPEV(X), +*> ZHEEVD, ZHBEVD, ZHPEVD +*> +*> SVD (Singular Value Decomposition): +*> Test ZGEBRD, ZUNGBR, and ZBDSQR +*> and the drivers ZGESVD, ZGESDD +*> +*> ZEV (Nonsymmetric Eigenvalue/eigenvector Driver): +*> Test ZGEEV +*> +*> ZES (Nonsymmetric Schur form Driver): +*> Test ZGEES +*> +*> ZVX (Nonsymmetric Eigenvalue/eigenvector Expert Driver): +*> Test ZGEEVX +*> +*> ZSX (Nonsymmetric Schur form Expert Driver): +*> Test ZGEESX +*> +*> ZGG (Generalized Nonsymmetric Eigenvalue Problem): +*> Test ZGGHD3, ZGGBAL, ZGGBAK, ZHGEQZ, and ZTGEVC +*> +*> ZGS (Generalized Nonsymmetric Schur form Driver): +*> Test ZGGES +*> +*> ZGV (Generalized Nonsymmetric Eigenvalue/eigenvector Driver): +*> Test ZGGEV +*> +*> ZGX (Generalized Nonsymmetric Schur form Expert Driver): +*> Test ZGGESX +*> +*> ZXV (Generalized Nonsymmetric Eigenvalue/eigenvector Expert Driver): +*> Test ZGGEVX +*> +*> ZSG (Hermitian Generalized Eigenvalue Problem): +*> Test ZHEGST, ZHEGV, ZHEGVD, ZHEGVX, ZHPGST, ZHPGV, ZHPGVD, +*> ZHPGVX, ZHBGST, ZHBGV, ZHBGVD, and ZHBGVX +*> +*> ZHB (Hermitian Band Eigenvalue Problem): +*> Test ZHBTRD +*> +*> ZBB (Band Singular Value Decomposition): +*> Test ZGBBRD +*> +*> ZEC (Eigencondition estimation): +*> Test ZTRSYL, ZTREXC, ZTRSNA, and ZTRSEN +*> +*> ZBL (Balancing a general matrix) +*> Test ZGEBAL +*> +*> ZBK (Back transformation on a balanced matrix) +*> Test ZGEBAK +*> +*> ZGL (Balancing a matrix pair) +*> Test ZGGBAL +*> +*> ZGK (Back transformation on a matrix pair) +*> Test ZGGBAK +*> +*> GLM (Generalized Linear Regression Model): +*> Tests ZGGGLM +*> +*> GQR (Generalized QR and RQ factorizations): +*> Tests ZGGQRF and ZGGRQF +*> +*> GSV (Generalized Singular Value Decomposition): +*> 
Tests ZGGSVD, ZGGSVP, ZTGSJA, ZLAGS2, ZLAPLL, and ZLAPMT +*> +*> CSD (CS decomposition): +*> Tests ZUNCSD +*> +*> LSE (Constrained Linear Least Squares): +*> Tests ZGGLSE +*> +*> Each test path has a different set of inputs, but the data sets for +*> the driver routines xEV, xES, xVX, and xSX can be concatenated in a +*> single input file. The first line of input should contain one of the +*> 3-character path names in columns 1-3. The number of remaining lines +*> depends on what is found on the first line. +*> +*> The number of matrix types used in testing is often controllable from +*> the input file. The number of matrix types for each path, and the +*> test routine that describes them, is as follows: +*> +*> Path name(s) Types Test routine +*> +*> ZHS or NEP 21 ZCHKHS +*> ZST or SEP 21 ZCHKST (routines) +*> 18 ZDRVST (drivers) +*> ZBD or SVD 16 ZCHKBD (routines) +*> 5 ZDRVBD (drivers) +*> ZEV 21 ZDRVEV +*> ZES 21 ZDRVES +*> ZVX 21 ZDRVVX +*> ZSX 21 ZDRVSX +*> ZGG 26 ZCHKGG (routines) +*> ZGS 26 ZDRGES +*> ZGX 5 ZDRGSX +*> ZGV 26 ZDRGEV +*> ZXV 2 ZDRGVX +*> ZSG 21 ZDRVSG +*> ZHB 15 ZCHKHB +*> ZBB 15 ZCHKBB +*> ZEC - ZCHKEC +*> ZBL - ZCHKBL +*> ZBK - ZCHKBK +*> ZGL - ZCHKGL +*> ZGK - ZCHKGK +*> GLM 8 ZCKGLM +*> GQR 8 ZCKGQR +*> GSV 8 ZCKGSV +*> CSD 3 ZCKCSD +*> LSE 8 ZCKLSE +*> +*>----------------------------------------------------------------------- +*> +*> NEP input file: +*> +*> line 2: NN, INTEGER +*> Number of values of N. +*> +*> line 3: NVAL, INTEGER array, dimension (NN) +*> The values for the matrix dimension N. +*> +*> line 4: NPARMS, INTEGER +*> Number of values of the parameters NB, NBMIN, NX, NS, and +*> MAXB. +*> +*> line 5: NBVAL, INTEGER array, dimension (NPARMS) +*> The values for the blocksize NB. +*> +*> line 6: NBMIN, INTEGER array, dimension (NPARMS) +*> The values for the minimum blocksize NBMIN. +*> +*> line 7: NXVAL, INTEGER array, dimension (NPARMS) +*> The values for the crossover point NX. 
+*> +*> line 8: INMIN, INTEGER array, dimension (NPARMS) +*> LAHQR vs TTQRE crossover point, >= 11 +*> +*> line 9: INWIN, INTEGER array, dimension (NPARMS) +*> recommended deflation window size +*> +*> line 10: INIBL, INTEGER array, dimension (NPARMS) +*> nibble crossover point +*> +*> line 11: ISHFTS, INTEGER array, dimension (NPARMS) +*> number of simultaneous shifts +*> +*> line 12: IACC22, INTEGER array, dimension (NPARMS) +*> select structured matrix multiply: 0, 1 or 2 +*> +*> line 13: THRESH +*> Threshold value for the test ratios. Information will be +*> printed about each test for which the test ratio is greater +*> than or equal to the threshold. To have all of the test +*> ratios printed, use THRESH = 0.0 . +*> +*> line 14: NEWSD, INTEGER +*> A code indicating how to set the random number seed. +*> = 0: Set the seed to a default value before each run +*> = 1: Initialize the seed to a default value only before the +*> first run +*> = 2: Like 1, but use the seed values on the next line +*> +*> If line 14 was 2: +*> +*> line 15: INTEGER array, dimension (4) +*> Four integer values for the random number seed. +*> +*> lines 15-EOF: The remaining lines occur in sets of 1 or 2 and allow +*> the user to specify the matrix types. Each line contains +*> a 3-character path name in columns 1-3, and the number +*> of matrix types must be the first nonblank item in columns +*> 4-80. If the number of matrix types is at least 1 but is +*> less than the maximum number of possible types, a second +*> line will be read to get the numbers of the matrix types to +*> be used. For example, +*> NEP 21 +*> requests all of the matrix types for the nonsymmetric +*> eigenvalue problem, while +*> NEP 4 +*> 9 10 11 12 +*> requests only matrices of type 9, 10, 11, and 12. +*> +*> The valid 3-character path names are 'NEP' or 'ZHS' for the +*> nonsymmetric eigenvalue routines.
+*> +*>----------------------------------------------------------------------- +*> +*> SEP or ZSG input file: +*> +*> line 2: NN, INTEGER +*> Number of values of N. +*> +*> line 3: NVAL, INTEGER array, dimension (NN) +*> The values for the matrix dimension N. +*> +*> line 4: NPARMS, INTEGER +*> Number of values of the parameters NB, NBMIN, and NX. +*> +*> line 5: NBVAL, INTEGER array, dimension (NPARMS) +*> The values for the blocksize NB. +*> +*> line 6: NBMIN, INTEGER array, dimension (NPARMS) +*> The values for the minimum blocksize NBMIN. +*> +*> line 7: NXVAL, INTEGER array, dimension (NPARMS) +*> The values for the crossover point NX. +*> +*> line 8: THRESH +*> Threshold value for the test ratios. Information will be +*> printed about each test for which the test ratio is greater +*> than or equal to the threshold. +*> +*> line 9: TSTCHK, LOGICAL +*> Flag indicating whether or not to test the LAPACK routines. +*> +*> line 10: TSTDRV, LOGICAL +*> Flag indicating whether or not to test the driver routines. +*> +*> line 11: TSTERR, LOGICAL +*> Flag indicating whether or not to test the error exits for +*> the LAPACK routines and driver routines. +*> +*> line 12: NEWSD, INTEGER +*> A code indicating how to set the random number seed. +*> = 0: Set the seed to a default value before each run +*> = 1: Initialize the seed to a default value only before the +*> first run +*> = 2: Like 1, but use the seed values on the next line +*> +*> If line 12 was 2: +*> +*> line 13: INTEGER array, dimension (4) +*> Four integer values for the random number seed. +*> +*> lines 13-EOF: Lines specifying matrix types, as for NEP. +*> The valid 3-character path names are 'SEP' or 'ZST' for the +*> Hermitian eigenvalue routines and driver routines, and +*> 'ZSG' for the routines for the Hermitian generalized +*> eigenvalue problem. 
+*> +*>----------------------------------------------------------------------- +*> +*> SVD input file: +*> +*> line 2: NN, INTEGER +*> Number of values of M and N. +*> +*> line 3: MVAL, INTEGER array, dimension (NN) +*> The values for the matrix row dimension M. +*> +*> line 4: NVAL, INTEGER array, dimension (NN) +*> The values for the matrix column dimension N. +*> +*> line 5: NPARMS, INTEGER +*> Number of values of the parameter NB, NBMIN, NX, and NRHS. +*> +*> line 6: NBVAL, INTEGER array, dimension (NPARMS) +*> The values for the blocksize NB. +*> +*> line 7: NBMIN, INTEGER array, dimension (NPARMS) +*> The values for the minimum blocksize NBMIN. +*> +*> line 8: NXVAL, INTEGER array, dimension (NPARMS) +*> The values for the crossover point NX. +*> +*> line 9: NSVAL, INTEGER array, dimension (NPARMS) +*> The values for the number of right hand sides NRHS. +*> +*> line 10: THRESH +*> Threshold value for the test ratios. Information will be +*> printed about each test for which the test ratio is greater +*> than or equal to the threshold. +*> +*> line 11: TSTCHK, LOGICAL +*> Flag indicating whether or not to test the LAPACK routines. +*> +*> line 12: TSTDRV, LOGICAL +*> Flag indicating whether or not to test the driver routines. +*> +*> line 13: TSTERR, LOGICAL +*> Flag indicating whether or not to test the error exits for +*> the LAPACK routines and driver routines. +*> +*> line 14: NEWSD, INTEGER +*> A code indicating how to set the random number seed. +*> = 0: Set the seed to a default value before each run +*> = 1: Initialize the seed to a default value only before the +*> first run +*> = 2: Like 1, but use the seed values on the next line +*> +*> If line 14 was 2: +*> +*> line 15: INTEGER array, dimension (4) +*> Four integer values for the random number seed. +*> +*> lines 15-EOF: Lines specifying matrix types, as for NEP. +*> The 3-character path names are 'SVD' or 'ZBD' for both the +*> SVD routines and the SVD driver routines. 
+*> +*>----------------------------------------------------------------------- +*> +*> ZEV and ZES data files: +*> +*> line 1: 'ZEV' or 'ZES' in columns 1 to 3. +*> +*> line 2: NSIZES, INTEGER +*> Number of sizes of matrices to use. Should be at least 0 +*> and at most 20. If NSIZES = 0, no testing is done +*> (although the remaining 3 lines are still read). +*> +*> line 3: NN, INTEGER array, dimension(NSIZES) +*> Dimensions of matrices to be tested. +*> +*> line 4: NB, NBMIN, NX, NS, NBCOL, INTEGERs +*> These integer parameters determine how blocking is done +*> (see ILAENV for details) +*> NB : block size +*> NBMIN : minimum block size +*> NX : minimum dimension for blocking +*> NS : number of shifts in xHSEQR +*> NBCOL : minimum column dimension for blocking +*> +*> line 5: THRESH, REAL +*> The test threshold against which computed residuals are +*> compared. Should generally be in the range from 10. to 20. +*> If it is 0., all test case data will be printed. +*> +*> line 6: NEWSD, INTEGER +*> A code indicating how to set the random number seed. +*> = 0: Set the seed to a default value before each run +*> = 1: Initialize the seed to a default value only before the +*> first run +*> = 2: Like 1, but use the seed values on the next line +*> +*> If line 6 was 2: +*> +*> line 7: INTEGER array, dimension (4) +*> Four integer values for the random number seed. +*> +*> lines 8 and following: Lines specifying matrix types, as for NEP. +*> The 3-character path name is 'ZEV' to test ZGEEV, or +*> 'ZES' to test ZGEES.
+*> +*> line 3: NN, INTEGER array, dimension(NSIZES) +*> +*> line 4: NB, NBMIN, NX, NS, NBCOL, INTEGERs +*> +*> line 5: THRESH, REAL +*> +*> line 6: NEWSD, INTEGER +*> +*> If line 6 was 2: +*> +*> line 7: INTEGER array, dimension (4) +*> +*> lines 8 and following: The first line contains 'ZVX' in columns 1-3 +*> followed by the number of matrix types, possibly with +*> a second line to specify certain matrix types. +*> If the number of matrix types = 0, no testing of randomly +*> generated examples is done, but any precomputed examples +*> are tested. +*> +*> remaining lines : Each matrix is stored on 1+N+N**2 lines, where N is +*> its dimension. The first line contains the dimension N and +*> ISRT (two integers). ISRT indicates whether the last N lines +*> are sorted by increasing real part of the eigenvalue +*> (ISRT=0) or by increasing imaginary part (ISRT=1). The next +*> N**2 lines contain the matrix rowwise, one entry per line. +*> The last N lines correspond to each eigenvalue. Each of +*> these last N lines contains 4 real values: the real part of +*> the eigenvalue, the imaginary part of the eigenvalue, the +*> reciprocal condition number of the eigenvalue, and the +*> reciprocal condition number of the eigenvector. The +*> end of data is indicated by dimension N=0. Even if no data +*> is to be tested, there must be at least one line containing +*> N=0. +*> +*>----------------------------------------------------------------------- +*> +*> The ZSX data is like ZVX. The first part is identical to ZEV, and the +*> second part consists of test matrices with precomputed solutions. +*> +*> line 1: 'ZSX' in columns 1-3. +*> +*> line 2: NSIZES, INTEGER +*> If NSIZES = 0, no testing of randomly generated examples +*> is done, but any precomputed examples are tested.
+*> +*> line 3: NN, INTEGER array, dimension(NSIZES) +*> +*> line 4: NB, NBMIN, NX, NS, NBCOL, INTEGERs +*> +*> line 5: THRESH, REAL +*> +*> line 6: NEWSD, INTEGER +*> +*> If line 6 was 2: +*> +*> line 7: INTEGER array, dimension (4) +*> +*> lines 8 and following: The first line contains 'ZSX' in columns 1-3 +*> followed by the number of matrix types, possibly with +*> a second line to specify certain matrix types. +*> If the number of matrix types = 0, no testing of randomly +*> generated examples is done, but any precomputed examples +*> are tested. +*> +*> remaining lines : Each matrix is stored on 3+N**2 lines, where N is +*> its dimension. The first line contains the dimension N, the +*> dimension M of an invariant subspace, and ISRT. The second +*> line contains M integers, identifying the eigenvalues in the +*> invariant subspace (by their position in a list of +*> eigenvalues ordered by increasing real part (if ISRT=0) or +*> by increasing imaginary part (if ISRT=1)). The next N**2 +*> lines contain the matrix rowwise. The last line contains the +*> reciprocal condition number for the average of the selected +*> eigenvalues, and the reciprocal condition number for the +*> corresponding right invariant subspace. The end of data is +*> indicated by a line containing N=0, M=0, and ISRT = 0. Even +*> if no data is to be tested, there must be at least one line +*> containing N=0, M=0 and ISRT=0. +*> +*>----------------------------------------------------------------------- +*> +*> ZGG input file: +*> +*> line 2: NN, INTEGER +*> Number of values of N. +*> +*> line 3: NVAL, INTEGER array, dimension (NN) +*> The values for the matrix dimension N. +*> +*> line 4: NPARMS, INTEGER +*> Number of values of the parameters NB, NBMIN, NBCOL, NS, and +*> MAXB. +*> +*> line 5: NBVAL, INTEGER array, dimension (NPARMS) +*> The values for the blocksize NB. +*> +*> line 6: NBMIN, INTEGER array, dimension (NPARMS) +*> The values for NBMIN, the minimum row dimension for blocks.
+*> +*> line 7: NSVAL, INTEGER array, dimension (NPARMS) +*> The values for the number of shifts. +*> +*> line 8: MXBVAL, INTEGER array, dimension (NPARMS) +*> The values for MAXB, used in determining minimum blocksize. +*> +*> line 9: IACC22, INTEGER array, dimension (NPARMS) +*> select structured matrix multiply: 1 or 2 +*> +*> line 10: NBCOL, INTEGER array, dimension (NPARMS) +*> The values for NBCOL, the minimum column dimension for +*> blocks. +*> +*> line 11: THRESH +*> Threshold value for the test ratios. Information will be +*> printed about each test for which the test ratio is greater +*> than or equal to the threshold. +*> +*> line 12: TSTCHK, LOGICAL +*> Flag indicating whether or not to test the LAPACK routines. +*> +*> line 13: TSTDRV, LOGICAL +*> Flag indicating whether or not to test the driver routines. +*> +*> line 14: TSTERR, LOGICAL +*> Flag indicating whether or not to test the error exits for +*> the LAPACK routines and driver routines. +*> +*> line 15: NEWSD, INTEGER +*> A code indicating how to set the random number seed. +*> = 0: Set the seed to a default value before each run +*> = 1: Initialize the seed to a default value only before the +*> first run +*> = 2: Like 1, but use the seed values on the next line +*> +*> If line 15 was 2: +*> +*> line 16: INTEGER array, dimension (4) +*> Four integer values for the random number seed. +*> +*> lines 17-EOF: Lines specifying matrix types, as for NEP. +*> The 3-character path name is 'ZGG' for the generalized +*> eigenvalue problem routines and driver routines. +*> +*>----------------------------------------------------------------------- +*> +*> ZGS and ZGV input files: +*> +*> line 1: 'ZGS' or 'ZGV' in columns 1 to 3. +*> +*> line 2: NN, INTEGER +*> Number of values of N. +*> +*> line 3: NVAL, INTEGER array, dimension(NN) +*> Dimensions of matrices to be tested.
+*> +*> line 4: NB, NBMIN, NX, NS, NBCOL, INTEGERs +*> These integer parameters determine how blocking is done +*> (see ILAENV for details) +*> NB : block size +*> NBMIN : minimum block size +*> NX : minimum dimension for blocking +*> NS : number of shifts in xHGEQR +*> NBCOL : minimum column dimension for blocking +*> +*> line 5: THRESH, REAL +*> The test threshold against which computed residuals are +*> compared. Should generally be in the range from 10. to 20. +*> If it is 0., all test case data will be printed. +*> +*> line 6: TSTERR, LOGICAL +*> Flag indicating whether or not to test the error exits. +*> +*> line 7: NEWSD, INTEGER +*> A code indicating how to set the random number seed. +*> = 0: Set the seed to a default value before each run +*> = 1: Initialize the seed to a default value only before the +*> first run +*> = 2: Like 1, but use the seed values on the next line +*> +*> If line 7 was 2: +*> +*> line 8: INTEGER array, dimension (4) +*> Four integer values for the random number seed. +*> +*> lines 9-EOF: Lines specifying matrix types, as for NEP. +*> The 3-character path name is 'ZGS' for the generalized +*> eigenvalue problem routines and driver routines. +*> +*>----------------------------------------------------------------------- +*> +*> ZGX input file: +*> line 1: 'ZGX' in columns 1 to 3. +*> +*> line 2: N, INTEGER +*> Value of N. +*> +*> line 3: NB, NBMIN, NX, NS, NBCOL, INTEGERs +*> These integer parameters determine how blocking is done +*> (see ILAENV for details) +*> NB : block size +*> NBMIN : minimum block size +*> NX : minimum dimension for blocking +*> NS : number of shifts in xHGEQR +*> NBCOL : minimum column dimension for blocking +*> +*> line 4: THRESH, REAL +*> The test threshold against which computed residuals are +*> compared. Should generally be in the range from 10. to 20. +*> Information will be printed about each test for which the +*> test ratio is greater than or equal to the threshold.
+*> +*> line 5: TSTERR, LOGICAL +*> Flag indicating whether or not to test the error exits for +*> the LAPACK routines and driver routines. +*> +*> line 6: NEWSD, INTEGER +*> A code indicating how to set the random number seed. +*> = 0: Set the seed to a default value before each run +*> = 1: Initialize the seed to a default value only before the +*> first run +*> = 2: Like 1, but use the seed values on the next line +*> +*> If line 6 was 2: +*> +*> line 7: INTEGER array, dimension (4) +*> Four integer values for the random number seed. +*> +*> If line 2 was 0: +*> +*> line 7-EOF: Precomputed examples are tested. +*> +*> remaining lines : Each example is stored on 3+2*N*N lines, where N is +*> its dimension. The first line contains the dimension (a +*> single integer). The next line contains an integer k such +*> that only the last k eigenvalues will be selected and appear +*> in the leading diagonal blocks of $A$ and $B$. The next N*N +*> lines contain the matrix A, one element per line. The next N*N +*> lines contain the matrix B. The last line contains the +*> reciprocal of the eigenvalue cluster condition number and the +*> reciprocal of the deflating subspace (associated with the +*> selected eigencluster) condition number. The end of data is +*> indicated by dimension N=0. Even if no data is to be tested, +*> there must be at least one line containing N=0. +*> +*>----------------------------------------------------------------------- +*> +*> ZXV input files: +*> line 1: 'ZXV' in columns 1 to 3. +*> +*> line 2: N, INTEGER +*> Value of N. +*> +*> line 3: NB, NBMIN, NX, NS, NBCOL, INTEGERs +*> These integer parameters determine how blocking is done +*> (see ILAENV for details) +*> NB : block size +*> NBMIN : minimum block size +*> NX : minimum dimension for blocking +*> NS : number of shifts in xHGEQR +*> NBCOL : minimum column dimension for blocking +*> +*> line 4: THRESH, REAL +*> The test threshold against which computed residuals are +*> compared. 
Should generally be in the range from 10. to 20. +*> Information will be printed about each test for which the +*> test ratio is greater than or equal to the threshold. +*> +*> line 5: TSTERR, LOGICAL +*> Flag indicating whether or not to test the error exits for +*> the LAPACK routines and driver routines. +*> +*> line 6: NEWSD, INTEGER +*> A code indicating how to set the random number seed. +*> = 0: Set the seed to a default value before each run +*> = 1: Initialize the seed to a default value only before the +*> first run +*> = 2: Like 1, but use the seed values on the next line +*> +*> If line 6 was 2: +*> +*> line 7: INTEGER array, dimension (4) +*> Four integer values for the random number seed. +*> +*> If line 2 was 0: +*> +*> line 7-EOF: Precomputed examples are tested. +*> +*> remaining lines : Each example is stored on 3+2*N*N lines, where N is +*> its dimension. The first line contains the dimension (a +*> single integer). The next N*N lines contain the matrix A, one +*> element per line. The next N*N lines contain the matrix B. +*> The next line contains the reciprocals of the eigenvalue +*> condition numbers. The last line contains the reciprocals of +*> the eigenvector condition numbers. The end of data is +*> indicated by dimension N=0. Even if no data is to be tested, +*> there must be at least one line containing N=0. +*> +*>----------------------------------------------------------------------- +*> +*> ZHB input file: +*> +*> line 2: NN, INTEGER +*> Number of values of N. +*> +*> line 3: NVAL, INTEGER array, dimension (NN) +*> The values for the matrix dimension N. +*> +*> line 4: NK, INTEGER +*> Number of values of K. +*> +*> line 5: KVAL, INTEGER array, dimension (NK) +*> The values for the matrix dimension K. +*> +*> line 6: THRESH +*> Threshold value for the test ratios. Information will be +*> printed about each test for which the test ratio is greater +*> than or equal to the threshold. 
+*> +*> line 7: TSTERR, LOGICAL +*> Flag indicating whether or not to test the error exits. +*> +*> line 8: NEWSD, INTEGER +*> A code indicating how to set the random number seed. +*> = 0: Set the seed to a default value before each run +*> = 1: Initialize the seed to a default value only before the +*> first run +*> = 2: Like 1, but use the seed values on the next line +*> +*> If line 8 was 2: +*> +*> line 9: INTEGER array, dimension (4) +*> Four integer values for the random number seed. +*> +*> lines 9-EOF: Lines specifying matrix types, as for NEP. +*> The 3-character path name is 'ZHB'. +*> +*>----------------------------------------------------------------------- +*> +*> ZBB input file: +*> +*> line 2: NN, INTEGER +*> Number of values of M and N. +*> +*> line 3: MVAL, INTEGER array, dimension (NN) +*> The values for the matrix row dimension M. +*> +*> line 4: NVAL, INTEGER array, dimension (NN) +*> The values for the matrix column dimension N. +*> +*> line 5: NK, INTEGER +*> Number of values of K. +*> +*> line 6: KVAL, INTEGER array, dimension (NK) +*> The values for the matrix bandwidth K. +*> +*> line 7: NPARMS, INTEGER +*> Number of values of the parameter NRHS +*> +*> line 8: NSVAL, INTEGER array, dimension (NPARMS) +*> The values for the number of right hand sides NRHS. +*> +*> line 9: THRESH +*> Threshold value for the test ratios. Information will be +*> printed about each test for which the test ratio is greater +*> than or equal to the threshold. +*> +*> line 10: TSTERR, LOGICAL +*> Flag indicating whether or not to test the error exits. +*> +*> line 11: NEWSD, INTEGER +*> A code indicating how to set the random number seed. +*> = 0: Set the seed to a default value before each run +*> = 1: Initialize the seed to a default value only before the +*> first run +*> = 2: Like 1, but use the seed values on the next line +*> +*> If line 11 was 2: +*> +*> line 12: INTEGER array, dimension (4) +*> Four integer values for the random number seed. +*> +*> lines 12-EOF: Lines specifying matrix types, as for SVD. +*> The 3-character path name is 'ZBB'.
+*> +*>----------------------------------------------------------------------- +*> +*> ZEC input file: +*> +*> line 2: THRESH, REAL +*> Threshold value for the test ratios. Information will be +*> printed about each test for which the test ratio is greater +*> than or equal to the threshold. +*> +*> lines 3-EOF: +*> +*> Input for testing the eigencondition routines consists of a set of +*> specially constructed test cases and their solutions. The data +*> format is not intended to be modified by the user. +*> +*>----------------------------------------------------------------------- +*> +*> ZBL and ZBK input files: +*> +*> line 1: 'ZBL' in columns 1-3 to test ZGEBAL, or 'ZBK' in +*> columns 1-3 to test ZGEBAK. +*> +*> The remaining lines consist of specially constructed test cases. +*> +*>----------------------------------------------------------------------- +*> +*> ZGL and ZGK input files: +*> +*> line 1: 'ZGL' in columns 1-3 to test ZGGBAL, or 'ZGK' in +*> columns 1-3 to test ZGGBAK. +*> +*> The remaining lines consist of specially constructed test cases. +*> +*>----------------------------------------------------------------------- +*> +*> GLM data file: +*> +*> line 1: 'GLM' in columns 1 to 3. +*> +*> line 2: NN, INTEGER +*> Number of values of M, P, and N. +*> +*> line 3: MVAL, INTEGER array, dimension(NN) +*> Values of M (row dimension). +*> +*> line 4: PVAL, INTEGER array, dimension(NN) +*> Values of P (row dimension). +*> +*> line 5: NVAL, INTEGER array, dimension(NN) +*> Values of N (column dimension), note M <= N <= M+P. +*> +*> line 6: THRESH, REAL +*> Threshold value for the test ratios. Information will be +*> printed about each test for which the test ratio is greater +*> than or equal to the threshold. +*> +*> line 7: TSTERR, LOGICAL +*> Flag indicating whether or not to test the error exits for +*> the LAPACK routines and driver routines. +*> +*> line 8: NEWSD, INTEGER +*> A code indicating how to set the random number seed.
+*> = 0: Set the seed to a default value before each run +*> = 1: Initialize the seed to a default value only before the +*> first run +*> = 2: Like 1, but use the seed values on the next line +*> +*> If line 8 was 2: +*> +*> line 9: INTEGER array, dimension (4) +*> Four integer values for the random number seed. +*> +*> lines 9-EOF: Lines specifying matrix types, as for NEP. +*> The 3-character path name is 'GLM' for the generalized +*> linear regression model routines. +*> +*>----------------------------------------------------------------------- +*> +*> GQR data file: +*> +*> line 1: 'GQR' in columns 1 to 3. +*> +*> line 2: NN, INTEGER +*> Number of values of M, P, and N. +*> +*> line 3: MVAL, INTEGER array, dimension(NN) +*> Values of M. +*> +*> line 4: PVAL, INTEGER array, dimension(NN) +*> Values of P. +*> +*> line 5: NVAL, INTEGER array, dimension(NN) +*> Values of N. +*> +*> line 6: THRESH, REAL +*> Threshold value for the test ratios. Information will be +*> printed about each test for which the test ratio is greater +*> than or equal to the threshold. +*> +*> line 7: TSTERR, LOGICAL +*> Flag indicating whether or not to test the error exits for +*> the LAPACK routines and driver routines. +*> +*> line 8: NEWSD, INTEGER +*> A code indicating how to set the random number seed. +*> = 0: Set the seed to a default value before each run +*> = 1: Initialize the seed to a default value only before the +*> first run +*> = 2: Like 1, but use the seed values on the next line +*> +*> If line 8 was 2: +*> +*> line 9: INTEGER array, dimension (4) +*> Four integer values for the random number seed. +*> +*> lines 9-EOF: Lines specifying matrix types, as for NEP. +*> The 3-character path name is 'GQR' for the generalized +*> QR and RQ routines. +*> +*>----------------------------------------------------------------------- +*> +*> GSV data file: +*> +*> line 1: 'GSV' in columns 1 to 3. +*> +*> line 2: NN, INTEGER +*> Number of values of M, P, and N. 
+*> +*> line 3: MVAL, INTEGER array, dimension(NN) +*> Values of M (row dimension). +*> +*> line 4: PVAL, INTEGER array, dimension(NN) +*> Values of P (row dimension). +*> +*> line 5: NVAL, INTEGER array, dimension(NN) +*> Values of N (column dimension). +*> +*> line 6: THRESH, REAL +*> Threshold value for the test ratios. Information will be +*> printed about each test for which the test ratio is greater +*> than or equal to the threshold. +*> +*> line 7: TSTERR, LOGICAL +*> Flag indicating whether or not to test the error exits for +*> the LAPACK routines and driver routines. +*> +*> line 8: NEWSD, INTEGER +*> A code indicating how to set the random number seed. +*> = 0: Set the seed to a default value before each run +*> = 1: Initialize the seed to a default value only before the +*> first run +*> = 2: Like 1, but use the seed values on the next line +*> +*> If line 8 was 2: +*> +*> line 9: INTEGER array, dimension (4) +*> Four integer values for the random number seed. +*> +*> lines 9-EOF: Lines specifying matrix types, as for NEP. +*> The 3-character path name is 'GSV' for the generalized +*> SVD routines. +*> +*>----------------------------------------------------------------------- +*> +*> CSD data file: +*> +*> line 1: 'CSD' in columns 1 to 3. +*> +*> line 2: NM, INTEGER +*> Number of values of M, P, and N. +*> +*> line 3: MVAL, INTEGER array, dimension(NM) +*> Values of M (row and column dimension of orthogonal matrix). +*> +*> line 4: PVAL, INTEGER array, dimension(NM) +*> Values of P (row dimension of top-left block). +*> +*> line 5: NVAL, INTEGER array, dimension(NM) +*> Values of N (column dimension of top-left block). +*> +*> line 6: THRESH, REAL +*> Threshold value for the test ratios. Information will be +*> printed about each test for which the test ratio is greater +*> than or equal to the threshold. +*> +*> line 7: TSTERR, LOGICAL +*> Flag indicating whether or not to test the error exits for +*> the LAPACK routines and driver routines. 
+*> +*> line 8: NEWSD, INTEGER +*> A code indicating how to set the random number seed. +*> = 0: Set the seed to a default value before each run +*> = 1: Initialize the seed to a default value only before the +*> first run +*> = 2: Like 1, but use the seed values on the next line +*> +*> If line 8 was 2: +*> +*> line 9: INTEGER array, dimension (4) +*> Four integer values for the random number seed. +*> +*> lines 9-EOF: Lines specifying matrix types, as for NEP. +*> The 3-character path name is 'CSD' for the CSD routine. +*> +*>----------------------------------------------------------------------- +*> +*> LSE data file: +*> +*> line 1: 'LSE' in columns 1 to 3. +*> +*> line 2: NN, INTEGER +*> Number of values of M, P, and N. +*> +*> line 3: MVAL, INTEGER array, dimension(NN) +*> Values of M. +*> +*> line 4: PVAL, INTEGER array, dimension(NN) +*> Values of P. +*> +*> line 5: NVAL, INTEGER array, dimension(NN) +*> Values of N, note P <= N <= P+M. +*> +*> line 6: THRESH, REAL +*> Threshold value for the test ratios. Information will be +*> printed about each test for which the test ratio is greater +*> than or equal to the threshold. +*> +*> line 7: TSTERR, LOGICAL +*> Flag indicating whether or not to test the error exits for +*> the LAPACK routines and driver routines. +*> +*> line 8: NEWSD, INTEGER +*> A code indicating how to set the random number seed. +*> = 0: Set the seed to a default value before each run +*> = 1: Initialize the seed to a default value only before the +*> first run +*> = 2: Like 1, but use the seed values on the next line +*> +*> If line 8 was 2: +*> +*> line 9: INTEGER array, dimension (4) +*> Four integer values for the random number seed. +*> +*> lines 9-EOF: Lines specifying matrix types, as for NEP. +*> The 3-character path name is 'LSE' for the constrained +*> linear least squares routines.
+*> +*>----------------------------------------------------------------------- +*> +*> NMAX is currently set to 132 and must be at least 12 for some of the +*> precomputed examples, and LWORK = NMAX*(5*NMAX+20) in the parameter +*> statements below. For SVD, we assume NRHS may be as big as N. The +*> parameter NEED is set to 14 to allow for 14 N-by-N matrices for ZGG. +*> \endverbatim +* +* Arguments: +* ========== +* +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complex16_eig +* +* ===================================================================== + PROGRAM ZCHKEE +* +#if defined(_OPENMP) + use omp_lib +#endif +* +* -- LAPACK test routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* ===================================================================== +* +* .. Parameters .. + INTEGER NMAX + PARAMETER ( NMAX = 132 ) + INTEGER NCMAX + PARAMETER ( NCMAX = 20 ) + INTEGER NEED + PARAMETER ( NEED = 14 ) + INTEGER LWORK + PARAMETER ( LWORK = NMAX*( 5*NMAX+20 ) ) + INTEGER LIWORK + PARAMETER ( LIWORK = NMAX*( NMAX+20 ) ) + INTEGER MAXIN + PARAMETER ( MAXIN = 20 ) + INTEGER MAXT + PARAMETER ( MAXT = 30 ) + INTEGER NIN, NOUT + PARAMETER ( NIN = 5, NOUT = 6 ) +* .. +* .. Local Scalars .. + LOGICAL ZBK, ZBL, ZES, ZEV, ZGK, ZGL, ZGS, ZGV, ZGX, + $ ZSX, ZVX, ZXV, CSD, FATAL, GLM, GQR, GSV, LSE, + $ NEP, SEP, SVD, TSTCHK, TSTDIF, TSTDRV, TSTERR, + $ ZBB, ZGG, ZHB + CHARACTER C1 + CHARACTER*3 C3, PATH + CHARACTER*32 VNAME + CHARACTER*10 INTSTR + CHARACTER*80 LINE + INTEGER I, I1, IC, INFO, ITMP, K, LENP, MAXTYP, NEWSD, + $ NK, NN, NPARMS, NRHS, NTYPES, + $ VERS_MAJOR, VERS_MINOR, VERS_PATCH, N_THREADS + DOUBLE PRECISION EPS, S1, S2, THRESH, THRSHN +* .. +* .. Local Arrays .. 
+ LOGICAL DOTYPE( MAXT ), LOGWRK( NMAX ) + INTEGER IOLDSD( 4 ), ISEED( 4 ), IWORK( LIWORK ), + $ KVAL( MAXIN ), MVAL( MAXIN ), MXBVAL( MAXIN ), + $ NBCOL( MAXIN ), NBMIN( MAXIN ), NBVAL( MAXIN ), + $ NSVAL( MAXIN ), NVAL( MAXIN ), NXVAL( MAXIN ), + $ PVAL( MAXIN ) + INTEGER INMIN( MAXIN ), INWIN( MAXIN ), INIBL( MAXIN ), + $ ISHFTS( MAXIN ), IACC22( MAXIN ) + DOUBLE PRECISION ALPHA( NMAX ), BETA( NMAX ), DR( NMAX, 12 ), + $ RESULT( 500 ) + COMPLEX*16 DC( NMAX, 6 ), TAUA( NMAX ), TAUB( NMAX ), + $ X( 5*NMAX ) +* .. +* .. Allocatable Arrays .. + INTEGER AllocateStatus + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: RWORK, S + COMPLEX*16, DIMENSION(:), ALLOCATABLE :: WORK + COMPLEX*16, DIMENSION(:,:), ALLOCATABLE :: A, B, C +* .. +* .. External Functions .. + LOGICAL LSAMEN + DOUBLE PRECISION DLAMCH, DSECND + EXTERNAL LSAMEN, DLAMCH, DSECND +* .. +* .. External Subroutines .. + EXTERNAL ALAREQ, XLAENV, ZCHKBB, ZCHKBD, ZCHKBK, ZCHKBL, + $ ZCHKEC, ZCHKGG, ZCHKGK, ZCHKGL, ZCHKHB, ZCHKHS, + $ ZCHKST, ZCKCSD, ZCKGLM, ZCKGQR, ZCKGSV, ZCKLSE, + $ ZDRGES, ZDRGEV, ZDRGSX, ZDRGVX, ZDRVBD, ZDRVES, + $ ZDRVEV, ZDRVSG, ZDRVST, ZDRVSX, ZDRVVX, + $ ZERRBD, ZERRED, ZERRGG, ZERRHS, ZERRST, ILAVER, + $ ZDRGES3, ZDRGEV3, + $ ZCHKST2STG, ZDRVST2STG, ZCHKHB2STG +* .. +* .. Intrinsic Functions .. + INTRINSIC LEN, MIN +* .. +* .. Scalars in Common .. + LOGICAL LERR, OK + CHARACTER*32 SRNAMT + INTEGER INFOT, MAXB, NPROC, NSHIFT, NUNIT, SELDIM, + $ SELOPT +* .. +* .. Arrays in Common .. + LOGICAL SELVAL( 20 ) + INTEGER IPARMS( 100 ) + DOUBLE PRECISION SELWI( 20 ), SELWR( 20 ) +* .. +* .. Common blocks .. + COMMON / CENVIR / NPROC, NSHIFT, MAXB + COMMON / INFOC / INFOT, NUNIT, OK, LERR + COMMON / SRNAMC / SRNAMT + COMMON / SSLCT / SELOPT, SELDIM, SELVAL, SELWR, SELWI + COMMON / CLAENV / IPARMS +* .. +* .. Data statements .. + DATA INTSTR / '0123456789' / + DATA IOLDSD / 0, 0, 0, 1 / +* .. +* .. Allocate memory dynamically .. 
+* + ALLOCATE ( S(NMAX*NMAX), STAT = AllocateStatus ) + IF (AllocateStatus /= 0) STOP "*** Not enough memory ***" + ALLOCATE ( A(NMAX*NMAX,NEED), STAT = AllocateStatus ) + IF (AllocateStatus /= 0) STOP "*** Not enough memory ***" + ALLOCATE ( B(NMAX*NMAX,5), STAT = AllocateStatus ) + IF (AllocateStatus /= 0) STOP "*** Not enough memory ***" + ALLOCATE ( C(NCMAX*NCMAX,NCMAX*NCMAX), STAT = AllocateStatus ) + IF (AllocateStatus /= 0) STOP "*** Not enough memory ***" + ALLOCATE ( RWORK(LWORK), STAT = AllocateStatus ) + IF (AllocateStatus /= 0) STOP "*** Not enough memory ***" + ALLOCATE ( WORK(LWORK), STAT = AllocateStatus ) + IF (AllocateStatus /= 0) STOP "*** Not enough memory ***" +* .. +* .. Executable Statements .. +* + A = 0.0 + B = 0.0 + C = 0.0 + DC = 0.0 + S1 = DSECND( ) + FATAL = .FALSE. + NUNIT = NOUT +* +* Return to here to read multiple sets of data +* + 10 CONTINUE +* +* Read the first line and set the 3-character test path +* + READ( NIN, FMT = '(A80)', END = 380 )LINE + PATH = LINE( 1: 3 ) + NEP = LSAMEN( 3, PATH, 'NEP' ) .OR. LSAMEN( 3, PATH, 'ZHS' ) + SEP = LSAMEN( 3, PATH, 'SEP' ) .OR. LSAMEN( 3, PATH, 'ZST' ) .OR. + $ LSAMEN( 3, PATH, 'ZSG' ) .OR. LSAMEN( 3, PATH, 'SE2' ) + SVD = LSAMEN( 3, PATH, 'SVD' ) .OR. LSAMEN( 3, PATH, 'ZBD' ) + ZEV = LSAMEN( 3, PATH, 'ZEV' ) + ZES = LSAMEN( 3, PATH, 'ZES' ) + ZVX = LSAMEN( 3, PATH, 'ZVX' ) + ZSX = LSAMEN( 3, PATH, 'ZSX' ) + ZGG = LSAMEN( 3, PATH, 'ZGG' ) + ZGS = LSAMEN( 3, PATH, 'ZGS' ) + ZGX = LSAMEN( 3, PATH, 'ZGX' ) + ZGV = LSAMEN( 3, PATH, 'ZGV' ) + ZXV = LSAMEN( 3, PATH, 'ZXV' ) + ZHB = LSAMEN( 3, PATH, 'ZHB' ) + ZBB = LSAMEN( 3, PATH, 'ZBB' ) + GLM = LSAMEN( 3, PATH, 'GLM' ) + GQR = LSAMEN( 3, PATH, 'GQR' ) .OR. LSAMEN( 3, PATH, 'GRQ' ) + GSV = LSAMEN( 3, PATH, 'GSV' ) + CSD = LSAMEN( 3, PATH, 'CSD' ) + LSE = LSAMEN( 3, PATH, 'LSE' ) + ZBL = LSAMEN( 3, PATH, 'ZBL' ) + ZBK = LSAMEN( 3, PATH, 'ZBK' ) + ZGL = LSAMEN( 3, PATH, 'ZGL' ) + ZGK = LSAMEN( 3, PATH, 'ZGK' ) +* +* Report values of parameters. 
+* + IF( PATH.EQ.' ' ) THEN + GO TO 10 + ELSE IF( NEP ) THEN + WRITE( NOUT, FMT = 9987 ) + ELSE IF( SEP ) THEN + WRITE( NOUT, FMT = 9986 ) + ELSE IF( SVD ) THEN + WRITE( NOUT, FMT = 9985 ) + ELSE IF( ZEV ) THEN + WRITE( NOUT, FMT = 9979 ) + ELSE IF( ZES ) THEN + WRITE( NOUT, FMT = 9978 ) + ELSE IF( ZVX ) THEN + WRITE( NOUT, FMT = 9977 ) + ELSE IF( ZSX ) THEN + WRITE( NOUT, FMT = 9976 ) + ELSE IF( ZGG ) THEN + WRITE( NOUT, FMT = 9975 ) + ELSE IF( ZGS ) THEN + WRITE( NOUT, FMT = 9964 ) + ELSE IF( ZGX ) THEN + WRITE( NOUT, FMT = 9965 ) + ELSE IF( ZGV ) THEN + WRITE( NOUT, FMT = 9963 ) + ELSE IF( ZXV ) THEN + WRITE( NOUT, FMT = 9962 ) + ELSE IF( ZHB ) THEN + WRITE( NOUT, FMT = 9974 ) + ELSE IF( ZBB ) THEN + WRITE( NOUT, FMT = 9967 ) + ELSE IF( GLM ) THEN + WRITE( NOUT, FMT = 9971 ) + ELSE IF( GQR ) THEN + WRITE( NOUT, FMT = 9970 ) + ELSE IF( GSV ) THEN + WRITE( NOUT, FMT = 9969 ) + ELSE IF( CSD ) THEN + WRITE( NOUT, FMT = 9960 ) + ELSE IF( LSE ) THEN + WRITE( NOUT, FMT = 9968 ) + ELSE IF( ZBL ) THEN +* +* ZGEBAL: Balancing +* + CALL ZCHKBL( NIN, NOUT ) + GO TO 380 + ELSE IF( ZBK ) THEN +* +* ZGEBAK: Back transformation +* + CALL ZCHKBK( NIN, NOUT ) + GO TO 380 + ELSE IF( ZGL ) THEN +* +* ZGGBAL: Balancing +* + CALL ZCHKGL( NIN, NOUT ) + GO TO 380 + ELSE IF( ZGK ) THEN +* +* ZGGBAK: Back transformation +* + CALL ZCHKGK( NIN, NOUT ) + GO TO 380 + ELSE IF( LSAMEN( 3, PATH, 'ZEC' ) ) THEN +* +* ZEC: Eigencondition estimation +* + READ( NIN, FMT = * )THRESH + CALL XLAENV( 1, 1 ) + CALL XLAENV( 12, 1 ) + TSTERR = .TRUE. + CALL ZCHKEC( THRESH, TSTERR, NIN, NOUT ) + GO TO 380 + ELSE + WRITE( NOUT, FMT = 9992 )PATH + GO TO 380 + END IF + CALL ILAVER( VERS_MAJOR, VERS_MINOR, VERS_PATCH ) + WRITE( NOUT, FMT = 9972 ) VERS_MAJOR, VERS_MINOR, VERS_PATCH + WRITE( NOUT, FMT = 9984 ) +* +* Read the number of values of M, P, and N. +* + READ( NIN, FMT = * )NN + IF( NN.LT.0 ) THEN + WRITE( NOUT, FMT = 9989 )' NN ', NN, 1 + NN = 0 + FATAL = .TRUE. 
+ ELSE IF( NN.GT.MAXIN ) THEN + WRITE( NOUT, FMT = 9988 )' NN ', NN, MAXIN + NN = 0 + FATAL = .TRUE. + END IF +* +* Read the values of M +* + IF( .NOT.( ZGX .OR. ZXV ) ) THEN + READ( NIN, FMT = * )( MVAL( I ), I = 1, NN ) + IF( SVD ) THEN + VNAME = ' M ' + ELSE + VNAME = ' N ' + END IF + DO 20 I = 1, NN + IF( MVAL( I ).LT.0 ) THEN + WRITE( NOUT, FMT = 9989 )VNAME, MVAL( I ), 0 + FATAL = .TRUE. + ELSE IF( MVAL( I ).GT.NMAX ) THEN + WRITE( NOUT, FMT = 9988 )VNAME, MVAL( I ), NMAX + FATAL = .TRUE. + END IF + 20 CONTINUE + WRITE( NOUT, FMT = 9983 )'M: ', ( MVAL( I ), I = 1, NN ) + END IF +* +* Read the values of P +* + IF( GLM .OR. GQR .OR. GSV .OR. CSD .OR. LSE ) THEN + READ( NIN, FMT = * )( PVAL( I ), I = 1, NN ) + DO 30 I = 1, NN + IF( PVAL( I ).LT.0 ) THEN + WRITE( NOUT, FMT = 9989 )' P ', PVAL( I ), 0 + FATAL = .TRUE. + ELSE IF( PVAL( I ).GT.NMAX ) THEN + WRITE( NOUT, FMT = 9988 )' P ', PVAL( I ), NMAX + FATAL = .TRUE. + END IF + 30 CONTINUE + WRITE( NOUT, FMT = 9983 )'P: ', ( PVAL( I ), I = 1, NN ) + END IF +* +* Read the values of N +* + IF( SVD .OR. ZBB .OR. GLM .OR. GQR .OR. GSV .OR. CSD .OR. + $ LSE ) THEN + READ( NIN, FMT = * )( NVAL( I ), I = 1, NN ) + DO 40 I = 1, NN + IF( NVAL( I ).LT.0 ) THEN + WRITE( NOUT, FMT = 9989 )' N ', NVAL( I ), 0 + FATAL = .TRUE. + ELSE IF( NVAL( I ).GT.NMAX ) THEN + WRITE( NOUT, FMT = 9988 )' N ', NVAL( I ), NMAX + FATAL = .TRUE. + END IF + 40 CONTINUE + ELSE + DO 50 I = 1, NN + NVAL( I ) = MVAL( I ) + 50 CONTINUE + END IF + IF( .NOT.( ZGX .OR. ZXV ) ) THEN + WRITE( NOUT, FMT = 9983 )'N: ', ( NVAL( I ), I = 1, NN ) + ELSE + WRITE( NOUT, FMT = 9983 )'N: ', NN + END IF +* +* Read the number of values of K, followed by the values of K +* + IF( ZHB .OR. ZBB ) THEN + READ( NIN, FMT = * )NK + READ( NIN, FMT = * )( KVAL( I ), I = 1, NK ) + DO 60 I = 1, NK + IF( KVAL( I ).LT.0 ) THEN + WRITE( NOUT, FMT = 9989 )' K ', KVAL( I ), 0 + FATAL = .TRUE. 
+ ELSE IF( KVAL( I ).GT.NMAX ) THEN + WRITE( NOUT, FMT = 9988 )' K ', KVAL( I ), NMAX + FATAL = .TRUE. + END IF + 60 CONTINUE + WRITE( NOUT, FMT = 9983 )'K: ', ( KVAL( I ), I = 1, NK ) + END IF +* + IF( ZEV .OR. ZES .OR. ZVX .OR. ZSX ) THEN +* +* For the nonsymmetric QR driver routines, only one set of +* parameters is allowed. +* + READ( NIN, FMT = * )NBVAL( 1 ), NBMIN( 1 ), NXVAL( 1 ), + $ INMIN( 1 ), INWIN( 1 ), INIBL(1), ISHFTS(1), IACC22(1) + IF( NBVAL( 1 ).LT.1 ) THEN + WRITE( NOUT, FMT = 9989 )' NB ', NBVAL( 1 ), 1 + FATAL = .TRUE. + ELSE IF( NBMIN( 1 ).LT.1 ) THEN + WRITE( NOUT, FMT = 9989 )'NBMIN ', NBMIN( 1 ), 1 + FATAL = .TRUE. + ELSE IF( NXVAL( 1 ).LT.1 ) THEN + WRITE( NOUT, FMT = 9989 )' NX ', NXVAL( 1 ), 1 + FATAL = .TRUE. + ELSE IF( INMIN( 1 ).LT.1 ) THEN + WRITE( NOUT, FMT = 9989 )' INMIN ', INMIN( 1 ), 1 + FATAL = .TRUE. + ELSE IF( INWIN( 1 ).LT.1 ) THEN + WRITE( NOUT, FMT = 9989 )' INWIN ', INWIN( 1 ), 1 + FATAL = .TRUE. + ELSE IF( INIBL( 1 ).LT.1 ) THEN + WRITE( NOUT, FMT = 9989 )' INIBL ', INIBL( 1 ), 1 + FATAL = .TRUE. + ELSE IF( ISHFTS( 1 ).LT.1 ) THEN + WRITE( NOUT, FMT = 9989 )' ISHFTS ', ISHFTS( 1 ), 1 + FATAL = .TRUE. + ELSE IF( IACC22( 1 ).LT.0 ) THEN + WRITE( NOUT, FMT = 9989 )' IACC22 ', IACC22( 1 ), 0 + FATAL = .TRUE. + END IF + CALL XLAENV( 1, NBVAL( 1 ) ) + CALL XLAENV( 2, NBMIN( 1 ) ) + CALL XLAENV( 3, NXVAL( 1 ) ) + CALL XLAENV(12, MAX( 11, INMIN( 1 ) ) ) + CALL XLAENV(13, INWIN( 1 ) ) + CALL XLAENV(14, INIBL( 1 ) ) + CALL XLAENV(15, ISHFTS( 1 ) ) + CALL XLAENV(16, IACC22( 1 ) ) + WRITE( NOUT, FMT = 9983 )'NB: ', NBVAL( 1 ) + WRITE( NOUT, FMT = 9983 )'NBMIN:', NBMIN( 1 ) + WRITE( NOUT, FMT = 9983 )'NX: ', NXVAL( 1 ) + WRITE( NOUT, FMT = 9983 )'INMIN: ', INMIN( 1 ) + WRITE( NOUT, FMT = 9983 )'INWIN: ', INWIN( 1 ) + WRITE( NOUT, FMT = 9983 )'INIBL: ', INIBL( 1 ) + WRITE( NOUT, FMT = 9983 )'ISHFTS: ', ISHFTS( 1 ) + WRITE( NOUT, FMT = 9983 )'IACC22: ', IACC22( 1 ) +* + ELSE IF( ZGS .OR. ZGX .OR. ZGV .OR. 
ZXV ) THEN +* +* For the nonsymmetric generalized driver routines, only one set of +* parameters is allowed. +* + READ( NIN, FMT = * )NBVAL( 1 ), NBMIN( 1 ), NXVAL( 1 ), + $ NSVAL( 1 ), MXBVAL( 1 ) + IF( NBVAL( 1 ).LT.1 ) THEN + WRITE( NOUT, FMT = 9989 )' NB ', NBVAL( 1 ), 1 + FATAL = .TRUE. + ELSE IF( NBMIN( 1 ).LT.1 ) THEN + WRITE( NOUT, FMT = 9989 )'NBMIN ', NBMIN( 1 ), 1 + FATAL = .TRUE. + ELSE IF( NXVAL( 1 ).LT.1 ) THEN + WRITE( NOUT, FMT = 9989 )' NX ', NXVAL( 1 ), 1 + FATAL = .TRUE. + ELSE IF( NSVAL( 1 ).LT.2 ) THEN + WRITE( NOUT, FMT = 9989 )' NS ', NSVAL( 1 ), 2 + FATAL = .TRUE. + ELSE IF( MXBVAL( 1 ).LT.1 ) THEN + WRITE( NOUT, FMT = 9989 )' MAXB ', MXBVAL( 1 ), 1 + FATAL = .TRUE. + END IF + CALL XLAENV( 1, NBVAL( 1 ) ) + CALL XLAENV( 2, NBMIN( 1 ) ) + CALL XLAENV( 3, NXVAL( 1 ) ) + CALL XLAENV( 4, NSVAL( 1 ) ) + CALL XLAENV( 8, MXBVAL( 1 ) ) + WRITE( NOUT, FMT = 9983 )'NB: ', NBVAL( 1 ) + WRITE( NOUT, FMT = 9983 )'NBMIN:', NBMIN( 1 ) + WRITE( NOUT, FMT = 9983 )'NX: ', NXVAL( 1 ) + WRITE( NOUT, FMT = 9983 )'NS: ', NSVAL( 1 ) + WRITE( NOUT, FMT = 9983 )'MAXB: ', MXBVAL( 1 ) + ELSE IF( .NOT.ZHB .AND. .NOT.GLM .AND. .NOT.GQR .AND. .NOT. + $ GSV .AND. .NOT.CSD .AND. .NOT.LSE ) THEN +* +* For the other paths, the number of parameters can be varied +* from the input file. Read the number of parameter values. +* + READ( NIN, FMT = * )NPARMS + IF( NPARMS.LT.1 ) THEN + WRITE( NOUT, FMT = 9989 )'NPARMS', NPARMS, 1 + NPARMS = 0 + FATAL = .TRUE. + ELSE IF( NPARMS.GT.MAXIN ) THEN + WRITE( NOUT, FMT = 9988 )'NPARMS', NPARMS, MAXIN + NPARMS = 0 + FATAL = .TRUE. + END IF +* +* Read the values of NB +* + IF( .NOT.ZBB ) THEN + READ( NIN, FMT = * )( NBVAL( I ), I = 1, NPARMS ) + DO 70 I = 1, NPARMS + IF( NBVAL( I ).LT.0 ) THEN + WRITE( NOUT, FMT = 9989 )' NB ', NBVAL( I ), 0 + FATAL = .TRUE. + ELSE IF( NBVAL( I ).GT.NMAX ) THEN + WRITE( NOUT, FMT = 9988 )' NB ', NBVAL( I ), NMAX + FATAL = .TRUE. 
+ END IF + 70 CONTINUE + WRITE( NOUT, FMT = 9983 )'NB: ', + $ ( NBVAL( I ), I = 1, NPARMS ) + END IF +* +* Read the values of NBMIN +* + IF( NEP .OR. SEP .OR. SVD .OR. ZGG ) THEN + READ( NIN, FMT = * )( NBMIN( I ), I = 1, NPARMS ) + DO 80 I = 1, NPARMS + IF( NBMIN( I ).LT.0 ) THEN + WRITE( NOUT, FMT = 9989 )'NBMIN ', NBMIN( I ), 0 + FATAL = .TRUE. + ELSE IF( NBMIN( I ).GT.NMAX ) THEN + WRITE( NOUT, FMT = 9988 )'NBMIN ', NBMIN( I ), NMAX + FATAL = .TRUE. + END IF + 80 CONTINUE + WRITE( NOUT, FMT = 9983 )'NBMIN:', + $ ( NBMIN( I ), I = 1, NPARMS ) + ELSE + DO 90 I = 1, NPARMS + NBMIN( I ) = 1 + 90 CONTINUE + END IF +* +* Read the values of NX +* + IF( NEP .OR. SEP .OR. SVD ) THEN + READ( NIN, FMT = * )( NXVAL( I ), I = 1, NPARMS ) + DO 100 I = 1, NPARMS + IF( NXVAL( I ).LT.0 ) THEN + WRITE( NOUT, FMT = 9989 )' NX ', NXVAL( I ), 0 + FATAL = .TRUE. + ELSE IF( NXVAL( I ).GT.NMAX ) THEN + WRITE( NOUT, FMT = 9988 )' NX ', NXVAL( I ), NMAX + FATAL = .TRUE. + END IF + 100 CONTINUE + WRITE( NOUT, FMT = 9983 )'NX: ', + $ ( NXVAL( I ), I = 1, NPARMS ) + ELSE + DO 110 I = 1, NPARMS + NXVAL( I ) = 1 + 110 CONTINUE + END IF +* +* Read the values of NSHIFT (if ZGG) or NRHS (if SVD +* or ZBB). +* + IF( SVD .OR. ZBB .OR. ZGG ) THEN + READ( NIN, FMT = * )( NSVAL( I ), I = 1, NPARMS ) + DO 120 I = 1, NPARMS + IF( NSVAL( I ).LT.0 ) THEN + WRITE( NOUT, FMT = 9989 )' NS ', NSVAL( I ), 0 + FATAL = .TRUE. + ELSE IF( NSVAL( I ).GT.NMAX ) THEN + WRITE( NOUT, FMT = 9988 )' NS ', NSVAL( I ), NMAX + FATAL = .TRUE. + END IF + 120 CONTINUE + WRITE( NOUT, FMT = 9983 )'NS: ', + $ ( NSVAL( I ), I = 1, NPARMS ) + ELSE + DO 130 I = 1, NPARMS + NSVAL( I ) = 1 + 130 CONTINUE + END IF +* +* Read the values for MAXB. +* + IF( ZGG ) THEN + READ( NIN, FMT = * )( MXBVAL( I ), I = 1, NPARMS ) + DO 140 I = 1, NPARMS + IF( MXBVAL( I ).LT.0 ) THEN + WRITE( NOUT, FMT = 9989 )' MAXB ', MXBVAL( I ), 0 + FATAL = .TRUE. 
+ ELSE IF( MXBVAL( I ).GT.NMAX ) THEN + WRITE( NOUT, FMT = 9988 )' MAXB ', MXBVAL( I ), NMAX + FATAL = .TRUE. + END IF + 140 CONTINUE + WRITE( NOUT, FMT = 9983 )'MAXB: ', + $ ( MXBVAL( I ), I = 1, NPARMS ) + ELSE + DO 150 I = 1, NPARMS + MXBVAL( I ) = 1 + 150 CONTINUE + END IF +* +* Read the values for INMIN. +* + IF( NEP ) THEN + READ( NIN, FMT = * )( INMIN( I ), I = 1, NPARMS ) + DO 540 I = 1, NPARMS + IF( INMIN( I ).LT.0 ) THEN + WRITE( NOUT, FMT = 9989 )' INMIN ', INMIN( I ), 0 + FATAL = .TRUE. + END IF + 540 CONTINUE + WRITE( NOUT, FMT = 9983 )'INMIN: ', + $ ( INMIN( I ), I = 1, NPARMS ) + ELSE + DO 550 I = 1, NPARMS + INMIN( I ) = 1 + 550 CONTINUE + END IF +* +* Read the values for INWIN. +* + IF( NEP ) THEN + READ( NIN, FMT = * )( INWIN( I ), I = 1, NPARMS ) + DO 560 I = 1, NPARMS + IF( INWIN( I ).LT.0 ) THEN + WRITE( NOUT, FMT = 9989 )' INWIN ', INWIN( I ), 0 + FATAL = .TRUE. + END IF + 560 CONTINUE + WRITE( NOUT, FMT = 9983 )'INWIN: ', + $ ( INWIN( I ), I = 1, NPARMS ) + ELSE + DO 570 I = 1, NPARMS + INWIN( I ) = 1 + 570 CONTINUE + END IF +* +* Read the values for INIBL. +* + IF( NEP ) THEN + READ( NIN, FMT = * )( INIBL( I ), I = 1, NPARMS ) + DO 580 I = 1, NPARMS + IF( INIBL( I ).LT.0 ) THEN + WRITE( NOUT, FMT = 9989 )' INIBL ', INIBL( I ), 0 + FATAL = .TRUE. + END IF + 580 CONTINUE + WRITE( NOUT, FMT = 9983 )'INIBL: ', + $ ( INIBL( I ), I = 1, NPARMS ) + ELSE + DO 590 I = 1, NPARMS + INIBL( I ) = 1 + 590 CONTINUE + END IF +* +* Read the values for ISHFTS. +* + IF( NEP ) THEN + READ( NIN, FMT = * )( ISHFTS( I ), I = 1, NPARMS ) + DO 600 I = 1, NPARMS + IF( ISHFTS( I ).LT.0 ) THEN + WRITE( NOUT, FMT = 9989 )' ISHFTS ', ISHFTS( I ), 0 + FATAL = .TRUE. + END IF + 600 CONTINUE + WRITE( NOUT, FMT = 9983 )'ISHFTS: ', + $ ( ISHFTS( I ), I = 1, NPARMS ) + ELSE + DO 610 I = 1, NPARMS + ISHFTS( I ) = 1 + 610 CONTINUE + END IF +* +* Read the values for IACC22. +* + IF( NEP .OR. 
ZGG ) THEN + READ( NIN, FMT = * )( IACC22( I ), I = 1, NPARMS ) + DO 620 I = 1, NPARMS + IF( IACC22( I ).LT.0 ) THEN + WRITE( NOUT, FMT = 9989 )' IACC22 ', IACC22( I ), 0 + FATAL = .TRUE. + END IF + 620 CONTINUE + WRITE( NOUT, FMT = 9983 )'IACC22: ', + $ ( IACC22( I ), I = 1, NPARMS ) + ELSE + DO 630 I = 1, NPARMS + IACC22( I ) = 1 + 630 CONTINUE + END IF +* +* Read the values for NBCOL. +* + IF( ZGG ) THEN + READ( NIN, FMT = * )( NBCOL( I ), I = 1, NPARMS ) + DO 160 I = 1, NPARMS + IF( NBCOL( I ).LT.0 ) THEN + WRITE( NOUT, FMT = 9989 )'NBCOL ', NBCOL( I ), 0 + FATAL = .TRUE. + ELSE IF( NBCOL( I ).GT.NMAX ) THEN + WRITE( NOUT, FMT = 9988 )'NBCOL ', NBCOL( I ), NMAX + FATAL = .TRUE. + END IF + 160 CONTINUE + WRITE( NOUT, FMT = 9983 )'NBCOL:', + $ ( NBCOL( I ), I = 1, NPARMS ) + ELSE + DO 170 I = 1, NPARMS + NBCOL( I ) = 1 + 170 CONTINUE + END IF + END IF +* +* Calculate and print the machine dependent constants. +* + WRITE( NOUT, FMT = * ) + EPS = DLAMCH( 'Underflow threshold' ) + WRITE( NOUT, FMT = 9981 )'underflow', EPS + EPS = DLAMCH( 'Overflow threshold' ) + WRITE( NOUT, FMT = 9981 )'overflow ', EPS + EPS = DLAMCH( 'Epsilon' ) + WRITE( NOUT, FMT = 9981 )'precision', EPS +* +* Read the threshold value for the test ratios. +* + READ( NIN, FMT = * )THRESH + WRITE( NOUT, FMT = 9982 )THRESH + IF( SEP .OR. SVD .OR. ZGG ) THEN +* +* Read the flag that indicates whether to test LAPACK routines. +* + READ( NIN, FMT = * )TSTCHK +* +* Read the flag that indicates whether to test driver routines. +* + READ( NIN, FMT = * )TSTDRV + END IF +* +* Read the flag that indicates whether to test the error exits. +* + READ( NIN, FMT = * )TSTERR +* +* Read the code describing how to set the random number seed. +* + READ( NIN, FMT = * )NEWSD +* +* If NEWSD = 2, read another line with 4 integers for the seed. 
+* + IF( NEWSD.EQ.2 ) + $ READ( NIN, FMT = * )( IOLDSD( I ), I = 1, 4 ) +* + DO 180 I = 1, 4 + ISEED( I ) = IOLDSD( I ) + 180 CONTINUE +* + IF( FATAL ) THEN + WRITE( NOUT, FMT = 9999 ) + STOP + END IF +* +* Read the input lines indicating the test path and its parameters. +* The first three characters indicate the test path, and the number +* of test matrix types must be the first nonblank item in columns +* 4-80. +* + 190 CONTINUE +* + IF( .NOT.( ZGX .OR. ZXV ) ) THEN +* + 200 CONTINUE + READ( NIN, FMT = '(A80)', END = 380 )LINE + C3 = LINE( 1: 3 ) + LENP = LEN( LINE ) + I = 3 + ITMP = 0 + I1 = 0 + 210 CONTINUE + I = I + 1 + IF( I.GT.LENP ) THEN + IF( I1.GT.0 ) THEN + GO TO 240 + ELSE + NTYPES = MAXT + GO TO 240 + END IF + END IF + IF( LINE( I: I ).NE.' ' .AND. LINE( I: I ).NE.',' ) THEN + I1 = I + C1 = LINE( I1: I1 ) +* +* Check that a valid integer was read +* + DO 220 K = 1, 10 + IF( C1.EQ.INTSTR( K: K ) ) THEN + IC = K - 1 + GO TO 230 + END IF + 220 CONTINUE + WRITE( NOUT, FMT = 9991 )I, LINE + GO TO 200 + 230 CONTINUE + ITMP = 10*ITMP + IC + GO TO 210 + ELSE IF( I1.GT.0 ) THEN + GO TO 240 + ELSE + GO TO 210 + END IF + 240 CONTINUE + NTYPES = ITMP +* +* Skip the tests if NTYPES is <= 0. +* + IF( .NOT.( ZEV .OR. ZES .OR. ZVX .OR. ZSX .OR. ZGV .OR. + $ ZGS ) .AND. NTYPES.LE.0 ) THEN + WRITE( NOUT, FMT = 9990 )C3 + GO TO 200 + END IF +* + ELSE + IF( ZGX ) + $ C3 = 'ZGX' + IF( ZXV ) + $ C3 = 'ZXV' + END IF +* +* Reset the random number seed. +* + IF( NEWSD.EQ.0 ) THEN + DO 250 K = 1, 4 + ISEED( K ) = IOLDSD( K ) + 250 CONTINUE + END IF +* + IF( LSAMEN( 3, C3, 'ZHS' ) .OR. 
LSAMEN( 3, C3, 'NEP' ) ) THEN +* +* ------------------------------------- +* NEP: Nonsymmetric Eigenvalue Problem +* ------------------------------------- +* Vary the parameters +* NB = block size +* NBMIN = minimum block size +* NX = crossover point +* NS = number of shifts +* MAXB = minimum submatrix size +* + MAXTYP = 21 + NTYPES = MIN( MAXTYP, NTYPES ) + CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) + CALL XLAENV( 1, 1 ) + IF( TSTERR ) + $ CALL ZERRHS( 'ZHSEQR', NOUT ) + DO 270 I = 1, NPARMS + CALL XLAENV( 1, NBVAL( I ) ) + CALL XLAENV( 2, NBMIN( I ) ) + CALL XLAENV( 3, NXVAL( I ) ) + CALL XLAENV(12, MAX( 11, INMIN( I ) ) ) + CALL XLAENV(13, INWIN( I ) ) + CALL XLAENV(14, INIBL( I ) ) + CALL XLAENV(15, ISHFTS( I ) ) + CALL XLAENV(16, IACC22( I ) ) +* + IF( NEWSD.EQ.0 ) THEN + DO 260 K = 1, 4 + ISEED( K ) = IOLDSD( K ) + 260 CONTINUE + END IF + WRITE( NOUT, FMT = 9961 )C3, NBVAL( I ), NBMIN( I ), + $ NXVAL( I ), MAX( 11, INMIN(I)), + $ INWIN( I ), INIBL( I ), ISHFTS( I ), IACC22( I ) + CALL ZCHKHS( NN, NVAL, MAXTYP, DOTYPE, ISEED, THRESH, NOUT, + $ A( 1, 1 ), NMAX, A( 1, 2 ), A( 1, 3 ), + $ A( 1, 4 ), A( 1, 5 ), NMAX, A( 1, 6 ), + $ A( 1, 7 ), DC( 1, 1 ), DC( 1, 2 ), A( 1, 8 ), + $ A( 1, 9 ), A( 1, 10 ), A( 1, 11 ), A( 1, 12 ), + $ DC( 1, 3 ), WORK, LWORK, RWORK, IWORK, LOGWRK, + $ RESULT, INFO ) + IF( INFO.NE.0 ) + $ WRITE( NOUT, FMT = 9980 )'ZCHKHS', INFO + 270 CONTINUE +* + ELSE IF( LSAMEN( 3, C3, 'ZST' ) .OR. LSAMEN( 3, C3, 'SEP' ) + $ .OR. 
LSAMEN( 3, C3, 'SE2' ) ) THEN +* +* ---------------------------------- +* SEP: Symmetric Eigenvalue Problem +* ---------------------------------- +* Vary the parameters +* NB = block size +* NBMIN = minimum block size +* NX = crossover point +* + MAXTYP = 21 + NTYPES = MIN( MAXTYP, NTYPES ) + CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) + CALL XLAENV( 1, 1 ) + CALL XLAENV( 9, 25 ) + IF( TSTERR ) THEN +#if defined(_OPENMP) + N_THREADS = OMP_GET_NUM_THREADS() + CALL OMP_SET_NUM_THREADS(1) +#endif + CALL ZERRST( 'ZST', NOUT ) +#if defined(_OPENMP) + CALL OMP_SET_NUM_THREADS(N_THREADS) +#endif + END IF + DO 290 I = 1, NPARMS + CALL XLAENV( 1, NBVAL( I ) ) + CALL XLAENV( 2, NBMIN( I ) ) + CALL XLAENV( 3, NXVAL( I ) ) +* + IF( NEWSD.EQ.0 ) THEN + DO 280 K = 1, 4 + ISEED( K ) = IOLDSD( K ) + 280 CONTINUE + END IF + WRITE( NOUT, FMT = 9997 )C3, NBVAL( I ), NBMIN( I ), + $ NXVAL( I ) + IF( TSTCHK ) THEN + IF( LSAMEN( 3, C3, 'SE2' ) ) THEN + CALL ZCHKST2STG( NN, NVAL, MAXTYP, DOTYPE, ISEED, THRESH, + $ NOUT, A( 1, 1 ), NMAX, A( 1, 2 ), + $ DR( 1, 1 ), DR( 1, 2 ), DR( 1, 3 ), + $ DR( 1, 4 ), DR( 1, 5 ), DR( 1, 6 ), + $ DR( 1, 7 ), DR( 1, 8 ), DR( 1, 9 ), + $ DR( 1, 10 ), DR( 1, 11 ), A( 1, 3 ), NMAX, + $ A( 1, 4 ), A( 1, 5 ), DC( 1, 1 ), A( 1, 6 ), + $ WORK, LWORK, RWORK, LWORK, IWORK, LIWORK, + $ RESULT, INFO ) + ELSE + CALL ZCHKST( NN, NVAL, MAXTYP, DOTYPE, ISEED, THRESH, + $ NOUT, A( 1, 1 ), NMAX, A( 1, 2 ), + $ DR( 1, 1 ), DR( 1, 2 ), DR( 1, 3 ), + $ DR( 1, 4 ), DR( 1, 5 ), DR( 1, 6 ), + $ DR( 1, 7 ), DR( 1, 8 ), DR( 1, 9 ), + $ DR( 1, 10 ), DR( 1, 11 ), A( 1, 3 ), NMAX, + $ A( 1, 4 ), A( 1, 5 ), DC( 1, 1 ), A( 1, 6 ), + $ WORK, LWORK, RWORK, LWORK, IWORK, LIWORK, + $ RESULT, INFO ) + ENDIF + IF( INFO.NE.0 ) + $ WRITE( NOUT, FMT = 9980 )'ZCHKST', INFO + END IF + IF( TSTDRV ) THEN + IF( LSAMEN( 3, C3, 'SE2' ) ) THEN + CALL ZDRVST2STG( NN, NVAL, 18, DOTYPE, ISEED, THRESH, + $ NOUT, A( 1, 1 ), NMAX, DR( 1, 3 ), DR( 1, 4 ), + $ DR( 1, 5 ), DR( 1, 8 ), DR( 1, 9 ), + $ 
DR( 1, 10 ), A( 1, 2 ), NMAX, A( 1, 3 ), + $ DC( 1, 1 ), A( 1, 4 ), WORK, LWORK, RWORK, + $ LWORK, IWORK, LIWORK, RESULT, INFO ) + ELSE + CALL ZDRVST( NN, NVAL, 18, DOTYPE, ISEED, THRESH, NOUT, + $ A( 1, 1 ), NMAX, DR( 1, 3 ), DR( 1, 4 ), + $ DR( 1, 5 ), DR( 1, 8 ), DR( 1, 9 ), + $ DR( 1, 10 ), A( 1, 2 ), NMAX, A( 1, 3 ), + $ DC( 1, 1 ), A( 1, 4 ), WORK, LWORK, RWORK, + $ LWORK, IWORK, LIWORK, RESULT, INFO ) + ENDIF + IF( INFO.NE.0 ) + $ WRITE( NOUT, FMT = 9980 )'ZDRVST', INFO + END IF + 290 CONTINUE +* + ELSE IF( LSAMEN( 3, C3, 'ZSG' ) ) THEN +* +* ---------------------------------------------- +* ZSG: Hermitian Generalized Eigenvalue Problem +* ---------------------------------------------- +* Vary the parameters +* NB = block size +* NBMIN = minimum block size +* NX = crossover point +* + MAXTYP = 21 + NTYPES = MIN( MAXTYP, NTYPES ) + CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) + CALL XLAENV( 9, 25 ) + DO 310 I = 1, NPARMS + CALL XLAENV( 1, NBVAL( I ) ) + CALL XLAENV( 2, NBMIN( I ) ) + CALL XLAENV( 3, NXVAL( I ) ) +* + IF( NEWSD.EQ.0 ) THEN + DO 300 K = 1, 4 + ISEED( K ) = IOLDSD( K ) + 300 CONTINUE + END IF + WRITE( NOUT, FMT = 9997 )C3, NBVAL( I ), NBMIN( I ), + $ NXVAL( I ) + IF( TSTCHK ) THEN +* CALL ZDRVSG( NN, NVAL, MAXTYP, DOTYPE, ISEED, THRESH, +* $ NOUT, A( 1, 1 ), NMAX, A( 1, 2 ), NMAX, +* $ DR( 1, 3 ), A( 1, 3 ), NMAX, A( 1, 4 ), +* $ A( 1, 5 ), A( 1, 6 ), A( 1, 7 ), WORK, +* $ LWORK, RWORK, LWORK, IWORK, LIWORK, RESULT, +* $ INFO ) + CALL ZDRVSG2STG( NN, NVAL, MAXTYP, DOTYPE, ISEED, THRESH, + $ NOUT, A( 1, 1 ), NMAX, A( 1, 2 ), NMAX, + $ DR( 1, 3 ), DR( 1, 4 ), A( 1, 3 ), NMAX, + $ A( 1, 4 ), A( 1, 5 ), A( 1, 6 ), + $ A( 1, 7 ), WORK, LWORK, RWORK, LWORK, + $ IWORK, LIWORK, RESULT, INFO ) + IF( INFO.NE.0 ) + $ WRITE( NOUT, FMT = 9980 )'ZDRVSG', INFO + END IF + 310 CONTINUE +* + ELSE IF( LSAMEN( 3, C3, 'ZBD' ) .OR. 
LSAMEN( 3, C3, 'SVD' ) ) THEN +* +* ---------------------------------- +* SVD: Singular Value Decomposition +* ---------------------------------- +* Vary the parameters +* NB = block size +* NBMIN = minimum block size +* NX = crossover point +* NRHS = number of right hand sides +* + MAXTYP = 16 + NTYPES = MIN( MAXTYP, NTYPES ) + CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) + CALL XLAENV( 9, 25 ) +* +* Test the error exits +* + CALL XLAENV( 1, 1 ) + IF( TSTERR .AND. TSTCHK ) + $ CALL ZERRBD( 'ZBD', NOUT ) + IF( TSTERR .AND. TSTDRV ) + $ CALL ZERRED( 'ZBD', NOUT ) +* + DO 330 I = 1, NPARMS + NRHS = NSVAL( I ) + CALL XLAENV( 1, NBVAL( I ) ) + CALL XLAENV( 2, NBMIN( I ) ) + CALL XLAENV( 3, NXVAL( I ) ) + IF( NEWSD.EQ.0 ) THEN + DO 320 K = 1, 4 + ISEED( K ) = IOLDSD( K ) + 320 CONTINUE + END IF + WRITE( NOUT, FMT = 9995 )C3, NBVAL( I ), NBMIN( I ), + $ NXVAL( I ), NRHS + IF( TSTCHK ) THEN + CALL ZCHKBD( NN, MVAL, NVAL, MAXTYP, DOTYPE, NRHS, ISEED, + $ THRESH, A( 1, 1 ), NMAX, DR( 1, 1 ), + $ DR( 1, 2 ), DR( 1, 3 ), DR( 1, 4 ), + $ A( 1, 2 ), NMAX, A( 1, 3 ), A( 1, 4 ), + $ A( 1, 5 ), NMAX, A( 1, 6 ), NMAX, A( 1, 7 ), + $ A( 1, 8 ), WORK, LWORK, RWORK, NOUT, INFO ) + IF( INFO.NE.0 ) + $ WRITE( NOUT, FMT = 9980 )'ZCHKBD', INFO + END IF + IF( TSTDRV ) + $ CALL ZDRVBD( NN, MVAL, NVAL, MAXTYP, DOTYPE, ISEED, + $ THRESH, A( 1, 1 ), NMAX, A( 1, 2 ), NMAX, + $ A( 1, 3 ), NMAX, A( 1, 4 ), A( 1, 5 ), + $ A( 1, 6 ), DR( 1, 1 ), DR( 1, 2 ), + $ DR( 1, 3 ), WORK, LWORK, RWORK, IWORK, NOUT, + $ INFO ) + 330 CONTINUE +* + ELSE IF( LSAMEN( 3, C3, 'ZEV' ) ) THEN +* +* -------------------------------------------- +* ZEV: Nonsymmetric Eigenvalue Problem Driver +* ZGEEV (eigenvalues and eigenvectors) +* -------------------------------------------- +* + MAXTYP = 21 + NTYPES = MIN( MAXTYP, NTYPES ) + IF( NTYPES.LE.0 ) THEN + WRITE( NOUT, FMT = 9990 )C3 + ELSE + IF( TSTERR ) + $ CALL ZERRED( C3, NOUT ) + CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) + CALL ZDRVEV( NN, NVAL, 
NTYPES, DOTYPE, ISEED, THRESH, NOUT, + $ A( 1, 1 ), NMAX, A( 1, 2 ), DC( 1, 1 ), + $ DC( 1, 2 ), A( 1, 3 ), NMAX, A( 1, 4 ), NMAX, + $ A( 1, 5 ), NMAX, RESULT, WORK, LWORK, RWORK, + $ IWORK, INFO ) + IF( INFO.NE.0 ) + $ WRITE( NOUT, FMT = 9980 )'ZGEEV', INFO + END IF + WRITE( NOUT, FMT = 9973 ) + GO TO 10 +* + ELSE IF( LSAMEN( 3, C3, 'ZES' ) ) THEN +* +* -------------------------------------------- +* ZES: Nonsymmetric Eigenvalue Problem Driver +* ZGEES (Schur form) +* -------------------------------------------- +* + MAXTYP = 21 + NTYPES = MIN( MAXTYP, NTYPES ) + IF( NTYPES.LE.0 ) THEN + WRITE( NOUT, FMT = 9990 )C3 + ELSE + IF( TSTERR ) + $ CALL ZERRED( C3, NOUT ) + CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) + CALL ZDRVES( NN, NVAL, NTYPES, DOTYPE, ISEED, THRESH, NOUT, + $ A( 1, 1 ), NMAX, A( 1, 2 ), A( 1, 3 ), + $ DC( 1, 1 ), DC( 1, 2 ), A( 1, 4 ), NMAX, + $ RESULT, WORK, LWORK, RWORK, IWORK, LOGWRK, + $ INFO ) + IF( INFO.NE.0 ) + $ WRITE( NOUT, FMT = 9980 )'ZGEES', INFO + END IF + WRITE( NOUT, FMT = 9973 ) + GO TO 10 +* + ELSE IF( LSAMEN( 3, C3, 'ZVX' ) ) THEN +* +* -------------------------------------------------------------- +* ZVX: Nonsymmetric Eigenvalue Problem Expert Driver +* ZGEEVX (eigenvalues, eigenvectors and condition numbers) +* -------------------------------------------------------------- +* + MAXTYP = 21 + NTYPES = MIN( MAXTYP, NTYPES ) + IF( NTYPES.LT.0 ) THEN + WRITE( NOUT, FMT = 9990 )C3 + ELSE + IF( TSTERR ) + $ CALL ZERRED( C3, NOUT ) + CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) + CALL ZDRVVX( NN, NVAL, NTYPES, DOTYPE, ISEED, THRESH, NIN, + $ NOUT, A( 1, 1 ), NMAX, A( 1, 2 ), DC( 1, 1 ), + $ DC( 1, 2 ), A( 1, 3 ), NMAX, A( 1, 4 ), NMAX, + $ A( 1, 5 ), NMAX, DR( 1, 1 ), DR( 1, 2 ), + $ DR( 1, 3 ), DR( 1, 4 ), DR( 1, 5 ), DR( 1, 6 ), + $ DR( 1, 7 ), DR( 1, 8 ), RESULT, WORK, LWORK, + $ RWORK, INFO ) + IF( INFO.NE.0 ) + $ WRITE( NOUT, FMT = 9980 )'ZGEEVX', INFO + END IF + WRITE( NOUT, FMT = 9973 ) + GO TO 10 +* + ELSE IF( 
LSAMEN( 3, C3, 'ZSX' ) ) THEN +* +* --------------------------------------------------- +* ZSX: Nonsymmetric Eigenvalue Problem Expert Driver +* ZGEESX (Schur form and condition numbers) +* --------------------------------------------------- +* + MAXTYP = 21 + NTYPES = MIN( MAXTYP, NTYPES ) + IF( NTYPES.LT.0 ) THEN + WRITE( NOUT, FMT = 9990 )C3 + ELSE + IF( TSTERR ) + $ CALL ZERRED( C3, NOUT ) + CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) + CALL ZDRVSX( NN, NVAL, NTYPES, DOTYPE, ISEED, THRESH, NIN, + $ NOUT, A( 1, 1 ), NMAX, A( 1, 2 ), A( 1, 3 ), + $ DC( 1, 1 ), DC( 1, 2 ), DC( 1, 3 ), A( 1, 4 ), + $ NMAX, A( 1, 5 ), RESULT, WORK, LWORK, RWORK, + $ LOGWRK, INFO ) + IF( INFO.NE.0 ) + $ WRITE( NOUT, FMT = 9980 )'ZGEESX', INFO + END IF + WRITE( NOUT, FMT = 9973 ) + GO TO 10 +* + ELSE IF( LSAMEN( 3, C3, 'ZGG' ) ) THEN +* +* ------------------------------------------------- +* ZGG: Generalized Nonsymmetric Eigenvalue Problem +* ------------------------------------------------- +* Vary the parameters +* NB = block size +* NBMIN = minimum block size +* NS = number of shifts +* MAXB = minimum submatrix size +* IACC22: structured matrix multiply +* NBCOL = minimum column dimension for blocks +* + MAXTYP = 26 + NTYPES = MIN( MAXTYP, NTYPES ) + CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) + CALL XLAENV(1,1) + IF( TSTCHK .AND. TSTERR ) + $ CALL ZERRGG( C3, NOUT ) + DO 350 I = 1, NPARMS + CALL XLAENV( 1, NBVAL( I ) ) + CALL XLAENV( 2, NBMIN( I ) ) + CALL XLAENV( 4, NSVAL( I ) ) + CALL XLAENV( 8, MXBVAL( I ) ) + CALL XLAENV( 16, IACC22( I ) ) + CALL XLAENV( 5, NBCOL( I ) ) +* + IF( NEWSD.EQ.0 ) THEN + DO 340 K = 1, 4 + ISEED( K ) = IOLDSD( K ) + 340 CONTINUE + END IF + WRITE( NOUT, FMT = 9996 )C3, NBVAL( I ), NBMIN( I ), + $ NSVAL( I ), MXBVAL( I ), IACC22( I ), NBCOL( I ) + TSTDIF = .FALSE. 
+ THRSHN = 10.D0 + IF( TSTCHK ) THEN + CALL ZCHKGG( NN, NVAL, MAXTYP, DOTYPE, ISEED, THRESH, + $ TSTDIF, THRSHN, NOUT, A( 1, 1 ), NMAX, + $ A( 1, 2 ), A( 1, 3 ), A( 1, 4 ), A( 1, 5 ), + $ A( 1, 6 ), A( 1, 7 ), A( 1, 8 ), A( 1, 9 ), + $ NMAX, A( 1, 10 ), A( 1, 11 ), A( 1, 12 ), + $ DC( 1, 1 ), DC( 1, 2 ), DC( 1, 3 ), + $ DC( 1, 4 ), A( 1, 13 ), A( 1, 14 ), WORK, + $ LWORK, RWORK, LOGWRK, RESULT, INFO ) + IF( INFO.NE.0 ) + $ WRITE( NOUT, FMT = 9980 )'ZCHKGG', INFO + END IF + 350 CONTINUE +* + ELSE IF( LSAMEN( 3, C3, 'ZGS' ) ) THEN +* +* ------------------------------------------------- +* ZGS: Generalized Nonsymmetric Eigenvalue Problem +* ZGGES (Schur form) +* ------------------------------------------------- +* + MAXTYP = 26 + NTYPES = MIN( MAXTYP, NTYPES ) + IF( NTYPES.LE.0 ) THEN + WRITE( NOUT, FMT = 9990 )C3 + ELSE + IF( TSTERR ) + $ CALL ZERRGG( C3, NOUT ) + CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) + CALL ZDRGES( NN, NVAL, MAXTYP, DOTYPE, ISEED, THRESH, NOUT, + $ A( 1, 1 ), NMAX, A( 1, 2 ), A( 1, 3 ), + $ A( 1, 4 ), A( 1, 7 ), NMAX, A( 1, 8 ), + $ DC( 1, 1 ), DC( 1, 2 ), WORK, LWORK, RWORK, + $ RESULT, LOGWRK, INFO ) +* + IF( INFO.NE.0 ) + $ WRITE( NOUT, FMT = 9980 )'ZDRGES', INFO +* +* Blocked version +* + CALL ZDRGES3( NN, NVAL, MAXTYP, DOTYPE, ISEED, THRESH, NOUT, + $ A( 1, 1 ), NMAX, A( 1, 2 ), A( 1, 3 ), + $ A( 1, 4 ), A( 1, 7 ), NMAX, A( 1, 8 ), + $ DC( 1, 1 ), DC( 1, 2 ), WORK, LWORK, RWORK, + $ RESULT, LOGWRK, INFO ) +* + IF( INFO.NE.0 ) + $ WRITE( NOUT, FMT = 9980 )'ZDRGES3', INFO + END IF + WRITE( NOUT, FMT = 9973 ) + GO TO 10 +* + ELSE IF( ZGX ) THEN +* +* ------------------------------------------------- +* ZGX Generalized Nonsymmetric Eigenvalue Problem +* ZGGESX (Schur form and condition numbers) +* ------------------------------------------------- +* + MAXTYP = 5 + NTYPES = MAXTYP + IF( NN.LT.0 ) THEN + WRITE( NOUT, FMT = 9990 )C3 + ELSE + IF( TSTERR ) + $ CALL ZERRGG( C3, NOUT ) + CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) 
+ CALL XLAENV( 5, 2 ) + CALL ZDRGSX( NN, NCMAX, THRESH, NIN, NOUT, A( 1, 1 ), NMAX, + $ A( 1, 2 ), A( 1, 3 ), A( 1, 4 ), A( 1, 5 ), + $ A( 1, 6 ), DC( 1, 1 ), DC( 1, 2 ), C, + $ NCMAX*NCMAX, S, WORK, LWORK, RWORK, IWORK, + $ LIWORK, LOGWRK, INFO ) + IF( INFO.NE.0 ) + $ WRITE( NOUT, FMT = 9980 )'ZDRGSX', INFO + END IF + WRITE( NOUT, FMT = 9973 ) + GO TO 10 +* + ELSE IF( LSAMEN( 3, C3, 'ZGV' ) ) THEN +* +* ------------------------------------------------- +* ZGV: Generalized Nonsymmetric Eigenvalue Problem +* ZGGEV (Eigenvalue/vector form) +* ------------------------------------------------- +* + MAXTYP = 26 + NTYPES = MIN( MAXTYP, NTYPES ) + IF( NTYPES.LE.0 ) THEN + WRITE( NOUT, FMT = 9990 )C3 + ELSE + IF( TSTERR ) + $ CALL ZERRGG( C3, NOUT ) + CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) + CALL ZDRGEV( NN, NVAL, MAXTYP, DOTYPE, ISEED, THRESH, NOUT, + $ A( 1, 1 ), NMAX, A( 1, 2 ), A( 1, 3 ), + $ A( 1, 4 ), A( 1, 7 ), NMAX, A( 1, 8 ), + $ A( 1, 9 ), NMAX, DC( 1, 1 ), DC( 1, 2 ), + $ DC( 1, 3 ), DC( 1, 4 ), WORK, LWORK, RWORK, + $ RESULT, INFO ) + IF( INFO.NE.0 ) + $ WRITE( NOUT, FMT = 9980 )'ZDRGEV', INFO +* +* Blocked version +* + CALL XLAENV(16,2) + CALL ZDRGEV3( NN, NVAL, MAXTYP, DOTYPE, ISEED, THRESH, NOUT, + $ A( 1, 1 ), NMAX, A( 1, 2 ), A( 1, 3 ), + $ A( 1, 4 ), A( 1, 7 ), NMAX, A( 1, 8 ), + $ A( 1, 9 ), NMAX, DC( 1, 1 ), DC( 1, 2 ), + $ DC( 1, 3 ), DC( 1, 4 ), WORK, LWORK, RWORK, + $ RESULT, INFO ) + IF( INFO.NE.0 ) + $ WRITE( NOUT, FMT = 9980 )'ZDRGEV3', INFO + END IF + WRITE( NOUT, FMT = 9973 ) + GO TO 10 +* + ELSE IF( ZXV ) THEN +* +* ------------------------------------------------- +* ZXV: Generalized Nonsymmetric Eigenvalue Problem +* ZGGEVX (eigenvalue/vector with condition numbers) +* ------------------------------------------------- +* + MAXTYP = 2 + NTYPES = MAXTYP + IF( NN.LT.0 ) THEN + WRITE( NOUT, FMT = 9990 )C3 + ELSE + IF( TSTERR ) + $ CALL ZERRGG( C3, NOUT ) + CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) + CALL ZDRGVX( NN, 
THRESH, NIN, NOUT, A( 1, 1 ), NMAX, + $ A( 1, 2 ), A( 1, 3 ), A( 1, 4 ), DC( 1, 1 ), + $ DC( 1, 2 ), A( 1, 5 ), A( 1, 6 ), IWORK( 1 ), + $ IWORK( 2 ), DR( 1, 1 ), DR( 1, 2 ), DR( 1, 3 ), + $ DR( 1, 4 ), DR( 1, 5 ), DR( 1, 6 ), WORK, + $ LWORK, RWORK, IWORK( 3 ), LIWORK-2, RESULT, + $ LOGWRK, INFO ) +* + IF( INFO.NE.0 ) + $ WRITE( NOUT, FMT = 9980 )'ZDRGVX', INFO + END IF + WRITE( NOUT, FMT = 9973 ) + GO TO 10 +* + ELSE IF( LSAMEN( 3, C3, 'ZHB' ) ) THEN +* +* ------------------------------ +* ZHB: Hermitian Band Reduction +* ------------------------------ +* + MAXTYP = 15 + NTYPES = MIN( MAXTYP, NTYPES ) + CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) + IF( TSTERR ) THEN +#if defined(_OPENMP) + N_THREADS = OMP_GET_NUM_THREADS() + CALL OMP_SET_NUM_THREADS(1) +#endif + CALL ZERRST( 'ZHB', NOUT ) +#if defined(_OPENMP) + CALL OMP_SET_NUM_THREADS(N_THREADS) +#endif + END IF +* CALL ZCHKHB( NN, NVAL, NK, KVAL, MAXTYP, DOTYPE, ISEED, THRESH, +* $ NOUT, A( 1, 1 ), NMAX, DR( 1, 1 ), DR( 1, 2 ), +* $ A( 1, 2 ), NMAX, WORK, LWORK, RWORK, RESULT, +* $ INFO ) + CALL ZCHKHB2STG( NN, NVAL, NK, KVAL, MAXTYP, DOTYPE, ISEED, + $ THRESH, NOUT, A( 1, 1 ), NMAX, DR( 1, 1 ), + $ DR( 1, 2 ), DR( 1, 3 ), DR( 1, 4 ), DR( 1, 5 ), + $ A( 1, 2 ), NMAX, WORK, LWORK, RWORK, RESULT, + $ INFO ) + IF( INFO.NE.0 ) + $ WRITE( NOUT, FMT = 9980 )'ZCHKHB', INFO +* + ELSE IF( LSAMEN( 3, C3, 'ZBB' ) ) THEN +* +* ------------------------------ +* ZBB: General Band Reduction +* ------------------------------ +* + MAXTYP = 15 + NTYPES = MIN( MAXTYP, NTYPES ) + CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) + DO 370 I = 1, NPARMS + NRHS = NSVAL( I ) +* + IF( NEWSD.EQ.0 ) THEN + DO 360 K = 1, 4 + ISEED( K ) = IOLDSD( K ) + 360 CONTINUE + END IF + WRITE( NOUT, FMT = 9966 )C3, NRHS + CALL ZCHKBB( NN, MVAL, NVAL, NK, KVAL, MAXTYP, DOTYPE, NRHS, + $ ISEED, THRESH, NOUT, A( 1, 1 ), NMAX, + $ A( 1, 2 ), 2*NMAX, DR( 1, 1 ), DR( 1, 2 ), + $ A( 1, 4 ), NMAX, A( 1, 5 ), NMAX, A( 1, 6 ), + $ NMAX, A( 1, 7 ), 
WORK, LWORK, RWORK, RESULT, + $ INFO ) + IF( INFO.NE.0 ) + $ WRITE( NOUT, FMT = 9980 )'ZCHKBB', INFO + 370 CONTINUE +* + ELSE IF( LSAMEN( 3, C3, 'GLM' ) ) THEN +* +* ----------------------------------------- +* GLM: Generalized Linear Regression Model +* ----------------------------------------- +* + CALL XLAENV( 1, 1 ) + IF( TSTERR ) + $ CALL ZERRGG( 'GLM', NOUT ) + CALL ZCKGLM( NN, NVAL, MVAL, PVAL, NTYPES, ISEED, THRESH, NMAX, + $ A( 1, 1 ), A( 1, 2 ), B( 1, 1 ), B( 1, 2 ), X, + $ WORK, DR( 1, 1 ), NIN, NOUT, INFO ) + IF( INFO.NE.0 ) + $ WRITE( NOUT, FMT = 9980 )'ZCKGLM', INFO +* + ELSE IF( LSAMEN( 3, C3, 'GQR' ) ) THEN +* +* ------------------------------------------ +* GQR: Generalized QR and RQ factorizations +* ------------------------------------------ +* + CALL XLAENV( 1, 1 ) + IF( TSTERR ) + $ CALL ZERRGG( 'GQR', NOUT ) + CALL ZCKGQR( NN, MVAL, NN, PVAL, NN, NVAL, NTYPES, ISEED, + $ THRESH, NMAX, A( 1, 1 ), A( 1, 2 ), A( 1, 3 ), + $ A( 1, 4 ), TAUA, B( 1, 1 ), B( 1, 2 ), B( 1, 3 ), + $ B( 1, 4 ), B( 1, 5 ), TAUB, WORK, DR( 1, 1 ), NIN, + $ NOUT, INFO ) + IF( INFO.NE.0 ) + $ WRITE( NOUT, FMT = 9980 )'ZCKGQR', INFO +* + ELSE IF( LSAMEN( 3, C3, 'GSV' ) ) THEN +* +* ---------------------------------------------- +* GSV: Generalized Singular Value Decomposition +* ---------------------------------------------- +* + CALL XLAENV(1,1) + IF( TSTERR ) + $ CALL ZERRGG( 'GSV', NOUT ) + CALL ZCKGSV( NN, MVAL, PVAL, NVAL, NTYPES, ISEED, THRESH, NMAX, + $ A( 1, 1 ), A( 1, 2 ), B( 1, 1 ), B( 1, 2 ), + $ A( 1, 3 ), B( 1, 3 ), A( 1, 4 ), ALPHA, BETA, + $ B( 1, 4 ), IWORK, WORK, DR( 1, 1 ), NIN, NOUT, + $ INFO ) + IF( INFO.NE.0 ) + $ WRITE( NOUT, FMT = 9980 )'ZCKGSV', INFO +* + ELSE IF( LSAMEN( 3, C3, 'CSD' ) ) THEN +* +* ---------------------------------------------- +* CSD: CS Decomposition +* ---------------------------------------------- +* + CALL XLAENV(1,1) + IF( TSTERR ) + $ CALL ZERRGG( 'CSD', NOUT ) + CALL ZCKCSD( NN, MVAL, PVAL, NVAL, NTYPES, ISEED, THRESH, NMAX, 
+ $ A( 1, 1 ), A( 1, 2 ), A( 1, 3 ), A( 1, 4 ), + $ A( 1, 5 ), A( 1, 6 ), RWORK, IWORK, WORK, + $ DR( 1, 1 ), NIN, NOUT, INFO ) + IF( INFO.NE.0 ) + $ WRITE( NOUT, FMT = 9980 )'ZCKCSD', INFO +* + ELSE IF( LSAMEN( 3, C3, 'LSE' ) ) THEN +* +* -------------------------------------- +* LSE: Constrained Linear Least Squares +* -------------------------------------- +* + CALL XLAENV( 1, 1 ) + IF( TSTERR ) + $ CALL ZERRGG( 'LSE', NOUT ) + CALL ZCKLSE( NN, MVAL, PVAL, NVAL, NTYPES, ISEED, THRESH, NMAX, + $ A( 1, 1 ), A( 1, 2 ), B( 1, 1 ), B( 1, 2 ), X, + $ WORK, DR( 1, 1 ), NIN, NOUT, INFO ) + IF( INFO.NE.0 ) + $ WRITE( NOUT, FMT = 9980 )'ZCKLSE', INFO + ELSE + WRITE( NOUT, FMT = * ) + WRITE( NOUT, FMT = * ) + WRITE( NOUT, FMT = 9992 )C3 + END IF + IF( .NOT.( ZGX .OR. ZXV ) ) + $ GO TO 190 + 380 CONTINUE + WRITE( NOUT, FMT = 9994 ) + S2 = DSECND( ) + WRITE( NOUT, FMT = 9993 )S2 - S1 +* + DEALLOCATE (S, STAT = AllocateStatus) + DEALLOCATE (A, STAT = AllocateStatus) + DEALLOCATE (B, STAT = AllocateStatus) + DEALLOCATE (C, STAT = AllocateStatus) + DEALLOCATE (RWORK, STAT = AllocateStatus) + DEALLOCATE (WORK, STAT = AllocateStatus) +* + 9999 FORMAT( / ' Execution not attempted due to input errors' ) + 9997 FORMAT( / / 1X, A3, ': NB =', I4, ', NBMIN =', I4, ', NX =', I4 ) + 9996 FORMAT( / / 1X, A3, ': NB =', I4, ', NBMIN =', I4, ', NS =', I4, + $ ', MAXB =', I4, ', IACC22 =', I4, ', NBCOL =', I4 ) + 9995 FORMAT( / / 1X, A3, ': NB =', I4, ', NBMIN =', I4, ', NX =', I4, + $ ', NRHS =', I4 ) + 9994 FORMAT( / / ' End of tests' ) + 9993 FORMAT( ' Total time used = ', F12.2, ' seconds', / ) + 9992 FORMAT( 1X, A3, ': Unrecognized path name' ) + 9991 FORMAT( / / ' *** Invalid integer value in column ', I2, + $ ' of input', ' line:', / A79 ) + 9990 FORMAT( / / 1X, A3, ' routines were not tested' ) + 9989 FORMAT( ' Invalid input value: ', A, '=', I6, '; must be >=', + $ I6 ) + 9988 FORMAT( ' Invalid input value: ', A, '=', I6, '; must be <=', + $ I6 ) + 9987 FORMAT( ' Tests of the 
Nonsymmetric Eigenvalue Problem routines' ) + 9986 FORMAT( ' Tests of the Hermitian Eigenvalue Problem routines' ) + 9985 FORMAT( ' Tests of the Singular Value Decomposition routines' ) + 9984 FORMAT( / ' The following parameter values will be used:' ) + 9983 FORMAT( 4X, A, 10I6, / 10X, 10I6 ) + 9982 FORMAT( / ' Routines pass computational tests if test ratio is ', + $ 'less than', F8.2, / ) + 9981 FORMAT( ' Relative machine ', A, ' is taken to be', D16.6 ) + 9980 FORMAT( ' *** Error code from ', A, ' = ', I4 ) + 9979 FORMAT( / ' Tests of the Nonsymmetric Eigenvalue Problem Driver', + $ / ' ZGEEV (eigenvalues and eigevectors)' ) + 9978 FORMAT( / ' Tests of the Nonsymmetric Eigenvalue Problem Driver', + $ / ' ZGEES (Schur form)' ) + 9977 FORMAT( / ' Tests of the Nonsymmetric Eigenvalue Problem Expert', + $ ' Driver', / ' ZGEEVX (eigenvalues, eigenvectors and', + $ ' condition numbers)' ) + 9976 FORMAT( / ' Tests of the Nonsymmetric Eigenvalue Problem Expert', + $ ' Driver', / ' ZGEESX (Schur form and condition', + $ ' numbers)' ) + 9975 FORMAT( / ' Tests of the Generalized Nonsymmetric Eigenvalue ', + $ 'Problem routines' ) + 9974 FORMAT( ' Tests of ZHBTRD', / ' (reduction of a Hermitian band ', + $ 'matrix to real tridiagonal form)' ) + 9973 FORMAT( / 1X, 71( '-' ) ) + 9972 FORMAT( / ' LAPACK VERSION ', I1, '.', I1, '.', I1 ) + 9971 FORMAT( / ' Tests of the Generalized Linear Regression Model ', + $ 'routines' ) + 9970 FORMAT( / ' Tests of the Generalized QR and RQ routines' ) + 9969 FORMAT( / ' Tests of the Generalized Singular Value', + $ ' Decomposition routines' ) + 9968 FORMAT( / ' Tests of the Linear Least Squares routines' ) + 9967 FORMAT( ' Tests of ZGBBRD', / ' (reduction of a general band ', + $ 'matrix to real bidiagonal form)' ) + 9966 FORMAT( / / 1X, A3, ': NRHS =', I4 ) + 9965 FORMAT( / ' Tests of the Generalized Nonsymmetric Eigenvalue ', + $ 'Problem Expert Driver ZGGESX' ) + 9964 FORMAT( / ' Tests of the Generalized Nonsymmetric Eigenvalue ', + $ 
'Problem Driver ZGGES' ) + 9963 FORMAT( / ' Tests of the Generalized Nonsymmetric Eigenvalue ', + $ 'Problem Driver ZGGEV' ) + 9962 FORMAT( / ' Tests of the Generalized Nonsymmetric Eigenvalue ', + $ 'Problem Expert Driver ZGGEVX' ) + 9961 FORMAT( / / 1X, A3, ': NB =', I4, ', NBMIN =', I4, ', NX =', I4, + $ ', INMIN=', I4, + $ ', INWIN =', I4, ', INIBL =', I4, ', ISHFTS =', I4, + $ ', IACC22 =', I4) + 9960 FORMAT( / ' Tests of the CS Decomposition routines' ) +* +* End of ZCHKEE +* + END From a5ab891292052b5c6ea58ba47e3f58efc5043ce3 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sun, 28 Feb 2021 18:49:50 +0100 Subject: [PATCH 110/134] Add rewritten schkee.F from Reference-LAPACK PR335 --- lapack-netlib/TESTING/EIG/schkee.F | 2541 ++++++++++++++++++++++++++++ 1 file changed, 2541 insertions(+) create mode 100644 lapack-netlib/TESTING/EIG/schkee.F diff --git a/lapack-netlib/TESTING/EIG/schkee.F b/lapack-netlib/TESTING/EIG/schkee.F new file mode 100644 index 000000000..a063c18b5 --- /dev/null +++ b/lapack-netlib/TESTING/EIG/schkee.F @@ -0,0 +1,2541 @@ +*> \brief \b SCHKEE +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* PROGRAM SCHKEE +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SCHKEE tests the REAL LAPACK subroutines for the matrix +*> eigenvalue problem. 
The test paths in this version are +*> +*> NEP (Nonsymmetric Eigenvalue Problem): +*> Test SGEHRD, SORGHR, SHSEQR, STREVC, SHSEIN, and SORMHR +*> +*> SEP (Symmetric Eigenvalue Problem): +*> Test SSYTRD, SORGTR, SSTEQR, SSTERF, SSTEIN, SSTEDC, +*> and drivers SSYEV(X), SSBEV(X), SSPEV(X), SSTEV(X), +*> SSYEVD, SSBEVD, SSPEVD, SSTEVD +*> +*> SVD (Singular Value Decomposition): +*> Test SGEBRD, SORGBR, SBDSQR, SBDSDC +*> and the drivers SGESVD, SGESDD +*> +*> SEV (Nonsymmetric Eigenvalue/eigenvector Driver): +*> Test SGEEV +*> +*> SES (Nonsymmetric Schur form Driver): +*> Test SGEES +*> +*> SVX (Nonsymmetric Eigenvalue/eigenvector Expert Driver): +*> Test SGEEVX +*> +*> SSX (Nonsymmetric Schur form Expert Driver): +*> Test SGEESX +*> +*> SGG (Generalized Nonsymmetric Eigenvalue Problem): +*> Test SGGHD3, SGGBAL, SGGBAK, SHGEQZ, and STGEVC +*> +*> SGS (Generalized Nonsymmetric Schur form Driver): +*> Test SGGES +*> +*> SGV (Generalized Nonsymmetric Eigenvalue/eigenvector Driver): +*> Test SGGEV +*> +*> SGX (Generalized Nonsymmetric Schur form Expert Driver): +*> Test SGGESX +*> +*> SXV (Generalized Nonsymmetric Eigenvalue/eigenvector Expert Driver): +*> Test SGGEVX +*> +*> SSG (Symmetric Generalized Eigenvalue Problem): +*> Test SSYGST, SSYGV, SSYGVD, SSYGVX, SSPGST, SSPGV, SSPGVD, +*> SSPGVX, SSBGST, SSBGV, SSBGVD, and SSBGVX +*> +*> SSB (Symmetric Band Eigenvalue Problem): +*> Test SSBTRD +*> +*> SBB (Band Singular Value Decomposition): +*> Test SGBBRD +*> +*> SEC (Eigencondition estimation): +*> Test SLALN2, SLASY2, SLAEQU, SLAEXC, STRSYL, STREXC, STRSNA, +*> STRSEN, and SLAQTR +*> +*> SBL (Balancing a general matrix) +*> Test SGEBAL +*> +*> SBK (Back transformation on a balanced matrix) +*> Test SGEBAK +*> +*> SGL (Balancing a matrix pair) +*> Test SGGBAL +*> +*> SGK (Back transformation on a matrix pair) +*> Test SGGBAK +*> +*> GLM (Generalized Linear Regression Model): +*> Tests SGGGLM +*> +*> GQR (Generalized QR and RQ factorizations): +*> Tests SGGQRF and 
SGGRQF +*> +*> GSV (Generalized Singular Value Decomposition): +*> Tests SGGSVD, SGGSVP, STGSJA, SLAGS2, SLAPLL, and SLAPMT +*> +*> CSD (CS decomposition): +*> Tests SORCSD +*> +*> LSE (Constrained Linear Least Squares): +*> Tests SGGLSE +*> +*> Each test path has a different set of inputs, but the data sets for +*> the driver routines xEV, xES, xVX, and xSX can be concatenated in a +*> single input file. The first line of input should contain one of the +*> 3-character path names in columns 1-3. The number of remaining lines +*> depends on what is found on the first line. +*> +*> The number of matrix types used in testing is often controllable from +*> the input file. The number of matrix types for each path, and the +*> test routine that describes them, is as follows: +*> +*> Path name(s) Types Test routine +*> +*> SHS or NEP 21 SCHKHS +*> SST or SEP 21 SCHKST (routines) +*> 18 SDRVST (drivers) +*> SBD or SVD 16 SCHKBD (routines) +*> 5 SDRVBD (drivers) +*> SEV 21 SDRVEV +*> SES 21 SDRVES +*> SVX 21 SDRVVX +*> SSX 21 SDRVSX +*> SGG 26 SCHKGG (routines) +*> SGS 26 SDRGES +*> SGX 5 SDRGSX +*> SGV 26 SDRGEV +*> SXV 2 SDRGVX +*> SSG 21 SDRVSG +*> SSB 15 SCHKSB +*> SBB 15 SCHKBB +*> SEC - SCHKEC +*> SBL - SCHKBL +*> SBK - SCHKBK +*> SGL - SCHKGL +*> SGK - SCHKGK +*> GLM 8 SCKGLM +*> GQR 8 SCKGQR +*> GSV 8 SCKGSV +*> CSD 3 SCKCSD +*> LSE 8 SCKLSE +*> +*>----------------------------------------------------------------------- +*> +*> NEP input file: +*> +*> line 2: NN, INTEGER +*> Number of values of N. +*> +*> line 3: NVAL, INTEGER array, dimension (NN) +*> The values for the matrix dimension N. +*> +*> line 4: NPARMS, INTEGER +*> Number of values of the parameters NB, NBMIN, NX, NS, and +*> MAXB. +*> +*> line 5: NBVAL, INTEGER array, dimension (NPARMS) +*> The values for the blocksize NB. +*> +*> line 6: NBMIN, INTEGER array, dimension (NPARMS) +*> The values for the minimum blocksize NBMIN. 
+*> +*> line 7: NXVAL, INTEGER array, dimension (NPARMS) +*> The values for the crossover point NX. +*> +*> line 8: INMIN, INTEGER array, dimension (NPARMS) +*> LAHQR vs TTQRE crossover point, >= 11 +*> +*> line 9: INWIN, INTEGER array, dimension (NPARMS) +*> recommended deflation window size +*> +*> line 10: INIBL, INTEGER array, dimension (NPARMS) +*> nibble crossover point +*> +*> line 11: ISHFTS, INTEGER array, dimension (NPARMS) +*> number of simultaneous shifts +*> +*> line 12: IACC22, INTEGER array, dimension (NPARMS) +*> select structured matrix multiply (0, 1 or 2) +*> +*> line 13: THRESH +*> Threshold value for the test ratios. Information will be +*> printed about each test for which the test ratio is greater +*> than or equal to the threshold. To have all of the test +*> ratios printed, use THRESH = 0.0 . +*> +*> line 14: NEWSD, INTEGER +*> A code indicating how to set the random number seed. +*> = 0: Set the seed to a default value before each run +*> = 1: Initialize the seed to a default value only before the +*> first run +*> = 2: Like 1, but use the seed values on the next line +*> +*> If line 14 was 2: +*> +*> line 15: INTEGER array, dimension (4) +*> Four integer values for the random number seed. +*> +*> lines 15-EOF: The remaining lines occur in sets of 1 or 2 and allow +*> the user to specify the matrix types. Each line contains +*> a 3-character path name in columns 1-3, and the number +*> of matrix types must be the first nonblank item in columns +*> 4-80. If the number of matrix types is at least 1 but is +*> less than the maximum number of possible types, a second +*> line will be read to get the numbers of the matrix types to +*> be used. For example, +*> NEP 21 +*> requests all of the matrix types for the nonsymmetric +*> eigenvalue problem, while +*> NEP 4 +*> 9 10 11 12 +*> requests only matrices of type 9, 10, 11, and 12. +*> +*> The valid 3-character path names are 'NEP' or 'SHS' for the +*> nonsymmetric eigenvalue routines. 
+*> +*>----------------------------------------------------------------------- +*> +*> SEP or SSG input file: +*> +*> line 2: NN, INTEGER +*> Number of values of N. +*> +*> line 3: NVAL, INTEGER array, dimension (NN) +*> The values for the matrix dimension N. +*> +*> line 4: NPARMS, INTEGER +*> Number of values of the parameters NB, NBMIN, and NX. +*> +*> line 5: NBVAL, INTEGER array, dimension (NPARMS) +*> The values for the blocksize NB. +*> +*> line 6: NBMIN, INTEGER array, dimension (NPARMS) +*> The values for the minimum blocksize NBMIN. +*> +*> line 7: NXVAL, INTEGER array, dimension (NPARMS) +*> The values for the crossover point NX. +*> +*> line 8: THRESH +*> Threshold value for the test ratios. Information will be +*> printed about each test for which the test ratio is greater +*> than or equal to the threshold. +*> +*> line 9: TSTCHK, LOGICAL +*> Flag indicating whether or not to test the LAPACK routines. +*> +*> line 10: TSTDRV, LOGICAL +*> Flag indicating whether or not to test the driver routines. +*> +*> line 11: TSTERR, LOGICAL +*> Flag indicating whether or not to test the error exits for +*> the LAPACK routines and driver routines. +*> +*> line 12: NEWSD, INTEGER +*> A code indicating how to set the random number seed. +*> = 0: Set the seed to a default value before each run +*> = 1: Initialize the seed to a default value only before the +*> first run +*> = 2: Like 1, but use the seed values on the next line +*> +*> If line 12 was 2: +*> +*> line 13: INTEGER array, dimension (4) +*> Four integer values for the random number seed. +*> +*> lines 13-EOF: Lines specifying matrix types, as for NEP. +*> The 3-character path names are 'SEP' or 'SST' for the +*> symmetric eigenvalue routines and driver routines, and +*> 'SSG' for the routines for the symmetric generalized +*> eigenvalue problem. +*> +*>----------------------------------------------------------------------- +*> +*> SVD input file: +*> +*> line 2: NN, INTEGER +*> Number of values of M and N. 
+*> +*> line 3: MVAL, INTEGER array, dimension (NN) +*> The values for the matrix row dimension M. +*> +*> line 4: NVAL, INTEGER array, dimension (NN) +*> The values for the matrix column dimension N. +*> +*> line 5: NPARMS, INTEGER +*> Number of values of the parameter NB, NBMIN, NX, and NRHS. +*> +*> line 6: NBVAL, INTEGER array, dimension (NPARMS) +*> The values for the blocksize NB. +*> +*> line 7: NBMIN, INTEGER array, dimension (NPARMS) +*> The values for the minimum blocksize NBMIN. +*> +*> line 8: NXVAL, INTEGER array, dimension (NPARMS) +*> The values for the crossover point NX. +*> +*> line 9: NSVAL, INTEGER array, dimension (NPARMS) +*> The values for the number of right hand sides NRHS. +*> +*> line 10: THRESH +*> Threshold value for the test ratios. Information will be +*> printed about each test for which the test ratio is greater +*> than or equal to the threshold. +*> +*> line 11: TSTCHK, LOGICAL +*> Flag indicating whether or not to test the LAPACK routines. +*> +*> line 12: TSTDRV, LOGICAL +*> Flag indicating whether or not to test the driver routines. +*> +*> line 13: TSTERR, LOGICAL +*> Flag indicating whether or not to test the error exits for +*> the LAPACK routines and driver routines. +*> +*> line 14: NEWSD, INTEGER +*> A code indicating how to set the random number seed. +*> = 0: Set the seed to a default value before each run +*> = 1: Initialize the seed to a default value only before the +*> first run +*> = 2: Like 1, but use the seed values on the next line +*> +*> If line 14 was 2: +*> +*> line 15: INTEGER array, dimension (4) +*> Four integer values for the random number seed. +*> +*> lines 15-EOF: Lines specifying matrix types, as for NEP. +*> The 3-character path names are 'SVD' or 'SBD' for both the +*> SVD routines and the SVD driver routines. +*> +*>----------------------------------------------------------------------- +*> +*> SEV and SES data files: +*> +*> line 1: 'SEV' or 'SES' in columns 1 to 3. 
+*> +*> line 2: NSIZES, INTEGER +*> Number of sizes of matrices to use. Should be at least 0 +*> and at most 20. If NSIZES = 0, no testing is done +*> (although the remaining 3 lines are still read). +*> +*> line 3: NN, INTEGER array, dimension(NSIZES) +*> Dimensions of matrices to be tested. +*> +*> line 4: NB, NBMIN, NX, NS, NBCOL, INTEGERs +*> These integer parameters determine how blocking is done +*> (see ILAENV for details) +*> NB : block size +*> NBMIN : minimum block size +*> NX : minimum dimension for blocking +*> NS : number of shifts in xHSEQR +*> NBCOL : minimum column dimension for blocking +*> +*> line 5: THRESH, REAL +*> The test threshold against which computed residuals are +*> compared. Should generally be in the range from 10. to 20. +*> If it is 0., all test case data will be printed. +*> +*> line 6: TSTERR, LOGICAL +*> Flag indicating whether or not to test the error exits. +*> +*> line 7: NEWSD, INTEGER +*> A code indicating how to set the random number seed. +*> = 0: Set the seed to a default value before each run +*> = 1: Initialize the seed to a default value only before the +*> first run +*> = 2: Like 1, but use the seed values on the next line +*> +*> If line 7 was 2: +*> +*> line 8: INTEGER array, dimension (4) +*> Four integer values for the random number seed. +*> +*> lines 9 and following: Lines specifying matrix types, as for NEP. +*> The 3-character path name is 'SEV' to test SGEEV, or +*> 'SES' to test SGEES. +*> +*>----------------------------------------------------------------------- +*> +*> The SVX data has two parts. The first part is identical to SEV, +*> and the second part consists of test matrices with precomputed +*> solutions. +*> +*> line 1: 'SVX' in columns 1-3. +*> +*> line 2: NSIZES, INTEGER +*> If NSIZES = 0, no testing of randomly generated examples +*> is done, but any precomputed examples are tested. 
+*> +*> line 3: NN, INTEGER array, dimension(NSIZES) +*> +*> line 4: NB, NBMIN, NX, NS, NBCOL, INTEGERs +*> +*> line 5: THRESH, REAL +*> +*> line 6: TSTERR, LOGICAL +*> +*> line 7: NEWSD, INTEGER +*> +*> If line 7 was 2: +*> +*> line 8: INTEGER array, dimension (4) +*> +*> lines 9 and following: The first line contains 'SVX' in columns 1-3 +*> followed by the number of matrix types, possibly with +*> a second line to specify certain matrix types. +*> If the number of matrix types = 0, no testing of randomly +*> generated examples is done, but any precomputed examples +*> are tested. +*> +*> remaining lines : Each matrix is stored on 1+2*N lines, where N is +*> its dimension. The first line contains the dimension (a +*> single integer). The next N lines contain the matrix, one +*> row per line. The last N lines correspond to each +*> eigenvalue. Each of these last N lines contains 4 real +*> values: the real part of the eigenvalue, the imaginary +*> part of the eigenvalue, the reciprocal condition number of +*> the eigenvalues, and the reciprocal condition number of the +*> eigenvector. The end of data is indicated by dimension N=0. +*> Even if no data is to be tested, there must be at least one +*> line containing N=0. +*> +*>----------------------------------------------------------------------- +*> +*> The SSX data is like SVX. The first part is identical to SEV, and the +*> second part consists of test matrices with precomputed solutions. +*> +*> line 1: 'SSX' in columns 1-3. +*> +*> line 2: NSIZES, INTEGER +*> If NSIZES = 0, no testing of randomly generated examples +*> is done, but any precomputed examples are tested. 
+*> +*> line 3: NN, INTEGER array, dimension(NSIZES) +*> +*> line 4: NB, NBMIN, NX, NS, NBCOL, INTEGERs +*> +*> line 5: THRESH, REAL +*> +*> line 6: TSTERR, LOGICAL +*> +*> line 7: NEWSD, INTEGER +*> +*> If line 7 was 2: +*> +*> line 8: INTEGER array, dimension (4) +*> +*> lines 9 and following: The first line contains 'SSX' in columns 1-3 +*> followed by the number of matrix types, possibly with +*> a second line to specify certain matrix types. +*> If the number of matrix types = 0, no testing of randomly +*> generated examples is done, but any precomputed examples +*> are tested. +*> +*> remaining lines : Each matrix is stored on 3+N lines, where N is its +*> dimension. The first line contains the dimension N and the +*> dimension M of an invariant subspace. The second line +*> contains M integers, identifying the eigenvalues in the +*> invariant subspace (by their position in a list of +*> eigenvalues ordered by increasing real part). The next N +*> lines contain the matrix. The last line contains the +*> reciprocal condition number for the average of the selected +*> eigenvalues, and the reciprocal condition number for the +*> corresponding right invariant subspace. The end of data is +*> indicated by a line containing N=0 and M=0. Even if no data +*> is to be tested, there must be at least one line containing +*> N=0 and M=0. +*> +*>----------------------------------------------------------------------- +*> +*> SGG input file: +*> +*> line 2: NN, INTEGER +*> Number of values of N. +*> +*> line 3: NVAL, INTEGER array, dimension (NN) +*> The values for the matrix dimension N. +*> +*> line 4: NPARMS, INTEGER +*> Number of values of the parameters NB, NBMIN, NS, MAXB, and +*> NBCOL. +*> +*> line 5: NBVAL, INTEGER array, dimension (NPARMS) +*> The values for the blocksize NB. +*> +*> line 6: NBMIN, INTEGER array, dimension (NPARMS) +*> The values for NBMIN, the minimum row dimension for blocks. 
+*> +*> line 7: NSVAL, INTEGER array, dimension (NPARMS) +*> The values for the number of shifts. +*> +*> line 8: MXBVAL, INTEGER array, dimension (NPARMS) +*> The values for MAXB, used in determining minimum blocksize. +*> +*> line 9: IACC22, INTEGER array, dimension (NPARMS) +*> select structured matrix multiply: 1 or 2) +*> +*> line 10: NBCOL, INTEGER array, dimension (NPARMS) +*> The values for NBCOL, the minimum column dimension for +*> blocks. +*> +*> line 11: THRESH +*> Threshold value for the test ratios. Information will be +*> printed about each test for which the test ratio is greater +*> than or equal to the threshold. +*> +*> line 12: TSTCHK, LOGICAL +*> Flag indicating whether or not to test the LAPACK routines. +*> +*> line 13: TSTDRV, LOGICAL +*> Flag indicating whether or not to test the driver routines. +*> +*> line 14: TSTERR, LOGICAL +*> Flag indicating whether or not to test the error exits for +*> the LAPACK routines and driver routines. +*> +*> line 15: NEWSD, INTEGER +*> A code indicating how to set the random number seed. +*> = 0: Set the seed to a default value before each run +*> = 1: Initialize the seed to a default value only before the +*> first run +*> = 2: Like 1, but use the seed values on the next line +*> +*> If line 15 was 2: +*> +*> line 16: INTEGER array, dimension (4) +*> Four integer values for the random number seed. +*> +*> lines 17-EOF: Lines specifying matrix types, as for NEP. +*> The 3-character path name is 'SGG' for the generalized +*> eigenvalue problem routines and driver routines. +*> +*>----------------------------------------------------------------------- +*> +*> SGS and SGV input files: +*> +*> line 1: 'SGS' or 'SGV' in columns 1 to 3. +*> +*> line 2: NN, INTEGER +*> Number of values of N. +*> +*> line 3: NVAL, INTEGER array, dimension(NN) +*> Dimensions of matrices to be tested. 
+*> +*> line 4: NB, NBMIN, NX, NS, NBCOL, INTEGERs +*> These integer parameters determine how blocking is done +*> (see ILAENV for details) +*> NB : block size +*> NBMIN : minimum block size +*> NX : minimum dimension for blocking +*> NS : number of shifts in xHGEQR +*> NBCOL : minimum column dimension for blocking +*> +*> line 5: THRESH, REAL +*> The test threshold against which computed residuals are +*> compared. Should generally be in the range from 10. to 20. +*> If it is 0., all test case data will be printed. +*> +*> line 6: TSTERR, LOGICAL +*> Flag indicating whether or not to test the error exits. +*> +*> line 7: NEWSD, INTEGER +*> A code indicating how to set the random number seed. +*> = 0: Set the seed to a default value before each run +*> = 1: Initialize the seed to a default value only before the +*> first run +*> = 2: Like 1, but use the seed values on the next line +*> +*> If line 7 was 2: +*> +*> line 8: INTEGER array, dimension (4) +*> Four integer values for the random number seed. +*> +*> lines 8-EOF: Lines specifying matrix types, as for NEP. +*> The 3-character path name is 'SGS' for the generalized +*> eigenvalue problem routines and driver routines. +*> +*>----------------------------------------------------------------------- +*> +*> SXV input files: +*> +*> line 1: 'SXV' in columns 1 to 3. +*> +*> line 2: N, INTEGER +*> Value of N. +*> +*> line 3: NB, NBMIN, NX, NS, NBCOL, INTEGERs +*> These integer parameters determine how blocking is done +*> (see ILAENV for details) +*> NB : block size +*> NBMIN : minimum block size +*> NX : minimum dimension for blocking +*> NS : number of shifts in xHGEQR +*> NBCOL : minimum column dimension for blocking +*> +*> line 4: THRESH, REAL +*> The test threshold against which computed residuals are +*> compared. Should generally be in the range from 10. to 20. +*> Information will be printed about each test for which the +*> test ratio is greater than or equal to the threshold.
+*> +*> line 5: TSTERR, LOGICAL +*> Flag indicating whether or not to test the error exits for +*> the LAPACK routines and driver routines. +*> +*> line 6: NEWSD, INTEGER +*> A code indicating how to set the random number seed. +*> = 0: Set the seed to a default value before each run +*> = 1: Initialize the seed to a default value only before the +*> first run +*> = 2: Like 1, but use the seed values on the next line +*> +*> If line 6 was 2: +*> +*> line 7: INTEGER array, dimension (4) +*> Four integer values for the random number seed. +*> +*> If line 2 was 0: +*> +*> line 7-EOF: Precomputed examples are tested. +*> +*> remaining lines : Each example is stored on 3+2*N lines, where N is +*> its dimension. The first line contains the dimension (a +*> single integer). The next N lines contain the matrix A, one +*> row per line. The next N lines contain the matrix B. The +*> next line contains the reciprocals of the eigenvalue +*> condition numbers. The last line contains the reciprocals of +*> the eigenvector condition numbers. The end of data is +*> indicated by dimension N=0. Even if no data is to be tested, +*> there must be at least one line containing N=0. +*> +*>----------------------------------------------------------------------- +*> +*> SGX input files: +*> +*> line 1: 'SGX' in columns 1 to 3. +*> +*> line 2: N, INTEGER +*> Value of N. +*> +*> line 3: NB, NBMIN, NX, NS, NBCOL, INTEGERs +*> These integer parameters determine how blocking is done +*> (see ILAENV for details) +*> NB : block size +*> NBMIN : minimum block size +*> NX : minimum dimension for blocking +*> NS : number of shifts in xHGEQR +*> NBCOL : minimum column dimension for blocking +*> +*> line 4: THRESH, REAL +*> The test threshold against which computed residuals are +*> compared. Should generally be in the range from 10. to 20. +*> Information will be printed about each test for which the +*> test ratio is greater than or equal to the threshold. 
+*> +*> line 5: TSTERR, LOGICAL +*> Flag indicating whether or not to test the error exits for +*> the LAPACK routines and driver routines. +*> +*> line 6: NEWSD, INTEGER +*> A code indicating how to set the random number seed. +*> = 0: Set the seed to a default value before each run +*> = 1: Initialize the seed to a default value only before the +*> first run +*> = 2: Like 1, but use the seed values on the next line +*> +*> If line 6 was 2: +*> +*> line 7: INTEGER array, dimension (4) +*> Four integer values for the random number seed. +*> +*> If line 2 was 0: +*> +*> line 7-EOF: Precomputed examples are tested. +*> +*> remaining lines : Each example is stored on 3+2*N lines, where N is +*> its dimension. The first line contains the dimension (a +*> single integer). The next line contains an integer k such +*> that only the last k eigenvalues will be selected and appear +*> in the leading diagonal blocks of $A$ and $B$. The next N +*> lines contain the matrix A, one row per line. The next N +*> lines contain the matrix B. The last line contains the +*> reciprocal of the eigenvalue cluster condition number and the +*> reciprocal of the deflating subspace (associated with the +*> selected eigencluster) condition number. The end of data is +*> indicated by dimension N=0. Even if no data is to be tested, +*> there must be at least one line containing N=0. +*> +*>----------------------------------------------------------------------- +*> +*> SSB input file: +*> +*> line 2: NN, INTEGER +*> Number of values of N. +*> +*> line 3: NVAL, INTEGER array, dimension (NN) +*> The values for the matrix dimension N. +*> +*> line 4: NK, INTEGER +*> Number of values of K. +*> +*> line 5: KVAL, INTEGER array, dimension (NK) +*> The values for the matrix dimension K. +*> +*> line 6: THRESH +*> Threshold value for the test ratios. Information will be +*> printed about each test for which the test ratio is greater +*> than or equal to the threshold. 
+*> +*> line 7: NEWSD, INTEGER +*> A code indicating how to set the random number seed. +*> = 0: Set the seed to a default value before each run +*> = 1: Initialize the seed to a default value only before the +*> first run +*> = 2: Like 1, but use the seed values on the next line +*> +*> If line 7 was 2: +*> +*> line 8: INTEGER array, dimension (4) +*> Four integer values for the random number seed. +*> +*> lines 8-EOF: Lines specifying matrix types, as for NEP. +*> The 3-character path name is 'SSB'. +*> +*>----------------------------------------------------------------------- +*> +*> SBB input file: +*> +*> line 2: NN, INTEGER +*> Number of values of M and N. +*> +*> line 3: MVAL, INTEGER array, dimension (NN) +*> The values for the matrix row dimension M. +*> +*> line 4: NVAL, INTEGER array, dimension (NN) +*> The values for the matrix column dimension N. +*> +*> line 5: NK, INTEGER +*> Number of values of K. +*> +*> line 6: KVAL, INTEGER array, dimension (NK) +*> The values for the matrix bandwidth K. +*> +*> line 7: NPARMS, INTEGER +*> Number of values of the parameter NRHS +*> +*> line 8: NSVAL, INTEGER array, dimension (NPARMS) +*> The values for the number of right hand sides NRHS. +*> +*> line 9: THRESH +*> Threshold value for the test ratios. Information will be +*> printed about each test for which the test ratio is greater +*> than or equal to the threshold. +*> +*> line 10: NEWSD, INTEGER +*> A code indicating how to set the random number seed. +*> = 0: Set the seed to a default value before each run +*> = 1: Initialize the seed to a default value only before the +*> first run +*> = 2: Like 1, but use the seed values on the next line +*> +*> If line 10 was 2: +*> +*> line 11: INTEGER array, dimension (4) +*> Four integer values for the random number seed. +*> +*> lines 11-EOF: Lines specifying matrix types, as for SVD. +*> The 3-character path name is 'SBB'.
+*> +*>----------------------------------------------------------------------- +*> +*> SEC input file: +*> +*> line 2: THRESH, REAL +*> Threshold value for the test ratios. Information will be +*> printed about each test for which the test ratio is greater +*> than or equal to the threshold. +*> +*> lines 3-EOF: +*> +*> Input for testing the eigencondition routines consists of a set of +*> specially constructed test cases and their solutions. The data +*> format is not intended to be modified by the user. +*> +*>----------------------------------------------------------------------- +*> +*> SBL and SBK input files: +*> +*> line 1: 'SBL' in columns 1-3 to test SGEBAL, or 'SBK' in +*> columns 1-3 to test SGEBAK. +*> +*> The remaining lines consist of specially constructed test cases. +*> +*>----------------------------------------------------------------------- +*> +*> SGL and SGK input files: +*> +*> line 1: 'SGL' in columns 1-3 to test SGGBAL, or 'SGK' in +*> columns 1-3 to test SGGBAK. +*> +*> The remaining lines consist of specially constructed test cases. +*> +*>----------------------------------------------------------------------- +*> +*> GLM data file: +*> +*> line 1: 'GLM' in columns 1 to 3. +*> +*> line 2: NN, INTEGER +*> Number of values of M, P, and N. +*> +*> line 3: MVAL, INTEGER array, dimension(NN) +*> Values of M (row dimension). +*> +*> line 4: PVAL, INTEGER array, dimension(NN) +*> Values of P (row dimension). +*> +*> line 5: NVAL, INTEGER array, dimension(NN) +*> Values of N (column dimension), note M <= N <= M+P. +*> +*> line 6: THRESH, REAL +*> Threshold value for the test ratios. Information will be +*> printed about each test for which the test ratio is greater +*> than or equal to the threshold. +*> +*> line 7: TSTERR, LOGICAL +*> Flag indicating whether or not to test the error exits for +*> the LAPACK routines and driver routines. +*> +*> line 8: NEWSD, INTEGER +*> A code indicating how to set the random number seed. 
+*> = 0: Set the seed to a default value before each run +*> = 1: Initialize the seed to a default value only before the +*> first run +*> = 2: Like 1, but use the seed values on the next line +*> +*> If line 8 was 2: +*> +*> line 9: INTEGER array, dimension (4) +*> Four integer values for the random number seed. +*> +*> lines 9-EOF: Lines specifying matrix types, as for NEP. +*> The 3-character path name is 'GLM' for the generalized +*> linear regression model routines. +*> +*>----------------------------------------------------------------------- +*> +*> GQR data file: +*> +*> line 1: 'GQR' in columns 1 to 3. +*> +*> line 2: NN, INTEGER +*> Number of values of M, P, and N. +*> +*> line 3: MVAL, INTEGER array, dimension(NN) +*> Values of M. +*> +*> line 4: PVAL, INTEGER array, dimension(NN) +*> Values of P. +*> +*> line 5: NVAL, INTEGER array, dimension(NN) +*> Values of N. +*> +*> line 6: THRESH, REAL +*> Threshold value for the test ratios. Information will be +*> printed about each test for which the test ratio is greater +*> than or equal to the threshold. +*> +*> line 7: TSTERR, LOGICAL +*> Flag indicating whether or not to test the error exits for +*> the LAPACK routines and driver routines. +*> +*> line 8: NEWSD, INTEGER +*> A code indicating how to set the random number seed. +*> = 0: Set the seed to a default value before each run +*> = 1: Initialize the seed to a default value only before the +*> first run +*> = 2: Like 1, but use the seed values on the next line +*> +*> If line 8 was 2: +*> +*> line 9: INTEGER array, dimension (4) +*> Four integer values for the random number seed. +*> +*> lines 9-EOF: Lines specifying matrix types, as for NEP. +*> The 3-character path name is 'GQR' for the generalized +*> QR and RQ routines. +*> +*>----------------------------------------------------------------------- +*> +*> GSV data file: +*> +*> line 1: 'GSV' in columns 1 to 3. +*> +*> line 2: NN, INTEGER +*> Number of values of M, P, and N. 
+*> +*> line 3: MVAL, INTEGER array, dimension(NN) +*> Values of M (row dimension). +*> +*> line 4: PVAL, INTEGER array, dimension(NN) +*> Values of P (row dimension). +*> +*> line 5: NVAL, INTEGER array, dimension(NN) +*> Values of N (column dimension). +*> +*> line 6: THRESH, REAL +*> Threshold value for the test ratios. Information will be +*> printed about each test for which the test ratio is greater +*> than or equal to the threshold. +*> +*> line 7: TSTERR, LOGICAL +*> Flag indicating whether or not to test the error exits for +*> the LAPACK routines and driver routines. +*> +*> line 8: NEWSD, INTEGER +*> A code indicating how to set the random number seed. +*> = 0: Set the seed to a default value before each run +*> = 1: Initialize the seed to a default value only before the +*> first run +*> = 2: Like 1, but use the seed values on the next line +*> +*> If line 8 was 2: +*> +*> line 9: INTEGER array, dimension (4) +*> Four integer values for the random number seed. +*> +*> lines 9-EOF: Lines specifying matrix types, as for NEP. +*> The 3-character path name is 'GSV' for the generalized +*> SVD routines. +*> +*>----------------------------------------------------------------------- +*> +*> CSD data file: +*> +*> line 1: 'CSD' in columns 1 to 3. +*> +*> line 2: NM, INTEGER +*> Number of values of M, P, and N. +*> +*> line 3: MVAL, INTEGER array, dimension(NM) +*> Values of M (row and column dimension of orthogonal matrix). +*> +*> line 4: PVAL, INTEGER array, dimension(NM) +*> Values of P (row dimension of top-left block). +*> +*> line 5: NVAL, INTEGER array, dimension(NM) +*> Values of N (column dimension of top-left block). +*> +*> line 6: THRESH, REAL +*> Threshold value for the test ratios. Information will be +*> printed about each test for which the test ratio is greater +*> than or equal to the threshold. +*> +*> line 7: TSTERR, LOGICAL +*> Flag indicating whether or not to test the error exits for +*> the LAPACK routines and driver routines. 
+*> +*> line 8: NEWSD, INTEGER +*> A code indicating how to set the random number seed. +*> = 0: Set the seed to a default value before each run +*> = 1: Initialize the seed to a default value only before the +*> first run +*> = 2: Like 1, but use the seed values on the next line +*> +*> If line 8 was 2: +*> +*> line 9: INTEGER array, dimension (4) +*> Four integer values for the random number seed. +*> +*> lines 9-EOF: Lines specifying matrix types, as for NEP. +*> The 3-character path name is 'CSD' for the CSD routine. +*> +*>----------------------------------------------------------------------- +*> +*> LSE data file: +*> +*> line 1: 'LSE' in columns 1 to 3. +*> +*> line 2: NN, INTEGER +*> Number of values of M, P, and N. +*> +*> line 3: MVAL, INTEGER array, dimension(NN) +*> Values of M. +*> +*> line 4: PVAL, INTEGER array, dimension(NN) +*> Values of P. +*> +*> line 5: NVAL, INTEGER array, dimension(NN) +*> Values of N, note P <= N <= P+M. +*> +*> line 6: THRESH, REAL +*> Threshold value for the test ratios. Information will be +*> printed about each test for which the test ratio is greater +*> than or equal to the threshold. +*> +*> line 7: TSTERR, LOGICAL +*> Flag indicating whether or not to test the error exits for +*> the LAPACK routines and driver routines. +*> +*> line 8: NEWSD, INTEGER +*> A code indicating how to set the random number seed. +*> = 0: Set the seed to a default value before each run +*> = 1: Initialize the seed to a default value only before the +*> first run +*> = 2: Like 1, but use the seed values on the next line +*> +*> If line 8 was 2: +*> +*> line 9: INTEGER array, dimension (4) +*> Four integer values for the random number seed. +*> +*> lines 9-EOF: Lines specifying matrix types, as for NEP. +*> The 3-character path name is 'LSE' for the constrained +*> linear least squares routines.
+*> +*>----------------------------------------------------------------------- +*> +*> NMAX is currently set to 132 and must be at least 12 for some of the +*> precomputed examples, and LWORK = NMAX*(5*NMAX+5)+1 in the parameter +*> statements below. For SVD, we assume NRHS may be as big as N. The +*> parameter NEED is set to 14 to allow for 14 N-by-N matrices for SGG. +*> \endverbatim +* +* Arguments: +* ========== +* +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup single_eig +* +* ===================================================================== + PROGRAM SCHKEE +* +#if defined(_OPENMP) + use omp_lib +#endif +* +* -- LAPACK test routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* ===================================================================== +* +* .. Parameters .. + INTEGER NMAX + PARAMETER ( NMAX = 132 ) + INTEGER NCMAX + PARAMETER ( NCMAX = 20 ) + INTEGER NEED + PARAMETER ( NEED = 14 ) + INTEGER LWORK + PARAMETER ( LWORK = NMAX*( 5*NMAX+5 )+1 ) + INTEGER LIWORK + PARAMETER ( LIWORK = NMAX*( 5*NMAX+20 ) ) + INTEGER MAXIN + PARAMETER ( MAXIN = 20 ) + INTEGER MAXT + PARAMETER ( MAXT = 30 ) + INTEGER NIN, NOUT + PARAMETER ( NIN = 5, NOUT = 6 ) +* .. +* .. Local Scalars .. + LOGICAL CSD, FATAL, GLM, GQR, GSV, LSE, NEP, SBB, SBK, + $ SBL, SEP, SES, SEV, SGG, SGK, SGL, SGS, SGV, + $ SGX, SSB, SSX, SVD, SVX, SXV, TSTCHK, TSTDIF, + $ TSTDRV, TSTERR + CHARACTER C1 + CHARACTER*3 C3, PATH + CHARACTER*32 VNAME + CHARACTER*10 INTSTR + CHARACTER*80 LINE + INTEGER I, I1, IC, INFO, ITMP, K, LENP, MAXTYP, NEWSD, + $ NK, NN, NPARMS, NRHS, NTYPES, + $ VERS_MAJOR, VERS_MINOR, VERS_PATCH, N_THREADS + REAL EPS, S1, S2, THRESH, THRSHN +* .. +* .. Local Arrays .. 
+ LOGICAL DOTYPE( MAXT ), LOGWRK( NMAX ) + INTEGER IOLDSD( 4 ), ISEED( 4 ), IWORK( LIWORK ), + $ KVAL( MAXIN ), MVAL( MAXIN ), MXBVAL( MAXIN ), + $ NBCOL( MAXIN ), NBMIN( MAXIN ), NBVAL( MAXIN ), + $ NSVAL( MAXIN ), NVAL( MAXIN ), NXVAL( MAXIN ), + $ PVAL( MAXIN ) + INTEGER INMIN( MAXIN ), INWIN( MAXIN ), INIBL( MAXIN ), + $ ISHFTS( MAXIN ), IACC22( MAXIN ) + REAL D( NMAX, 12 ), RESULT( 500 ), TAUA( NMAX ), + $ TAUB( NMAX ), X( 5*NMAX ) +* .. +* .. Allocatable Arrays .. + INTEGER AllocateStatus + REAL, DIMENSION(:), ALLOCATABLE :: WORK + REAL, DIMENSION(:,:), ALLOCATABLE :: A, B, C +* .. +* .. External Functions .. + LOGICAL LSAMEN + REAL SECOND, SLAMCH + EXTERNAL LSAMEN, SECOND, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL ALAREQ, SCHKBB, SCHKBD, SCHKBK, SCHKBL, SCHKEC, + $ SCHKGG, SCHKGK, SCHKGL, SCHKHS, SCHKSB, SCHKST, + $ SCKCSD, SCKGLM, SCKGQR, SCKGSV, SCKLSE, SDRGES, + $ SDRGEV, SDRGSX, SDRGVX, SDRVBD, SDRVES, SDRVEV, + $ SDRVSG, SDRVST, SDRVSX, SDRVVX, SERRBD, + $ SERRED, SERRGG, SERRHS, SERRST, ILAVER, XLAENV, + $ SDRGES3, SDRGEV3, + $ SCHKST2STG, SDRVST2STG, SCHKSB2STG, SDRVSG2STG +* .. +* .. Intrinsic Functions .. + INTRINSIC LEN, MIN +* .. +* .. Scalars in Common .. + LOGICAL LERR, OK + CHARACTER*32 SRNAMT + INTEGER INFOT, MAXB, NPROC, NSHIFT, NUNIT, SELDIM, + $ SELOPT +* .. +* .. Arrays in Common .. + LOGICAL SELVAL( 20 ) + INTEGER IPARMS( 100 ) + REAL SELWI( 20 ), SELWR( 20 ) +* .. +* .. Common blocks .. + COMMON / CENVIR / NPROC, NSHIFT, MAXB + COMMON / CLAENV / IPARMS + COMMON / INFOC / INFOT, NUNIT, OK, LERR + COMMON / SRNAMC / SRNAMT + COMMON / SSLCT / SELOPT, SELDIM, SELVAL, SELWR, SELWI +* .. +* .. Data statements .. + DATA INTSTR / '0123456789' / + DATA IOLDSD / 0, 0, 0, 1 / +* .. +* .. Allocate memory dynamically .. 
+* + ALLOCATE ( A(NMAX*NMAX,NEED), STAT = AllocateStatus ) + IF (AllocateStatus /= 0) STOP "*** Not enough memory ***" + ALLOCATE ( B(NMAX*NMAX,5), STAT = AllocateStatus ) + IF (AllocateStatus /= 0) STOP "*** Not enough memory ***" + ALLOCATE ( C(NCMAX*NCMAX,NCMAX*NCMAX), STAT = AllocateStatus ) + IF (AllocateStatus /= 0) STOP "*** Not enough memory ***" + ALLOCATE ( WORK(LWORK), STAT = AllocateStatus ) + IF (AllocateStatus /= 0) STOP "*** Not enough memory ***" +* .. +* .. Executable Statements .. +* + A = 0.0 + B = 0.0 + C = 0.0 + D = 0.0 + S1 = SECOND( ) + FATAL = .FALSE. + NUNIT = NOUT +* +* Return to here to read multiple sets of data +* + 10 CONTINUE +* +* Read the first line and set the 3-character test path +* + READ( NIN, FMT = '(A80)', END = 380 )LINE + PATH = LINE( 1: 3 ) + NEP = LSAMEN( 3, PATH, 'NEP' ) .OR. LSAMEN( 3, PATH, 'SHS' ) + SEP = LSAMEN( 3, PATH, 'SEP' ) .OR. LSAMEN( 3, PATH, 'SST' ) .OR. + $ LSAMEN( 3, PATH, 'SSG' ) .OR. LSAMEN( 3, PATH, 'SE2' ) + SVD = LSAMEN( 3, PATH, 'SVD' ) .OR. LSAMEN( 3, PATH, 'DBD' ) + SVD = LSAMEN( 3, PATH, 'SVD' ) .OR. LSAMEN( 3, PATH, 'SBD' ) + SEV = LSAMEN( 3, PATH, 'SEV' ) + SES = LSAMEN( 3, PATH, 'SES' ) + SVX = LSAMEN( 3, PATH, 'SVX' ) + SSX = LSAMEN( 3, PATH, 'SSX' ) + SGG = LSAMEN( 3, PATH, 'SGG' ) + SGS = LSAMEN( 3, PATH, 'SGS' ) + SGX = LSAMEN( 3, PATH, 'SGX' ) + SGV = LSAMEN( 3, PATH, 'SGV' ) + SXV = LSAMEN( 3, PATH, 'SXV' ) + SSB = LSAMEN( 3, PATH, 'SSB' ) + SBB = LSAMEN( 3, PATH, 'SBB' ) + GLM = LSAMEN( 3, PATH, 'GLM' ) + GQR = LSAMEN( 3, PATH, 'GQR' ) .OR. LSAMEN( 3, PATH, 'GRQ' ) + GSV = LSAMEN( 3, PATH, 'GSV' ) + CSD = LSAMEN( 3, PATH, 'CSD' ) + LSE = LSAMEN( 3, PATH, 'LSE' ) + SBL = LSAMEN( 3, PATH, 'SBL' ) + SBK = LSAMEN( 3, PATH, 'SBK' ) + SGL = LSAMEN( 3, PATH, 'SGL' ) + SGK = LSAMEN( 3, PATH, 'SGK' ) +* +* Report values of parameters. +* + IF( PATH.EQ.' 
' ) THEN + GO TO 10 + ELSE IF( NEP ) THEN + WRITE( NOUT, FMT = 9987 ) + ELSE IF( SEP ) THEN + WRITE( NOUT, FMT = 9986 ) + ELSE IF( SVD ) THEN + WRITE( NOUT, FMT = 9985 ) + ELSE IF( SEV ) THEN + WRITE( NOUT, FMT = 9979 ) + ELSE IF( SES ) THEN + WRITE( NOUT, FMT = 9978 ) + ELSE IF( SVX ) THEN + WRITE( NOUT, FMT = 9977 ) + ELSE IF( SSX ) THEN + WRITE( NOUT, FMT = 9976 ) + ELSE IF( SGG ) THEN + WRITE( NOUT, FMT = 9975 ) + ELSE IF( SGS ) THEN + WRITE( NOUT, FMT = 9964 ) + ELSE IF( SGX ) THEN + WRITE( NOUT, FMT = 9965 ) + ELSE IF( SGV ) THEN + WRITE( NOUT, FMT = 9963 ) + ELSE IF( SXV ) THEN + WRITE( NOUT, FMT = 9962 ) + ELSE IF( SSB ) THEN + WRITE( NOUT, FMT = 9974 ) + ELSE IF( SBB ) THEN + WRITE( NOUT, FMT = 9967 ) + ELSE IF( GLM ) THEN + WRITE( NOUT, FMT = 9971 ) + ELSE IF( GQR ) THEN + WRITE( NOUT, FMT = 9970 ) + ELSE IF( GSV ) THEN + WRITE( NOUT, FMT = 9969 ) + ELSE IF( CSD ) THEN + WRITE( NOUT, FMT = 9960 ) + ELSE IF( LSE ) THEN + WRITE( NOUT, FMT = 9968 ) + ELSE IF( SBL ) THEN +* +* SGEBAL: Balancing +* + CALL SCHKBL( NIN, NOUT ) + GO TO 10 + ELSE IF( SBK ) THEN +* +* SGEBAK: Back transformation +* + CALL SCHKBK( NIN, NOUT ) + GO TO 10 + ELSE IF( SGL ) THEN +* +* SGGBAL: Balancing +* + CALL SCHKGL( NIN, NOUT ) + GO TO 10 + ELSE IF( SGK ) THEN +* +* SGGBAK: Back transformation +* + CALL SCHKGK( NIN, NOUT ) + GO TO 10 + ELSE IF( LSAMEN( 3, PATH, 'SEC' ) ) THEN +* +* SEC: Eigencondition estimation +* + READ( NIN, FMT = * )THRESH + CALL XLAENV( 1, 1 ) + CALL XLAENV( 12, 11 ) + CALL XLAENV( 13, 2 ) + CALL XLAENV( 14, 0 ) + CALL XLAENV( 15, 2 ) + CALL XLAENV( 16, 2 ) + TSTERR = .TRUE. + CALL SCHKEC( THRESH, TSTERR, NIN, NOUT ) + GO TO 10 + ELSE + WRITE( NOUT, FMT = 9992 )PATH + GO TO 10 + END IF + CALL ILAVER( VERS_MAJOR, VERS_MINOR, VERS_PATCH ) + WRITE( NOUT, FMT = 9972 ) VERS_MAJOR, VERS_MINOR, VERS_PATCH + WRITE( NOUT, FMT = 9984 ) +* +* Read the number of values of M, P, and N. 
+* + READ( NIN, FMT = * )NN + IF( NN.LT.0 ) THEN + WRITE( NOUT, FMT = 9989 )' NN ', NN, 1 + NN = 0 + FATAL = .TRUE. + ELSE IF( NN.GT.MAXIN ) THEN + WRITE( NOUT, FMT = 9988 )' NN ', NN, MAXIN + NN = 0 + FATAL = .TRUE. + END IF +* +* Read the values of M +* + IF( .NOT.( SGX .OR. SXV ) ) THEN + READ( NIN, FMT = * )( MVAL( I ), I = 1, NN ) + IF( SVD ) THEN + VNAME = ' M ' + ELSE + VNAME = ' N ' + END IF + DO 20 I = 1, NN + IF( MVAL( I ).LT.0 ) THEN + WRITE( NOUT, FMT = 9989 )VNAME, MVAL( I ), 0 + FATAL = .TRUE. + ELSE IF( MVAL( I ).GT.NMAX ) THEN + WRITE( NOUT, FMT = 9988 )VNAME, MVAL( I ), NMAX + FATAL = .TRUE. + END IF + 20 CONTINUE + WRITE( NOUT, FMT = 9983 )'M: ', ( MVAL( I ), I = 1, NN ) + END IF +* +* Read the values of P +* + IF( GLM .OR. GQR .OR. GSV .OR. CSD .OR. LSE ) THEN + READ( NIN, FMT = * )( PVAL( I ), I = 1, NN ) + DO 30 I = 1, NN + IF( PVAL( I ).LT.0 ) THEN + WRITE( NOUT, FMT = 9989 )' P ', PVAL( I ), 0 + FATAL = .TRUE. + ELSE IF( PVAL( I ).GT.NMAX ) THEN + WRITE( NOUT, FMT = 9988 )' P ', PVAL( I ), NMAX + FATAL = .TRUE. + END IF + 30 CONTINUE + WRITE( NOUT, FMT = 9983 )'P: ', ( PVAL( I ), I = 1, NN ) + END IF +* +* Read the values of N +* + IF( SVD .OR. SBB .OR. GLM .OR. GQR .OR. GSV .OR. CSD .OR. + $ LSE ) THEN + READ( NIN, FMT = * )( NVAL( I ), I = 1, NN ) + DO 40 I = 1, NN + IF( NVAL( I ).LT.0 ) THEN + WRITE( NOUT, FMT = 9989 )' N ', NVAL( I ), 0 + FATAL = .TRUE. + ELSE IF( NVAL( I ).GT.NMAX ) THEN + WRITE( NOUT, FMT = 9988 )' N ', NVAL( I ), NMAX + FATAL = .TRUE. + END IF + 40 CONTINUE + ELSE + DO 50 I = 1, NN + NVAL( I ) = MVAL( I ) + 50 CONTINUE + END IF + IF( .NOT.( SGX .OR. SXV ) ) THEN + WRITE( NOUT, FMT = 9983 )'N: ', ( NVAL( I ), I = 1, NN ) + ELSE + WRITE( NOUT, FMT = 9983 )'N: ', NN + END IF +* +* Read the number of values of K, followed by the values of K +* + IF( SSB .OR. 
SBB ) THEN + READ( NIN, FMT = * )NK + READ( NIN, FMT = * )( KVAL( I ), I = 1, NK ) + DO 60 I = 1, NK + IF( KVAL( I ).LT.0 ) THEN + WRITE( NOUT, FMT = 9989 )' K ', KVAL( I ), 0 + FATAL = .TRUE. + ELSE IF( KVAL( I ).GT.NMAX ) THEN + WRITE( NOUT, FMT = 9988 )' K ', KVAL( I ), NMAX + FATAL = .TRUE. + END IF + 60 CONTINUE + WRITE( NOUT, FMT = 9983 )'K: ', ( KVAL( I ), I = 1, NK ) + END IF +* + IF( SEV .OR. SES .OR. SVX .OR. SSX ) THEN +* +* For the nonsymmetric QR driver routines, only one set of +* parameters is allowed. +* + READ( NIN, FMT = * )NBVAL( 1 ), NBMIN( 1 ), NXVAL( 1 ), + $ INMIN( 1 ), INWIN( 1 ), INIBL(1), ISHFTS(1), IACC22(1) + IF( NBVAL( 1 ).LT.1 ) THEN + WRITE( NOUT, FMT = 9989 )' NB ', NBVAL( 1 ), 1 + FATAL = .TRUE. + ELSE IF( NBMIN( 1 ).LT.1 ) THEN + WRITE( NOUT, FMT = 9989 )'NBMIN ', NBMIN( 1 ), 1 + FATAL = .TRUE. + ELSE IF( NXVAL( 1 ).LT.1 ) THEN + WRITE( NOUT, FMT = 9989 )' NX ', NXVAL( 1 ), 1 + FATAL = .TRUE. + ELSE IF( INMIN( 1 ).LT.1 ) THEN + WRITE( NOUT, FMT = 9989 )' INMIN ', INMIN( 1 ), 1 + FATAL = .TRUE. + ELSE IF( INWIN( 1 ).LT.1 ) THEN + WRITE( NOUT, FMT = 9989 )' INWIN ', INWIN( 1 ), 1 + FATAL = .TRUE. + ELSE IF( INIBL( 1 ).LT.1 ) THEN + WRITE( NOUT, FMT = 9989 )' INIBL ', INIBL( 1 ), 1 + FATAL = .TRUE. + ELSE IF( ISHFTS( 1 ).LT.1 ) THEN + WRITE( NOUT, FMT = 9989 )' ISHFTS ', ISHFTS( 1 ), 1 + FATAL = .TRUE. + ELSE IF( IACC22( 1 ).LT.0 ) THEN + WRITE( NOUT, FMT = 9989 )' IACC22 ', IACC22( 1 ), 0 + FATAL = .TRUE. 
+ END IF + CALL XLAENV( 1, NBVAL( 1 ) ) + CALL XLAENV( 2, NBMIN( 1 ) ) + CALL XLAENV( 3, NXVAL( 1 ) ) + CALL XLAENV(12, MAX( 11, INMIN( 1 ) ) ) + CALL XLAENV(13, INWIN( 1 ) ) + CALL XLAENV(14, INIBL( 1 ) ) + CALL XLAENV(15, ISHFTS( 1 ) ) + CALL XLAENV(16, IACC22( 1 ) ) + WRITE( NOUT, FMT = 9983 )'NB: ', NBVAL( 1 ) + WRITE( NOUT, FMT = 9983 )'NBMIN:', NBMIN( 1 ) + WRITE( NOUT, FMT = 9983 )'NX: ', NXVAL( 1 ) + WRITE( NOUT, FMT = 9983 )'INMIN: ', INMIN( 1 ) + WRITE( NOUT, FMT = 9983 )'INWIN: ', INWIN( 1 ) + WRITE( NOUT, FMT = 9983 )'INIBL: ', INIBL( 1 ) + WRITE( NOUT, FMT = 9983 )'ISHFTS: ', ISHFTS( 1 ) + WRITE( NOUT, FMT = 9983 )'IACC22: ', IACC22( 1 ) +* + ELSE IF( SGS .OR. SGX .OR. SGV .OR. SXV ) THEN +* +* For the nonsymmetric generalized driver routines, only one set +* of parameters is allowed. +* + READ( NIN, FMT = * )NBVAL( 1 ), NBMIN( 1 ), NXVAL( 1 ), + $ NSVAL( 1 ), MXBVAL( 1 ) + IF( NBVAL( 1 ).LT.1 ) THEN + WRITE( NOUT, FMT = 9989 )' NB ', NBVAL( 1 ), 1 + FATAL = .TRUE. + ELSE IF( NBMIN( 1 ).LT.1 ) THEN + WRITE( NOUT, FMT = 9989 )'NBMIN ', NBMIN( 1 ), 1 + FATAL = .TRUE. + ELSE IF( NXVAL( 1 ).LT.1 ) THEN + WRITE( NOUT, FMT = 9989 )' NX ', NXVAL( 1 ), 1 + FATAL = .TRUE. + ELSE IF( NSVAL( 1 ).LT.2 ) THEN + WRITE( NOUT, FMT = 9989 )' NS ', NSVAL( 1 ), 2 + FATAL = .TRUE. + ELSE IF( MXBVAL( 1 ).LT.1 ) THEN + WRITE( NOUT, FMT = 9989 )' MAXB ', MXBVAL( 1 ), 1 + FATAL = .TRUE. + END IF + CALL XLAENV( 1, NBVAL( 1 ) ) + CALL XLAENV( 2, NBMIN( 1 ) ) + CALL XLAENV( 3, NXVAL( 1 ) ) + CALL XLAENV( 4, NSVAL( 1 ) ) + CALL XLAENV( 8, MXBVAL( 1 ) ) + WRITE( NOUT, FMT = 9983 )'NB: ', NBVAL( 1 ) + WRITE( NOUT, FMT = 9983 )'NBMIN:', NBMIN( 1 ) + WRITE( NOUT, FMT = 9983 )'NX: ', NXVAL( 1 ) + WRITE( NOUT, FMT = 9983 )'NS: ', NSVAL( 1 ) + WRITE( NOUT, FMT = 9983 )'MAXB: ', MXBVAL( 1 ) +* + ELSE IF( .NOT.SSB .AND. .NOT.GLM .AND. .NOT.GQR .AND. .NOT. + $ GSV .AND. .NOT.CSD .AND. .NOT.LSE ) THEN +* +* For the other paths, the number of parameters can be varied +* from the input file. 
Read the number of parameter values. +* + READ( NIN, FMT = * )NPARMS + IF( NPARMS.LT.1 ) THEN + WRITE( NOUT, FMT = 9989 )'NPARMS', NPARMS, 1 + NPARMS = 0 + FATAL = .TRUE. + ELSE IF( NPARMS.GT.MAXIN ) THEN + WRITE( NOUT, FMT = 9988 )'NPARMS', NPARMS, MAXIN + NPARMS = 0 + FATAL = .TRUE. + END IF +* +* Read the values of NB +* + IF( .NOT.SBB ) THEN + READ( NIN, FMT = * )( NBVAL( I ), I = 1, NPARMS ) + DO 70 I = 1, NPARMS + IF( NBVAL( I ).LT.0 ) THEN + WRITE( NOUT, FMT = 9989 )' NB ', NBVAL( I ), 0 + FATAL = .TRUE. + ELSE IF( NBVAL( I ).GT.NMAX ) THEN + WRITE( NOUT, FMT = 9988 )' NB ', NBVAL( I ), NMAX + FATAL = .TRUE. + END IF + 70 CONTINUE + WRITE( NOUT, FMT = 9983 )'NB: ', + $ ( NBVAL( I ), I = 1, NPARMS ) + END IF +* +* Read the values of NBMIN +* + IF( NEP .OR. SEP .OR. SVD .OR. SGG ) THEN + READ( NIN, FMT = * )( NBMIN( I ), I = 1, NPARMS ) + DO 80 I = 1, NPARMS + IF( NBMIN( I ).LT.0 ) THEN + WRITE( NOUT, FMT = 9989 )'NBMIN ', NBMIN( I ), 0 + FATAL = .TRUE. + ELSE IF( NBMIN( I ).GT.NMAX ) THEN + WRITE( NOUT, FMT = 9988 )'NBMIN ', NBMIN( I ), NMAX + FATAL = .TRUE. + END IF + 80 CONTINUE + WRITE( NOUT, FMT = 9983 )'NBMIN:', + $ ( NBMIN( I ), I = 1, NPARMS ) + ELSE + DO 90 I = 1, NPARMS + NBMIN( I ) = 1 + 90 CONTINUE + END IF +* +* Read the values of NX +* + IF( NEP .OR. SEP .OR. SVD ) THEN + READ( NIN, FMT = * )( NXVAL( I ), I = 1, NPARMS ) + DO 100 I = 1, NPARMS + IF( NXVAL( I ).LT.0 ) THEN + WRITE( NOUT, FMT = 9989 )' NX ', NXVAL( I ), 0 + FATAL = .TRUE. + ELSE IF( NXVAL( I ).GT.NMAX ) THEN + WRITE( NOUT, FMT = 9988 )' NX ', NXVAL( I ), NMAX + FATAL = .TRUE. + END IF + 100 CONTINUE + WRITE( NOUT, FMT = 9983 )'NX: ', + $ ( NXVAL( I ), I = 1, NPARMS ) + ELSE + DO 110 I = 1, NPARMS + NXVAL( I ) = 1 + 110 CONTINUE + END IF +* +* Read the values of NSHIFT (if SGG) or NRHS (if SVD +* or SBB). +* + IF( SVD .OR. SBB .OR. 
SGG ) THEN + READ( NIN, FMT = * )( NSVAL( I ), I = 1, NPARMS ) + DO 120 I = 1, NPARMS + IF( NSVAL( I ).LT.0 ) THEN + WRITE( NOUT, FMT = 9989 )' NS ', NSVAL( I ), 0 + FATAL = .TRUE. + ELSE IF( NSVAL( I ).GT.NMAX ) THEN + WRITE( NOUT, FMT = 9988 )' NS ', NSVAL( I ), NMAX + FATAL = .TRUE. + END IF + 120 CONTINUE + WRITE( NOUT, FMT = 9983 )'NS: ', + $ ( NSVAL( I ), I = 1, NPARMS ) + ELSE + DO 130 I = 1, NPARMS + NSVAL( I ) = 1 + 130 CONTINUE + END IF +* +* Read the values for MAXB. +* + IF( SGG ) THEN + READ( NIN, FMT = * )( MXBVAL( I ), I = 1, NPARMS ) + DO 140 I = 1, NPARMS + IF( MXBVAL( I ).LT.0 ) THEN + WRITE( NOUT, FMT = 9989 )' MAXB ', MXBVAL( I ), 0 + FATAL = .TRUE. + ELSE IF( MXBVAL( I ).GT.NMAX ) THEN + WRITE( NOUT, FMT = 9988 )' MAXB ', MXBVAL( I ), NMAX + FATAL = .TRUE. + END IF + 140 CONTINUE + WRITE( NOUT, FMT = 9983 )'MAXB: ', + $ ( MXBVAL( I ), I = 1, NPARMS ) + ELSE + DO 150 I = 1, NPARMS + MXBVAL( I ) = 1 + 150 CONTINUE + END IF +* +* Read the values for INMIN. +* + IF( NEP ) THEN + READ( NIN, FMT = * )( INMIN( I ), I = 1, NPARMS ) + DO 540 I = 1, NPARMS + IF( INMIN( I ).LT.0 ) THEN + WRITE( NOUT, FMT = 9989 )' INMIN ', INMIN( I ), 0 + FATAL = .TRUE. + END IF + 540 CONTINUE + WRITE( NOUT, FMT = 9983 )'INMIN: ', + $ ( INMIN( I ), I = 1, NPARMS ) + ELSE + DO 550 I = 1, NPARMS + INMIN( I ) = 1 + 550 CONTINUE + END IF +* +* Read the values for INWIN. +* + IF( NEP ) THEN + READ( NIN, FMT = * )( INWIN( I ), I = 1, NPARMS ) + DO 560 I = 1, NPARMS + IF( INWIN( I ).LT.0 ) THEN + WRITE( NOUT, FMT = 9989 )' INWIN ', INWIN( I ), 0 + FATAL = .TRUE. + END IF + 560 CONTINUE + WRITE( NOUT, FMT = 9983 )'INWIN: ', + $ ( INWIN( I ), I = 1, NPARMS ) + ELSE + DO 570 I = 1, NPARMS + INWIN( I ) = 1 + 570 CONTINUE + END IF +* +* Read the values for INIBL. +* + IF( NEP ) THEN + READ( NIN, FMT = * )( INIBL( I ), I = 1, NPARMS ) + DO 580 I = 1, NPARMS + IF( INIBL( I ).LT.0 ) THEN + WRITE( NOUT, FMT = 9989 )' INIBL ', INIBL( I ), 0 + FATAL = .TRUE. 
+ END IF + 580 CONTINUE + WRITE( NOUT, FMT = 9983 )'INIBL: ', + $ ( INIBL( I ), I = 1, NPARMS ) + ELSE + DO 590 I = 1, NPARMS + INIBL( I ) = 1 + 590 CONTINUE + END IF +* +* Read the values for ISHFTS. +* + IF( NEP ) THEN + READ( NIN, FMT = * )( ISHFTS( I ), I = 1, NPARMS ) + DO 600 I = 1, NPARMS + IF( ISHFTS( I ).LT.0 ) THEN + WRITE( NOUT, FMT = 9989 )' ISHFTS ', ISHFTS( I ), 0 + FATAL = .TRUE. + END IF + 600 CONTINUE + WRITE( NOUT, FMT = 9983 )'ISHFTS: ', + $ ( ISHFTS( I ), I = 1, NPARMS ) + ELSE + DO 610 I = 1, NPARMS + ISHFTS( I ) = 1 + 610 CONTINUE + END IF +* +* Read the values for IACC22. +* + IF( NEP .OR. SGG ) THEN + READ( NIN, FMT = * )( IACC22( I ), I = 1, NPARMS ) + DO 620 I = 1, NPARMS + IF( IACC22( I ).LT.0 ) THEN + WRITE( NOUT, FMT = 9989 )' IACC22 ', IACC22( I ), 0 + FATAL = .TRUE. + END IF + 620 CONTINUE + WRITE( NOUT, FMT = 9983 )'IACC22: ', + $ ( IACC22( I ), I = 1, NPARMS ) + ELSE + DO 630 I = 1, NPARMS + IACC22( I ) = 1 + 630 CONTINUE + END IF +* +* Read the values for NBCOL. +* + IF( SGG ) THEN + READ( NIN, FMT = * )( NBCOL( I ), I = 1, NPARMS ) + DO 160 I = 1, NPARMS + IF( NBCOL( I ).LT.0 ) THEN + WRITE( NOUT, FMT = 9989 )'NBCOL ', NBCOL( I ), 0 + FATAL = .TRUE. + ELSE IF( NBCOL( I ).GT.NMAX ) THEN + WRITE( NOUT, FMT = 9988 )'NBCOL ', NBCOL( I ), NMAX + FATAL = .TRUE. + END IF + 160 CONTINUE + WRITE( NOUT, FMT = 9983 )'NBCOL:', + $ ( NBCOL( I ), I = 1, NPARMS ) + ELSE + DO 170 I = 1, NPARMS + NBCOL( I ) = 1 + 170 CONTINUE + END IF + END IF +* +* Calculate and print the machine dependent constants. +* + WRITE( NOUT, FMT = * ) + EPS = SLAMCH( 'Underflow threshold' ) + WRITE( NOUT, FMT = 9981 )'underflow', EPS + EPS = SLAMCH( 'Overflow threshold' ) + WRITE( NOUT, FMT = 9981 )'overflow ', EPS + EPS = SLAMCH( 'Epsilon' ) + WRITE( NOUT, FMT = 9981 )'precision', EPS +* +* Read the threshold value for the test ratios. +* + READ( NIN, FMT = * )THRESH + WRITE( NOUT, FMT = 9982 )THRESH + IF( SEP .OR. SVD .OR. 
SGG ) THEN +* +* Read the flag that indicates whether to test LAPACK routines. +* + READ( NIN, FMT = * )TSTCHK +* +* Read the flag that indicates whether to test driver routines. +* + READ( NIN, FMT = * )TSTDRV + END IF +* +* Read the flag that indicates whether to test the error exits. +* + READ( NIN, FMT = * )TSTERR +* +* Read the code describing how to set the random number seed. +* + READ( NIN, FMT = * )NEWSD +* +* If NEWSD = 2, read another line with 4 integers for the seed. +* + IF( NEWSD.EQ.2 ) + $ READ( NIN, FMT = * )( IOLDSD( I ), I = 1, 4 ) +* + DO 180 I = 1, 4 + ISEED( I ) = IOLDSD( I ) + 180 CONTINUE +* + IF( FATAL ) THEN + WRITE( NOUT, FMT = 9999 ) + STOP + END IF +* +* Read the input lines indicating the test path and its parameters. +* The first three characters indicate the test path, and the number +* of test matrix types must be the first nonblank item in columns +* 4-80. +* + 190 CONTINUE +* + IF( .NOT.( SGX .OR. SXV ) ) THEN +* + 200 CONTINUE + READ( NIN, FMT = '(A80)', END = 380 )LINE + C3 = LINE( 1: 3 ) + LENP = LEN( LINE ) + I = 3 + ITMP = 0 + I1 = 0 + 210 CONTINUE + I = I + 1 + IF( I.GT.LENP ) THEN + IF( I1.GT.0 ) THEN + GO TO 240 + ELSE + NTYPES = MAXT + GO TO 240 + END IF + END IF + IF( LINE( I: I ).NE.' ' .AND. LINE( I: I ).NE.',' ) THEN + I1 = I + C1 = LINE( I1: I1 ) +* +* Check that a valid integer was read +* + DO 220 K = 1, 10 + IF( C1.EQ.INTSTR( K: K ) ) THEN + IC = K - 1 + GO TO 230 + END IF + 220 CONTINUE + WRITE( NOUT, FMT = 9991 )I, LINE + GO TO 200 + 230 CONTINUE + ITMP = 10*ITMP + IC + GO TO 210 + ELSE IF( I1.GT.0 ) THEN + GO TO 240 + ELSE + GO TO 210 + END IF + 240 CONTINUE + NTYPES = ITMP +* +* Skip the tests if NTYPES is <= 0. +* + IF( .NOT.( SEV .OR. SES .OR. SVX .OR. SSX .OR. SGV .OR. + $ SGS ) .AND. NTYPES.LE.0 ) THEN + WRITE( NOUT, FMT = 9990 )C3 + GO TO 200 + END IF +* + ELSE + IF( SXV ) + $ C3 = 'SXV' + IF( SGX ) + $ C3 = 'SGX' + END IF +* +* Reset the random number seed. 
+* + IF( NEWSD.EQ.0 ) THEN + DO 250 K = 1, 4 + ISEED( K ) = IOLDSD( K ) + 250 CONTINUE + END IF +* + IF( LSAMEN( 3, C3, 'SHS' ) .OR. LSAMEN( 3, C3, 'NEP' ) ) THEN +* +* ------------------------------------- +* NEP: Nonsymmetric Eigenvalue Problem +* ------------------------------------- +* Vary the parameters +* NB = block size +* NBMIN = minimum block size +* NX = crossover point +* NS = number of shifts +* MAXB = minimum submatrix size +* + MAXTYP = 21 + NTYPES = MIN( MAXTYP, NTYPES ) + CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) + CALL XLAENV( 1, 1 ) + IF( TSTERR ) + $ CALL SERRHS( 'SHSEQR', NOUT ) + DO 270 I = 1, NPARMS + CALL XLAENV( 1, NBVAL( I ) ) + CALL XLAENV( 2, NBMIN( I ) ) + CALL XLAENV( 3, NXVAL( I ) ) + CALL XLAENV(12, MAX( 11, INMIN( I ) ) ) + CALL XLAENV(13, INWIN( I ) ) + CALL XLAENV(14, INIBL( I ) ) + CALL XLAENV(15, ISHFTS( I ) ) + CALL XLAENV(16, IACC22( I ) ) +* + IF( NEWSD.EQ.0 ) THEN + DO 260 K = 1, 4 + ISEED( K ) = IOLDSD( K ) + 260 CONTINUE + END IF + WRITE( NOUT, FMT = 9961 )C3, NBVAL( I ), NBMIN( I ), + $ NXVAL( I ), MAX( 11, INMIN(I)), + $ INWIN( I ), INIBL( I ), ISHFTS( I ), IACC22( I ) + CALL SCHKHS( NN, NVAL, MAXTYP, DOTYPE, ISEED, THRESH, NOUT, + $ A( 1, 1 ), NMAX, A( 1, 2 ), A( 1, 3 ), + $ A( 1, 4 ), A( 1, 5 ), NMAX, A( 1, 6 ), + $ A( 1, 7 ), D( 1, 1 ), D( 1, 2 ), D( 1, 3 ), + $ D( 1, 4 ), D( 1, 5 ), D( 1, 6 ), A( 1, 8 ), + $ A( 1, 9 ), A( 1, 10 ), A( 1, 11 ), A( 1, 12 ), + $ D( 1, 7 ), WORK, LWORK, IWORK, LOGWRK, RESULT, + $ INFO ) + IF( INFO.NE.0 ) + $ WRITE( NOUT, FMT = 9980 )'SCHKHS', INFO + 270 CONTINUE +* + ELSE IF( LSAMEN( 3, C3, 'SST' ) .OR. LSAMEN( 3, C3, 'SEP' ) + $ .OR. 
LSAMEN( 3, C3, 'SE2' ) ) THEN +* +* ---------------------------------- +* SEP: Symmetric Eigenvalue Problem +* ---------------------------------- +* Vary the parameters +* NB = block size +* NBMIN = minimum block size +* NX = crossover point +* + MAXTYP = 21 + NTYPES = MIN( MAXTYP, NTYPES ) + CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) + CALL XLAENV( 1, 1 ) + CALL XLAENV( 9, 25 ) + IF( TSTERR ) THEN +#if defined(_OPENMP) +* Save the thread limit with OMP_GET_MAX_THREADS: in serial code +* OMP_GET_NUM_THREADS always returns 1, which would make the +* restore after SERRST a no-op and leave later tests on one thread. + N_THREADS = OMP_GET_MAX_THREADS() + CALL OMP_SET_NUM_THREADS(1) +#endif + CALL SERRST( 'SST', NOUT ) +#if defined(_OPENMP) + CALL OMP_SET_NUM_THREADS(N_THREADS) +#endif + END IF + DO 290 I = 1, NPARMS + CALL XLAENV( 1, NBVAL( I ) ) + CALL XLAENV( 2, NBMIN( I ) ) + CALL XLAENV( 3, NXVAL( I ) ) +* + IF( NEWSD.EQ.0 ) THEN + DO 280 K = 1, 4 + ISEED( K ) = IOLDSD( K ) + 280 CONTINUE + END IF + WRITE( NOUT, FMT = 9997 )C3, NBVAL( I ), NBMIN( I ), + $ NXVAL( I ) + IF( TSTCHK ) THEN + IF( LSAMEN( 3, C3, 'SE2' ) ) THEN + CALL SCHKST2STG( NN, NVAL, MAXTYP, DOTYPE, ISEED, THRESH, + $ NOUT, A( 1, 1 ), NMAX, A( 1, 2 ), D( 1, 1 ), + $ D( 1, 2 ), D( 1, 3 ), D( 1, 4 ), D( 1, 5 ), + $ D( 1, 6 ), D( 1, 7 ), D( 1, 8 ), D( 1, 9 ), + $ D( 1, 10 ), D( 1, 11 ), A( 1, 3 ), NMAX, + $ A( 1, 4 ), A( 1, 5 ), D( 1, 12 ), A( 1, 6 ), + $ WORK, LWORK, IWORK, LIWORK, RESULT, INFO ) + ELSE + CALL SCHKST( NN, NVAL, MAXTYP, DOTYPE, ISEED, THRESH, + $ NOUT, A( 1, 1 ), NMAX, A( 1, 2 ), D( 1, 1 ), + $ D( 1, 2 ), D( 1, 3 ), D( 1, 4 ), D( 1, 5 ), + $ D( 1, 6 ), D( 1, 7 ), D( 1, 8 ), D( 1, 9 ), + $ D( 1, 10 ), D( 1, 11 ), A( 1, 3 ), NMAX, + $ A( 1, 4 ), A( 1, 5 ), D( 1, 12 ), A( 1, 6 ), + $ WORK, LWORK, IWORK, LIWORK, RESULT, INFO ) + ENDIF + IF( INFO.NE.0 ) + $ WRITE( NOUT, FMT = 9980 )'SCHKST', INFO + END IF + IF( TSTDRV ) THEN + IF( LSAMEN( 3, C3, 'SE2' ) ) THEN + CALL SDRVST2STG( NN, NVAL, 18, DOTYPE, ISEED, THRESH, + $ NOUT, A( 1, 1 ), NMAX, D( 1, 3 ), D( 1, 4 ), + $ D( 1, 5 ), D( 1, 6 ), D( 1, 8 ), D( 1, 9 ), + $ D( 1, 10 ), D( 1, 11), A( 1, 2 ), NMAX, + $ A( 1, 3 ), D( 1, 
12 ), A( 1, 4 ), WORK, + $ LWORK, IWORK, LIWORK, RESULT, INFO ) + ELSE + CALL SDRVST( NN, NVAL, 18, DOTYPE, ISEED, THRESH, + $ NOUT, A( 1, 1 ), NMAX, D( 1, 3 ), D( 1, 4 ), + $ D( 1, 5 ), D( 1, 6 ), D( 1, 8 ), D( 1, 9 ), + $ D( 1, 10 ), D( 1, 11), A( 1, 2 ), NMAX, + $ A( 1, 3 ), D( 1, 12 ), A( 1, 4 ), WORK, + $ LWORK, IWORK, LIWORK, RESULT, INFO ) + ENDIF + IF( INFO.NE.0 ) + $ WRITE( NOUT, FMT = 9980 )'SDRVST', INFO + END IF + 290 CONTINUE +* + ELSE IF( LSAMEN( 3, C3, 'SSG' ) ) THEN +* +* ---------------------------------------------- +* SSG: Symmetric Generalized Eigenvalue Problem +* ---------------------------------------------- +* Vary the parameters +* NB = block size +* NBMIN = minimum block size +* NX = crossover point +* + MAXTYP = 21 + NTYPES = MIN( MAXTYP, NTYPES ) + CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) + CALL XLAENV( 9, 25 ) + DO 310 I = 1, NPARMS + CALL XLAENV( 1, NBVAL( I ) ) + CALL XLAENV( 2, NBMIN( I ) ) + CALL XLAENV( 3, NXVAL( I ) ) +* + IF( NEWSD.EQ.0 ) THEN + DO 300 K = 1, 4 + ISEED( K ) = IOLDSD( K ) + 300 CONTINUE + END IF + WRITE( NOUT, FMT = 9997 )C3, NBVAL( I ), NBMIN( I ), + $ NXVAL( I ) + IF( TSTCHK ) THEN +* CALL SDRVSG( NN, NVAL, MAXTYP, DOTYPE, ISEED, THRESH, +* $ NOUT, A( 1, 1 ), NMAX, A( 1, 2 ), NMAX, +* $ D( 1, 3 ), A( 1, 3 ), NMAX, A( 1, 4 ), +* $ A( 1, 5 ), A( 1, 6 ), A( 1, 7 ), WORK, +* $ LWORK, IWORK, LIWORK, RESULT, INFO ) + CALL SDRVSG2STG( NN, NVAL, MAXTYP, DOTYPE, ISEED, THRESH, + $ NOUT, A( 1, 1 ), NMAX, A( 1, 2 ), NMAX, + $ D( 1, 3 ), D( 1, 3 ), A( 1, 3 ), NMAX, + $ A( 1, 4 ), A( 1, 5 ), A( 1, 6 ), + $ A( 1, 7 ), WORK, LWORK, IWORK, LIWORK, + $ RESULT, INFO ) + IF( INFO.NE.0 ) + $ WRITE( NOUT, FMT = 9980 )'SDRVSG', INFO + END IF + 310 CONTINUE +* + ELSE IF( LSAMEN( 3, C3, 'SBD' ) .OR. 
LSAMEN( 3, C3, 'SVD' ) ) THEN +* +* ---------------------------------- +* SVD: Singular Value Decomposition +* ---------------------------------- +* Vary the parameters +* NB = block size +* NBMIN = minimum block size +* NX = crossover point +* NRHS = number of right hand sides +* + MAXTYP = 16 + NTYPES = MIN( MAXTYP, NTYPES ) + CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) + CALL XLAENV( 1, 1 ) + CALL XLAENV( 9, 25 ) +* +* Test the error exits +* + IF( TSTERR .AND. TSTCHK ) + $ CALL SERRBD( 'SBD', NOUT ) + IF( TSTERR .AND. TSTDRV ) + $ CALL SERRED( 'SBD', NOUT ) +* + DO 330 I = 1, NPARMS + NRHS = NSVAL( I ) + CALL XLAENV( 1, NBVAL( I ) ) + CALL XLAENV( 2, NBMIN( I ) ) + CALL XLAENV( 3, NXVAL( I ) ) + IF( NEWSD.EQ.0 ) THEN + DO 320 K = 1, 4 + ISEED( K ) = IOLDSD( K ) + 320 CONTINUE + END IF + WRITE( NOUT, FMT = 9995 )C3, NBVAL( I ), NBMIN( I ), + $ NXVAL( I ), NRHS + IF( TSTCHK ) THEN + CALL SCHKBD( NN, MVAL, NVAL, MAXTYP, DOTYPE, NRHS, ISEED, + $ THRESH, A( 1, 1 ), NMAX, D( 1, 1 ), + $ D( 1, 2 ), D( 1, 3 ), D( 1, 4 ), A( 1, 2 ), + $ NMAX, A( 1, 3 ), A( 1, 4 ), A( 1, 5 ), NMAX, + $ A( 1, 6 ), NMAX, A( 1, 7 ), A( 1, 8 ), WORK, + $ LWORK, IWORK, NOUT, INFO ) + IF( INFO.NE.0 ) + $ WRITE( NOUT, FMT = 9980 )'SCHKBD', INFO + END IF + IF( TSTDRV ) + $ CALL SDRVBD( NN, MVAL, NVAL, MAXTYP, DOTYPE, ISEED, + $ THRESH, A( 1, 1 ), NMAX, A( 1, 2 ), NMAX, + $ A( 1, 3 ), NMAX, A( 1, 4 ), A( 1, 5 ), + $ A( 1, 6 ), D( 1, 1 ), D( 1, 2 ), D( 1, 3 ), + $ WORK, LWORK, IWORK, NOUT, INFO ) + 330 CONTINUE +* + ELSE IF( LSAMEN( 3, C3, 'SEV' ) ) THEN +* +* -------------------------------------------- +* SEV: Nonsymmetric Eigenvalue Problem Driver +* SGEEV (eigenvalues and eigenvectors) +* -------------------------------------------- +* + MAXTYP = 21 + NTYPES = MIN( MAXTYP, NTYPES ) + IF( NTYPES.LE.0 ) THEN + WRITE( NOUT, FMT = 9990 )C3 + ELSE + IF( TSTERR ) + $ CALL SERRED( C3, NOUT ) + CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) + CALL SDRVEV( NN, NVAL, NTYPES, DOTYPE, 
ISEED, THRESH, NOUT, + $ A( 1, 1 ), NMAX, A( 1, 2 ), D( 1, 1 ), + $ D( 1, 2 ), D( 1, 3 ), D( 1, 4 ), A( 1, 3 ), + $ NMAX, A( 1, 4 ), NMAX, A( 1, 5 ), NMAX, RESULT, + $ WORK, LWORK, IWORK, INFO ) + IF( INFO.NE.0 ) + $ WRITE( NOUT, FMT = 9980 )'SGEEV', INFO + END IF + WRITE( NOUT, FMT = 9973 ) + GO TO 10 +* + ELSE IF( LSAMEN( 3, C3, 'SES' ) ) THEN +* +* -------------------------------------------- +* SES: Nonsymmetric Eigenvalue Problem Driver +* SGEES (Schur form) +* -------------------------------------------- +* + MAXTYP = 21 + NTYPES = MIN( MAXTYP, NTYPES ) + IF( NTYPES.LE.0 ) THEN + WRITE( NOUT, FMT = 9990 )C3 + ELSE + IF( TSTERR ) + $ CALL SERRED( C3, NOUT ) + CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) + CALL SDRVES( NN, NVAL, NTYPES, DOTYPE, ISEED, THRESH, NOUT, + $ A( 1, 1 ), NMAX, A( 1, 2 ), A( 1, 3 ), + $ D( 1, 1 ), D( 1, 2 ), D( 1, 3 ), D( 1, 4 ), + $ A( 1, 4 ), NMAX, RESULT, WORK, LWORK, IWORK, + $ LOGWRK, INFO ) + IF( INFO.NE.0 ) + $ WRITE( NOUT, FMT = 9980 )'SGEES', INFO + END IF + WRITE( NOUT, FMT = 9973 ) + GO TO 10 +* + ELSE IF( LSAMEN( 3, C3, 'SVX' ) ) THEN +* +* -------------------------------------------------------------- +* SVX: Nonsymmetric Eigenvalue Problem Expert Driver +* SGEEVX (eigenvalues, eigenvectors and condition numbers) +* -------------------------------------------------------------- +* + MAXTYP = 21 + NTYPES = MIN( MAXTYP, NTYPES ) + IF( NTYPES.LT.0 ) THEN + WRITE( NOUT, FMT = 9990 )C3 + ELSE + IF( TSTERR ) + $ CALL SERRED( C3, NOUT ) + CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) + CALL SDRVVX( NN, NVAL, NTYPES, DOTYPE, ISEED, THRESH, NIN, + $ NOUT, A( 1, 1 ), NMAX, A( 1, 2 ), D( 1, 1 ), + $ D( 1, 2 ), D( 1, 3 ), D( 1, 4 ), A( 1, 3 ), + $ NMAX, A( 1, 4 ), NMAX, A( 1, 5 ), NMAX, + $ D( 1, 5 ), D( 1, 6 ), D( 1, 7 ), D( 1, 8 ), + $ D( 1, 9 ), D( 1, 10 ), D( 1, 11 ), D( 1, 12 ), + $ RESULT, WORK, LWORK, IWORK, INFO ) + IF( INFO.NE.0 ) + $ WRITE( NOUT, FMT = 9980 )'SGEEVX', INFO + END IF + WRITE( NOUT, FMT = 9973 ) + 
GO TO 10 +* + ELSE IF( LSAMEN( 3, C3, 'SSX' ) ) THEN +* +* --------------------------------------------------- +* SSX: Nonsymmetric Eigenvalue Problem Expert Driver +* SGEESX (Schur form and condition numbers) +* --------------------------------------------------- +* + MAXTYP = 21 + NTYPES = MIN( MAXTYP, NTYPES ) + IF( NTYPES.LT.0 ) THEN + WRITE( NOUT, FMT = 9990 )C3 + ELSE + IF( TSTERR ) + $ CALL SERRED( C3, NOUT ) + CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) + CALL SDRVSX( NN, NVAL, NTYPES, DOTYPE, ISEED, THRESH, NIN, + $ NOUT, A( 1, 1 ), NMAX, A( 1, 2 ), A( 1, 3 ), + $ D( 1, 1 ), D( 1, 2 ), D( 1, 3 ), D( 1, 4 ), + $ D( 1, 5 ), D( 1, 6 ), A( 1, 4 ), NMAX, + $ A( 1, 5 ), RESULT, WORK, LWORK, IWORK, LOGWRK, + $ INFO ) + IF( INFO.NE.0 ) + $ WRITE( NOUT, FMT = 9980 )'SGEESX', INFO + END IF + WRITE( NOUT, FMT = 9973 ) + GO TO 10 +* + ELSE IF( LSAMEN( 3, C3, 'SGG' ) ) THEN +* +* ------------------------------------------------- +* SGG: Generalized Nonsymmetric Eigenvalue Problem +* ------------------------------------------------- +* Vary the parameters +* NB = block size +* NBMIN = minimum block size +* NS = number of shifts +* MAXB = minimum submatrix size +* IACC22: structured matrix multiply +* NBCOL = minimum column dimension for blocks +* + MAXTYP = 26 + NTYPES = MIN( MAXTYP, NTYPES ) + CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) + CALL XLAENV(1,1) + IF( TSTCHK .AND. TSTERR ) + & CALL SERRGG( C3, NOUT ) + DO 350 I = 1, NPARMS + CALL XLAENV( 1, NBVAL( I ) ) + CALL XLAENV( 2, NBMIN( I ) ) + CALL XLAENV( 4, NSVAL( I ) ) + CALL XLAENV( 8, MXBVAL( I ) ) + CALL XLAENV( 16, IACC22( I ) ) + CALL XLAENV( 5, NBCOL( I ) ) +* + IF( NEWSD.EQ.0 ) THEN + DO 340 K = 1, 4 + ISEED( K ) = IOLDSD( K ) + 340 CONTINUE + END IF + WRITE( NOUT, FMT = 9996 )C3, NBVAL( I ), NBMIN( I ), + $ NSVAL( I ), MXBVAL( I ), IACC22( I ), NBCOL( I ) + TSTDIF = .FALSE. + THRSHN = 10. 
+ IF( TSTCHK ) THEN + CALL SCHKGG( NN, NVAL, MAXTYP, DOTYPE, ISEED, THRESH, + $ TSTDIF, THRSHN, NOUT, A( 1, 1 ), NMAX, + $ A( 1, 2 ), A( 1, 3 ), A( 1, 4 ), A( 1, 5 ), + $ A( 1, 6 ), A( 1, 7 ), A( 1, 8 ), A( 1, 9 ), + $ NMAX, A( 1, 10 ), A( 1, 11 ), A( 1, 12 ), + $ D( 1, 1 ), D( 1, 2 ), D( 1, 3 ), D( 1, 4 ), + $ D( 1, 5 ), D( 1, 6 ), A( 1, 13 ), + $ A( 1, 14 ), WORK, LWORK, LOGWRK, RESULT, + $ INFO ) + IF( INFO.NE.0 ) + $ WRITE( NOUT, FMT = 9980 )'SCHKGG', INFO + END IF + 350 CONTINUE +* + ELSE IF( LSAMEN( 3, C3, 'SGS' ) ) THEN +* +* ------------------------------------------------- +* SGS: Generalized Nonsymmetric Eigenvalue Problem +* SGGES (Schur form) +* ------------------------------------------------- +* + MAXTYP = 26 + NTYPES = MIN( MAXTYP, NTYPES ) + IF( NTYPES.LE.0 ) THEN + WRITE( NOUT, FMT = 9990 )C3 + ELSE + IF( TSTERR ) + $ CALL SERRGG( C3, NOUT ) + CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) + CALL SDRGES( NN, NVAL, MAXTYP, DOTYPE, ISEED, THRESH, NOUT, + $ A( 1, 1 ), NMAX, A( 1, 2 ), A( 1, 3 ), + $ A( 1, 4 ), A( 1, 7 ), NMAX, A( 1, 8 ), + $ D( 1, 1 ), D( 1, 2 ), D( 1, 3 ), WORK, LWORK, + $ RESULT, LOGWRK, INFO ) +* + IF( INFO.NE.0 ) + $ WRITE( NOUT, FMT = 9980 )'SDRGES', INFO +* +* Blocked version +* + CALL XLAENV(16,1) + CALL SDRGES3( NN, NVAL, MAXTYP, DOTYPE, ISEED, THRESH, NOUT, + $ A( 1, 1 ), NMAX, A( 1, 2 ), A( 1, 3 ), + $ A( 1, 4 ), A( 1, 7 ), NMAX, A( 1, 8 ), + $ D( 1, 1 ), D( 1, 2 ), D( 1, 3 ), WORK, LWORK, + $ RESULT, LOGWRK, INFO ) +* + IF( INFO.NE.0 ) + $ WRITE( NOUT, FMT = 9980 )'SDRGES3', INFO + END IF + WRITE( NOUT, FMT = 9973 ) + GO TO 10 +* + ELSE IF( SGX ) THEN +* +* ------------------------------------------------- +* SGX: Generalized Nonsymmetric Eigenvalue Problem +* SGGESX (Schur form and condition numbers) +* ------------------------------------------------- +* + MAXTYP = 5 + NTYPES = MAXTYP + IF( NN.LT.0 ) THEN + WRITE( NOUT, FMT = 9990 )C3 + ELSE + IF( TSTERR ) + $ CALL SERRGG( C3, NOUT ) + CALL ALAREQ( C3, NTYPES, 
DOTYPE, MAXTYP, NIN, NOUT ) + CALL XLAENV( 5, 2 ) + CALL SDRGSX( NN, NCMAX, THRESH, NIN, NOUT, A( 1, 1 ), NMAX, + $ A( 1, 2 ), A( 1, 3 ), A( 1, 4 ), A( 1, 5 ), + $ A( 1, 6 ), D( 1, 1 ), D( 1, 2 ), D( 1, 3 ), + $ C( 1, 1 ), NCMAX*NCMAX, A( 1, 12 ), WORK, + $ LWORK, IWORK, LIWORK, LOGWRK, INFO ) + IF( INFO.NE.0 ) + $ WRITE( NOUT, FMT = 9980 )'SDRGSX', INFO + END IF + WRITE( NOUT, FMT = 9973 ) + GO TO 10 +* + ELSE IF( LSAMEN( 3, C3, 'SGV' ) ) THEN +* +* ------------------------------------------------- +* SGV: Generalized Nonsymmetric Eigenvalue Problem +* SGGEV (Eigenvalue/vector form) +* ------------------------------------------------- +* + MAXTYP = 26 + NTYPES = MIN( MAXTYP, NTYPES ) + IF( NTYPES.LE.0 ) THEN + WRITE( NOUT, FMT = 9990 )C3 + ELSE + IF( TSTERR ) + $ CALL SERRGG( C3, NOUT ) + CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) + CALL SDRGEV( NN, NVAL, MAXTYP, DOTYPE, ISEED, THRESH, NOUT, + $ A( 1, 1 ), NMAX, A( 1, 2 ), A( 1, 3 ), + $ A( 1, 4 ), A( 1, 7 ), NMAX, A( 1, 8 ), + $ A( 1, 9 ), NMAX, D( 1, 1 ), D( 1, 2 ), + $ D( 1, 3 ), D( 1, 4 ), D( 1, 5 ), D( 1, 6 ), + $ WORK, LWORK, RESULT, INFO ) + IF( INFO.NE.0 ) + $ WRITE( NOUT, FMT = 9980 )'SDRGEV', INFO +* +* Blocked version +* + CALL SDRGEV3( NN, NVAL, MAXTYP, DOTYPE, ISEED, THRESH, NOUT, + $ A( 1, 1 ), NMAX, A( 1, 2 ), A( 1, 3 ), + $ A( 1, 4 ), A( 1, 7 ), NMAX, A( 1, 8 ), + $ A( 1, 9 ), NMAX, D( 1, 1 ), D( 1, 2 ), + $ D( 1, 3 ), D( 1, 4 ), D( 1, 5 ), D( 1, 6 ), + $ WORK, LWORK, RESULT, INFO ) + IF( INFO.NE.0 ) + $ WRITE( NOUT, FMT = 9980 )'SDRGEV3', INFO + END IF + WRITE( NOUT, FMT = 9973 ) + GO TO 10 +* + ELSE IF( SXV ) THEN +* +* ------------------------------------------------- +* SXV: Generalized Nonsymmetric Eigenvalue Problem +* SGGEVX (eigenvalue/vector with condition numbers) +* ------------------------------------------------- +* + MAXTYP = 2 + NTYPES = MAXTYP + IF( NN.LT.0 ) THEN + WRITE( NOUT, FMT = 9990 )C3 + ELSE + IF( TSTERR ) + $ CALL SERRGG( C3, NOUT ) + CALL ALAREQ( C3, NTYPES, 
DOTYPE, MAXTYP, NIN, NOUT ) + CALL SDRGVX( NN, THRESH, NIN, NOUT, A( 1, 1 ), NMAX, + $ A( 1, 2 ), A( 1, 3 ), A( 1, 4 ), D( 1, 1 ), + $ D( 1, 2 ), D( 1, 3 ), A( 1, 5 ), A( 1, 6 ), + $ IWORK( 1 ), IWORK( 2 ), D( 1, 4 ), D( 1, 5 ), + $ D( 1, 6 ), D( 1, 7 ), D( 1, 8 ), D( 1, 9 ), + $ WORK, LWORK, IWORK( 3 ), LIWORK-2, RESULT, + $ LOGWRK, INFO ) +* + IF( INFO.NE.0 ) + $ WRITE( NOUT, FMT = 9980 )'SDRGVX', INFO + END IF + WRITE( NOUT, FMT = 9973 ) + GO TO 10 +* + ELSE IF( LSAMEN( 3, C3, 'SSB' ) ) THEN +* +* ------------------------------ +* SSB: Symmetric Band Reduction +* ------------------------------ +* + MAXTYP = 15 + NTYPES = MIN( MAXTYP, NTYPES ) + CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) + IF( TSTERR ) + $ CALL SERRST( 'SSB', NOUT ) +* CALL SCHKSB( NN, NVAL, NK, KVAL, MAXTYP, DOTYPE, ISEED, THRESH, +* $ NOUT, A( 1, 1 ), NMAX, D( 1, 1 ), D( 1, 2 ), +* $ A( 1, 2 ), NMAX, WORK, LWORK, RESULT, INFO ) + CALL SCHKSB2STG( NN, NVAL, NK, KVAL, MAXTYP, DOTYPE, ISEED, + $ THRESH, NOUT, A( 1, 1 ), NMAX, D( 1, 1 ), + $ D( 1, 2 ), D( 1, 3 ), D( 1, 4 ), D( 1, 5 ), + $ A( 1, 2 ), NMAX, WORK, LWORK, RESULT, INFO ) + IF( INFO.NE.0 ) + $ WRITE( NOUT, FMT = 9980 )'SCHKSB', INFO +* + ELSE IF( LSAMEN( 3, C3, 'SBB' ) ) THEN +* +* ------------------------------ +* SBB: General Band Reduction +* ------------------------------ +* + MAXTYP = 15 + NTYPES = MIN( MAXTYP, NTYPES ) + CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) + DO 370 I = 1, NPARMS + NRHS = NSVAL( I ) +* + IF( NEWSD.EQ.0 ) THEN + DO 360 K = 1, 4 + ISEED( K ) = IOLDSD( K ) + 360 CONTINUE + END IF + WRITE( NOUT, FMT = 9966 )C3, NRHS + CALL SCHKBB( NN, MVAL, NVAL, NK, KVAL, MAXTYP, DOTYPE, NRHS, + $ ISEED, THRESH, NOUT, A( 1, 1 ), NMAX, + $ A( 1, 2 ), 2*NMAX, D( 1, 1 ), D( 1, 2 ), + $ A( 1, 4 ), NMAX, A( 1, 5 ), NMAX, A( 1, 6 ), + $ NMAX, A( 1, 7 ), WORK, LWORK, RESULT, INFO ) + IF( INFO.NE.0 ) + $ WRITE( NOUT, FMT = 9980 )'SCHKBB', INFO + 370 CONTINUE +* + ELSE IF( LSAMEN( 3, C3, 'GLM' ) ) THEN +* +* 
----------------------------------------- +* GLM: Generalized Linear Regression Model +* ----------------------------------------- +* + CALL XLAENV( 1, 1 ) + IF( TSTERR ) + $ CALL SERRGG( 'GLM', NOUT ) + CALL SCKGLM( NN, MVAL, PVAL, NVAL, NTYPES, ISEED, THRESH, NMAX, + $ A( 1, 1 ), A( 1, 2 ), B( 1, 1 ), B( 1, 2 ), X, + $ WORK, D( 1, 1 ), NIN, NOUT, INFO ) + IF( INFO.NE.0 ) + $ WRITE( NOUT, FMT = 9980 )'SCKGLM', INFO +* + ELSE IF( LSAMEN( 3, C3, 'GQR' ) ) THEN +* +* ------------------------------------------ +* GQR: Generalized QR and RQ factorizations +* ------------------------------------------ +* + CALL XLAENV( 1, 1 ) + IF( TSTERR ) + $ CALL SERRGG( 'GQR', NOUT ) + CALL SCKGQR( NN, MVAL, NN, PVAL, NN, NVAL, NTYPES, ISEED, + $ THRESH, NMAX, A( 1, 1 ), A( 1, 2 ), A( 1, 3 ), + $ A( 1, 4 ), TAUA, B( 1, 1 ), B( 1, 2 ), B( 1, 3 ), + $ B( 1, 4 ), B( 1, 5 ), TAUB, WORK, D( 1, 1 ), NIN, + $ NOUT, INFO ) + IF( INFO.NE.0 ) + $ WRITE( NOUT, FMT = 9980 )'SCKGQR', INFO +* + ELSE IF( LSAMEN( 3, C3, 'GSV' ) ) THEN +* +* ---------------------------------------------- +* GSV: Generalized Singular Value Decomposition +* ---------------------------------------------- +* + CALL XLAENV( 1, 1 ) + IF( TSTERR ) + $ CALL SERRGG( 'GSV', NOUT ) + CALL SCKGSV( NN, MVAL, PVAL, NVAL, NTYPES, ISEED, THRESH, NMAX, + $ A( 1, 1 ), A( 1, 2 ), B( 1, 1 ), B( 1, 2 ), + $ A( 1, 3 ), B( 1, 3 ), A( 1, 4 ), TAUA, TAUB, + $ B( 1, 4 ), IWORK, WORK, D( 1, 1 ), NIN, NOUT, + $ INFO ) + IF( INFO.NE.0 ) + $ WRITE( NOUT, FMT = 9980 )'SCKGSV', INFO +* + ELSE IF( LSAMEN( 3, C3, 'CSD' ) ) THEN +* +* ---------------------------------------------- +* CSD: CS Decomposition +* ---------------------------------------------- +* + CALL XLAENV(1,1) + IF( TSTERR ) + $ CALL SERRGG( 'CSD', NOUT ) + CALL SCKCSD( NN, MVAL, PVAL, NVAL, NTYPES, ISEED, THRESH, NMAX, + $ A( 1, 1 ), A( 1, 2 ), A( 1, 3 ), A( 1, 4 ), + $ A( 1, 5 ), A( 1, 6 ), A( 1, 7 ), IWORK, WORK, + $ D( 1, 1 ), NIN, NOUT, INFO ) + IF( INFO.NE.0 ) + $ WRITE( NOUT, 
FMT = 9980 )'SCKCSD', INFO +* + ELSE IF( LSAMEN( 3, C3, 'LSE' ) ) THEN +* +* -------------------------------------- +* LSE: Constrained Linear Least Squares +* -------------------------------------- +* + CALL XLAENV( 1, 1 ) + IF( TSTERR ) + $ CALL SERRGG( 'LSE', NOUT ) + CALL SCKLSE( NN, MVAL, PVAL, NVAL, NTYPES, ISEED, THRESH, NMAX, + $ A( 1, 1 ), A( 1, 2 ), B( 1, 1 ), B( 1, 2 ), X, + $ WORK, D( 1, 1 ), NIN, NOUT, INFO ) + IF( INFO.NE.0 ) + $ WRITE( NOUT, FMT = 9980 )'SCKLSE', INFO +* + ELSE + WRITE( NOUT, FMT = * ) + WRITE( NOUT, FMT = * ) + WRITE( NOUT, FMT = 9992 )C3 + END IF + IF( .NOT.( SGX .OR. SXV ) ) + $ GO TO 190 + 380 CONTINUE + WRITE( NOUT, FMT = 9994 ) + S2 = SECOND( ) + WRITE( NOUT, FMT = 9993 )S2 - S1 +* + DEALLOCATE (A, STAT = AllocateStatus) + DEALLOCATE (B, STAT = AllocateStatus) + DEALLOCATE (C, STAT = AllocateStatus) + DEALLOCATE (WORK, STAT = AllocateStatus) +* + 9999 FORMAT( / ' Execution not attempted due to input errors' ) + 9997 FORMAT( / / 1X, A3, ': NB =', I4, ', NBMIN =', I4, ', NX =', I4 ) + 9996 FORMAT( / / 1X, A3, ': NB =', I4, ', NBMIN =', I4, ', NS =', I4, + $ ', MAXB =', I4, ', IACC22 =', I4, ', NBCOL =', I4 ) + 9995 FORMAT( / / 1X, A3, ': NB =', I4, ', NBMIN =', I4, ', NX =', I4, + $ ', NRHS =', I4 ) + 9994 FORMAT( / / ' End of tests' ) + 9993 FORMAT( ' Total time used = ', F12.2, ' seconds', / ) + 9992 FORMAT( 1X, A3, ': Unrecognized path name' ) + 9991 FORMAT( / / ' *** Invalid integer value in column ', I2, + $ ' of input', ' line:', / A79 ) + 9990 FORMAT( / / 1X, A3, ' routines were not tested' ) + 9989 FORMAT( ' Invalid input value: ', A, '=', I6, '; must be >=', + $ I6 ) + 9988 FORMAT( ' Invalid input value: ', A, '=', I6, '; must be <=', + $ I6 ) + 9987 FORMAT( ' Tests of the Nonsymmetric Eigenvalue Problem routines' ) + 9986 FORMAT( ' Tests of the Symmetric Eigenvalue Problem routines' ) + 9985 FORMAT( ' Tests of the Singular Value Decomposition routines' ) + 9984 FORMAT( / ' The following parameter values will be used:' ) 
+ 9983 FORMAT( 4X, A, 10I6, / 10X, 10I6 ) + 9982 FORMAT( / ' Routines pass computational tests if test ratio is ', + $ 'less than', F8.2, / ) + 9981 FORMAT( ' Relative machine ', A, ' is taken to be', E16.6 ) + 9980 FORMAT( ' *** Error code from ', A, ' = ', I4 ) + 9979 FORMAT( / ' Tests of the Nonsymmetric Eigenvalue Problem Driver', + $ / ' SGEEV (eigenvalues and eigevectors)' ) + 9978 FORMAT( / ' Tests of the Nonsymmetric Eigenvalue Problem Driver', + $ / ' SGEES (Schur form)' ) + 9977 FORMAT( / ' Tests of the Nonsymmetric Eigenvalue Problem Expert', + $ ' Driver', / ' SGEEVX (eigenvalues, eigenvectors and', + $ ' condition numbers)' ) + 9976 FORMAT( / ' Tests of the Nonsymmetric Eigenvalue Problem Expert', + $ ' Driver', / ' SGEESX (Schur form and condition', + $ ' numbers)' ) + 9975 FORMAT( / ' Tests of the Generalized Nonsymmetric Eigenvalue ', + $ 'Problem routines' ) + 9974 FORMAT( ' Tests of SSBTRD', / ' (reduction of a symmetric band ', + $ 'matrix to tridiagonal form)' ) + 9973 FORMAT( / 1X, 71( '-' ) ) + 9972 FORMAT( / ' LAPACK VERSION ', I1, '.', I1, '.', I1 ) + 9971 FORMAT( / ' Tests of the Generalized Linear Regression Model ', + $ 'routines' ) + 9970 FORMAT( / ' Tests of the Generalized QR and RQ routines' ) + 9969 FORMAT( / ' Tests of the Generalized Singular Value', + $ ' Decomposition routines' ) + 9968 FORMAT( / ' Tests of the Linear Least Squares routines' ) + 9967 FORMAT( ' Tests of SGBBRD', / ' (reduction of a general band ', + $ 'matrix to real bidiagonal form)' ) + 9966 FORMAT( / / 1X, A3, ': NRHS =', I4 ) + 9965 FORMAT( / ' Tests of the Generalized Nonsymmetric Eigenvalue ', + $ 'Problem Expert Driver SGGESX' ) + 9964 FORMAT( / ' Tests of the Generalized Nonsymmetric Eigenvalue ', + $ 'Problem Driver SGGES' ) + 9963 FORMAT( / ' Tests of the Generalized Nonsymmetric Eigenvalue ', + $ 'Problem Driver SGGEV' ) + 9962 FORMAT( / ' Tests of the Generalized Nonsymmetric Eigenvalue ', + $ 'Problem Expert Driver SGGEVX' ) + 9961 FORMAT( / / 1X, A3, 
': NB =', I4, ', NBMIN =', I4, ', NX =', I4, + $ ', INMIN=', I4, + $ ', INWIN =', I4, ', INIBL =', I4, ', ISHFTS =', I4, + $ ', IACC22 =', I4) + 9960 FORMAT( / ' Tests of the CS Decomposition routines' ) +* +* End of SCHKEE +* + END From 9b7b1da133a6c9c6d77d36dc37247044551ccd75 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sun, 28 Feb 2021 18:50:26 +0100 Subject: [PATCH 111/134] Add rewritten dchkee.F from Reference-LAPACK PR335 --- lapack-netlib/TESTING/EIG/dchkee.F | 2538 ++++++++++++++++++++++++++++ 1 file changed, 2538 insertions(+) create mode 100644 lapack-netlib/TESTING/EIG/dchkee.F diff --git a/lapack-netlib/TESTING/EIG/dchkee.F b/lapack-netlib/TESTING/EIG/dchkee.F new file mode 100644 index 000000000..ee22ce33d --- /dev/null +++ b/lapack-netlib/TESTING/EIG/dchkee.F @@ -0,0 +1,2538 @@ +*> \brief \b DCHKEE +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* PROGRAM DCHKEE +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DCHKEE tests the DOUBLE PRECISION LAPACK subroutines for the matrix +*> eigenvalue problem. 
The test paths in this version are +*> +*> NEP (Nonsymmetric Eigenvalue Problem): +*> Test DGEHRD, DORGHR, DHSEQR, DTREVC, DHSEIN, and DORMHR +*> +*> SEP (Symmetric Eigenvalue Problem): +*> Test DSYTRD, DORGTR, DSTEQR, DSTERF, DSTEIN, DSTEDC, +*> and drivers DSYEV(X), DSBEV(X), DSPEV(X), DSTEV(X), +*> DSYEVD, DSBEVD, DSPEVD, DSTEVD +*> +*> SVD (Singular Value Decomposition): +*> Test DGEBRD, DORGBR, DBDSQR, DBDSDC +*> and the drivers DGESVD, DGESDD +*> +*> DEV (Nonsymmetric Eigenvalue/eigenvector Driver): +*> Test DGEEV +*> +*> DES (Nonsymmetric Schur form Driver): +*> Test DGEES +*> +*> DVX (Nonsymmetric Eigenvalue/eigenvector Expert Driver): +*> Test DGEEVX +*> +*> DSX (Nonsymmetric Schur form Expert Driver): +*> Test DGEESX +*> +*> DGG (Generalized Nonsymmetric Eigenvalue Problem): +*> Test DGGHD3, DGGBAL, DGGBAK, DHGEQZ, and DTGEVC +*> +*> DGS (Generalized Nonsymmetric Schur form Driver): +*> Test DGGES +*> +*> DGV (Generalized Nonsymmetric Eigenvalue/eigenvector Driver): +*> Test DGGEV +*> +*> DGX (Generalized Nonsymmetric Schur form Expert Driver): +*> Test DGGESX +*> +*> DXV (Generalized Nonsymmetric Eigenvalue/eigenvector Expert Driver): +*> Test DGGEVX +*> +*> DSG (Symmetric Generalized Eigenvalue Problem): +*> Test DSYGST, DSYGV, DSYGVD, DSYGVX, DSPGST, DSPGV, DSPGVD, +*> DSPGVX, DSBGST, DSBGV, DSBGVD, and DSBGVX +*> +*> DSB (Symmetric Band Eigenvalue Problem): +*> Test DSBTRD +*> +*> DBB (Band Singular Value Decomposition): +*> Test DGBBRD +*> +*> DEC (Eigencondition estimation): +*> Test DLALN2, DLASY2, DLAEQU, DLAEXC, DTRSYL, DTREXC, DTRSNA, +*> DTRSEN, and DLAQTR +*> +*> DBL (Balancing a general matrix) +*> Test DGEBAL +*> +*> DBK (Back transformation on a balanced matrix) +*> Test DGEBAK +*> +*> DGL (Balancing a matrix pair) +*> Test DGGBAL +*> +*> DGK (Back transformation on a matrix pair) +*> Test DGGBAK +*> +*> GLM (Generalized Linear Regression Model): +*> Tests DGGGLM +*> +*> GQR (Generalized QR and RQ factorizations): +*> Tests DGGQRF and 
DGGRQF +*> +*> GSV (Generalized Singular Value Decomposition): +*> Tests DGGSVD, DGGSVP, DTGSJA, DLAGS2, DLAPLL, and DLAPMT +*> +*> CSD (CS decomposition): +*> Tests DORCSD +*> +*> LSE (Constrained Linear Least Squares): +*> Tests DGGLSE +*> +*> Each test path has a different set of inputs, but the data sets for +*> the driver routines xEV, xES, xVX, and xSX can be concatenated in a +*> single input file. The first line of input should contain one of the +*> 3-character path names in columns 1-3. The number of remaining lines +*> depends on what is found on the first line. +*> +*> The number of matrix types used in testing is often controllable from +*> the input file. The number of matrix types for each path, and the +*> test routine that describes them, is as follows: +*> +*> Path name(s) Types Test routine +*> +*> DHS or NEP 21 DCHKHS +*> DST or SEP 21 DCHKST (routines) +*> 18 DDRVST (drivers) +*> DBD or SVD 16 DCHKBD (routines) +*> 5 DDRVBD (drivers) +*> DEV 21 DDRVEV +*> DES 21 DDRVES +*> DVX 21 DDRVVX +*> DSX 21 DDRVSX +*> DGG 26 DCHKGG (routines) +*> DGS 26 DDRGES +*> DGX 5 DDRGSX +*> DGV 26 DDRGEV +*> DXV 2 DDRGVX +*> DSG 21 DDRVSG +*> DSB 15 DCHKSB +*> DBB 15 DCHKBB +*> DEC - DCHKEC +*> DBL - DCHKBL +*> DBK - DCHKBK +*> DGL - DCHKGL +*> DGK - DCHKGK +*> GLM 8 DCKGLM +*> GQR 8 DCKGQR +*> GSV 8 DCKGSV +*> CSD 3 DCKCSD +*> LSE 8 DCKLSE +*> +*>----------------------------------------------------------------------- +*> +*> NEP input file: +*> +*> line 2: NN, INTEGER +*> Number of values of N. +*> +*> line 3: NVAL, INTEGER array, dimension (NN) +*> The values for the matrix dimension N. +*> +*> line 4: NPARMS, INTEGER +*> Number of values of the parameters NB, NBMIN, NX, NS, and +*> MAXB. +*> +*> line 5: NBVAL, INTEGER array, dimension (NPARMS) +*> The values for the blocksize NB. +*> +*> line 6: NBMIN, INTEGER array, dimension (NPARMS) +*> The values for the minimum blocksize NBMIN. 
+*> +*> line 7: NXVAL, INTEGER array, dimension (NPARMS) +*> The values for the crossover point NX. +*> +*> line 8: INMIN, INTEGER array, dimension (NPARMS) +*> LAHQR vs TTQRE crossover point, >= 11 +*> +*> line 9: INWIN, INTEGER array, dimension (NPARMS) +*> recommended deflation window size +*> +*> line 10: INIBL, INTEGER array, dimension (NPARMS) +*> nibble crossover point +*> +*> line 11: ISHFTS, INTEGER array, dimension (NPARMS) +*> number of simultaneous shifts +*> +*> line 12: IACC22, INTEGER array, dimension (NPARMS) +*> select structured matrix multiply: 0, 1 or 2 +*> +*> line 13: THRESH +*> Threshold value for the test ratios. Information will be +*> printed about each test for which the test ratio is greater +*> than or equal to the threshold. To have all of the test +*> ratios printed, use THRESH = 0.0 . +*> +*> line 14: NEWSD, INTEGER +*> A code indicating how to set the random number seed. +*> = 0: Set the seed to a default value before each run +*> = 1: Initialize the seed to a default value only before the +*> first run +*> = 2: Like 1, but use the seed values on the next line +*> +*> If line 14 was 2: +*> +*> line 15: INTEGER array, dimension (4) +*> Four integer values for the random number seed. +*> +*> lines 15-EOF: The remaining lines occur in sets of 1 or 2 and allow +*> the user to specify the matrix types. Each line contains +*> a 3-character path name in columns 1-3, and the number +*> of matrix types must be the first nonblank item in columns +*> 4-80. If the number of matrix types is at least 1 but is +*> less than the maximum number of possible types, a second +*> line will be read to get the numbers of the matrix types to +*> be used. For example, +*> NEP 21 +*> requests all of the matrix types for the nonsymmetric +*> eigenvalue problem, while +*> NEP 4 +*> 9 10 11 12 +*> requests only matrices of type 9, 10, 11, and 12. +*> +*> The valid 3-character path names are 'NEP' or 'DHS' for the +*> nonsymmetric eigenvalue routines.
+*> +*>----------------------------------------------------------------------- +*> +*> SEP or DSG input file: +*> +*> line 2: NN, INTEGER +*> Number of values of N. +*> +*> line 3: NVAL, INTEGER array, dimension (NN) +*> The values for the matrix dimension N. +*> +*> line 4: NPARMS, INTEGER +*> Number of values of the parameters NB, NBMIN, and NX. +*> +*> line 5: NBVAL, INTEGER array, dimension (NPARMS) +*> The values for the blocksize NB. +*> +*> line 6: NBMIN, INTEGER array, dimension (NPARMS) +*> The values for the minimum blocksize NBMIN. +*> +*> line 7: NXVAL, INTEGER array, dimension (NPARMS) +*> The values for the crossover point NX. +*> +*> line 8: THRESH +*> Threshold value for the test ratios. Information will be +*> printed about each test for which the test ratio is greater +*> than or equal to the threshold. +*> +*> line 9: TSTCHK, LOGICAL +*> Flag indicating whether or not to test the LAPACK routines. +*> +*> line 10: TSTDRV, LOGICAL +*> Flag indicating whether or not to test the driver routines. +*> +*> line 11: TSTERR, LOGICAL +*> Flag indicating whether or not to test the error exits for +*> the LAPACK routines and driver routines. +*> +*> line 12: NEWSD, INTEGER +*> A code indicating how to set the random number seed. +*> = 0: Set the seed to a default value before each run +*> = 1: Initialize the seed to a default value only before the +*> first run +*> = 2: Like 1, but use the seed values on the next line +*> +*> If line 12 was 2: +*> +*> line 13: INTEGER array, dimension (4) +*> Four integer values for the random number seed. +*> +*> lines 13-EOF: Lines specifying matrix types, as for NEP. +*> The 3-character path names are 'SEP' or 'DST' for the +*> symmetric eigenvalue routines and driver routines, and +*> 'DSG' for the routines for the symmetric generalized +*> eigenvalue problem. +*> +*>----------------------------------------------------------------------- +*> +*> SVD input file: +*> +*> line 2: NN, INTEGER +*> Number of values of M and N.
+*> +*> line 3: MVAL, INTEGER array, dimension (NN) +*> The values for the matrix row dimension M. +*> +*> line 4: NVAL, INTEGER array, dimension (NN) +*> The values for the matrix column dimension N. +*> +*> line 5: NPARMS, INTEGER +*> Number of values of the parameters NB, NBMIN, NX, and NRHS. +*> +*> line 6: NBVAL, INTEGER array, dimension (NPARMS) +*> The values for the blocksize NB. +*> +*> line 7: NBMIN, INTEGER array, dimension (NPARMS) +*> The values for the minimum blocksize NBMIN. +*> +*> line 8: NXVAL, INTEGER array, dimension (NPARMS) +*> The values for the crossover point NX. +*> +*> line 9: NSVAL, INTEGER array, dimension (NPARMS) +*> The values for the number of right hand sides NRHS. +*> +*> line 10: THRESH +*> Threshold value for the test ratios. Information will be +*> printed about each test for which the test ratio is greater +*> than or equal to the threshold. +*> +*> line 11: TSTCHK, LOGICAL +*> Flag indicating whether or not to test the LAPACK routines. +*> +*> line 12: TSTDRV, LOGICAL +*> Flag indicating whether or not to test the driver routines. +*> +*> line 13: TSTERR, LOGICAL +*> Flag indicating whether or not to test the error exits for +*> the LAPACK routines and driver routines. +*> +*> line 14: NEWSD, INTEGER +*> A code indicating how to set the random number seed. +*> = 0: Set the seed to a default value before each run +*> = 1: Initialize the seed to a default value only before the +*> first run +*> = 2: Like 1, but use the seed values on the next line +*> +*> If line 14 was 2: +*> +*> line 15: INTEGER array, dimension (4) +*> Four integer values for the random number seed. +*> +*> lines 15-EOF: Lines specifying matrix types, as for NEP. +*> The 3-character path names are 'SVD' or 'DBD' for both the +*> SVD routines and the SVD driver routines. +*> +*>----------------------------------------------------------------------- +*> +*> DEV and DES data files: +*> +*> line 1: 'DEV' or 'DES' in columns 1 to 3.
+*> +*> line 2: NSIZES, INTEGER +*> Number of sizes of matrices to use. Should be at least 0 +*> and at most 20. If NSIZES = 0, no testing is done +*> (although the remaining 3 lines are still read). +*> +*> line 3: NN, INTEGER array, dimension(NSIZES) +*> Dimensions of matrices to be tested. +*> +*> line 4: NB, NBMIN, NX, NS, NBCOL, INTEGERs +*> These integer parameters determine how blocking is done +*> (see ILAENV for details) +*> NB : block size +*> NBMIN : minimum block size +*> NX : minimum dimension for blocking +*> NS : number of shifts in xHSEQR +*> NBCOL : minimum column dimension for blocking +*> +*> line 5: THRESH, REAL +*> The test threshold against which computed residuals are +*> compared. Should generally be in the range from 10. to 20. +*> If it is 0., all test case data will be printed. +*> +*> line 6: TSTERR, LOGICAL +*> Flag indicating whether or not to test the error exits. +*> +*> line 7: NEWSD, INTEGER +*> A code indicating how to set the random number seed. +*> = 0: Set the seed to a default value before each run +*> = 1: Initialize the seed to a default value only before the +*> first run +*> = 2: Like 1, but use the seed values on the next line +*> +*> If line 7 was 2: +*> +*> line 8: INTEGER array, dimension (4) +*> Four integer values for the random number seed. +*> +*> lines 9 and following: Lines specifying matrix types, as for NEP. +*> The 3-character path name is 'DEV' to test DGEEV, or +*> 'DES' to test DGEES. +*> +*>----------------------------------------------------------------------- +*> +*> The DVX data has two parts. The first part is identical to DEV, +*> and the second part consists of test matrices with precomputed +*> solutions. +*> +*> line 1: 'DVX' in columns 1-3. +*> +*> line 2: NSIZES, INTEGER +*> If NSIZES = 0, no testing of randomly generated examples +*> is done, but any precomputed examples are tested.
+*> +*> line 3: NN, INTEGER array, dimension(NSIZES) +*> +*> line 4: NB, NBMIN, NX, NS, NBCOL, INTEGERs +*> +*> line 5: THRESH, REAL +*> +*> line 6: TSTERR, LOGICAL +*> +*> line 7: NEWSD, INTEGER +*> +*> If line 7 was 2: +*> +*> line 8: INTEGER array, dimension (4) +*> +*> lines 9 and following: The first line contains 'DVX' in columns 1-3 +*> followed by the number of matrix types, possibly with +*> a second line to specify certain matrix types. +*> If the number of matrix types = 0, no testing of randomly +*> generated examples is done, but any precomputed examples +*> are tested. +*> +*> remaining lines : Each matrix is stored on 1+2*N lines, where N is +*> its dimension. The first line contains the dimension (a +*> single integer). The next N lines contain the matrix, one +*> row per line. The last N lines correspond to each +*> eigenvalue. Each of these last N lines contains 4 real +*> values: the real part of the eigenvalue, the imaginary +*> part of the eigenvalue, the reciprocal condition number of +*> the eigenvalues, and the reciprocal condition number of the +*> eigenvector. The end of data is indicated by dimension N=0. +*> Even if no data is to be tested, there must be at least one +*> line containing N=0. +*> +*>----------------------------------------------------------------------- +*> +*> The DSX data is like DVX. The first part is identical to DEV, and the +*> second part consists of test matrices with precomputed solutions. +*> +*> line 1: 'DSX' in columns 1-3. +*> +*> line 2: NSIZES, INTEGER +*> If NSIZES = 0, no testing of randomly generated examples +*> is done, but any precomputed examples are tested. 
+*> +*> line 3: NN, INTEGER array, dimension(NSIZES) +*> +*> line 4: NB, NBMIN, NX, NS, NBCOL, INTEGERs +*> +*> line 5: THRESH, REAL +*> +*> line 6: TSTERR, LOGICAL +*> +*> line 7: NEWSD, INTEGER +*> +*> If line 7 was 2: +*> +*> line 8: INTEGER array, dimension (4) +*> +*> lines 9 and following: The first line contains 'DSX' in columns 1-3 +*> followed by the number of matrix types, possibly with +*> a second line to specify certain matrix types. +*> If the number of matrix types = 0, no testing of randomly +*> generated examples is done, but any precomputed examples +*> are tested. +*> +*> remaining lines : Each matrix is stored on 3+N lines, where N is its +*> dimension. The first line contains the dimension N and the +*> dimension M of an invariant subspace. The second line +*> contains M integers, identifying the eigenvalues in the +*> invariant subspace (by their position in a list of +*> eigenvalues ordered by increasing real part). The next N +*> lines contain the matrix. The last line contains the +*> reciprocal condition number for the average of the selected +*> eigenvalues, and the reciprocal condition number for the +*> corresponding right invariant subspace. The end of data is +*> indicated by a line containing N=0 and M=0. Even if no data +*> is to be tested, there must be at least one line containing +*> N=0 and M=0. +*> +*>----------------------------------------------------------------------- +*> +*> DGG input file: +*> +*> line 2: NN, INTEGER +*> Number of values of N. +*> +*> line 3: NVAL, INTEGER array, dimension (NN) +*> The values for the matrix dimension N. +*> +*> line 4: NPARMS, INTEGER +*> Number of values of the parameters NB, NBMIN, NS, MAXB, and +*> NBCOL. +*> +*> line 5: NBVAL, INTEGER array, dimension (NPARMS) +*> The values for the blocksize NB. +*> +*> line 6: NBMIN, INTEGER array, dimension (NPARMS) +*> The values for NBMIN, the minimum row dimension for blocks. 
+*> +*> line 7: NSVAL, INTEGER array, dimension (NPARMS) +*> The values for the number of shifts. +*> +*> line 8: MXBVAL, INTEGER array, dimension (NPARMS) +*> The values for MAXB, used in determining minimum blocksize. +*> +*> line 9: IACC22, INTEGER array, dimension (NPARMS) +*> select structured matrix multiply: 1 or 2) +*> +*> line 10: NBCOL, INTEGER array, dimension (NPARMS) +*> The values for NBCOL, the minimum column dimension for +*> blocks. +*> +*> line 11: THRESH +*> Threshold value for the test ratios. Information will be +*> printed about each test for which the test ratio is greater +*> than or equal to the threshold. +*> +*> line 12: TSTCHK, LOGICAL +*> Flag indicating whether or not to test the LAPACK routines. +*> +*> line 13: TSTDRV, LOGICAL +*> Flag indicating whether or not to test the driver routines. +*> +*> line 14: TSTERR, LOGICAL +*> Flag indicating whether or not to test the error exits for +*> the LAPACK routines and driver routines. +*> +*> line 15: NEWSD, INTEGER +*> A code indicating how to set the random number seed. +*> = 0: Set the seed to a default value before each run +*> = 1: Initialize the seed to a default value only before the +*> first run +*> = 2: Like 1, but use the seed values on the next line +*> +*> If line 15 was 2: +*> +*> line 16: INTEGER array, dimension (4) +*> Four integer values for the random number seed. +*> +*> lines 17-EOF: Lines specifying matrix types, as for NEP. +*> The 3-character path name is 'DGG' for the generalized +*> eigenvalue problem routines and driver routines. +*> +*>----------------------------------------------------------------------- +*> +*> DGS and DGV input files: +*> +*> line 1: 'DGS' or 'DGV' in columns 1 to 3. +*> +*> line 2: NN, INTEGER +*> Number of values of N. +*> +*> line 3: NVAL, INTEGER array, dimension(NN) +*> Dimensions of matrices to be tested. 
+*> +*> line 4: NB, NBMIN, NX, NS, NBCOL, INTEGERs +*> These integer parameters determine how blocking is done +*> (see ILAENV for details) +*> NB : block size +*> NBMIN : minimum block size +*> NX : minimum dimension for blocking +*> NS : number of shifts in xHGEQZ +*> NBCOL : minimum column dimension for blocking +*> +*> line 5: THRESH, REAL +*> The test threshold against which computed residuals are +*> compared. Should generally be in the range from 10. to 20. +*> If it is 0., all test case data will be printed. +*> +*> line 6: TSTERR, LOGICAL +*> Flag indicating whether or not to test the error exits. +*> +*> line 7: NEWSD, INTEGER +*> A code indicating how to set the random number seed. +*> = 0: Set the seed to a default value before each run +*> = 1: Initialize the seed to a default value only before the +*> first run +*> = 2: Like 1, but use the seed values on the next line +*> +*> If line 7 was 2: +*> +*> line 8: INTEGER array, dimension (4) +*> Four integer values for the random number seed. +*> +*> lines 8-EOF: Lines specifying matrix types, as for NEP. +*> The 3-character path name is 'DGS' for the generalized +*> eigenvalue problem routines and driver routines. +*> +*>----------------------------------------------------------------------- +*> +*> DXV input files: +*> +*> line 1: 'DXV' in columns 1 to 3. +*> +*> line 2: N, INTEGER +*> Value of N. +*> +*> line 3: NB, NBMIN, NX, NS, NBCOL, INTEGERs +*> These integer parameters determine how blocking is done +*> (see ILAENV for details) +*> NB : block size +*> NBMIN : minimum block size +*> NX : minimum dimension for blocking +*> NS : number of shifts in xHGEQZ +*> NBCOL : minimum column dimension for blocking +*> +*> line 4: THRESH, REAL +*> The test threshold against which computed residuals are +*> compared. Should generally be in the range from 10. to 20. +*> Information will be printed about each test for which the +*> test ratio is greater than or equal to the threshold.
+*> +*> line 5: TSTERR, LOGICAL +*> Flag indicating whether or not to test the error exits for +*> the LAPACK routines and driver routines. +*> +*> line 6: NEWSD, INTEGER +*> A code indicating how to set the random number seed. +*> = 0: Set the seed to a default value before each run +*> = 1: Initialize the seed to a default value only before the +*> first run +*> = 2: Like 1, but use the seed values on the next line +*> +*> If line 6 was 2: +*> +*> line 7: INTEGER array, dimension (4) +*> Four integer values for the random number seed. +*> +*> If line 2 was 0: +*> +*> lines 7-EOF: Precomputed examples are tested. +*> +*> remaining lines : Each example is stored on 3+2*N lines, where N is +*> its dimension. The first line contains the dimension (a +*> single integer). The next N lines contain the matrix A, one +*> row per line. The next N lines contain the matrix B. The +*> next line contains the reciprocals of the eigenvalue +*> condition numbers. The last line contains the reciprocals of +*> the eigenvector condition numbers. The end of data is +*> indicated by dimension N=0. Even if no data is to be tested, +*> there must be at least one line containing N=0. +*> +*>----------------------------------------------------------------------- +*> +*> DGX input files: +*> +*> line 1: 'DGX' in columns 1 to 3. +*> +*> line 2: N, INTEGER +*> Value of N. +*> +*> line 3: NB, NBMIN, NX, NS, NBCOL, INTEGERs +*> These integer parameters determine how blocking is done +*> (see ILAENV for details) +*> NB : block size +*> NBMIN : minimum block size +*> NX : minimum dimension for blocking +*> NS : number of shifts in xHGEQZ +*> NBCOL : minimum column dimension for blocking +*> +*> line 4: THRESH, REAL +*> The test threshold against which computed residuals are +*> compared. Should generally be in the range from 10. to 20. +*> Information will be printed about each test for which the +*> test ratio is greater than or equal to the threshold.
+*> +*> line 5: TSTERR, LOGICAL +*> Flag indicating whether or not to test the error exits for +*> the LAPACK routines and driver routines. +*> +*> line 6: NEWSD, INTEGER +*> A code indicating how to set the random number seed. +*> = 0: Set the seed to a default value before each run +*> = 1: Initialize the seed to a default value only before the +*> first run +*> = 2: Like 1, but use the seed values on the next line +*> +*> If line 6 was 2: +*> +*> line 7: INTEGER array, dimension (4) +*> Four integer values for the random number seed. +*> +*> If line 2 was 0: +*> +*> line 7-EOF: Precomputed examples are tested. +*> +*> remaining lines : Each example is stored on 3+2*N lines, where N is +*> its dimension. The first line contains the dimension (a +*> single integer). The next line contains an integer k such +*> that only the last k eigenvalues will be selected and appear +*> in the leading diagonal blocks of $A$ and $B$. The next N +*> lines contain the matrix A, one row per line. The next N +*> lines contain the matrix B. The last line contains the +*> reciprocal of the eigenvalue cluster condition number and the +*> reciprocal of the deflating subspace (associated with the +*> selected eigencluster) condition number. The end of data is +*> indicated by dimension N=0. Even if no data is to be tested, +*> there must be at least one line containing N=0. +*> +*>----------------------------------------------------------------------- +*> +*> DSB input file: +*> +*> line 2: NN, INTEGER +*> Number of values of N. +*> +*> line 3: NVAL, INTEGER array, dimension (NN) +*> The values for the matrix dimension N. +*> +*> line 4: NK, INTEGER +*> Number of values of K. +*> +*> line 5: KVAL, INTEGER array, dimension (NK) +*> The values for the matrix dimension K. +*> +*> line 6: THRESH +*> Threshold value for the test ratios. Information will be +*> printed about each test for which the test ratio is greater +*> than or equal to the threshold. 
+*> +*> line 7: NEWSD, INTEGER +*> A code indicating how to set the random number seed. +*> = 0: Set the seed to a default value before each run +*> = 1: Initialize the seed to a default value only before the +*> first run +*> = 2: Like 1, but use the seed values on the next line +*> +*> If line 7 was 2: +*> +*> line 8: INTEGER array, dimension (4) +*> Four integer values for the random number seed. +*> +*> lines 8-EOF: Lines specifying matrix types, as for NEP. +*> The 3-character path name is 'DSB'. +*> +*>----------------------------------------------------------------------- +*> +*> DBB input file: +*> +*> line 2: NN, INTEGER +*> Number of values of M and N. +*> +*> line 3: MVAL, INTEGER array, dimension (NN) +*> The values for the matrix row dimension M. +*> +*> line 4: NVAL, INTEGER array, dimension (NN) +*> The values for the matrix column dimension N. +*> +*> line 5: NK, INTEGER +*> Number of values of K. +*> +*> line 6: KVAL, INTEGER array, dimension (NK) +*> The values for the matrix bandwidth K. +*> +*> line 7: NPARMS, INTEGER +*> Number of values of the parameter NRHS +*> +*> line 8: NSVAL, INTEGER array, dimension (NPARMS) +*> The values for the number of right hand sides NRHS. +*> +*> line 9: THRESH +*> Threshold value for the test ratios. Information will be +*> printed about each test for which the test ratio is greater +*> than or equal to the threshold. +*> +*> line 10: NEWSD, INTEGER +*> A code indicating how to set the random number seed. +*> = 0: Set the seed to a default value before each run +*> = 1: Initialize the seed to a default value only before the +*> first run +*> = 2: Like 1, but use the seed values on the next line +*> +*> If line 10 was 2: +*> +*> line 11: INTEGER array, dimension (4) +*> Four integer values for the random number seed. +*> +*> lines 11-EOF: Lines specifying matrix types, as for SVD. +*> The 3-character path name is 'DBB'.
+*> +*>----------------------------------------------------------------------- +*> +*> DEC input file: +*> +*> line 2: THRESH, REAL +*> Threshold value for the test ratios. Information will be +*> printed about each test for which the test ratio is greater +*> than or equal to the threshold. +*> +*> lines 3-EOF: +*> +*> Input for testing the eigencondition routines consists of a set of +*> specially constructed test cases and their solutions. The data +*> format is not intended to be modified by the user. +*> +*>----------------------------------------------------------------------- +*> +*> DBL and DBK input files: +*> +*> line 1: 'DBL' in columns 1-3 to test DGEBAL, or 'DBK' in +*> columns 1-3 to test DGEBAK. +*> +*> The remaining lines consist of specially constructed test cases. +*> +*>----------------------------------------------------------------------- +*> +*> DGL and DGK input files: +*> +*> line 1: 'DGL' in columns 1-3 to test DGGBAL, or 'DGK' in +*> columns 1-3 to test DGGBAK. +*> +*> The remaining lines consist of specially constructed test cases. +*> +*>----------------------------------------------------------------------- +*> +*> GLM data file: +*> +*> line 1: 'GLM' in columns 1 to 3. +*> +*> line 2: NN, INTEGER +*> Number of values of M, P, and N. +*> +*> line 3: MVAL, INTEGER array, dimension(NN) +*> Values of M (row dimension). +*> +*> line 4: PVAL, INTEGER array, dimension(NN) +*> Values of P (row dimension). +*> +*> line 5: NVAL, INTEGER array, dimension(NN) +*> Values of N (column dimension), note M <= N <= M+P. +*> +*> line 6: THRESH, REAL +*> Threshold value for the test ratios. Information will be +*> printed about each test for which the test ratio is greater +*> than or equal to the threshold. +*> +*> line 7: TSTERR, LOGICAL +*> Flag indicating whether or not to test the error exits for +*> the LAPACK routines and driver routines. +*> +*> line 8: NEWSD, INTEGER +*> A code indicating how to set the random number seed.
+*> = 0: Set the seed to a default value before each run +*> = 1: Initialize the seed to a default value only before the +*> first run +*> = 2: Like 1, but use the seed values on the next line +*> +*> If line 8 was 2: +*> +*> line 9: INTEGER array, dimension (4) +*> Four integer values for the random number seed. +*> +*> lines 9-EOF: Lines specifying matrix types, as for NEP. +*> The 3-character path name is 'GLM' for the generalized +*> linear regression model routines. +*> +*>----------------------------------------------------------------------- +*> +*> GQR data file: +*> +*> line 1: 'GQR' in columns 1 to 3. +*> +*> line 2: NN, INTEGER +*> Number of values of M, P, and N. +*> +*> line 3: MVAL, INTEGER array, dimension(NN) +*> Values of M. +*> +*> line 4: PVAL, INTEGER array, dimension(NN) +*> Values of P. +*> +*> line 5: NVAL, INTEGER array, dimension(NN) +*> Values of N. +*> +*> line 6: THRESH, REAL +*> Threshold value for the test ratios. Information will be +*> printed about each test for which the test ratio is greater +*> than or equal to the threshold. +*> +*> line 7: TSTERR, LOGICAL +*> Flag indicating whether or not to test the error exits for +*> the LAPACK routines and driver routines. +*> +*> line 8: NEWSD, INTEGER +*> A code indicating how to set the random number seed. +*> = 0: Set the seed to a default value before each run +*> = 1: Initialize the seed to a default value only before the +*> first run +*> = 2: Like 1, but use the seed values on the next line +*> +*> If line 8 was 2: +*> +*> line 9: INTEGER array, dimension (4) +*> Four integer values for the random number seed. +*> +*> lines 9-EOF: Lines specifying matrix types, as for NEP. +*> The 3-character path name is 'GQR' for the generalized +*> QR and RQ routines. +*> +*>----------------------------------------------------------------------- +*> +*> GSV data file: +*> +*> line 1: 'GSV' in columns 1 to 3. +*> +*> line 2: NN, INTEGER +*> Number of values of M, P, and N. 
+*> +*> line 3: MVAL, INTEGER array, dimension(NN) +*> Values of M (row dimension). +*> +*> line 4: PVAL, INTEGER array, dimension(NN) +*> Values of P (row dimension). +*> +*> line 5: NVAL, INTEGER array, dimension(NN) +*> Values of N (column dimension). +*> +*> line 6: THRESH, REAL +*> Threshold value for the test ratios. Information will be +*> printed about each test for which the test ratio is greater +*> than or equal to the threshold. +*> +*> line 7: TSTERR, LOGICAL +*> Flag indicating whether or not to test the error exits for +*> the LAPACK routines and driver routines. +*> +*> line 8: NEWSD, INTEGER +*> A code indicating how to set the random number seed. +*> = 0: Set the seed to a default value before each run +*> = 1: Initialize the seed to a default value only before the +*> first run +*> = 2: Like 1, but use the seed values on the next line +*> +*> If line 8 was 2: +*> +*> line 9: INTEGER array, dimension (4) +*> Four integer values for the random number seed. +*> +*> lines 9-EOF: Lines specifying matrix types, as for NEP. +*> The 3-character path name is 'GSV' for the generalized +*> SVD routines. +*> +*>----------------------------------------------------------------------- +*> +*> CSD data file: +*> +*> line 1: 'CSD' in columns 1 to 3. +*> +*> line 2: NM, INTEGER +*> Number of values of M, P, and N. +*> +*> line 3: MVAL, INTEGER array, dimension(NM) +*> Values of M (row and column dimension of orthogonal matrix). +*> +*> line 4: PVAL, INTEGER array, dimension(NM) +*> Values of P (row dimension of top-left block). +*> +*> line 5: NVAL, INTEGER array, dimension(NM) +*> Values of N (column dimension of top-left block). +*> +*> line 6: THRESH, REAL +*> Threshold value for the test ratios. Information will be +*> printed about each test for which the test ratio is greater +*> than or equal to the threshold. +*> +*> line 7: TSTERR, LOGICAL +*> Flag indicating whether or not to test the error exits for +*> the LAPACK routines and driver routines. 
+*> +*> line 8: NEWSD, INTEGER +*> A code indicating how to set the random number seed. +*> = 0: Set the seed to a default value before each run +*> = 1: Initialize the seed to a default value only before the +*> first run +*> = 2: Like 1, but use the seed values on the next line +*> +*> If line 8 was 2: +*> +*> line 9: INTEGER array, dimension (4) +*> Four integer values for the random number seed. +*> +*> lines 9-EOF: Lines specifying matrix types, as for NEP. +*> The 3-character path name is 'CSD' for the CSD routine. +*> +*>----------------------------------------------------------------------- +*> +*> LSE data file: +*> +*> line 1: 'LSE' in columns 1 to 3. +*> +*> line 2: NN, INTEGER +*> Number of values of M, P, and N. +*> +*> line 3: MVAL, INTEGER array, dimension(NN) +*> Values of M. +*> +*> line 4: PVAL, INTEGER array, dimension(NN) +*> Values of P. +*> +*> line 5: NVAL, INTEGER array, dimension(NN) +*> Values of N, note P <= N <= P+M. +*> +*> line 6: THRESH, REAL +*> Threshold value for the test ratios. Information will be +*> printed about each test for which the test ratio is greater +*> than or equal to the threshold. +*> +*> line 7: TSTERR, LOGICAL +*> Flag indicating whether or not to test the error exits for +*> the LAPACK routines and driver routines. +*> +*> line 8: NEWSD, INTEGER +*> A code indicating how to set the random number seed. +*> = 0: Set the seed to a default value before each run +*> = 1: Initialize the seed to a default value only before the +*> first run +*> = 2: Like 1, but use the seed values on the next line +*> +*> If line 8 was 2: +*> +*> line 9: INTEGER array, dimension (4) +*> Four integer values for the random number seed. +*> +*> lines 9-EOF: Lines specifying matrix types, as for NEP. +*> The 3-character path name is 'LSE' for the constrained +*> linear least squares routines.
+*> +*>----------------------------------------------------------------------- +*> +*> NMAX is currently set to 132 and must be at least 12 for some of the +*> precomputed examples, and LWORK = NMAX*(5*NMAX+5)+1 in the parameter +*> statements below. For SVD, we assume NRHS may be as big as N. The +*> parameter NEED is set to 14 to allow for 14 N-by-N matrices for DGG. +*> \endverbatim +* +* Arguments: +* ========== +* +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup double_eig +* +* ===================================================================== + PROGRAM DCHKEE +* +#if defined(_OPENMP) + use omp_lib +#endif +* +* -- LAPACK test routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* ===================================================================== +* +* .. Parameters .. + INTEGER NMAX + PARAMETER ( NMAX = 132 ) + INTEGER NCMAX + PARAMETER ( NCMAX = 20 ) + INTEGER NEED + PARAMETER ( NEED = 14 ) + INTEGER LWORK + PARAMETER ( LWORK = NMAX*( 5*NMAX+5 )+1 ) + INTEGER LIWORK + PARAMETER ( LIWORK = NMAX*( 5*NMAX+20 ) ) + INTEGER MAXIN + PARAMETER ( MAXIN = 20 ) + INTEGER MAXT + PARAMETER ( MAXT = 30 ) + INTEGER NIN, NOUT + PARAMETER ( NIN = 5, NOUT = 6 ) +* .. +* .. Local Scalars .. + LOGICAL CSD, DBB, DGG, DSB, FATAL, GLM, GQR, GSV, LSE, + $ NEP, DBK, DBL, SEP, DES, DEV, DGK, DGL, DGS, + $ DGV, DGX, DSX, SVD, DVX, DXV, TSTCHK, TSTDIF, + $ TSTDRV, TSTERR + CHARACTER C1 + CHARACTER*3 C3, PATH + CHARACTER*32 VNAME + CHARACTER*10 INTSTR + CHARACTER*80 LINE + INTEGER I, I1, IC, INFO, ITMP, K, LENP, MAXTYP, NEWSD, + $ NK, NN, NPARMS, NRHS, NTYPES, + $ VERS_MAJOR, VERS_MINOR, VERS_PATCH, N_THREADS + DOUBLE PRECISION EPS, S1, S2, THRESH, THRSHN +* .. +* .. Local Arrays .. 
+ LOGICAL DOTYPE( MAXT ), LOGWRK( NMAX ) + INTEGER IOLDSD( 4 ), ISEED( 4 ), IWORK( LIWORK ), + $ KVAL( MAXIN ), MVAL( MAXIN ), MXBVAL( MAXIN ), + $ NBCOL( MAXIN ), NBMIN( MAXIN ), NBVAL( MAXIN ), + $ NSVAL( MAXIN ), NVAL( MAXIN ), NXVAL( MAXIN ), + $ PVAL( MAXIN ) + INTEGER INMIN( MAXIN ), INWIN( MAXIN ), INIBL( MAXIN ), + $ ISHFTS( MAXIN ), IACC22( MAXIN ) + DOUBLE PRECISION D( NMAX, 12 ), RESULT( 500 ), TAUA( NMAX ), + $ TAUB( NMAX ), X( 5*NMAX ) +* .. +* .. Allocatable Arrays .. + INTEGER AllocateStatus + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: WORK + DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: A, B, C +* .. +* .. External Functions .. + LOGICAL LSAMEN + DOUBLE PRECISION DLAMCH, DSECND + EXTERNAL LSAMEN, DLAMCH, DSECND +* .. +* .. External Subroutines .. + EXTERNAL ALAREQ, DCHKBB, DCHKBD, DCHKBK, DCHKBL, DCHKEC, + $ DCHKGG, DCHKGK, DCHKGL, DCHKHS, DCHKSB, DCHKST, + $ DCKCSD, DCKGLM, DCKGQR, DCKGSV, DCKLSE, DDRGES, + $ DDRGEV, DDRGSX, DDRGVX, DDRVBD, DDRVES, DDRVEV, + $ DDRVSG, DDRVST, DDRVSX, DDRVVX, DERRBD, + $ DERRED, DERRGG, DERRHS, DERRST, ILAVER, XLAENV, + $ DDRGES3, DDRGEV3, + $ DCHKST2STG, DDRVST2STG, DCHKSB2STG, DDRVSG2STG +* .. +* .. Intrinsic Functions .. + INTRINSIC LEN, MIN +* .. +* .. Scalars in Common .. + LOGICAL LERR, OK + CHARACTER*32 SRNAMT + INTEGER INFOT, MAXB, NPROC, NSHIFT, NUNIT, SELDIM, + $ SELOPT +* .. +* .. Arrays in Common .. + LOGICAL SELVAL( 20 ) + INTEGER IPARMS( 100 ) + DOUBLE PRECISION SELWI( 20 ), SELWR( 20 ) +* .. +* .. Common blocks .. + COMMON / CENVIR / NPROC, NSHIFT, MAXB + COMMON / INFOC / INFOT, NUNIT, OK, LERR + COMMON / SRNAMC / SRNAMT + COMMON / SSLCT / SELOPT, SELDIM, SELVAL, SELWR, SELWI + COMMON / CLAENV / IPARMS +* .. +* .. Data statements .. + DATA INTSTR / '0123456789' / + DATA IOLDSD / 0, 0, 0, 1 / +* .. +* .. Allocate memory dynamically .. 
+* + ALLOCATE ( A(NMAX*NMAX,NEED), STAT = AllocateStatus ) + IF (AllocateStatus /= 0) STOP "*** Not enough memory ***" + ALLOCATE ( B(NMAX*NMAX,5), STAT = AllocateStatus ) + IF (AllocateStatus /= 0) STOP "*** Not enough memory ***" + ALLOCATE ( C(NCMAX*NCMAX,NCMAX*NCMAX), STAT = AllocateStatus ) + IF (AllocateStatus /= 0) STOP "*** Not enough memory ***" + ALLOCATE ( WORK(LWORK), STAT = AllocateStatus ) + IF (AllocateStatus /= 0) STOP "*** Not enough memory ***" +* .. +* .. Executable Statements .. +* + A = 0.0 + B = 0.0 + C = 0.0 + D = 0.0 + S1 = DSECND( ) + FATAL = .FALSE. + NUNIT = NOUT +* +* Return to here to read multiple sets of data +* + 10 CONTINUE +* +* Read the first line and set the 3-character test path +* + READ( NIN, FMT = '(A80)', END = 380 )LINE + PATH = LINE( 1: 3 ) + NEP = LSAMEN( 3, PATH, 'NEP' ) .OR. LSAMEN( 3, PATH, 'DHS' ) + SEP = LSAMEN( 3, PATH, 'SEP' ) .OR. LSAMEN( 3, PATH, 'DST' ) .OR. + $ LSAMEN( 3, PATH, 'DSG' ) .OR. LSAMEN( 3, PATH, 'SE2' ) + SVD = LSAMEN( 3, PATH, 'SVD' ) .OR. LSAMEN( 3, PATH, 'DBD' ) + DEV = LSAMEN( 3, PATH, 'DEV' ) + DES = LSAMEN( 3, PATH, 'DES' ) + DVX = LSAMEN( 3, PATH, 'DVX' ) + DSX = LSAMEN( 3, PATH, 'DSX' ) + DGG = LSAMEN( 3, PATH, 'DGG' ) + DGS = LSAMEN( 3, PATH, 'DGS' ) + DGX = LSAMEN( 3, PATH, 'DGX' ) + DGV = LSAMEN( 3, PATH, 'DGV' ) + DXV = LSAMEN( 3, PATH, 'DXV' ) + DSB = LSAMEN( 3, PATH, 'DSB' ) + DBB = LSAMEN( 3, PATH, 'DBB' ) + GLM = LSAMEN( 3, PATH, 'GLM' ) + GQR = LSAMEN( 3, PATH, 'GQR' ) .OR. LSAMEN( 3, PATH, 'GRQ' ) + GSV = LSAMEN( 3, PATH, 'GSV' ) + CSD = LSAMEN( 3, PATH, 'CSD' ) + LSE = LSAMEN( 3, PATH, 'LSE' ) + DBL = LSAMEN( 3, PATH, 'DBL' ) + DBK = LSAMEN( 3, PATH, 'DBK' ) + DGL = LSAMEN( 3, PATH, 'DGL' ) + DGK = LSAMEN( 3, PATH, 'DGK' ) +* +* Report values of parameters. +* + IF( PATH.EQ.' 
' ) THEN + GO TO 10 + ELSE IF( NEP ) THEN + WRITE( NOUT, FMT = 9987 ) + ELSE IF( SEP ) THEN + WRITE( NOUT, FMT = 9986 ) + ELSE IF( SVD ) THEN + WRITE( NOUT, FMT = 9985 ) + ELSE IF( DEV ) THEN + WRITE( NOUT, FMT = 9979 ) + ELSE IF( DES ) THEN + WRITE( NOUT, FMT = 9978 ) + ELSE IF( DVX ) THEN + WRITE( NOUT, FMT = 9977 ) + ELSE IF( DSX ) THEN + WRITE( NOUT, FMT = 9976 ) + ELSE IF( DGG ) THEN + WRITE( NOUT, FMT = 9975 ) + ELSE IF( DGS ) THEN + WRITE( NOUT, FMT = 9964 ) + ELSE IF( DGX ) THEN + WRITE( NOUT, FMT = 9965 ) + ELSE IF( DGV ) THEN + WRITE( NOUT, FMT = 9963 ) + ELSE IF( DXV ) THEN + WRITE( NOUT, FMT = 9962 ) + ELSE IF( DSB ) THEN + WRITE( NOUT, FMT = 9974 ) + ELSE IF( DBB ) THEN + WRITE( NOUT, FMT = 9967 ) + ELSE IF( GLM ) THEN + WRITE( NOUT, FMT = 9971 ) + ELSE IF( GQR ) THEN + WRITE( NOUT, FMT = 9970 ) + ELSE IF( GSV ) THEN + WRITE( NOUT, FMT = 9969 ) + ELSE IF( CSD ) THEN + WRITE( NOUT, FMT = 9960 ) + ELSE IF( LSE ) THEN + WRITE( NOUT, FMT = 9968 ) + ELSE IF( DBL ) THEN +* +* DGEBAL: Balancing +* + CALL DCHKBL( NIN, NOUT ) + GO TO 10 + ELSE IF( DBK ) THEN +* +* DGEBAK: Back transformation +* + CALL DCHKBK( NIN, NOUT ) + GO TO 10 + ELSE IF( DGL ) THEN +* +* DGGBAL: Balancing +* + CALL DCHKGL( NIN, NOUT ) + GO TO 10 + ELSE IF( DGK ) THEN +* +* DGGBAK: Back transformation +* + CALL DCHKGK( NIN, NOUT ) + GO TO 10 + ELSE IF( LSAMEN( 3, PATH, 'DEC' ) ) THEN +* +* DEC: Eigencondition estimation +* + READ( NIN, FMT = * )THRESH + CALL XLAENV( 1, 1 ) + CALL XLAENV( 12, 11 ) + CALL XLAENV( 13, 2 ) + CALL XLAENV( 14, 0 ) + CALL XLAENV( 15, 2 ) + CALL XLAENV( 16, 2 ) + TSTERR = .TRUE. + CALL DCHKEC( THRESH, TSTERR, NIN, NOUT ) + GO TO 10 + ELSE + WRITE( NOUT, FMT = 9992 )PATH + GO TO 10 + END IF + CALL ILAVER( VERS_MAJOR, VERS_MINOR, VERS_PATCH ) + WRITE( NOUT, FMT = 9972 ) VERS_MAJOR, VERS_MINOR, VERS_PATCH + WRITE( NOUT, FMT = 9984 ) +* +* Read the number of values of M, P, and N. 
+* + READ( NIN, FMT = * )NN + IF( NN.LT.0 ) THEN + WRITE( NOUT, FMT = 9989 )' NN ', NN, 1 + NN = 0 + FATAL = .TRUE. + ELSE IF( NN.GT.MAXIN ) THEN + WRITE( NOUT, FMT = 9988 )' NN ', NN, MAXIN + NN = 0 + FATAL = .TRUE. + END IF +* +* Read the values of M +* + IF( .NOT.( DGX .OR. DXV ) ) THEN + READ( NIN, FMT = * )( MVAL( I ), I = 1, NN ) + IF( SVD ) THEN + VNAME = ' M ' + ELSE + VNAME = ' N ' + END IF + DO 20 I = 1, NN + IF( MVAL( I ).LT.0 ) THEN + WRITE( NOUT, FMT = 9989 )VNAME, MVAL( I ), 0 + FATAL = .TRUE. + ELSE IF( MVAL( I ).GT.NMAX ) THEN + WRITE( NOUT, FMT = 9988 )VNAME, MVAL( I ), NMAX + FATAL = .TRUE. + END IF + 20 CONTINUE + WRITE( NOUT, FMT = 9983 )'M: ', ( MVAL( I ), I = 1, NN ) + END IF +* +* Read the values of P +* + IF( GLM .OR. GQR .OR. GSV .OR. CSD .OR. LSE ) THEN + READ( NIN, FMT = * )( PVAL( I ), I = 1, NN ) + DO 30 I = 1, NN + IF( PVAL( I ).LT.0 ) THEN + WRITE( NOUT, FMT = 9989 )' P ', PVAL( I ), 0 + FATAL = .TRUE. + ELSE IF( PVAL( I ).GT.NMAX ) THEN + WRITE( NOUT, FMT = 9988 )' P ', PVAL( I ), NMAX + FATAL = .TRUE. + END IF + 30 CONTINUE + WRITE( NOUT, FMT = 9983 )'P: ', ( PVAL( I ), I = 1, NN ) + END IF +* +* Read the values of N +* + IF( SVD .OR. DBB .OR. GLM .OR. GQR .OR. GSV .OR. CSD .OR. + $ LSE ) THEN + READ( NIN, FMT = * )( NVAL( I ), I = 1, NN ) + DO 40 I = 1, NN + IF( NVAL( I ).LT.0 ) THEN + WRITE( NOUT, FMT = 9989 )' N ', NVAL( I ), 0 + FATAL = .TRUE. + ELSE IF( NVAL( I ).GT.NMAX ) THEN + WRITE( NOUT, FMT = 9988 )' N ', NVAL( I ), NMAX + FATAL = .TRUE. + END IF + 40 CONTINUE + ELSE + DO 50 I = 1, NN + NVAL( I ) = MVAL( I ) + 50 CONTINUE + END IF + IF( .NOT.( DGX .OR. DXV ) ) THEN + WRITE( NOUT, FMT = 9983 )'N: ', ( NVAL( I ), I = 1, NN ) + ELSE + WRITE( NOUT, FMT = 9983 )'N: ', NN + END IF +* +* Read the number of values of K, followed by the values of K +* + IF( DSB .OR. 
DBB ) THEN + READ( NIN, FMT = * )NK + READ( NIN, FMT = * )( KVAL( I ), I = 1, NK ) + DO 60 I = 1, NK + IF( KVAL( I ).LT.0 ) THEN + WRITE( NOUT, FMT = 9989 )' K ', KVAL( I ), 0 + FATAL = .TRUE. + ELSE IF( KVAL( I ).GT.NMAX ) THEN + WRITE( NOUT, FMT = 9988 )' K ', KVAL( I ), NMAX + FATAL = .TRUE. + END IF + 60 CONTINUE + WRITE( NOUT, FMT = 9983 )'K: ', ( KVAL( I ), I = 1, NK ) + END IF +* + IF( DEV .OR. DES .OR. DVX .OR. DSX ) THEN +* +* For the nonsymmetric QR driver routines, only one set of +* parameters is allowed. +* + READ( NIN, FMT = * )NBVAL( 1 ), NBMIN( 1 ), NXVAL( 1 ), + $ INMIN( 1 ), INWIN( 1 ), INIBL(1), ISHFTS(1), IACC22(1) + IF( NBVAL( 1 ).LT.1 ) THEN + WRITE( NOUT, FMT = 9989 )' NB ', NBVAL( 1 ), 1 + FATAL = .TRUE. + ELSE IF( NBMIN( 1 ).LT.1 ) THEN + WRITE( NOUT, FMT = 9989 )'NBMIN ', NBMIN( 1 ), 1 + FATAL = .TRUE. + ELSE IF( NXVAL( 1 ).LT.1 ) THEN + WRITE( NOUT, FMT = 9989 )' NX ', NXVAL( 1 ), 1 + FATAL = .TRUE. + ELSE IF( INMIN( 1 ).LT.1 ) THEN + WRITE( NOUT, FMT = 9989 )' INMIN ', INMIN( 1 ), 1 + FATAL = .TRUE. + ELSE IF( INWIN( 1 ).LT.1 ) THEN + WRITE( NOUT, FMT = 9989 )' INWIN ', INWIN( 1 ), 1 + FATAL = .TRUE. + ELSE IF( INIBL( 1 ).LT.1 ) THEN + WRITE( NOUT, FMT = 9989 )' INIBL ', INIBL( 1 ), 1 + FATAL = .TRUE. + ELSE IF( ISHFTS( 1 ).LT.1 ) THEN + WRITE( NOUT, FMT = 9989 )' ISHFTS ', ISHFTS( 1 ), 1 + FATAL = .TRUE. + ELSE IF( IACC22( 1 ).LT.0 ) THEN + WRITE( NOUT, FMT = 9989 )' IACC22 ', IACC22( 1 ), 0 + FATAL = .TRUE. 
+ END IF + CALL XLAENV( 1, NBVAL( 1 ) ) + CALL XLAENV( 2, NBMIN( 1 ) ) + CALL XLAENV( 3, NXVAL( 1 ) ) + CALL XLAENV(12, MAX( 11, INMIN( 1 ) ) ) + CALL XLAENV(13, INWIN( 1 ) ) + CALL XLAENV(14, INIBL( 1 ) ) + CALL XLAENV(15, ISHFTS( 1 ) ) + CALL XLAENV(16, IACC22( 1 ) ) + WRITE( NOUT, FMT = 9983 )'NB: ', NBVAL( 1 ) + WRITE( NOUT, FMT = 9983 )'NBMIN:', NBMIN( 1 ) + WRITE( NOUT, FMT = 9983 )'NX: ', NXVAL( 1 ) + WRITE( NOUT, FMT = 9983 )'INMIN: ', INMIN( 1 ) + WRITE( NOUT, FMT = 9983 )'INWIN: ', INWIN( 1 ) + WRITE( NOUT, FMT = 9983 )'INIBL: ', INIBL( 1 ) + WRITE( NOUT, FMT = 9983 )'ISHFTS: ', ISHFTS( 1 ) + WRITE( NOUT, FMT = 9983 )'IACC22: ', IACC22( 1 ) +* + ELSEIF( DGS .OR. DGX .OR. DGV .OR. DXV ) THEN +* +* For the nonsymmetric generalized driver routines, only one set +* of parameters is allowed. +* + READ( NIN, FMT = * )NBVAL( 1 ), NBMIN( 1 ), NXVAL( 1 ), + $ NSVAL( 1 ), MXBVAL( 1 ) + IF( NBVAL( 1 ).LT.1 ) THEN + WRITE( NOUT, FMT = 9989 )' NB ', NBVAL( 1 ), 1 + FATAL = .TRUE. + ELSE IF( NBMIN( 1 ).LT.1 ) THEN + WRITE( NOUT, FMT = 9989 )'NBMIN ', NBMIN( 1 ), 1 + FATAL = .TRUE. + ELSE IF( NXVAL( 1 ).LT.1 ) THEN + WRITE( NOUT, FMT = 9989 )' NX ', NXVAL( 1 ), 1 + FATAL = .TRUE. + ELSE IF( NSVAL( 1 ).LT.2 ) THEN + WRITE( NOUT, FMT = 9989 )' NS ', NSVAL( 1 ), 2 + FATAL = .TRUE. + ELSE IF( MXBVAL( 1 ).LT.1 ) THEN + WRITE( NOUT, FMT = 9989 )' MAXB ', MXBVAL( 1 ), 1 + FATAL = .TRUE. + END IF + CALL XLAENV( 1, NBVAL( 1 ) ) + CALL XLAENV( 2, NBMIN( 1 ) ) + CALL XLAENV( 3, NXVAL( 1 ) ) + CALL XLAENV( 4, NSVAL( 1 ) ) + CALL XLAENV( 8, MXBVAL( 1 ) ) + WRITE( NOUT, FMT = 9983 )'NB: ', NBVAL( 1 ) + WRITE( NOUT, FMT = 9983 )'NBMIN:', NBMIN( 1 ) + WRITE( NOUT, FMT = 9983 )'NX: ', NXVAL( 1 ) + WRITE( NOUT, FMT = 9983 )'NS: ', NSVAL( 1 ) + WRITE( NOUT, FMT = 9983 )'MAXB: ', MXBVAL( 1 ) +* + ELSE IF( .NOT.DSB .AND. .NOT.GLM .AND. .NOT.GQR .AND. .NOT. + $ GSV .AND. .NOT.CSD .AND. .NOT.LSE ) THEN +* +* For the other paths, the number of parameters can be varied +* from the input file. 
Read the number of parameter values. +* + READ( NIN, FMT = * )NPARMS + IF( NPARMS.LT.1 ) THEN + WRITE( NOUT, FMT = 9989 )'NPARMS', NPARMS, 1 + NPARMS = 0 + FATAL = .TRUE. + ELSE IF( NPARMS.GT.MAXIN ) THEN + WRITE( NOUT, FMT = 9988 )'NPARMS', NPARMS, MAXIN + NPARMS = 0 + FATAL = .TRUE. + END IF +* +* Read the values of NB +* + IF( .NOT.DBB ) THEN + READ( NIN, FMT = * )( NBVAL( I ), I = 1, NPARMS ) + DO 70 I = 1, NPARMS + IF( NBVAL( I ).LT.0 ) THEN + WRITE( NOUT, FMT = 9989 )' NB ', NBVAL( I ), 0 + FATAL = .TRUE. + ELSE IF( NBVAL( I ).GT.NMAX ) THEN + WRITE( NOUT, FMT = 9988 )' NB ', NBVAL( I ), NMAX + FATAL = .TRUE. + END IF + 70 CONTINUE + WRITE( NOUT, FMT = 9983 )'NB: ', + $ ( NBVAL( I ), I = 1, NPARMS ) + END IF +* +* Read the values of NBMIN +* + IF( NEP .OR. SEP .OR. SVD .OR. DGG ) THEN + READ( NIN, FMT = * )( NBMIN( I ), I = 1, NPARMS ) + DO 80 I = 1, NPARMS + IF( NBMIN( I ).LT.0 ) THEN + WRITE( NOUT, FMT = 9989 )'NBMIN ', NBMIN( I ), 0 + FATAL = .TRUE. + ELSE IF( NBMIN( I ).GT.NMAX ) THEN + WRITE( NOUT, FMT = 9988 )'NBMIN ', NBMIN( I ), NMAX + FATAL = .TRUE. + END IF + 80 CONTINUE + WRITE( NOUT, FMT = 9983 )'NBMIN:', + $ ( NBMIN( I ), I = 1, NPARMS ) + ELSE + DO 90 I = 1, NPARMS + NBMIN( I ) = 1 + 90 CONTINUE + END IF +* +* Read the values of NX +* + IF( NEP .OR. SEP .OR. SVD ) THEN + READ( NIN, FMT = * )( NXVAL( I ), I = 1, NPARMS ) + DO 100 I = 1, NPARMS + IF( NXVAL( I ).LT.0 ) THEN + WRITE( NOUT, FMT = 9989 )' NX ', NXVAL( I ), 0 + FATAL = .TRUE. + ELSE IF( NXVAL( I ).GT.NMAX ) THEN + WRITE( NOUT, FMT = 9988 )' NX ', NXVAL( I ), NMAX + FATAL = .TRUE. + END IF + 100 CONTINUE + WRITE( NOUT, FMT = 9983 )'NX: ', + $ ( NXVAL( I ), I = 1, NPARMS ) + ELSE + DO 110 I = 1, NPARMS + NXVAL( I ) = 1 + 110 CONTINUE + END IF +* +* Read the values of NSHIFT (if DGG) or NRHS (if SVD +* or DBB). +* + IF( SVD .OR. DBB .OR. 
DGG ) THEN + READ( NIN, FMT = * )( NSVAL( I ), I = 1, NPARMS ) + DO 120 I = 1, NPARMS + IF( NSVAL( I ).LT.0 ) THEN + WRITE( NOUT, FMT = 9989 )' NS ', NSVAL( I ), 0 + FATAL = .TRUE. + ELSE IF( NSVAL( I ).GT.NMAX ) THEN + WRITE( NOUT, FMT = 9988 )' NS ', NSVAL( I ), NMAX + FATAL = .TRUE. + END IF + 120 CONTINUE + WRITE( NOUT, FMT = 9983 )'NS: ', + $ ( NSVAL( I ), I = 1, NPARMS ) + ELSE + DO 130 I = 1, NPARMS + NSVAL( I ) = 1 + 130 CONTINUE + END IF +* +* Read the values for MAXB. +* + IF( DGG ) THEN + READ( NIN, FMT = * )( MXBVAL( I ), I = 1, NPARMS ) + DO 140 I = 1, NPARMS + IF( MXBVAL( I ).LT.0 ) THEN + WRITE( NOUT, FMT = 9989 )' MAXB ', MXBVAL( I ), 0 + FATAL = .TRUE. + ELSE IF( MXBVAL( I ).GT.NMAX ) THEN + WRITE( NOUT, FMT = 9988 )' MAXB ', MXBVAL( I ), NMAX + FATAL = .TRUE. + END IF + 140 CONTINUE + WRITE( NOUT, FMT = 9983 )'MAXB: ', + $ ( MXBVAL( I ), I = 1, NPARMS ) + ELSE + DO 150 I = 1, NPARMS + MXBVAL( I ) = 1 + 150 CONTINUE + END IF +* +* Read the values for INMIN. +* + IF( NEP ) THEN + READ( NIN, FMT = * )( INMIN( I ), I = 1, NPARMS ) + DO 540 I = 1, NPARMS + IF( INMIN( I ).LT.0 ) THEN + WRITE( NOUT, FMT = 9989 )' INMIN ', INMIN( I ), 0 + FATAL = .TRUE. + END IF + 540 CONTINUE + WRITE( NOUT, FMT = 9983 )'INMIN: ', + $ ( INMIN( I ), I = 1, NPARMS ) + ELSE + DO 550 I = 1, NPARMS + INMIN( I ) = 1 + 550 CONTINUE + END IF +* +* Read the values for INWIN. +* + IF( NEP ) THEN + READ( NIN, FMT = * )( INWIN( I ), I = 1, NPARMS ) + DO 560 I = 1, NPARMS + IF( INWIN( I ).LT.0 ) THEN + WRITE( NOUT, FMT = 9989 )' INWIN ', INWIN( I ), 0 + FATAL = .TRUE. + END IF + 560 CONTINUE + WRITE( NOUT, FMT = 9983 )'INWIN: ', + $ ( INWIN( I ), I = 1, NPARMS ) + ELSE + DO 570 I = 1, NPARMS + INWIN( I ) = 1 + 570 CONTINUE + END IF +* +* Read the values for INIBL. +* + IF( NEP ) THEN + READ( NIN, FMT = * )( INIBL( I ), I = 1, NPARMS ) + DO 580 I = 1, NPARMS + IF( INIBL( I ).LT.0 ) THEN + WRITE( NOUT, FMT = 9989 )' INIBL ', INIBL( I ), 0 + FATAL = .TRUE. 
+ END IF + 580 CONTINUE + WRITE( NOUT, FMT = 9983 )'INIBL: ', + $ ( INIBL( I ), I = 1, NPARMS ) + ELSE + DO 590 I = 1, NPARMS + INIBL( I ) = 1 + 590 CONTINUE + END IF +* +* Read the values for ISHFTS. +* + IF( NEP ) THEN + READ( NIN, FMT = * )( ISHFTS( I ), I = 1, NPARMS ) + DO 600 I = 1, NPARMS + IF( ISHFTS( I ).LT.0 ) THEN + WRITE( NOUT, FMT = 9989 )' ISHFTS ', ISHFTS( I ), 0 + FATAL = .TRUE. + END IF + 600 CONTINUE + WRITE( NOUT, FMT = 9983 )'ISHFTS: ', + $ ( ISHFTS( I ), I = 1, NPARMS ) + ELSE + DO 610 I = 1, NPARMS + ISHFTS( I ) = 1 + 610 CONTINUE + END IF +* +* Read the values for IACC22. +* + IF( NEP .OR. DGG ) THEN + READ( NIN, FMT = * )( IACC22( I ), I = 1, NPARMS ) + DO 620 I = 1, NPARMS + IF( IACC22( I ).LT.0 ) THEN + WRITE( NOUT, FMT = 9989 )' IACC22 ', IACC22( I ), 0 + FATAL = .TRUE. + END IF + 620 CONTINUE + WRITE( NOUT, FMT = 9983 )'IACC22: ', + $ ( IACC22( I ), I = 1, NPARMS ) + ELSE + DO 630 I = 1, NPARMS + IACC22( I ) = 1 + 630 CONTINUE + END IF +* +* Read the values for NBCOL. +* + IF( DGG ) THEN + READ( NIN, FMT = * )( NBCOL( I ), I = 1, NPARMS ) + DO 160 I = 1, NPARMS + IF( NBCOL( I ).LT.0 ) THEN + WRITE( NOUT, FMT = 9989 )'NBCOL ', NBCOL( I ), 0 + FATAL = .TRUE. + ELSE IF( NBCOL( I ).GT.NMAX ) THEN + WRITE( NOUT, FMT = 9988 )'NBCOL ', NBCOL( I ), NMAX + FATAL = .TRUE. + END IF + 160 CONTINUE + WRITE( NOUT, FMT = 9983 )'NBCOL:', + $ ( NBCOL( I ), I = 1, NPARMS ) + ELSE + DO 170 I = 1, NPARMS + NBCOL( I ) = 1 + 170 CONTINUE + END IF + END IF +* +* Calculate and print the machine dependent constants. +* + WRITE( NOUT, FMT = * ) + EPS = DLAMCH( 'Underflow threshold' ) + WRITE( NOUT, FMT = 9981 )'underflow', EPS + EPS = DLAMCH( 'Overflow threshold' ) + WRITE( NOUT, FMT = 9981 )'overflow ', EPS + EPS = DLAMCH( 'Epsilon' ) + WRITE( NOUT, FMT = 9981 )'precision', EPS +* +* Read the threshold value for the test ratios. +* + READ( NIN, FMT = * )THRESH + WRITE( NOUT, FMT = 9982 )THRESH + IF( SEP .OR. SVD .OR. 
DGG ) THEN +* +* Read the flag that indicates whether to test LAPACK routines. +* + READ( NIN, FMT = * )TSTCHK +* +* Read the flag that indicates whether to test driver routines. +* + READ( NIN, FMT = * )TSTDRV + END IF +* +* Read the flag that indicates whether to test the error exits. +* + READ( NIN, FMT = * )TSTERR +* +* Read the code describing how to set the random number seed. +* + READ( NIN, FMT = * )NEWSD +* +* If NEWSD = 2, read another line with 4 integers for the seed. +* + IF( NEWSD.EQ.2 ) + $ READ( NIN, FMT = * )( IOLDSD( I ), I = 1, 4 ) +* + DO 180 I = 1, 4 + ISEED( I ) = IOLDSD( I ) + 180 CONTINUE +* + IF( FATAL ) THEN + WRITE( NOUT, FMT = 9999 ) + STOP + END IF +* +* Read the input lines indicating the test path and its parameters. +* The first three characters indicate the test path, and the number +* of test matrix types must be the first nonblank item in columns +* 4-80. +* + 190 CONTINUE +* + IF( .NOT.( DGX .OR. DXV ) ) THEN +* + 200 CONTINUE + READ( NIN, FMT = '(A80)', END = 380 )LINE + C3 = LINE( 1: 3 ) + LENP = LEN( LINE ) + I = 3 + ITMP = 0 + I1 = 0 + 210 CONTINUE + I = I + 1 + IF( I.GT.LENP ) THEN + IF( I1.GT.0 ) THEN + GO TO 240 + ELSE + NTYPES = MAXT + GO TO 240 + END IF + END IF + IF( LINE( I: I ).NE.' ' .AND. LINE( I: I ).NE.',' ) THEN + I1 = I + C1 = LINE( I1: I1 ) +* +* Check that a valid integer was read +* + DO 220 K = 1, 10 + IF( C1.EQ.INTSTR( K: K ) ) THEN + IC = K - 1 + GO TO 230 + END IF + 220 CONTINUE + WRITE( NOUT, FMT = 9991 )I, LINE + GO TO 200 + 230 CONTINUE + ITMP = 10*ITMP + IC + GO TO 210 + ELSE IF( I1.GT.0 ) THEN + GO TO 240 + ELSE + GO TO 210 + END IF + 240 CONTINUE + NTYPES = ITMP +* +* Skip the tests if NTYPES is <= 0. +* + IF( .NOT.( DEV .OR. DES .OR. DVX .OR. DSX .OR. DGV .OR. + $ DGS ) .AND. NTYPES.LE.0 ) THEN + WRITE( NOUT, FMT = 9990 )C3 + GO TO 200 + END IF +* + ELSE + IF( DXV ) + $ C3 = 'DXV' + IF( DGX ) + $ C3 = 'DGX' + END IF +* +* Reset the random number seed. 
+* + IF( NEWSD.EQ.0 ) THEN + DO 250 K = 1, 4 + ISEED( K ) = IOLDSD( K ) + 250 CONTINUE + END IF +* + IF( LSAMEN( 3, C3, 'DHS' ) .OR. LSAMEN( 3, C3, 'NEP' ) ) THEN +* +* ------------------------------------- +* NEP: Nonsymmetric Eigenvalue Problem +* ------------------------------------- +* Vary the parameters +* NB = block size +* NBMIN = minimum block size +* NX = crossover point +* NS = number of shifts +* MAXB = minimum submatrix size +* + MAXTYP = 21 + NTYPES = MIN( MAXTYP, NTYPES ) + CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) + CALL XLAENV( 1, 1 ) + IF( TSTERR ) + $ CALL DERRHS( 'DHSEQR', NOUT ) + DO 270 I = 1, NPARMS + CALL XLAENV( 1, NBVAL( I ) ) + CALL XLAENV( 2, NBMIN( I ) ) + CALL XLAENV( 3, NXVAL( I ) ) + CALL XLAENV(12, MAX( 11, INMIN( I ) ) ) + CALL XLAENV(13, INWIN( I ) ) + CALL XLAENV(14, INIBL( I ) ) + CALL XLAENV(15, ISHFTS( I ) ) + CALL XLAENV(16, IACC22( I ) ) +* + IF( NEWSD.EQ.0 ) THEN + DO 260 K = 1, 4 + ISEED( K ) = IOLDSD( K ) + 260 CONTINUE + END IF + WRITE( NOUT, FMT = 9961 )C3, NBVAL( I ), NBMIN( I ), + $ NXVAL( I ), MAX( 11, INMIN(I)), + $ INWIN( I ), INIBL( I ), ISHFTS( I ), IACC22( I ) + CALL DCHKHS( NN, NVAL, MAXTYP, DOTYPE, ISEED, THRESH, NOUT, + $ A( 1, 1 ), NMAX, A( 1, 2 ), A( 1, 3 ), + $ A( 1, 4 ), A( 1, 5 ), NMAX, A( 1, 6 ), + $ A( 1, 7 ), D( 1, 1 ), D( 1, 2 ), D( 1, 3 ), + $ D( 1, 4 ), D( 1, 5 ), D( 1, 6 ), A( 1, 8 ), + $ A( 1, 9 ), A( 1, 10 ), A( 1, 11 ), A( 1, 12 ), + $ D( 1, 7 ), WORK, LWORK, IWORK, LOGWRK, RESULT, + $ INFO ) + IF( INFO.NE.0 ) + $ WRITE( NOUT, FMT = 9980 )'DCHKHS', INFO + 270 CONTINUE +* + ELSE IF( LSAMEN( 3, C3, 'DST' ) .OR. LSAMEN( 3, C3, 'SEP' ) + $ .OR. 
LSAMEN( 3, C3, 'SE2' ) ) THEN +* +* ---------------------------------- +* SEP: Symmetric Eigenvalue Problem +* ---------------------------------- +* Vary the parameters +* NB = block size +* NBMIN = minimum block size +* NX = crossover point +* + MAXTYP = 21 + NTYPES = MIN( MAXTYP, NTYPES ) + CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) + CALL XLAENV( 1, 1 ) + CALL XLAENV( 9, 25 ) + IF( TSTERR ) THEN +#if defined(_OPENMP) + N_THREADS = OMP_GET_NUM_THREADS() + CALL OMP_SET_NUM_THREADS(1) +#endif + CALL DERRST( 'DST', NOUT ) +#if defined(_OPENMP) + CALL OMP_SET_NUM_THREADS(N_THREADS) +#endif + END IF + DO 290 I = 1, NPARMS + CALL XLAENV( 1, NBVAL( I ) ) + CALL XLAENV( 2, NBMIN( I ) ) + CALL XLAENV( 3, NXVAL( I ) ) +* + IF( NEWSD.EQ.0 ) THEN + DO 280 K = 1, 4 + ISEED( K ) = IOLDSD( K ) + 280 CONTINUE + END IF + WRITE( NOUT, FMT = 9997 )C3, NBVAL( I ), NBMIN( I ), + $ NXVAL( I ) + IF( TSTCHK ) THEN + IF( LSAMEN( 3, C3, 'SE2' ) ) THEN + CALL DCHKST2STG( NN, NVAL, MAXTYP, DOTYPE, ISEED, THRESH, + $ NOUT, A( 1, 1 ), NMAX, A( 1, 2 ), D( 1, 1 ), + $ D( 1, 2 ), D( 1, 3 ), D( 1, 4 ), D( 1, 5 ), + $ D( 1, 6 ), D( 1, 7 ), D( 1, 8 ), D( 1, 9 ), + $ D( 1, 10 ), D( 1, 11 ), A( 1, 3 ), NMAX, + $ A( 1, 4 ), A( 1, 5 ), D( 1, 12 ), A( 1, 6 ), + $ WORK, LWORK, IWORK, LIWORK, RESULT, INFO ) + ELSE + CALL DCHKST( NN, NVAL, MAXTYP, DOTYPE, ISEED, THRESH, + $ NOUT, A( 1, 1 ), NMAX, A( 1, 2 ), D( 1, 1 ), + $ D( 1, 2 ), D( 1, 3 ), D( 1, 4 ), D( 1, 5 ), + $ D( 1, 6 ), D( 1, 7 ), D( 1, 8 ), D( 1, 9 ), + $ D( 1, 10 ), D( 1, 11 ), A( 1, 3 ), NMAX, + $ A( 1, 4 ), A( 1, 5 ), D( 1, 12 ), A( 1, 6 ), + $ WORK, LWORK, IWORK, LIWORK, RESULT, INFO ) + ENDIF + IF( INFO.NE.0 ) + $ WRITE( NOUT, FMT = 9980 )'DCHKST', INFO + END IF + IF( TSTDRV ) THEN + IF( LSAMEN( 3, C3, 'SE2' ) ) THEN + CALL DDRVST2STG( NN, NVAL, 18, DOTYPE, ISEED, THRESH, + $ NOUT, A( 1, 1 ), NMAX, D( 1, 3 ), D( 1, 4 ), + $ D( 1, 5 ), D( 1, 6 ), D( 1, 8 ), D( 1, 9 ), + $ D( 1, 10 ), D( 1, 11 ), A( 1, 2 ), NMAX, + $ A( 1, 3 ), D( 
1, 12 ), A( 1, 4 ), WORK, + $ LWORK, IWORK, LIWORK, RESULT, INFO ) + ELSE + CALL DDRVST( NN, NVAL, 18, DOTYPE, ISEED, THRESH, NOUT, + $ A( 1, 1 ), NMAX, D( 1, 3 ), D( 1, 4 ), + $ D( 1, 5 ), D( 1, 6 ), D( 1, 8 ), D( 1, 9 ), + $ D( 1, 10 ), D( 1, 11 ), A( 1, 2 ), NMAX, + $ A( 1, 3 ), D( 1, 12 ), A( 1, 4 ), WORK, + $ LWORK, IWORK, LIWORK, RESULT, INFO ) + ENDIF + IF( INFO.NE.0 ) + $ WRITE( NOUT, FMT = 9980 )'DDRVST', INFO + END IF + 290 CONTINUE +* + ELSE IF( LSAMEN( 3, C3, 'DSG' ) ) THEN +* +* ---------------------------------------------- +* DSG: Symmetric Generalized Eigenvalue Problem +* ---------------------------------------------- +* Vary the parameters +* NB = block size +* NBMIN = minimum block size +* NX = crossover point +* + MAXTYP = 21 + NTYPES = MIN( MAXTYP, NTYPES ) + CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) + CALL XLAENV( 9, 25 ) + DO 310 I = 1, NPARMS + CALL XLAENV( 1, NBVAL( I ) ) + CALL XLAENV( 2, NBMIN( I ) ) + CALL XLAENV( 3, NXVAL( I ) ) +* + IF( NEWSD.EQ.0 ) THEN + DO 300 K = 1, 4 + ISEED( K ) = IOLDSD( K ) + 300 CONTINUE + END IF + WRITE( NOUT, FMT = 9997 )C3, NBVAL( I ), NBMIN( I ), + $ NXVAL( I ) + IF( TSTCHK ) THEN +* CALL DDRVSG( NN, NVAL, MAXTYP, DOTYPE, ISEED, THRESH, +* $ NOUT, A( 1, 1 ), NMAX, A( 1, 2 ), NMAX, +* $ D( 1, 3 ), A( 1, 3 ), NMAX, A( 1, 4 ), +* $ A( 1, 5 ), A( 1, 6 ), A( 1, 7 ), WORK, +* $ LWORK, IWORK, LIWORK, RESULT, INFO ) + CALL DDRVSG2STG( NN, NVAL, MAXTYP, DOTYPE, ISEED, THRESH, + $ NOUT, A( 1, 1 ), NMAX, A( 1, 2 ), NMAX, + $ D( 1, 3 ), D( 1, 3 ), A( 1, 3 ), NMAX, + $ A( 1, 4 ), A( 1, 5 ), A( 1, 6 ), + $ A( 1, 7 ), WORK, LWORK, IWORK, LIWORK, + $ RESULT, INFO ) + IF( INFO.NE.0 ) + $ WRITE( NOUT, FMT = 9980 )'DDRVSG', INFO + END IF + 310 CONTINUE +* + ELSE IF( LSAMEN( 3, C3, 'DBD' ) .OR. 
LSAMEN( 3, C3, 'SVD' ) ) THEN +* +* ---------------------------------- +* SVD: Singular Value Decomposition +* ---------------------------------- +* Vary the parameters +* NB = block size +* NBMIN = minimum block size +* NX = crossover point +* NRHS = number of right hand sides +* + MAXTYP = 16 + NTYPES = MIN( MAXTYP, NTYPES ) + CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) + CALL XLAENV( 1, 1 ) + CALL XLAENV( 9, 25 ) +* +* Test the error exits +* + IF( TSTERR .AND. TSTCHK ) + $ CALL DERRBD( 'DBD', NOUT ) + IF( TSTERR .AND. TSTDRV ) + $ CALL DERRED( 'DBD', NOUT ) +* + DO 330 I = 1, NPARMS + NRHS = NSVAL( I ) + CALL XLAENV( 1, NBVAL( I ) ) + CALL XLAENV( 2, NBMIN( I ) ) + CALL XLAENV( 3, NXVAL( I ) ) + IF( NEWSD.EQ.0 ) THEN + DO 320 K = 1, 4 + ISEED( K ) = IOLDSD( K ) + 320 CONTINUE + END IF + WRITE( NOUT, FMT = 9995 )C3, NBVAL( I ), NBMIN( I ), + $ NXVAL( I ), NRHS + IF( TSTCHK ) THEN + CALL DCHKBD( NN, MVAL, NVAL, MAXTYP, DOTYPE, NRHS, ISEED, + $ THRESH, A( 1, 1 ), NMAX, D( 1, 1 ), + $ D( 1, 2 ), D( 1, 3 ), D( 1, 4 ), A( 1, 2 ), + $ NMAX, A( 1, 3 ), A( 1, 4 ), A( 1, 5 ), NMAX, + $ A( 1, 6 ), NMAX, A( 1, 7 ), A( 1, 8 ), WORK, + $ LWORK, IWORK, NOUT, INFO ) + IF( INFO.NE.0 ) + $ WRITE( NOUT, FMT = 9980 )'DCHKBD', INFO + END IF + IF( TSTDRV ) + $ CALL DDRVBD( NN, MVAL, NVAL, MAXTYP, DOTYPE, ISEED, + $ THRESH, A( 1, 1 ), NMAX, A( 1, 2 ), NMAX, + $ A( 1, 3 ), NMAX, A( 1, 4 ), A( 1, 5 ), + $ A( 1, 6 ), D( 1, 1 ), D( 1, 2 ), D( 1, 3 ), + $ WORK, LWORK, IWORK, NOUT, INFO ) + 330 CONTINUE +* + ELSE IF( LSAMEN( 3, C3, 'DEV' ) ) THEN +* +* -------------------------------------------- +* DEV: Nonsymmetric Eigenvalue Problem Driver +* DGEEV (eigenvalues and eigenvectors) +* -------------------------------------------- +* + MAXTYP = 21 + NTYPES = MIN( MAXTYP, NTYPES ) + IF( NTYPES.LE.0 ) THEN + WRITE( NOUT, FMT = 9990 )C3 + ELSE + IF( TSTERR ) + $ CALL DERRED( C3, NOUT ) + CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) + CALL DDRVEV( NN, NVAL, NTYPES, DOTYPE, 
ISEED, THRESH, NOUT, + $ A( 1, 1 ), NMAX, A( 1, 2 ), D( 1, 1 ), + $ D( 1, 2 ), D( 1, 3 ), D( 1, 4 ), A( 1, 3 ), + $ NMAX, A( 1, 4 ), NMAX, A( 1, 5 ), NMAX, RESULT, + $ WORK, LWORK, IWORK, INFO ) + IF( INFO.NE.0 ) + $ WRITE( NOUT, FMT = 9980 )'DGEEV', INFO + END IF + WRITE( NOUT, FMT = 9973 ) + GO TO 10 +* + ELSE IF( LSAMEN( 3, C3, 'DES' ) ) THEN +* +* -------------------------------------------- +* DES: Nonsymmetric Eigenvalue Problem Driver +* DGEES (Schur form) +* -------------------------------------------- +* + MAXTYP = 21 + NTYPES = MIN( MAXTYP, NTYPES ) + IF( NTYPES.LE.0 ) THEN + WRITE( NOUT, FMT = 9990 )C3 + ELSE + IF( TSTERR ) + $ CALL DERRED( C3, NOUT ) + CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) + CALL DDRVES( NN, NVAL, NTYPES, DOTYPE, ISEED, THRESH, NOUT, + $ A( 1, 1 ), NMAX, A( 1, 2 ), A( 1, 3 ), + $ D( 1, 1 ), D( 1, 2 ), D( 1, 3 ), D( 1, 4 ), + $ A( 1, 4 ), NMAX, RESULT, WORK, LWORK, IWORK, + $ LOGWRK, INFO ) + IF( INFO.NE.0 ) + $ WRITE( NOUT, FMT = 9980 )'DGEES', INFO + END IF + WRITE( NOUT, FMT = 9973 ) + GO TO 10 +* + ELSE IF( LSAMEN( 3, C3, 'DVX' ) ) THEN +* +* -------------------------------------------------------------- +* DVX: Nonsymmetric Eigenvalue Problem Expert Driver +* DGEEVX (eigenvalues, eigenvectors and condition numbers) +* -------------------------------------------------------------- +* + MAXTYP = 21 + NTYPES = MIN( MAXTYP, NTYPES ) + IF( NTYPES.LT.0 ) THEN + WRITE( NOUT, FMT = 9990 )C3 + ELSE + IF( TSTERR ) + $ CALL DERRED( C3, NOUT ) + CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) + CALL DDRVVX( NN, NVAL, NTYPES, DOTYPE, ISEED, THRESH, NIN, + $ NOUT, A( 1, 1 ), NMAX, A( 1, 2 ), D( 1, 1 ), + $ D( 1, 2 ), D( 1, 3 ), D( 1, 4 ), A( 1, 3 ), + $ NMAX, A( 1, 4 ), NMAX, A( 1, 5 ), NMAX, + $ D( 1, 5 ), D( 1, 6 ), D( 1, 7 ), D( 1, 8 ), + $ D( 1, 9 ), D( 1, 10 ), D( 1, 11 ), D( 1, 12 ), + $ RESULT, WORK, LWORK, IWORK, INFO ) + IF( INFO.NE.0 ) + $ WRITE( NOUT, FMT = 9980 )'DGEEVX', INFO + END IF + WRITE( NOUT, FMT = 9973 ) + 
GO TO 10 +* + ELSE IF( LSAMEN( 3, C3, 'DSX' ) ) THEN +* +* --------------------------------------------------- +* DSX: Nonsymmetric Eigenvalue Problem Expert Driver +* DGEESX (Schur form and condition numbers) +* --------------------------------------------------- +* + MAXTYP = 21 + NTYPES = MIN( MAXTYP, NTYPES ) + IF( NTYPES.LT.0 ) THEN + WRITE( NOUT, FMT = 9990 )C3 + ELSE + IF( TSTERR ) + $ CALL DERRED( C3, NOUT ) + CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) + CALL DDRVSX( NN, NVAL, NTYPES, DOTYPE, ISEED, THRESH, NIN, + $ NOUT, A( 1, 1 ), NMAX, A( 1, 2 ), A( 1, 3 ), + $ D( 1, 1 ), D( 1, 2 ), D( 1, 3 ), D( 1, 4 ), + $ D( 1, 5 ), D( 1, 6 ), A( 1, 4 ), NMAX, + $ A( 1, 5 ), RESULT, WORK, LWORK, IWORK, LOGWRK, + $ INFO ) + IF( INFO.NE.0 ) + $ WRITE( NOUT, FMT = 9980 )'DGEESX', INFO + END IF + WRITE( NOUT, FMT = 9973 ) + GO TO 10 +* + ELSE IF( LSAMEN( 3, C3, 'DGG' ) ) THEN +* +* ------------------------------------------------- +* DGG: Generalized Nonsymmetric Eigenvalue Problem +* ------------------------------------------------- +* Vary the parameters +* NB = block size +* NBMIN = minimum block size +* NS = number of shifts +* MAXB = minimum submatrix size +* IACC22: structured matrix multiply +* NBCOL = minimum column dimension for blocks +* + MAXTYP = 26 + NTYPES = MIN( MAXTYP, NTYPES ) + CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) + CALL XLAENV(1,1) + IF( TSTCHK .AND. TSTERR ) + $ CALL DERRGG( C3, NOUT ) + DO 350 I = 1, NPARMS + CALL XLAENV( 1, NBVAL( I ) ) + CALL XLAENV( 2, NBMIN( I ) ) + CALL XLAENV( 4, NSVAL( I ) ) + CALL XLAENV( 8, MXBVAL( I ) ) + CALL XLAENV( 16, IACC22( I ) ) + CALL XLAENV( 5, NBCOL( I ) ) +* + IF( NEWSD.EQ.0 ) THEN + DO 340 K = 1, 4 + ISEED( K ) = IOLDSD( K ) + 340 CONTINUE + END IF + WRITE( NOUT, FMT = 9996 )C3, NBVAL( I ), NBMIN( I ), + $ NSVAL( I ), MXBVAL( I ), IACC22( I ), NBCOL( I ) + TSTDIF = .FALSE. 
+ THRSHN = 10.D0 + IF( TSTCHK ) THEN + CALL DCHKGG( NN, NVAL, MAXTYP, DOTYPE, ISEED, THRESH, + $ TSTDIF, THRSHN, NOUT, A( 1, 1 ), NMAX, + $ A( 1, 2 ), A( 1, 3 ), A( 1, 4 ), A( 1, 5 ), + $ A( 1, 6 ), A( 1, 7 ), A( 1, 8 ), A( 1, 9 ), + $ NMAX, A( 1, 10 ), A( 1, 11 ), A( 1, 12 ), + $ D( 1, 1 ), D( 1, 2 ), D( 1, 3 ), D( 1, 4 ), + $ D( 1, 5 ), D( 1, 6 ), A( 1, 13 ), + $ A( 1, 14 ), WORK, LWORK, LOGWRK, RESULT, + $ INFO ) + IF( INFO.NE.0 ) + $ WRITE( NOUT, FMT = 9980 )'DCHKGG', INFO + END IF + 350 CONTINUE +* + ELSE IF( LSAMEN( 3, C3, 'DGS' ) ) THEN +* +* ------------------------------------------------- +* DGS: Generalized Nonsymmetric Eigenvalue Problem +* DGGES (Schur form) +* ------------------------------------------------- +* + MAXTYP = 26 + NTYPES = MIN( MAXTYP, NTYPES ) + IF( NTYPES.LE.0 ) THEN + WRITE( NOUT, FMT = 9990 )C3 + ELSE + IF( TSTERR ) + $ CALL DERRGG( C3, NOUT ) + CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) + CALL DDRGES( NN, NVAL, MAXTYP, DOTYPE, ISEED, THRESH, NOUT, + $ A( 1, 1 ), NMAX, A( 1, 2 ), A( 1, 3 ), + $ A( 1, 4 ), A( 1, 7 ), NMAX, A( 1, 8 ), + $ D( 1, 1 ), D( 1, 2 ), D( 1, 3 ), WORK, LWORK, + $ RESULT, LOGWRK, INFO ) + IF( INFO.NE.0 ) + $ WRITE( NOUT, FMT = 9980 )'DDRGES', INFO +* +* Blocked version +* + CALL XLAENV(16, 2) + CALL DDRGES3( NN, NVAL, MAXTYP, DOTYPE, ISEED, THRESH, NOUT, + $ A( 1, 1 ), NMAX, A( 1, 2 ), A( 1, 3 ), + $ A( 1, 4 ), A( 1, 7 ), NMAX, A( 1, 8 ), + $ D( 1, 1 ), D( 1, 2 ), D( 1, 3 ), WORK, LWORK, + $ RESULT, LOGWRK, INFO ) + IF( INFO.NE.0 ) + $ WRITE( NOUT, FMT = 9980 )'DDRGES3', INFO + END IF + WRITE( NOUT, FMT = 9973 ) + GO TO 10 +* + ELSE IF( DGX ) THEN +* +* ------------------------------------------------- +* DGX: Generalized Nonsymmetric Eigenvalue Problem +* DGGESX (Schur form and condition numbers) +* ------------------------------------------------- +* + MAXTYP = 5 + NTYPES = MAXTYP + IF( NN.LT.0 ) THEN + WRITE( NOUT, FMT = 9990 )C3 + ELSE + IF( TSTERR ) + $ CALL DERRGG( C3, NOUT ) + CALL ALAREQ( C3, 
NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) + CALL XLAENV( 5, 2 ) + CALL DDRGSX( NN, NCMAX, THRESH, NIN, NOUT, A( 1, 1 ), NMAX, + $ A( 1, 2 ), A( 1, 3 ), A( 1, 4 ), A( 1, 5 ), + $ A( 1, 6 ), D( 1, 1 ), D( 1, 2 ), D( 1, 3 ), + $ C( 1, 1 ), NCMAX*NCMAX, A( 1, 12 ), WORK, + $ LWORK, IWORK, LIWORK, LOGWRK, INFO ) + IF( INFO.NE.0 ) + $ WRITE( NOUT, FMT = 9980 )'DDRGSX', INFO + END IF + WRITE( NOUT, FMT = 9973 ) + GO TO 10 +* + ELSE IF( LSAMEN( 3, C3, 'DGV' ) ) THEN +* +* ------------------------------------------------- +* DGV: Generalized Nonsymmetric Eigenvalue Problem +* DGGEV (Eigenvalue/vector form) +* ------------------------------------------------- +* + MAXTYP = 26 + NTYPES = MIN( MAXTYP, NTYPES ) + IF( NTYPES.LE.0 ) THEN + WRITE( NOUT, FMT = 9990 )C3 + ELSE + IF( TSTERR ) + $ CALL DERRGG( C3, NOUT ) + CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) + CALL DDRGEV( NN, NVAL, MAXTYP, DOTYPE, ISEED, THRESH, NOUT, + $ A( 1, 1 ), NMAX, A( 1, 2 ), A( 1, 3 ), + $ A( 1, 4 ), A( 1, 7 ), NMAX, A( 1, 8 ), + $ A( 1, 9 ), NMAX, D( 1, 1 ), D( 1, 2 ), + $ D( 1, 3 ), D( 1, 4 ), D( 1, 5 ), D( 1, 6 ), + $ WORK, LWORK, RESULT, INFO ) + IF( INFO.NE.0 ) + $ WRITE( NOUT, FMT = 9980 )'DDRGEV', INFO +* +* Blocked version +* + CALL DDRGEV3( NN, NVAL, MAXTYP, DOTYPE, ISEED, THRESH, NOUT, + $ A( 1, 1 ), NMAX, A( 1, 2 ), A( 1, 3 ), + $ A( 1, 4 ), A( 1, 7 ), NMAX, A( 1, 8 ), + $ A( 1, 9 ), NMAX, D( 1, 1 ), D( 1, 2 ), + $ D( 1, 3 ), D( 1, 4 ), D( 1, 5 ), D( 1, 6 ), + $ WORK, LWORK, RESULT, INFO ) + IF( INFO.NE.0 ) + $ WRITE( NOUT, FMT = 9980 )'DDRGEV3', INFO + END IF + WRITE( NOUT, FMT = 9973 ) + GO TO 10 +* + ELSE IF( DXV ) THEN +* +* ------------------------------------------------- +* DXV: Generalized Nonsymmetric Eigenvalue Problem +* DGGEVX (eigenvalue/vector with condition numbers) +* ------------------------------------------------- +* + MAXTYP = 2 + NTYPES = MAXTYP + IF( NN.LT.0 ) THEN + WRITE( NOUT, FMT = 9990 )C3 + ELSE + IF( TSTERR ) + $ CALL DERRGG( C3, NOUT ) + CALL ALAREQ( C3, 
NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) + CALL DDRGVX( NN, THRESH, NIN, NOUT, A( 1, 1 ), NMAX, + $ A( 1, 2 ), A( 1, 3 ), A( 1, 4 ), D( 1, 1 ), + $ D( 1, 2 ), D( 1, 3 ), A( 1, 5 ), A( 1, 6 ), + $ IWORK( 1 ), IWORK( 2 ), D( 1, 4 ), D( 1, 5 ), + $ D( 1, 6 ), D( 1, 7 ), D( 1, 8 ), D( 1, 9 ), + $ WORK, LWORK, IWORK( 3 ), LIWORK-2, RESULT, + $ LOGWRK, INFO ) +* + IF( INFO.NE.0 ) + $ WRITE( NOUT, FMT = 9980 )'DDRGVX', INFO + END IF + WRITE( NOUT, FMT = 9973 ) + GO TO 10 +* + ELSE IF( LSAMEN( 3, C3, 'DSB' ) ) THEN +* +* ------------------------------ +* DSB: Symmetric Band Reduction +* ------------------------------ +* + MAXTYP = 15 + NTYPES = MIN( MAXTYP, NTYPES ) + CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) + IF( TSTERR ) + $ CALL DERRST( 'DSB', NOUT ) +* CALL DCHKSB( NN, NVAL, NK, KVAL, MAXTYP, DOTYPE, ISEED, THRESH, +* $ NOUT, A( 1, 1 ), NMAX, D( 1, 1 ), D( 1, 2 ), +* $ A( 1, 2 ), NMAX, WORK, LWORK, RESULT, INFO ) + CALL DCHKSB2STG( NN, NVAL, NK, KVAL, MAXTYP, DOTYPE, ISEED, + $ THRESH, NOUT, A( 1, 1 ), NMAX, D( 1, 1 ), + $ D( 1, 2 ), D( 1, 3 ), D( 1, 4 ), D( 1, 5 ), + $ A( 1, 2 ), NMAX, WORK, LWORK, RESULT, INFO ) + IF( INFO.NE.0 ) + $ WRITE( NOUT, FMT = 9980 )'DCHKSB', INFO +* + ELSE IF( LSAMEN( 3, C3, 'DBB' ) ) THEN +* +* ------------------------------ +* DBB: General Band Reduction +* ------------------------------ +* + MAXTYP = 15 + NTYPES = MIN( MAXTYP, NTYPES ) + CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) + DO 370 I = 1, NPARMS + NRHS = NSVAL( I ) +* + IF( NEWSD.EQ.0 ) THEN + DO 360 K = 1, 4 + ISEED( K ) = IOLDSD( K ) + 360 CONTINUE + END IF + WRITE( NOUT, FMT = 9966 )C3, NRHS + CALL DCHKBB( NN, MVAL, NVAL, NK, KVAL, MAXTYP, DOTYPE, NRHS, + $ ISEED, THRESH, NOUT, A( 1, 1 ), NMAX, + $ A( 1, 2 ), 2*NMAX, D( 1, 1 ), D( 1, 2 ), + $ A( 1, 4 ), NMAX, A( 1, 5 ), NMAX, A( 1, 6 ), + $ NMAX, A( 1, 7 ), WORK, LWORK, RESULT, INFO ) + IF( INFO.NE.0 ) + $ WRITE( NOUT, FMT = 9980 )'DCHKBB', INFO + 370 CONTINUE +* + ELSE IF( LSAMEN( 3, C3, 'GLM' ) ) THEN +* +* 
----------------------------------------- +* GLM: Generalized Linear Regression Model +* ----------------------------------------- +* + CALL XLAENV( 1, 1 ) + IF( TSTERR ) + $ CALL DERRGG( 'GLM', NOUT ) + CALL DCKGLM( NN, MVAL, PVAL, NVAL, NTYPES, ISEED, THRESH, NMAX, + $ A( 1, 1 ), A( 1, 2 ), B( 1, 1 ), B( 1, 2 ), X, + $ WORK, D( 1, 1 ), NIN, NOUT, INFO ) + IF( INFO.NE.0 ) + $ WRITE( NOUT, FMT = 9980 )'DCKGLM', INFO +* + ELSE IF( LSAMEN( 3, C3, 'GQR' ) ) THEN +* +* ------------------------------------------ +* GQR: Generalized QR and RQ factorizations +* ------------------------------------------ +* + CALL XLAENV( 1, 1 ) + IF( TSTERR ) + $ CALL DERRGG( 'GQR', NOUT ) + CALL DCKGQR( NN, MVAL, NN, PVAL, NN, NVAL, NTYPES, ISEED, + $ THRESH, NMAX, A( 1, 1 ), A( 1, 2 ), A( 1, 3 ), + $ A( 1, 4 ), TAUA, B( 1, 1 ), B( 1, 2 ), B( 1, 3 ), + $ B( 1, 4 ), B( 1, 5 ), TAUB, WORK, D( 1, 1 ), NIN, + $ NOUT, INFO ) + IF( INFO.NE.0 ) + $ WRITE( NOUT, FMT = 9980 )'DCKGQR', INFO +* + ELSE IF( LSAMEN( 3, C3, 'GSV' ) ) THEN +* +* ---------------------------------------------- +* GSV: Generalized Singular Value Decomposition +* ---------------------------------------------- +* + CALL XLAENV(1,1) + IF( TSTERR ) + $ CALL DERRGG( 'GSV', NOUT ) + CALL DCKGSV( NN, MVAL, PVAL, NVAL, NTYPES, ISEED, THRESH, NMAX, + $ A( 1, 1 ), A( 1, 2 ), B( 1, 1 ), B( 1, 2 ), + $ A( 1, 3 ), B( 1, 3 ), A( 1, 4 ), TAUA, TAUB, + $ B( 1, 4 ), IWORK, WORK, D( 1, 1 ), NIN, NOUT, + $ INFO ) + IF( INFO.NE.0 ) + $ WRITE( NOUT, FMT = 9980 )'DCKGSV', INFO +* + ELSE IF( LSAMEN( 3, C3, 'CSD' ) ) THEN +* +* ---------------------------------------------- +* CSD: CS Decomposition +* ---------------------------------------------- +* + CALL XLAENV(1,1) + IF( TSTERR ) + $ CALL DERRGG( 'CSD', NOUT ) + CALL DCKCSD( NN, MVAL, PVAL, NVAL, NTYPES, ISEED, THRESH, NMAX, + $ A( 1, 1 ), A( 1, 2 ), A( 1, 3 ), A( 1, 4 ), + $ A( 1, 5 ), A( 1, 6 ), A( 1, 7 ), IWORK, WORK, + $ D( 1, 1 ), NIN, NOUT, INFO ) + IF( INFO.NE.0 ) + $ WRITE( NOUT, FMT 
= 9980 )'DCKCSD', INFO +* + ELSE IF( LSAMEN( 3, C3, 'LSE' ) ) THEN +* +* -------------------------------------- +* LSE: Constrained Linear Least Squares +* -------------------------------------- +* + CALL XLAENV( 1, 1 ) + IF( TSTERR ) + $ CALL DERRGG( 'LSE', NOUT ) + CALL DCKLSE( NN, MVAL, PVAL, NVAL, NTYPES, ISEED, THRESH, NMAX, + $ A( 1, 1 ), A( 1, 2 ), B( 1, 1 ), B( 1, 2 ), X, + $ WORK, D( 1, 1 ), NIN, NOUT, INFO ) + IF( INFO.NE.0 ) + $ WRITE( NOUT, FMT = 9980 )'DCKLSE', INFO +* + ELSE + WRITE( NOUT, FMT = * ) + WRITE( NOUT, FMT = * ) + WRITE( NOUT, FMT = 9992 )C3 + END IF + IF( .NOT.( DGX .OR. DXV ) ) + $ GO TO 190 + 380 CONTINUE + WRITE( NOUT, FMT = 9994 ) + S2 = DSECND( ) + WRITE( NOUT, FMT = 9993 )S2 - S1 +* + DEALLOCATE (A, STAT = AllocateStatus) + DEALLOCATE (B, STAT = AllocateStatus) + DEALLOCATE (C, STAT = AllocateStatus) + DEALLOCATE (WORK, STAT = AllocateStatus) +* + 9999 FORMAT( / ' Execution not attempted due to input errors' ) + 9997 FORMAT( / / 1X, A3, ': NB =', I4, ', NBMIN =', I4, ', NX =', I4 ) + 9996 FORMAT( / / 1X, A3, ': NB =', I4, ', NBMIN =', I4, ', NS =', I4, + $ ', MAXB =', I4, ', IACC22 =', I4, ', NBCOL =', I4 ) + 9995 FORMAT( / / 1X, A3, ': NB =', I4, ', NBMIN =', I4, ', NX =', I4, + $ ', NRHS =', I4 ) + 9994 FORMAT( / / ' End of tests' ) + 9993 FORMAT( ' Total time used = ', F12.2, ' seconds', / ) + 9992 FORMAT( 1X, A3, ': Unrecognized path name' ) + 9991 FORMAT( / / ' *** Invalid integer value in column ', I2, + $ ' of input', ' line:', / A79 ) + 9990 FORMAT( / / 1X, A3, ' routines were not tested' ) + 9989 FORMAT( ' Invalid input value: ', A, '=', I6, '; must be >=', + $ I6 ) + 9988 FORMAT( ' Invalid input value: ', A, '=', I6, '; must be <=', + $ I6 ) + 9987 FORMAT( ' Tests of the Nonsymmetric Eigenvalue Problem routines' ) + 9986 FORMAT( ' Tests of the Symmetric Eigenvalue Problem routines' ) + 9985 FORMAT( ' Tests of the Singular Value Decomposition routines' ) + 9984 FORMAT( / ' The following parameter values will be used:' ) + 
9983 FORMAT( 4X, A, 10I6, / 10X, 10I6 ) + 9982 FORMAT( / ' Routines pass computational tests if test ratio is ', + $ 'less than', F8.2, / ) + 9981 FORMAT( ' Relative machine ', A, ' is taken to be', D16.6 ) + 9980 FORMAT( ' *** Error code from ', A, ' = ', I4 ) + 9979 FORMAT( / ' Tests of the Nonsymmetric Eigenvalue Problem Driver', + $ / ' DGEEV (eigenvalues and eigevectors)' ) + 9978 FORMAT( / ' Tests of the Nonsymmetric Eigenvalue Problem Driver', + $ / ' DGEES (Schur form)' ) + 9977 FORMAT( / ' Tests of the Nonsymmetric Eigenvalue Problem Expert', + $ ' Driver', / ' DGEEVX (eigenvalues, eigenvectors and', + $ ' condition numbers)' ) + 9976 FORMAT( / ' Tests of the Nonsymmetric Eigenvalue Problem Expert', + $ ' Driver', / ' DGEESX (Schur form and condition', + $ ' numbers)' ) + 9975 FORMAT( / ' Tests of the Generalized Nonsymmetric Eigenvalue ', + $ 'Problem routines' ) + 9974 FORMAT( ' Tests of DSBTRD', / ' (reduction of a symmetric band ', + $ 'matrix to tridiagonal form)' ) + 9973 FORMAT( / 1X, 71( '-' ) ) + 9972 FORMAT( / ' LAPACK VERSION ', I1, '.', I1, '.', I1 ) + 9971 FORMAT( / ' Tests of the Generalized Linear Regression Model ', + $ 'routines' ) + 9970 FORMAT( / ' Tests of the Generalized QR and RQ routines' ) + 9969 FORMAT( / ' Tests of the Generalized Singular Value', + $ ' Decomposition routines' ) + 9968 FORMAT( / ' Tests of the Linear Least Squares routines' ) + 9967 FORMAT( ' Tests of DGBBRD', / ' (reduction of a general band ', + $ 'matrix to real bidiagonal form)' ) + 9966 FORMAT( / / 1X, A3, ': NRHS =', I4 ) + 9965 FORMAT( / ' Tests of the Generalized Nonsymmetric Eigenvalue ', + $ 'Problem Expert Driver DGGESX' ) + 9964 FORMAT( / ' Tests of the Generalized Nonsymmetric Eigenvalue ', + $ 'Problem Driver DGGES' ) + 9963 FORMAT( / ' Tests of the Generalized Nonsymmetric Eigenvalue ', + $ 'Problem Driver DGGEV' ) + 9962 FORMAT( / ' Tests of the Generalized Nonsymmetric Eigenvalue ', + $ 'Problem Expert Driver DGGEVX' ) + 9961 FORMAT( / / 1X, A3, 
': NB =', I4, ', NBMIN =', I4, ', NX =', I4, + $ ', INMIN=', I4, + $ ', INWIN =', I4, ', INIBL =', I4, ', ISHFTS =', I4, + $ ', IACC22 =', I4) + 9960 FORMAT( / ' Tests of the CS Decomposition routines' ) +* +* End of DCHKEE +* + END From 9cf861e8faf21cbd623ef762127767d241a86088 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sun, 28 Feb 2021 18:51:03 +0100 Subject: [PATCH 112/134] Add rewritten cchkee.F from Reference-LAPACK PR335 --- lapack-netlib/TESTING/EIG/cchkee.F | 2553 ++++++++++++++++++++++++++++ 1 file changed, 2553 insertions(+) create mode 100644 lapack-netlib/TESTING/EIG/cchkee.F diff --git a/lapack-netlib/TESTING/EIG/cchkee.F b/lapack-netlib/TESTING/EIG/cchkee.F new file mode 100644 index 000000000..0d3d7493c --- /dev/null +++ b/lapack-netlib/TESTING/EIG/cchkee.F @@ -0,0 +1,2553 @@ +*> \brief \b CCHKEE +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* PROGRAM CCHKEE +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CCHKEE tests the COMPLEX LAPACK subroutines for the matrix +*> eigenvalue problem. 
The test paths in this version are +*> +*> NEP (Nonsymmetric Eigenvalue Problem): +*> Test CGEHRD, CUNGHR, CHSEQR, CTREVC, CHSEIN, and CUNMHR +*> +*> SEP (Hermitian Eigenvalue Problem): +*> Test CHETRD, CUNGTR, CSTEQR, CSTERF, CSTEIN, CSTEDC, +*> and drivers CHEEV(X), CHBEV(X), CHPEV(X), +*> CHEEVD, CHBEVD, CHPEVD +*> +*> SVD (Singular Value Decomposition): +*> Test CGEBRD, CUNGBR, and CBDSQR +*> and the drivers CGESVD, CGESDD +*> +*> CEV (Nonsymmetric Eigenvalue/eigenvector Driver): +*> Test CGEEV +*> +*> CES (Nonsymmetric Schur form Driver): +*> Test CGEES +*> +*> CVX (Nonsymmetric Eigenvalue/eigenvector Expert Driver): +*> Test CGEEVX +*> +*> CSX (Nonsymmetric Schur form Expert Driver): +*> Test CGEESX +*> +*> CGG (Generalized Nonsymmetric Eigenvalue Problem): +*> Test CGGHD3, CGGBAL, CGGBAK, CHGEQZ, and CTGEVC +*> +*> CGS (Generalized Nonsymmetric Schur form Driver): +*> Test CGGES +*> +*> CGV (Generalized Nonsymmetric Eigenvalue/eigenvector Driver): +*> Test CGGEV +*> +*> CGX (Generalized Nonsymmetric Schur form Expert Driver): +*> Test CGGESX +*> +*> CXV (Generalized Nonsymmetric Eigenvalue/eigenvector Expert Driver): +*> Test CGGEVX +*> +*> CSG (Hermitian Generalized Eigenvalue Problem): +*> Test CHEGST, CHEGV, CHEGVD, CHEGVX, CHPGST, CHPGV, CHPGVD, +*> CHPGVX, CHBGST, CHBGV, CHBGVD, and CHBGVX +*> +*> CHB (Hermitian Band Eigenvalue Problem): +*> Test CHBTRD +*> +*> CBB (Band Singular Value Decomposition): +*> Test CGBBRD +*> +*> CEC (Eigencondition estimation): +*> Test CTRSYL, CTREXC, CTRSNA, and CTRSEN +*> +*> CBL (Balancing a general matrix) +*> Test CGEBAL +*> +*> CBK (Back transformation on a balanced matrix) +*> Test CGEBAK +*> +*> CGL (Balancing a matrix pair) +*> Test CGGBAL +*> +*> CGK (Back transformation on a matrix pair) +*> Test CGGBAK +*> +*> GLM (Generalized Linear Regression Model): +*> Tests CGGGLM +*> +*> GQR (Generalized QR and RQ factorizations): +*> Tests CGGQRF and CGGRQF +*> +*> GSV (Generalized Singular Value Decomposition): +*> 
Tests CGGSVD, CGGSVP, CTGSJA, CLAGS2, CLAPLL, and CLAPMT +*> +*> CSD (CS decomposition): +*> Tests CUNCSD +*> +*> LSE (Constrained Linear Least Squares): +*> Tests CGGLSE +*> +*> Each test path has a different set of inputs, but the data sets for +*> the driver routines xEV, xES, xVX, and xSX can be concatenated in a +*> single input file. The first line of input should contain one of the +*> 3-character path names in columns 1-3. The number of remaining lines +*> depends on what is found on the first line. +*> +*> The number of matrix types used in testing is often controllable from +*> the input file. The number of matrix types for each path, and the +*> test routine that describes them, is as follows: +*> +*> Path name(s) Types Test routine +*> +*> CHS or NEP 21 CCHKHS +*> CST or SEP 21 CCHKST (routines) +*> 18 CDRVST (drivers) +*> CBD or SVD 16 CCHKBD (routines) +*> 5 CDRVBD (drivers) +*> CEV 21 CDRVEV +*> CES 21 CDRVES +*> CVX 21 CDRVVX +*> CSX 21 CDRVSX +*> CGG 26 CCHKGG (routines) +*> CGS 26 CDRGES +*> CGX 5 CDRGSX +*> CGV 26 CDRGEV +*> CXV 2 CDRGVX +*> CSG 21 CDRVSG +*> CHB 15 CCHKHB +*> CBB 15 CCHKBB +*> CEC - CCHKEC +*> CBL - CCHKBL +*> CBK - CCHKBK +*> CGL - CCHKGL +*> CGK - CCHKGK +*> GLM 8 CCKGLM +*> GQR 8 CCKGQR +*> GSV 8 CCKGSV +*> CSD 3 CCKCSD +*> LSE 8 CCKLSE +*> +*>----------------------------------------------------------------------- +*> +*> NEP input file: +*> +*> line 2: NN, INTEGER +*> Number of values of N. +*> +*> line 3: NVAL, INTEGER array, dimension (NN) +*> The values for the matrix dimension N. +*> +*> line 4: NPARMS, INTEGER +*> Number of values of the parameters NB, NBMIN, NX, NS, and +*> MAXB. +*> +*> line 5: NBVAL, INTEGER array, dimension (NPARMS) +*> The values for the blocksize NB. +*> +*> line 6: NBMIN, INTEGER array, dimension (NPARMS) +*> The values for the minimum blocksize NBMIN. +*> +*> line 7: NXVAL, INTEGER array, dimension (NPARMS) +*> The values for the crossover point NX. 
+*> +*> line 8: INMIN, INTEGER array, dimension (NPARMS) +*> LAHQR vs TTQRE crossover point, >= 11 +*> +*> line 9: INWIN, INTEGER array, dimension (NPARMS) +*> recommended deflation window size +*> +*> line 10: INIBL, INTEGER array, dimension (NPARMS) +*> nibble crossover point +*> +*> line 11: ISHFTS, INTEGER array, dimension (NPARMS) +*> number of simultaneous shifts +*> +*> line 12: IACC22, INTEGER array, dimension (NPARMS) +*> select structured matrix multiply: 0, 1 or 2 +*> +*> line 13: THRESH +*> Threshold value for the test ratios. Information will be +*> printed about each test for which the test ratio is greater +*> than or equal to the threshold. To have all of the test +*> ratios printed, use THRESH = 0.0 . +*> +*> line 14: NEWSD, INTEGER +*> A code indicating how to set the random number seed. +*> = 0: Set the seed to a default value before each run +*> = 1: Initialize the seed to a default value only before the +*> first run +*> = 2: Like 1, but use the seed values on the next line +*> +*> If line 14 was 2: +*> +*> line 15: INTEGER array, dimension (4) +*> Four integer values for the random number seed. +*> +*> lines 15-EOF: The remaining lines occur in sets of 1 or 2 and allow +*> the user to specify the matrix types. Each line contains +*> a 3-character path name in columns 1-3, and the number +*> of matrix types must be the first nonblank item in columns +*> 4-80. If the number of matrix types is at least 1 but is +*> less than the maximum number of possible types, a second +*> line will be read to get the numbers of the matrix types to +*> be used. For example, +*> NEP 21 +*> requests all of the matrix types for the nonsymmetric +*> eigenvalue problem, while +*> NEP 4 +*> 9 10 11 12 +*> requests only matrices of type 9, 10, 11, and 12. +*> +*> The valid 3-character path names are 'NEP' or 'CHS' for the +*> nonsymmetric eigenvalue routines.
+*> +*>----------------------------------------------------------------------- +*> +*> SEP or CSG input file: +*> +*> line 2: NN, INTEGER +*> Number of values of N. +*> +*> line 3: NVAL, INTEGER array, dimension (NN) +*> The values for the matrix dimension N. +*> +*> line 4: NPARMS, INTEGER +*> Number of values of the parameters NB, NBMIN, and NX. +*> +*> line 5: NBVAL, INTEGER array, dimension (NPARMS) +*> The values for the blocksize NB. +*> +*> line 6: NBMIN, INTEGER array, dimension (NPARMS) +*> The values for the minimum blocksize NBMIN. +*> +*> line 7: NXVAL, INTEGER array, dimension (NPARMS) +*> The values for the crossover point NX. +*> +*> line 8: THRESH +*> Threshold value for the test ratios. Information will be +*> printed about each test for which the test ratio is greater +*> than or equal to the threshold. +*> +*> line 9: TSTCHK, LOGICAL +*> Flag indicating whether or not to test the LAPACK routines. +*> +*> line 10: TSTDRV, LOGICAL +*> Flag indicating whether or not to test the driver routines. +*> +*> line 11: TSTERR, LOGICAL +*> Flag indicating whether or not to test the error exits for +*> the LAPACK routines and driver routines. +*> +*> line 12: NEWSD, INTEGER +*> A code indicating how to set the random number seed. +*> = 0: Set the seed to a default value before each run +*> = 1: Initialize the seed to a default value only before the +*> first run +*> = 2: Like 1, but use the seed values on the next line +*> +*> If line 12 was 2: +*> +*> line 13: INTEGER array, dimension (4) +*> Four integer values for the random number seed. +*> +*> lines 13-EOF: Lines specifying matrix types, as for NEP. +*> The valid 3-character path names are 'SEP' or 'CST' for the +*> Hermitian eigenvalue routines and driver routines, and +*> 'CSG' for the routines for the Hermitian generalized +*> eigenvalue problem. 
+*> +*>----------------------------------------------------------------------- +*> +*> SVD input file: +*> +*> line 2: NN, INTEGER +*> Number of values of M and N. +*> +*> line 3: MVAL, INTEGER array, dimension (NN) +*> The values for the matrix row dimension M. +*> +*> line 4: NVAL, INTEGER array, dimension (NN) +*> The values for the matrix column dimension N. +*> +*> line 5: NPARMS, INTEGER +*> Number of values of the parameter NB, NBMIN, NX, and NRHS. +*> +*> line 6: NBVAL, INTEGER array, dimension (NPARMS) +*> The values for the blocksize NB. +*> +*> line 7: NBMIN, INTEGER array, dimension (NPARMS) +*> The values for the minimum blocksize NBMIN. +*> +*> line 8: NXVAL, INTEGER array, dimension (NPARMS) +*> The values for the crossover point NX. +*> +*> line 9: NSVAL, INTEGER array, dimension (NPARMS) +*> The values for the number of right hand sides NRHS. +*> +*> line 10: THRESH +*> Threshold value for the test ratios. Information will be +*> printed about each test for which the test ratio is greater +*> than or equal to the threshold. +*> +*> line 11: TSTCHK, LOGICAL +*> Flag indicating whether or not to test the LAPACK routines. +*> +*> line 12: TSTDRV, LOGICAL +*> Flag indicating whether or not to test the driver routines. +*> +*> line 13: TSTERR, LOGICAL +*> Flag indicating whether or not to test the error exits for +*> the LAPACK routines and driver routines. +*> +*> line 14: NEWSD, INTEGER +*> A code indicating how to set the random number seed. +*> = 0: Set the seed to a default value before each run +*> = 1: Initialize the seed to a default value only before the +*> first run +*> = 2: Like 1, but use the seed values on the next line +*> +*> If line 14 was 2: +*> +*> line 15: INTEGER array, dimension (4) +*> Four integer values for the random number seed. +*> +*> lines 15-EOF: Lines specifying matrix types, as for NEP. +*> The 3-character path names are 'SVD' or 'CBD' for both the +*> SVD routines and the SVD driver routines. 
+*> +*>----------------------------------------------------------------------- +*> +*> CEV and CES data files: +*> +*> line 1: 'CEV' or 'CES' in columns 1 to 3. +*> +*> line 2: NSIZES, INTEGER +*> Number of sizes of matrices to use. Should be at least 0 +*> and at most 20. If NSIZES = 0, no testing is done +*> (although the remaining 3 lines are still read). +*> +*> line 3: NN, INTEGER array, dimension(NSIZES) +*> Dimensions of matrices to be tested. +*> +*> line 4: NB, NBMIN, NX, NS, NBCOL, INTEGERs +*> These integer parameters determine how blocking is done +*> (see ILAENV for details) +*> NB : block size +*> NBMIN : minimum block size +*> NX : minimum dimension for blocking +*> NS : number of shifts in xHSEQR +*> NBCOL : minimum column dimension for blocking +*> +*> line 5: THRESH, REAL +*> The test threshold against which computed residuals are +*> compared. Should generally be in the range from 10. to 20. +*> If it is 0., all test case data will be printed. +*> +*> line 6: NEWSD, INTEGER +*> A code indicating how to set the random number seed. +*> = 0: Set the seed to a default value before each run +*> = 1: Initialize the seed to a default value only before the +*> first run +*> = 2: Like 1, but use the seed values on the next line +*> +*> If line 6 was 2: +*> +*> line 7: INTEGER array, dimension (4) +*> Four integer values for the random number seed. +*> +*> lines 8 and following: Lines specifying matrix types, as for NEP. +*> The 3-character path name is 'CEV' to test CGEEV, or +*> 'CES' to test CGEES. +*> +*>----------------------------------------------------------------------- +*> +*> The CVX data has two parts. The first part is identical to CEV, +*> and the second part consists of test matrices with precomputed +*> solutions. +*> +*> line 1: 'CVX' in columns 1-3. +*> +*> line 2: NSIZES, INTEGER +*> If NSIZES = 0, no testing of randomly generated examples +*> is done, but any precomputed examples are tested. 
+*> +*> line 3: NN, INTEGER array, dimension(NSIZES) +*> +*> line 4: NB, NBMIN, NX, NS, NBCOL, INTEGERs +*> +*> line 5: THRESH, REAL +*> +*> line 6: NEWSD, INTEGER +*> +*> If line 6 was 2: +*> +*> line 7: INTEGER array, dimension (4) +*> +*> lines 8 and following: The first line contains 'CVX' in columns 1-3 +*> followed by the number of matrix types, possibly with +*> a second line to specify certain matrix types. +*> If the number of matrix types = 0, no testing of randomly +*> generated examples is done, but any precomputed examples +*> are tested. +*> +*> remaining lines : Each matrix is stored on 1+N+N**2 lines, where N is +*> its dimension. The first line contains the dimension N and +*> ISRT (two integers). ISRT indicates whether the last N lines +*> are sorted by increasing real part of the eigenvalue +*> (ISRT=0) or by increasing imaginary part (ISRT=1). The next +*> N**2 lines contain the matrix rowwise, one entry per line. +*> The last N lines correspond to each eigenvalue. Each of +*> these last N lines contains 4 real values: the real part of +*> the eigenvalue, the imaginary part of the eigenvalue, the +*> reciprocal condition number of the eigenvalue, and the +*> reciprocal condition number of the eigenvector. The +*> end of data is indicated by dimension N=0. Even if no data +*> is to be tested, there must be at least one line containing +*> N=0. +*> +*>----------------------------------------------------------------------- +*> +*> The CSX data is like CVX. The first part is identical to CEV, and the +*> second part consists of test matrices with precomputed solutions. +*> +*> line 1: 'CSX' in columns 1-3. +*> +*> line 2: NSIZES, INTEGER +*> If NSIZES = 0, no testing of randomly generated examples +*> is done, but any precomputed examples are tested.
+*> +*> line 3: NN, INTEGER array, dimension(NSIZES) +*> +*> line 4: NB, NBMIN, NX, NS, NBCOL, INTEGERs +*> +*> line 5: THRESH, REAL +*> +*> line 6: NEWSD, INTEGER +*> +*> If line 6 was 2: +*> +*> line 7: INTEGER array, dimension (4) +*> +*> lines 8 and following: The first line contains 'CSX' in columns 1-3 +*> followed by the number of matrix types, possibly with +*> a second line to specify certain matrix types. +*> If the number of matrix types = 0, no testing of randomly +*> generated examples is done, but any precomputed examples +*> are tested. +*> +*> remaining lines : Each matrix is stored on 3+N**2 lines, where N is +*> its dimension. The first line contains the dimension N, the +*> dimension M of an invariant subspace, and ISRT. The second +*> line contains M integers, identifying the eigenvalues in the +*> invariant subspace (by their position in a list of +*> eigenvalues ordered by increasing real part (if ISRT=0) or +*> by increasing imaginary part (if ISRT=1)). The next N**2 +*> lines contain the matrix rowwise. The last line contains the +*> reciprocal condition number for the average of the selected +*> eigenvalues, and the reciprocal condition number for the +*> corresponding right invariant subspace. The end of data is +*> indicated by a line containing N=0, M=0, and ISRT = 0. Even +*> if no data is to be tested, there must be at least one line +*> containing N=0, M=0 and ISRT=0. +*> +*>----------------------------------------------------------------------- +*> +*> CGG input file: +*> +*> line 2: NN, INTEGER +*> Number of values of N. +*> +*> line 3: NVAL, INTEGER array, dimension (NN) +*> The values for the matrix dimension N. +*> +*> line 4: NPARMS, INTEGER +*> Number of values of the parameters NB, NBMIN, NBCOL, NS, and +*> MAXB. +*> +*> line 5: NBVAL, INTEGER array, dimension (NPARMS) +*> The values for the blocksize NB. +*> +*> line 6: NBMIN, INTEGER array, dimension (NPARMS) +*> The values for NBMIN, the minimum row dimension for blocks.
+*> +*> line 7: NSVAL, INTEGER array, dimension (NPARMS) +*> The values for the number of shifts. +*> +*> line 8: MXBVAL, INTEGER array, dimension (NPARMS) +*> The values for MAXB, used in determining minimum blocksize. +*> +*> line 9: IACC22, INTEGER array, dimension (NPARMS) +*> Select structured matrix multiply (1 or 2). +*> +*> line 10: NBCOL, INTEGER array, dimension (NPARMS) +*> The values for NBCOL, the minimum column dimension for +*> blocks. +*> +*> line 11: THRESH +*> Threshold value for the test ratios. Information will be +*> printed about each test for which the test ratio is greater +*> than or equal to the threshold. +*> +*> line 12: TSTCHK, LOGICAL +*> Flag indicating whether or not to test the LAPACK routines. +*> +*> line 13: TSTDRV, LOGICAL +*> Flag indicating whether or not to test the driver routines. +*> +*> line 14: TSTERR, LOGICAL +*> Flag indicating whether or not to test the error exits for +*> the LAPACK routines and driver routines. +*> +*> line 15: NEWSD, INTEGER +*> A code indicating how to set the random number seed. +*> = 0: Set the seed to a default value before each run +*> = 1: Initialize the seed to a default value only before the +*> first run +*> = 2: Like 1, but use the seed values on the next line +*> +*> If line 15 was 2: +*> +*> line 16: INTEGER array, dimension (4) +*> Four integer values for the random number seed. +*> +*> lines 17-EOF: Lines specifying matrix types, as for NEP. +*> The 3-character path name is 'CGG' for the generalized +*> eigenvalue problem routines and driver routines. +*> +*>----------------------------------------------------------------------- +*> +*> CGS and CGV input files: +*> +*> line 1: 'CGS' or 'CGV' in columns 1 to 3. +*> +*> line 2: NN, INTEGER +*> Number of values of N. +*> +*> line 3: NVAL, INTEGER array, dimension(NN) +*> Dimensions of matrices to be tested.
+*> +*> line 4: NB, NBMIN, NX, NS, NBCOL, INTEGERs +*> These integer parameters determine how blocking is done +*> (see ILAENV for details) +*> NB : block size +*> NBMIN : minimum block size +*> NX : minimum dimension for blocking +*> NS : number of shifts in xHGEQR +*> NBCOL : minimum column dimension for blocking +*> +*> line 5: THRESH, REAL +*> The test threshold against which computed residuals are +*> compared. Should generally be in the range from 10. to 20. +*> If it is 0., all test case data will be printed. +*> +*> line 6: TSTERR, LOGICAL +*> Flag indicating whether or not to test the error exits. +*> +*> line 7: NEWSD, INTEGER +*> A code indicating how to set the random number seed. +*> = 0: Set the seed to a default value before each run +*> = 1: Initialize the seed to a default value only before the +*> first run +*> = 2: Like 1, but use the seed values on the next line +*> +*> If line 7 was 2: +*> +*> line 8: INTEGER array, dimension (4) +*> Four integer values for the random number seed. +*> +*> lines 8-EOF: Lines specifying matrix types, as for NEP. +*> The 3-character path name is 'CGS' for the generalized +*> eigenvalue problem routines and driver routines. +*> +*>----------------------------------------------------------------------- +*> +*> CGX input file: +*> line 1: 'CGX' in columns 1 to 3. +*> +*> line 2: N, INTEGER +*> Value of N. +*> +*> line 3: NB, NBMIN, NX, NS, NBCOL, INTEGERs +*> These integer parameters determine how blocking is done +*> (see ILAENV for details) +*> NB : block size +*> NBMIN : minimum block size +*> NX : minimum dimension for blocking +*> NS : number of shifts in xHGEQR +*> NBCOL : minimum column dimension for blocking +*> +*> line 4: THRESH, REAL +*> The test threshold against which computed residuals are +*> compared. Should generally be in the range from 10. to 20. +*> Information will be printed about each test for which the +*> test ratio is greater than or equal to the threshold.
+*> +*> line 5: TSTERR, LOGICAL +*> Flag indicating whether or not to test the error exits for +*> the LAPACK routines and driver routines. +*> +*> line 6: NEWSD, INTEGER +*> A code indicating how to set the random number seed. +*> = 0: Set the seed to a default value before each run +*> = 1: Initialize the seed to a default value only before the +*> first run +*> = 2: Like 1, but use the seed values on the next line +*> +*> If line 6 was 2: +*> +*> line 7: INTEGER array, dimension (4) +*> Four integer values for the random number seed. +*> +*> If line 2 was 0: +*> +*> line 7-EOF: Precomputed examples are tested. +*> +*> remaining lines : Each example is stored on 3+2*N*N lines, where N is +*> its dimension. The first line contains the dimension (a +*> single integer). The next line contains an integer k such +*> that only the last k eigenvalues will be selected and appear +*> in the leading diagonal blocks of $A$ and $B$. The next N*N +*> lines contain the matrix A, one element per line. The next N*N +*> lines contain the matrix B. The last line contains the +*> reciprocal of the eigenvalue cluster condition number and the +*> reciprocal of the deflating subspace (associated with the +*> selected eigencluster) condition number. The end of data is +*> indicated by dimension N=0. Even if no data is to be tested, +*> there must be at least one line containing N=0. +*> +*>----------------------------------------------------------------------- +*> +*> CXV input files: +*> line 1: 'CXV' in columns 1 to 3. +*> +*> line 2: N, INTEGER +*> Value of N. +*> +*> line 3: NB, NBMIN, NX, NS, NBCOL, INTEGERs +*> These integer parameters determine how blocking is done +*> (see ILAENV for details) +*> NB : block size +*> NBMIN : minimum block size +*> NX : minimum dimension for blocking +*> NS : number of shifts in xHGEQR +*> NBCOL : minimum column dimension for blocking +*> +*> line 4: THRESH, REAL +*> The test threshold against which computed residuals are +*> compared. 
Should generally be in the range from 10. to 20. +*> Information will be printed about each test for which the +*> test ratio is greater than or equal to the threshold. +*> +*> line 5: TSTERR, LOGICAL +*> Flag indicating whether or not to test the error exits for +*> the LAPACK routines and driver routines. +*> +*> line 6: NEWSD, INTEGER +*> A code indicating how to set the random number seed. +*> = 0: Set the seed to a default value before each run +*> = 1: Initialize the seed to a default value only before the +*> first run +*> = 2: Like 1, but use the seed values on the next line +*> +*> If line 6 was 2: +*> +*> line 7: INTEGER array, dimension (4) +*> Four integer values for the random number seed. +*> +*> If line 2 was 0: +*> +*> line 7-EOF: Precomputed examples are tested. +*> +*> remaining lines : Each example is stored on 3+2*N*N lines, where N is +*> its dimension. The first line contains the dimension (a +*> single integer). The next N*N lines contain the matrix A, one +*> element per line. The next N*N lines contain the matrix B. +*> The next line contains the reciprocals of the eigenvalue +*> condition numbers. The last line contains the reciprocals of +*> the eigenvector condition numbers. The end of data is +*> indicated by dimension N=0. Even if no data is to be tested, +*> there must be at least one line containing N=0. +*> +*>----------------------------------------------------------------------- +*> +*> CHB input file: +*> +*> line 2: NN, INTEGER +*> Number of values of N. +*> +*> line 3: NVAL, INTEGER array, dimension (NN) +*> The values for the matrix dimension N. +*> +*> line 4: NK, INTEGER +*> Number of values of K. +*> +*> line 5: KVAL, INTEGER array, dimension (NK) +*> The values for the matrix dimension K. +*> +*> line 6: THRESH +*> Threshold value for the test ratios. Information will be +*> printed about each test for which the test ratio is greater +*> than or equal to the threshold. 
+*> +*> line 7: NEWSD, INTEGER +*> A code indicating how to set the random number seed. +*> = 0: Set the seed to a default value before each run +*> = 1: Initialize the seed to a default value only before the +*> first run +*> = 2: Like 1, but use the seed values on the next line +*> +*> If line 7 was 2: +*> +*> line 8: INTEGER array, dimension (4) +*> Four integer values for the random number seed. +*> +*> lines 8-EOF: Lines specifying matrix types, as for NEP. +*> The 3-character path name is 'CHB'. +*> +*>----------------------------------------------------------------------- +*> +*> CBB input file: +*> +*> line 2: NN, INTEGER +*> Number of values of M and N. +*> +*> line 3: MVAL, INTEGER array, dimension (NN) +*> The values for the matrix row dimension M. +*> +*> line 4: NVAL, INTEGER array, dimension (NN) +*> The values for the matrix column dimension N. +*> +*> line 5: NK, INTEGER +*> Number of values of K. +*> +*> line 6: KVAL, INTEGER array, dimension (NK) +*> The values for the matrix bandwidth K. +*> +*> line 7: NPARMS, INTEGER +*> Number of values of the parameter NRHS. +*> +*> line 8: NSVAL, INTEGER array, dimension (NPARMS) +*> The values for the number of right hand sides NRHS. +*> +*> line 9: THRESH +*> Threshold value for the test ratios. Information will be +*> printed about each test for which the test ratio is greater +*> than or equal to the threshold. +*> +*> line 10: NEWSD, INTEGER +*> A code indicating how to set the random number seed. +*> = 0: Set the seed to a default value before each run +*> = 1: Initialize the seed to a default value only before the +*> first run +*> = 2: Like 1, but use the seed values on the next line +*> +*> If line 10 was 2: +*> +*> line 11: INTEGER array, dimension (4) +*> Four integer values for the random number seed. +*> +*> lines 11-EOF: Lines specifying matrix types, as for SVD. +*> The 3-character path name is 'CBB'.
+*> +*>----------------------------------------------------------------------- +*> +*> CEC input file: +*> +*> line 2: THRESH, REAL +*> Threshold value for the test ratios. Information will be +*> printed about each test for which the test ratio is greater +*> than or equal to the threshold. +*> +*> lines 3-EOF: +*> +*> Input for testing the eigencondition routines consists of a set of +*> specially constructed test cases and their solutions. The data +*> format is not intended to be modified by the user. +*> +*>----------------------------------------------------------------------- +*> +*> CBL and CBK input files: +*> +*> line 1: 'CBL' in columns 1-3 to test CGEBAL, or 'CBK' in +*> columns 1-3 to test CGEBAK. +*> +*> The remaining lines consist of specially constructed test cases. +*> +*>----------------------------------------------------------------------- +*> +*> CGL and CGK input files: +*> +*> line 1: 'CGL' in columns 1-3 to test CGGBAL, or 'CGK' in +*> columns 1-3 to test CGGBAK. +*> +*> The remaining lines consist of specially constructed test cases. +*> +*>----------------------------------------------------------------------- +*> +*> GLM data file: +*> +*> line 1: 'GLM' in columns 1 to 3. +*> +*> line 2: NN, INTEGER +*> Number of values of M, P, and N. +*> +*> line 3: MVAL, INTEGER array, dimension(NN) +*> Values of M (row dimension). +*> +*> line 4: PVAL, INTEGER array, dimension(NN) +*> Values of P (row dimension). +*> +*> line 5: NVAL, INTEGER array, dimension(NN) +*> Values of N (column dimension), note M <= N <= M+P. +*> +*> line 6: THRESH, REAL +*> Threshold value for the test ratios. Information will be +*> printed about each test for which the test ratio is greater +*> than or equal to the threshold. +*> +*> line 7: TSTERR, LOGICAL +*> Flag indicating whether or not to test the error exits for +*> the LAPACK routines and driver routines. +*> +*> line 8: NEWSD, INTEGER +*> A code indicating how to set the random number seed. 
+*> = 0: Set the seed to a default value before each run +*> = 1: Initialize the seed to a default value only before the +*> first run +*> = 2: Like 1, but use the seed values on the next line +*> +*> If line 8 was 2: +*> +*> line 9: INTEGER array, dimension (4) +*> Four integer values for the random number seed. +*> +*> lines 9-EOF: Lines specifying matrix types, as for NEP. +*> The 3-character path name is 'GLM' for the generalized +*> linear regression model routines. +*> +*>----------------------------------------------------------------------- +*> +*> GQR data file: +*> +*> line 1: 'GQR' in columns 1 to 3. +*> +*> line 2: NN, INTEGER +*> Number of values of M, P, and N. +*> +*> line 3: MVAL, INTEGER array, dimension(NN) +*> Values of M. +*> +*> line 4: PVAL, INTEGER array, dimension(NN) +*> Values of P. +*> +*> line 5: NVAL, INTEGER array, dimension(NN) +*> Values of N. +*> +*> line 6: THRESH, REAL +*> Threshold value for the test ratios. Information will be +*> printed about each test for which the test ratio is greater +*> than or equal to the threshold. +*> +*> line 7: TSTERR, LOGICAL +*> Flag indicating whether or not to test the error exits for +*> the LAPACK routines and driver routines. +*> +*> line 8: NEWSD, INTEGER +*> A code indicating how to set the random number seed. +*> = 0: Set the seed to a default value before each run +*> = 1: Initialize the seed to a default value only before the +*> first run +*> = 2: Like 1, but use the seed values on the next line +*> +*> If line 8 was 2: +*> +*> line 9: INTEGER array, dimension (4) +*> Four integer values for the random number seed. +*> +*> lines 9-EOF: Lines specifying matrix types, as for NEP. +*> The 3-character path name is 'GQR' for the generalized +*> QR and RQ routines. +*> +*>----------------------------------------------------------------------- +*> +*> GSV data file: +*> +*> line 1: 'GSV' in columns 1 to 3. +*> +*> line 2: NN, INTEGER +*> Number of values of M, P, and N. 
+*> +*> line 3: MVAL, INTEGER array, dimension(NN) +*> Values of M (row dimension). +*> +*> line 4: PVAL, INTEGER array, dimension(NN) +*> Values of P (row dimension). +*> +*> line 5: NVAL, INTEGER array, dimension(NN) +*> Values of N (column dimension). +*> +*> line 6: THRESH, REAL +*> Threshold value for the test ratios. Information will be +*> printed about each test for which the test ratio is greater +*> than or equal to the threshold. +*> +*> line 7: TSTERR, LOGICAL +*> Flag indicating whether or not to test the error exits for +*> the LAPACK routines and driver routines. +*> +*> line 8: NEWSD, INTEGER +*> A code indicating how to set the random number seed. +*> = 0: Set the seed to a default value before each run +*> = 1: Initialize the seed to a default value only before the +*> first run +*> = 2: Like 1, but use the seed values on the next line +*> +*> If line 8 was 2: +*> +*> line 9: INTEGER array, dimension (4) +*> Four integer values for the random number seed. +*> +*> lines 9-EOF: Lines specifying matrix types, as for NEP. +*> The 3-character path name is 'GSV' for the generalized +*> SVD routines. +*> +*>----------------------------------------------------------------------- +*> +*> CSD data file: +*> +*> line 1: 'CSD' in columns 1 to 3. +*> +*> line 2: NM, INTEGER +*> Number of values of M, P, and N. +*> +*> line 3: MVAL, INTEGER array, dimension(NM) +*> Values of M (row and column dimension of orthogonal matrix). +*> +*> line 4: PVAL, INTEGER array, dimension(NM) +*> Values of P (row dimension of top-left block). +*> +*> line 5: NVAL, INTEGER array, dimension(NM) +*> Values of N (column dimension of top-left block). +*> +*> line 6: THRESH, REAL +*> Threshold value for the test ratios. Information will be +*> printed about each test for which the test ratio is greater +*> than or equal to the threshold. +*> +*> line 7: TSTERR, LOGICAL +*> Flag indicating whether or not to test the error exits for +*> the LAPACK routines and driver routines. 
+*> +*> line 8: NEWSD, INTEGER +*> A code indicating how to set the random number seed. +*> = 0: Set the seed to a default value before each run +*> = 1: Initialize the seed to a default value only before the +*> first run +*> = 2: Like 1, but use the seed values on the next line +*> +*> If line 8 was 2: +*> +*> line 9: INTEGER array, dimension (4) +*> Four integer values for the random number seed. +*> +*> lines 9-EOF: Lines specifying matrix types, as for NEP. +*> The 3-character path name is 'CSD' for the CSD routine. +*> +*>----------------------------------------------------------------------- +*> +*> LSE data file: +*> +*> line 1: 'LSE' in columns 1 to 3. +*> +*> line 2: NN, INTEGER +*> Number of values of M, P, and N. +*> +*> line 3: MVAL, INTEGER array, dimension(NN) +*> Values of M. +*> +*> line 4: PVAL, INTEGER array, dimension(NN) +*> Values of P. +*> +*> line 5: NVAL, INTEGER array, dimension(NN) +*> Values of N, note P <= N <= P+M. +*> +*> line 6: THRESH, REAL +*> Threshold value for the test ratios. Information will be +*> printed about each test for which the test ratio is greater +*> than or equal to the threshold. +*> +*> line 7: TSTERR, LOGICAL +*> Flag indicating whether or not to test the error exits for +*> the LAPACK routines and driver routines. +*> +*> line 8: NEWSD, INTEGER +*> A code indicating how to set the random number seed. +*> = 0: Set the seed to a default value before each run +*> = 1: Initialize the seed to a default value only before the +*> first run +*> = 2: Like 1, but use the seed values on the next line +*> +*> If line 8 was 2: +*> +*> line 9: INTEGER array, dimension (4) +*> Four integer values for the random number seed. +*> +*> lines 9-EOF: Lines specifying matrix types, as for NEP. +*> The 3-character path name is 'LSE' for the constrained +*> linear least squares routines.
+*> +*>----------------------------------------------------------------------- +*> +*> NMAX is currently set to 132 and must be at least 12 for some of the +*> precomputed examples, and LWORK = NMAX*(5*NMAX+20) in the parameter +*> statements below. For SVD, we assume NRHS may be as big as N. The +*> parameter NEED is set to 14 to allow for 14 N-by-N matrices for CGG. +*> \endverbatim +* +* Arguments: +* ========== +* +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complex_eig +* +* ===================================================================== + PROGRAM CCHKEE +* +#if defined(_OPENMP) + use omp_lib +#endif +* +* -- LAPACK test routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* ===================================================================== +* +* .. Parameters .. + INTEGER NMAX + PARAMETER ( NMAX = 132 ) + INTEGER NCMAX + PARAMETER ( NCMAX = 20 ) + INTEGER NEED + PARAMETER ( NEED = 14 ) + INTEGER LWORK + PARAMETER ( LWORK = NMAX*( 5*NMAX+20 ) ) + INTEGER LIWORK + PARAMETER ( LIWORK = NMAX*( NMAX+20 ) ) + INTEGER MAXIN + PARAMETER ( MAXIN = 20 ) + INTEGER MAXT + PARAMETER ( MAXT = 30 ) + INTEGER NIN, NOUT + PARAMETER ( NIN = 5, NOUT = 6 ) +* .. +* .. Local Scalars .. + LOGICAL CBB, CBK, CBL, CES, CEV, CGG, CGK, CGL, CGS, + $ CGV, CGX, CHB, CSD, CSX, CVX, CXV, FATAL, GLM, + $ GQR, GSV, LSE, NEP, SEP, SVD, TSTCHK, TSTDIF, + $ TSTDRV, TSTERR + CHARACTER C1 + CHARACTER*3 C3, PATH + CHARACTER*32 VNAME + CHARACTER*10 INTSTR + CHARACTER*80 LINE + INTEGER I, I1, IC, INFO, ITMP, K, LENP, MAXTYP, NEWSD, + $ NK, NN, NPARMS, NRHS, NTYPES, + $ VERS_MAJOR, VERS_MINOR, VERS_PATCH, N_THREADS + REAL EPS, S1, S2, THRESH, THRSHN +* .. +* .. Local Arrays .. 
+ LOGICAL DOTYPE( MAXT ), LOGWRK( NMAX ) + INTEGER IOLDSD( 4 ), ISEED( 4 ), IWORK( LIWORK ), + $ KVAL( MAXIN ), MVAL( MAXIN ), MXBVAL( MAXIN ), + $ NBCOL( MAXIN ), NBMIN( MAXIN ), NBVAL( MAXIN ), + $ NSVAL( MAXIN ), NVAL( MAXIN ), NXVAL( MAXIN ), + $ PVAL( MAXIN ) + INTEGER INMIN( MAXIN ), INWIN( MAXIN ), INIBL( MAXIN ), + $ ISHFTS( MAXIN ), IACC22( MAXIN ) + REAL ALPHA( NMAX ), BETA( NMAX ), DR( NMAX, 12 ), + $ RESULT( 500 ) + COMPLEX DC( NMAX, 6 ), TAUA( NMAX ), TAUB( NMAX ), + $ X( 5*NMAX ) +* .. +* .. Allocatable Arrays .. + INTEGER AllocateStatus + REAL, DIMENSION(:), ALLOCATABLE :: RWORK, S + COMPLEX, DIMENSION(:), ALLOCATABLE :: WORK + COMPLEX, DIMENSION(:,:), ALLOCATABLE :: A, B, C +* .. +* .. External Functions .. + LOGICAL LSAMEN + REAL SECOND, SLAMCH + EXTERNAL LSAMEN, SECOND, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL ALAREQ, CCHKBB, CCHKBD, CCHKBK, CCHKBL, CCHKEC, + $ CCHKGG, CCHKGK, CCHKGL, CCHKHB, CCHKHS, CCHKST, + $ CCKCSD, CCKGLM, CCKGQR, CCKGSV, CCKLSE, CDRGES, + $ CDRGEV, CDRGSX, CDRGVX, CDRVBD, CDRVES, CDRVEV, + $ CDRVSG, CDRVST, CDRVSX, CDRVVX, CERRBD, + $ CERRED, CERRGG, CERRHS, CERRST, ILAVER, XLAENV, + $ CDRGES3, CDRGEV3, + $ CCHKST2STG, CDRVST2STG, CCHKHB2STG +* .. +* .. Intrinsic Functions .. + INTRINSIC LEN, MIN +* .. +* .. Scalars in Common .. + LOGICAL LERR, OK + CHARACTER*32 SRNAMT + INTEGER INFOT, MAXB, NPROC, NSHIFT, NUNIT, SELDIM, + $ SELOPT +* .. +* .. Arrays in Common .. + LOGICAL SELVAL( 20 ) + INTEGER IPARMS( 100 ) + REAL SELWI( 20 ), SELWR( 20 ) +* .. +* .. Common blocks .. + COMMON / CENVIR / NPROC, NSHIFT, MAXB + COMMON / CLAENV / IPARMS + COMMON / INFOC / INFOT, NUNIT, OK, LERR + COMMON / SRNAMC / SRNAMT + COMMON / SSLCT / SELOPT, SELDIM, SELVAL, SELWR, SELWI +* .. +* .. Data statements .. + DATA INTSTR / '0123456789' / + DATA IOLDSD / 0, 0, 0, 1 / +* .. +* .. Allocate memory dynamically .. 
+* + ALLOCATE ( S(NMAX*NMAX), STAT = AllocateStatus ) + IF (AllocateStatus /= 0) STOP "*** Not enough memory ***" + ALLOCATE ( A(NMAX*NMAX,NEED), STAT = AllocateStatus ) + IF (AllocateStatus /= 0) STOP "*** Not enough memory ***" + ALLOCATE ( B(NMAX*NMAX,5), STAT = AllocateStatus ) + IF (AllocateStatus /= 0) STOP "*** Not enough memory ***" + ALLOCATE ( C(NCMAX*NCMAX,NCMAX*NCMAX), STAT = AllocateStatus ) + IF (AllocateStatus /= 0) STOP "*** Not enough memory ***" + ALLOCATE ( RWORK(LWORK), STAT = AllocateStatus ) + IF (AllocateStatus /= 0) STOP "*** Not enough memory ***" + ALLOCATE ( WORK(LWORK), STAT = AllocateStatus ) + IF (AllocateStatus /= 0) STOP "*** Not enough memory ***" +* .. +* .. Executable Statements .. +* + A = 0.0 + B = 0.0 + C = 0.0 + DC = 0.0 + S1 = SECOND( ) + FATAL = .FALSE. + NUNIT = NOUT +* +* Return to here to read multiple sets of data +* + 10 CONTINUE +* +* Read the first line and set the 3-character test path +* + READ( NIN, FMT = '(A80)', END = 380 )LINE + PATH = LINE( 1: 3 ) + NEP = LSAMEN( 3, PATH, 'NEP' ) .OR. LSAMEN( 3, PATH, 'CHS' ) + SEP = LSAMEN( 3, PATH, 'SEP' ) .OR. LSAMEN( 3, PATH, 'CST' ) .OR. + $ LSAMEN( 3, PATH, 'CSG' ) .OR. LSAMEN( 3, PATH, 'SE2' ) + SVD = LSAMEN( 3, PATH, 'SVD' ) .OR. LSAMEN( 3, PATH, 'CBD' ) + CEV = LSAMEN( 3, PATH, 'CEV' ) + CES = LSAMEN( 3, PATH, 'CES' ) + CVX = LSAMEN( 3, PATH, 'CVX' ) + CSX = LSAMEN( 3, PATH, 'CSX' ) + CGG = LSAMEN( 3, PATH, 'CGG' ) + CGS = LSAMEN( 3, PATH, 'CGS' ) + CGX = LSAMEN( 3, PATH, 'CGX' ) + CGV = LSAMEN( 3, PATH, 'CGV' ) + CXV = LSAMEN( 3, PATH, 'CXV' ) + CHB = LSAMEN( 3, PATH, 'CHB' ) + CBB = LSAMEN( 3, PATH, 'CBB' ) + GLM = LSAMEN( 3, PATH, 'GLM' ) + GQR = LSAMEN( 3, PATH, 'GQR' ) .OR. LSAMEN( 3, PATH, 'GRQ' ) + GSV = LSAMEN( 3, PATH, 'GSV' ) + CSD = LSAMEN( 3, PATH, 'CSD' ) + LSE = LSAMEN( 3, PATH, 'LSE' ) + CBL = LSAMEN( 3, PATH, 'CBL' ) + CBK = LSAMEN( 3, PATH, 'CBK' ) + CGL = LSAMEN( 3, PATH, 'CGL' ) + CGK = LSAMEN( 3, PATH, 'CGK' ) +* +* Report values of parameters. 
+* + IF( PATH.EQ.' ' ) THEN + GO TO 10 + ELSE IF( NEP ) THEN + WRITE( NOUT, FMT = 9987 ) + ELSE IF( SEP ) THEN + WRITE( NOUT, FMT = 9986 ) + ELSE IF( SVD ) THEN + WRITE( NOUT, FMT = 9985 ) + ELSE IF( CEV ) THEN + WRITE( NOUT, FMT = 9979 ) + ELSE IF( CES ) THEN + WRITE( NOUT, FMT = 9978 ) + ELSE IF( CVX ) THEN + WRITE( NOUT, FMT = 9977 ) + ELSE IF( CSX ) THEN + WRITE( NOUT, FMT = 9976 ) + ELSE IF( CGG ) THEN + WRITE( NOUT, FMT = 9975 ) + ELSE IF( CGS ) THEN + WRITE( NOUT, FMT = 9964 ) + ELSE IF( CGX ) THEN + WRITE( NOUT, FMT = 9965 ) + ELSE IF( CGV ) THEN + WRITE( NOUT, FMT = 9963 ) + ELSE IF( CXV ) THEN + WRITE( NOUT, FMT = 9962 ) + ELSE IF( CHB ) THEN + WRITE( NOUT, FMT = 9974 ) + ELSE IF( CBB ) THEN + WRITE( NOUT, FMT = 9967 ) + ELSE IF( GLM ) THEN + WRITE( NOUT, FMT = 9971 ) + ELSE IF( GQR ) THEN + WRITE( NOUT, FMT = 9970 ) + ELSE IF( GSV ) THEN + WRITE( NOUT, FMT = 9969 ) + ELSE IF( CSD ) THEN + WRITE( NOUT, FMT = 9960 ) + ELSE IF( LSE ) THEN + WRITE( NOUT, FMT = 9968 ) + ELSE IF( CBL ) THEN +* +* CGEBAL: Balancing +* + CALL CCHKBL( NIN, NOUT ) + GO TO 380 + ELSE IF( CBK ) THEN +* +* CGEBAK: Back transformation +* + CALL CCHKBK( NIN, NOUT ) + GO TO 380 + ELSE IF( CGL ) THEN +* +* CGGBAL: Balancing +* + CALL CCHKGL( NIN, NOUT ) + GO TO 380 + ELSE IF( CGK ) THEN +* +* CGGBAK: Back transformation +* + CALL CCHKGK( NIN, NOUT ) + GO TO 380 + ELSE IF( LSAMEN( 3, PATH, 'CEC' ) ) THEN +* +* CEC: Eigencondition estimation +* + READ( NIN, FMT = * )THRESH + CALL XLAENV( 1, 1 ) + CALL XLAENV( 12, 1 ) + TSTERR = .TRUE. + CALL CCHKEC( THRESH, TSTERR, NIN, NOUT ) + GO TO 380 + ELSE + WRITE( NOUT, FMT = 9992 )PATH + GO TO 380 + END IF + CALL ILAVER( VERS_MAJOR, VERS_MINOR, VERS_PATCH ) + WRITE( NOUT, FMT = 9972 ) VERS_MAJOR, VERS_MINOR, VERS_PATCH + WRITE( NOUT, FMT = 9984 ) +* +* Read the number of values of M, P, and N. +* + READ( NIN, FMT = * )NN + IF( NN.LT.0 ) THEN + WRITE( NOUT, FMT = 9989 )' NN ', NN, 1 + NN = 0 + FATAL = .TRUE. 
+ ELSE IF( NN.GT.MAXIN ) THEN + WRITE( NOUT, FMT = 9988 )' NN ', NN, MAXIN + NN = 0 + FATAL = .TRUE. + END IF +* +* Read the values of M +* + IF( .NOT.( CGX .OR. CXV ) ) THEN + READ( NIN, FMT = * )( MVAL( I ), I = 1, NN ) + IF( SVD ) THEN + VNAME = ' M ' + ELSE + VNAME = ' N ' + END IF + DO 20 I = 1, NN + IF( MVAL( I ).LT.0 ) THEN + WRITE( NOUT, FMT = 9989 )VNAME, MVAL( I ), 0 + FATAL = .TRUE. + ELSE IF( MVAL( I ).GT.NMAX ) THEN + WRITE( NOUT, FMT = 9988 )VNAME, MVAL( I ), NMAX + FATAL = .TRUE. + END IF + 20 CONTINUE + WRITE( NOUT, FMT = 9983 )'M: ', ( MVAL( I ), I = 1, NN ) + END IF +* +* Read the values of P +* + IF( GLM .OR. GQR .OR. GSV .OR. CSD .OR. LSE ) THEN + READ( NIN, FMT = * )( PVAL( I ), I = 1, NN ) + DO 30 I = 1, NN + IF( PVAL( I ).LT.0 ) THEN + WRITE( NOUT, FMT = 9989 )' P ', PVAL( I ), 0 + FATAL = .TRUE. + ELSE IF( PVAL( I ).GT.NMAX ) THEN + WRITE( NOUT, FMT = 9988 )' P ', PVAL( I ), NMAX + FATAL = .TRUE. + END IF + 30 CONTINUE + WRITE( NOUT, FMT = 9983 )'P: ', ( PVAL( I ), I = 1, NN ) + END IF +* +* Read the values of N +* + IF( SVD .OR. CBB .OR. GLM .OR. GQR .OR. GSV .OR. CSD .OR. + $ LSE ) THEN + READ( NIN, FMT = * )( NVAL( I ), I = 1, NN ) + DO 40 I = 1, NN + IF( NVAL( I ).LT.0 ) THEN + WRITE( NOUT, FMT = 9989 )' N ', NVAL( I ), 0 + FATAL = .TRUE. + ELSE IF( NVAL( I ).GT.NMAX ) THEN + WRITE( NOUT, FMT = 9988 )' N ', NVAL( I ), NMAX + FATAL = .TRUE. + END IF + 40 CONTINUE + ELSE + DO 50 I = 1, NN + NVAL( I ) = MVAL( I ) + 50 CONTINUE + END IF + IF( .NOT.( CGX .OR. CXV ) ) THEN + WRITE( NOUT, FMT = 9983 )'N: ', ( NVAL( I ), I = 1, NN ) + ELSE + WRITE( NOUT, FMT = 9983 )'N: ', NN + END IF +* +* Read the number of values of K, followed by the values of K +* + IF( CHB .OR. CBB ) THEN + READ( NIN, FMT = * )NK + READ( NIN, FMT = * )( KVAL( I ), I = 1, NK ) + DO 60 I = 1, NK + IF( KVAL( I ).LT.0 ) THEN + WRITE( NOUT, FMT = 9989 )' K ', KVAL( I ), 0 + FATAL = .TRUE. 
+ ELSE IF( KVAL( I ).GT.NMAX ) THEN + WRITE( NOUT, FMT = 9988 )' K ', KVAL( I ), NMAX + FATAL = .TRUE. + END IF + 60 CONTINUE + WRITE( NOUT, FMT = 9983 )'K: ', ( KVAL( I ), I = 1, NK ) + END IF +* + IF( CEV .OR. CES .OR. CVX .OR. CSX ) THEN +* +* For the nonsymmetric QR driver routines, only one set of +* parameters is allowed. +* + READ( NIN, FMT = * )NBVAL( 1 ), NBMIN( 1 ), NXVAL( 1 ), + $ INMIN( 1 ), INWIN( 1 ), INIBL(1), ISHFTS(1), IACC22(1) + IF( NBVAL( 1 ).LT.1 ) THEN + WRITE( NOUT, FMT = 9989 )' NB ', NBVAL( 1 ), 1 + FATAL = .TRUE. + ELSE IF( NBMIN( 1 ).LT.1 ) THEN + WRITE( NOUT, FMT = 9989 )'NBMIN ', NBMIN( 1 ), 1 + FATAL = .TRUE. + ELSE IF( NXVAL( 1 ).LT.1 ) THEN + WRITE( NOUT, FMT = 9989 )' NX ', NXVAL( 1 ), 1 + FATAL = .TRUE. + ELSE IF( INMIN( 1 ).LT.1 ) THEN + WRITE( NOUT, FMT = 9989 )' INMIN ', INMIN( 1 ), 1 + FATAL = .TRUE. + ELSE IF( INWIN( 1 ).LT.1 ) THEN + WRITE( NOUT, FMT = 9989 )' INWIN ', INWIN( 1 ), 1 + FATAL = .TRUE. + ELSE IF( INIBL( 1 ).LT.1 ) THEN + WRITE( NOUT, FMT = 9989 )' INIBL ', INIBL( 1 ), 1 + FATAL = .TRUE. + ELSE IF( ISHFTS( 1 ).LT.1 ) THEN + WRITE( NOUT, FMT = 9989 )' ISHFTS ', ISHFTS( 1 ), 1 + FATAL = .TRUE. + ELSE IF( IACC22( 1 ).LT.0 ) THEN + WRITE( NOUT, FMT = 9989 )' IACC22 ', IACC22( 1 ), 0 + FATAL = .TRUE. + END IF + CALL XLAENV( 1, NBVAL( 1 ) ) + CALL XLAENV( 2, NBMIN( 1 ) ) + CALL XLAENV( 3, NXVAL( 1 ) ) + CALL XLAENV(12, MAX( 11, INMIN( 1 ) ) ) + CALL XLAENV(13, INWIN( 1 ) ) + CALL XLAENV(14, INIBL( 1 ) ) + CALL XLAENV(15, ISHFTS( 1 ) ) + CALL XLAENV(16, IACC22( 1 ) ) + WRITE( NOUT, FMT = 9983 )'NB: ', NBVAL( 1 ) + WRITE( NOUT, FMT = 9983 )'NBMIN:', NBMIN( 1 ) + WRITE( NOUT, FMT = 9983 )'NX: ', NXVAL( 1 ) + WRITE( NOUT, FMT = 9983 )'INMIN: ', INMIN( 1 ) + WRITE( NOUT, FMT = 9983 )'INWIN: ', INWIN( 1 ) + WRITE( NOUT, FMT = 9983 )'INIBL: ', INIBL( 1 ) + WRITE( NOUT, FMT = 9983 )'ISHFTS: ', ISHFTS( 1 ) + WRITE( NOUT, FMT = 9983 )'IACC22: ', IACC22( 1 ) +* + ELSE IF( CGS .OR. CGX .OR. CGV .OR. 
CXV ) THEN +* +* For the nonsymmetric generalized driver routines, only one set of +* parameters is allowed. +* + READ( NIN, FMT = * )NBVAL( 1 ), NBMIN( 1 ), NXVAL( 1 ), + $ NSVAL( 1 ), MXBVAL( 1 ) + IF( NBVAL( 1 ).LT.1 ) THEN + WRITE( NOUT, FMT = 9989 )' NB ', NBVAL( 1 ), 1 + FATAL = .TRUE. + ELSE IF( NBMIN( 1 ).LT.1 ) THEN + WRITE( NOUT, FMT = 9989 )'NBMIN ', NBMIN( 1 ), 1 + FATAL = .TRUE. + ELSE IF( NXVAL( 1 ).LT.1 ) THEN + WRITE( NOUT, FMT = 9989 )' NX ', NXVAL( 1 ), 1 + FATAL = .TRUE. + ELSE IF( NSVAL( 1 ).LT.2 ) THEN + WRITE( NOUT, FMT = 9989 )' NS ', NSVAL( 1 ), 2 + FATAL = .TRUE. + ELSE IF( MXBVAL( 1 ).LT.1 ) THEN + WRITE( NOUT, FMT = 9989 )' MAXB ', MXBVAL( 1 ), 1 + FATAL = .TRUE. + END IF + CALL XLAENV( 1, NBVAL( 1 ) ) + CALL XLAENV( 2, NBMIN( 1 ) ) + CALL XLAENV( 3, NXVAL( 1 ) ) + CALL XLAENV( 4, NSVAL( 1 ) ) + CALL XLAENV( 8, MXBVAL( 1 ) ) + WRITE( NOUT, FMT = 9983 )'NB: ', NBVAL( 1 ) + WRITE( NOUT, FMT = 9983 )'NBMIN:', NBMIN( 1 ) + WRITE( NOUT, FMT = 9983 )'NX: ', NXVAL( 1 ) + WRITE( NOUT, FMT = 9983 )'NS: ', NSVAL( 1 ) + WRITE( NOUT, FMT = 9983 )'MAXB: ', MXBVAL( 1 ) + ELSE IF( .NOT.CHB .AND. .NOT.GLM .AND. .NOT.GQR .AND. .NOT. + $ GSV .AND. .NOT.CSD .AND. .NOT.LSE ) THEN +* +* For the other paths, the number of parameters can be varied +* from the input file. Read the number of parameter values. +* + READ( NIN, FMT = * )NPARMS + IF( NPARMS.LT.1 ) THEN + WRITE( NOUT, FMT = 9989 )'NPARMS', NPARMS, 1 + NPARMS = 0 + FATAL = .TRUE. + ELSE IF( NPARMS.GT.MAXIN ) THEN + WRITE( NOUT, FMT = 9988 )'NPARMS', NPARMS, MAXIN + NPARMS = 0 + FATAL = .TRUE. + END IF +* +* Read the values of NB +* + IF( .NOT.CBB ) THEN + READ( NIN, FMT = * )( NBVAL( I ), I = 1, NPARMS ) + DO 70 I = 1, NPARMS + IF( NBVAL( I ).LT.0 ) THEN + WRITE( NOUT, FMT = 9989 )' NB ', NBVAL( I ), 0 + FATAL = .TRUE. + ELSE IF( NBVAL( I ).GT.NMAX ) THEN + WRITE( NOUT, FMT = 9988 )' NB ', NBVAL( I ), NMAX + FATAL = .TRUE. 
+ END IF + 70 CONTINUE + WRITE( NOUT, FMT = 9983 )'NB: ', + $ ( NBVAL( I ), I = 1, NPARMS ) + END IF +* +* Read the values of NBMIN +* + IF( NEP .OR. SEP .OR. SVD .OR. CGG ) THEN + READ( NIN, FMT = * )( NBMIN( I ), I = 1, NPARMS ) + DO 80 I = 1, NPARMS + IF( NBMIN( I ).LT.0 ) THEN + WRITE( NOUT, FMT = 9989 )'NBMIN ', NBMIN( I ), 0 + FATAL = .TRUE. + ELSE IF( NBMIN( I ).GT.NMAX ) THEN + WRITE( NOUT, FMT = 9988 )'NBMIN ', NBMIN( I ), NMAX + FATAL = .TRUE. + END IF + 80 CONTINUE + WRITE( NOUT, FMT = 9983 )'NBMIN:', + $ ( NBMIN( I ), I = 1, NPARMS ) + ELSE + DO 90 I = 1, NPARMS + NBMIN( I ) = 1 + 90 CONTINUE + END IF +* +* Read the values of NX +* + IF( NEP .OR. SEP .OR. SVD ) THEN + READ( NIN, FMT = * )( NXVAL( I ), I = 1, NPARMS ) + DO 100 I = 1, NPARMS + IF( NXVAL( I ).LT.0 ) THEN + WRITE( NOUT, FMT = 9989 )' NX ', NXVAL( I ), 0 + FATAL = .TRUE. + ELSE IF( NXVAL( I ).GT.NMAX ) THEN + WRITE( NOUT, FMT = 9988 )' NX ', NXVAL( I ), NMAX + FATAL = .TRUE. + END IF + 100 CONTINUE + WRITE( NOUT, FMT = 9983 )'NX: ', + $ ( NXVAL( I ), I = 1, NPARMS ) + ELSE + DO 110 I = 1, NPARMS + NXVAL( I ) = 1 + 110 CONTINUE + END IF +* +* Read the values of NSHIFT (if CGG) or NRHS (if SVD +* or CBB). +* + IF( SVD .OR. CBB .OR. CGG ) THEN + READ( NIN, FMT = * )( NSVAL( I ), I = 1, NPARMS ) + DO 120 I = 1, NPARMS + IF( NSVAL( I ).LT.0 ) THEN + WRITE( NOUT, FMT = 9989 )' NS ', NSVAL( I ), 0 + FATAL = .TRUE. + ELSE IF( NSVAL( I ).GT.NMAX ) THEN + WRITE( NOUT, FMT = 9988 )' NS ', NSVAL( I ), NMAX + FATAL = .TRUE. + END IF + 120 CONTINUE + WRITE( NOUT, FMT = 9983 )'NS: ', + $ ( NSVAL( I ), I = 1, NPARMS ) + ELSE + DO 130 I = 1, NPARMS + NSVAL( I ) = 1 + 130 CONTINUE + END IF +* +* Read the values for MAXB. +* + IF( CGG ) THEN + READ( NIN, FMT = * )( MXBVAL( I ), I = 1, NPARMS ) + DO 140 I = 1, NPARMS + IF( MXBVAL( I ).LT.0 ) THEN + WRITE( NOUT, FMT = 9989 )' MAXB ', MXBVAL( I ), 0 + FATAL = .TRUE. 
+ ELSE IF( MXBVAL( I ).GT.NMAX ) THEN + WRITE( NOUT, FMT = 9988 )' MAXB ', MXBVAL( I ), NMAX + FATAL = .TRUE. + END IF + 140 CONTINUE + WRITE( NOUT, FMT = 9983 )'MAXB: ', + $ ( MXBVAL( I ), I = 1, NPARMS ) + ELSE + DO 150 I = 1, NPARMS + MXBVAL( I ) = 1 + 150 CONTINUE + END IF +* +* Read the values for INMIN. +* + IF( NEP ) THEN + READ( NIN, FMT = * )( INMIN( I ), I = 1, NPARMS ) + DO 540 I = 1, NPARMS + IF( INMIN( I ).LT.0 ) THEN + WRITE( NOUT, FMT = 9989 )' INMIN ', INMIN( I ), 0 + FATAL = .TRUE. + END IF + 540 CONTINUE + WRITE( NOUT, FMT = 9983 )'INMIN: ', + $ ( INMIN( I ), I = 1, NPARMS ) + ELSE + DO 550 I = 1, NPARMS + INMIN( I ) = 1 + 550 CONTINUE + END IF +* +* Read the values for INWIN. +* + IF( NEP ) THEN + READ( NIN, FMT = * )( INWIN( I ), I = 1, NPARMS ) + DO 560 I = 1, NPARMS + IF( INWIN( I ).LT.0 ) THEN + WRITE( NOUT, FMT = 9989 )' INWIN ', INWIN( I ), 0 + FATAL = .TRUE. + END IF + 560 CONTINUE + WRITE( NOUT, FMT = 9983 )'INWIN: ', + $ ( INWIN( I ), I = 1, NPARMS ) + ELSE + DO 570 I = 1, NPARMS + INWIN( I ) = 1 + 570 CONTINUE + END IF +* +* Read the values for INIBL. +* + IF( NEP ) THEN + READ( NIN, FMT = * )( INIBL( I ), I = 1, NPARMS ) + DO 580 I = 1, NPARMS + IF( INIBL( I ).LT.0 ) THEN + WRITE( NOUT, FMT = 9989 )' INIBL ', INIBL( I ), 0 + FATAL = .TRUE. + END IF + 580 CONTINUE + WRITE( NOUT, FMT = 9983 )'INIBL: ', + $ ( INIBL( I ), I = 1, NPARMS ) + ELSE + DO 590 I = 1, NPARMS + INIBL( I ) = 1 + 590 CONTINUE + END IF +* +* Read the values for ISHFTS. +* + IF( NEP ) THEN + READ( NIN, FMT = * )( ISHFTS( I ), I = 1, NPARMS ) + DO 600 I = 1, NPARMS + IF( ISHFTS( I ).LT.0 ) THEN + WRITE( NOUT, FMT = 9989 )' ISHFTS ', ISHFTS( I ), 0 + FATAL = .TRUE. + END IF + 600 CONTINUE + WRITE( NOUT, FMT = 9983 )'ISHFTS: ', + $ ( ISHFTS( I ), I = 1, NPARMS ) + ELSE + DO 610 I = 1, NPARMS + ISHFTS( I ) = 1 + 610 CONTINUE + END IF +* +* Read the values for IACC22. +* + IF( NEP .OR. 
CGG ) THEN + READ( NIN, FMT = * )( IACC22( I ), I = 1, NPARMS ) + DO 620 I = 1, NPARMS + IF( IACC22( I ).LT.0 ) THEN + WRITE( NOUT, FMT = 9989 )' IACC22 ', IACC22( I ), 0 + FATAL = .TRUE. + END IF + 620 CONTINUE + WRITE( NOUT, FMT = 9983 )'IACC22: ', + $ ( IACC22( I ), I = 1, NPARMS ) + ELSE + DO 630 I = 1, NPARMS + IACC22( I ) = 1 + 630 CONTINUE + END IF +* +* Read the values for NBCOL. +* + IF( CGG ) THEN + READ( NIN, FMT = * )( NBCOL( I ), I = 1, NPARMS ) + DO 160 I = 1, NPARMS + IF( NBCOL( I ).LT.0 ) THEN + WRITE( NOUT, FMT = 9989 )'NBCOL ', NBCOL( I ), 0 + FATAL = .TRUE. + ELSE IF( NBCOL( I ).GT.NMAX ) THEN + WRITE( NOUT, FMT = 9988 )'NBCOL ', NBCOL( I ), NMAX + FATAL = .TRUE. + END IF + 160 CONTINUE + WRITE( NOUT, FMT = 9983 )'NBCOL:', + $ ( NBCOL( I ), I = 1, NPARMS ) + ELSE + DO 170 I = 1, NPARMS + NBCOL( I ) = 1 + 170 CONTINUE + END IF + END IF +* +* Calculate and print the machine dependent constants. +* + WRITE( NOUT, FMT = * ) + EPS = SLAMCH( 'Underflow threshold' ) + WRITE( NOUT, FMT = 9981 )'underflow', EPS + EPS = SLAMCH( 'Overflow threshold' ) + WRITE( NOUT, FMT = 9981 )'overflow ', EPS + EPS = SLAMCH( 'Epsilon' ) + WRITE( NOUT, FMT = 9981 )'precision', EPS +* +* Read the threshold value for the test ratios. +* + READ( NIN, FMT = * )THRESH + WRITE( NOUT, FMT = 9982 )THRESH + IF( SEP .OR. SVD .OR. CGG ) THEN +* +* Read the flag that indicates whether to test LAPACK routines. +* + READ( NIN, FMT = * )TSTCHK +* +* Read the flag that indicates whether to test driver routines. +* + READ( NIN, FMT = * )TSTDRV + END IF +* +* Read the flag that indicates whether to test the error exits. +* + READ( NIN, FMT = * )TSTERR +* +* Read the code describing how to set the random number seed. +* + READ( NIN, FMT = * )NEWSD +* +* If NEWSD = 2, read another line with 4 integers for the seed. 
+* + IF( NEWSD.EQ.2 ) + $ READ( NIN, FMT = * )( IOLDSD( I ), I = 1, 4 ) +* + DO 180 I = 1, 4 + ISEED( I ) = IOLDSD( I ) + 180 CONTINUE +* + IF( FATAL ) THEN + WRITE( NOUT, FMT = 9999 ) + STOP + END IF +* +* Read the input lines indicating the test path and its parameters. +* The first three characters indicate the test path, and the number +* of test matrix types must be the first nonblank item in columns +* 4-80. +* + 190 CONTINUE +* + IF( .NOT.( CGX .OR. CXV ) ) THEN +* + 200 CONTINUE + READ( NIN, FMT = '(A80)', END = 380 )LINE + C3 = LINE( 1: 3 ) + LENP = LEN( LINE ) + I = 3 + ITMP = 0 + I1 = 0 + 210 CONTINUE + I = I + 1 + IF( I.GT.LENP ) THEN + IF( I1.GT.0 ) THEN + GO TO 240 + ELSE + NTYPES = MAXT + GO TO 240 + END IF + END IF + IF( LINE( I: I ).NE.' ' .AND. LINE( I: I ).NE.',' ) THEN + I1 = I + C1 = LINE( I1: I1 ) +* +* Check that a valid integer was read +* + DO 220 K = 1, 10 + IF( C1.EQ.INTSTR( K: K ) ) THEN + IC = K - 1 + GO TO 230 + END IF + 220 CONTINUE + WRITE( NOUT, FMT = 9991 )I, LINE + GO TO 200 + 230 CONTINUE + ITMP = 10*ITMP + IC + GO TO 210 + ELSE IF( I1.GT.0 ) THEN + GO TO 240 + ELSE + GO TO 210 + END IF + 240 CONTINUE + NTYPES = ITMP +* +* Skip the tests if NTYPES is <= 0. +* + IF( .NOT.( CEV .OR. CES .OR. CVX .OR. CSX .OR. CGV .OR. + $ CGS ) .AND. NTYPES.LE.0 ) THEN + WRITE( NOUT, FMT = 9990 )C3 + GO TO 200 + END IF +* + ELSE + IF( CGX ) + $ C3 = 'CGX' + IF( CXV ) + $ C3 = 'CXV' + END IF +* +* Reset the random number seed. +* + IF( NEWSD.EQ.0 ) THEN + DO 250 K = 1, 4 + ISEED( K ) = IOLDSD( K ) + 250 CONTINUE + END IF +* + IF( LSAMEN( 3, C3, 'CHS' ) .OR. 
LSAMEN( 3, C3, 'NEP' ) ) THEN +* +* ------------------------------------- +* NEP: Nonsymmetric Eigenvalue Problem +* ------------------------------------- +* Vary the parameters +* NB = block size +* NBMIN = minimum block size +* NX = crossover point +* NS = number of shifts +* MAXB = minimum submatrix size +* + MAXTYP = 21 + NTYPES = MIN( MAXTYP, NTYPES ) + CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) + CALL XLAENV( 1, 1 ) + IF( TSTERR ) + $ CALL CERRHS( 'CHSEQR', NOUT ) + DO 270 I = 1, NPARMS + CALL XLAENV( 1, NBVAL( I ) ) + CALL XLAENV( 2, NBMIN( I ) ) + CALL XLAENV( 3, NXVAL( I ) ) + CALL XLAENV(12, MAX( 11, INMIN( I ) ) ) + CALL XLAENV(13, INWIN( I ) ) + CALL XLAENV(14, INIBL( I ) ) + CALL XLAENV(15, ISHFTS( I ) ) + CALL XLAENV(16, IACC22( I ) ) +* + IF( NEWSD.EQ.0 ) THEN + DO 260 K = 1, 4 + ISEED( K ) = IOLDSD( K ) + 260 CONTINUE + END IF + WRITE( NOUT, FMT = 9961 )C3, NBVAL( I ), NBMIN( I ), + $ NXVAL( I ), MAX( 11, INMIN(I)), + $ INWIN( I ), INIBL( I ), ISHFTS( I ), IACC22( I ) + CALL CCHKHS( NN, NVAL, MAXTYP, DOTYPE, ISEED, THRESH, NOUT, + $ A( 1, 1 ), NMAX, A( 1, 2 ), A( 1, 3 ), + $ A( 1, 4 ), A( 1, 5 ), NMAX, A( 1, 6 ), + $ A( 1, 7 ), DC( 1, 1 ), DC( 1, 2 ), A( 1, 8 ), + $ A( 1, 9 ), A( 1, 10 ), A( 1, 11 ), A( 1, 12 ), + $ DC( 1, 3 ), WORK, LWORK, RWORK, IWORK, LOGWRK, + $ RESULT, INFO ) + IF( INFO.NE.0 ) + $ WRITE( NOUT, FMT = 9980 )'CCHKHS', INFO + 270 CONTINUE +* + ELSE IF( LSAMEN( 3, C3, 'CST' ) .OR. LSAMEN( 3, C3, 'SEP' ) + $ .OR. 
LSAMEN( 3, C3, 'SE2' ) ) THEN +* +* ---------------------------------- +* SEP: Symmetric Eigenvalue Problem +* ---------------------------------- +* Vary the parameters +* NB = block size +* NBMIN = minimum block size +* NX = crossover point +* + MAXTYP = 21 + NTYPES = MIN( MAXTYP, NTYPES ) + CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) + CALL XLAENV( 1, 1 ) + CALL XLAENV( 9, 25 ) + IF( TSTERR ) THEN +#if defined(_OPENMP) + N_THREADS = OMP_GET_NUM_THREADS() + CALL OMP_SET_NUM_THREADS(1) +#endif + CALL CERRST( 'CST', NOUT ) +#if defined(_OPENMP) + CALL OMP_SET_NUM_THREADS(N_THREADS) +#endif + END IF + DO 290 I = 1, NPARMS + CALL XLAENV( 1, NBVAL( I ) ) + CALL XLAENV( 2, NBMIN( I ) ) + CALL XLAENV( 3, NXVAL( I ) ) +* + IF( NEWSD.EQ.0 ) THEN + DO 280 K = 1, 4 + ISEED( K ) = IOLDSD( K ) + 280 CONTINUE + END IF + WRITE( NOUT, FMT = 9997 )C3, NBVAL( I ), NBMIN( I ), + $ NXVAL( I ) + IF( TSTCHK ) THEN + IF( LSAMEN( 3, C3, 'SE2' ) ) THEN + CALL CCHKST2STG( NN, NVAL, MAXTYP, DOTYPE, ISEED, THRESH, + $ NOUT, A( 1, 1 ), NMAX, A( 1, 2 ), + $ DR( 1, 1 ), DR( 1, 2 ), DR( 1, 3 ), + $ DR( 1, 4 ), DR( 1, 5 ), DR( 1, 6 ), + $ DR( 1, 7 ), DR( 1, 8 ), DR( 1, 9 ), + $ DR( 1, 10 ), DR( 1, 11 ), A( 1, 3 ), NMAX, + $ A( 1, 4 ), A( 1, 5 ), DC( 1, 1 ), A( 1, 6 ), + $ WORK, LWORK, RWORK, LWORK, IWORK, LIWORK, + $ RESULT, INFO ) + ELSE + CALL CCHKST( NN, NVAL, MAXTYP, DOTYPE, ISEED, THRESH, + $ NOUT, A( 1, 1 ), NMAX, A( 1, 2 ), + $ DR( 1, 1 ), DR( 1, 2 ), DR( 1, 3 ), + $ DR( 1, 4 ), DR( 1, 5 ), DR( 1, 6 ), + $ DR( 1, 7 ), DR( 1, 8 ), DR( 1, 9 ), + $ DR( 1, 10 ), DR( 1, 11 ), A( 1, 3 ), NMAX, + $ A( 1, 4 ), A( 1, 5 ), DC( 1, 1 ), A( 1, 6 ), + $ WORK, LWORK, RWORK, LWORK, IWORK, LIWORK, + $ RESULT, INFO ) + ENDIF + IF( INFO.NE.0 ) + $ WRITE( NOUT, FMT = 9980 )'CCHKST', INFO + END IF + IF( TSTDRV ) THEN + IF( LSAMEN( 3, C3, 'SE2' ) ) THEN + CALL CDRVST2STG( NN, NVAL, 18, DOTYPE, ISEED, THRESH, + $ NOUT, A( 1, 1 ), NMAX, DR( 1, 3 ), DR( 1, 4 ), + $ DR( 1, 5 ), DR( 1, 8 ), DR( 1, 9 ), + $ 
DR( 1, 10 ), A( 1, 2 ), NMAX, A( 1, 3 ), + $ DC( 1, 1 ), A( 1, 4 ), WORK, LWORK, RWORK, + $ LWORK, IWORK, LIWORK, RESULT, INFO ) + ELSE + CALL CDRVST( NN, NVAL, 18, DOTYPE, ISEED, THRESH, NOUT, + $ A( 1, 1 ), NMAX, DR( 1, 3 ), DR( 1, 4 ), + $ DR( 1, 5 ), DR( 1, 8 ), DR( 1, 9 ), + $ DR( 1, 10 ), A( 1, 2 ), NMAX, A( 1, 3 ), + $ DC( 1, 1 ), A( 1, 4 ), WORK, LWORK, RWORK, + $ LWORK, IWORK, LIWORK, RESULT, INFO ) + ENDIF + IF( INFO.NE.0 ) + $ WRITE( NOUT, FMT = 9980 )'CDRVST', INFO + END IF + 290 CONTINUE +* + ELSE IF( LSAMEN( 3, C3, 'CSG' ) ) THEN +* +* ---------------------------------------------- +* CSG: Hermitian Generalized Eigenvalue Problem +* ---------------------------------------------- +* Vary the parameters +* NB = block size +* NBMIN = minimum block size +* NX = crossover point +* + MAXTYP = 21 + NTYPES = MIN( MAXTYP, NTYPES ) + CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) + CALL XLAENV( 9, 25 ) + DO 310 I = 1, NPARMS + CALL XLAENV( 1, NBVAL( I ) ) + CALL XLAENV( 2, NBMIN( I ) ) + CALL XLAENV( 3, NXVAL( I ) ) +* + IF( NEWSD.EQ.0 ) THEN + DO 300 K = 1, 4 + ISEED( K ) = IOLDSD( K ) + 300 CONTINUE + END IF + WRITE( NOUT, FMT = 9997 )C3, NBVAL( I ), NBMIN( I ), + $ NXVAL( I ) + IF( TSTCHK ) THEN +* CALL CDRVSG( NN, NVAL, MAXTYP, DOTYPE, ISEED, THRESH, +* $ NOUT, A( 1, 1 ), NMAX, A( 1, 2 ), NMAX, +* $ DR( 1, 3 ), A( 1, 3 ), NMAX, A( 1, 4 ), +* $ A( 1, 5 ), A( 1, 6 ), A( 1, 7 ), WORK, +* $ LWORK, RWORK, LWORK, IWORK, LIWORK, RESULT, +* $ INFO ) + CALL CDRVSG2STG( NN, NVAL, MAXTYP, DOTYPE, ISEED, THRESH, + $ NOUT, A( 1, 1 ), NMAX, A( 1, 2 ), NMAX, + $ DR( 1, 3 ), DR( 1, 4 ), A( 1, 3 ), NMAX, + $ A( 1, 4 ), A( 1, 5 ), A( 1, 6 ), + $ A( 1, 7 ), WORK, LWORK, RWORK, LWORK, + $ IWORK, LIWORK, RESULT, INFO ) + IF( INFO.NE.0 ) + $ WRITE( NOUT, FMT = 9980 )'CDRVSG', INFO + END IF + 310 CONTINUE +* + ELSE IF( LSAMEN( 3, C3, 'CBD' ) .OR. 
LSAMEN( 3, C3, 'SVD' ) ) THEN +* +* ---------------------------------- +* SVD: Singular Value Decomposition +* ---------------------------------- +* Vary the parameters +* NB = block size +* NBMIN = minimum block size +* NX = crossover point +* NRHS = number of right hand sides +* + MAXTYP = 16 + NTYPES = MIN( MAXTYP, NTYPES ) + CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) + CALL XLAENV( 9, 25 ) +* +* Test the error exits +* + CALL XLAENV( 1, 1 ) + IF( TSTERR .AND. TSTCHK ) + $ CALL CERRBD( 'CBD', NOUT ) + IF( TSTERR .AND. TSTDRV ) + $ CALL CERRED( 'CBD', NOUT ) +* + DO 330 I = 1, NPARMS + NRHS = NSVAL( I ) + CALL XLAENV( 1, NBVAL( I ) ) + CALL XLAENV( 2, NBMIN( I ) ) + CALL XLAENV( 3, NXVAL( I ) ) + IF( NEWSD.EQ.0 ) THEN + DO 320 K = 1, 4 + ISEED( K ) = IOLDSD( K ) + 320 CONTINUE + END IF + WRITE( NOUT, FMT = 9995 )C3, NBVAL( I ), NBMIN( I ), + $ NXVAL( I ), NRHS + IF( TSTCHK ) THEN + CALL CCHKBD( NN, MVAL, NVAL, MAXTYP, DOTYPE, NRHS, ISEED, + $ THRESH, A( 1, 1 ), NMAX, DR( 1, 1 ), + $ DR( 1, 2 ), DR( 1, 3 ), DR( 1, 4 ), + $ A( 1, 2 ), NMAX, A( 1, 3 ), A( 1, 4 ), + $ A( 1, 5 ), NMAX, A( 1, 6 ), NMAX, A( 1, 7 ), + $ A( 1, 8 ), WORK, LWORK, RWORK, NOUT, INFO ) + IF( INFO.NE.0 ) + $ WRITE( NOUT, FMT = 9980 )'CCHKBD', INFO + END IF + IF( TSTDRV ) + $ CALL CDRVBD( NN, MVAL, NVAL, MAXTYP, DOTYPE, ISEED, + $ THRESH, A( 1, 1 ), NMAX, A( 1, 2 ), NMAX, + $ A( 1, 3 ), NMAX, A( 1, 4 ), A( 1, 5 ), + $ A( 1, 6 ), DR( 1, 1 ), DR( 1, 2 ), + $ DR( 1, 3 ), WORK, LWORK, RWORK, IWORK, NOUT, + $ INFO ) + 330 CONTINUE +* + ELSE IF( LSAMEN( 3, C3, 'CEV' ) ) THEN +* +* -------------------------------------------- +* CEV: Nonsymmetric Eigenvalue Problem Driver +* CGEEV (eigenvalues and eigenvectors) +* -------------------------------------------- +* + MAXTYP = 21 + NTYPES = MIN( MAXTYP, NTYPES ) + IF( NTYPES.LE.0 ) THEN + WRITE( NOUT, FMT = 9990 )C3 + ELSE + IF( TSTERR ) + $ CALL CERRED( C3, NOUT ) + CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) + CALL CDRVEV( NN, NVAL, 
NTYPES, DOTYPE, ISEED, THRESH, NOUT, + $ A( 1, 1 ), NMAX, A( 1, 2 ), DC( 1, 1 ), + $ DC( 1, 2 ), A( 1, 3 ), NMAX, A( 1, 4 ), NMAX, + $ A( 1, 5 ), NMAX, RESULT, WORK, LWORK, RWORK, + $ IWORK, INFO ) + IF( INFO.NE.0 ) + $ WRITE( NOUT, FMT = 9980 )'CGEEV', INFO + END IF + WRITE( NOUT, FMT = 9973 ) + GO TO 10 +* + ELSE IF( LSAMEN( 3, C3, 'CES' ) ) THEN +* +* -------------------------------------------- +* CES: Nonsymmetric Eigenvalue Problem Driver +* CGEES (Schur form) +* -------------------------------------------- +* + MAXTYP = 21 + NTYPES = MIN( MAXTYP, NTYPES ) + IF( NTYPES.LE.0 ) THEN + WRITE( NOUT, FMT = 9990 )C3 + ELSE + IF( TSTERR ) + $ CALL CERRED( C3, NOUT ) + CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) + CALL CDRVES( NN, NVAL, NTYPES, DOTYPE, ISEED, THRESH, NOUT, + $ A( 1, 1 ), NMAX, A( 1, 2 ), A( 1, 3 ), + $ DC( 1, 1 ), DC( 1, 2 ), A( 1, 4 ), NMAX, + $ RESULT, WORK, LWORK, RWORK, IWORK, LOGWRK, + $ INFO ) + IF( INFO.NE.0 ) + $ WRITE( NOUT, FMT = 9980 )'CGEES', INFO + END IF + WRITE( NOUT, FMT = 9973 ) + GO TO 10 +* + ELSE IF( LSAMEN( 3, C3, 'CVX' ) ) THEN +* +* -------------------------------------------------------------- +* CVX: Nonsymmetric Eigenvalue Problem Expert Driver +* CGEEVX (eigenvalues, eigenvectors and condition numbers) +* -------------------------------------------------------------- +* + MAXTYP = 21 + NTYPES = MIN( MAXTYP, NTYPES ) + IF( NTYPES.LT.0 ) THEN + WRITE( NOUT, FMT = 9990 )C3 + ELSE + IF( TSTERR ) + $ CALL CERRED( C3, NOUT ) + CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) + CALL CDRVVX( NN, NVAL, NTYPES, DOTYPE, ISEED, THRESH, NIN, + $ NOUT, A( 1, 1 ), NMAX, A( 1, 2 ), DC( 1, 1 ), + $ DC( 1, 2 ), A( 1, 3 ), NMAX, A( 1, 4 ), NMAX, + $ A( 1, 5 ), NMAX, DR( 1, 1 ), DR( 1, 2 ), + $ DR( 1, 3 ), DR( 1, 4 ), DR( 1, 5 ), DR( 1, 6 ), + $ DR( 1, 7 ), DR( 1, 8 ), RESULT, WORK, LWORK, + $ RWORK, INFO ) + IF( INFO.NE.0 ) + $ WRITE( NOUT, FMT = 9980 )'CGEEVX', INFO + END IF + WRITE( NOUT, FMT = 9973 ) + GO TO 10 +* + ELSE IF( 
LSAMEN( 3, C3, 'CSX' ) ) THEN +* +* --------------------------------------------------- +* CSX: Nonsymmetric Eigenvalue Problem Expert Driver +* CGEESX (Schur form and condition numbers) +* --------------------------------------------------- +* + MAXTYP = 21 + NTYPES = MIN( MAXTYP, NTYPES ) + IF( NTYPES.LT.0 ) THEN + WRITE( NOUT, FMT = 9990 )C3 + ELSE + IF( TSTERR ) + $ CALL CERRED( C3, NOUT ) + CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) + CALL CDRVSX( NN, NVAL, NTYPES, DOTYPE, ISEED, THRESH, NIN, + $ NOUT, A( 1, 1 ), NMAX, A( 1, 2 ), A( 1, 3 ), + $ DC( 1, 1 ), DC( 1, 2 ), DC( 1, 3 ), A( 1, 4 ), + $ NMAX, A( 1, 5 ), RESULT, WORK, LWORK, RWORK, + $ LOGWRK, INFO ) + IF( INFO.NE.0 ) + $ WRITE( NOUT, FMT = 9980 )'CGEESX', INFO + END IF + WRITE( NOUT, FMT = 9973 ) + GO TO 10 +* + ELSE IF( LSAMEN( 3, C3, 'CGG' ) ) THEN +* +* ------------------------------------------------- +* CGG: Generalized Nonsymmetric Eigenvalue Problem +* ------------------------------------------------- +* Vary the parameters +* NB = block size +* NBMIN = minimum block size +* NS = number of shifts +* MAXB = minimum submatrix size +* IACC22: structured matrix multiply +* NBCOL = minimum column dimension for blocks +* + MAXTYP = 26 + NTYPES = MIN( MAXTYP, NTYPES ) + CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) + CALL XLAENV(1,1) + IF( TSTCHK .AND. TSTERR ) + $ CALL CERRGG( C3, NOUT ) + DO 350 I = 1, NPARMS + CALL XLAENV( 1, NBVAL( I ) ) + CALL XLAENV( 2, NBMIN( I ) ) + CALL XLAENV( 4, NSVAL( I ) ) + CALL XLAENV( 8, MXBVAL( I ) ) + CALL XLAENV( 16, IACC22( I ) ) + CALL XLAENV( 5, NBCOL( I ) ) +* + IF( NEWSD.EQ.0 ) THEN + DO 340 K = 1, 4 + ISEED( K ) = IOLDSD( K ) + 340 CONTINUE + END IF + WRITE( NOUT, FMT = 9996 )C3, NBVAL( I ), NBMIN( I ), + $ NSVAL( I ), MXBVAL( I ), IACC22( I ), NBCOL( I ) + TSTDIF = .FALSE. + THRSHN = 10. 
+ IF( TSTCHK ) THEN + CALL CCHKGG( NN, NVAL, MAXTYP, DOTYPE, ISEED, THRESH, + $ TSTDIF, THRSHN, NOUT, A( 1, 1 ), NMAX, + $ A( 1, 2 ), A( 1, 3 ), A( 1, 4 ), A( 1, 5 ), + $ A( 1, 6 ), A( 1, 7 ), A( 1, 8 ), A( 1, 9 ), + $ NMAX, A( 1, 10 ), A( 1, 11 ), A( 1, 12 ), + $ DC( 1, 1 ), DC( 1, 2 ), DC( 1, 3 ), + $ DC( 1, 4 ), A( 1, 13 ), A( 1, 14 ), WORK, + $ LWORK, RWORK, LOGWRK, RESULT, INFO ) + IF( INFO.NE.0 ) + $ WRITE( NOUT, FMT = 9980 )'CCHKGG', INFO + END IF + 350 CONTINUE +* + ELSE IF( LSAMEN( 3, C3, 'CGS' ) ) THEN +* +* ------------------------------------------------- +* CGS: Generalized Nonsymmetric Eigenvalue Problem +* CGGES (Schur form) +* ------------------------------------------------- +* + MAXTYP = 26 + NTYPES = MIN( MAXTYP, NTYPES ) + IF( NTYPES.LE.0 ) THEN + WRITE( NOUT, FMT = 9990 )C3 + ELSE + IF( TSTERR ) + $ CALL CERRGG( C3, NOUT ) + CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) + CALL CDRGES( NN, NVAL, MAXTYP, DOTYPE, ISEED, THRESH, NOUT, + $ A( 1, 1 ), NMAX, A( 1, 2 ), A( 1, 3 ), + $ A( 1, 4 ), A( 1, 7 ), NMAX, A( 1, 8 ), + $ DC( 1, 1 ), DC( 1, 2 ), WORK, LWORK, RWORK, + $ RESULT, LOGWRK, INFO ) +* + IF( INFO.NE.0 ) + $ WRITE( NOUT, FMT = 9980 )'CDRGES', INFO +* +* Blocked version +* + CALL XLAENV(16,2) + CALL CDRGES3( NN, NVAL, MAXTYP, DOTYPE, ISEED, THRESH, NOUT, + $ A( 1, 1 ), NMAX, A( 1, 2 ), A( 1, 3 ), + $ A( 1, 4 ), A( 1, 7 ), NMAX, A( 1, 8 ), + $ DC( 1, 1 ), DC( 1, 2 ), WORK, LWORK, RWORK, + $ RESULT, LOGWRK, INFO ) +* + IF( INFO.NE.0 ) + $ WRITE( NOUT, FMT = 9980 )'CDRGES3', INFO + END IF + WRITE( NOUT, FMT = 9973 ) + + GO TO 10 +* + ELSE IF( CGX ) THEN +* +* ------------------------------------------------- +* CGX Generalized Nonsymmetric Eigenvalue Problem +* CGGESX (Schur form and condition numbers) +* ------------------------------------------------- +* + MAXTYP = 5 + NTYPES = MAXTYP + IF( NN.LT.0 ) THEN + WRITE( NOUT, FMT = 9990 )C3 + ELSE + IF( TSTERR ) + $ CALL CERRGG( C3, NOUT ) + CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, 
NOUT ) + CALL XLAENV( 5, 2 ) + CALL CDRGSX( NN, NCMAX, THRESH, NIN, NOUT, A( 1, 1 ), NMAX, + $ A( 1, 2 ), A( 1, 3 ), A( 1, 4 ), A( 1, 5 ), + $ A( 1, 6 ), DC( 1, 1 ), DC( 1, 2 ), C, + $ NCMAX*NCMAX, S, WORK, LWORK, RWORK, IWORK, + $ LIWORK, LOGWRK, INFO ) + IF( INFO.NE.0 ) + $ WRITE( NOUT, FMT = 9980 )'CDRGSX', INFO + END IF + WRITE( NOUT, FMT = 9973 ) + GO TO 10 +* + ELSE IF( LSAMEN( 3, C3, 'CGV' ) ) THEN +* +* ------------------------------------------------- +* CGV: Generalized Nonsymmetric Eigenvalue Problem +* CGGEV (Eigenvalue/vector form) +* ------------------------------------------------- +* + MAXTYP = 26 + NTYPES = MIN( MAXTYP, NTYPES ) + IF( NTYPES.LE.0 ) THEN + WRITE( NOUT, FMT = 9990 )C3 + ELSE + IF( TSTERR ) + $ CALL CERRGG( C3, NOUT ) + CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) + CALL CDRGEV( NN, NVAL, MAXTYP, DOTYPE, ISEED, THRESH, NOUT, + $ A( 1, 1 ), NMAX, A( 1, 2 ), A( 1, 3 ), + $ A( 1, 4 ), A( 1, 7 ), NMAX, A( 1, 8 ), + $ A( 1, 9 ), NMAX, DC( 1, 1 ), DC( 1, 2 ), + $ DC( 1, 3 ), DC( 1, 4 ), WORK, LWORK, RWORK, + $ RESULT, INFO ) + IF( INFO.NE.0 ) + $ WRITE( NOUT, FMT = 9980 )'CDRGEV', INFO +* +* Blocked version +* + CALL XLAENV(16,2) + CALL CDRGEV3( NN, NVAL, MAXTYP, DOTYPE, ISEED, THRESH, NOUT, + $ A( 1, 1 ), NMAX, A( 1, 2 ), A( 1, 3 ), + $ A( 1, 4 ), A( 1, 7 ), NMAX, A( 1, 8 ), + $ A( 1, 9 ), NMAX, DC( 1, 1 ), DC( 1, 2 ), + $ DC( 1, 3 ), DC( 1, 4 ), WORK, LWORK, RWORK, + $ RESULT, INFO ) + IF( INFO.NE.0 ) + $ WRITE( NOUT, FMT = 9980 )'CDRGEV3', INFO + END IF + WRITE( NOUT, FMT = 9973 ) + GO TO 10 +* + ELSE IF( CXV ) THEN +* +* ------------------------------------------------- +* CXV: Generalized Nonsymmetric Eigenvalue Problem +* CGGEVX (eigenvalue/vector with condition numbers) +* ------------------------------------------------- +* + MAXTYP = 2 + NTYPES = MAXTYP + IF( NN.LT.0 ) THEN + WRITE( NOUT, FMT = 9990 )C3 + ELSE + IF( TSTERR ) + $ CALL CERRGG( C3, NOUT ) + CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) + CALL CDRGVX( 
NN, THRESH, NIN, NOUT, A( 1, 1 ), NMAX, + $ A( 1, 2 ), A( 1, 3 ), A( 1, 4 ), DC( 1, 1 ), + $ DC( 1, 2 ), A( 1, 5 ), A( 1, 6 ), IWORK( 1 ), + $ IWORK( 2 ), DR( 1, 1 ), DR( 1, 2 ), DR( 1, 3 ), + $ DR( 1, 4 ), DR( 1, 5 ), DR( 1, 6 ), WORK, + $ LWORK, RWORK, IWORK( 3 ), LIWORK-2, RESULT, + $ LOGWRK, INFO ) +* + IF( INFO.NE.0 ) + $ WRITE( NOUT, FMT = 9980 )'CDRGVX', INFO + END IF + WRITE( NOUT, FMT = 9973 ) + GO TO 10 +* + ELSE IF( LSAMEN( 3, C3, 'CHB' ) ) THEN +* +* ------------------------------ +* CHB: Hermitian Band Reduction +* ------------------------------ +* + MAXTYP = 15 + NTYPES = MIN( MAXTYP, NTYPES ) + CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) + IF( TSTERR ) THEN +#if defined(_OPENMP) + N_THREADS = OMP_GET_NUM_THREADS() + CALL OMP_SET_NUM_THREADS(1) +#endif + CALL CERRST( 'CHB', NOUT ) +#if defined(_OPENMP) + CALL OMP_SET_NUM_THREADS(N_THREADS) +#endif + END IF +* CALL CCHKHB( NN, NVAL, NK, KVAL, MAXTYP, DOTYPE, ISEED, THRESH, +* $ NOUT, A( 1, 1 ), NMAX, DR( 1, 1 ), DR( 1, 2 ), +* $ A( 1, 2 ), NMAX, WORK, LWORK, RWORK, RESULT, +* $ INFO ) + CALL CCHKHB2STG( NN, NVAL, NK, KVAL, MAXTYP, DOTYPE, ISEED, + $ THRESH, NOUT, A( 1, 1 ), NMAX, DR( 1, 1 ), + $ DR( 1, 2 ), DR( 1, 3 ), DR( 1, 4 ), DR( 1, 5 ), + $ A( 1, 2 ), NMAX, WORK, LWORK, RWORK, RESULT, + $ INFO ) + IF( INFO.NE.0 ) + $ WRITE( NOUT, FMT = 9980 )'CCHKHB', INFO +* + ELSE IF( LSAMEN( 3, C3, 'CBB' ) ) THEN +* +* ------------------------------ +* CBB: General Band Reduction +* ------------------------------ +* + MAXTYP = 15 + NTYPES = MIN( MAXTYP, NTYPES ) + CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) + DO 370 I = 1, NPARMS + NRHS = NSVAL( I ) +* + IF( NEWSD.EQ.0 ) THEN + DO 360 K = 1, 4 + ISEED( K ) = IOLDSD( K ) + 360 CONTINUE + END IF + WRITE( NOUT, FMT = 9966 )C3, NRHS + CALL CCHKBB( NN, MVAL, NVAL, NK, KVAL, MAXTYP, DOTYPE, NRHS, + $ ISEED, THRESH, NOUT, A( 1, 1 ), NMAX, + $ A( 1, 2 ), 2*NMAX, DR( 1, 1 ), DR( 1, 2 ), + $ A( 1, 4 ), NMAX, A( 1, 5 ), NMAX, A( 1, 6 ), + $ NMAX, A( 1, 
7 ), WORK, LWORK, RWORK, RESULT, + $ INFO ) + IF( INFO.NE.0 ) + $ WRITE( NOUT, FMT = 9980 )'CCHKBB', INFO + 370 CONTINUE +* + ELSE IF( LSAMEN( 3, C3, 'GLM' ) ) THEN +* +* ----------------------------------------- +* GLM: Generalized Linear Regression Model +* ----------------------------------------- +* + CALL XLAENV( 1, 1 ) + IF( TSTERR ) + $ CALL CERRGG( 'GLM', NOUT ) + CALL CCKGLM( NN, NVAL, MVAL, PVAL, NTYPES, ISEED, THRESH, NMAX, + $ A( 1, 1 ), A( 1, 2 ), B( 1, 1 ), B( 1, 2 ), X, + $ WORK, DR( 1, 1 ), NIN, NOUT, INFO ) + IF( INFO.NE.0 ) + $ WRITE( NOUT, FMT = 9980 )'CCKGLM', INFO +* + ELSE IF( LSAMEN( 3, C3, 'GQR' ) ) THEN +* +* ------------------------------------------ +* GQR: Generalized QR and RQ factorizations +* ------------------------------------------ +* + CALL XLAENV( 1, 1 ) + IF( TSTERR ) + $ CALL CERRGG( 'GQR', NOUT ) + CALL CCKGQR( NN, MVAL, NN, PVAL, NN, NVAL, NTYPES, ISEED, + $ THRESH, NMAX, A( 1, 1 ), A( 1, 2 ), A( 1, 3 ), + $ A( 1, 4 ), TAUA, B( 1, 1 ), B( 1, 2 ), B( 1, 3 ), + $ B( 1, 4 ), B( 1, 5 ), TAUB, WORK, DR( 1, 1 ), NIN, + $ NOUT, INFO ) + IF( INFO.NE.0 ) + $ WRITE( NOUT, FMT = 9980 )'CCKGQR', INFO +* + ELSE IF( LSAMEN( 3, C3, 'GSV' ) ) THEN +* +* ---------------------------------------------- +* GSV: Generalized Singular Value Decomposition +* ---------------------------------------------- +* + CALL XLAENV(1,1) + IF( TSTERR ) + $ CALL CERRGG( 'GSV', NOUT ) + CALL CCKGSV( NN, MVAL, PVAL, NVAL, NTYPES, ISEED, THRESH, NMAX, + $ A( 1, 1 ), A( 1, 2 ), B( 1, 1 ), B( 1, 2 ), + $ A( 1, 3 ), B( 1, 3 ), A( 1, 4 ), ALPHA, BETA, + $ B( 1, 4 ), IWORK, WORK, DR( 1, 1 ), NIN, NOUT, + $ INFO ) + IF( INFO.NE.0 ) + $ WRITE( NOUT, FMT = 9980 )'CCKGSV', INFO +* + ELSE IF( LSAMEN( 3, C3, 'CSD' ) ) THEN +* +* ---------------------------------------------- +* CSD: CS Decomposition +* ---------------------------------------------- +* + CALL XLAENV(1,1) + IF( TSTERR ) + $ CALL CERRGG( 'CSD', NOUT ) + CALL CCKCSD( NN, MVAL, PVAL, NVAL, NTYPES, ISEED, THRESH, 
NMAX, + $ A( 1, 1 ), A( 1, 2 ), A( 1, 3 ), A( 1, 4 ), + $ A( 1, 5 ), A( 1, 6 ), RWORK, IWORK, WORK, + $ DR( 1, 1 ), NIN, NOUT, INFO ) + IF( INFO.NE.0 ) + $ WRITE( NOUT, FMT = 9980 )'CCKCSD', INFO +* + ELSE IF( LSAMEN( 3, C3, 'LSE' ) ) THEN +* +* -------------------------------------- +* LSE: Constrained Linear Least Squares +* -------------------------------------- +* + CALL XLAENV( 1, 1 ) + IF( TSTERR ) + $ CALL CERRGG( 'LSE', NOUT ) + CALL CCKLSE( NN, MVAL, PVAL, NVAL, NTYPES, ISEED, THRESH, NMAX, + $ A( 1, 1 ), A( 1, 2 ), B( 1, 1 ), B( 1, 2 ), X, + $ WORK, DR( 1, 1 ), NIN, NOUT, INFO ) + IF( INFO.NE.0 ) + $ WRITE( NOUT, FMT = 9980 )'CCKLSE', INFO + ELSE + WRITE( NOUT, FMT = * ) + WRITE( NOUT, FMT = * ) + WRITE( NOUT, FMT = 9992 )C3 + END IF + IF( .NOT.( CGX .OR. CXV ) ) + $ GO TO 190 + 380 CONTINUE + WRITE( NOUT, FMT = 9994 ) + S2 = SECOND( ) + WRITE( NOUT, FMT = 9993 )S2 - S1 +* + DEALLOCATE (S, STAT = AllocateStatus) + DEALLOCATE (A, STAT = AllocateStatus) + DEALLOCATE (B, STAT = AllocateStatus) + DEALLOCATE (C, STAT = AllocateStatus) + DEALLOCATE (RWORK, STAT = AllocateStatus) + DEALLOCATE (WORK, STAT = AllocateStatus) +* + 9999 FORMAT( / ' Execution not attempted due to input errors' ) + 9997 FORMAT( / / 1X, A3, ': NB =', I4, ', NBMIN =', I4, ', NX =', I4 ) + 9996 FORMAT( / / 1X, A3, ': NB =', I4, ', NBMIN =', I4, ', NS =', I4, + $ ', MAXB =', I4, ', IACC22 =', I4, ', NBCOL =', I4 ) + 9995 FORMAT( / / 1X, A3, ': NB =', I4, ', NBMIN =', I4, ', NX =', I4, + $ ', NRHS =', I4 ) + 9994 FORMAT( / / ' End of tests' ) + 9993 FORMAT( ' Total time used = ', F12.2, ' seconds', / ) + 9992 FORMAT( 1X, A3, ': Unrecognized path name' ) + 9991 FORMAT( / / ' *** Invalid integer value in column ', I2, + $ ' of input', ' line:', / A79 ) + 9990 FORMAT( / / 1X, A3, ' routines were not tested' ) + 9989 FORMAT( ' Invalid input value: ', A, '=', I6, '; must be >=', + $ I6 ) + 9988 FORMAT( ' Invalid input value: ', A, '=', I6, '; must be <=', + $ I6 ) + 9987 FORMAT( ' Tests of the 
Nonsymmetric Eigenvalue Problem routines' ) + 9986 FORMAT( ' Tests of the Hermitian Eigenvalue Problem routines' ) + 9985 FORMAT( ' Tests of the Singular Value Decomposition routines' ) + 9984 FORMAT( / ' The following parameter values will be used:' ) + 9983 FORMAT( 4X, A, 10I6, / 10X, 10I6 ) + 9982 FORMAT( / ' Routines pass computational tests if test ratio is ', + $ 'less than', F8.2, / ) + 9981 FORMAT( ' Relative machine ', A, ' is taken to be', E16.6 ) + 9980 FORMAT( ' *** Error code from ', A, ' = ', I4 ) + 9979 FORMAT( / ' Tests of the Nonsymmetric Eigenvalue Problem Driver', + $ / ' CGEEV (eigenvalues and eigevectors)' ) + 9978 FORMAT( / ' Tests of the Nonsymmetric Eigenvalue Problem Driver', + $ / ' CGEES (Schur form)' ) + 9977 FORMAT( / ' Tests of the Nonsymmetric Eigenvalue Problem Expert', + $ ' Driver', / ' CGEEVX (eigenvalues, eigenvectors and', + $ ' condition numbers)' ) + 9976 FORMAT( / ' Tests of the Nonsymmetric Eigenvalue Problem Expert', + $ ' Driver', / ' CGEESX (Schur form and condition', + $ ' numbers)' ) + 9975 FORMAT( / ' Tests of the Generalized Nonsymmetric Eigenvalue ', + $ 'Problem routines' ) + 9974 FORMAT( ' Tests of CHBTRD', / ' (reduction of a Hermitian band ', + $ 'matrix to real tridiagonal form)' ) + 9973 FORMAT( / 1X, 71( '-' ) ) + 9972 FORMAT( / ' LAPACK VERSION ', I1, '.', I1, '.', I1 ) + 9971 FORMAT( / ' Tests of the Generalized Linear Regression Model ', + $ 'routines' ) + 9970 FORMAT( / ' Tests of the Generalized QR and RQ routines' ) + 9969 FORMAT( / ' Tests of the Generalized Singular Value', + $ ' Decomposition routines' ) + 9968 FORMAT( / ' Tests of the Linear Least Squares routines' ) + 9967 FORMAT( ' Tests of CGBBRD', / ' (reduction of a general band ', + $ 'matrix to real bidiagonal form)' ) + 9966 FORMAT( / / 1X, A3, ': NRHS =', I4 ) + 9965 FORMAT( / ' Tests of the Generalized Nonsymmetric Eigenvalue ', + $ 'Problem Expert Driver CGGESX' ) + 9964 FORMAT( / ' Tests of the Generalized Nonsymmetric Eigenvalue ', + $ 
'Problem Driver CGGES' ) + 9963 FORMAT( / ' Tests of the Generalized Nonsymmetric Eigenvalue ', + $ 'Problem Driver CGGEV' ) + 9962 FORMAT( / ' Tests of the Generalized Nonsymmetric Eigenvalue ', + $ 'Problem Expert Driver CGGEVX' ) + 9961 FORMAT( / / 1X, A3, ': NB =', I4, ', NBMIN =', I4, ', NX =', I4, + $ ', INMIN=', I4, + $ ', INWIN =', I4, ', INIBL =', I4, ', ISHFTS =', I4, + $ ', IACC22 =', I4) + 9960 FORMAT( / ' Tests of the CS Decomposition routines' ) +* +* End of CCHKEE +* + END From 90c1776c86339dfcd61ae07935f448a8b10346a4 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sun, 28 Feb 2021 18:53:20 +0100 Subject: [PATCH 113/134] Adjust build rules for ?chkee.F --- lapack-netlib/TESTING/EIG/Makefile | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lapack-netlib/TESTING/EIG/Makefile b/lapack-netlib/TESTING/EIG/Makefile index b3efebcd0..a292e4496 100644 --- a/lapack-netlib/TESTING/EIG/Makefile +++ b/lapack-netlib/TESTING/EIG/Makefile @@ -157,11 +157,11 @@ cleanobj: cleanexe: rm -f xeigtst* -schkee.o: schkee.f +schkee.o: schkee.F $(FC) $(FFLAGS_DRV) -c -o $@ $< -dchkee.o: dchkee.f +dchkee.o: dchkee.F $(FC) $(FFLAGS_DRV) -c -o $@ $< -cchkee.o: cchkee.f +cchkee.o: cchkee.F $(FC) $(FFLAGS_DRV) -c -o $@ $< -zchkee.o: zchkee.f +zchkee.o: zchkee.F $(FC) $(FFLAGS_DRV) -c -o $@ $< From 9564f688c490bf0dabfa8226d3643d749f7ffff5 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sun, 28 Feb 2021 18:57:05 +0100 Subject: [PATCH 114/134] Adjust build rules for ?chkee.F --- lapack-netlib/TESTING/EIG/CMakeLists.txt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lapack-netlib/TESTING/EIG/CMakeLists.txt b/lapack-netlib/TESTING/EIG/CMakeLists.txt index e877b1422..10c25a446 100644 --- a/lapack-netlib/TESTING/EIG/CMakeLists.txt +++ b/lapack-netlib/TESTING/EIG/CMakeLists.txt @@ -25,7 +25,7 @@ set(AEIGTST set(SCIGTST slafts.f slahd2.f slasum.f slatb9.f sstech.f sstect.f ssvdch.f ssvdct.f ssxt1.f) -set(SEIGTST schkee.f 
+set(SEIGTST schkee.F sbdt01.f sbdt02.f sbdt03.f sbdt04.f sbdt05.f schkbb.f schkbd.f schkbk.f schkbl.f schkec.f schkgg.f schkgk.f schkgl.f schkhs.f schksb.f schkst.f schkst2stg.f schksb2stg.f @@ -42,7 +42,7 @@ set(SEIGTST schkee.f sort03.f ssbt21.f ssgt01.f sslect.f sspt21.f sstt21.f sstt22.f ssyt21.f ssyt22.f) -set(CEIGTST cchkee.f +set(CEIGTST cchkee.F cbdt01.f cbdt02.f cbdt03.f cbdt05.f cchkbb.f cchkbd.f cchkbk.f cchkbl.f cchkec.f cchkgg.f cchkgk.f cchkgl.f cchkhb.f cchkhs.f cchkst.f cchkst2stg.f cchkhb2stg.f @@ -62,7 +62,7 @@ set(CEIGTST cchkee.f set(DZIGTST dlafts.f dlahd2.f dlasum.f dlatb9.f dstech.f dstect.f dsvdch.f dsvdct.f dsxt1.f) -set(DEIGTST dchkee.f +set(DEIGTST dchkee.F dbdt01.f dbdt02.f dbdt03.f dbdt04.f dbdt05.f dchkbb.f dchkbd.f dchkbk.f dchkbl.f dchkec.f dchkgg.f dchkgk.f dchkgl.f dchkhs.f dchksb.f dchkst.f dchkst2stg.f dchksb2stg.f @@ -79,7 +79,7 @@ set(DEIGTST dchkee.f dort03.f dsbt21.f dsgt01.f dslect.f dspt21.f dstt21.f dstt22.f dsyt21.f dsyt22.f) -set(ZEIGTST zchkee.f +set(ZEIGTST zchkee.F zbdt01.f zbdt02.f zbdt03.f zbdt05.f zchkbb.f zchkbd.f zchkbk.f zchkbl.f zchkec.f zchkgg.f zchkgk.f zchkgl.f zchkhb.f zchkhs.f zchkst.f zchkst2stg.f zchkhb2stg.f From 20f492c2984913b6b278be2ae6bbb057026bfc52 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Mon, 1 Mar 2021 21:00:10 +0100 Subject: [PATCH 115/134] Fix AMD AOCC compiler detection --- Makefile.system | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile.system b/Makefile.system index 848c38797..91a078565 100644 --- a/Makefile.system +++ b/Makefile.system @@ -904,8 +904,8 @@ CCOMMON_OPT += -DF_INTERFACE_FLANG FCOMMON_OPT += -Mrecursive -Kieee ifeq ($(OSNAME), Linux) ifeq ($(ARCH), x86_64) -FLANG_VENDOR := $(shell `$(FC) --version|cut -f 1 -d "."|head -1`) -ifeq ($(FLANG_VENDOR),AOCC) +FLANG_VENDOR := $(shell $(FC) --version|head -1 |cut -f 1 -d " ") +ifeq ($(FLANG_VENDOR), AMD) FCOMMON_OPT += -fno-unroll-loops endif endif From 38dcf3454bf4d3a4b5b470791277904c025d7369 
Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Tue, 2 Mar 2021 17:50:55 +0100 Subject: [PATCH 116/134] Support timing Apple M1 --- benchmark/bench.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/benchmark/bench.h b/benchmark/bench.h index 1f9b8986c..83de8ab2b 100644 --- a/benchmark/bench.h +++ b/benchmark/bench.h @@ -74,6 +74,9 @@ static void *huge_malloc(BLASLONG size){ #if defined(__WIN32__) || defined(__WIN64__) || !defined(_POSIX_TIMERS) struct timeval start, stop; +#elif defined(__APPLE__) + mach_timebase_info_data_t info; + uint64_t start = 0, stop = 0; #else struct timespec start = { 0, 0 }, stop = { 0, 0 }; #endif @@ -82,6 +85,9 @@ double getsec() { #if defined(__WIN32__) || defined(__WIN64__) || !defined(_POSIX_TIMERS) return (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; +#elif defined(__APPLE__) + mach_timebase_info(&info); + return (double)(((stop - start) * info.numer)/info.denom) * 1.e-9; #else return (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_nsec - start.tv_nsec)) * 1.e-9; #endif @@ -90,6 +96,8 @@ double getsec() void begin() { #if defined(__WIN32__) || defined(__WIN64__) || !defined(_POSIX_TIMERS) gettimeofday( &start, (struct timezone *)0); +#elif defined(__APPLE__) + start = clock_gettime_nsec_np(CLOCK_UPTIME_RAW); #else clock_gettime(CLOCK_REALTIME, &start); #endif @@ -98,7 +106,9 @@ void begin() { void end() { #if defined(__WIN32__) || defined(__WIN64__) || !defined(_POSIX_TIMERS) gettimeofday( &stop, (struct timezone *)0); +#elif defined(__APPLE__) + stop = clock_gettime_nsec_np(CLOCK_UPTIME_RAW); #else clock_gettime(CLOCK_REALTIME, &stop); #endif -} \ No newline at end of file +} From 41646ed006b25167417a5b56ad37e20c9632851c Mon Sep 17 00:00:00 2001 From: Rajalakshmi Srinivasaraghavan Date: Fri, 5 Mar 2021 16:22:36 -0600 Subject: [PATCH 117/134] Optimize s/dasum function for POWER10 This patch makes use of new POWER10 vector pair instructions for 
loads and stores. --- kernel/power/dasum.c | 20 +++- kernel/power/dasum_microk_power10.c | 152 +++++++++++++++++++++++++++ kernel/power/sasum.c | 20 +++- kernel/power/sasum_microk_power10.c | 153 ++++++++++++++++++++++++++++ 4 files changed, 343 insertions(+), 2 deletions(-) create mode 100644 kernel/power/dasum_microk_power10.c create mode 100644 kernel/power/sasum_microk_power10.c diff --git a/kernel/power/dasum.c b/kernel/power/dasum.c index 999dc677a..0cdec3292 100644 --- a/kernel/power/dasum.c +++ b/kernel/power/dasum.c @@ -46,9 +46,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #endif -#if defined(POWER8) || defined(POWER9) || defined(POWER10) #if defined(__VEC__) || defined(__ALTIVEC__) +#if defined(POWER8) || defined(POWER9) #include "dasum_microk_power8.c" +#elif defined(POWER10) +#include "dasum_microk_power10.c" #endif #endif @@ -110,6 +112,21 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) if ( inc_x == 1 ) { +#if defined(POWER10) + if ( n >= 16 ) + { + BLASLONG align = ((32 - ((uintptr_t)x & (uintptr_t)0x1F)) >> 3) & 0x3; + for (i = 0; i < align; i++) { + sumf += ABS(x[i]); + } + } + n1 = (n-i) & -16; + if ( n1 > 0 ) + { + sumf += dasum_kernel_16(n1, &x[i]); + i+=n1; + } +#else n1 = n & -16; if ( n1 > 0 ) { @@ -117,6 +134,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) sumf = dasum_kernel_16(n1, x); i=n1; } +#endif while(i < n) { diff --git a/kernel/power/dasum_microk_power10.c b/kernel/power/dasum_microk_power10.c new file mode 100644 index 000000000..d1a21b4d1 --- /dev/null +++ b/kernel/power/dasum_microk_power10.c @@ -0,0 +1,152 @@ +/*************************************************************************** +Copyright (c) 2021, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. 
Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*****************************************************************************/ + +#define HAVE_KERNEL_16 1 + +static double dasum_kernel_16 (long n, double *x) +{ + double sum; + __vector double t0; + __vector double t1; + __vector double t2; + __vector double t3; + + __asm__ + ( + "dcbt 0, %2 \n\t" + + "xxlxor 32, 32, 32 \n\t" + "xxlxor 33, 33, 33 \n\t" + "xxlxor 34, 34, 34 \n\t" + "xxlxor 35, 35, 35 \n\t" + "xxlxor 36, 36, 36 \n\t" + "xxlxor 37, 37, 37 \n\t" + "xxlxor 38, 38, 38 \n\t" + "xxlxor 39, 39, 39 \n\t" + + "lxvp 40, 0(%2) \n\t" + "lxvp 42, 32(%2) \n\t" + "lxvp 44, 64(%2) \n\t" + "lxvp 46, 96(%2) \n\t" + + "addi %2, %2, 128 \n\t" + + "addic. 
%1, %1, -16 \n\t" + "ble two%= \n\t" + + ".align 5 \n" + "one%=: \n\t" + + "xvabsdp 48, 40 \n\t" + "xvabsdp 49, 41 \n\t" + "xvabsdp 50, 42 \n\t" + "xvabsdp 51, 43 \n\t" + "lxvp 40, 0(%2) \n\t" + + + "xvabsdp %x3, 44 \n\t" + "xvabsdp %x4, 45 \n\t" + "lxvp 42, 32(%2) \n\t" + + + "xvabsdp %x5, 46 \n\t" + "xvabsdp %x6, 47 \n\t" + "lxvp 44, 64(%2) \n\t" + + + "xvadddp 32, 32, 48 \n\t" + "xvadddp 33, 33, 49 \n\t" + + "lxvp 46, 96(%2) \n\t" + + "xvadddp 34, 34, 50 \n\t" + "xvadddp 35, 35, 51 \n\t" + "addi %2, %2, 128 \n\t" + "xvadddp 36, 36, %x3 \n\t" + "xvadddp 37, 37, %x4 \n\t" + "addic. %1, %1, -16 \n\t" + "xvadddp 38, 38, %x5 \n\t" + "xvadddp 39, 39, %x6 \n\t" + + "bgt one%= \n" + + "two%=: \n\t" + + "xvabsdp 48, 40 \n\t" + "xvabsdp 49, 41 \n\t" + "xvabsdp 50, 42 \n\t" + "xvabsdp 51, 43 \n\t" + "xvabsdp %x3, 44 \n\t" + "xvabsdp %x4, 45 \n\t" + "xvabsdp %x5, 46 \n\t" + "xvabsdp %x6, 47 \n\t" + + "xvadddp 32, 32, 48 \n\t" + "xvadddp 33, 33, 49 \n\t" + "xvadddp 34, 34, 50 \n\t" + "xvadddp 35, 35, 51 \n\t" + "xvadddp 36, 36, %x3 \n\t" + "xvadddp 37, 37, %x4 \n\t" + "xvadddp 38, 38, %x5 \n\t" + "xvadddp 39, 39, %x6 \n\t" + + "xvadddp 32, 32, 33 \n\t" + "xvadddp 34, 34, 35 \n\t" + "xvadddp 36, 36, 37 \n\t" + "xvadddp 38, 38, 39 \n\t" + + "xvadddp 32, 32, 34 \n\t" + "xvadddp 36, 36, 38 \n\t" + + "xvadddp 32, 32, 36 \n\t" + + XXSWAPD_S(33,32) + "xsadddp %x0, 32, 33 \n" + + "#n=%1 x=%3=%2 sum=%0\n" + "#t0=%x3 t1=%x4 t2=%x5 t3=%x6" + : + "=d" (sum), // 0 + "+r" (n), // 1 + "+b" (x), // 2 + "=wa" (t0), // 3 + "=wa" (t1), // 4 + "=wa" (t2), // 5 + "=wa" (t3) // 6 + : + "m" (*x) + : + "cr0", + "vs32","vs33","vs34","vs35","vs36","vs37","vs38","vs39", + "vs40","vs41","vs42","vs43","vs44","vs45","vs46","vs47", + "vs48","vs49","vs50","vs51" + ); + + return sum; +} + + diff --git a/kernel/power/sasum.c b/kernel/power/sasum.c index 733137012..af692a7fa 100644 --- a/kernel/power/sasum.c +++ b/kernel/power/sasum.c @@ -46,9 +46,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY 
OF SUCH DAMAGE. #endif -#if defined(POWER8) || defined(POWER9) || defined(POWER10) #if defined(__VEC__) || defined(__ALTIVEC__) +#if defined(POWER8) || defined(POWER9) #include "sasum_microk_power8.c" +#elif defined(POWER10) +#include "sasum_microk_power10.c" #endif #endif @@ -110,6 +112,21 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) if ( inc_x == 1 ) { +#if defined(POWER10) + if ( n >= 32 ) + { + BLASLONG align = ((32 - ((uintptr_t)x & (uintptr_t)0x1F)) >> 2) & 0x7; + for (i = 0; i < align; i++) { + sumf += ABS(x[i]); + } + } + n1 = (n-i) & -32; + if ( n1 > 0 ) + { + sumf += sasum_kernel_32(n1, &x[i]); + i+=n1; + } +#else n1 = n & -32; if ( n1 > 0 ) { @@ -117,6 +134,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) sumf = sasum_kernel_32(n1, x); i=n1; } +#endif while(i < n) { diff --git a/kernel/power/sasum_microk_power10.c b/kernel/power/sasum_microk_power10.c new file mode 100644 index 000000000..ea12a4264 --- /dev/null +++ b/kernel/power/sasum_microk_power10.c @@ -0,0 +1,153 @@ +/*************************************************************************** +Copyright (c) 2021, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. 
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*****************************************************************************/ + + +#define HAVE_KERNEL_32 1 + +static float sasum_kernel_32 (long n, float *x) +{ + float sum; + __vector float t0; + __vector float t1; + __vector float t2; + __vector float t3; + + __asm__ + ( + "dcbt 0, %2 \n\t" + + "xxlxor 32, 32, 32 \n\t" + "xxlxor 33, 33, 33 \n\t" + "xxlxor 34, 34, 34 \n\t" + "xxlxor 35, 35, 35 \n\t" + "xxlxor 36, 36, 36 \n\t" + "xxlxor 37, 37, 37 \n\t" + "xxlxor 38, 38, 38 \n\t" + "xxlxor 39, 39, 39 \n\t" + + "lxvp 40, 0(%2) \n\t" + "lxvp 42, 32(%2) \n\t" + "lxvp 44, 64(%2) \n\t" + "lxvp 46, 96(%2) \n\t" + + "addi %2, %2, 128 \n\t" + + "addic. 
%1, %1, -32 \n\t" + "ble two%= \n\t" + + ".align 5 \n" + "one%=: \n\t" + + "xvabssp 48, 40 \n\t" + "xvabssp 49, 41 \n\t" + "xvabssp 50, 42 \n\t" + "xvabssp 51, 43 \n\t" + "lxvp 40, 0(%2) \n\t" + + "xvabssp %x3, 44 \n\t" + "xvabssp %x4, 45 \n\t" + "lxvp 42, 32(%2) \n\t" + + "xvabssp %x5, 46 \n\t" + "xvabssp %x6, 47 \n\t" + "lxvp 44, 64(%2) \n\t" + + "xvaddsp 32, 32, 48 \n\t" + "xvaddsp 33, 33, 49 \n\t" + + "lxvp 46, 96(%2) \n\t" + + "xvaddsp 34, 34, 50 \n\t" + "xvaddsp 35, 35, 51 \n\t" + "addi %2, %2, 128 \n\t" + "xvaddsp 36, 36, %x3 \n\t" + "xvaddsp 37, 37, %x4 \n\t" + "addic. %1, %1, -32 \n\t" + "xvaddsp 38, 38, %x5 \n\t" + "xvaddsp 39, 39, %x6 \n\t" + + "bgt one%= \n" + + "two%=: \n\t" + + "xvabssp 48, 40 \n\t" + "xvabssp 49, 41 \n\t" + "xvabssp 50, 42 \n\t" + "xvabssp 51, 43 \n\t" + "xvabssp %x3, 44 \n\t" + "xvabssp %x4, 45 \n\t" + "xvabssp %x5, 46 \n\t" + "xvabssp %x6, 47 \n\t" + + "xvaddsp 32, 32, 48 \n\t" + "xvaddsp 33, 33, 49 \n\t" + "xvaddsp 34, 34, 50 \n\t" + "xvaddsp 35, 35, 51 \n\t" + "xvaddsp 36, 36, %x3 \n\t" + "xvaddsp 37, 37, %x4 \n\t" + "xvaddsp 38, 38, %x5 \n\t" + "xvaddsp 39, 39, %x6 \n\t" + + "xvaddsp 32, 32, 33 \n\t" + "xvaddsp 34, 34, 35 \n\t" + "xvaddsp 36, 36, 37 \n\t" + "xvaddsp 38, 38, 39 \n\t" + + "xvaddsp 32, 32, 34 \n\t" + "xvaddsp 36, 36, 38 \n\t" + + "xvaddsp 32, 32, 36 \n\t" + + "xxsldwi 33, 32, 32, 2 \n\t" + "xvaddsp 32, 32, 33 \n\t" + + "xxsldwi 33, 32, 32, 1 \n\t" + "xvaddsp 32, 32, 33 \n\t" + + "xscvspdp %x0, 32 \n" + + "#n=%1 x=%3=%2 sum=%0\n" + "#t0=%x3 t1=%x4 t2=%x5 t3=%x6" + : + "=f" (sum), // 0 + "+r" (n), // 1 + "+b" (x), // 2 + "=wa" (t0), // 3 + "=wa" (t1), // 4 + "=wa" (t2), // 5 + "=wa" (t3) // 6 + : + "m" (*x) + : + "cr0", + "vs32","vs33","vs34","vs35","vs36","vs37","vs38","vs39", + "vs40","vs41","vs42","vs43","vs44","vs45","vs46","vs47", + "vs48","vs49","vs50","vs51" + ); + + return sum; +} From a9f6f7ad390fea938c45a0e4b3b8feb2c1841edf Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sat, 6 Mar 2021 14:35:49 +0100 
Subject: [PATCH 118/134] Remove spurious AVX512 requirement and add AVX2/FMA3 guard --- kernel/x86_64/srot_microk_haswell-2.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/kernel/x86_64/srot_microk_haswell-2.c b/kernel/x86_64/srot_microk_haswell-2.c index 8e245cc8f..b5545726e 100644 --- a/kernel/x86_64/srot_microk_haswell-2.c +++ b/kernel/x86_64/srot_microk_haswell-2.c @@ -1,5 +1,4 @@ -/* need a new enough GCC for avx512 support */ -#if (( defined(__GNUC__) && __GNUC__ > 6 && defined(__AVX512CD__)) || (defined(__clang__) && __clang_major__ >= 9)) +#if defined(HAVE_FMA3) && defined(HAVE_AVX2) #define HAVE_SROT_KERNEL 1 From 09d47af2c0451b7d5868e9aeec200b565a6bf25f Mon Sep 17 00:00:00 2001 From: Rajalakshmi Srinivasaraghavan Date: Wed, 10 Mar 2021 17:15:33 -0600 Subject: [PATCH 119/134] Optimize zscal function for POWER10 This patch makes use of new POWER10 vector pair instructions for loads and stores. --- kernel/power/zscal.c | 2 +- kernel/power/zscal_microk_power10.c | 195 ++++++++++++++++++++++++++++ 2 files changed, 196 insertions(+), 1 deletion(-) create mode 100644 kernel/power/zscal_microk_power10.c diff --git a/kernel/power/zscal.c b/kernel/power/zscal.c index 31b3682b9..0068138e8 100644 --- a/kernel/power/zscal.c +++ b/kernel/power/zscal.c @@ -45,7 +45,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #endif #elif defined(POWER10) #if defined(DOUBLE) -#include "zscal_microk_power8.c" +#include "zscal_microk_power10.c" #else #include "cscal_microk_power10.c" #endif diff --git a/kernel/power/zscal_microk_power10.c b/kernel/power/zscal_microk_power10.c new file mode 100644 index 000000000..15b8323f4 --- /dev/null +++ b/kernel/power/zscal_microk_power10.c @@ -0,0 +1,195 @@ +/*************************************************************************** +Copyright (c) 2021, The OpenBLAS Project +All rights reserved. 
+Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*****************************************************************************/ + +#define HAVE_KERNEL_8 1 + +static void zscal_kernel_8 (long n, double *x, double alpha_r, double alpha_i) +{ + __vector double t0; + __vector double t1; + __vector double t2; + __vector double t3; + __vector double t4; + __vector double t5; + + __asm__ + ( + "dcbt 0, %2 \n\t" + + "xsnegdp 33, %x10 \n\t" // -alpha_i + XXSPLTD_S(32,%x9,0) // alpha_r , alpha_r + XXMRGHD_S(33,%x10, 33) // -alpha_i , alpha_i + + "lxvp 40, 0(%2) \n\t" + "lxvp 42, 32(%2) \n\t" + "lxvp 44, 64(%2) \n\t" + "lxvp 46, 96(%2) \n\t" + + "addic. %1, %1, -8 \n\t" + "ble two%= \n\t" + + ".align 5 \n" + "one%=: \n\t" + + "xvmuldp 48, 40, 32 \n\t" // x0_r * alpha_r, x0_i * alpha_r + "xvmuldp 49, 41, 32 \n\t" + "xvmuldp 50, 42, 32 \n\t" + "xvmuldp 51, 43, 32 \n\t" + "xvmuldp 34, 44, 32 \n\t" + "xvmuldp 35, 45, 32 \n\t" + "xvmuldp 36, 46, 32 \n\t" + "xvmuldp 37, 47, 32 \n\t" + + XXSWAPD_S(38,40) + XXSWAPD_S(39,41) + XXSWAPD_S(%x3,42) + XXSWAPD_S(%x4,43) + XXSWAPD_S(%x5,44) + XXSWAPD_S(%x6,45) + XXSWAPD_S(%x7,46) + XXSWAPD_S(%x8,47) + + "xvmuldp 38, 38, 33 \n\t" // x0_i * -alpha_i, x0_r * alpha_i + "xvmuldp 39, 39, 33 \n\t" + + + "xvmuldp %x3, %x3, 33 \n\t" + "xvmuldp %x4, %x4, 33 \n\t" + + + "lxvp 40, 128(%2) \n\t" + "lxvp 42, 160(%2) \n\t" + "xvmuldp %x5, %x5, 33 \n\t" + "xvmuldp %x6, %x6, 33 \n\t" + + + "xvmuldp %x7, %x7, 33 \n\t" + "xvmuldp %x8, %x8, 33 \n\t" + "lxvp 44, 192(%2) \n\t" + "lxvp 46, 224(%2) \n\t" + + + "xvadddp 48, 48, 38 \n\t" + "xvadddp 49, 49, 39 \n\t" + "xvadddp 50, 50, %x3 \n\t" + "xvadddp 51, 51, %x4 \n\t" + "stxv 49, 0(%2) \n\t" + "stxv 48, 16(%2) \n\t" + "stxv 51, 32(%2) \n\t" + "stxv 50, 48(%2) \n\t" + + + "xvadddp 34, 34, %x5 \n\t" + "xvadddp 35, 35, %x6 \n\t" + + + "xvadddp 36, 36, %x7 \n\t" + "xvadddp 37, 37, %x8 \n\t" + + "stxv 35, 64(%2) \n\t" + "stxv 34, 80(%2) \n\t" + "stxv 37, 96(%2) \n\t" + "stxv 36, 112(%2) \n\t" + + "addi %2, %2, 128 \n\t" + + "addic. 
%1, %1, -8 \n\t" + "bgt one%= \n" + + "two%=: \n\t" + + "xvmuldp 48, 40, 32 \n\t" // x0_r * alpha_r, x0_i * alpha_r + "xvmuldp 49, 41, 32 \n\t" + "xvmuldp 50, 42, 32 \n\t" + "xvmuldp 51, 43, 32 \n\t" + "xvmuldp 34, 44, 32 \n\t" + "xvmuldp 35, 45, 32 \n\t" + "xvmuldp 36, 46, 32 \n\t" + "xvmuldp 37, 47, 32 \n\t" + + XXSWAPD_S(38,40) + XXSWAPD_S(39,41) + XXSWAPD_S(%x3,42) + XXSWAPD_S(%x4,43) + XXSWAPD_S(%x5,44) + XXSWAPD_S(%x6,45) + XXSWAPD_S(%x7,46) + XXSWAPD_S(%x8,47) + + + "xvmuldp 38, 38, 33 \n\t" // x0_i * -alpha_i, x0_r * alpha_i + "xvmuldp 39, 39, 33 \n\t" + "xvmuldp %x3, %x3, 33 \n\t" + "xvmuldp %x4, %x4, 33 \n\t" + "xvmuldp %x5, %x5, 33 \n\t" + "xvmuldp %x6, %x6, 33 \n\t" + "xvmuldp %x7, %x7, 33 \n\t" + "xvmuldp %x8, %x8, 33 \n\t" + + "xvadddp 48, 48, 38 \n\t" + "xvadddp 49, 49, 39 \n\t" + + "xvadddp 50, 50, %x3 \n\t" + "xvadddp 51, 51, %x4 \n\t" + "stxv 49, 0(%2) \n\t" + "stxv 48, 16(%2) \n\t" + "stxv 51, 32(%2) \n\t" + "stxv 50, 48(%2) \n\t" + + "xvadddp 34, 34, %x5 \n\t" + "xvadddp 35, 35, %x6 \n\t" + + + "xvadddp 36, 36, %x7 \n\t" + "xvadddp 37, 37, %x8 \n\t" + + "stxv 35, 64(%2) \n\t" + "stxv 34, 80(%2) \n\t" + "stxv 37, 96(%2) \n\t" + "stxv 36, 112(%2) \n\t" + + "#n=%1 x=%0=%2 alpha=(%9,%10) \n" + : + "+m" (*x), + "+r" (n), // 1 + "+b" (x), // 2 + "=wa" (t0), // 3 + "=wa" (t1), // 4 + "=wa" (t2), // 5 + "=wa" (t3), // 6 + "=wa" (t4), // 7 + "=wa" (t5) // 8 + : + "d" (alpha_r), // 9 + "d" (alpha_i) // 10 + : + "cr0", + "vs32","vs33","vs34","vs35","vs36","vs37","vs38","vs39", + "vs40","vs41","vs42","vs43","vs44","vs45","vs46","vs47", + "vs48","vs49","vs50","vs51" + ); +} From 9579bd47e53fb65cca2f27e92340d1c08e745068 Mon Sep 17 00:00:00 2001 From: austinpagan Date: Wed, 10 Mar 2021 18:19:12 -0500 Subject: [PATCH 120/134] Modifying a couple paramaters in the "POWER10"-specific section of param.h, for performance enhancements for SGEMM and DGEMM. 
--- param.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/param.h b/param.h index 9ba25de6a..262f52c88 100644 --- a/param.h +++ b/param.h @@ -2455,13 +2455,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define ZGEMM_DEFAULT_UNROLL_M 8 #define ZGEMM_DEFAULT_UNROLL_N 2 -#define SGEMM_DEFAULT_P 832 -#define DGEMM_DEFAULT_P 320 +#define SGEMM_DEFAULT_P 512 +#define DGEMM_DEFAULT_P 384 #define CGEMM_DEFAULT_P 512 #define ZGEMM_DEFAULT_P 256 -#define SGEMM_DEFAULT_Q 1026 -#define DGEMM_DEFAULT_Q 960 +#define SGEMM_DEFAULT_Q 512 +#define DGEMM_DEFAULT_Q 512 #define CGEMM_DEFAULT_Q 1026 #define ZGEMM_DEFAULT_Q 1026 From 3c356b1a1f0d2a6b2209a6ca908212cfafe53971 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Thu, 11 Mar 2021 11:51:09 +0100 Subject: [PATCH 121/134] Support compilation with the NAG Fortran compiler --- Makefile.system | 18 ++++++++++++++++++ Makefile.x86_64 | 14 ++++++++++++++ f_check | 38 ++++++++++++++++++++++++++++++++++++-- 3 files changed, 68 insertions(+), 2 deletions(-) diff --git a/Makefile.system b/Makefile.system index 91a078565..ae703e4d9 100644 --- a/Makefile.system +++ b/Makefile.system @@ -899,6 +899,18 @@ endif # Fortran Compiler dependent settings # +ifeq ($(F_COMPILER), NAG) +FCOMMON_OPT += -dcfuns -recursive -ieee=full -w=obs -thread_safe +ifdef INTERFACE64 +ifneq ($(INTERFACE64), 0) +FCOMMON_OPT += -i8 +endif +endif +ifeq ($(USE_OPENMP), 1) +FCOMMON_OPT += -openmp +endif +endif + ifeq ($(F_COMPILER), FLANG) CCOMMON_OPT += -DF_INTERFACE_FLANG FCOMMON_OPT += -Mrecursive -Kieee @@ -1207,6 +1219,8 @@ CCOMMON_OPT += -fPIC endif ifeq ($(F_COMPILER), SUN) FCOMMON_OPT += -pic +else ifeq ($(F_COMPILER), NAG) +FCOMMON_OPT += -PIC else FCOMMON_OPT += -fPIC endif @@ -1465,6 +1479,10 @@ LAPACK_FFLAGS := $(FFLAGS) LAPACK_FPFLAGS := $(FPFLAGS) endif +ifeq ($(F_COMPILER),NAG) +LAPACK_FFLAGS := $(filter-out -msse3 -mssse3 -msse4.1 -mavx -mavx2 -mskylake-avx512 ,$(FFLAGS)) +endif + 
LAPACK_CFLAGS = $(CFLAGS) LAPACK_CFLAGS += -DHAVE_LAPACK_CONFIG_H ifdef INTERFACE64 diff --git a/Makefile.x86_64 b/Makefile.x86_64 index 175db823d..5406494c9 100644 --- a/Makefile.x86_64 +++ b/Makefile.x86_64 @@ -10,34 +10,46 @@ endif ifdef HAVE_SSE3 CCOMMON_OPT += -msse3 +ifneq ($(F_COMPILER), NAG) FCOMMON_OPT += -msse3 endif +endif ifdef HAVE_SSSE3 CCOMMON_OPT += -mssse3 +ifneq ($(F_COMPILER), NAG) FCOMMON_OPT += -mssse3 endif +endif ifdef HAVE_SSE4_1 CCOMMON_OPT += -msse4.1 +ifneq ($(F_COMPILER), NAG) FCOMMON_OPT += -msse4.1 endif +endif ifndef OLDGCC ifdef HAVE_AVX CCOMMON_OPT += -mavx +ifneq ($(F_COMPILER), NAG) FCOMMON_OPT += -mavx endif endif +endif ifndef NO_AVX2 ifdef HAVE_AVX2 CCOMMON_OPT += -mavx2 +ifneq ($(F_COMPILER), NAG) FCOMMON_OPT += -mavx2 endif endif +endif ifeq ($(CORE), SKYLAKEX) ifndef DYNAMIC_ARCH ifndef NO_AVX512 CCOMMON_OPT += -march=skylake-avx512 +ifneq ($(F_COMPILER), NAG) FCOMMON_OPT += -march=skylake-avx512 +endif ifeq ($(OSNAME), CYGWIN_NT) CCOMMON_OPT += -fno-asynchronous-unwind-tables FCOMMON_OPT += -fno-asynchronous-unwind-tables @@ -59,9 +71,11 @@ ifeq ($(C_COMPILER), GCC) # cooperlake support was added in 10.1 ifeq ($(GCCVERSIONGTEQ10)$(GCCMINORVERSIONGTEQ1), 11) CCOMMON_OPT += -march=cooperlake +ifneq ($(F_COMPILER), NAG) FCOMMON_OPT += -march=cooperlake endif endif +endif ifeq ($(OSNAME), CYGWIN_NT) CCOMMON_OPT += -fno-asynchronous-unwind-tables FCOMMON_OPT += -fno-asynchronous-unwind-tables diff --git a/f_check b/f_check index fe947bf66..54f542eaf 100644 --- a/f_check +++ b/f_check @@ -34,7 +34,7 @@ if ($compiler eq "") { "pathf90", "pathf95", "pgf95", "pgf90", "pgf77", "pgfortran", "nvfortran", "flang", "egfortran", - "ifort"); + "ifort", "nagfor"); OUTER: foreach $lists (@lists) { @@ -64,6 +64,9 @@ if ($compiler eq "") { if (!$?) 
{ $data = `$compiler -O2 -S ftest.f > /dev/null 2>&1 && cat ftest.s && rm -f ftest.s`; + if ($data eq "") { + $data = `$compiler -O2 -S ftest.f > /dev/null 2>&1 && cat ftest.c && rm -f ftest.c`; + } if ($data =~ /zhoge_/) { $bu = "_"; } @@ -133,8 +136,16 @@ if ($compiler eq "") { $openmp = "-openmp"; } + if ($data =~ /NAG/) { + $vendor = NAG; + $openmp = "-openmp"; + } + # for embedded underscore name, e.g. zho_ge, it may append 2 underscores. $data = `$compiler -O2 -S ftest3.f > /dev/null 2>&1 && cat ftest3.s && rm -f ftest3.s`; + if ($data eq "") { + $data = `$compiler -O2 -S ftest3.f > /dev/null 2>&1 && cat ftest3.c && rm -f ftest3.c`; + } if ($data =~ / zho_ge__/) { $need2bu = 1; } @@ -222,6 +233,12 @@ if ($compiler eq "") { $openmp = "-fopenmp"; } + if ($compiler =~ /nagfor/) { + $vendor = NAG; + $bu = "_"; + $openmp = "-openmp"; + } + if ($vendor eq "") { $nofortran = 1; $compiler = "gfortran"; @@ -275,14 +292,20 @@ if (!$?) { if ($?) { $link = `$compiler $openmp -mabi=64 -v ftest2.f 2>&1 && rm -f a.out a.exe`; } + #For nagfor + if ($?) { + $link = `$compiler $openmp -dryrun ftest2.f 2>&1 && rm -f a.out a.exe`; + } $binary = "" if ($?); } - if ($binary eq "") { $link = `$compiler $openmp -v ftest2.f 2>&1 && rm -f a.out a.exe`; } } +if ( $vendor == NAG) { + $link = `$compiler $openmp -dryrun ftest2.f 2>&1 && rm -f a.out a.exe`; + } $linker_L = ""; $linker_l = ""; $linker_a = ""; @@ -336,6 +359,7 @@ if ($link ne "") { if ( ($flags =~ /^\-l/) + && ($flags !~ /ibrary/) && ($flags !~ /gfortranbegin/) && ($flags !~ /frtbegin/) && ($flags !~ /pathfstart/) @@ -352,6 +376,16 @@ if ($link ne "") { $linker_l .= $flags . " "; } + if ( $flags =~ /quickfit.o/ && $vendor == NAG) { + $linker_l .= $flags . " "; + } + if ( $flags =~ /safefit.o/ && $vendor == NAG) { + $linker_l .= $flags . " "; + } + if ( $flags =~ /thsafe.o/ && $vendor == NAG) { + $linker_l .= $flags . " "; + } + $linker_a .= $flags . 
" " if $flags =~ /\.a$/; } From 041a26fd79e56d9807a9ecd9486bd139fd062d6c Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Thu, 11 Mar 2021 11:52:29 +0100 Subject: [PATCH 122/134] Support compilation with nagfor --- ctest/Makefile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ctest/Makefile b/ctest/Makefile index 2a893cae8..15c83a907 100644 --- a/ctest/Makefile +++ b/ctest/Makefile @@ -212,6 +212,9 @@ ifeq ($(C_COMPILER), CLANG) CEXTRALIB = -lomp endif endif +ifeq ($(F_COMPILER), NAG) +CEXTRALIB = -lgomp +endif endif ifeq ($(BUILD_SINGLE),1) From 6ae7af78a38649c446e2b4cf310b48538f8a1db7 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Thu, 11 Mar 2021 11:53:51 +0100 Subject: [PATCH 123/134] Support compilation with nagfor --- test/Makefile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/test/Makefile b/test/Makefile index 5f653414a..54fa60533 100644 --- a/test/Makefile +++ b/test/Makefile @@ -270,6 +270,9 @@ ifeq ($(C_COMPILER), CLANG) CEXTRALIB = -lomp endif endif +ifeq ($(F_COMPILER), NAG) +CEXTRALIB = -lgomp +endif endif ifeq ($(BUILD_SINGLE),1) From 697e64bbb6651eef92bc910f46b6e1e7b58709d1 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Thu, 11 Mar 2021 23:03:58 +0100 Subject: [PATCH 124/134] Fix syntax --- f_check | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/f_check b/f_check index 54f542eaf..20281ea4d 100644 --- a/f_check +++ b/f_check @@ -303,7 +303,7 @@ if (!$?) 
{ } } -if ( $vendor == NAG) { +if ( $vendor eq "NAG") { $link = `$compiler $openmp -dryrun ftest2.f 2>&1 && rm -f a.out a.exe`; } $linker_L = ""; From 0934568d9cbc0e7a7d95c00fab807d95d1168bed Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Fri, 12 Mar 2021 12:42:05 +0100 Subject: [PATCH 125/134] Move includes under the ifdef for compilers w/o intrinsics support --- kernel/x86_64/sgemm_direct_skylakex.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/kernel/x86_64/sgemm_direct_skylakex.c b/kernel/x86_64/sgemm_direct_skylakex.c index aaadcf151..cc2ac5553 100644 --- a/kernel/x86_64/sgemm_direct_skylakex.c +++ b/kernel/x86_64/sgemm_direct_skylakex.c @@ -1,8 +1,11 @@ /* the direct sgemm code written by Arjan van der Ven */ + + +#if defined(SKYLAKEX) || defined (COOPERLAKE) + #include #include "common.h" -#if defined(SKYLAKEX) || defined (COOPERLAKE) /* * "Direct sgemm" code. This code operates directly on the inputs and outputs * of the sgemm call, avoiding the copies, memory realignments and threading, From 6726771645e32529d19c3c0ffc90f25784c2cc23 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sat, 13 Mar 2021 20:16:18 +0100 Subject: [PATCH 126/134] Support compilation with NAG fortran --- Makefile.arm64 | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/Makefile.arm64 b/Makefile.arm64 index c3fe583e4..23362b4e5 100644 --- a/Makefile.arm64 +++ b/Makefile.arm64 @@ -1,28 +1,38 @@ ifneq ($(C_COMPILER), PGI) ifeq ($(CORE), ARMV8) CCOMMON_OPT += -march=armv8-a +ifneq ($(F_COMPILER), NAG) FCOMMON_OPT += -march=armv8-a endif +endif ifeq ($(CORE), CORTEXA53) CCOMMON_OPT += -march=armv8-a -mtune=cortex-a53 +ifneq ($(F_COMPILER), NAG) FCOMMON_OPT += -march=armv8-a -mtune=cortex-a53 endif +endif ifeq ($(CORE), CORTEXA57) CCOMMON_OPT += -march=armv8-a -mtune=cortex-a57 +ifneq ($(F_COMPILER), NAG) FCOMMON_OPT += -march=armv8-a -mtune=cortex-a57 endif +endif ifeq ($(CORE), CORTEXA72) CCOMMON_OPT += -march=armv8-a 
-mtune=cortex-a72 +ifneq ($(F_COMPILER), NAG) FCOMMON_OPT += -march=armv8-a -mtune=cortex-a72 endif +endif ifeq ($(CORE), CORTEXA73) CCOMMON_OPT += -march=armv8-a -mtune=cortex-a73 +ifneq ($(F_COMPILER), NAG) FCOMMON_OPT += -march=armv8-a -mtune=cortex-a73 endif +endif # Use a72 tunings because Neoverse-N1 is only available # in GCC>=9 @@ -30,51 +40,71 @@ ifeq ($(CORE), NEOVERSEN1) ifeq ($(GCCVERSIONGTEQ7), 1) ifeq ($(GCCVERSIONGTEQ9), 1) CCOMMON_OPT += -march=armv8.2-a -mtune=neoverse-n1 +ifneq ($(F_COMPILER), NAG) FCOMMON_OPT += -march=armv8.2-a -mtune=neoverse-n1 +endif else CCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a72 +ifneq ($(F_COMPILER), NAG) FCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a72 endif +endif else CCOMMON_OPT += -march=armv8-a -mtune=cortex-a72 +ifneq ($(F_COMPILER), NAG) FCOMMON_OPT += -march=armv8-a -mtune=cortex-a72 endif endif +endif ifeq ($(CORE), THUNDERX) CCOMMON_OPT += -march=armv8-a -mtune=thunderx +ifneq ($(F_COMPILER), NAG) FCOMMON_OPT += -march=armv8-a -mtune=thunderx endif +endif ifeq ($(CORE), FALKOR) CCOMMON_OPT += -march=armv8-a -mtune=falkor +ifneq ($(F_COMPILER), NAG) FCOMMON_OPT += -march=armv8-a -mtune=falkor endif +endif ifeq ($(CORE), THUNDERX2T99) CCOMMON_OPT += -march=armv8.1-a -mtune=thunderx2t99 +ifneq ($(F_COMPILER), NAG) FCOMMON_OPT += -march=armv8.1-a -mtune=thunderx2t99 endif +endif ifeq ($(CORE), THUNDERX3T110) ifeq ($(GCCVERSIONGTEQ10), 1) CCOMMON_OPT += -march=armv8.3-a -mtune=thunderx3t110 +ifneq ($(F_COMPILER), NAG) FCOMMON_OPT += -march=armv8.3-a -mtune=thunderx3t110 +endif else CCOMMON_OPT += -march=armv8.1-a -mtune=thunderx2t99 +ifneq ($(F_COMPILER), NAG) FCOMMON_OPT += -march=armv8.1-a -mtune=thunderx2t99 endif endif +endif ifeq ($(CORE), VORTEX) CCOMMON_OPT += -march=armv8.3-a +ifneq ($(F_COMPILER), NAG) FCOMMON_OPT += -march=armv8.3-a endif +endif ifeq ($(GCCVERSIONGTEQ9), 1) ifeq ($(CORE), TSV110) CCOMMON_OPT += -march=armv8.2-a -mtune=tsv110 +ifneq ($(F_COMPILER), NAG) FCOMMON_OPT += 
-march=armv8.2-a -mtune=tsv110 endif endif endif +endif From 34753eaebb8b2ddbc256e9e996c1fb315396a2a0 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sun, 14 Mar 2021 17:28:43 +0100 Subject: [PATCH 127/134] Include common.h (and indirectly param.h) rather than just param.h to have BLASLONG available w/o circular dependencies --- getarch_2nd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/getarch_2nd.c b/getarch_2nd.c index c390ef52c..53ecccf30 100644 --- a/getarch_2nd.c +++ b/getarch_2nd.c @@ -4,7 +4,7 @@ #else #include "config_kernel.h" #endif -#include "param.h" +#include "common.h" int main(int argc, char **argv) { From ecb4babcf45e402d6e75702446cedd7242fd2ef8 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sun, 14 Mar 2021 17:36:51 +0100 Subject: [PATCH 128/134] remove inclusion of common.h again to avoid circular dependency --- param.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/param.h b/param.h index 508cbb2a5..a37743ef4 100644 --- a/param.h +++ b/param.h @@ -72,8 +72,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#ifndef PARAM_H #define PARAM_H -#include "common.h" - #define SBGEMM_DEFAULT_UNROLL_N 4 #define SBGEMM_DEFAULT_UNROLL_M 8 #define SBGEMM_DEFAULT_UNROLL_MN 32 From e9d453b623ee23005604f0526aabbc2ab4128d6d Mon Sep 17 00:00:00 2001 From: xoviat Date: Sun, 14 Mar 2021 16:34:02 -0500 Subject: [PATCH 129/134] disable openmp --- appveyor.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/appveyor.yml b/appveyor.yml index 6197e85ab..c9b2fa3a1 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -31,7 +31,6 @@ environment: matrix: - COMPILER: clang-cl WITH_FORTRAN: ON - USE_OPENMP: ON - COMPILER: clang-cl DYNAMIC_ARCH: ON WITH_FORTRAN: OFF From 186368ddc3775540c147b6300693ccc0bcac7597 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Tue, 16 Mar 2021 16:52:57 +0100 Subject: [PATCH 130/134] Fix compilation with CLANG --- driver/others/dynamic_power.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/driver/others/dynamic_power.c b/driver/others/dynamic_power.c index b8e5840a3..d9c15b312 100644 --- a/driver/others/dynamic_power.c +++ b/driver/others/dynamic_power.c @@ -40,7 +40,14 @@ char *gotoblas_corename(void) { return corename[0]; } -#ifdef C_PGI +#if defined(__clang__) +static int __builtin_cpu_supports(char* arg) +{ + return 0; +} +#endif + +#if defined(C_PGI) || defined(__clang__) /* * NV HPC compilers do not yet implement __builtin_cpu_is(). * Fake a version here for use in the CPU detection code below. From 8cdf0825debb529c55d06a7da22de366b049c4f5 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Tue, 16 Mar 2021 21:20:05 +0100 Subject: [PATCH 131/134] Add workaround for older gcc on ppc64be not supporting casts in defines --- param.h | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/param.h b/param.h index a37743ef4..c41f75ec9 100644 --- a/param.h +++ b/param.h @@ -72,6 +72,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#ifndef PARAM_H #define PARAM_H +#define LONGCAST (BLASLONG) +#if defined(__BYTE_ORDER__) +#if __GNUC__ < 9 +#undef LONGCAST +#define LONGCAST +#endif +#endif + #define SBGEMM_DEFAULT_UNROLL_N 4 #define SBGEMM_DEFAULT_UNROLL_M 8 #define SBGEMM_DEFAULT_UNROLL_MN 32 @@ -2088,7 +2096,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #ifdef PPCG4 #define GEMM_DEFAULT_OFFSET_A 0 #define GEMM_DEFAULT_OFFSET_B 1024 -#define GEMM_DEFAULT_ALIGN (BLASLONG)0x0ffffUL +#define GEMM_DEFAULT_ALIGN LONGCAST 0x0ffffUL #define SGEMM_DEFAULT_UNROLL_M 16 #define SGEMM_DEFAULT_UNROLL_N 4 @@ -2119,7 +2127,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define GEMM_DEFAULT_OFFSET_A 2688 #define GEMM_DEFAULT_OFFSET_B 3072 -#define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL +#define GEMM_DEFAULT_ALIGN LONGCAST 0x03fffUL #if defined(__BYTE_ORDER__)&&(__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) #define SGEMM_DEFAULT_UNROLL_M 4 @@ -2168,7 +2176,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define GEMM_DEFAULT_OFFSET_A (32 * 0) #define GEMM_DEFAULT_OFFSET_B (32 * 0) -#define GEMM_DEFAULT_ALIGN (BLASLONG)0x0ffffUL +#define GEMM_DEFAULT_ALIGN LONGCAST 0x0ffffUL #define SGEMM_DEFAULT_UNROLL_M 4 #define SGEMM_DEFAULT_UNROLL_N 4 @@ -2204,7 +2212,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define GEMM_DEFAULT_OFFSET_A (32 * 0) #define GEMM_DEFAULT_OFFSET_B (32 * 0) -#define GEMM_DEFAULT_ALIGN (BLASLONG)0x0ffffUL +#define GEMM_DEFAULT_ALIGN LONGCAST 0x0ffffUL #define SGEMM_DEFAULT_UNROLL_M 8 #define SGEMM_DEFAULT_UNROLL_N 4 @@ -2239,7 +2247,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#if defined(POWER3) || defined(POWER4) || defined(POWER5) #define GEMM_DEFAULT_OFFSET_A 0 #define GEMM_DEFAULT_OFFSET_B 2048 -#define GEMM_DEFAULT_ALIGN (BLASLONG)0x0ffffUL +#define GEMM_DEFAULT_ALIGN LONGCAST 0x0ffffUL #define SGEMM_DEFAULT_UNROLL_M 4 #define SGEMM_DEFAULT_UNROLL_N 4 @@ -2312,7 +2320,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define GEMM_DEFAULT_OFFSET_A 384 #define GEMM_DEFAULT_OFFSET_B 1024 -#define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL +#define GEMM_DEFAULT_ALIGN LONGCAST 0x03fffUL #define SGEMM_DEFAULT_UNROLL_M 4 #define SGEMM_DEFAULT_UNROLL_N 4 @@ -2345,7 +2353,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define GEMM_DEFAULT_OFFSET_A 0 #define GEMM_DEFAULT_OFFSET_B 65536 -#define GEMM_DEFAULT_ALIGN (BLASLONG)0x0ffffUL +#define GEMM_DEFAULT_ALIGN LONGCAST 0x0ffffUL #if defined(__32BIT__) #warning using BINARY32==POWER6 #define SGEMM_DEFAULT_UNROLL_M 4 @@ -2398,7 +2406,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define GEMM_DEFAULT_OFFSET_A 0 #define GEMM_DEFAULT_OFFSET_B 65536 -#define GEMM_DEFAULT_ALIGN 0x0ffffUL +#define GEMM_DEFAULT_ALIGN LONGCAST 0x0ffffUL #define SWITCH_RATIO 16 #define GEMM_PREFERED_SIZE 16 @@ -2437,7 +2445,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define GEMM_DEFAULT_OFFSET_A 0 #define GEMM_DEFAULT_OFFSET_B 65536 -#define GEMM_DEFAULT_ALIGN 0x0ffffUL +#define GEMM_DEFAULT_ALIGN LONGCAST 0x0ffffUL #define SWITCH_RATIO 16 #define GEMM_PREFERED_SIZE 16 From 7888b5127c4e6a6ac457224583d931c29b3ec88e Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Wed, 17 Mar 2021 16:17:55 +0100 Subject: [PATCH 132/134] Update Changelog for 0.3.14 --- Changelog.txt | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/Changelog.txt b/Changelog.txt index cbc7007ac..5662bc5c6 100644 --- a/Changelog.txt +++ b/Changelog.txt @@ -1,4 +1,52 @@ OpenBLAS ChangeLog +==================================================================== +Version 0.3.14 + 17-Mar-2021 + + common: + * Fixed a race condition on thread shutdown in non-OpenMP builds + * Fixed custom BUFFERSIZE option getting ignored in gmake builds + * Fixed CMAKE compilation of the TRMM kernels for GENERIC platforms + * Added CBLAS interfaces for CROTG, ZROTG, CSROT and ZDROT + * Improved performance of OMATCOPY_RT across all platforms + * Changed perl scripts to use env instead of a hardcoded /usr/bin/perl + * Fixed potential misreading of the GCC compiler version in the build scripts + * Fixed convergence problems in LAPACK complex GGEV/GGES (Reference-LAPACK #477) + * Reduced the stacksize requirements for running the LAPACK testsuite (Reference-LAPACK #335) + + RISCV: + * Fixed compilation on RISCV (missing entry in getarch) + + POWER: + * Fixed compilation for DYNAMIC_ARCH with clang and with old gcc versions + * Added support for compilation on FreeBSD/ppc64le + * Added optimized POWER10 kernels for SSCAL, DSCAL, CSCAL, ZSCAL + * Added optimized POWER10 kernels for SROT, DROT, CDOT, SASUM, DASUM + * Improved SSWAP, DSWAP, CSWAP, ZSWAP performance on POWER10 + * Improved SCOPY and CCOPY performance on POWER10 + * Improved SGEMM and DGEMM performance on POWER10 + * Added support for compilation with the NVIDIA HPC compiler + + 
x86_64: + * Added an optimized bfloat16 GEMM kernel for Cooperlake + * Added CPUID autodetection for Intel Rocket Lake and Tiger Lake cpus + * Improved the performance of SASUM,DASUM,SROT,DROT on AMD Ryzen cpus + * Added support for compilation with the NAG Fortran compiler + * Fixed recognition of the AMD AOCC compiler + * Fixed compilation for DYNAMIC_ARCH with clang on Windows + * Added support for running the BLAS/CBLAS tests on Windows + * Fixed signatures of the tls callback functions for Windows x64 + * Fixed various issues with fma intrinsics support handling + + ARM: + * Added support for embedded Cortex M targets via a new option EMBEDDED + + ARMV8: + * Fixed the THUNDERX2T99 and NEOVERSEN1 DNRM2/ZNRM2 kernels for inputs with Inf + * Added support for the DYNAMIC_LIST option + * Added support for compilation with the NVIDIA HPC compiler + * Added support for compiling with the NAG Fortran compiler + ==================================================================== Version 0.3.13 12-Dec-2020 From 2663e44724737bad34abdb6aff770d2c9c2fb09e Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Wed, 17 Mar 2021 20:20:00 +0100 Subject: [PATCH 133/134] Update version to 0.3.14 for release --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 4f34d5337..3107ef9a9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -6,7 +6,7 @@ cmake_minimum_required(VERSION 2.8.5) project(OpenBLAS C ASM) set(OpenBLAS_MAJOR_VERSION 0) set(OpenBLAS_MINOR_VERSION 3) -set(OpenBLAS_PATCH_VERSION 13.dev) +set(OpenBLAS_PATCH_VERSION 14) set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}") # Adhere to GNU filesystem layout conventions From 86de5f768b7013c6c6788bc3e12df122b8c53196 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Wed, 17 Mar 2021 20:20:34 +0100 Subject: [PATCH 134/134] Update version to 0.3.14 for release --- Makefile.rule | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/Makefile.rule b/Makefile.rule index c68c20923..5a46bf6b0 100644 --- a/Makefile.rule +++ b/Makefile.rule @@ -3,7 +3,7 @@ # # This library's version -VERSION = 0.3.13.dev +VERSION = 0.3.14 # If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a # and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library