Note (ignored by patch tools; kept before the first "diff --git" header):
The MinGW-gcc-6.3.0-32 job uses the i686 mingw-w64 toolchain, whose binaries
are under ...\i686-6.3.0-posix-dwarf-rt_v5-rev1\mingw32\bin. Only the x86_64
toolchains use a "mingw64" directory, so the PATH entry for the 32-bit job
must point at mingw32\bin or a different gcc is picked up from PATH.

diff --git a/appveyor.yml b/appveyor.yml
index 2f9cc7b0b..1936059d5 100644
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -38,7 +38,8 @@ environment:
     - COMPILER: MinGW64-gcc-7.2.0-mingw
       DYNAMIC_ARCH: OFF
       WITH_FORTRAN: ignore
-    - COMPILER: MinGW64-gcc-7.2.0
+    - APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2015
+      COMPILER: MinGW-gcc-6.3.0-32
     - APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2015
       COMPILER: MinGW-gcc-5.3.0
       WITH_FORTRAN: ignore
@@ -62,10 +63,10 @@ before_build:
   - set PATH=%PATH:C:\Program Files\Git\usr\bin;=%
   - if [%COMPILER%]==[MinGW-gcc-5.3.0] set PATH=C:\MinGW\bin;C:\msys64\usr\bin;C:\mingw-w64\x86_64-7.2.0-posix-seh-rt_v5-rev1\mingw64\bin;%PATH%
   - if [%COMPILER%]==[MinGW64-gcc-7.2.0-mingw] set PATH=C:\MinGW\bin;C:\mingw-w64\x86_64-7.2.0-posix-seh-rt_v5-rev1\mingw64\bin;%PATH%
-  - if [%COMPILER%]==[MinGW64-gcc-7.2.0] set PATH=C:\msys64\usr\bin;C:\mingw-w64\x86_64-7.2.0-posix-seh-rt_v5-rev1\mingw64\bin;%PATH%
+  - if [%COMPILER%]==[MinGW-gcc-6.3.0-32] set PATH=C:\msys64\usr\bin;C:\mingw-w64\i686-6.3.0-posix-dwarf-rt_v5-rev1\mingw32\bin;%PATH%
   - if [%COMPILER%]==[cl] cmake -G "Visual Studio 15 2017 Win64" ..
   - if [%COMPILER%]==[MinGW64-gcc-7.2.0-mingw] cmake -G "MinGW Makefiles" -DNOFORTRAN=1 ..
-  - if [%COMPILER%]==[MinGW64-gcc-7.2.0] cmake -G "MSYS Makefiles" -DBINARY=32 -DNOFORTRAN=1 ..
+  - if [%COMPILER%]==[MinGW-gcc-6.3.0-32] cmake -G "MSYS Makefiles" -DNOFORTRAN=1 ..
   - if [%COMPILER%]==[MinGW-gcc-5.3.0] cmake -G "MSYS Makefiles" -DNOFORTRAN=1 ..
   - if [%WITH_FORTRAN%]==[no] cmake -G "Ninja" -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER=clang-cl -DMSVC_STATIC_CRT=ON ..
   - if [%WITH_FORTRAN%]==[yes] cmake -G "Ninja" -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER=clang-cl -DCMAKE_Fortran_COMPILER=flang -DBUILD_WITHOUT_LAPACK=no -DNOFORTRAN=0 ..
diff --git a/cmake/cc.cmake b/cmake/cc.cmake index 98f9298f8..37da0d6ed 100644 --- a/cmake/cc.cmake +++ b/cmake/cc.cmake @@ -3,7 +3,7 @@ ## Description: Ported from portion of OpenBLAS/Makefile.system ## Sets C related variables. -if (${CMAKE_C_COMPILER} STREQUAL "GNU" OR ${CMAKE_C_COMPILER} STREQUAL "LSB" OR ${CMAKE_C_COMPILER} STREQUAL "Clang") +if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU" OR ${CMAKE_C_COMPILER_ID} STREQUAL "LSB" OR ${CMAKE_C_COMPILER_ID} MATCHES "Clang") set(CCOMMON_OPT "${CCOMMON_OPT} -Wall") set(COMMON_PROF "${COMMON_PROF} -fno-inline") @@ -43,7 +43,7 @@ if (${CMAKE_C_COMPILER} STREQUAL "GNU" OR ${CMAKE_C_COMPILER} STREQUAL "LSB" OR endif () endif () -if (${CMAKE_C_COMPILER} STREQUAL "PGI") +if (${CMAKE_C_COMPILER_ID} STREQUAL "PGI") if (BINARY64) set(CCOMMON_OPT "${CCOMMON_OPT} -tp p7-64") else () @@ -51,7 +51,7 @@ if (${CMAKE_C_COMPILER} STREQUAL "PGI") endif () endif () -if (${CMAKE_C_COMPILER} STREQUAL "PATHSCALE") +if (${CMAKE_C_COMPILER_ID} STREQUAL "PATHSCALE") if (BINARY64) set(CCOMMON_OPT "${CCOMMON_OPT} -m64") else () @@ -59,7 +59,7 @@ if (${CMAKE_C_COMPILER} STREQUAL "PATHSCALE") endif () endif () -if (${CMAKE_C_COMPILER} STREQUAL "OPEN64") +if (${CMAKE_C_COMPILER_ID} STREQUAL "OPEN64") if (MIPS64) @@ -87,7 +87,7 @@ if (${CMAKE_C_COMPILER} STREQUAL "OPEN64") endif () endif () -if (${CMAKE_C_COMPILER} STREQUAL "SUN") +if (${CMAKE_C_COMPILER_ID} STREQUAL "SUN") set(CCOMMON_OPT "${CCOMMON_OPT} -w") if (X86) set(CCOMMON_OPT "${CCOMMON_OPT} -m32") diff --git a/cmake/prebuild.cmake b/cmake/prebuild.cmake index 2fe168a1c..086df1943 100644 --- a/cmake/prebuild.cmake +++ b/cmake/prebuild.cmake @@ -105,6 +105,7 @@ if (DEFINED CORE AND CMAKE_CROSSCOMPILING AND NOT (${HOST_OS} STREQUAL "WINDOWSS # Perhaps this should be inside a different file as it grows larger file(APPEND ${TARGET_CONF_TEMP} "#define ${TCORE}\n" + "#define CORE_${TCORE}\n" "#define CHAR_CORENAME \"${TCORE}\"\n") if ("${TCORE}" STREQUAL "CORE2") file(APPEND ${TARGET_CONF_TEMP} @@ 
-119,15 +120,23 @@ if (DEFINED CORE AND CMAKE_CROSSCOMPILING AND NOT (${HOST_OS} STREQUAL "WINDOWSS "#define HAVE_SSE\n" "#define HAVE_SSE2\n" "#define HAVE_SSE3\n" - "#define HAVE_SSSE3\n") + "#define HAVE_SSSE3\n" + "#define SLOCAL_BUFFER_SIZE\t16384\n" + "#define DLOCAL_BUFFER_SIZE\t16384\n" + "#define CLOCAL_BUFFER_SIZE\t16384\n" + "#define ZLOCAL_BUFFER_SIZE\t16384\n") set(SGEMM_UNROLL_M 8) set(SGEMM_UNROLL_N 4) set(DGEMM_UNROLL_M 4) set(DGEMM_UNROLL_N 4) - set(CGEMM_DEFAULT_UNROLL_M 4) - set(CGEMM_DEFAULT_UNROLL_N 2) - set(ZGEMM_DEFAULT_UNROLL_M 2) - set(ZGEMM_DEFAULT_UNROLL_N 2) + set(CGEMM_UNROLL_M 4) + set(CGEMM_UNROLL_N 2) + set(ZGEMM_UNROLL_M 2) + set(ZGEMM_UNROLL_N 2) + set(CGEMM3M_UNROLL_M 8) + set(CGEMM3M_UNROLL_N 4) + set(ZGEMM3M_UNROLL_M 4) + set(ZGEMM3M_UNROLL_N 4) elseif ("${TCORE}" STREQUAL "ARMV7") file(APPEND ${TARGET_CONF_TEMP} "#define L1_DATA_SIZE\t65536\n" @@ -143,6 +152,10 @@ if (DEFINED CORE AND CMAKE_CROSSCOMPILING AND NOT (${HOST_OS} STREQUAL "WINDOWSS set(SGEMM_UNROLL_N 4) set(DGEMM_UNROLL_M 4) set(DGEMM_UNROLL_N 4) + set(CGEMM_UNROLL_M 2) + set(CGEMM_UNROLL_N 2) + set(ZGEMM_UNROLL_M 2) + set(ZGEMM_UNROLL_N 2) elseif ("${TCORE}" STREQUAL "ARMV8") file(APPEND ${TARGET_CONF_TEMP} "#define L1_DATA_SIZE\t32768\n" @@ -331,6 +344,9 @@ else(NOT CMAKE_CROSSCOMPILING) set(GETARCH_FLAGS ${GETARCH_FLAGS} -DFORCE_GENERIC) else() list(APPEND GETARCH_SRC ${PROJECT_SOURCE_DIR}/cpuid.S) + if (DEFINED TARGET_CORE) + set(GETARCH_FLAGS ${GETARCH_FLAGS} -DFORCE_${TARGET_CORE}) + endif () endif () if ("${CMAKE_SYSTEM_NAME}" STREQUAL "WindowsStore") diff --git a/cmake/system_check.cmake b/cmake/system_check.cmake index 610f689e0..c4a553c5a 100644 --- a/cmake/system_check.cmake +++ b/cmake/system_check.cmake @@ -39,10 +39,18 @@ elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "ppc.*|power.*|Power.*") elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "mips64.*") set(MIPS64 1) elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "amd64.*|x86_64.*|AMD64.*") - if("${CMAKE_SIZEOF_VOID_P}" EQUAL "8") 
- set(X86_64 1) + if (NOT BINARY) + if("${CMAKE_SIZEOF_VOID_P}" EQUAL "8") + set(X86_64 1) + else() + set(X86 1) + endif() else() - set(X86 1) + if (${BINARY} EQUAL "64") + set(X86_64 1) + else () + set(X86 1) + endif() endif() elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "i686.*|i386.*|x86.*|amd64.*|AMD64.*") set(X86 1) @@ -54,6 +62,22 @@ elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64.*|AARCH64.*)") else() set(ARM 1) endif() +elseif (${CMAKE_CROSSCOMPILING}) + if (${TARGET} STREQUAL "CORE2") + if (NOT BINARY) + set(X86 1) + elseif (${BINARY} EQUAL "64") + set(X86_64 1) + else () + set(X86 1) + endif() + elseif (${TARGET} STREQUAL "ARMV7") + set(ARM 1) + else() + set(ARM64 1) + endif () +else () + message(WARNING "Target ARCH could not be determined, got \"${CMAKE_SYSTEM_PROCESSOR}\"") endif() if (X86_64) @@ -92,4 +116,3 @@ set (CCOMMON_OPT "${CCOMMON_OPT} -DNO_AVX512") endif() file(REMOVE "avx512.tmp" "avx512.o") endif() - diff --git a/common_lapack.h b/common_lapack.h index f6d1956fc..f9c36646a 100644 --- a/common_lapack.h +++ b/common_lapack.h @@ -293,4 +293,150 @@ blasint zlarf_R(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLO blasint xlarf_L(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG); blasint xlarf_R(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG); +blasint strtrs_UNU_single(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG); +blasint strtrs_UNN_single(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG); +blasint strtrs_UTU_single(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG); +blasint strtrs_UTN_single(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG); +blasint strtrs_LNU_single(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG); +blasint strtrs_LNN_single(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG); +blasint strtrs_LTU_single(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG); 
+blasint strtrs_LTN_single(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG); +blasint dtrtrs_UNU_single(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG); +blasint dtrtrs_UNN_single(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG); +blasint dtrtrs_UTU_single(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG); +blasint dtrtrs_UTN_single(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG); +blasint dtrtrs_LNU_single(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG); +blasint dtrtrs_LNN_single(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG); +blasint dtrtrs_LTU_single(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG); +blasint dtrtrs_LTN_single(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG); +blasint qtrtrs_UNU_single(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG); +blasint qtrtrs_UNN_single(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG); +blasint qtrtrs_UTU_single(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG); +blasint qtrtrs_UTN_single(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG); +blasint qtrtrs_LNU_single(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG); +blasint qtrtrs_LNN_single(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG); +blasint qtrtrs_LTU_single(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG); +blasint qtrtrs_LTN_single(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG); +blasint ctrtrs_UNU_single(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG); +blasint ctrtrs_UNN_single(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG); +blasint ctrtrs_UTU_single(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG); +blasint ctrtrs_UTN_single(blas_arg_t *, BLASLONG *, BLASLONG *, float 
*, float *, BLASLONG); +blasint ctrtrs_URU_single(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG); +blasint ctrtrs_URN_single(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG); +blasint ctrtrs_UCU_single(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG); +blasint ctrtrs_UCN_single(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG); +blasint ctrtrs_LNU_single(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG); +blasint ctrtrs_LNN_single(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG); +blasint ctrtrs_LTU_single(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG); +blasint ctrtrs_LTN_single(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG); +blasint ctrtrs_LRU_single(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG); +blasint ctrtrs_LRN_single(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG); +blasint ctrtrs_LCU_single(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG); +blasint ctrtrs_LCN_single(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG); +blasint ztrtrs_UNU_single(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG); +blasint ztrtrs_UNN_single(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG); +blasint ztrtrs_UTU_single(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG); +blasint ztrtrs_UTN_single(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG); +blasint ztrtrs_URU_single(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG); +blasint ztrtrs_URN_single(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG); +blasint ztrtrs_UCU_single(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG); +blasint ztrtrs_UCN_single(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG); +blasint ztrtrs_LNU_single(blas_arg_t *, BLASLONG *, BLASLONG *, double *, 
double *, BLASLONG); +blasint ztrtrs_LNN_single(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG); +blasint ztrtrs_LTU_single(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG); +blasint ztrtrs_LTN_single(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG); +blasint ztrtrs_LRU_single(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG); +blasint ztrtrs_LRN_single(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG); +blasint ztrtrs_LCU_single(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG); +blasint ztrtrs_LCN_single(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG); +blasint xtrtrs_UNU_single(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG); +blasint xtrtrs_UNN_single(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG); +blasint xtrtrs_UTU_single(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG); +blasint xtrtrs_UTN_single(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG); +blasint xtrtrs_URU_single(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG); +blasint xtrtrs_URN_single(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG); +blasint xtrtrs_UCU_single(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG); +blasint xtrtrs_UCN_single(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG); +blasint xtrtrs_LNU_single(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG); +blasint xtrtrs_LNN_single(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG); +blasint xtrtrs_LTU_single(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG); +blasint xtrtrs_LTN_single(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG); +blasint xtrtrs_LRU_single(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG); +blasint 
xtrtrs_LRN_single(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG); +blasint xtrtrs_LCU_single(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG); +blasint xtrtrs_LCN_single(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG); + +blasint strtrs_UNU_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG); +blasint strtrs_UNN_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG); +blasint strtrs_UTU_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG); +blasint strtrs_UTN_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG); +blasint strtrs_LNU_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG); +blasint strtrs_LNN_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG); +blasint strtrs_LTU_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG); +blasint strtrs_LTN_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG); +blasint dtrtrs_UNU_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG); +blasint dtrtrs_UNN_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG); +blasint dtrtrs_UTU_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG); +blasint dtrtrs_UTN_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG); +blasint dtrtrs_LNU_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG); +blasint dtrtrs_LNN_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG); +blasint dtrtrs_LTU_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG); +blasint dtrtrs_LTN_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG); +blasint qtrtrs_UNU_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG); +blasint qtrtrs_UNN_parallel(blas_arg_t *, BLASLONG *, 
BLASLONG *, xdouble *, xdouble *, BLASLONG); +blasint qtrtrs_UTU_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG); +blasint qtrtrs_UTN_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG); +blasint qtrtrs_LNU_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG); +blasint qtrtrs_LNN_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG); +blasint qtrtrs_LTU_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG); +blasint qtrtrs_LTN_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG); +blasint ctrtrs_UNU_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG); +blasint ctrtrs_UNN_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG); +blasint ctrtrs_UTU_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG); +blasint ctrtrs_UTN_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG); +blasint ctrtrs_URU_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG); +blasint ctrtrs_URN_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG); +blasint ctrtrs_UCU_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG); +blasint ctrtrs_UCN_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG); +blasint ctrtrs_LNU_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG); +blasint ctrtrs_LNN_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG); +blasint ctrtrs_LTU_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG); +blasint ctrtrs_LTN_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG); +blasint ctrtrs_LRU_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG); +blasint ctrtrs_LRN_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG); +blasint 
ctrtrs_LCU_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG); +blasint ctrtrs_LCN_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG); +blasint ztrtrs_UNU_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG); +blasint ztrtrs_UNN_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG); +blasint ztrtrs_UTU_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG); +blasint ztrtrs_UTN_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG); +blasint ztrtrs_URU_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG); +blasint ztrtrs_URN_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG); +blasint ztrtrs_UCU_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG); +blasint ztrtrs_UCN_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG); +blasint ztrtrs_LNU_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG); +blasint ztrtrs_LNN_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG); +blasint ztrtrs_LTU_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG); +blasint ztrtrs_LTN_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG); +blasint ztrtrs_LRU_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG); +blasint ztrtrs_LRN_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG); +blasint ztrtrs_LCU_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG); +blasint ztrtrs_LCN_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG); +blasint xtrtrs_UNU_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG); +blasint xtrtrs_UNN_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG); +blasint xtrtrs_UTU_parallel(blas_arg_t *, 
BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG); +blasint xtrtrs_UTN_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG); +blasint xtrtrs_URU_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG); +blasint xtrtrs_URN_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG); +blasint xtrtrs_UCU_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG); +blasint xtrtrs_UCN_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG); +blasint xtrtrs_LNU_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG); +blasint xtrtrs_LNN_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG); +blasint xtrtrs_LTU_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG); +blasint xtrtrs_LTN_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG); +blasint xtrtrs_LRU_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG); +blasint xtrtrs_LRN_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG); +blasint xtrtrs_LCU_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG); +blasint xtrtrs_LCN_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG); + #endif diff --git a/common_macro.h b/common_macro.h index d2503aa65..13bb85794 100644 --- a/common_macro.h +++ b/common_macro.h @@ -641,7 +641,7 @@ #define IMATCOPY_K_CT DIMATCOPY_K_CT #define IMATCOPY_K_RT DIMATCOPY_K_RT -#define GEADD_K DGEADD_K +#define GEADD_K DGEADD_K #else #define AMAX_K SAMAX_K @@ -944,7 +944,7 @@ #define IMATCOPY_K_CT SIMATCOPY_K_CT #define IMATCOPY_K_RT SIMATCOPY_K_RT -#define GEADD_K SGEADD_K +#define GEADD_K SGEADD_K #endif #else #ifdef XDOUBLE @@ -1770,7 +1770,7 @@ #define IMATCOPY_K_CTC ZIMATCOPY_K_CTC #define IMATCOPY_K_RTC ZIMATCOPY_K_RTC -#define GEADD_K ZGEADD_K +#define GEADD_K ZGEADD_K #else @@ 
-2193,7 +2193,7 @@ #define IMATCOPY_K_CTC CIMATCOPY_K_CTC #define IMATCOPY_K_RTC CIMATCOPY_K_RTC -#define GEADD_K CGEADD_K +#define GEADD_K CGEADD_K #endif #endif @@ -2806,3 +2806,160 @@ typedef struct { #endif #endif + +#ifndef COMPLEX +#ifdef XDOUBLE +#define TRTRS_UNU_SINGLE qtrtrs_UNU_single +#define TRTRS_UNN_SINGLE qtrtrs_UNN_single +#define TRTRS_UTU_SINGLE qtrtrs_UTU_single +#define TRTRS_UTN_SINGLE qtrtrs_UTN_single +#define TRTRS_LNU_SINGLE qtrtrs_LNU_single +#define TRTRS_LNN_SINGLE qtrtrs_LNN_single +#define TRTRS_LTU_SINGLE qtrtrs_LTU_single +#define TRTRS_LTN_SINGLE qtrtrs_LTN_single +#define TRTRS_UNU_PARALLEL qtrtrs_UNU_parallel +#define TRTRS_UNN_PARALLEL qtrtrs_UNN_parallel +#define TRTRS_UTU_PARALLEL qtrtrs_UTU_parallel +#define TRTRS_UTN_PARALLEL qtrtrs_UTN_parallel +#define TRTRS_LNU_PARALLEL qtrtrs_LNU_parallel +#define TRTRS_LNN_PARALLEL qtrtrs_LNN_parallel +#define TRTRS_LTU_PARALLEL qtrtrs_LTU_parallel +#define TRTRS_LTN_PARALLEL qtrtrs_LTN_parallel + +#elif defined(DOUBLE) +#define TRTRS_UNU_SINGLE dtrtrs_UNU_single +#define TRTRS_UNN_SINGLE dtrtrs_UNN_single +#define TRTRS_UTU_SINGLE dtrtrs_UTU_single +#define TRTRS_UTN_SINGLE dtrtrs_UTN_single +#define TRTRS_LNU_SINGLE dtrtrs_LNU_single +#define TRTRS_LNN_SINGLE dtrtrs_LNN_single +#define TRTRS_LTU_SINGLE dtrtrs_LTU_single +#define TRTRS_LTN_SINGLE dtrtrs_LTN_single +#define TRTRS_UNU_PARALLEL dtrtrs_UNU_parallel +#define TRTRS_UNN_PARALLEL dtrtrs_UNN_parallel +#define TRTRS_UTU_PARALLEL dtrtrs_UTU_parallel +#define TRTRS_UTN_PARALLEL dtrtrs_UTN_parallel +#define TRTRS_LNU_PARALLEL dtrtrs_LNU_parallel +#define TRTRS_LNN_PARALLEL dtrtrs_LNN_parallel +#define TRTRS_LTU_PARALLEL dtrtrs_LTU_parallel +#define TRTRS_LTN_PARALLEL dtrtrs_LTN_parallel +#else +#define TRTRS_UNU_SINGLE strtrs_UNU_single +#define TRTRS_UNN_SINGLE strtrs_UNN_single +#define TRTRS_UTU_SINGLE strtrs_UTU_single +#define TRTRS_UTN_SINGLE strtrs_UTN_single +#define TRTRS_LNU_SINGLE strtrs_LNU_single +#define 
TRTRS_LNN_SINGLE strtrs_LNN_single +#define TRTRS_LTU_SINGLE strtrs_LTU_single +#define TRTRS_LTN_SINGLE strtrs_LTN_single +#define TRTRS_UNU_PARALLEL strtrs_UNU_parallel +#define TRTRS_UNN_PARALLEL strtrs_UNN_parallel +#define TRTRS_UTU_PARALLEL strtrs_UTU_parallel +#define TRTRS_UTN_PARALLEL strtrs_UTN_parallel +#define TRTRS_LNU_PARALLEL strtrs_LNU_parallel +#define TRTRS_LNN_PARALLEL strtrs_LNN_parallel +#define TRTRS_LTU_PARALLEL strtrs_LTU_parallel +#define TRTRS_LTN_PARALLEL strtrs_LTN_parallel +#endif +#else +#ifdef XDOUBLE +#define TRTRS_UNU_SINGLE xtrtrs_UNU_single +#define TRTRS_UNN_SINGLE xtrtrs_UNN_single +#define TRTRS_UTU_SINGLE xtrtrs_UTU_single +#define TRTRS_UTN_SINGLE xtrtrs_UTN_single +#define TRTRS_URU_SINGLE xtrtrs_URU_single +#define TRTRS_URN_SINGLE xtrtrs_URN_single +#define TRTRS_UCU_SINGLE xtrtrs_UCU_single +#define TRTRS_UCN_SINGLE xtrtrs_UCN_single +#define TRTRS_LNU_SINGLE xtrtrs_LNU_single +#define TRTRS_LNN_SINGLE xtrtrs_LNN_single +#define TRTRS_LTU_SINGLE xtrtrs_LTU_single +#define TRTRS_LTN_SINGLE xtrtrs_LTN_single +#define TRTRS_LRU_SINGLE xtrtrs_LRU_single +#define TRTRS_LRN_SINGLE xtrtrs_LRN_single +#define TRTRS_LCU_SINGLE xtrtrs_LCU_single +#define TRTRS_LCN_SINGLE xtrtrs_LCN_single +#define TRTRS_UNU_PARALLEL xtrtrs_UNU_parallel +#define TRTRS_UNN_PARALLEL xtrtrs_UNN_parallel +#define TRTRS_UTU_PARALLEL xtrtrs_UTU_parallel +#define TRTRS_UTN_PARALLEL xtrtrs_UTN_parallel +#define TRTRS_URU_PARALLEL xtrtrs_URU_parallel +#define TRTRS_URN_PARALLEL xtrtrs_URN_parallel +#define TRTRS_UCU_PARALLEL xtrtrs_UCU_parallel +#define TRTRS_UCN_PARALLEL xtrtrs_UCN_parallel +#define TRTRS_LNU_PARALLEL xtrtrs_LNU_parallel +#define TRTRS_LNN_PARALLEL xtrtrs_LNN_parallel +#define TRTRS_LTU_PARALLEL xtrtrs_LTU_parallel +#define TRTRS_LTN_PARALLEL xtrtrs_LTN_parallel +#define TRTRS_LRU_PARALLEL xtrtrs_LRU_parallel +#define TRTRS_LRN_PARALLEL xtrtrs_LRN_parallel +#define TRTRS_LCU_PARALLEL xtrtrs_LCU_parallel +#define TRTRS_LCN_PARALLEL 
xtrtrs_LCN_parallel +#elif defined(DOUBLE) +#define TRTRS_UNU_SINGLE ztrtrs_UNU_single +#define TRTRS_UNN_SINGLE ztrtrs_UNN_single +#define TRTRS_UTU_SINGLE ztrtrs_UTU_single +#define TRTRS_UTN_SINGLE ztrtrs_UTN_single +#define TRTRS_URU_SINGLE ztrtrs_URU_single +#define TRTRS_URN_SINGLE ztrtrs_URN_single +#define TRTRS_UCU_SINGLE ztrtrs_UCU_single +#define TRTRS_UCN_SINGLE ztrtrs_UCN_single +#define TRTRS_LNU_SINGLE ztrtrs_LNU_single +#define TRTRS_LNN_SINGLE ztrtrs_LNN_single +#define TRTRS_LTU_SINGLE ztrtrs_LTU_single +#define TRTRS_LTN_SINGLE ztrtrs_LTN_single +#define TRTRS_LRU_SINGLE ztrtrs_LRU_single +#define TRTRS_LRN_SINGLE ztrtrs_LRN_single +#define TRTRS_LCU_SINGLE ztrtrs_LCU_single +#define TRTRS_LCN_SINGLE ztrtrs_LCN_single +#define TRTRS_UNU_PARALLEL ztrtrs_UNU_parallel +#define TRTRS_UNN_PARALLEL ztrtrs_UNN_parallel +#define TRTRS_UTU_PARALLEL ztrtrs_UTU_parallel +#define TRTRS_UTN_PARALLEL ztrtrs_UTN_parallel +#define TRTRS_URU_PARALLEL ztrtrs_URU_parallel +#define TRTRS_URN_PARALLEL ztrtrs_URN_parallel +#define TRTRS_UCU_PARALLEL ztrtrs_UCU_parallel +#define TRTRS_UCN_PARALLEL ztrtrs_UCN_parallel +#define TRTRS_LNU_PARALLEL ztrtrs_LNU_parallel +#define TRTRS_LNN_PARALLEL ztrtrs_LNN_parallel +#define TRTRS_LTU_PARALLEL ztrtrs_LTU_parallel +#define TRTRS_LTN_PARALLEL ztrtrs_LTN_parallel +#define TRTRS_LRU_PARALLEL ztrtrs_LRU_parallel +#define TRTRS_LRN_PARALLEL ztrtrs_LRN_parallel +#define TRTRS_LCU_PARALLEL ztrtrs_LCU_parallel +#define TRTRS_LCN_PARALLEL ztrtrs_LCN_parallel +#else +#define TRTRS_UNU_SINGLE ctrtrs_UNU_single +#define TRTRS_UNN_SINGLE ctrtrs_UNN_single +#define TRTRS_UTU_SINGLE ctrtrs_UTU_single +#define TRTRS_UTN_SINGLE ctrtrs_UTN_single +#define TRTRS_URU_SINGLE ctrtrs_URU_single +#define TRTRS_URN_SINGLE ctrtrs_URN_single +#define TRTRS_UCU_SINGLE ctrtrs_UCU_single +#define TRTRS_UCN_SINGLE ctrtrs_UCN_single +#define TRTRS_LNU_SINGLE ctrtrs_LNU_single +#define TRTRS_LNN_SINGLE ctrtrs_LNN_single +#define TRTRS_LTU_SINGLE 
ctrtrs_LTU_single +#define TRTRS_LTN_SINGLE ctrtrs_LTN_single +#define TRTRS_LRU_SINGLE ctrtrs_LRU_single +#define TRTRS_LRN_SINGLE ctrtrs_LRN_single +#define TRTRS_LCU_SINGLE ctrtrs_LCU_single +#define TRTRS_LCN_SINGLE ctrtrs_LCN_single +#define TRTRS_UNU_PARALLEL ctrtrs_UNU_parallel +#define TRTRS_UNN_PARALLEL ctrtrs_UNN_parallel +#define TRTRS_UTU_PARALLEL ctrtrs_UTU_parallel +#define TRTRS_UTN_PARALLEL ctrtrs_UTN_parallel +#define TRTRS_URU_PARALLEL ctrtrs_URU_parallel +#define TRTRS_URN_PARALLEL ctrtrs_URN_parallel +#define TRTRS_UCU_PARALLEL ctrtrs_UCU_parallel +#define TRTRS_UCN_PARALLEL ctrtrs_UCN_parallel +#define TRTRS_LNU_PARALLEL ctrtrs_LNU_parallel +#define TRTRS_LNN_PARALLEL ctrtrs_LNN_parallel +#define TRTRS_LTU_PARALLEL ctrtrs_LTU_parallel +#define TRTRS_LTN_PARALLEL ctrtrs_LTN_parallel +#define TRTRS_LRU_PARALLEL ctrtrs_LRU_parallel +#define TRTRS_LRN_PARALLEL ctrtrs_LRN_parallel +#define TRTRS_LCU_PARALLEL ctrtrs_LCU_parallel +#define TRTRS_LCN_PARALLEL ctrtrs_LCN_parallel +#endif +#endif diff --git a/driver/others/openblas_get_config.c b/driver/others/openblas_get_config.c index 81648fb7c..7fefee33d 100644 --- a/driver/others/openblas_get_config.c +++ b/driver/others/openblas_get_config.c @@ -78,10 +78,10 @@ char tmpstr[20]; #ifdef DYNAMIC_ARCH strcat(tmp_config_str, gotoblas_corename()); #endif -if (openblas_get_parallel() == 0) - sprintf(tmpstr, " SINGLE_THREADED"); -else - snprintf(tmpstr,19," MAX_THREADS=%d",MAX_CPU_NUMBER); + if (openblas_get_parallel() == 0) + sprintf(tmpstr, " SINGLE_THREADED"); + else + snprintf(tmpstr,19," MAX_THREADS=%d",MAX_CPU_NUMBER); strcat(tmp_config_str, tmpstr); return tmp_config_str; } diff --git a/interface/Makefile b/interface/Makefile index f0577796d..3f0dcca28 100644 --- a/interface/Makefile +++ b/interface/Makefile @@ -394,7 +394,7 @@ XBLASOBJS = $(XBLAS1OBJS) $(XBLAS2OBJS) $(XBLAS3OBJS) SLAPACKOBJS = \ sgetrf.$(SUFFIX) sgetrs.$(SUFFIX) spotrf.$(SUFFIX) sgetf2.$(SUFFIX) \ spotf2.$(SUFFIX) slaswp.$(SUFFIX) 
sgesv.$(SUFFIX) slauu2.$(SUFFIX) \ - slauum.$(SUFFIX) strti2.$(SUFFIX) strtri.$(SUFFIX) + slauum.$(SUFFIX) strti2.$(SUFFIX) strtri.$(SUFFIX) strtrs.$(SUFFIX) #DLAPACKOBJS = \ @@ -405,14 +405,14 @@ SLAPACKOBJS = \ DLAPACKOBJS = \ dgetrf.$(SUFFIX) dgetrs.$(SUFFIX) dpotrf.$(SUFFIX) dgetf2.$(SUFFIX) \ dpotf2.$(SUFFIX) dlaswp.$(SUFFIX) dgesv.$(SUFFIX) dlauu2.$(SUFFIX) \ - dlauum.$(SUFFIX) dtrti2.$(SUFFIX) dtrtri.$(SUFFIX) + dlauum.$(SUFFIX) dtrti2.$(SUFFIX) dtrtri.$(SUFFIX) dtrtrs.$(SUFFIX) QLAPACKOBJS = \ qgetf2.$(SUFFIX) qgetrf.$(SUFFIX) qlauu2.$(SUFFIX) qlauum.$(SUFFIX) \ qpotf2.$(SUFFIX) qpotrf.$(SUFFIX) qtrti2.$(SUFFIX) qtrtri.$(SUFFIX) \ - qlaswp.$(SUFFIX) qgetrs.$(SUFFIX) qgesv.$(SUFFIX) qpotri.$(SUFFIX) \ - + qlaswp.$(SUFFIX) qtrtrs.$(SUFFIX) qgesv.$(SUFFIX) qpotri.$(SUFFIX) \ + qtrtrs.$(SUFFIX) #CLAPACKOBJS = \ # cgetrf.$(SUFFIX) cgetrs.$(SUFFIX) cpotrf.$(SUFFIX) cgetf2.$(SUFFIX) \ @@ -423,7 +423,7 @@ QLAPACKOBJS = \ CLAPACKOBJS = \ cgetrf.$(SUFFIX) cgetrs.$(SUFFIX) cpotrf.$(SUFFIX) cgetf2.$(SUFFIX) \ cpotf2.$(SUFFIX) claswp.$(SUFFIX) cgesv.$(SUFFIX) clauu2.$(SUFFIX) \ - clauum.$(SUFFIX) ctrti2.$(SUFFIX) ctrtri.$(SUFFIX) + clauum.$(SUFFIX) ctrti2.$(SUFFIX) ctrtri.$(SUFFIX) ctrtrs.$(SUFFIX) #ZLAPACKOBJS = \ @@ -435,13 +435,14 @@ CLAPACKOBJS = \ ZLAPACKOBJS = \ zgetrf.$(SUFFIX) zgetrs.$(SUFFIX) zpotrf.$(SUFFIX) zgetf2.$(SUFFIX) \ zpotf2.$(SUFFIX) zlaswp.$(SUFFIX) zgesv.$(SUFFIX) zlauu2.$(SUFFIX) \ - zlauum.$(SUFFIX) ztrti2.$(SUFFIX) ztrtri.$(SUFFIX) + zlauum.$(SUFFIX) ztrti2.$(SUFFIX) ztrtri.$(SUFFIX) ztrtrs.$(SUFFIX) XLAPACKOBJS = \ xgetf2.$(SUFFIX) xgetrf.$(SUFFIX) xlauu2.$(SUFFIX) xlauum.$(SUFFIX) \ xpotf2.$(SUFFIX) xpotrf.$(SUFFIX) xtrti2.$(SUFFIX) xtrtri.$(SUFFIX) \ - xlaswp.$(SUFFIX) xgetrs.$(SUFFIX) xgesv.$(SUFFIX) xpotri.$(SUFFIX) \ + xlaswp.$(SUFFIX) xtrtrs.$(SUFFIX) xgesv.$(SUFFIX) xpotri.$(SUFFIX) \ + xtrtrs.$(SUFFIX) ifneq ($(NO_LAPACK), 1) SBLASOBJS += $(SLAPACKOBJS) @@ -2031,7 +2032,7 @@ sgetrs.$(SUFFIX) sgetrs.$(PSUFFIX) : lapack/getrs.c 
dgetrs.$(SUFFIX) dgetrs.$(PSUFFIX) : lapack/getrs.c $(CC) -c $(CFLAGS) $< -o $(@F) -qgetrs.$(SUFFIX) qgetrs.$(PSUFFIX) : getrs.c +qgetrs.$(SUFFIX) qgetrs.$(PSUFFIX) : lapack/getrs.c $(CC) -c $(CFLAGS) $< -o $(@F) cgetrs.$(SUFFIX) cgetrs.$(PSUFFIX) : lapack/zgetrs.c @@ -2040,7 +2041,25 @@ cgetrs.$(SUFFIX) cgetrs.$(PSUFFIX) : lapack/zgetrs.c zgetrs.$(SUFFIX) zgetrs.$(PSUFFIX) : lapack/zgetrs.c $(CC) -c $(CFLAGS) $< -o $(@F) -xgetrs.$(SUFFIX) xgetrs.$(PSUFFIX) : zgetrs.c +xgetrs.$(SUFFIX) xgetrs.$(PSUFFIX) : lapack/zgetrs.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +strtrs.$(SUFFIX) strtrs.$(PSUFFIX) : lapack/trtrs.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +dtrtrs.$(SUFFIX) dtrtrs.$(PSUFFIX) : lapack/trtrs.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +qtrtrs.$(SUFFIX) qtrtrs.$(PSUFFIX) : lapack/trtrs.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +ctrtrs.$(SUFFIX) ctrtrs.$(PSUFFIX) : lapack/ztrtrs.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +ztrtrs.$(SUFFIX) ztrtrs.$(PSUFFIX) : lapack/ztrtrs.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +xtrtrs.$(SUFFIX) xtrtrs.$(PSUFFIX) : lapack/ztrtrs.c $(CC) -c $(CFLAGS) $< -o $(@F) sgesv.$(SUFFIX) sgesv.$(PSUFFIX) : lapack/gesv.c diff --git a/interface/lapack/trtrs.c b/interface/lapack/trtrs.c new file mode 100644 index 000000000..54fbe8394 --- /dev/null +++ b/interface/lapack/trtrs.c @@ -0,0 +1,171 @@ +/*********************************************************************/ +/* Copyright 2009, 2010 The University of Texas at Austin. */ +/* All rights reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* 1. Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* */ +/* 2. 
Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ +/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ +/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ +/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ +/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/* POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/* The views and conclusions contained in the software and */ +/* documentation are those of the authors and should not be */ +/* interpreted as representing official policies, either expressed */ +/* or implied, of The University of Texas at Austin. 
*/
+/*********************************************************************/
+
+#include <stdio.h>
+#include "common.h"
+#ifdef FUNCTION_PROFILE
+#include "functable.h"
+#endif
+
+#ifdef XDOUBLE
+#define ERROR_NAME "QTRTRS"
+#elif defined(DOUBLE)
+#define ERROR_NAME "DTRTRS"
+#else
+#define ERROR_NAME "STRTRS"
+#endif
+
+/* Kernel tables, indexed below by (uplo << 2) | (trans << 1) | diag. */
+static blasint (*trtrs_single[])(blas_arg_t *, BLASLONG *, BLASLONG *, FLOAT *, FLOAT *, BLASLONG) = {
+  TRTRS_UNU_SINGLE, TRTRS_UNN_SINGLE, TRTRS_UTU_SINGLE, TRTRS_UTN_SINGLE, TRTRS_LNU_SINGLE, TRTRS_LNN_SINGLE, TRTRS_LTU_SINGLE, TRTRS_LTN_SINGLE,
+};
+
+#ifdef SMP
+static blasint (*trtrs_parallel[])(blas_arg_t *, BLASLONG *, BLASLONG *, FLOAT *, FLOAT *, BLASLONG) = {
+  TRTRS_UNU_PARALLEL, TRTRS_UNN_PARALLEL, TRTRS_UTU_PARALLEL, TRTRS_UTN_PARALLEL, TRTRS_LNU_PARALLEL, TRTRS_LNN_PARALLEL, TRTRS_LTU_PARALLEL, TRTRS_LTN_PARALLEL,
+};
+#endif
+
+/* LAPACK xTRTRS entry point (real types): validates the arguments, rejects a zero
+ * on a non-unit diagonal, then runs the single- or multi-threaded kernel.
+ * *Info: 0 on success, -i if argument i is invalid, >0 from IAMIN_K for a zero diagonal. */
+int NAME(char *UPLO, char* TRANS, char* DIAG, blasint *N, blasint *NRHS, FLOAT *a, blasint *ldA,
+         FLOAT *b, blasint *ldB, blasint *Info){
+
+  char uplo_arg = *UPLO;
+  char trans_arg = *TRANS;
+  char diag_arg = *DIAG;
+
+  blas_arg_t args;
+  blasint info;
+  int uplo, trans, diag;
+  FLOAT *buffer;
+#ifdef PPC440
+  extern
+#endif
+  FLOAT *sa, *sb;
+
+  PRINT_DEBUG_NAME;
+
+  args.m = *N;
+  args.n = *NRHS;
+  args.a = (void *)a;
+  args.lda = *ldA;
+  args.b = (void *)b;
+  args.ldb = *ldB;
+
+  info = 0;
+
+  TOUPPER(trans_arg);   /* all three option letters are accepted in either case */
+  trans = -1;
+  if (trans_arg == 'N') trans = 0;
+  if (trans_arg == 'T') trans = 1;
+  if (trans_arg == 'R') trans = 0;   /* 'R' shares the 'N' kernels */
+  if (trans_arg == 'C') trans = 1;   /* 'C' shares the 'T' kernels */
+
+  TOUPPER(uplo_arg);
+  uplo = -1;
+  if (uplo_arg == 'U') uplo = 0;
+  if (uplo_arg == 'L') uplo = 1;
+
+  TOUPPER(diag_arg);
+  diag = -1;
+  if (diag_arg == 'U') diag = 0;
+  if (diag_arg == 'N') diag = 1;
+
+  if (args.ldb < MAX(1, args.m)) info = 9;   /* tested from argument 9 down to 1, so the lowest invalid one is reported */
+  if (args.lda < MAX(1, args.m)) info = 7;
+  if (args.n < 0) info = 5;
+  if (args.m < 0) info = 4;
+  if (diag < 0) info = 3;
+  if (trans < 0) info = 2;
+  if (uplo < 0) info = 1;
+
+  if (info != 0) {
+    BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME) - 1);
+    *Info = - info;
+    return 0;
+  }
+
+  args.alpha = NULL;
+  args.beta = NULL;
+
+  *Info = 0;
+
+  if (args.m == 0) return 0;
+
+  if (diag) {   /* diag == 1 is the non-unit case ('N'); stride lda + 1 steps along the diagonal of a */
+    if (AMIN_K(args.m, args.a, args.lda + 1) == ZERO) {
+      *Info = IAMIN_K(args.m, args.a, args.lda + 1);
+      return 0;
+    }
+  }
+
+  IDEBUG_START;
+  FUNCTION_PROFILE_START();
+
+#ifndef PPC440
+  buffer = (FLOAT *)blas_memory_alloc(1);
+
+  sa = (FLOAT *)((BLASLONG)buffer + GEMM_OFFSET_A);
+  sb = (FLOAT *)(((BLASLONG)sa + ((GEMM_P * GEMM_Q * COMPSIZE * SIZE + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
+#endif
+
+#ifdef SMP
+  args.common = NULL;
+  args.nthreads = num_cpu_avail(4);
+
+  if (args.nthreads == 1) {
+#endif
+    (trtrs_single[(uplo << 2) | (trans << 1) | diag])(&args, NULL, NULL, sa, sb, 0);
+#ifdef SMP
+  } else {
+    (trtrs_parallel[(uplo << 2) | (trans << 1) | diag])(&args, NULL, NULL, sa, sb, 0);
+  }
+#endif
+
+#ifndef PPC440
+  blas_memory_free(buffer);
+#endif
+
+  FUNCTION_PROFILE_END(COMPSIZE * COMPSIZE, args.m * args.n, 2 * args.m * args.m * args.n);
+
+  IDEBUG_END;
+
+  return 0;
+}
diff --git a/interface/lapack/ztrtrs.c b/interface/lapack/ztrtrs.c
new file mode 100644
index 000000000..7f1bd9af4
--- /dev/null
+++ b/interface/lapack/ztrtrs.c
@@ -0,0 +1,171 @@
+/*********************************************************************/
+/* Copyright 2009, 2010 The University of Texas at Austin. */
+/* All rights reserved. */
+/* */
+/* Redistribution and use in source and binary forms, with or */
+/* without modification, are permitted provided that the following */
+/* conditions are met: */
+/* */
+/* 1. Redistributions of source code must retain the above */
+/* copyright notice, this list of conditions and the following */
+/* disclaimer. */
+/* */
+/* 2. Redistributions in binary form must reproduce the above */
+/* copyright notice, this list of conditions and the following */
+/* disclaimer in the documentation and/or other materials */
+/* provided with the distribution.
*/ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ +/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ +/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ +/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ +/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/* POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/* The views and conclusions contained in the software and */ +/* documentation are those of the authors and should not be */ +/* interpreted as representing official policies, either expressed */ +/* or implied, of The University of Texas at Austin. 
*/
+/*********************************************************************/
+
+#include <stdio.h>
+#include "common.h"
+#ifdef FUNCTION_PROFILE
+#include "functable.h"
+#endif
+
+#ifdef XDOUBLE
+#define ERROR_NAME "XTRTRS"
+#elif defined(DOUBLE)
+#define ERROR_NAME "ZTRTRS"
+#else
+#define ERROR_NAME "CTRTRS"
+#endif
+
+/* Kernel tables, indexed below by (uplo << 3) | (trans << 1) | diag. */
+static blasint (*trtrs_single[])(blas_arg_t *, BLASLONG *, BLASLONG *, FLOAT *, FLOAT *, BLASLONG) = {
+  TRTRS_UNU_SINGLE, TRTRS_UNN_SINGLE, TRTRS_UTU_SINGLE, TRTRS_UTN_SINGLE, TRTRS_URU_SINGLE, TRTRS_URN_SINGLE, TRTRS_UCU_SINGLE, TRTRS_UCN_SINGLE, TRTRS_LNU_SINGLE, TRTRS_LNN_SINGLE, TRTRS_LTU_SINGLE, TRTRS_LTN_SINGLE, TRTRS_LRU_SINGLE, TRTRS_LRN_SINGLE, TRTRS_LCU_SINGLE, TRTRS_LCN_SINGLE,
+};
+
+#ifdef SMP
+static blasint (*trtrs_parallel[])(blas_arg_t *, BLASLONG *, BLASLONG *, FLOAT *, FLOAT *, BLASLONG) = {
+  TRTRS_UNU_PARALLEL, TRTRS_UNN_PARALLEL, TRTRS_UTU_PARALLEL, TRTRS_UTN_PARALLEL, TRTRS_URU_PARALLEL, TRTRS_URN_PARALLEL, TRTRS_UCU_PARALLEL, TRTRS_UCN_PARALLEL, TRTRS_LNU_PARALLEL, TRTRS_LNN_PARALLEL, TRTRS_LTU_PARALLEL, TRTRS_LTN_PARALLEL, TRTRS_LRU_PARALLEL, TRTRS_LRN_PARALLEL, TRTRS_LCU_PARALLEL, TRTRS_LCN_PARALLEL,
+};
+#endif
+
+/* LAPACK xTRTRS entry point (complex types): validates the arguments, rejects a zero
+ * on a non-unit diagonal, then runs the single- or multi-threaded kernel.
+ * *Info: 0 on success, -i if argument i is invalid, >0 from IAMIN_K for a zero diagonal. */
+int NAME(char *UPLO, char* TRANS, char* DIAG, blasint *N, blasint *NRHS, FLOAT *a, blasint *ldA,
+         FLOAT *b, blasint *ldB, blasint *Info){
+
+  char uplo_arg = *UPLO;
+  char trans_arg = *TRANS;
+  char diag_arg = *DIAG;
+
+  blas_arg_t args;
+  blasint info;
+  int uplo, trans, diag;
+  FLOAT *buffer;
+#ifdef PPC440
+  extern
+#endif
+  FLOAT *sa, *sb;
+
+  PRINT_DEBUG_NAME;
+
+  args.m = *N;
+  args.n = *NRHS;
+  args.a = (void *)a;
+  args.lda = *ldA;
+  args.b = (void *)b;
+  args.ldb = *ldB;
+
+  info = 0;
+
+  TOUPPER(trans_arg);   /* all three option letters are accepted in either case */
+  trans = -1;
+  if (trans_arg == 'N') trans = 0;
+  if (trans_arg == 'T') trans = 1;
+  if (trans_arg == 'R') trans = 2;   /* unlike the real variant, 'R' and 'C' select their own kernels */
+  if (trans_arg == 'C') trans = 3;
+
+  TOUPPER(uplo_arg);
+  uplo = -1;
+  if (uplo_arg == 'U') uplo = 0;
+  if (uplo_arg == 'L') uplo = 1;
+
+  TOUPPER(diag_arg);
+  diag = -1;
+  if (diag_arg == 'U') diag = 0;
+  if (diag_arg == 'N') diag = 1;
+
+  if (args.ldb < MAX(1, args.m)) info = 9;   /* tested from argument 9 down to 1, so the lowest invalid one is reported */
+  if (args.lda < MAX(1, args.m)) info = 7;
+  if (args.n < 0) info = 5;
+  if (args.m < 0) info = 4;
+  if (diag < 0) info = 3;
+  if (trans < 0) info = 2;
+  if (uplo < 0) info = 1;
+
+  if (info != 0) {
+    BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME) - 1);
+    *Info = - info;
+    return 0;
+  }
+
+  args.alpha = NULL;
+  args.beta = NULL;
+
+  *Info = 0;
+
+  if (args.m == 0) return 0;
+
+  if (diag) {   /* diag == 1 is the non-unit case ('N'); stride lda + 1 steps along the diagonal of a */
+    if (AMIN_K(args.m, args.a, args.lda + 1) == ZERO) {
+      *Info = IAMIN_K(args.m, args.a, args.lda + 1);
+      return 0;
+    }
+  }
+
+  IDEBUG_START;
+  FUNCTION_PROFILE_START();
+
+#ifndef PPC440
+  buffer = (FLOAT *)blas_memory_alloc(1);
+
+  sa = (FLOAT *)((BLASLONG)buffer + GEMM_OFFSET_A);
+  sb = (FLOAT *)(((BLASLONG)sa + ((GEMM_P * GEMM_Q * COMPSIZE * SIZE + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
+#endif
+
+#ifdef SMP
+  args.common = NULL;
+  args.nthreads = num_cpu_avail(4);
+
+  if (args.nthreads == 1) {
+#endif
+    (trtrs_single[(uplo << 3) | (trans << 1) | diag])(&args, NULL, NULL, sa, sb, 0);
+#ifdef SMP
+  } else {
+    (trtrs_parallel[(uplo << 3) | (trans << 1) | diag])(&args, NULL, NULL, sa, sb, 0);
+  }
+#endif
+
+#ifndef PPC440
+  blas_memory_free(buffer);
+#endif
+
+  FUNCTION_PROFILE_END(COMPSIZE * COMPSIZE, args.m * args.n, 2 * args.m * args.m * args.n);
+
+  IDEBUG_END;
+
+  return 0;
+}
diff --git a/lapack-netlib/SRC/Makefile b/lapack-netlib/SRC/Makefile index 87a8f51e4..1c276aff6 100644 --- a/lapack-netlib/SRC/Makefile +++ b/lapack-netlib/SRC/Makefile @@ -507,22 +507,22 @@ ALL_AUX_OBJS = xerbla.o ../INSTALL/lsame.o SLAPACKOBJS = \ sgetrf.o sgetrs.o spotrf.o sgetf2.o \ spotf2.o slaswp.o sgesv.o slauu2.o \ - slauum.o strti2.o strtri.o + slauum.o strti2.o strtri.o strtrs.o DLAPACKOBJS = \ dgetrf.o dgetrs.o dpotrf.o dgetf2.o \ dpotf2.o dlaswp.o dgesv.o dlauu2.o \ - dlauum.o dtrti2.o dtrtri.o + dlauum.o dtrti2.o dtrtri.o dtrtrs.o CLAPACKOBJS = \ cgetrf.o cgetrs.o cpotrf.o cgetf2.o \ cpotf2.o claswp.o cgesv.o
clauu2.o \ - clauum.o ctrti2.o ctrtri.o + clauum.o ctrti2.o ctrtri.o ctrtrs.o ZLAPACKOBJS = \ zgetrf.o zgetrs.o zpotrf.o zgetf2.o \ zpotf2.o zlaswp.o zgesv.o zlauu2.o \ - zlauum.o ztrti2.o ztrtri.o + zlauum.o ztrti2.o ztrtri.o ztrtrs.o ALLAUX = $(filter-out $(ALL_AUX_OBJS),$(ALLAUX_O)) diff --git a/lapack/Makefile b/lapack/Makefile index aff5209d5..2bbb4603f 100644 --- a/lapack/Makefile +++ b/lapack/Makefile @@ -2,7 +2,7 @@ TOPDIR = .. include ../Makefile.system #SUBDIRS = laswp getf2 getrf potf2 potrf lauu2 lauum trti2 trtri getrs -SUBDIRS = getrf getf2 laswp getrs potrf potf2 lauu2 lauum trti2 trtri +SUBDIRS = getrf getf2 laswp getrs potrf potf2 lauu2 lauum trti2 trtri trtrs FLAMEDIRS = laswp getf2 potf2 lauu2 trti2 diff --git a/lapack/trtri/trtri_L_parallel.c b/lapack/trtri/trtri_L_parallel.c index 5dc60b862..fb8c8fc77 100644 --- a/lapack/trtri/trtri_L_parallel.c +++ b/lapack/trtri/trtri_L_parallel.c @@ -54,7 +54,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, BLASLONG n, info; BLASLONG bk, i, blocking, start_i; int mode; - BLASLONG lda, range_N[2]; + BLASLONG lda; // , range_N[2]; blas_arg_t newarg; FLOAT *a; FLOAT alpha[2] = { ONE, ZERO}; @@ -100,8 +100,8 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, bk = n - i; if (bk > blocking) bk = blocking; - range_N[0] = i; - range_N[1] = i + bk; + /* range_N[0] = i; + range_N[1] = i + bk; */ newarg.lda = lda; newarg.ldb = lda; diff --git a/lapack/trtri/trtri_U_parallel.c b/lapack/trtri/trtri_U_parallel.c index fc48a33f1..5287421d6 100644 --- a/lapack/trtri/trtri_U_parallel.c +++ b/lapack/trtri/trtri_U_parallel.c @@ -54,7 +54,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, BLASLONG n, info; BLASLONG bk, i, blocking; int mode; - BLASLONG lda, range_N[2]; + BLASLONG lda; //, range_N[2]; blas_arg_t newarg; FLOAT *a; FLOAT alpha[2] = { ONE, ZERO}; @@ -96,8 +96,8 @@ blasint CNAME(blas_arg_t *args, BLASLONG 
*range_m, BLASLONG *range_n, FLOAT *sa, bk = n - i; if (bk > blocking) bk = blocking; - range_N[0] = i; - range_N[1] = i + bk; + /* range_N[0] = i; + range_N[1] = i + bk; */ newarg.lda = lda; newarg.ldb = lda; diff --git a/lapack/trtrs/Makefile b/lapack/trtrs/Makefile new file mode 100644 index 000000000..a3b8f4322 --- /dev/null +++ b/lapack/trtrs/Makefile @@ -0,0 +1,452 @@ +TOPDIR = ../.. +include ../../Makefile.system + +SBLASOBJS = strtrs_UNU_single.$(SUFFIX) strtrs_UNN_single.$(SUFFIX) strtrs_UTU_single.$(SUFFIX) strtrs_UTN_single.$(SUFFIX) strtrs_LNU_single.$(SUFFIX) strtrs_LNN_single.$(SUFFIX) strtrs_LTU_single.$(SUFFIX) strtrs_LTN_single.$(SUFFIX) +DBLASOBJS = dtrtrs_UNU_single.$(SUFFIX) dtrtrs_UNN_single.$(SUFFIX) dtrtrs_UTU_single.$(SUFFIX) dtrtrs_UTN_single.$(SUFFIX) dtrtrs_LNU_single.$(SUFFIX) dtrtrs_LNN_single.$(SUFFIX) dtrtrs_LTU_single.$(SUFFIX) dtrtrs_LTN_single.$(SUFFIX) +QBLASOBJS = qtrtrs_UNU_single.$(SUFFIX) qtrtrs_UNN_single.$(SUFFIX) qtrtrs_UTU_single.$(SUFFIX) qtrtrs_UTN_single.$(SUFFIX) qtrtrs_LNU_single.$(SUFFIX) qtrtrs_LNN_single.$(SUFFIX) qtrtrs_LTU_single.$(SUFFIX) qtrtrs_LTN_single.$(SUFFIX) +CBLASOBJS = ctrtrs_UNU_single.$(SUFFIX) ctrtrs_UNN_single.$(SUFFIX) ctrtrs_UTU_single.$(SUFFIX) ctrtrs_UTN_single.$(SUFFIX) ctrtrs_URU_single.$(SUFFIX) ctrtrs_URN_single.$(SUFFIX) ctrtrs_UCU_single.$(SUFFIX) ctrtrs_UCN_single.$(SUFFIX) ctrtrs_LNU_single.$(SUFFIX) ctrtrs_LNN_single.$(SUFFIX) ctrtrs_LTU_single.$(SUFFIX) ctrtrs_LTN_single.$(SUFFIX) ctrtrs_LRU_single.$(SUFFIX) ctrtrs_LRN_single.$(SUFFIX) ctrtrs_LCU_single.$(SUFFIX) ctrtrs_LCN_single.$(SUFFIX) +ZBLASOBJS = ztrtrs_UNU_single.$(SUFFIX) ztrtrs_UNN_single.$(SUFFIX) ztrtrs_UTU_single.$(SUFFIX) ztrtrs_UTN_single.$(SUFFIX) ztrtrs_URU_single.$(SUFFIX) ztrtrs_URN_single.$(SUFFIX) ztrtrs_UCU_single.$(SUFFIX) ztrtrs_UCN_single.$(SUFFIX) ztrtrs_LNU_single.$(SUFFIX) ztrtrs_LNN_single.$(SUFFIX) ztrtrs_LTU_single.$(SUFFIX) ztrtrs_LTN_single.$(SUFFIX) ztrtrs_LRU_single.$(SUFFIX) 
ztrtrs_LRN_single.$(SUFFIX) ztrtrs_LCU_single.$(SUFFIX) ztrtrs_LCN_single.$(SUFFIX) +XBLASOBJS = xtrtrs_UNU_single.$(SUFFIX) xtrtrs_UNN_single.$(SUFFIX) xtrtrs_UTU_single.$(SUFFIX) xtrtrs_UTN_single.$(SUFFIX) xtrtrs_URU_single.$(SUFFIX) xtrtrs_URN_single.$(SUFFIX) xtrtrs_UCU_single.$(SUFFIX) xtrtrs_UCN_single.$(SUFFIX) xtrtrs_LNU_single.$(SUFFIX) xtrtrs_LNN_single.$(SUFFIX) xtrtrs_LTU_single.$(SUFFIX) xtrtrs_LTN_single.$(SUFFIX) xtrtrs_LRU_single.$(SUFFIX) xtrtrs_LRN_single.$(SUFFIX) xtrtrs_LCU_single.$(SUFFIX) xtrtrs_LCN_single.$(SUFFIX) + +ifdef SMP +SBLASOBJS += strtrs_UNU_parallel.$(SUFFIX) strtrs_UNN_parallel.$(SUFFIX) strtrs_UTU_parallel.$(SUFFIX) strtrs_UTN_parallel.$(SUFFIX) strtrs_LNU_parallel.$(SUFFIX) strtrs_LNN_parallel.$(SUFFIX) strtrs_LTU_parallel.$(SUFFIX) strtrs_LTN_parallel.$(SUFFIX) +DBLASOBJS += dtrtrs_UNU_parallel.$(SUFFIX) dtrtrs_UNN_parallel.$(SUFFIX) dtrtrs_UTU_parallel.$(SUFFIX) dtrtrs_UTN_parallel.$(SUFFIX) dtrtrs_LNU_parallel.$(SUFFIX) dtrtrs_LNN_parallel.$(SUFFIX) dtrtrs_LTU_parallel.$(SUFFIX) dtrtrs_LTN_parallel.$(SUFFIX) +QBLASOBJS += qtrtrs_UNU_parallel.$(SUFFIX) qtrtrs_UNN_parallel.$(SUFFIX) qtrtrs_UTU_parallel.$(SUFFIX) qtrtrs_UTN_parallel.$(SUFFIX) qtrtrs_LNU_parallel.$(SUFFIX) qtrtrs_LNN_parallel.$(SUFFIX) qtrtrs_LTU_parallel.$(SUFFIX) qtrtrs_LTN_parallel.$(SUFFIX) +CBLASOBJS += ctrtrs_UNU_parallel.$(SUFFIX) ctrtrs_UNN_parallel.$(SUFFIX) ctrtrs_UTU_parallel.$(SUFFIX) ctrtrs_UTN_parallel.$(SUFFIX) ctrtrs_URU_parallel.$(SUFFIX) ctrtrs_URN_parallel.$(SUFFIX) ctrtrs_UCU_parallel.$(SUFFIX) ctrtrs_UCN_parallel.$(SUFFIX) ctrtrs_LNU_parallel.$(SUFFIX) ctrtrs_LNN_parallel.$(SUFFIX) ctrtrs_LTU_parallel.$(SUFFIX) ctrtrs_LTN_parallel.$(SUFFIX) ctrtrs_LRU_parallel.$(SUFFIX) ctrtrs_LRN_parallel.$(SUFFIX) ctrtrs_LCU_parallel.$(SUFFIX) ctrtrs_LCN_parallel.$(SUFFIX) +ZBLASOBJS += ztrtrs_UNU_parallel.$(SUFFIX) ztrtrs_UNN_parallel.$(SUFFIX) ztrtrs_UTU_parallel.$(SUFFIX) ztrtrs_UTN_parallel.$(SUFFIX) ztrtrs_URU_parallel.$(SUFFIX) 
ztrtrs_URN_parallel.$(SUFFIX) ztrtrs_UCU_parallel.$(SUFFIX) ztrtrs_UCN_parallel.$(SUFFIX) ztrtrs_LNU_parallel.$(SUFFIX) ztrtrs_LNN_parallel.$(SUFFIX) ztrtrs_LTU_parallel.$(SUFFIX) ztrtrs_LTN_parallel.$(SUFFIX) ztrtrs_LRU_parallel.$(SUFFIX) ztrtrs_LRN_parallel.$(SUFFIX) ztrtrs_LCU_parallel.$(SUFFIX) ztrtrs_LCN_parallel.$(SUFFIX) +XBLASOBJS += xtrtrs_UNU_parallel.$(SUFFIX) xtrtrs_UNN_parallel.$(SUFFIX) xtrtrs_UTU_parallel.$(SUFFIX) xtrtrs_UTN_parallel.$(SUFFIX) xtrtrs_URU_parallel.$(SUFFIX) xtrtrs_URN_parallel.$(SUFFIX) xtrtrs_UCU_parallel.$(SUFFIX) xtrtrs_UCN_parallel.$(SUFFIX) xtrtrs_LNU_parallel.$(SUFFIX) xtrtrs_LNN_parallel.$(SUFFIX) xtrtrs_LTU_parallel.$(SUFFIX) xtrtrs_LTN_parallel.$(SUFFIX) xtrtrs_LRU_parallel.$(SUFFIX) xtrtrs_LRN_parallel.$(SUFFIX) xtrtrs_LCU_parallel.$(SUFFIX) xtrtrs_LCN_parallel.$(SUFFIX) +endif + +strtrs_UNU_single.$(SUFFIX) : trtrs_single.c + $(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -UUPLO -UTRANS -UDIAG $< -o $(@F) + +strtrs_UNN_single.$(SUFFIX) : trtrs_single.c + $(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -UUPLO -UTRANS -DDIAG $< -o $(@F) + +strtrs_UTU_single.$(SUFFIX) : trtrs_single.c + $(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -UUPLO -DTRANS -UDIAG $< -o $(@F) + +strtrs_UTN_single.$(SUFFIX) : trtrs_single.c + $(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -UUPLO -DTRANS -DDIAG $< -o $(@F) + +strtrs_LNU_single.$(SUFFIX) : trtrs_single.c + $(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DUPLO -UTRANS -UDIAG $< -o $(@F) + +strtrs_LNN_single.$(SUFFIX) : trtrs_single.c + $(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DUPLO -UTRANS -DDIAG $< -o $(@F) + +strtrs_LTU_single.$(SUFFIX) : trtrs_single.c + $(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DUPLO -DTRANS -UDIAG $< -o $(@F) + +strtrs_LTN_single.$(SUFFIX) : trtrs_single.c + $(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DUPLO -DTRANS -DDIAG $< -o $(@F) + +strtrs_UNU_parallel.$(SUFFIX) : trtrs_parallel.c + $(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -UUPLO -UTRANS -UDIAG $< -o $(@F) + +strtrs_UNN_parallel.$(SUFFIX) : trtrs_parallel.c + $(CC) -c 
$(CFLAGS) -UCOMPLEX -UDOUBLE -UUPLO -UTRANS -DDIAG $< -o $(@F) + +strtrs_UTU_parallel.$(SUFFIX) : trtrs_parallel.c + $(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -UUPLO -DTRANS -UDIAG $< -o $(@F) + +strtrs_UTN_parallel.$(SUFFIX) : trtrs_parallel.c + $(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -UUPLO -DTRANS -DDIAG $< -o $(@F) + +strtrs_LNU_parallel.$(SUFFIX) : trtrs_parallel.c + $(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DUPLO -UTRANS -UDIAG $< -o $(@F) + +strtrs_LNN_parallel.$(SUFFIX) : trtrs_parallel.c + $(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DUPLO -UTRANS -DDIAG $< -o $(@F) + +strtrs_LTU_parallel.$(SUFFIX) : trtrs_parallel.c + $(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DUPLO -DTRANS -UDIAG $< -o $(@F) + +strtrs_LTN_parallel.$(SUFFIX) : trtrs_parallel.c + $(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DUPLO -DTRANS -DDIAG $< -o $(@F) + +dtrtrs_UNU_single.$(SUFFIX) : trtrs_single.c + $(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -UUPLO -UTRANS -UDIAG $< -o $(@F) + +dtrtrs_UNN_single.$(SUFFIX) : trtrs_single.c + $(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -UUPLO -UTRANS -DDIAG $< -o $(@F) + +dtrtrs_UTU_single.$(SUFFIX) : trtrs_single.c + $(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -UUPLO -DTRANS -UDIAG $< -o $(@F) + +dtrtrs_UTN_single.$(SUFFIX) : trtrs_single.c + $(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -UUPLO -DTRANS -DDIAG $< -o $(@F) + +dtrtrs_LNU_single.$(SUFFIX) : trtrs_single.c + $(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -DUPLO -UTRANS -UDIAG $< -o $(@F) + +dtrtrs_LNN_single.$(SUFFIX) : trtrs_single.c + $(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -DUPLO -UTRANS -DDIAG $< -o $(@F) + +dtrtrs_LTU_single.$(SUFFIX) : trtrs_single.c + $(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -DUPLO -DTRANS -UDIAG $< -o $(@F) + +dtrtrs_LTN_single.$(SUFFIX) : trtrs_single.c + $(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -DUPLO -DTRANS -DDIAG $< -o $(@F) + +dtrtrs_UNU_parallel.$(SUFFIX) : trtrs_parallel.c + $(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -UUPLO -UTRANS -UDIAG $< -o $(@F) + +dtrtrs_UNN_parallel.$(SUFFIX) : trtrs_parallel.c + $(CC) -c $(CFLAGS) 
-UCOMPLEX -DDOUBLE -UUPLO -UTRANS -DDIAG $< -o $(@F) + +dtrtrs_UTU_parallel.$(SUFFIX) : trtrs_parallel.c + $(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -UUPLO -DTRANS -UDIAG $< -o $(@F) + +dtrtrs_UTN_parallel.$(SUFFIX) : trtrs_parallel.c + $(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -UUPLO -DTRANS -DDIAG $< -o $(@F) + +dtrtrs_LNU_parallel.$(SUFFIX) : trtrs_parallel.c + $(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -DUPLO -UTRANS -UDIAG $< -o $(@F) + +dtrtrs_LNN_parallel.$(SUFFIX) : trtrs_parallel.c + $(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -DUPLO -UTRANS -DDIAG $< -o $(@F) + +dtrtrs_LTU_parallel.$(SUFFIX) : trtrs_parallel.c + $(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -DUPLO -DTRANS -UDIAG $< -o $(@F) + +dtrtrs_LTN_parallel.$(SUFFIX) : trtrs_parallel.c + $(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -DUPLO -DTRANS -DDIAG $< -o $(@F) + +qtrtrs_UNU_single.$(SUFFIX) : trtrs_single.c + $(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -UUPLO -UTRANS -UDIAG $< -o $(@F) + +qtrtrs_UNN_single.$(SUFFIX) : trtrs_single.c + $(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -UUPLO -UTRANS -DDIAG $< -o $(@F) + +qtrtrs_UTU_single.$(SUFFIX) : trtrs_single.c + $(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -UUPLO -DTRANS -UDIAG $< -o $(@F) + +qtrtrs_UTN_single.$(SUFFIX) : trtrs_single.c + $(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -UUPLO -DTRANS -DDIAG $< -o $(@F) + +qtrtrs_LNU_single.$(SUFFIX) : trtrs_single.c + $(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -DUPLO -UTRANS -UDIAG $< -o $(@F) + +qtrtrs_LNN_single.$(SUFFIX) : trtrs_single.c + $(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -DUPLO -UTRANS -DDIAG $< -o $(@F) + +qtrtrs_LTU_single.$(SUFFIX) : trtrs_single.c + $(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -DUPLO -DTRANS -UDIAG $< -o $(@F) + +qtrtrs_LTN_single.$(SUFFIX) : trtrs_single.c + $(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -DUPLO -DTRANS -DDIAG $< -o $(@F) + +qtrtrs_UNU_parallel.$(SUFFIX) : trtrs_parallel.c + $(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -UUPLO -UTRANS -UDIAG $< -o $(@F) + +qtrtrs_UNN_parallel.$(SUFFIX) : trtrs_parallel.c + $(CC) -c $(CFLAGS) 
-UCOMPLEX -DXDOUBLE -UUPLO -UTRANS -DDIAG $< -o $(@F) + +qtrtrs_UTU_parallel.$(SUFFIX) : trtrs_parallel.c + $(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -UUPLO -DTRANS -UDIAG $< -o $(@F) + +qtrtrs_UTN_parallel.$(SUFFIX) : trtrs_parallel.c + $(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -UUPLO -DTRANS -DDIAG $< -o $(@F) + +qtrtrs_LNU_parallel.$(SUFFIX) : trtrs_parallel.c + $(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -DUPLO -UTRANS -UDIAG $< -o $(@F) + +qtrtrs_LNN_parallel.$(SUFFIX) : trtrs_parallel.c + $(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -DUPLO -UTRANS -DDIAG $< -o $(@F) + +qtrtrs_LTU_parallel.$(SUFFIX) : trtrs_parallel.c + $(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -DUPLO -DTRANS -UDIAG $< -o $(@F) + +qtrtrs_LTN_parallel.$(SUFFIX) : trtrs_parallel.c + $(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -DUPLO -DTRANS -DDIAG $< -o $(@F) + +ctrtrs_UNU_single.$(SUFFIX) : ztrtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -UUPLO -DTRANS=1 -UDIAG $< -o $(@F) + +ctrtrs_UNN_single.$(SUFFIX) : ztrtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -UUPLO -DTRANS=1 -DDIAG $< -o $(@F) + +ctrtrs_UTU_single.$(SUFFIX) : ztrtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -UUPLO -DTRANS=2 -UDIAG $< -o $(@F) + +ctrtrs_UTN_single.$(SUFFIX) : ztrtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -UUPLO -DTRANS=2 -DDIAG $< -o $(@F) + +ctrtrs_URU_single.$(SUFFIX) : ztrtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -UUPLO -DTRANS=3 -UDIAG $< -o $(@F) + +ctrtrs_URN_single.$(SUFFIX) : ztrtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -UUPLO -DTRANS=3 -DDIAG $< -o $(@F) + +ctrtrs_UCU_single.$(SUFFIX) : ztrtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -UUPLO -DTRANS=4 -UDIAG $< -o $(@F) + +ctrtrs_UCN_single.$(SUFFIX) : ztrtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -UUPLO -DTRANS=4 -DDIAG $< -o $(@F) + +ctrtrs_LNU_single.$(SUFFIX) : ztrtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DUPLO -DTRANS=1 -UDIAG $< -o $(@F) + +ctrtrs_LNN_single.$(SUFFIX) : ztrtrs_single.c + $(CC) 
-c $(CFLAGS) -DCOMPLEX -UDOUBLE -DUPLO -DTRANS=1 -DDIAG $< -o $(@F) + +ctrtrs_LTU_single.$(SUFFIX) : ztrtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DUPLO -DTRANS=2 -UDIAG $< -o $(@F) + +ctrtrs_LTN_single.$(SUFFIX) : ztrtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DUPLO -DTRANS=2 -DDIAG $< -o $(@F) + +ctrtrs_LRU_single.$(SUFFIX) : ztrtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DUPLO -DTRANS=3 -UDIAG $< -o $(@F) + +ctrtrs_LRN_single.$(SUFFIX) : ztrtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DUPLO -DTRANS=3 -DDIAG $< -o $(@F) + +ctrtrs_LCU_single.$(SUFFIX) : ztrtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DUPLO -DTRANS=4 -UDIAG $< -o $(@F) + +ctrtrs_LCN_single.$(SUFFIX) : ztrtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DUPLO -DTRANS=4 -DDIAG $< -o $(@F) + +ztrtrs_UNU_single.$(SUFFIX) : ztrtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -UUPLO -DTRANS=1 -UDIAG $< -o $(@F) + +ztrtrs_UNN_single.$(SUFFIX) : ztrtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -UUPLO -DTRANS=1 -DDIAG $< -o $(@F) + +ztrtrs_UTU_single.$(SUFFIX) : ztrtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -UUPLO -DTRANS=2 -UDIAG $< -o $(@F) + +ztrtrs_UTN_single.$(SUFFIX) : ztrtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -UUPLO -DTRANS=2 -DDIAG $< -o $(@F) + +ztrtrs_URU_single.$(SUFFIX) : ztrtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -UUPLO -DTRANS=3 -UDIAG $< -o $(@F) + +ztrtrs_URN_single.$(SUFFIX) : ztrtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -UUPLO -DTRANS=3 -DDIAG $< -o $(@F) + +ztrtrs_UCU_single.$(SUFFIX) : ztrtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -UUPLO -DTRANS=4 -UDIAG $< -o $(@F) + +ztrtrs_UCN_single.$(SUFFIX) : ztrtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -UUPLO -DTRANS=4 -DDIAG $< -o $(@F) + +ztrtrs_LNU_single.$(SUFFIX) : ztrtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DUPLO -DTRANS=1 -UDIAG $< -o $(@F) + +ztrtrs_LNN_single.$(SUFFIX) : ztrtrs_single.c + $(CC) 
-c $(CFLAGS) -DCOMPLEX -DDOUBLE -DUPLO -DTRANS=1 -DDIAG $< -o $(@F) + +ztrtrs_LTU_single.$(SUFFIX) : ztrtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DUPLO -DTRANS=2 -UDIAG $< -o $(@F) + +ztrtrs_LTN_single.$(SUFFIX) : ztrtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DUPLO -DTRANS=2 -DDIAG $< -o $(@F) + +ztrtrs_LRU_single.$(SUFFIX) : ztrtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DUPLO -DTRANS=3 -UDIAG $< -o $(@F) + +ztrtrs_LRN_single.$(SUFFIX) : ztrtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DUPLO -DTRANS=3 -DDIAG $< -o $(@F) + +ztrtrs_LCU_single.$(SUFFIX) : ztrtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DUPLO -DTRANS=4 -UDIAG $< -o $(@F) + +ztrtrs_LCN_single.$(SUFFIX) : ztrtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DUPLO -DTRANS=4 -DDIAG $< -o $(@F) + +xtrtrs_UNU_single.$(SUFFIX) : ztrtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -UUPLO -DTRANS=1 -UDIAG $< -o $(@F) + +xtrtrs_UNN_single.$(SUFFIX) : ztrtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -UUPLO -DTRANS=1 -DDIAG $< -o $(@F) + +xtrtrs_UTU_single.$(SUFFIX) : ztrtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -UUPLO -DTRANS=2 -UDIAG $< -o $(@F) + +xtrtrs_UTN_single.$(SUFFIX) : ztrtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -UUPLO -DTRANS=2 -DDIAG $< -o $(@F) + +xtrtrs_URU_single.$(SUFFIX) : ztrtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -UUPLO -DTRANS=3 -UDIAG $< -o $(@F) + +xtrtrs_URN_single.$(SUFFIX) : ztrtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -UUPLO -DTRANS=3 -DDIAG $< -o $(@F) + +xtrtrs_UCU_single.$(SUFFIX) : ztrtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -UUPLO -DTRANS=4 -UDIAG $< -o $(@F) + +xtrtrs_UCN_single.$(SUFFIX) : ztrtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -UUPLO -DTRANS=4 -DDIAG $< -o $(@F) + +xtrtrs_LNU_single.$(SUFFIX) : ztrtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DUPLO -DTRANS=1 -UDIAG $< -o $(@F) + +xtrtrs_LNN_single.$(SUFFIX) : 
ztrtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DUPLO -DTRANS=1 -DDIAG $< -o $(@F) + +xtrtrs_LTU_single.$(SUFFIX) : ztrtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DUPLO -DTRANS=2 -UDIAG $< -o $(@F) + +xtrtrs_LTN_single.$(SUFFIX) : ztrtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DUPLO -DTRANS=2 -DDIAG $< -o $(@F) + +xtrtrs_LRU_single.$(SUFFIX) : ztrtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DUPLO -DTRANS=3 -UDIAG $< -o $(@F) + +xtrtrs_LRN_single.$(SUFFIX) : ztrtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DUPLO -DTRANS=3 -DDIAG $< -o $(@F) + +xtrtrs_LCU_single.$(SUFFIX) : ztrtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DUPLO -DTRANS=4 -UDIAG $< -o $(@F) + +xtrtrs_LCN_single.$(SUFFIX) : ztrtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DUPLO -DTRANS=4 -DDIAG $< -o $(@F) + +ctrtrs_UNU_parallel.$(SUFFIX) : ztrtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -UUPLO -DTRANS=1 -UDIAG $< -o $(@F) + +ctrtrs_UNN_parallel.$(SUFFIX) : ztrtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -UUPLO -DTRANS=1 -DDIAG $< -o $(@F) + +ctrtrs_UTU_parallel.$(SUFFIX) : ztrtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -UUPLO -DTRANS=2 -UDIAG $< -o $(@F) + +ctrtrs_UTN_parallel.$(SUFFIX) : ztrtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -UUPLO -DTRANS=2 -DDIAG $< -o $(@F) + +ctrtrs_URU_parallel.$(SUFFIX) : ztrtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -UUPLO -DTRANS=3 -UDIAG $< -o $(@F) + +ctrtrs_URN_parallel.$(SUFFIX) : ztrtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -UUPLO -DTRANS=3 -DDIAG $< -o $(@F) + +ctrtrs_UCU_parallel.$(SUFFIX) : ztrtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -UUPLO -DTRANS=4 -UDIAG $< -o $(@F) + +ctrtrs_UCN_parallel.$(SUFFIX) : ztrtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -UUPLO -DTRANS=4 -DDIAG $< -o $(@F) + +ctrtrs_LNU_parallel.$(SUFFIX) : ztrtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DUPLO -DTRANS=1 -UDIAG 
$< -o $(@F) + +ctrtrs_LNN_parallel.$(SUFFIX) : ztrtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DUPLO -DTRANS=1 -DDIAG $< -o $(@F) + +ctrtrs_LTU_parallel.$(SUFFIX) : ztrtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DUPLO -DTRANS=2 -UDIAG $< -o $(@F) + +ctrtrs_LTN_parallel.$(SUFFIX) : ztrtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DUPLO -DTRANS=2 -DDIAG $< -o $(@F) + +ctrtrs_LRU_parallel.$(SUFFIX) : ztrtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DUPLO -DTRANS=3 -UDIAG $< -o $(@F) + +ctrtrs_LRN_parallel.$(SUFFIX) : ztrtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DUPLO -DTRANS=3 -DDIAG $< -o $(@F) + +ctrtrs_LCU_parallel.$(SUFFIX) : ztrtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DUPLO -DTRANS=4 -UDIAG $< -o $(@F) + +ctrtrs_LCN_parallel.$(SUFFIX) : ztrtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DUPLO -DTRANS=4 -DDIAG $< -o $(@F) + +ztrtrs_UNU_parallel.$(SUFFIX) : ztrtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -UUPLO -DTRANS=1 -UDIAG $< -o $(@F) + +ztrtrs_UNN_parallel.$(SUFFIX) : ztrtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -UUPLO -DTRANS=1 -DDIAG $< -o $(@F) + +ztrtrs_UTU_parallel.$(SUFFIX) : ztrtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -UUPLO -DTRANS=2 -UDIAG $< -o $(@F) + +ztrtrs_UTN_parallel.$(SUFFIX) : ztrtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -UUPLO -DTRANS=2 -DDIAG $< -o $(@F) + +ztrtrs_URU_parallel.$(SUFFIX) : ztrtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -UUPLO -DTRANS=3 -UDIAG $< -o $(@F) + +ztrtrs_URN_parallel.$(SUFFIX) : ztrtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -UUPLO -DTRANS=3 -DDIAG $< -o $(@F) + +ztrtrs_UCU_parallel.$(SUFFIX) : ztrtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -UUPLO -DTRANS=4 -UDIAG $< -o $(@F) + +ztrtrs_UCN_parallel.$(SUFFIX) : ztrtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -UUPLO -DTRANS=4 -DDIAG $< -o $(@F) + +ztrtrs_LNU_parallel.$(SUFFIX) : ztrtrs_parallel.c 
+ $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DUPLO -DTRANS=1 -UDIAG $< -o $(@F) + +ztrtrs_LNN_parallel.$(SUFFIX) : ztrtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DUPLO -DTRANS=1 -DDIAG $< -o $(@F) + +ztrtrs_LTU_parallel.$(SUFFIX) : ztrtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DUPLO -DTRANS=2 -UDIAG $< -o $(@F) + +ztrtrs_LTN_parallel.$(SUFFIX) : ztrtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DUPLO -DTRANS=2 -DDIAG $< -o $(@F) + +ztrtrs_LRU_parallel.$(SUFFIX) : ztrtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DUPLO -DTRANS=3 -UDIAG $< -o $(@F) + +ztrtrs_LRN_parallel.$(SUFFIX) : ztrtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DUPLO -DTRANS=3 -DDIAG $< -o $(@F) + +ztrtrs_LCU_parallel.$(SUFFIX) : ztrtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DUPLO -DTRANS=4 -UDIAG $< -o $(@F) + +ztrtrs_LCN_parallel.$(SUFFIX) : ztrtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DUPLO -DTRANS=4 -DDIAG $< -o $(@F) + +xtrtrs_UNU_parallel.$(SUFFIX) : ztrtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -UUPLO -DTRANS=1 -UDIAG $< -o $(@F) + +xtrtrs_UNN_parallel.$(SUFFIX) : ztrtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -UUPLO -DTRANS=1 -DDIAG $< -o $(@F) + +xtrtrs_UTU_parallel.$(SUFFIX) : ztrtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -UUPLO -DTRANS=2 -UDIAG $< -o $(@F) + +xtrtrs_UTN_parallel.$(SUFFIX) : ztrtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -UUPLO -DTRANS=2 -DDIAG $< -o $(@F) + +xtrtrs_URU_parallel.$(SUFFIX) : ztrtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -UUPLO -DTRANS=3 -UDIAG $< -o $(@F) + +xtrtrs_URN_parallel.$(SUFFIX) : ztrtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -UUPLO -DTRANS=3 -DDIAG $< -o $(@F) + +xtrtrs_UCU_parallel.$(SUFFIX) : ztrtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -UUPLO -DTRANS=4 -UDIAG $< -o $(@F) + +xtrtrs_UCN_parallel.$(SUFFIX) : ztrtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -UUPLO -DTRANS=4 
-DDIAG $< -o $(@F) + +xtrtrs_LNU_parallel.$(SUFFIX) : ztrtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DUPLO -DTRANS=1 -UDIAG $< -o $(@F) + +xtrtrs_LNN_parallel.$(SUFFIX) : ztrtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DUPLO -DTRANS=1 -DDIAG $< -o $(@F) + +xtrtrs_LTU_parallel.$(SUFFIX) : ztrtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DUPLO -DTRANS=2 -UDIAG $< -o $(@F) + +xtrtrs_LTN_parallel.$(SUFFIX) : ztrtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DUPLO -DTRANS=2 -DDIAG $< -o $(@F) + +xtrtrs_LRU_parallel.$(SUFFIX) : ztrtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DUPLO -DTRANS=3 -UDIAG $< -o $(@F) + +xtrtrs_LRN_parallel.$(SUFFIX) : ztrtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DUPLO -DTRANS=3 -DDIAG $< -o $(@F) + +xtrtrs_LCU_parallel.$(SUFFIX) : ztrtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DUPLO -DTRANS=4 -UDIAG $< -o $(@F) + +xtrtrs_LCN_parallel.$(SUFFIX) : ztrtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DUPLO -DTRANS=4 -DDIAG $< -o $(@F) + +include ../../Makefile.tail diff --git a/lapack/trtrs/trtrs_parallel.c b/lapack/trtrs/trtrs_parallel.c new file mode 100644 index 000000000..52f42f693 --- /dev/null +++ b/lapack/trtrs/trtrs_parallel.c @@ -0,0 +1,111 @@ +/*********************************************************************/ +/* Copyright 2009, 2010 The University of Texas at Austin. */ +/* All rights reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* 1. Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* */ +/* 2. 
Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ +/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ +/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ +/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ +/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/* POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/* The views and conclusions contained in the software and */ +/* documentation are those of the authors and should not be */ +/* interpreted as representing official policies, either expressed */ +/* or implied, of The University of Texas at Austin. 
*/
+/*********************************************************************/
+
+#include <stdio.h>
+#include "common.h"
+/* Pick the TRSM_L* and TRSV_* kernels from the build flags: TRANS defined -> T (else N), UPLO defined -> L (else U), DIAG defined -> N (else U). */
+#if !defined(TRANS) && !defined(UPLO) && !defined(DIAG)
+#define TRSM TRSM_LNUU
+#define TRSV TRSV_NUU
+#elif !defined(TRANS) && !defined(UPLO) && defined(DIAG)
+#define TRSM TRSM_LNUN
+#define TRSV TRSV_NUN
+#elif !defined(TRANS) && defined(UPLO) && !defined(DIAG)
+#define TRSM TRSM_LNLU
+#define TRSV TRSV_NLU
+#elif !defined(TRANS) && defined(UPLO) && defined(DIAG)
+#define TRSM TRSM_LNLN
+#define TRSV TRSV_NLN
+#elif defined(TRANS) && !defined(UPLO) && !defined(DIAG)
+#define TRSM TRSM_LTUU
+#define TRSV TRSV_TUU
+#elif defined(TRANS) && !defined(UPLO) && defined(DIAG)
+#define TRSM TRSM_LTUN
+#define TRSV TRSV_TUN
+#elif defined(TRANS) && defined(UPLO) && !defined(DIAG)
+#define TRSM TRSM_LTLU
+#define TRSV TRSV_TLU
+#elif defined(TRANS) && defined(UPLO) && defined(DIAG)
+#define TRSM TRSM_LTLN
+#define TRSV TRSV_TLN
+#endif
+
+static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
+                        FLOAT *sa, FLOAT *sb, BLASLONG mypos) {
+  /* Worker handed to gemm_thread_n: runs the selected TRSM on the given ranges and buffers; the last argument is always 0 and mypos is not used. */
+  TRSM (args, range_m, range_n, sa, sb, 0);
+
+  return 0;
+}
+
+blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLOAT *sb, BLASLONG mypos) {
+  /* n == 1: call TRSV on b with increment 1 and sb as buffer; otherwise run TRSM via gemm_thread_n with args->nthreads (presumably split over the n columns - confirm). Always returns 0. */
+  int mode;
+
+#ifndef TRANS
+  if (args -> n == 1){
+    TRSV (args -> m, args -> a, args -> lda, args -> b, 1, sb);
+  } else {
+#ifdef XDOUBLE
+    mode = BLAS_XDOUBLE | BLAS_REAL;
+#elif defined(DOUBLE)
+    mode = BLAS_DOUBLE | BLAS_REAL;
+#else
+    mode = BLAS_SINGLE | BLAS_REAL;
+#endif
+
+    gemm_thread_n(mode, args, NULL, NULL, inner_thread, sa, sb, args -> nthreads);
+  }
+#else
+  if (args -> n == 1){
+    TRSV (args -> m, args -> a, args -> lda, args -> b, 1, sb);
+  } else {
+#ifdef XDOUBLE
+    mode = BLAS_XDOUBLE | BLAS_REAL | (1 << BLAS_TRANSA_SHIFT);
+#elif defined(DOUBLE)
+    mode = BLAS_DOUBLE | BLAS_REAL | (1 << BLAS_TRANSA_SHIFT);
+#else
+    mode = BLAS_SINGLE | BLAS_REAL | (1 << BLAS_TRANSA_SHIFT);
+#endif
+
+    gemm_thread_n(mode, args, NULL, NULL, inner_thread, sa, sb, args -> nthreads);
+  }
+#endif
+
+  return 0;
+  }
diff --git a/lapack/trtrs/trtrs_single.c b/lapack/trtrs/trtrs_single.c
new file mode 100644
index 000000000..c82b81303
--- /dev/null
+++ b/lapack/trtrs/trtrs_single.c
@@ -0,0 +1,75 @@
+/*********************************************************************/
+/* Copyright 2009, 2010 The University of Texas at Austin. */
+/* All rights reserved. */
+/* */
+/* Redistribution and use in source and binary forms, with or */
+/* without modification, are permitted provided that the following */
+/* conditions are met: */
+/* */
+/* 1. Redistributions of source code must retain the above */
+/* copyright notice, this list of conditions and the following */
+/* disclaimer. */
+/* */
+/* 2. Redistributions in binary form must reproduce the above */
+/* copyright notice, this list of conditions and the following */
+/* disclaimer in the documentation and/or other materials */
+/* provided with the distribution. */
+/* */
+/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
+/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
+/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
+/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
+/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
+/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
+/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
+/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
+/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
+/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
+/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
+/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
+/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
+/* POSSIBILITY OF SUCH DAMAGE.
*/
+/* */
+/* The views and conclusions contained in the software and */
+/* documentation are those of the authors and should not be */
+/* interpreted as representing official policies, either expressed */
+/* or implied, of The University of Texas at Austin. */
+/*********************************************************************/
+
+#include <stdio.h>
+#include "common.h"
+/* Pick the TRSM_L* and TRSV_* kernels from the build flags: TRANS defined -> T (else N), UPLO defined -> L (else U), DIAG defined -> N (else U). */
+#if !defined(TRANS) && !defined(UPLO) && !defined(DIAG)
+#define TRSM TRSM_LNUU
+#define TRSV TRSV_NUU
+#elif !defined(TRANS) && !defined(UPLO) && defined(DIAG)
+#define TRSM TRSM_LNUN
+#define TRSV TRSV_NUN
+#elif !defined(TRANS) && defined(UPLO) && !defined(DIAG)
+#define TRSM TRSM_LNLU
+#define TRSV TRSV_NLU
+#elif !defined(TRANS) && defined(UPLO) && defined(DIAG)
+#define TRSM TRSM_LNLN
+#define TRSV TRSV_NLN
+#elif defined(TRANS) && !defined(UPLO) && !defined(DIAG)
+#define TRSM TRSM_LTUU
+#define TRSV TRSV_TUU
+#elif defined(TRANS) && !defined(UPLO) && defined(DIAG)
+#define TRSM TRSM_LTUN
+#define TRSV TRSV_TUN
+#elif defined(TRANS) && defined(UPLO) && !defined(DIAG)
+#define TRSM TRSM_LTLU
+#define TRSV TRSV_TLU
+#elif defined(TRANS) && defined(UPLO) && defined(DIAG)
+#define TRSM TRSM_LTLN
+#define TRSV TRSV_TLN
+#endif
+
+blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLOAT *sb, BLASLONG mypos) {
+  /* Single-threaded driver: n == 1 calls TRSV on b with increment 1 and sb as buffer, anything else calls TRSM with a trailing 0; mypos is not used and the result is always 0. */
+  if (args -> n == 1){
+    TRSV (args -> m, args -> a, args -> lda, args -> b, 1, sb);
+  } else {
+    TRSM (args, range_m, range_n, sa, sb, 0);
+  }
+  return 0; }
diff --git a/lapack/trtrs/ztrtrs_parallel.c b/lapack/trtrs/ztrtrs_parallel.c
new file mode 100644
index 000000000..d5248f21b
--- /dev/null
+++ b/lapack/trtrs/ztrtrs_parallel.c
@@ -0,0 +1,118 @@
+/*********************************************************************/
+/* Copyright 2009, 2010 The University of Texas at Austin. */
+/* All rights reserved.
*/ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* 1. Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* */ +/* 2. Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ +/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ +/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ +/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ +/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/* POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/* The views and conclusions contained in the software and */ +/* documentation are those of the authors and should not be */ +/* interpreted as representing official policies, either expressed */ +/* or implied, of The University of Texas at Austin. 
*/
+/*********************************************************************/
+
+#include <stdio.h>
+#include "common.h"
+/* Pick the TRSM_L* and ZTRSV_* kernels from the build flags: TRANS 1/2/3/4 -> N/T/R/C, UPLO defined -> L (else U), DIAG defined -> N (else U); no branch matches any other TRANS value. */
+#if TRANS == 1 && !defined(UPLO) && !defined(DIAG)
+#define TRSM TRSM_LNUU
+#define ZTRSV ZTRSV_NUU
+#elif TRANS == 1 && !defined(UPLO) && defined(DIAG)
+#define TRSM TRSM_LNUN
+#define ZTRSV ZTRSV_NUN
+#elif TRANS == 1 && defined(UPLO) && !defined(DIAG)
+#define TRSM TRSM_LNLU
+#define ZTRSV ZTRSV_NLU
+#elif TRANS == 1 && defined(UPLO) && defined(DIAG)
+#define TRSM TRSM_LNLN
+#define ZTRSV ZTRSV_NLN
+#elif TRANS == 2 && !defined(UPLO) && !defined(DIAG)
+#define TRSM TRSM_LTUU
+#define ZTRSV ZTRSV_TUU
+#elif TRANS == 2 && !defined(UPLO) && defined(DIAG)
+#define TRSM TRSM_LTUN
+#define ZTRSV ZTRSV_TUN
+#elif TRANS == 2 && defined(UPLO) && !defined(DIAG)
+#define TRSM TRSM_LTLU
+#define ZTRSV ZTRSV_TLU
+#elif TRANS == 2 && defined(UPLO) && defined(DIAG)
+#define TRSM TRSM_LTLN
+#define ZTRSV ZTRSV_TLN
+#elif TRANS == 3 && !defined(UPLO) && !defined(DIAG)
+#define TRSM TRSM_LRUU
+#define ZTRSV ZTRSV_RUU
+#elif TRANS == 3 && !defined(UPLO) && defined(DIAG)
+#define TRSM TRSM_LRUN
+#define ZTRSV ZTRSV_RUN
+#elif TRANS == 3 && defined(UPLO) && !defined(DIAG)
+#define TRSM TRSM_LRLU
+#define ZTRSV ZTRSV_RLU
+#elif TRANS == 3 && defined(UPLO) && defined(DIAG)
+#define TRSM TRSM_LRLN
+#define ZTRSV ZTRSV_RLN
+#elif TRANS == 4 && !defined(UPLO) && !defined(DIAG)
+#define TRSM TRSM_LCUU
+#define ZTRSV ZTRSV_CUU
+#elif TRANS == 4 && !defined(UPLO) && defined(DIAG)
+#define TRSM TRSM_LCUN
+#define ZTRSV ZTRSV_CUN
+#elif TRANS == 4 && defined(UPLO) && !defined(DIAG)
+#define TRSM TRSM_LCLU
+#define ZTRSV ZTRSV_CLU
+#elif TRANS == 4 && defined(UPLO) && defined(DIAG)
+#define TRSM TRSM_LCLN
+#define ZTRSV ZTRSV_CLN
+#endif
+
+static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
+                        FLOAT *sa, FLOAT *sb, BLASLONG mypos) {
+  /* Worker handed to gemm_thread_n: runs the selected TRSM on the given ranges and buffers; the last argument is always 0 and mypos is not used. */
+  TRSM (args, range_m, range_n, sa, sb, 0);
+  return 0;
+}
+
+blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLOAT *sb, BLASLONG mypos) {
+  /* n == 1: call ZTRSV on b with increment 1 and sb as buffer; otherwise run TRSM via gemm_thread_n with a BLAS_COMPLEX mode and args->nthreads (presumably split over the n columns - confirm). Always returns 0. */
+  int mode;
+
+  if (args -> n == 1){
+    ZTRSV (args -> m, args -> a, args -> lda, args -> b, 1, sb);
+  } else {
+#ifdef XDOUBLE
+    mode = BLAS_XDOUBLE | BLAS_COMPLEX;
+#elif defined(DOUBLE)
+    mode = BLAS_DOUBLE | BLAS_COMPLEX;
+#else
+    mode = BLAS_SINGLE | BLAS_COMPLEX;
+#endif
+
+    gemm_thread_n(mode, args, NULL, NULL, inner_thread, sa, sb, args -> nthreads);
+  }
+
+  return 0;
+  }
diff --git a/lapack/trtrs/ztrtrs_single.c b/lapack/trtrs/ztrtrs_single.c
new file mode 100644
index 000000000..f39d72900
--- /dev/null
+++ b/lapack/trtrs/ztrtrs_single.c
@@ -0,0 +1,98 @@
+/*********************************************************************/
+/* Copyright 2009, 2010 The University of Texas at Austin. */
+/* All rights reserved. */
+/* */
+/* Redistribution and use in source and binary forms, with or */
+/* without modification, are permitted provided that the following */
+/* conditions are met: */
+/* */
+/* 1. Redistributions of source code must retain the above */
+/* copyright notice, this list of conditions and the following */
+/* disclaimer. */
+/* */
+/* 2. Redistributions in binary form must reproduce the above */
+/* copyright notice, this list of conditions and the following */
+/* disclaimer in the documentation and/or other materials */
+/* provided with the distribution. */
+/* */
+/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
+/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
+/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
+/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
+/* DISCLAIMED.
IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
+/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
+/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
+/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
+/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
+/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
+/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
+/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
+/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
+/* POSSIBILITY OF SUCH DAMAGE. */
+/* */
+/* The views and conclusions contained in the software and */
+/* documentation are those of the authors and should not be */
+/* interpreted as representing official policies, either expressed */
+/* or implied, of The University of Texas at Austin. */
+/*********************************************************************/
+
+#include <stdio.h>
+#include "common.h"
+/* Pick the TRSM_L* and ZTRSV_* kernels from the build flags: TRANS 1/2/3/4 -> N/T/R/C, UPLO defined -> L (else U), DIAG defined -> N (else U); no branch matches any other TRANS value. */
+#if TRANS == 1 && !defined(UPLO) && !defined(DIAG)
+#define TRSM TRSM_LNUU
+#define ZTRSV ZTRSV_NUU
+#elif TRANS == 1 && !defined(UPLO) && defined(DIAG)
+#define TRSM TRSM_LNUN
+#define ZTRSV ZTRSV_NUN
+#elif TRANS == 1 && defined(UPLO) && !defined(DIAG)
+#define TRSM TRSM_LNLU
+#define ZTRSV ZTRSV_NLU
+#elif TRANS == 1 && defined(UPLO) && defined(DIAG)
+#define TRSM TRSM_LNLN
+#define ZTRSV ZTRSV_NLN
+#elif TRANS == 2 && !defined(UPLO) && !defined(DIAG)
+#define TRSM TRSM_LTUU
+#define ZTRSV ZTRSV_TUU
+#elif TRANS == 2 && !defined(UPLO) && defined(DIAG)
+#define TRSM TRSM_LTUN
+#define ZTRSV ZTRSV_TUN
+#elif TRANS == 2 && defined(UPLO) && !defined(DIAG)
+#define TRSM TRSM_LTLU
+#define ZTRSV ZTRSV_TLU
+#elif TRANS == 2 && defined(UPLO) && defined(DIAG)
+#define TRSM TRSM_LTLN
+#define ZTRSV ZTRSV_TLN
+#elif TRANS == 3 && !defined(UPLO) && !defined(DIAG)
+#define TRSM TRSM_LRUU
+#define ZTRSV ZTRSV_RUU
+#elif TRANS == 3 && !defined(UPLO) && defined(DIAG)
+#define TRSM TRSM_LRUN
+#define ZTRSV ZTRSV_RUN
+#elif TRANS == 3 && defined(UPLO) && !defined(DIAG)
+#define TRSM TRSM_LRLU
+#define ZTRSV ZTRSV_RLU
+#elif TRANS == 3 && defined(UPLO) && defined(DIAG)
+#define TRSM TRSM_LRLN
+#define ZTRSV ZTRSV_RLN
+#elif TRANS == 4 && !defined(UPLO) && !defined(DIAG)
+#define TRSM TRSM_LCUU
+#define ZTRSV ZTRSV_CUU
+#elif TRANS == 4 && !defined(UPLO) && defined(DIAG)
+#define TRSM TRSM_LCUN
+#define ZTRSV ZTRSV_CUN
+#elif TRANS == 4 && defined(UPLO) && !defined(DIAG)
+#define TRSM TRSM_LCLU
+#define ZTRSV ZTRSV_CLU
+#elif TRANS == 4 && defined(UPLO) && defined(DIAG)
+#define TRSM TRSM_LCLN
+#define ZTRSV ZTRSV_CLN
+#endif
+/* Single-threaded driver: n == 1 calls ZTRSV on b with increment 1 and sb as buffer, anything else calls TRSM with a trailing 0; mypos is not used and the result is always 0. */
+blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLOAT *sb, BLASLONG mypos) {
+  if (args -> n == 1){
+    ZTRSV (args -> m, args -> a, args -> lda, args -> b, 1, sb);
+  } else {
+    TRSM (args, range_m, range_n, sa, sb, 0);
+  }
+  return 0; }