From fde8a8e6a02b1186a178f8bbe3b3e5d84c8786e1 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Tue, 3 Sep 2019 22:41:17 +0200 Subject: [PATCH 01/19] Improve cmake build behaviour with non-host cpu targets (#2246) 1. Supply appropriate values for C/Z GEMM unroll when cross-compiling for CORE2 or ARMV7 2. Add the required xLOCAL_BUFFER_SIZE parameters for cross-compiling CORE2 3. Add -DFORCE_ option to getarch when building with -DTARGET=target for #2245 --- cmake/prebuild.cmake | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/cmake/prebuild.cmake b/cmake/prebuild.cmake index 2fe168a1c..da185db5a 100644 --- a/cmake/prebuild.cmake +++ b/cmake/prebuild.cmake @@ -105,6 +105,7 @@ if (DEFINED CORE AND CMAKE_CROSSCOMPILING AND NOT (${HOST_OS} STREQUAL "WINDOWSS # Perhaps this should be inside a different file as it grows larger file(APPEND ${TARGET_CONF_TEMP} "#define ${TCORE}\n" + "#define CORE_${TCORE}\n" "#define CHAR_CORENAME \"${TCORE}\"\n") if ("${TCORE}" STREQUAL "CORE2") file(APPEND ${TARGET_CONF_TEMP} @@ -119,15 +120,19 @@ if (DEFINED CORE AND CMAKE_CROSSCOMPILING AND NOT (${HOST_OS} STREQUAL "WINDOWSS "#define HAVE_SSE\n" "#define HAVE_SSE2\n" "#define HAVE_SSE3\n" - "#define HAVE_SSSE3\n") + "#define HAVE_SSSE3\n" + "#define SLOCAL_BUFFER_SIZE\t16384\n" + "#define DLOCAL_BUFFER_SIZE\t16384\n" + "#define CLOCAL_BUFFER_SIZE\t16384\n" + "#define ZLOCAL_BUFFER_SIZE\t16384\n") set(SGEMM_UNROLL_M 8) set(SGEMM_UNROLL_N 4) set(DGEMM_UNROLL_M 4) set(DGEMM_UNROLL_N 4) - set(CGEMM_DEFAULT_UNROLL_M 4) - set(CGEMM_DEFAULT_UNROLL_N 2) - set(ZGEMM_DEFAULT_UNROLL_M 2) - set(ZGEMM_DEFAULT_UNROLL_N 2) + set(CGEMM_UNROLL_M 4) + set(CGEMM_UNROLL_N 2) + set(ZGEMM_UNROLL_M 2) + set(ZGEMM_UNROLL_N 2) elseif ("${TCORE}" STREQUAL "ARMV7") file(APPEND ${TARGET_CONF_TEMP} "#define L1_DATA_SIZE\t65536\n" @@ -143,6 +148,10 @@ if (DEFINED CORE AND CMAKE_CROSSCOMPILING AND NOT (${HOST_OS} STREQUAL "WINDOWSS set(SGEMM_UNROLL_N 4) set(DGEMM_UNROLL_M 4) set(DGEMM_UNROLL_N 4) + set(CGEMM_UNROLL_M 2) + set(CGEMM_UNROLL_N 2) + set(ZGEMM_UNROLL_M 2) + set(ZGEMM_UNROLL_N 2) elseif ("${TCORE}" STREQUAL "ARMV8") file(APPEND ${TARGET_CONF_TEMP} "#define L1_DATA_SIZE\t32768\n" @@ -331,6 +340,9 @@ else(NOT CMAKE_CROSSCOMPILING) set(GETARCH_FLAGS ${GETARCH_FLAGS} -DFORCE_GENERIC) else() list(APPEND GETARCH_SRC ${PROJECT_SOURCE_DIR}/cpuid.S) + if (DEFINED TARGET_CORE) + set(GETARCH_FLAGS ${GETARCH_FLAGS} -DFORCE_${TARGET_CORE}) + endif () endif () if ("${CMAKE_SYSTEM_NAME}" STREQUAL "WindowsStore") From 6e9a93ec19b0411118f6d50d78ae5f0326e77a6f Mon Sep 17 00:00:00 2001 From: Andrew <16061801+brada4@users.noreply.github.com> Date: Sat, 7 Sep 2019 10:18:46 +0300 Subject: [PATCH 02/19] init From 4de545aa7da84c6bbb5d2d843d91a4900ad9a3e1 Mon Sep 17 00:00:00 2001 From: Andrew <16061801+brada4@users.noreply.github.com> Date: Sat, 7 Sep 2019 10:21:08 +0300 Subject: [PATCH 03/19] address minor warnings from gcc7 --- driver/others/openblas_get_config.c | 8 ++++---- lapack/trtri/trtri_L_parallel.c | 6 +++--- lapack/trtri/trtri_U_parallel.c | 6 +++--- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/driver/others/openblas_get_config.c b/driver/others/openblas_get_config.c index 81648fb7c..7fefee33d 100644 --- a/driver/others/openblas_get_config.c +++ b/driver/others/openblas_get_config.c @@ -78,10 +78,10 @@ char tmpstr[20]; #ifdef DYNAMIC_ARCH strcat(tmp_config_str, gotoblas_corename()); #endif -if (openblas_get_parallel() == 0) - sprintf(tmpstr, " SINGLE_THREADED"); -else - snprintf(tmpstr,19," MAX_THREADS=%d",MAX_CPU_NUMBER); + if (openblas_get_parallel() == 0) + sprintf(tmpstr, " SINGLE_THREADED"); + else + snprintf(tmpstr,19," MAX_THREADS=%d",MAX_CPU_NUMBER); strcat(tmp_config_str, tmpstr); return tmp_config_str; } diff --git a/lapack/trtri/trtri_L_parallel.c b/lapack/trtri/trtri_L_parallel.c index 5dc60b862..fb8c8fc77 100644 --- a/lapack/trtri/trtri_L_parallel.c +++ b/lapack/trtri/trtri_L_parallel.c @@ -54,7 +54,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, BLASLONG n, info; BLASLONG bk, i, blocking, start_i; int mode; - BLASLONG lda, range_N[2]; + BLASLONG lda; // , range_N[2]; blas_arg_t newarg; FLOAT *a; FLOAT alpha[2] = { ONE, ZERO}; @@ -100,8 +100,8 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, bk = n - i; if (bk > blocking) bk = blocking; - range_N[0] = i; - range_N[1] = i + bk; + /* range_N[0] = i; + range_N[1] = i + bk; */ newarg.lda = lda; newarg.ldb = lda; diff --git a/lapack/trtri/trtri_U_parallel.c b/lapack/trtri/trtri_U_parallel.c index fc48a33f1..5287421d6 100644 --- a/lapack/trtri/trtri_U_parallel.c +++ b/lapack/trtri/trtri_U_parallel.c @@ -54,7 +54,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, BLASLONG n, info; BLASLONG bk, i, blocking; int mode; - BLASLONG lda, range_N[2]; + BLASLONG lda; //, range_N[2]; blas_arg_t newarg; FLOAT *a; FLOAT alpha[2] = { ONE, ZERO}; @@ -96,8 +96,8 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, bk = n - i; if (bk > blocking) bk = blocking; - range_N[0] = i; - range_N[1] = i + bk; + /* range_N[0] = i; + range_N[1] = i + bk; */ newarg.lda = lda; newarg.ldb = lda; From ea747cf933a3ab6b82fbb726a51c70d34b3b91dc Mon Sep 17 00:00:00 2001 From: Guillaume Horel Date: Fri, 30 Aug 2019 15:06:38 -0400 Subject: [PATCH 04/19] start working on ?trtrs --- common_macro.h | 117 ++++++++++++++++++++++++++- interface/lapack/trtrs.c | 171 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 284 insertions(+), 4 deletions(-) create mode 100644 interface/lapack/trtrs.c diff --git a/common_macro.h b/common_macro.h index d2503aa65..e8a4a66ed 100644 --- a/common_macro.h +++ b/common_macro.h @@ -641,7 +641,7 @@ #define IMATCOPY_K_CT DIMATCOPY_K_CT #define IMATCOPY_K_RT DIMATCOPY_K_RT -#define GEADD_K DGEADD_K +#define GEADD_K DGEADD_K #else #define AMAX_K SAMAX_K @@ -944,7 +944,7 @@ #define IMATCOPY_K_CT SIMATCOPY_K_CT #define IMATCOPY_K_RT SIMATCOPY_K_RT -#define GEADD_K SGEADD_K +#define GEADD_K SGEADD_K #endif #else #ifdef XDOUBLE @@ -1770,7 +1770,7 @@ #define IMATCOPY_K_CTC ZIMATCOPY_K_CTC #define IMATCOPY_K_RTC ZIMATCOPY_K_RTC -#define GEADD_K ZGEADD_K +#define GEADD_K ZGEADD_K #else @@ -2193,7 +2193,7 @@ #define IMATCOPY_K_CTC CIMATCOPY_K_CTC #define IMATCOPY_K_RTC CIMATCOPY_K_RTC -#define GEADD_K CGEADD_K +#define GEADD_K CGEADD_K #endif #endif @@ -2806,3 +2806,112 @@ typedef struct { #endif #endif + +#ifndef COMPLEX +#ifdef XDOUBLE +#define TRTRS_UNU_SINGLE qtrtrs_UNU_single +#define TRTRS_UNN_SINGLE qtrtrs_UNN_single +#define TRTRS_UTU_SINGLE qtrtrs_UTU_single +#define TRTRS_UTN_SINGLE qtrtrs_UTN_single +#define TRTRS_LNU_SINGLE qtrtrs_LNU_single +#define TRTRS_LNN_SINGLE qtrtrs_LNN_single +#define TRTRS_LTU_SINGLE qtrtrs_LTU_single +#define TRTRS_LTN_SINGLE qtrtrs_LTN_single +#define TRTRS_UNU_PARALLEL qtrtrs_UNU_parallel +#define TRTRS_UNN_PARALLEL qtrtrs_UNN_parallel +#define TRTRS_UTU_PARALLEL qtrtrs_UTU_parallel +#define TRTRS_UTN_PARALLEL qtrtrs_UTN_parallel +#define TRTRS_LNU_PARALLEL qtrtrs_LNU_parallel +#define TRTRS_LNN_PARALLEL qtrtrs_LNN_parallel +#define TRTRS_LTU_PARALLEL qtrtrs_LTU_parallel +#define TRTRS_LTN_PARALLEL qtrtrs_LTN_parallel + +#elif defined(DOUBLE) +#define TRTRS_UNU_SINGLE dtrtrs_UNU_single +#define TRTRS_UNN_SINGLE dtrtrs_UNN_single +#define TRTRS_UTU_SINGLE dtrtrs_UTU_single +#define TRTRS_UTN_SINGLE dtrtrs_UTN_single +#define TRTRS_LNU_SINGLE dtrtrs_LNU_single +#define TRTRS_LNN_SINGLE dtrtrs_LNN_single +#define TRTRS_LTU_SINGLE dtrtrs_LTU_single +#define TRTRS_LTN_SINGLE dtrtrs_LTN_single +#define TRTRS_UNU_PARALLEL dtrtrs_UNU_parallel +#define TRTRS_UNN_PARALLEL dtrtrs_UNN_parallel +#define TRTRS_UTU_PARALLEL dtrtrs_UTU_parallel +#define TRTRS_UTN_PARALLEL dtrtrs_UTN_parallel +#define TRTRS_LNU_PARALLEL dtrtrs_LNU_parallel +#define TRTRS_LNN_PARALLEL dtrtrs_LNN_parallel +#define TRTRS_LTU_PARALLEL dtrtrs_LTU_parallel +#define TRTRS_LTN_PARALLEL dtrtrs_LTN_parallel +#else +#define TRTRS_UNU_SINGLE strtrs_UNU_single +#define TRTRS_UNN_SINGLE strtrs_UNN_single +#define TRTRS_UTU_SINGLE strtrs_UTU_single +#define TRTRS_UTN_SINGLE strtrs_UTN_single +#define TRTRS_LNU_SINGLE strtrs_LNU_single +#define TRTRS_LNN_SINGLE strtrs_LNN_single +#define TRTRS_LTU_SINGLE strtrs_LTU_single +#define TRTRS_LTN_SINGLE strtrs_LTN_single +#define TRTRS_UNU_PARALLEL strtrs_UNU_parallel +#define TRTRS_UNN_PARALLEL strtrs_UNN_parallel +#define TRTRS_UTU_PARALLEL strtrs_UTU_parallel +#define TRTRS_UTN_PARALLEL strtrs_UTN_parallel +#define TRTRS_LNU_PARALLEL strtrs_LNU_parallel +#define TRTRS_LNN_PARALLEL strtrs_LNN_parallel +#define TRTRS_LTU_PARALLEL strtrs_LTU_parallel +#define TRTRS_LTN_PARALLEL strtrs_LTN_parallel +#endif +#else +#ifdef XDOUBLE +#define TRTRS_UNU_SINGLE xtrtrs_UNU_single +#define TRTRS_UNN_SINGLE xtrtrs_UNN_single +#define TRTRS_UTU_SINGLE xtrtrs_UTU_single +#define TRTRS_UTN_SINGLE xtrtrs_UTN_single +#define TRTRS_LNU_SINGLE xtrtrs_LNU_single +#define TRTRS_LNN_SINGLE xtrtrs_LNN_single +#define TRTRS_LTU_SINGLE xtrtrs_LTU_single +#define TRTRS_LTN_SINGLE xtrtrs_LTN_single +#define TRTRS_UNU_PARALLEL xtrtrs_UNU_parallel +#define TRTRS_UNN_PARALLEL xtrtrs_UNN_parallel +#define TRTRS_UTU_PARALLEL xtrtrs_UTU_parallel +#define TRTRS_UTN_PARALLEL xtrtrs_UTN_parallel +#define TRTRS_LNU_PARALLEL xtrtrs_LNU_parallel +#define TRTRS_LNN_PARALLEL xtrtrs_LNN_parallel +#define TRTRS_LTU_PARALLEL xtrtrs_LTU_parallel +#define TRTRS_LTN_PARALLEL xtrtrs_LTN_parallel +#elif defined(DOUBLE) +#define TRTRS_UNU_SINGLE ztrtrs_UNU_single +#define TRTRS_UNN_SINGLE ztrtrs_UNN_single +#define TRTRS_UTU_SINGLE ztrtrs_UTU_single +#define TRTRS_UTN_SINGLE ztrtrs_UTN_single +#define TRTRS_LNU_SINGLE ztrtrs_LNU_single +#define TRTRS_LNN_SINGLE ztrtrs_LNN_single +#define TRTRS_LTU_SINGLE ztrtrs_LTU_single +#define TRTRS_LTN_SINGLE ztrtrs_LTN_single +#define TRTRS_UNU_PARALLEL ztrtrs_UNU_parallel +#define TRTRS_UNN_PARALLEL ztrtrs_UNN_parallel +#define TRTRS_UTU_PARALLEL ztrtrs_UTU_parallel +#define TRTRS_UTN_PARALLEL ztrtrs_UTN_parallel +#define TRTRS_LNU_PARALLEL ztrtrs_LNU_parallel +#define TRTRS_LNN_PARALLEL ztrtrs_LNN_parallel +#define TRTRS_LTU_PARALLEL ztrtrs_LTU_parallel +#define TRTRS_LTN_PARALLEL ztrtrs_LTN_parallel +#else +#define TRTRS_UNU_SINGLE ctrtrs_UNU_single +#define TRTRS_UNN_SINGLE ctrtrs_UNN_single +#define TRTRS_UTU_SINGLE ctrtrs_UTU_single +#define TRTRS_UTN_SINGLE ctrtrs_UTN_single +#define TRTRS_LNU_SINGLE ctrtrs_LNU_single +#define TRTRS_LNN_SINGLE ctrtrs_LNN_single +#define TRTRS_LTU_SINGLE ctrtrs_LTU_single +#define TRTRS_LTN_SINGLE ctrtrs_LTN_single +#define TRTRS_UNU_PARALLEL ctrtrs_UNU_parallel +#define TRTRS_UNN_PARALLEL ctrtrs_UNN_parallel +#define TRTRS_UTU_PARALLEL ctrtrs_UTU_parallel +#define TRTRS_UTN_PARALLEL ctrtrs_UTN_parallel +#define TRTRS_LNU_PARALLEL ctrtrs_LNU_parallel +#define TRTRS_LNN_PARALLEL ctrtrs_LNN_parallel +#define TRTRS_LTU_PARALLEL ctrtrs_LTU_parallel +#define TRTRS_LTN_PARALLEL ctrtrs_LTN_parallel +#endif +#endif diff --git a/interface/lapack/trtrs.c b/interface/lapack/trtrs.c new file mode 100644 index 000000000..261b07ec6 --- /dev/null +++ b/interface/lapack/trtrs.c @@ -0,0 +1,171 @@ +/*********************************************************************/ +/* Copyright 2009, 2010 The University of Texas at Austin. */ +/* All rights reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* 1. Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* */ +/* 2. Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ +/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ +/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ +/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ +/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/* POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/* The views and conclusions contained in the software and */ +/* documentation are those of the authors and should not be */ +/* interpreted as representing official policies, either expressed */ +/* or implied, of The University of Texas at Austin. */ +/*********************************************************************/ + +#include +#include "common.h" +#ifdef FUNCTION_PROFILE +#include "functable.h" +#endif + +#ifdef XDOUBLE +#define ERROR_NAME "QTRTRS" +#elif defined(DOUBLE) +#define ERROR_NAME "DTRTRS" +#else +#define ERROR_NAME "STRTRS" +#endif + +static blasint (*trtrs_single[])(blas_arg_t *, BLASLONG *, BLASLONG *, FLOAT *, FLOAT *, BLASLONG) = { + TRTRS_UNU_SINGLE, TRTRS_UNN_SINGLE, TRTRS_UTU_SINGLE, TRTRS_UTN_SINGLE, TRTRS_LNU_SINGLE, TRTRS_LNN_SINGLE, TRTRS_LTU_SINGLE, TRTRS_LTN_SINGLE, +}; + +#ifdef SMP +static blasint (*trtrs_parallel[])(blas_arg_t *, BLASLONG *, BLASLONG *, FLOAT *, FLOAT *, BLASLONG) = { + TRTRS_UNU_PARALLEL, TRTRS_UNN_PARALLEL, TRTRS_UTU_PARALLEL, TRTRS_UTN_PARALLEL, TRTRS_LNU_PARALLEL, TRTRS_LNN_PARALLEL, TRTRS_LTU_PARALLEL, TRTRS_LTN_PARALLEL, +}; +#endif + +int NAME(char *UPLO, char* TRANS, char* DIAG, blasint *N, blasint *NRHS, FLOAT *a, blasint *ldA, + FLOAT *b, blasint *ldB, blasint *Info){ + + char uplo_arg = *UPLO; + char trans_arg = *TRANS; + char diag_arg = *DIAG; + + blas_arg_t args; + + blasint info; + int uplo, trans, diag; + FLOAT *buffer; +#ifdef PPC440 + extern +#endif + FLOAT *sa, *sb; + + PRINT_DEBUG_NAME; + + args.m = *N; + args.n = *NRHS; + args.a = (void *)a; + args.lda = *ldA; + args.b = (void *)b; + args.ldb = *ldB; + + info = 0; + + TOUPPER(trans_arg); + trans = -1; + if (trans_arg == 'N') trans = 0; + if (trans_arg == 'T') trans = 1; + if (trans_arg == 'R') trans = 0; + if (trans_arg == 'C') trans = 1; + + uplo = -1; + if (uplo_arg == 'U') uplo = 0; + if (uplo_arg == 'L') uplo = 1; + + diag = -1; + if (diag_arg == 'U') diag = 0; + if (diag_arg == 'N') diag = 1; + + if (args.ldb < MAX(1, args.m)) info = 7; + if (args.lda < MAX(1, args.m)) info = 9; + if (args.n < 0) info = 5; + if (args.m < 0) info = 4; + if (trans < 0) info = 2; + if (uplo < 0) info = 1; + if (diag < 0) info = 3; + + if (info != 0) { + BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME)); + *Info = - info; + return 0; + } + + args.alpha = NULL; + args.beta = NULL; + + *Info = 0; + + if (args.m == 0 || args.n == 0) return 0; + + if (diag) { + if (AMIN_K(args.n, args.a, args.lda + 1) == ZERO) { + *Info = IAMIN_K(args.n, args.a, args.lda + 1); + return 0; + } + } + + + IDEBUG_START; + + FUNCTION_PROFILE_START(); + +#ifndef PPC440 + buffer = (FLOAT *)blas_memory_alloc(1); + + sa = (FLOAT *)((BLASLONG)buffer + GEMM_OFFSET_A); + sb = (FLOAT *)(((BLASLONG)sa + ((GEMM_P * GEMM_Q * COMPSIZE * SIZE + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B); +#endif + +#ifdef SMP + args.common = NULL; + args.nthreads = num_cpu_avail(4); + + if (args.nthreads == 1) { +#endif + + (trtrs_single[(uplo << 2) | (trans << 1) | diag])(&args, NULL, NULL, sa, sb, 0); + +#ifdef SMP + } else { + (trtrs_parallel[(uplo << 2) | (trans << 1) | diag])(&args, NULL, NULL, sa, sb, 0); + } +#endif + +#ifndef PPC440 + blas_memory_free(buffer); +#endif + + FUNCTION_PROFILE_END(COMPSIZE * COMPSIZE, args.m * args.n, 2 * args.m * args.m * args.n); + + IDEBUG_END; + + return 0; + +} From 733d97b2df64d6e5674a5a6b673254584b1d75af Mon Sep 17 00:00:00 2001 From: Guillaume Horel Date: Fri, 30 Aug 2019 16:31:25 -0400 Subject: [PATCH 05/19] add files --- lapack/trtrs/Makefile | 308 ++++++++++++++++++++++++++++++++++++ lapack/trtrs/trtrs_single.c | 68 ++++++++ 2 files changed, 376 insertions(+) create mode 100644 lapack/trtrs/Makefile create mode 100644 lapack/trtrs/trtrs_single.c diff --git a/lapack/trtrs/Makefile b/lapack/trtrs/Makefile new file mode 100644 index 000000000..6f41a9319 --- /dev/null +++ b/lapack/trtrs/Makefile @@ -0,0 +1,308 @@ +TOPDIR = ../.. +include ../../Makefile.system + +SBLASOBJS = strtrs_UNU_single.$(SUFFIX) strtrs_UNN_single.$(SUFFIX) strtrs_UTU_single.$(SUFFIX) strtrs_UTN_single.$(SUFFIX) strtrs_LNU_single.$(SUFFIX) strtrs_LNN_single.$(SUFFIX) strtrs_LTU_single.$(SUFFIX) strtrs_LTN_single.$(SUFFIX) +DBLASOBJS = dtrtrs_UNU_single.$(SUFFIX) dtrtrs_UNN_single.$(SUFFIX) dtrtrs_UTU_single.$(SUFFIX) dtrtrs_UTN_single.$(SUFFIX) dtrtrs_LNU_single.$(SUFFIX) dtrtrs_LNN_single.$(SUFFIX) dtrtrs_LTU_single.$(SUFFIX) dtrtrs_LTN_single.$(SUFFIX) +QBLASOBJS = qtrtrs_UNU_single.$(SUFFIX) qtrtrs_UNN_single.$(SUFFIX) qtrtrs_UTU_single.$(SUFFIX) qtrtrs_UTN_single.$(SUFFIX) qtrtrs_LNU_single.$(SUFFIX) qtrtrs_LNN_single.$(SUFFIX) qtrtrs_LTU_single.$(SUFFIX) qtrtrs_LTN_single.$(SUFFIX) +CBLASOBJS = cgetrs_N_single.$(SUFFIX) cgetrs_T_single.$(SUFFIX) cgetrs_R_single.$(SUFFIX) cgetrs_C_single.$(SUFFIX) +ZBLASOBJS = zgetrs_N_single.$(SUFFIX) zgetrs_T_single.$(SUFFIX) zgetrs_R_single.$(SUFFIX) zgetrs_C_single.$(SUFFIX) +XBLASOBJS = xgetrs_N_single.$(SUFFIX) xgetrs_T_single.$(SUFFIX) xgetrs_R_single.$(SUFFIX) xgetrs_C_single.$(SUFFIX) + +ifdef SMP +SBLASOBJS += sgetrs_N_parallel.$(SUFFIX) sgetrs_T_parallel.$(SUFFIX) +DBLASOBJS += dgetrs_N_parallel.$(SUFFIX) dgetrs_T_parallel.$(SUFFIX) +QBLASOBJS += qgetrs_N_parallel.$(SUFFIX) qgetrs_T_parallel.$(SUFFIX) +CBLASOBJS += cgetrs_N_parallel.$(SUFFIX) cgetrs_T_parallel.$(SUFFIX) cgetrs_R_parallel.$(SUFFIX) cgetrs_C_parallel.$(SUFFIX) +ZBLASOBJS += zgetrs_N_parallel.$(SUFFIX) zgetrs_T_parallel.$(SUFFIX) zgetrs_R_parallel.$(SUFFIX) zgetrs_C_parallel.$(SUFFIX) +XBLASOBJS += xgetrs_N_parallel.$(SUFFIX) xgetrs_T_parallel.$(SUFFIX) xgetrs_R_parallel.$(SUFFIX) xgetrs_C_parallel.$(SUFFIX) +endif + +strtrs_UNU_single.$(SUFFIX) : trtrs_single.c + $(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -UUPLO -UTRANS -UDIAG $< -o $(@F) + +strtrs_UNN_single.$(SUFFIX) : trtrs_single.c + $(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -UUPLO -UTRANS -DDIAG $< -o $(@F) + +strtrs_UTU_single.$(SUFFIX) : trtrs_single.c + $(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -UUPLO -DTRANS -UDIAG $< -o $(@F) + +strtrs_UTN_single.$(SUFFIX) : trtrs_single.c + $(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -UUPLO -DTRANS -DDIAG $< -o $(@F) + +strtrs_LNU_single.$(SUFFIX) : trtrs_single.c + $(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DUPLO -UTRANS -UDIAG $< -o $(@F) + +strtrs_LNN_single.$(SUFFIX) : trtrs_single.c + $(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DUPLO -UTRANS -DDIAG $< -o $(@F) + +strtrs_LTU_single.$(SUFFIX) : trtrs_single.c + $(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DUPLO -DTRANS -DDIAG $< -o $(@F) + +strtrs_LTN_single.$(SUFFIX) : trtrs_single.c + $(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DUPLO -DTRANS -DDIAG $< -o $(@F) + +strtrs_UNU_parallel.$(SUFFIX) : trtrs_parallel.c + $(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -UUPLO -UTRANS -UDIAG $< -o $(@F) + +strtrs_UNN_parallel.$(SUFFIX) : trtrs_parallel.c + $(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -UUPLO -UTRANS -DDIAG $< -o $(@F) + +strtrs_UTU_parallel.$(SUFFIX) : trtrs_parallel.c + $(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -UUPLO -DTRANS -UDIAG $< -o $(@F) + +strtrs_UTN_parallel.$(SUFFIX) : trtrs_parallel.c + $(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -UUPLO -DTRANS -DDIAG $< -o $(@F) + +strtrs_LNU_parallel.$(SUFFIX) : trtrs_parallel.c + $(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DUPLO -UTRANS -UDIAG $< -o $(@F) + +strtrs_LNN_parallel.$(SUFFIX) : trtrs_parallel.c + $(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DUPLO -UTRANS -DDIAG $< -o $(@F) + +strtrs_LTU_parallel.$(SUFFIX) : trtrs_parallel.c + $(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DUPLO -DTRANS -DDIAG $< -o $(@F) + +strtrs_LTN_parallel.$(SUFFIX) : trtrs_parallel.c + $(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DUPLO -DTRANS -DDIAG $< -o $(@F) + +dtrtrs_UNU_single.$(SUFFIX) : trtrs_single.c + $(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -UUPLO -UTRANS -UDIAG $< -o $(@F) + +dtrtrs_UNN_single.$(SUFFIX) : trtrs_single.c + $(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -UUPLO -UTRANS -DDIAG $< -o $(@F) + +dtrtrs_UTU_single.$(SUFFIX) : trtrs_single.c + $(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -UUPLO -DTRANS -UDIAG $< -o $(@F) + +dtrtrs_UTN_single.$(SUFFIX) : trtrs_single.c + $(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -UUPLO -DTRANS -DDIAG $< -o $(@F) + +dtrtrs_LNU_single.$(SUFFIX) : trtrs_single.c + $(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -DUPLO -UTRANS -UDIAG $< -o $(@F) + +dtrtrs_LNN_single.$(SUFFIX) : trtrs_single.c + $(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -DUPLO -UTRANS -DDIAG $< -o $(@F) + +dtrtrs_LTU_single.$(SUFFIX) : trtrs_single.c + $(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -DUPLO -DTRANS -DDIAG $< -o $(@F) + +dtrtrs_LTN_single.$(SUFFIX) : trtrs_single.c + $(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -DUPLO -DTRANS -DDIAG $< -o $(@F) + +dtrtrs_UNU_parallel.$(SUFFIX) : trtrs_parallel.c + $(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -UUPLO -UTRANS -UDIAG $< -o $(@F) + +dtrtrs_UNN_parallel.$(SUFFIX) : trtrs_parallel.c + $(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -UUPLO -UTRANS -DDIAG $< -o $(@F) + +dtrtrs_UTU_parallel.$(SUFFIX) : trtrs_parallel.c + $(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -UUPLO -DTRANS -UDIAG $< -o $(@F) + +dtrtrs_UTN_parallel.$(SUFFIX) : trtrs_parallel.c + $(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -UUPLO -DTRANS -DDIAG $< -o $(@F) + +dtrtrs_LNU_parallel.$(SUFFIX) : trtrs_parallel.c + $(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -DUPLO -UTRANS -UDIAG $< -o $(@F) + +dtrtrs_LNN_parallel.$(SUFFIX) : trtrs_parallel.c + $(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -DUPLO -UTRANS -DDIAG $< -o $(@F) + +dtrtrs_LTU_parallel.$(SUFFIX) : trtrs_parallel.c + $(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -DUPLO -DTRANS -DDIAG $< -o $(@F) + +dtrtrs_LTN_parallel.$(SUFFIX) : trtrs_parallel.c + $(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -DUPLO -DTRANS -DDIAG $< -o $(@F) + +qtrtrs_UNU_single.$(SUFFIX) : trtrs_single.c + $(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -UUPLO -UTRANS -UDIAG $< -o $(@F) + +qtrtrs_UNN_single.$(SUFFIX) : trtrs_single.c + $(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -UUPLO -UTRANS -DDIAG $< -o $(@F) + +qtrtrs_UTU_single.$(SUFFIX) : trtrs_single.c + $(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -UUPLO -DTRANS -UDIAG $< -o $(@F) + +qtrtrs_UTN_single.$(SUFFIX) : trtrs_single.c + $(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -UUPLO -DTRANS -DDIAG $< -o $(@F) + +qtrtrs_LNU_single.$(SUFFIX) : trtrs_single.c + $(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -DUPLO -UTRANS -UDIAG $< -o $(@F) + +qtrtrs_LNN_single.$(SUFFIX) : trtrs_single.c + $(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -DUPLO -UTRANS -DDIAG $< -o $(@F) + +qtrtrs_LTU_single.$(SUFFIX) : trtrs_single.c + $(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -DUPLO -DTRANS -DDIAG $< -o $(@F) + +qtrtrs_LTN_single.$(SUFFIX) : trtrs_single.c + $(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -DUPLO -DTRANS -DDIAG $< -o $(@F) + +qtrtrs_UNU_parallel.$(SUFFIX) : trtrs_parallel.c + $(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -UUPLO -UTRANS -UDIAG $< -o $(@F) + +qtrtrs_UNN_parallel.$(SUFFIX) : trtrs_parallel.c + $(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -UUPLO -UTRANS -DDIAG $< -o $(@F) + +qtrtrs_UTU_parallel.$(SUFFIX) : trtrs_parallel.c + $(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -UUPLO -DTRANS -UDIAG $< -o $(@F) + +qtrtrs_UTN_parallel.$(SUFFIX) : trtrs_parallel.c + $(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -UUPLO -DTRANS -DDIAG $< -o $(@F) + +qtrtrs_LNU_parallel.$(SUFFIX) : trtrs_parallel.c + $(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -DUPLO -UTRANS -UDIAG $< -o $(@F) + +qtrtrs_LNN_parallel.$(SUFFIX) : trtrs_parallel.c + $(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -DUPLO -UTRANS -DDIAG $< -o $(@F) + +qtrtrs_LTU_parallel.$(SUFFIX) : trtrs_parallel.c + $(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -DUPLO -DTRANS -DDIAG $< -o $(@F) + +qtrtrs_LTN_parallel.$(SUFFIX) : trtrs_parallel.c + $(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -DUPLO -DTRANS -DDIAG $< -o $(@F) + +ctrtrs_UNU_single.$(SUFFIX) : trtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -UUPLO -UTRANS -UDIAG $< -o $(@F) + +ctrtrs_UNN_single.$(SUFFIX) : trtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -UUPLO -UTRANS -DDIAG $< -o $(@F) + +ctrtrs_UTU_single.$(SUFFIX) : trtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -UUPLO -DTRANS -UDIAG $< -o $(@F) + +ctrtrs_UTN_single.$(SUFFIX) : trtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -UUPLO -DTRANS -DDIAG $< -o $(@F) + +ctrtrs_LNU_single.$(SUFFIX) : trtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DUPLO -UTRANS -UDIAG $< -o $(@F) + +ctrtrs_LNN_single.$(SUFFIX) : trtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DUPLO -UTRANS -DDIAG $< -o $(@F) + +ctrtrs_LTU_single.$(SUFFIX) : trtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DUPLO -DTRANS -DDIAG $< -o $(@F) + +ctrtrs_LTN_single.$(SUFFIX) : trtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DUPLO -DTRANS -DDIAG $< -o $(@F) + +ctrtrs_UNU_parallel.$(SUFFIX) : trtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -UUPLO -UTRANS -UDIAG $< -o $(@F) + +ctrtrs_UNN_parallel.$(SUFFIX) : trtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -UUPLO -UTRANS -DDIAG $< -o $(@F) + +ctrtrs_UTU_parallel.$(SUFFIX) : trtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -UUPLO -DTRANS -UDIAG $< -o $(@F) + +ctrtrs_UTN_parallel.$(SUFFIX) : trtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -UUPLO -DTRANS -DDIAG $< -o $(@F) + +ctrtrs_LNU_parallel.$(SUFFIX) : trtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DUPLO -UTRANS -UDIAG $< -o $(@F) + +ctrtrs_LNN_parallel.$(SUFFIX) : trtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DUPLO -UTRANS -DDIAG $< -o $(@F) + +ctrtrs_LTU_parallel.$(SUFFIX) : trtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DUPLO -DTRANS -DDIAG $< -o $(@F) + +ctrtrs_LTN_parallel.$(SUFFIX) : trtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DUPLO -DTRANS -DDIAG $< -o $(@F) + +ztrtrs_UNU_single.$(SUFFIX) : trtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -UUPLO -UTRANS -UDIAG $< -o $(@F) + +ztrtrs_UNN_single.$(SUFFIX) : trtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -UUPLO -UTRANS -DDIAG $< -o $(@F) + +ztrtrs_UTU_single.$(SUFFIX) : trtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -UUPLO -DTRANS -UDIAG $< -o $(@F) + +ztrtrs_UTN_single.$(SUFFIX) : trtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -UUPLO -DTRANS -DDIAG $< -o $(@F) + +ztrtrs_LNU_single.$(SUFFIX) : trtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DUPLO -UTRANS -UDIAG $< -o $(@F) + +ztrtrs_LNN_single.$(SUFFIX) : trtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DUPLO -UTRANS -DDIAG $< -o $(@F) + +ztrtrs_LTU_single.$(SUFFIX) : trtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DUPLO -DTRANS -DDIAG $< -o $(@F) + +ztrtrs_LTN_single.$(SUFFIX) : trtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DUPLO -DTRANS -DDIAG $< -o $(@F) + +ztrtrs_UNU_parallel.$(SUFFIX) : trtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -UUPLO -UTRANS -UDIAG $< -o $(@F) + +ztrtrs_UNN_parallel.$(SUFFIX) : trtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -UUPLO -UTRANS -DDIAG $< -o $(@F) + +ztrtrs_UTU_parallel.$(SUFFIX) : trtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -UUPLO -DTRANS -UDIAG $< -o $(@F) + +ztrtrs_UTN_parallel.$(SUFFIX) : trtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -UUPLO -DTRANS -DDIAG $< -o $(@F) + +ztrtrs_LNU_parallel.$(SUFFIX) : trtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DUPLO -UTRANS -UDIAG $< -o $(@F) + +ztrtrs_LNN_parallel.$(SUFFIX) : trtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DUPLO -UTRANS -DDIAG $< -o $(@F) + +ztrtrs_LTU_parallel.$(SUFFIX) : trtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DUPLO -DTRANS -DDIAG $< -o $(@F) + +ztrtrs_LTN_parallel.$(SUFFIX) : trtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DUPLO -DTRANS -DDIAG $< -o $(@F) + +qtrtrs_UNU_single.$(SUFFIX) : trtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -UUPLO -UTRANS -UDIAG $< -o $(@F) + +qtrtrs_UNN_single.$(SUFFIX) : trtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -UUPLO -UTRANS -DDIAG $< -o $(@F) + +qtrtrs_UTU_single.$(SUFFIX) : trtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -UUPLO -DTRANS -UDIAG $< -o $(@F) + +qtrtrs_UTN_single.$(SUFFIX) : trtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -UUPLO -DTRANS -DDIAG $< -o $(@F) + +qtrtrs_LNU_single.$(SUFFIX) : trtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DUPLO -UTRANS -UDIAG $< -o $(@F) + +qtrtrs_LNN_single.$(SUFFIX) : trtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DUPLO -UTRANS -DDIAG $< -o $(@F) + +qtrtrs_LTU_single.$(SUFFIX) : trtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DUPLO -DTRANS -DDIAG $< -o $(@F) + +qtrtrs_LTN_single.$(SUFFIX) : trtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DUPLO -DTRANS -DDIAG $< -o $(@F) + +qtrtrs_UNU_parallel.$(SUFFIX) : trtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -UUPLO -UTRANS -UDIAG $< -o $(@F) + +qtrtrs_UNN_parallel.$(SUFFIX) : trtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -UUPLO -UTRANS -DDIAG $< -o $(@F) + +qtrtrs_UTU_parallel.$(SUFFIX) : trtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -UUPLO -DTRANS -UDIAG $< -o $(@F) + +qtrtrs_UTN_parallel.$(SUFFIX) : trtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -UUPLO -DTRANS -DDIAG $< -o $(@F) + +qtrtrs_LNU_parallel.$(SUFFIX) : trtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DUPLO -UTRANS -UDIAG $< -o $(@F) + +qtrtrs_LNN_parallel.$(SUFFIX) : trtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DUPLO -UTRANS -DDIAG $< -o $(@F) + +qtrtrs_LTU_parallel.$(SUFFIX) : trtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DUPLO -DTRANS -DDIAG $< -o $(@F) + +qtrtrs_LTN_parallel.$(SUFFIX) : trtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DUPLO -DTRANS -DDIAG $< -o $(@F) + +include ../../Makefile.tail diff --git a/lapack/trtrs/trtrs_single.c b/lapack/trtrs/trtrs_single.c new file mode 100644 index 000000000..0dbb03869 --- /dev/null +++ b/lapack/trtrs/trtrs_single.c @@ -0,0 +1,68 @@ +/*********************************************************************/ +/* Copyright 2009, 2010 The University of Texas at Austin. */ +/* All rights reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* 1. Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* */ +/* 2. Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ +/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ +/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ +/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ +/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/* POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/* The views and conclusions contained in the software and */ +/* documentation are those of the authors and should not be */ +/* interpreted as representing official policies, either expressed */ +/* or implied, of The University of Texas at Austin. */ +/*********************************************************************/ + +#include +#include "common.h" + +blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLOAT *sb, BLASLONG mypos) { + +#ifndef TRANS + LASWP_PLUS(args -> n, 1, args -> m, ZERO, args -> b, args -> ldb, NULL, 0, args -> c, 1); + + if (args -> n == 1){ + TRSV_NLU (args -> m, args -> a, args -> lda, args -> b, 1, sb); + TRSV_NUN (args -> m, args -> a, args -> lda, args -> b, 1, sb); + } else { + TRSM_LNLU (args, range_m, range_n, sa, sb, 0); + TRSM_LNUN (args, range_m, range_n, sa, sb, 0); + } + +#else + + if (args -> n == 1){ + TRSV_TUN (args -> m, args -> a, args -> lda, args -> b, 1, sb); + TRSV_TLU (args -> m, args -> a, args -> lda, args -> b, 1, sb); + } else { + TRSM_LTUN (args, range_m, range_n, sa, sb, 0); + TRSM_LTLU (args, range_m, range_n, sa, sb, 0); + } + + LASWP_MINUS(args -> n, 1, args -> m, ZERO, args -> b, args -> ldb, NULL, 0, args -> c, -1); +#endif + + return 0; } From a4f17a9297c444180be4f2a73cc2940ddda7b00f Mon Sep 17 00:00:00 2001 From: Guillaume Horel Date: Mon, 2 Sep 2019 21:15:20 -0400 Subject: [PATCH 06/19] add missing objects --- lapack/Makefile | 2 +- lapack/trtrs/Makefile | 210 +++++++++++++++++++++--------------------- 2 files changed, 106 insertions(+), 106 deletions(-) diff --git a/lapack/Makefile b/lapack/Makefile index aff5209d5..2bbb4603f 100644 --- a/lapack/Makefile +++ b/lapack/Makefile @@ -2,7 +2,7 @@ TOPDIR = .. include ../Makefile.system #SUBDIRS = laswp getf2 getrf potf2 potrf lauu2 lauum trti2 trtri getrs -SUBDIRS = getrf getf2 laswp getrs potrf potf2 lauu2 lauum trti2 trtri +SUBDIRS = getrf getf2 laswp getrs potrf potf2 lauu2 lauum trti2 trtri trtrs FLAMEDIRS = laswp getf2 potf2 lauu2 trti2 diff --git a/lapack/trtrs/Makefile b/lapack/trtrs/Makefile index 6f41a9319..400b8b653 100644 --- a/lapack/trtrs/Makefile +++ b/lapack/trtrs/Makefile @@ -4,17 +4,17 @@ include ../../Makefile.system SBLASOBJS = strtrs_UNU_single.$(SUFFIX) strtrs_UNN_single.$(SUFFIX) strtrs_UTU_single.$(SUFFIX) strtrs_UTN_single.$(SUFFIX) strtrs_LNU_single.$(SUFFIX) strtrs_LNN_single.$(SUFFIX) strtrs_LTU_single.$(SUFFIX) strtrs_LTN_single.$(SUFFIX) DBLASOBJS = dtrtrs_UNU_single.$(SUFFIX) dtrtrs_UNN_single.$(SUFFIX) dtrtrs_UTU_single.$(SUFFIX) dtrtrs_UTN_single.$(SUFFIX) dtrtrs_LNU_single.$(SUFFIX) dtrtrs_LNN_single.$(SUFFIX) dtrtrs_LTU_single.$(SUFFIX) dtrtrs_LTN_single.$(SUFFIX) QBLASOBJS = qtrtrs_UNU_single.$(SUFFIX) qtrtrs_UNN_single.$(SUFFIX) qtrtrs_UTU_single.$(SUFFIX) qtrtrs_UTN_single.$(SUFFIX) qtrtrs_LNU_single.$(SUFFIX) qtrtrs_LNN_single.$(SUFFIX) qtrtrs_LTU_single.$(SUFFIX) qtrtrs_LTN_single.$(SUFFIX) -CBLASOBJS = cgetrs_N_single.$(SUFFIX) cgetrs_T_single.$(SUFFIX) cgetrs_R_single.$(SUFFIX) cgetrs_C_single.$(SUFFIX) -ZBLASOBJS = zgetrs_N_single.$(SUFFIX) zgetrs_T_single.$(SUFFIX) zgetrs_R_single.$(SUFFIX) zgetrs_C_single.$(SUFFIX) -XBLASOBJS = xgetrs_N_single.$(SUFFIX) xgetrs_T_single.$(SUFFIX) xgetrs_R_single.$(SUFFIX) xgetrs_C_single.$(SUFFIX) +CBLASOBJS = ctrtrs_UNU_single.$(SUFFIX) ctrtrs_UNN_single.$(SUFFIX) ctrtrs_UTU_single.$(SUFFIX) ctrtrs_UTN_single.$(SUFFIX) ctrtrs_URU_single.$(SUFFIX) ctrtrs_URN_single.$(SUFFIX) ctrtrs_UCU_single.$(SUFFIX) ctrtrs_UCN_single.$(SUFFIX) ctrtrs_LNU_single.$(SUFFIX) ctrtrs_LNN_single.$(SUFFIX) ctrtrs_LTU_single.$(SUFFIX) ctrtrs_LTN_single.$(SUFFIX) ctrtrs_LRU_single.$(SUFFIX) ctrtrs_LRN_single.$(SUFFIX) ctrtrs_LCU_single.$(SUFFIX) ctrtrs_LCN_single.$(SUFFIX) +ZBLASOBJS = ztrtrs_UNU_single.$(SUFFIX) ztrtrs_UNN_single.$(SUFFIX) ztrtrs_UTU_single.$(SUFFIX) ztrtrs_UTN_single.$(SUFFIX) ztrtrs_URU_single.$(SUFFIX) ztrtrs_URN_single.$(SUFFIX) ztrtrs_UCU_single.$(SUFFIX) ztrtrs_UCN_single.$(SUFFIX) ztrtrs_LNU_single.$(SUFFIX) ztrtrs_LNN_single.$(SUFFIX) ztrtrs_LTU_single.$(SUFFIX) ztrtrs_LTN_single.$(SUFFIX) ztrtrs_LRU_single.$(SUFFIX) ztrtrs_LRN_single.$(SUFFIX) ztrtrs_LCU_single.$(SUFFIX) ztrtrs_LCN_single.$(SUFFIX) +XBLASOBJS = xtrtrs_UNU_single.$(SUFFIX) xtrtrs_UNN_single.$(SUFFIX) xtrtrs_UTU_single.$(SUFFIX) xtrtrs_UTN_single.$(SUFFIX) xtrtrs_URU_single.$(SUFFIX) xtrtrs_URN_single.$(SUFFIX) xtrtrs_UCU_single.$(SUFFIX) xtrtrs_UCN_single.$(SUFFIX) xtrtrs_LNU_single.$(SUFFIX) xtrtrs_LNN_single.$(SUFFIX) xtrtrs_LTU_single.$(SUFFIX) xtrtrs_LTN_single.$(SUFFIX) xtrtrs_LRU_single.$(SUFFIX) xtrtrs_LRN_single.$(SUFFIX) xtrtrs_LCU_single.$(SUFFIX) xtrtrs_LCN_single.$(SUFFIX) ifdef SMP -SBLASOBJS += sgetrs_N_parallel.$(SUFFIX) sgetrs_T_parallel.$(SUFFIX) -DBLASOBJS += dgetrs_N_parallel.$(SUFFIX) dgetrs_T_parallel.$(SUFFIX) -QBLASOBJS += qgetrs_N_parallel.$(SUFFIX) qgetrs_T_parallel.$(SUFFIX) -CBLASOBJS += cgetrs_N_parallel.$(SUFFIX) cgetrs_T_parallel.$(SUFFIX) cgetrs_R_parallel.$(SUFFIX) cgetrs_C_parallel.$(SUFFIX) -ZBLASOBJS += zgetrs_N_parallel.$(SUFFIX) zgetrs_T_parallel.$(SUFFIX) zgetrs_R_parallel.$(SUFFIX) zgetrs_C_parallel.$(SUFFIX) -XBLASOBJS += xgetrs_N_parallel.$(SUFFIX) xgetrs_T_parallel.$(SUFFIX) xgetrs_R_parallel.$(SUFFIX) xgetrs_C_parallel.$(SUFFIX) +SBLASOBJS += strtrs_UNU_parallel.$(SUFFIX) strtrs_UNN_parallel.$(SUFFIX) strtrs_UTU_parallel.$(SUFFIX) strtrs_UTN_parallel.$(SUFFIX) strtrs_LNU_parallel.$(SUFFIX) strtrs_LNN_parallel.$(SUFFIX) strtrs_LTU_parallel.$(SUFFIX) strtrs_LTN_parallel.$(SUFFIX) +DBLASOBJS += dtrtrs_UNU_parallel.$(SUFFIX) dtrtrs_UNN_parallel.$(SUFFIX) dtrtrs_UTU_parallel.$(SUFFIX) dtrtrs_UTN_parallel.$(SUFFIX) dtrtrs_LNU_parallel.$(SUFFIX) dtrtrs_LNN_parallel.$(SUFFIX) dtrtrs_LTU_parallel.$(SUFFIX) dtrtrs_LTN_parallel.$(SUFFIX) +QBLASOBJS += qtrtrs_UNU_parallel.$(SUFFIX) qtrtrs_UNN_parallel.$(SUFFIX) qtrtrs_UTU_parallel.$(SUFFIX) qtrtrs_UTN_parallel.$(SUFFIX) qtrtrs_LNU_parallel.$(SUFFIX) qtrtrs_LNN_parallel.$(SUFFIX) qtrtrs_LTU_parallel.$(SUFFIX) qtrtrs_LTN_parallel.$(SUFFIX) +CBLASOBJS += ctrtrs_UNU_parallel.$(SUFFIX) ctrtrs_UNN_parallel.$(SUFFIX) ctrtrs_UTU_parallel.$(SUFFIX) ctrtrs_UTN_parallel.$(SUFFIX) ctrtrs_URU_parallel.$(SUFFIX) ctrtrs_URN_parallel.$(SUFFIX) ctrtrs_UCU_parallel.$(SUFFIX) ctrtrs_UCN_parallel.$(SUFFIX) ctrtrs_LNU_parallel.$(SUFFIX) ctrtrs_LNN_parallel.$(SUFFIX) ctrtrs_LTU_parallel.$(SUFFIX) ctrtrs_LTN_parallel.$(SUFFIX) ctrtrs_LRU_parallel.$(SUFFIX) ctrtrs_LRN_parallel.$(SUFFIX) ctrtrs_LCU_parallel.$(SUFFIX) ctrtrs_LCN_parallel.$(SUFFIX) +ZBLASOBJS += ztrtrs_UNU_parallel.$(SUFFIX) ztrtrs_UNN_parallel.$(SUFFIX) ztrtrs_UTU_parallel.$(SUFFIX) ztrtrs_UTN_parallel.$(SUFFIX) ztrtrs_URU_parallel.$(SUFFIX) ztrtrs_URN_parallel.$(SUFFIX) ztrtrs_UCU_parallel.$(SUFFIX) ztrtrs_UCN_parallel.$(SUFFIX) ztrtrs_LNU_parallel.$(SUFFIX) ztrtrs_LNN_parallel.$(SUFFIX) ztrtrs_LTU_parallel.$(SUFFIX) ztrtrs_LTN_parallel.$(SUFFIX) ztrtrs_LRU_parallel.$(SUFFIX) ztrtrs_LRN_parallel.$(SUFFIX) ztrtrs_LCU_parallel.$(SUFFIX) ztrtrs_LCN_parallel.$(SUFFIX) +XBLASOBJS += xtrtrs_UNU_parallel.$(SUFFIX) xtrtrs_UNN_parallel.$(SUFFIX) xtrtrs_UTU_parallel.$(SUFFIX) xtrtrs_UTN_parallel.$(SUFFIX) xtrtrs_URU_parallel.$(SUFFIX) xtrtrs_URN_parallel.$(SUFFIX) xtrtrs_UCU_parallel.$(SUFFIX) xtrtrs_UCN_parallel.$(SUFFIX) xtrtrs_LNU_parallel.$(SUFFIX) xtrtrs_LNN_parallel.$(SUFFIX) xtrtrs_LTU_parallel.$(SUFFIX) xtrtrs_LTN_parallel.$(SUFFIX) xtrtrs_LRU_parallel.$(SUFFIX) xtrtrs_LRN_parallel.$(SUFFIX) xtrtrs_LCU_parallel.$(SUFFIX) xtrtrs_LCN_parallel.$(SUFFIX) endif strtrs_UNU_single.$(SUFFIX) : trtrs_single.c @@ -161,148 +161,148 @@ qtrtrs_LTU_parallel.$(SUFFIX) : trtrs_parallel.c qtrtrs_LTN_parallel.$(SUFFIX) : trtrs_parallel.c $(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -DUPLO -DTRANS -DDIAG $< -o $(@F) -ctrtrs_UNU_single.$(SUFFIX) : trtrs_single.c - $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -UUPLO -UTRANS -UDIAG $< -o $(@F) +ctrtrs_UNU_single.$(SUFFIX) : ztrtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -UUPLO -DTRANS=1 -UDIAG $< -o $(@F) -ctrtrs_UNN_single.$(SUFFIX) : trtrs_single.c - $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -UUPLO -UTRANS -DDIAG $< -o $(@F) +ctrtrs_UNN_single.$(SUFFIX) : ztrtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -UUPLO -DTRANS=1 -DDIAG $< -o $(@F) -ctrtrs_UTU_single.$(SUFFIX) : trtrs_single.c - $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -UUPLO -DTRANS -UDIAG $< -o $(@F) +ctrtrs_UTU_single.$(SUFFIX) : ztrtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -UUPLO -DTRANS=2 -UDIAG $< -o $(@F) -ctrtrs_UTN_single.$(SUFFIX) : trtrs_single.c - $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -UUPLO -DTRANS -DDIAG $< -o $(@F) +ctrtrs_UTN_single.$(SUFFIX) : ztrtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -UUPLO -DTRANS=2 -DDIAG $< -o $(@F) -ctrtrs_LNU_single.$(SUFFIX) : trtrs_single.c - $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DUPLO -UTRANS -UDIAG $< -o $(@F) +ctrtrs_URU_single.$(SUFFIX) : ztrtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -UUPLO -DTRANS=3 -UDIAG $< -o $(@F) -ctrtrs_LNN_single.$(SUFFIX) : trtrs_single.c - $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DUPLO -UTRANS -DDIAG $< -o $(@F) +ctrtrs_URN_single.$(SUFFIX) : ztrtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -UUPLO -DTRANS=3 -DDIAG $< -o $(@F) -ctrtrs_LTU_single.$(SUFFIX) : trtrs_single.c - $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DUPLO -DTRANS -DDIAG $< -o $(@F) +ctrtrs_UCU_single.$(SUFFIX) : ztrtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -UUPLO -DTRANS=4 -UDIAG $< -o $(@F) -ctrtrs_LTN_single.$(SUFFIX) : trtrs_single.c - $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DUPLO -DTRANS -DDIAG $< -o $(@F) +ctrtrs_UCN_single.$(SUFFIX) : ztrtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -UUPLO -DTRANS=4 -DDIAG $< -o $(@F) -ctrtrs_UNU_parallel.$(SUFFIX) : trtrs_parallel.c - $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -UUPLO -UTRANS -UDIAG $< -o $(@F) +ctrtrs_LNU_single.$(SUFFIX) : ztrtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DUPLO -DTRANS=1 -UDIAG $< -o $(@F) -ctrtrs_UNN_parallel.$(SUFFIX) : trtrs_parallel.c - $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -UUPLO -UTRANS -DDIAG $< -o $(@F) +ctrtrs_LNN_single.$(SUFFIX) : ztrtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DUPLO -DTRANS=1 -DDIAG $< -o $(@F) -ctrtrs_UTU_parallel.$(SUFFIX) : trtrs_parallel.c - $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -UUPLO -DTRANS -UDIAG $< -o $(@F) +ctrtrs_LTU_single.$(SUFFIX) : ztrtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DUPLO -DTRANS=2 -UDIAG $< -o $(@F) -ctrtrs_UTN_parallel.$(SUFFIX) : trtrs_parallel.c - $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -UUPLO -DTRANS -DDIAG $< -o $(@F) +ctrtrs_LTN_single.$(SUFFIX) : ztrtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DUPLO -DTRANS=2 -DDIAG $< -o $(@F) -ctrtrs_LNU_parallel.$(SUFFIX) : trtrs_parallel.c - $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DUPLO -UTRANS -UDIAG $< -o $(@F) +ctrtrs_LRU_single.$(SUFFIX) : ztrtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DUPLO -DTRANS=3 -UDIAG $< -o $(@F) -ctrtrs_LNN_parallel.$(SUFFIX) : trtrs_parallel.c - $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DUPLO -UTRANS -DDIAG $< -o $(@F) +ctrtrs_LRN_single.$(SUFFIX) : ztrtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DUPLO -DTRANS=3 -DDIAG $< -o $(@F) -ctrtrs_LTU_parallel.$(SUFFIX) : trtrs_parallel.c - $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DUPLO -DTRANS -DDIAG $< -o $(@F) +ctrtrs_LCU_single.$(SUFFIX) : ztrtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DUPLO -DTRANS=4 -UDIAG $< -o $(@F) -ctrtrs_LTN_parallel.$(SUFFIX) : trtrs_parallel.c - $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DUPLO -DTRANS -DDIAG $< -o $(@F) +ctrtrs_LCN_single.$(SUFFIX) : ztrtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DUPLO -DTRANS=4 -DDIAG $< -o $(@F) -ztrtrs_UNU_single.$(SUFFIX) : trtrs_single.c - $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -UUPLO -UTRANS -UDIAG $< -o $(@F) +ztrtrs_UNU_single.$(SUFFIX) : ztrtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -UUPLO -DTRANS=1 -UDIAG $< -o $(@F) -ztrtrs_UNN_single.$(SUFFIX) : trtrs_single.c - $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -UUPLO -UTRANS -DDIAG $< -o $(@F) +ztrtrs_UNN_single.$(SUFFIX) : ztrtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -UUPLO -DTRANS=1 -DDIAG $< -o $(@F) -ztrtrs_UTU_single.$(SUFFIX) : trtrs_single.c - $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -UUPLO -DTRANS -UDIAG $< -o $(@F) +ztrtrs_UTU_single.$(SUFFIX) : ztrtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -UUPLO -DTRANS=2 -UDIAG $< -o $(@F) -ztrtrs_UTN_single.$(SUFFIX) : trtrs_single.c - $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -UUPLO -DTRANS -DDIAG $< -o $(@F) +ztrtrs_UTN_single.$(SUFFIX) : ztrtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -UUPLO -DTRANS=2 -DDIAG $< -o $(@F) -ztrtrs_LNU_single.$(SUFFIX) : trtrs_single.c - $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DUPLO -UTRANS -UDIAG $< -o $(@F) +ztrtrs_URU_single.$(SUFFIX) : ztrtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -UUPLO -DTRANS=3 -UDIAG $< -o $(@F) -ztrtrs_LNN_single.$(SUFFIX) : trtrs_single.c - $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DUPLO -UTRANS -DDIAG $< -o $(@F) +ztrtrs_URN_single.$(SUFFIX) : ztrtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -UUPLO -DTRANS=3 -DDIAG $< -o $(@F) -ztrtrs_LTU_single.$(SUFFIX) : trtrs_single.c - $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DUPLO -DTRANS -DDIAG $< -o $(@F) +ztrtrs_UCU_single.$(SUFFIX) : ztrtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -UUPLO -DTRANS=4 -UDIAG $< -o $(@F) -ztrtrs_LTN_single.$(SUFFIX) : trtrs_single.c - $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DUPLO -DTRANS -DDIAG $< -o $(@F) +ztrtrs_UCN_single.$(SUFFIX) : ztrtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -UUPLO -DTRANS=4 -DDIAG $< -o $(@F) -ztrtrs_UNU_parallel.$(SUFFIX) : trtrs_parallel.c - $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -UUPLO -UTRANS -UDIAG $< -o $(@F) +ztrtrs_LNU_single.$(SUFFIX) : ztrtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DUPLO -DTRANS=1 -UDIAG $< -o $(@F) -ztrtrs_UNN_parallel.$(SUFFIX) : trtrs_parallel.c - $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -UUPLO -UTRANS -DDIAG $< -o $(@F) +ztrtrs_LNN_single.$(SUFFIX) : ztrtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DUPLO -DTRANS=1 -DDIAG $< -o $(@F) -ztrtrs_UTU_parallel.$(SUFFIX) : trtrs_parallel.c - $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -UUPLO -DTRANS -UDIAG $< -o $(@F) +ztrtrs_LTU_single.$(SUFFIX) : ztrtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DUPLO -DTRANS=2 -UDIAG $< -o $(@F) -ztrtrs_UTN_parallel.$(SUFFIX) : trtrs_parallel.c - $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -UUPLO -DTRANS -DDIAG $< -o $(@F) +ztrtrs_LTN_single.$(SUFFIX) : ztrtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DUPLO -DTRANS=2 -DDIAG $< -o $(@F) -ztrtrs_LNU_parallel.$(SUFFIX) : trtrs_parallel.c - $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DUPLO -UTRANS -UDIAG $< -o $(@F) +ztrtrs_LRU_single.$(SUFFIX) : ztrtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DUPLO -DTRANS=3 -UDIAG $< -o $(@F) -ztrtrs_LNN_parallel.$(SUFFIX) : trtrs_parallel.c - $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DUPLO -UTRANS -DDIAG $< -o $(@F) +ztrtrs_LRN_single.$(SUFFIX) : ztrtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DUPLO -DTRANS=3 -DDIAG $< -o $(@F) -ztrtrs_LTU_parallel.$(SUFFIX) : trtrs_parallel.c - $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DUPLO -DTRANS -DDIAG $< -o $(@F) +ztrtrs_LCU_single.$(SUFFIX) : ztrtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DUPLO -DTRANS=4 -UDIAG $< -o $(@F) -ztrtrs_LTN_parallel.$(SUFFIX) : trtrs_parallel.c - $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DUPLO -DTRANS -DDIAG $< -o $(@F) +ztrtrs_LCN_single.$(SUFFIX) : ztrtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DUPLO -DTRANS=4 -DDIAG $< -o $(@F) -qtrtrs_UNU_single.$(SUFFIX) : trtrs_single.c - $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -UUPLO -UTRANS -UDIAG $< -o $(@F) +xtrtrs_UNU_single.$(SUFFIX) : xtrtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -UUPLO -DTRANS=1 -UDIAG $< -o $(@F) -qtrtrs_UNN_single.$(SUFFIX) : trtrs_single.c - $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -UUPLO -UTRANS -DDIAG $< -o $(@F) +xtrtrs_UNN_single.$(SUFFIX) : xtrtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -UUPLO -DTRANS=1 -DDIAG $< -o $(@F) -qtrtrs_UTU_single.$(SUFFIX) : trtrs_single.c - $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -UUPLO -DTRANS -UDIAG $< -o $(@F) +xtrtrs_UTU_single.$(SUFFIX) : xtrtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -UUPLO -DTRANS=2 -UDIAG $< -o $(@F) -qtrtrs_UTN_single.$(SUFFIX) : trtrs_single.c - $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -UUPLO -DTRANS -DDIAG $< -o $(@F) +xtrtrs_UTN_single.$(SUFFIX) : xtrtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -UUPLO -DTRANS=2 -DDIAG $< -o $(@F) -qtrtrs_LNU_single.$(SUFFIX) : trtrs_single.c - $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DUPLO -UTRANS -UDIAG $< -o $(@F) +xtrtrs_URU_single.$(SUFFIX) : xtrtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -UUPLO -DTRANS=3 -UDIAG $< -o $(@F) -qtrtrs_LNN_single.$(SUFFIX) : trtrs_single.c - $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DUPLO -UTRANS -DDIAG $< -o $(@F) +xtrtrs_URN_single.$(SUFFIX) : xtrtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -UUPLO -DTRANS=3 -DDIAG $< -o $(@F) -qtrtrs_LTU_single.$(SUFFIX) : trtrs_single.c - $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DUPLO -DTRANS -DDIAG $< -o $(@F) +xtrtrs_UCU_single.$(SUFFIX) : xtrtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -UUPLO -DTRANS=4 -UDIAG $< -o $(@F) -qtrtrs_LTN_single.$(SUFFIX) : trtrs_single.c - $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DUPLO -DTRANS -DDIAG $< -o $(@F) +xtrtrs_UCN_single.$(SUFFIX) : xtrtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -UUPLO -DTRANS=4 -DDIAG $< -o $(@F) -qtrtrs_UNU_parallel.$(SUFFIX) : trtrs_parallel.c - $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -UUPLO -UTRANS -UDIAG $< -o $(@F) +xtrtrs_LNU_single.$(SUFFIX) : xtrtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DUPLO -DTRANS=1 -UDIAG $< -o $(@F) -qtrtrs_UNN_parallel.$(SUFFIX) : trtrs_parallel.c - $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -UUPLO -UTRANS -DDIAG $< -o $(@F) +xtrtrs_LNN_single.$(SUFFIX) : xtrtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DUPLO -DTRANS=1 -DDIAG $< -o $(@F) -qtrtrs_UTU_parallel.$(SUFFIX) : trtrs_parallel.c - $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -UUPLO -DTRANS -UDIAG $< -o $(@F) +xtrtrs_LTU_single.$(SUFFIX) : xtrtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DUPLO -DTRANS=2 -UDIAG $< -o $(@F) -qtrtrs_UTN_parallel.$(SUFFIX) : trtrs_parallel.c - $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -UUPLO -DTRANS -DDIAG $< -o $(@F) +xtrtrs_LTN_single.$(SUFFIX) : xtrtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DUPLO -DTRANS=2 -DDIAG $< -o $(@F) -qtrtrs_LNU_parallel.$(SUFFIX) : trtrs_parallel.c - $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DUPLO -UTRANS -UDIAG $< -o $(@F) +xtrtrs_LRU_single.$(SUFFIX) : xtrtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DUPLO -DTRANS=3 -UDIAG $< -o $(@F) -qtrtrs_LNN_parallel.$(SUFFIX) : trtrs_parallel.c - $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DUPLO -UTRANS -DDIAG $< -o $(@F) +xtrtrs_LRN_single.$(SUFFIX) : xtrtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DUPLO -DTRANS=3 -DDIAG $< -o $(@F) -qtrtrs_LTU_parallel.$(SUFFIX) : trtrs_parallel.c - $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DUPLO -DTRANS -DDIAG $< -o $(@F) +xtrtrs_LCU_single.$(SUFFIX) : xtrtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DUPLO -DTRANS=4 -UDIAG $< -o $(@F) -qtrtrs_LTN_parallel.$(SUFFIX) : trtrs_parallel.c - $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DUPLO -DTRANS -DDIAG $< -o $(@F) +xtrtrs_LCN_single.$(SUFFIX) : xtrtrs_single.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DUPLO -DTRANS=4 -DDIAG $< -o $(@F) include ../../Makefile.tail From 42203dafdcb8e2ab1bd9d68ede5ed78769e2b7a1 Mon Sep 17 00:00:00 2001 From: Guillaume Horel Date: Mon, 2 Sep 2019 21:57:28 -0400 Subject: [PATCH 07/19] add logic --- lapack/trtrs/trtrs_single.c | 79 ++++++++++++++++++++++++++----------- 1 file changed, 57 insertions(+), 22 deletions(-) diff --git a/lapack/trtrs/trtrs_single.c b/lapack/trtrs/trtrs_single.c index 0dbb03869..a690d4a25 100644 --- a/lapack/trtrs/trtrs_single.c +++ b/lapack/trtrs/trtrs_single.c @@ -41,28 +41,63 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLOAT *sb, BLASLONG mypos) { -#ifndef TRANS - LASWP_PLUS(args -> n, 1, args -> m, ZERO, args -> b, args -> ldb, NULL, 0, args -> c, 1); - - if (args -> n == 1){ - TRSV_NLU (args -> m, args -> a, args -> lda, args -> b, 1, sb); - TRSV_NUN (args -> m, args -> a, args -> lda, args -> b, 1, sb); - } else { - TRSM_LNLU (args, range_m, range_n, sa, sb, 0); - TRSM_LNUN (args, range_m, range_n, sa, sb, 0); - } - +#ifndef UPLO +#ifndef DIAG +#ifndef DIAG + if (args -> n == 1){ + TRSV_NLU (args -> m, args -> a, args -> lda, args -> b, 1, sb); + } else { + TRSM_LNLU (args, range_m, range_n, sa, sb, 0); + } #else - - if (args -> n == 1){ - TRSV_TUN (args -> m, args -> a, args -> lda, args -> b, 1, sb); - TRSV_TLU (args -> m, args -> a, args -> lda, args -> b, 1, sb); - } else { - TRSM_LTUN (args, range_m, range_n, sa, sb, 0); - TRSM_LTLU (args, range_m, range_n, sa, sb, 0); - } - - LASWP_MINUS(args -> n, 1, args -> m, ZERO, args -> b, args -> ldb, NULL, 0, args -> c, -1); + if (args -> n == 1){ + TRSV_NLU (args -> m, args -> a, args -> lda, args -> b, 1, sb); + } else { + TRSM_LNLU (args, range_m, range_n, sa, sb, 0); + } +#endif +#else +#ifndef DIAG + if (args -> n == 1){ + TRSV_NLU (args -> m, args -> a, args -> lda, args -> b, 1, sb); + } else { + TRSM_LNLU (args, range_m, range_n, sa, sb, 0); + } +#else + if (args -> n == 1){ + TRSV_NLU (args -> m, args -> a, args -> lda, args -> b, 1, sb); + } else { + TRSM_LNLU (args, range_m, range_n, sa, sb, 0); + } +#endif +#else +#ifndef DIAG +#ifndef DIAG + if (args -> n == 1){ + TRSV_NLU (args -> m, args -> a, args -> lda, args -> b, 1, sb); + } else { + TRSM_LNLU (args, range_m, range_n, sa, sb, 0); + } +#else + if (args -> n == 1){ + TRSV_NLU (args -> m, args -> a, args -> lda, args -> b, 1, sb); + } else { + TRSM_LNLU (args, range_m, range_n, sa, sb, 0); + } +#endif +#else +#ifndef DIAG + if (args -> n == 1){ + TRSV_NLU (args -> m, args -> a, args -> lda, args -> b, 1, sb); + } else { + TRSM_LNLU (args, range_m, range_n, sa, sb, 0); + } +#else + if (args -> n == 1){ + TRSV_NLU (args -> m, args -> a, args -> lda, args -> b, 1, sb); + } else { + TRSM_LNLU (args, range_m, range_n, sa, sb, 0); + } +#endif #endif - return 0; } From 9f6984fe4bd5ca2c44df01c647462f54c7b84762 Mon Sep 17 00:00:00 2001 From: Guillaume Horel Date: Tue, 3 Sep 2019 14:45:43 -0400 Subject: [PATCH 08/19] add missing files --- lapack/trtrs/Makefile | 144 +++++++++++++++++++++++++++++++++ lapack/trtrs/trtrs_parallel.c | 111 +++++++++++++++++++++++++ lapack/trtrs/trtrs_single.c | 84 +++++++------------ lapack/trtrs/ztrtrs_parallel.c | 118 +++++++++++++++++++++++++++ lapack/trtrs/ztrtrs_single.c | 98 ++++++++++++++++++++++ 5 files changed, 499 insertions(+), 56 deletions(-) create mode 100644 lapack/trtrs/trtrs_parallel.c create mode 100644 lapack/trtrs/ztrtrs_parallel.c create mode 100644 lapack/trtrs/ztrtrs_single.c diff --git a/lapack/trtrs/Makefile b/lapack/trtrs/Makefile index 400b8b653..f9faaf9b9 100644 --- a/lapack/trtrs/Makefile +++ b/lapack/trtrs/Makefile @@ -305,4 +305,148 @@ xtrtrs_LCU_single.$(SUFFIX) : xtrtrs_single.c xtrtrs_LCN_single.$(SUFFIX) : xtrtrs_single.c $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DUPLO -DTRANS=4 -DDIAG $< -o $(@F) +ctrtrs_UNU_parallel.$(SUFFIX) : ztrtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -UUPLO -DTRANS=1 -UDIAG $< -o $(@F) + +ctrtrs_UNN_parallel.$(SUFFIX) : ztrtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -UUPLO -DTRANS=1 -DDIAG $< -o $(@F) + +ctrtrs_UTU_parallel.$(SUFFIX) : ztrtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -UUPLO -DTRANS=2 -UDIAG $< -o $(@F) + +ctrtrs_UTN_parallel.$(SUFFIX) : ztrtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -UUPLO -DTRANS=2 -DDIAG $< -o $(@F) + +ctrtrs_URU_parallel.$(SUFFIX) : ztrtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -UUPLO -DTRANS=3 -UDIAG $< -o $(@F) + +ctrtrs_URN_parallel.$(SUFFIX) : ztrtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -UUPLO -DTRANS=3 -DDIAG $< -o $(@F) + +ctrtrs_UCU_parallel.$(SUFFIX) : ztrtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -UUPLO -DTRANS=4 -UDIAG $< -o $(@F) + +ctrtrs_UCN_parallel.$(SUFFIX) : ztrtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -UUPLO -DTRANS=4 -DDIAG $< -o $(@F) + +ctrtrs_LNU_parallel.$(SUFFIX) : ztrtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DUPLO -DTRANS=1 -UDIAG $< -o $(@F) + +ctrtrs_LNN_parallel.$(SUFFIX) : ztrtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DUPLO -DTRANS=1 -DDIAG $< -o $(@F) + +ctrtrs_LTU_parallel.$(SUFFIX) : ztrtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DUPLO -DTRANS=2 -UDIAG $< -o $(@F) + +ctrtrs_LTN_parallel.$(SUFFIX) : ztrtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DUPLO -DTRANS=2 -DDIAG $< -o $(@F) + +ctrtrs_LRU_parallel.$(SUFFIX) : ztrtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DUPLO -DTRANS=3 -UDIAG $< -o $(@F) + +ctrtrs_LRN_parallel.$(SUFFIX) : ztrtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DUPLO -DTRANS=3 -DDIAG $< -o $(@F) + +ctrtrs_LCU_parallel.$(SUFFIX) : ztrtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DUPLO -DTRANS=4 -UDIAG $< -o $(@F) + +ctrtrs_LCN_parallel.$(SUFFIX) : ztrtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DUPLO -DTRANS=4 -DDIAG $< -o $(@F) + +ztrtrs_UNU_parallel.$(SUFFIX) : ztrtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -UUPLO -DTRANS=1 -UDIAG $< -o $(@F) + +ztrtrs_UNN_parallel.$(SUFFIX) : ztrtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -UUPLO -DTRANS=1 -DDIAG $< -o $(@F) + +ztrtrs_UTU_parallel.$(SUFFIX) : ztrtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -UUPLO -DTRANS=2 -UDIAG $< -o $(@F) + +ztrtrs_UTN_parallel.$(SUFFIX) : ztrtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -UUPLO -DTRANS=2 -DDIAG $< -o $(@F) + +ztrtrs_URU_parallel.$(SUFFIX) : ztrtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -UUPLO -DTRANS=3 -UDIAG $< -o $(@F) + +ztrtrs_URN_parallel.$(SUFFIX) : ztrtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -UUPLO -DTRANS=3 -DDIAG $< -o $(@F) + +ztrtrs_UCU_parallel.$(SUFFIX) : ztrtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -UUPLO -DTRANS=4 -UDIAG $< -o $(@F) + +ztrtrs_UCN_parallel.$(SUFFIX) : ztrtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -UUPLO -DTRANS=4 -DDIAG $< -o $(@F) + +ztrtrs_LNU_parallel.$(SUFFIX) : ztrtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DUPLO -DTRANS=1 -UDIAG $< -o $(@F) + +ztrtrs_LNN_parallel.$(SUFFIX) : ztrtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DUPLO -DTRANS=1 -DDIAG $< -o $(@F) + +ztrtrs_LTU_parallel.$(SUFFIX) : ztrtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DUPLO -DTRANS=2 -UDIAG $< -o $(@F) + +ztrtrs_LTN_parallel.$(SUFFIX) : ztrtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DUPLO -DTRANS=2 -DDIAG $< -o $(@F) + +ztrtrs_LRU_parallel.$(SUFFIX) : ztrtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DUPLO -DTRANS=3 -UDIAG $< -o $(@F) + +ztrtrs_LRN_parallel.$(SUFFIX) : ztrtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DUPLO -DTRANS=3 -DDIAG $< -o $(@F) + +ztrtrs_LCU_parallel.$(SUFFIX) : ztrtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DUPLO -DTRANS=4 -UDIAG $< -o $(@F) + +ztrtrs_LCN_parallel.$(SUFFIX) : ztrtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DUPLO -DTRANS=4 -DDIAG $< -o $(@F) + +xtrtrs_UNU_parallel.$(SUFFIX) : xtrtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -UUPLO -DTRANS=1 -UDIAG $< -o $(@F) + +xtrtrs_UNN_parallel.$(SUFFIX) : xtrtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -UUPLO -DTRANS=1 -DDIAG $< -o $(@F) + +xtrtrs_UTU_parallel.$(SUFFIX) : xtrtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -UUPLO -DTRANS=2 -UDIAG $< -o $(@F) + +xtrtrs_UTN_parallel.$(SUFFIX) : xtrtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -UUPLO -DTRANS=2 -DDIAG $< -o $(@F) + +xtrtrs_URU_parallel.$(SUFFIX) : xtrtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -UUPLO -DTRANS=3 -UDIAG $< -o $(@F) + +xtrtrs_URN_parallel.$(SUFFIX) : xtrtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -UUPLO -DTRANS=3 -DDIAG $< -o $(@F) + +xtrtrs_UCU_parallel.$(SUFFIX) : xtrtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -UUPLO -DTRANS=4 -UDIAG $< -o $(@F) + +xtrtrs_UCN_parallel.$(SUFFIX) : xtrtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -UUPLO -DTRANS=4 -DDIAG $< -o $(@F) + +xtrtrs_LNU_parallel.$(SUFFIX) : xtrtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DUPLO -DTRANS=1 -UDIAG $< -o $(@F) + +xtrtrs_LNN_parallel.$(SUFFIX) : xtrtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DUPLO -DTRANS=1 -DDIAG $< -o $(@F) + +xtrtrs_LTU_parallel.$(SUFFIX) : xtrtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DUPLO -DTRANS=2 -UDIAG $< -o $(@F) + +xtrtrs_LTN_parallel.$(SUFFIX) : xtrtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DUPLO -DTRANS=2 -DDIAG $< -o $(@F) + +xtrtrs_LRU_parallel.$(SUFFIX) : xtrtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DUPLO -DTRANS=3 -UDIAG $< -o $(@F) + +xtrtrs_LRN_parallel.$(SUFFIX) : xtrtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DUPLO -DTRANS=3 -DDIAG $< -o $(@F) + +xtrtrs_LCU_parallel.$(SUFFIX) : xtrtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DUPLO -DTRANS=4 -UDIAG $< -o $(@F) + +xtrtrs_LCN_parallel.$(SUFFIX) : xtrtrs_parallel.c + $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DUPLO -DTRANS=4 -DDIAG $< -o $(@F) + include ../../Makefile.tail diff --git a/lapack/trtrs/trtrs_parallel.c b/lapack/trtrs/trtrs_parallel.c new file mode 100644 index 000000000..52f42f693 --- /dev/null +++ b/lapack/trtrs/trtrs_parallel.c @@ -0,0 +1,111 @@ +/*********************************************************************/ +/* Copyright 2009, 2010 The University of Texas at Austin. */ +/* All rights reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* 1. Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* */ +/* 2. Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ +/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ +/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ +/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ +/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/* POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/* The views and conclusions contained in the software and */ +/* documentation are those of the authors and should not be */ +/* interpreted as representing official policies, either expressed */ +/* or implied, of The University of Texas at Austin. */ +/*********************************************************************/ + +#include +#include "common.h" + +#if !defined(TRANS) && !defined(UPLO) && !defined(DIAG) +#define TRSM TRSM_LNUU +#define TRSV TRSV_NUU +#elif !defined(TRANS) && !defined(UPLO) && defined(DIAG) +#define TRSM TRSM_LNUN +#define TRSV TRSV_NUN +#elif !defined(TRANS) && defined(UPLO) && !defined(DIAG) +#define TRSM TRSM_LNLU +#define TRSV TRSV_NLU +#elif !defined(TRANS) && defined(UPLO) && defined(DIAG) +#define TRSM TRSM_LNLN +#define TRSV TRSV_NLN +#elif defined(TRANS) && !defined(UPLO) && !defined(DIAG) +#define TRSM TRSM_LTUU +#define TRSV TRSV_TUU +#elif defined(TRANS) && !defined(UPLO) && defined(DIAG) +#define TRSM TRSM_LTUN +#define TRSV TRSV_TUN +#elif defined(TRANS) && defined(UPLO) && !defined(DIAG) +#define TRSM TRSM_LTLU +#define TRSV TRSV_TLU +#elif defined(TRANS) && defined(UPLO) && defined(DIAG) +#define TRSM TRSM_LTLN +#define TRSV TRSV_TLN +#endif + +static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, + FLOAT *sa, FLOAT *sb, BLASLONG mypos) { + + TRSM (args, range_m, range_n, sa, sb, 0); + + return 0; +} + +blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLOAT *sb, BLASLONG mypos) { + + int mode; + +#ifndef TRANS + if (args -> n == 1){ + TRSV (args -> m, args -> a, args -> lda, args -> b, 1, sb); + } else { +#ifdef XDOUBLE + mode = BLAS_XDOUBLE | BLAS_REAL; +#elif defined(DOUBLE) + mode = BLAS_DOUBLE | BLAS_REAL; +#else + mode = BLAS_SINGLE | BLAS_REAL; +#endif + + gemm_thread_n(mode, args, NULL, NULL, inner_thread, sa, sb, args -> nthreads); + } +#else + if (args -> n == 1){ + TRSV (args -> m, args -> a, args -> lda, args -> b, 1, sb); + } else { +#ifdef XDOUBLE + mode = BLAS_XDOUBLE | BLAS_REAL | (1 << BLAS_TRANSA_SHIFT); +#elif defined(DOUBLE) + mode = BLAS_DOUBLE | BLAS_REAL | (1 << BLAS_TRANSA_SHIFT); +#else + mode = BLAS_SINGLE | BLAS_REAL | (1 << BLAS_TRANSA_SHIFT); +#endif + + gemm_thread_n(mode, args, NULL, NULL, inner_thread, sa, sb, args -> nthreads); + } +#endif + + return 0; + } diff --git a/lapack/trtrs/trtrs_single.c b/lapack/trtrs/trtrs_single.c index a690d4a25..c82b81303 100644 --- a/lapack/trtrs/trtrs_single.c +++ b/lapack/trtrs/trtrs_single.c @@ -39,65 +39,37 @@ #include #include "common.h" +#if !defined(TRANS) && !defined(UPLO) && !defined(DIAG) +#define TRSM TRSM_LNUU +#define TRSV TRSV_NUU +#elif !defined(TRANS) && !defined(UPLO) && defined(DIAG) +#define TRSM TRSM_LNUN +#define TRSV TRSV_NUN +#elif !defined(TRANS) && defined(UPLO) && !defined(DIAG) +#define TRSM TRSM_LNLU +#define TRSV TRSV_NLU +#elif !defined(TRANS) && defined(UPLO) && defined(DIAG) +#define TRSM TRSM_LNLN +#define TRSV TRSV_NLN +#elif defined(TRANS) && !defined(UPLO) && !defined(DIAG) +#define TRSM TRSM_LTUU +#define TRSV TRSV_TUU +#elif defined(TRANS) && !defined(UPLO) && defined(DIAG) +#define TRSM TRSM_LTUN +#define TRSV TRSV_TUN +#elif defined(TRANS) && defined(UPLO) && !defined(DIAG) +#define TRSM TRSM_LTLU +#define TRSV TRSV_TLU +#elif defined(TRANS) && defined(UPLO) && defined(DIAG) +#define TRSM TRSM_LTLN +#define TRSV TRSV_TLN +#endif + blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLOAT *sb, BLASLONG mypos) { -#ifndef UPLO -#ifndef DIAG -#ifndef DIAG if (args -> n == 1){ - TRSV_NLU (args -> m, args -> a, args -> lda, args -> b, 1, sb); + TRSV (args -> m, args -> a, args -> lda, args -> b, 1, sb); } else { - TRSM_LNLU (args, range_m, range_n, sa, sb, 0); + TRSM (args, range_m, range_n, sa, sb, 0); } -#else - if (args -> n == 1){ - TRSV_NLU (args -> m, args -> a, args -> lda, args -> b, 1, sb); - } else { - TRSM_LNLU (args, range_m, range_n, sa, sb, 0); - } -#endif -#else -#ifndef DIAG - if (args -> n == 1){ - TRSV_NLU (args -> m, args -> a, args -> lda, args -> b, 1, sb); - } else { - TRSM_LNLU (args, range_m, range_n, sa, sb, 0); - } -#else - if (args -> n == 1){ - TRSV_NLU (args -> m, args -> a, args -> lda, args -> b, 1, sb); - } else { - TRSM_LNLU (args, range_m, range_n, sa, sb, 0); - } -#endif -#else -#ifndef DIAG -#ifndef DIAG - if (args -> n == 1){ - TRSV_NLU (args -> m, args -> a, args -> lda, args -> b, 1, sb); - } else { - TRSM_LNLU (args, range_m, range_n, sa, sb, 0); - } -#else - if (args -> n == 1){ - TRSV_NLU (args -> m, args -> a, args -> lda, args -> b, 1, sb); - } else { - TRSM_LNLU (args, range_m, range_n, sa, sb, 0); - } -#endif -#else -#ifndef DIAG - if (args -> n == 1){ - TRSV_NLU (args -> m, args -> a, args -> lda, args -> b, 1, sb); - } else { - TRSM_LNLU (args, range_m, range_n, sa, sb, 0); - } -#else - if (args -> n == 1){ - TRSV_NLU (args -> m, args -> a, args -> lda, args -> b, 1, sb); - } else { - TRSM_LNLU (args, range_m, range_n, sa, sb, 0); - } -#endif -#endif return 0; } diff --git a/lapack/trtrs/ztrtrs_parallel.c b/lapack/trtrs/ztrtrs_parallel.c new file mode 100644 index 000000000..d5248f21b --- /dev/null +++ b/lapack/trtrs/ztrtrs_parallel.c @@ -0,0 +1,118 @@ +/*********************************************************************/ +/* Copyright 2009, 2010 The University of Texas at Austin. */ +/* All rights reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* 1. Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* */ +/* 2. Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ +/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ +/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ +/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ +/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/* POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/* The views and conclusions contained in the software and */ +/* documentation are those of the authors and should not be */ +/* interpreted as representing official policies, either expressed */ +/* or implied, of The University of Texas at Austin. */ +/*********************************************************************/ + +#include +#include "common.h" + +#if TRANS == 1 && !defined(UPLO) && !defined(DIAG) +#define TRSM TRSM_LNUU +#define ZTRSV ZTRSV_NUU +#elif TRANS == 1 && !defined(UPLO) && defined(DIAG) +#define TRSM TRSM_LNUN +#define ZTRSV ZTRSV_NUN +#elif TRANS == 1 && defined(UPLO) && !defined(DIAG) +#define TRSM TRSM_LNLU +#define ZTRSV ZTRSV_NLU +#elif TRANS == 1 && defined(UPLO) && defined(DIAG) +#define TRSM TRSM_LNLN +#define ZTRSV ZTRSV_NLN +#elif TRANS == 2 && !defined(UPLO) && !defined(DIAG) +#define TRSM TRSM_LTUU +#define ZTRSV ZTRSV_TUU +#elif TRANS == 2 && !defined(UPLO) && defined(DIAG) +#define TRSM TRSM_LTUN +#define ZTRSV ZTRSV_TUN +#elif TRANS == 2 && defined(UPLO) && !defined(DIAG) +#define TRSM TRSM_LTLU +#define ZTRSV ZTRSV_TLU +#elif TRANS == 2 && defined(UPLO) && defined(DIAG) +#define TRSM TRSM_LTLN +#define ZTRSV ZTRSV_TLN +#elif TRANS == 3 && !defined(UPLO) && !defined(DIAG) +#define TRSM TRSM_LRUU +#define ZTRSV ZTRSV_RUU +#elif TRANS == 3 && !defined(UPLO) && defined(DIAG) +#define TRSM TRSM_LRUN +#define ZTRSV ZTRSV_RUN +#elif TRANS == 3 && defined(UPLO) && !defined(DIAG) +#define TRSM TRSM_LRLU +#define ZTRSV ZTRSV_RLU +#elif TRANS == 3 && defined(UPLO) && defined(DIAG) +#define TRSM TRSM_LRLN +#define ZTRSV ZTRSV_RLN +#elif TRANS == 4 && !defined(UPLO) && !defined(DIAG) +#define TRSM TRSM_LCUU +#define ZTRSV ZTRSV_CUU +#elif TRANS == 4 && !defined(UPLO) && defined(DIAG) +#define TRSM TRSM_LCUN +#define ZTRSV ZTRSV_CUN +#elif TRANS == 4 && defined(UPLO) && !defined(DIAG) +#define TRSM TRSM_LCLU +#define ZTRSV ZTRSV_CLU +#elif TRANS == 4 && defined(UPLO) && defined(DIAG) +#define TRSM TRSM_LCLN +#define ZTRSV ZTRSV_CLN +#endif + +static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, + FLOAT *sa, FLOAT *sb, BLASLONG mypos) { + + TRSM (args, range_m, range_n, sa, sb, 0); + return 0; +} + +blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLOAT *sb, BLASLONG mypos) { + + int mode; + + if (args -> n == 1){ + ZTRSV (args -> m, args -> a, args -> lda, args -> b, 1, sb); + } else { +#ifdef XDOUBLE + mode = BLAS_XDOUBLE | BLAS_COMPLEX; +#elif defined(DOUBLE) + mode = BLAS_DOUBLE | BLAS_COMPLEX; +#else + mode = BLAS_SINGLE | BLAS_COMPLEX; +#endif + + gemm_thread_n(mode, args, NULL, NULL, inner_thread, sa, sb, args -> nthreads); + } + + return 0; + } diff --git a/lapack/trtrs/ztrtrs_single.c b/lapack/trtrs/ztrtrs_single.c new file mode 100644 index 000000000..f39d72900 --- /dev/null +++ b/lapack/trtrs/ztrtrs_single.c @@ -0,0 +1,98 @@ +/*********************************************************************/ +/* Copyright 2009, 2010 The University of Texas at Austin. */ +/* All rights reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* 1. Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* */ +/* 2. Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ +/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ +/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ +/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ +/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/* POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/* The views and conclusions contained in the software and */ +/* documentation are those of the authors and should not be */ +/* interpreted as representing official policies, either expressed */ +/* or implied, of The University of Texas at Austin. */ +/*********************************************************************/ + +#include +#include "common.h" + +#if TRANS == 1 && !defined(UPLO) && !defined(DIAG) +#define TRSM TRSM_LNUU +#define ZTRSV ZTRSV_NUU +#elif TRANS == 1 && !defined(UPLO) && defined(DIAG) +#define TRSM TRSM_LNUN +#define ZTRSV ZTRSV_NUN +#elif TRANS == 1 && defined(UPLO) && !defined(DIAG) +#define TRSM TRSM_LNLU +#define ZTRSV ZTRSV_NLU +#elif TRANS == 1 && defined(UPLO) && defined(DIAG) +#define TRSM TRSM_LNLN +#define ZTRSV ZTRSV_NLN +#elif TRANS == 2 && !defined(UPLO) && !defined(DIAG) +#define TRSM TRSM_LTUU +#define ZTRSV ZTRSV_TUU +#elif TRANS == 2 && !defined(UPLO) && defined(DIAG) +#define TRSM TRSM_LTUN +#define ZTRSV ZTRSV_TUN +#elif TRANS == 2 && defined(UPLO) && !defined(DIAG) +#define TRSM TRSM_LTLU +#define ZTRSV ZTRSV_TLU +#elif TRANS == 2 && defined(UPLO) && defined(DIAG) +#define TRSM TRSM_LTLN +#define ZTRSV ZTRSV_TLN +#elif TRANS == 3 && !defined(UPLO) && !defined(DIAG) +#define TRSM TRSM_LRUU +#define ZTRSV ZTRSV_RUU +#elif TRANS == 3 && !defined(UPLO) && defined(DIAG) +#define TRSM TRSM_LRUN +#define ZTRSV ZTRSV_RUN +#elif TRANS == 3 && defined(UPLO) && !defined(DIAG) +#define TRSM TRSM_LRLU +#define ZTRSV ZTRSV_RLU +#elif TRANS == 3 && defined(UPLO) && defined(DIAG) +#define TRSM TRSM_LRLN +#define ZTRSV ZTRSV_RLN +#elif TRANS == 4 && !defined(UPLO) && !defined(DIAG) +#define TRSM TRSM_LCUU +#define ZTRSV ZTRSV_CUU +#elif TRANS == 4 && !defined(UPLO) && defined(DIAG) +#define TRSM TRSM_LCUN +#define ZTRSV ZTRSV_CUN +#elif TRANS == 4 && defined(UPLO) && !defined(DIAG) +#define TRSM TRSM_LCLU +#define ZTRSV ZTRSV_CLU +#elif TRANS == 4 && defined(UPLO) && defined(DIAG) +#define TRSM TRSM_LCLN +#define ZTRSV ZTRSV_CLN +#endif + +blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLOAT *sb, BLASLONG mypos) { + if (args -> n == 1){ + ZTRSV (args -> m, args -> a, args -> lda, args -> b, 1, sb); + } else { + TRSM (args, range_m, range_n, sa, sb, 0); + } + return 0; } From 9b2f0323d6ecc36b16038c6a806a4e3106245d5a Mon Sep 17 00:00:00 2001 From: Guillaume Horel Date: Fri, 6 Sep 2019 16:01:55 -0400 Subject: [PATCH 09/19] update Makefile --- interface/Makefile | 33 ++++++++++++++++++++++++++------- 1 file changed, 26 insertions(+), 7 deletions(-) diff --git a/interface/Makefile b/interface/Makefile index f0577796d..2edf6387a 100644 --- a/interface/Makefile +++ b/interface/Makefile @@ -394,7 +394,7 @@ XBLASOBJS = $(XBLAS1OBJS) $(XBLAS2OBJS) $(XBLAS3OBJS) SLAPACKOBJS = \ sgetrf.$(SUFFIX) sgetrs.$(SUFFIX) spotrf.$(SUFFIX) sgetf2.$(SUFFIX) \ spotf2.$(SUFFIX) slaswp.$(SUFFIX) sgesv.$(SUFFIX) slauu2.$(SUFFIX) \ - slauum.$(SUFFIX) strti2.$(SUFFIX) strtri.$(SUFFIX) + slauum.$(SUFFIX) strti2.$(SUFFIX) strtri.$(SUFFIX) strtrs.$(SUFFIX) #DLAPACKOBJS = \ @@ -405,14 +405,14 @@ SLAPACKOBJS = \ DLAPACKOBJS = \ dgetrf.$(SUFFIX) dgetrs.$(SUFFIX) dpotrf.$(SUFFIX) dgetf2.$(SUFFIX) \ dpotf2.$(SUFFIX) dlaswp.$(SUFFIX) dgesv.$(SUFFIX) dlauu2.$(SUFFIX) \ - dlauum.$(SUFFIX) dtrti2.$(SUFFIX) dtrtri.$(SUFFIX) + dlauum.$(SUFFIX) dtrti2.$(SUFFIX) dtrtri.$(SUFFIX) dtrtrs.$(SUFFIX) QLAPACKOBJS = \ qgetf2.$(SUFFIX) qgetrf.$(SUFFIX) qlauu2.$(SUFFIX) qlauum.$(SUFFIX) \ qpotf2.$(SUFFIX) qpotrf.$(SUFFIX) qtrti2.$(SUFFIX) qtrtri.$(SUFFIX) \ - qlaswp.$(SUFFIX) qgetrs.$(SUFFIX) qgesv.$(SUFFIX) qpotri.$(SUFFIX) \ - + qlaswp.$(SUFFIX) qtrtrs.$(SUFFIX) qgesv.$(SUFFIX) qpotri.$(SUFFIX) \ + qtrtrs.$(SUFFIX) #CLAPACKOBJS = \ # cgetrf.$(SUFFIX) cgetrs.$(SUFFIX) cpotrf.$(SUFFIX) cgetf2.$(SUFFIX) \ @@ -423,7 +423,7 @@ QLAPACKOBJS = \ CLAPACKOBJS = \ cgetrf.$(SUFFIX) cgetrs.$(SUFFIX) cpotrf.$(SUFFIX) cgetf2.$(SUFFIX) \ cpotf2.$(SUFFIX) claswp.$(SUFFIX) cgesv.$(SUFFIX) clauu2.$(SUFFIX) \ - clauum.$(SUFFIX) ctrti2.$(SUFFIX) ctrtri.$(SUFFIX) + clauum.$(SUFFIX) ctrti2.$(SUFFIX) ctrtri.$(SUFFIX) ctrtrs.$(SUFFIX) #ZLAPACKOBJS = \ @@ -435,13 +435,14 @@ CLAPACKOBJS = \ ZLAPACKOBJS = \ zgetrf.$(SUFFIX) zgetrs.$(SUFFIX) zpotrf.$(SUFFIX) zgetf2.$(SUFFIX) \ zpotf2.$(SUFFIX) zlaswp.$(SUFFIX) zgesv.$(SUFFIX) zlauu2.$(SUFFIX) \ - zlauum.$(SUFFIX) ztrti2.$(SUFFIX) ztrtri.$(SUFFIX) + zlauum.$(SUFFIX) ztrti2.$(SUFFIX) ztrtri.$(SUFFIX) ztrtrs.$(SUFFIX) XLAPACKOBJS = \ xgetf2.$(SUFFIX) xgetrf.$(SUFFIX) xlauu2.$(SUFFIX) xlauum.$(SUFFIX) \ xpotf2.$(SUFFIX) xpotrf.$(SUFFIX) xtrti2.$(SUFFIX) xtrtri.$(SUFFIX) \ - xlaswp.$(SUFFIX) xgetrs.$(SUFFIX) xgesv.$(SUFFIX) xpotri.$(SUFFIX) \ + xlaswp.$(SUFFIX) xtrtrs.$(SUFFIX) xgesv.$(SUFFIX) xpotri.$(SUFFIX) \ + xtrtrs.$(SUFFIX) ifneq ($(NO_LAPACK), 1) SBLASOBJS += $(SLAPACKOBJS) @@ -2043,6 +2044,24 @@ zgetrs.$(SUFFIX) zgetrs.$(PSUFFIX) : lapack/zgetrs.c xgetrs.$(SUFFIX) xgetrs.$(PSUFFIX) : zgetrs.c $(CC) -c $(CFLAGS) $< -o $(@F) +strtrs.$(SUFFIX) strtrs.$(PSUFFIX) : lapack/trtrs.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +dtrtrs.$(SUFFIX) dtrtrs.$(PSUFFIX) : lapack/trtrs.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +qtrtrs.$(SUFFIX) qtrtrs.$(PSUFFIX) : trtrs.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +ctrtrs.$(SUFFIX) ctrtrs.$(PSUFFIX) : lapack/ztrtrs.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +ztrtrs.$(SUFFIX) ztrtrs.$(PSUFFIX) : lapack/ztrtrs.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +xtrtrs.$(SUFFIX) xtrtrs.$(PSUFFIX) : ztrtrs.c + $(CC) -c $(CFLAGS) $< -o $(@F) + sgesv.$(SUFFIX) sgesv.$(PSUFFIX) : lapack/gesv.c $(CC) -c $(CFLAGS) $< -o $(@F) From c7b5a459b6191ceb5ad244b3ff2d059a9751375e Mon Sep 17 00:00:00 2001 From: Guillaume Horel Date: Fri, 6 Sep 2019 16:48:18 -0400 Subject: [PATCH 10/19] add missing defines and headers --- common_lapack.h | 146 ++++++++++++++++++++++++++++++++++++++++++++++++ common_macro.h | 48 ++++++++++++++++ 2 files changed, 194 insertions(+) diff --git a/common_lapack.h b/common_lapack.h index f6d1956fc..f9c36646a 100644 --- a/common_lapack.h +++ b/common_lapack.h @@ -293,4 +293,150 @@ blasint zlarf_R(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLO blasint xlarf_L(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG); blasint xlarf_R(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG); +blasint strtrs_UNU_single(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG); +blasint strtrs_UNN_single(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG); +blasint strtrs_UTU_single(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG); +blasint strtrs_UTN_single(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG); +blasint strtrs_LNU_single(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG); +blasint strtrs_LNN_single(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG); +blasint strtrs_LTU_single(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG); +blasint strtrs_LTN_single(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG); +blasint dtrtrs_UNU_single(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG); +blasint dtrtrs_UNN_single(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG); +blasint dtrtrs_UTU_single(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG); +blasint dtrtrs_UTN_single(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG); +blasint dtrtrs_LNU_single(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG); +blasint dtrtrs_LNN_single(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG); +blasint dtrtrs_LTU_single(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG); +blasint dtrtrs_LTN_single(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG); +blasint qtrtrs_UNU_single(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG); +blasint qtrtrs_UNN_single(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG); +blasint qtrtrs_UTU_single(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG); +blasint qtrtrs_UTN_single(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG); +blasint qtrtrs_LNU_single(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG); +blasint qtrtrs_LNN_single(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG); +blasint qtrtrs_LTU_single(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG); +blasint qtrtrs_LTN_single(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG); +blasint ctrtrs_UNU_single(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG); +blasint ctrtrs_UNN_single(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG); +blasint ctrtrs_UTU_single(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG); +blasint ctrtrs_UTN_single(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG); +blasint ctrtrs_URU_single(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG); +blasint ctrtrs_URN_single(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG); +blasint ctrtrs_UCU_single(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG); +blasint ctrtrs_UCN_single(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG); +blasint ctrtrs_LNU_single(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG); +blasint ctrtrs_LNN_single(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG); +blasint ctrtrs_LTU_single(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG); +blasint ctrtrs_LTN_single(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG); +blasint ctrtrs_LRU_single(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG); +blasint ctrtrs_LRN_single(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG); +blasint ctrtrs_LCU_single(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG); +blasint ctrtrs_LCN_single(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG); +blasint ztrtrs_UNU_single(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG); +blasint ztrtrs_UNN_single(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG); +blasint ztrtrs_UTU_single(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG); +blasint ztrtrs_UTN_single(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG); +blasint ztrtrs_URU_single(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG); +blasint ztrtrs_URN_single(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG); +blasint ztrtrs_UCU_single(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG); +blasint ztrtrs_UCN_single(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG); +blasint ztrtrs_LNU_single(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG); +blasint ztrtrs_LNN_single(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG); +blasint ztrtrs_LTU_single(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG); +blasint ztrtrs_LTN_single(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG); +blasint ztrtrs_LRU_single(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG); +blasint ztrtrs_LRN_single(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG); +blasint ztrtrs_LCU_single(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG); +blasint ztrtrs_LCN_single(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG); +blasint xtrtrs_UNU_single(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG); +blasint xtrtrs_UNN_single(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG); +blasint xtrtrs_UTU_single(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG); +blasint xtrtrs_UTN_single(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG); +blasint xtrtrs_URU_single(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG); +blasint xtrtrs_URN_single(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG); +blasint xtrtrs_UCU_single(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG); +blasint xtrtrs_UCN_single(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG); +blasint xtrtrs_LNU_single(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG); +blasint xtrtrs_LNN_single(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG); +blasint xtrtrs_LTU_single(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG); +blasint xtrtrs_LTN_single(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG); +blasint xtrtrs_LRU_single(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG); +blasint xtrtrs_LRN_single(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG); +blasint xtrtrs_LCU_single(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG); +blasint xtrtrs_LCN_single(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG); + +blasint strtrs_UNU_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG); +blasint strtrs_UNN_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG); +blasint strtrs_UTU_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG); +blasint strtrs_UTN_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG); +blasint strtrs_LNU_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG); +blasint strtrs_LNN_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG); +blasint strtrs_LTU_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG); +blasint strtrs_LTN_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG); +blasint dtrtrs_UNU_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG); +blasint dtrtrs_UNN_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG); +blasint dtrtrs_UTU_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG); +blasint dtrtrs_UTN_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG); +blasint dtrtrs_LNU_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG); +blasint dtrtrs_LNN_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG); +blasint dtrtrs_LTU_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG); +blasint dtrtrs_LTN_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG); +blasint qtrtrs_UNU_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG); +blasint qtrtrs_UNN_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG); +blasint qtrtrs_UTU_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG); +blasint qtrtrs_UTN_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG); +blasint qtrtrs_LNU_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG); +blasint qtrtrs_LNN_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG); +blasint qtrtrs_LTU_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG); +blasint qtrtrs_LTN_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG); +blasint ctrtrs_UNU_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG); +blasint ctrtrs_UNN_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG); +blasint ctrtrs_UTU_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG); +blasint ctrtrs_UTN_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG); +blasint ctrtrs_URU_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG); +blasint ctrtrs_URN_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG); +blasint ctrtrs_UCU_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG); +blasint ctrtrs_UCN_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG); +blasint ctrtrs_LNU_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG); +blasint ctrtrs_LNN_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG); +blasint ctrtrs_LTU_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG); +blasint ctrtrs_LTN_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG); +blasint ctrtrs_LRU_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG); +blasint ctrtrs_LRN_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG); +blasint ctrtrs_LCU_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG); +blasint ctrtrs_LCN_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG); +blasint ztrtrs_UNU_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG); +blasint ztrtrs_UNN_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG); +blasint ztrtrs_UTU_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG); +blasint ztrtrs_UTN_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG); +blasint ztrtrs_URU_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG); +blasint ztrtrs_URN_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG); +blasint ztrtrs_UCU_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG); +blasint ztrtrs_UCN_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG); +blasint ztrtrs_LNU_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG); +blasint ztrtrs_LNN_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG); +blasint ztrtrs_LTU_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG); +blasint ztrtrs_LTN_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG); +blasint ztrtrs_LRU_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG); +blasint ztrtrs_LRN_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG); +blasint ztrtrs_LCU_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG); +blasint ztrtrs_LCN_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG); +blasint xtrtrs_UNU_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG); +blasint xtrtrs_UNN_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG); +blasint xtrtrs_UTU_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG); +blasint xtrtrs_UTN_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG); +blasint xtrtrs_URU_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG); +blasint xtrtrs_URN_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG); +blasint xtrtrs_UCU_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG); +blasint xtrtrs_UCN_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG); +blasint xtrtrs_LNU_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG); +blasint xtrtrs_LNN_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG); +blasint xtrtrs_LTU_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG); +blasint xtrtrs_LTN_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG); +blasint xtrtrs_LRU_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG); +blasint xtrtrs_LRN_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG); +blasint xtrtrs_LCU_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG); +blasint xtrtrs_LCN_parallel(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG); + #endif diff --git a/common_macro.h b/common_macro.h index e8a4a66ed..13bb85794 100644 --- a/common_macro.h +++ b/common_macro.h @@ -2867,51 +2867,99 @@ typedef struct { #define TRTRS_UNN_SINGLE xtrtrs_UNN_single #define TRTRS_UTU_SINGLE xtrtrs_UTU_single #define TRTRS_UTN_SINGLE xtrtrs_UTN_single +#define TRTRS_URU_SINGLE xtrtrs_URU_single +#define TRTRS_URN_SINGLE xtrtrs_URN_single +#define TRTRS_UCU_SINGLE xtrtrs_UCU_single +#define TRTRS_UCN_SINGLE xtrtrs_UCN_single #define TRTRS_LNU_SINGLE xtrtrs_LNU_single #define TRTRS_LNN_SINGLE xtrtrs_LNN_single #define TRTRS_LTU_SINGLE xtrtrs_LTU_single #define TRTRS_LTN_SINGLE xtrtrs_LTN_single +#define TRTRS_LRU_SINGLE xtrtrs_LRU_single +#define TRTRS_LRN_SINGLE xtrtrs_LRN_single +#define TRTRS_LCU_SINGLE xtrtrs_LCU_single +#define TRTRS_LCN_SINGLE xtrtrs_LCN_single #define TRTRS_UNU_PARALLEL xtrtrs_UNU_parallel #define TRTRS_UNN_PARALLEL xtrtrs_UNN_parallel #define TRTRS_UTU_PARALLEL xtrtrs_UTU_parallel #define TRTRS_UTN_PARALLEL xtrtrs_UTN_parallel +#define TRTRS_URU_PARALLEL xtrtrs_URU_parallel +#define TRTRS_URN_PARALLEL xtrtrs_URN_parallel +#define TRTRS_UCU_PARALLEL xtrtrs_UCU_parallel +#define TRTRS_UCN_PARALLEL xtrtrs_UCN_parallel #define TRTRS_LNU_PARALLEL xtrtrs_LNU_parallel #define TRTRS_LNN_PARALLEL xtrtrs_LNN_parallel #define TRTRS_LTU_PARALLEL xtrtrs_LTU_parallel #define TRTRS_LTN_PARALLEL xtrtrs_LTN_parallel +#define TRTRS_LRU_PARALLEL xtrtrs_LRU_parallel +#define TRTRS_LRN_PARALLEL xtrtrs_LRN_parallel +#define TRTRS_LCU_PARALLEL xtrtrs_LCU_parallel +#define TRTRS_LCN_PARALLEL xtrtrs_LCN_parallel #elif defined(DOUBLE) #define TRTRS_UNU_SINGLE ztrtrs_UNU_single #define TRTRS_UNN_SINGLE ztrtrs_UNN_single #define TRTRS_UTU_SINGLE ztrtrs_UTU_single #define TRTRS_UTN_SINGLE ztrtrs_UTN_single +#define TRTRS_URU_SINGLE ztrtrs_URU_single +#define TRTRS_URN_SINGLE ztrtrs_URN_single +#define TRTRS_UCU_SINGLE ztrtrs_UCU_single +#define TRTRS_UCN_SINGLE ztrtrs_UCN_single #define TRTRS_LNU_SINGLE ztrtrs_LNU_single #define TRTRS_LNN_SINGLE ztrtrs_LNN_single #define TRTRS_LTU_SINGLE ztrtrs_LTU_single #define TRTRS_LTN_SINGLE ztrtrs_LTN_single +#define TRTRS_LRU_SINGLE ztrtrs_LRU_single +#define TRTRS_LRN_SINGLE ztrtrs_LRN_single +#define TRTRS_LCU_SINGLE ztrtrs_LCU_single +#define TRTRS_LCN_SINGLE ztrtrs_LCN_single #define TRTRS_UNU_PARALLEL ztrtrs_UNU_parallel #define TRTRS_UNN_PARALLEL ztrtrs_UNN_parallel #define TRTRS_UTU_PARALLEL ztrtrs_UTU_parallel #define TRTRS_UTN_PARALLEL ztrtrs_UTN_parallel +#define TRTRS_URU_PARALLEL ztrtrs_URU_parallel +#define TRTRS_URN_PARALLEL ztrtrs_URN_parallel +#define TRTRS_UCU_PARALLEL ztrtrs_UCU_parallel +#define TRTRS_UCN_PARALLEL ztrtrs_UCN_parallel #define TRTRS_LNU_PARALLEL ztrtrs_LNU_parallel #define TRTRS_LNN_PARALLEL ztrtrs_LNN_parallel #define TRTRS_LTU_PARALLEL ztrtrs_LTU_parallel #define TRTRS_LTN_PARALLEL ztrtrs_LTN_parallel +#define TRTRS_LRU_PARALLEL ztrtrs_LRU_parallel +#define TRTRS_LRN_PARALLEL ztrtrs_LRN_parallel +#define TRTRS_LCU_PARALLEL ztrtrs_LCU_parallel +#define TRTRS_LCN_PARALLEL ztrtrs_LCN_parallel #else #define TRTRS_UNU_SINGLE ctrtrs_UNU_single #define TRTRS_UNN_SINGLE ctrtrs_UNN_single #define TRTRS_UTU_SINGLE ctrtrs_UTU_single #define TRTRS_UTN_SINGLE ctrtrs_UTN_single +#define TRTRS_URU_SINGLE ctrtrs_URU_single +#define TRTRS_URN_SINGLE ctrtrs_URN_single +#define TRTRS_UCU_SINGLE ctrtrs_UCU_single +#define TRTRS_UCN_SINGLE ctrtrs_UCN_single #define TRTRS_LNU_SINGLE ctrtrs_LNU_single #define TRTRS_LNN_SINGLE ctrtrs_LNN_single #define TRTRS_LTU_SINGLE ctrtrs_LTU_single #define TRTRS_LTN_SINGLE ctrtrs_LTN_single +#define TRTRS_LRU_SINGLE ctrtrs_LRU_single +#define TRTRS_LRN_SINGLE ctrtrs_LRN_single +#define TRTRS_LCU_SINGLE ctrtrs_LCU_single +#define TRTRS_LCN_SINGLE ctrtrs_LCN_single #define TRTRS_UNU_PARALLEL ctrtrs_UNU_parallel #define TRTRS_UNN_PARALLEL ctrtrs_UNN_parallel #define TRTRS_UTU_PARALLEL ctrtrs_UTU_parallel #define TRTRS_UTN_PARALLEL ctrtrs_UTN_parallel +#define TRTRS_URU_PARALLEL ctrtrs_URU_parallel +#define TRTRS_URN_PARALLEL ctrtrs_URN_parallel +#define TRTRS_UCU_PARALLEL ctrtrs_UCU_parallel +#define TRTRS_UCN_PARALLEL ctrtrs_UCN_parallel #define TRTRS_LNU_PARALLEL ctrtrs_LNU_parallel #define TRTRS_LNN_PARALLEL ctrtrs_LNN_parallel #define TRTRS_LTU_PARALLEL ctrtrs_LTU_parallel #define TRTRS_LTN_PARALLEL ctrtrs_LTN_parallel +#define TRTRS_LRU_PARALLEL ctrtrs_LRU_parallel +#define TRTRS_LRN_PARALLEL ctrtrs_LRN_parallel +#define TRTRS_LCU_PARALLEL ctrtrs_LCU_parallel +#define TRTRS_LCN_PARALLEL ctrtrs_LCN_parallel #endif #endif From af9ac0898af4357cf66d34215d9711df64f6e858 Mon Sep 17 00:00:00 2001 From: Guillaume Horel Date: Fri, 6 Sep 2019 16:49:12 -0400 Subject: [PATCH 11/19] fix Makefile --- interface/Makefile | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/interface/Makefile b/interface/Makefile index 2edf6387a..3f0dcca28 100644 --- a/interface/Makefile +++ b/interface/Makefile @@ -2032,7 +2032,7 @@ sgetrs.$(SUFFIX) sgetrs.$(PSUFFIX) : lapack/getrs.c dgetrs.$(SUFFIX) dgetrs.$(PSUFFIX) : lapack/getrs.c $(CC) -c $(CFLAGS) $< -o $(@F) -qgetrs.$(SUFFIX) qgetrs.$(PSUFFIX) : getrs.c +qgetrs.$(SUFFIX) qgetrs.$(PSUFFIX) : lapack/getrs.c $(CC) -c $(CFLAGS) $< -o $(@F) cgetrs.$(SUFFIX) cgetrs.$(PSUFFIX) : lapack/zgetrs.c @@ -2041,7 +2041,7 @@ cgetrs.$(SUFFIX) cgetrs.$(PSUFFIX) : lapack/zgetrs.c zgetrs.$(SUFFIX) zgetrs.$(PSUFFIX) : lapack/zgetrs.c $(CC) -c $(CFLAGS) $< -o $(@F) -xgetrs.$(SUFFIX) xgetrs.$(PSUFFIX) : zgetrs.c +xgetrs.$(SUFFIX) xgetrs.$(PSUFFIX) : lapack/zgetrs.c $(CC) -c $(CFLAGS) $< -o $(@F) strtrs.$(SUFFIX) strtrs.$(PSUFFIX) : lapack/trtrs.c @@ -2050,7 +2050,7 @@ strtrs.$(SUFFIX) strtrs.$(PSUFFIX) : lapack/trtrs.c dtrtrs.$(SUFFIX) dtrtrs.$(PSUFFIX) : lapack/trtrs.c $(CC) -c $(CFLAGS) $< -o $(@F) -qtrtrs.$(SUFFIX) qtrtrs.$(PSUFFIX) : trtrs.c +qtrtrs.$(SUFFIX) qtrtrs.$(PSUFFIX) : lapack/trtrs.c $(CC) -c $(CFLAGS) $< -o $(@F) ctrtrs.$(SUFFIX) ctrtrs.$(PSUFFIX) : lapack/ztrtrs.c @@ -2059,7 +2059,7 @@ ctrtrs.$(SUFFIX) ctrtrs.$(PSUFFIX) : lapack/ztrtrs.c ztrtrs.$(SUFFIX) ztrtrs.$(PSUFFIX) : lapack/ztrtrs.c $(CC) -c $(CFLAGS) $< -o $(@F) -xtrtrs.$(SUFFIX) xtrtrs.$(PSUFFIX) : ztrtrs.c +xtrtrs.$(SUFFIX) xtrtrs.$(PSUFFIX) : lapack/ztrtrs.c $(CC) -c $(CFLAGS) $< -o $(@F) sgesv.$(SUFFIX) sgesv.$(PSUFFIX) : lapack/gesv.c From 7ec7b999a543a5480db1f76dcb413deee6c50e2d Mon Sep 17 00:00:00 2001 From: Guillaume Horel Date: Fri, 6 Sep 2019 16:49:27 -0400 Subject: [PATCH 12/19] add missing file --- interface/lapack/ztrtrs.c | 171 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 171 insertions(+) create mode 100644 interface/lapack/ztrtrs.c diff --git a/interface/lapack/ztrtrs.c b/interface/lapack/ztrtrs.c new file mode 100644 index 000000000..4cd423069 --- /dev/null +++ b/interface/lapack/ztrtrs.c @@ -0,0 +1,171 @@ +/*********************************************************************/ +/* Copyright 2009, 2010 The University of Texas at Austin. */ +/* All rights reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* 1. Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* */ +/* 2. Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ +/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ +/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ +/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ +/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/* POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/* The views and conclusions contained in the software and */ +/* documentation are those of the authors and should not be */ +/* interpreted as representing official policies, either expressed */ +/* or implied, of The University of Texas at Austin. */ +/*********************************************************************/ + +#include +#include "common.h" +#ifdef FUNCTION_PROFILE +#include "functable.h" +#endif + +#ifdef XDOUBLE +#define ERROR_NAME "XTRTRS" +#elif defined(DOUBLE) +#define ERROR_NAME "ZTRTRS" +#else +#define ERROR_NAME "CTRTRS" +#endif + +static blasint (*trtrs_single[])(blas_arg_t *, BLASLONG *, BLASLONG *, FLOAT *, FLOAT *, BLASLONG) = { + TRTRS_UNU_SINGLE, TRTRS_UNN_SINGLE, TRTRS_UTU_SINGLE, TRTRS_UTN_SINGLE, TRTRS_URU_SINGLE, TRTRS_URN_SINGLE, TRTRS_UCU_SINGLE, TRTRS_UCN_SINGLE, TRTRS_LNU_SINGLE, TRTRS_LNN_SINGLE, TRTRS_LTU_SINGLE, TRTRS_LTN_SINGLE, TRTRS_LRU_SINGLE, TRTRS_LRN_SINGLE, TRTRS_LCU_SINGLE, TRTRS_LCN_SINGLE, +}; + +#ifdef SMP +static blasint (*trtrs_parallel[])(blas_arg_t *, BLASLONG *, BLASLONG *, FLOAT *, FLOAT *, BLASLONG) = { + TRTRS_UNU_PARALLEL, TRTRS_UNN_PARALLEL, TRTRS_UTU_PARALLEL, TRTRS_UTN_PARALLEL, TRTRS_URU_PARALLEL, TRTRS_URN_PARALLEL, TRTRS_UCU_PARALLEL, TRTRS_UCN_PARALLEL, TRTRS_LNU_PARALLEL, TRTRS_LNN_PARALLEL, TRTRS_LTU_PARALLEL, TRTRS_LTN_PARALLEL, TRTRS_LRU_PARALLEL, TRTRS_LRN_PARALLEL, TRTRS_LCU_PARALLEL, TRTRS_LCN_PARALLEL, +}; +#endif + +int NAME(char *UPLO, char* TRANS, char* DIAG, blasint *N, blasint *NRHS, FLOAT *a, blasint *ldA, + FLOAT *b, blasint *ldB, blasint *Info){ + + char uplo_arg = *UPLO; + char trans_arg = *TRANS; + char diag_arg = *DIAG; + + blas_arg_t args; + + blasint info; + int uplo, trans, diag; + FLOAT *buffer; +#ifdef PPC440 + extern +#endif + FLOAT *sa, *sb; + + PRINT_DEBUG_NAME; + + args.m = *N; + args.n = *NRHS; + args.a = (void *)a; + args.lda = *ldA; + args.b = (void *)b; + args.ldb = *ldB; + + info = 0; + + TOUPPER(trans_arg); + trans = -1; + if (trans_arg == 'N') trans = 0; + if (trans_arg == 'T') trans = 1; + if (trans_arg == 'R') trans = 2; + if (trans_arg == 'C') trans = 3; + + uplo = -1; + if (uplo_arg == 'U') uplo = 0; + if (uplo_arg == 'L') uplo = 1; + + diag = -1; + if (diag_arg == 'U') diag = 0; + if (diag_arg == 'N') diag = 1; + + if (args.ldb < MAX(1, args.m)) info = 7; + if (args.lda < MAX(1, args.m)) info = 9; + if (args.n < 0) info = 5; + if (args.m < 0) info = 4; + if (trans < 0) info = 2; + if (uplo < 0) info = 1; + if (diag < 0) info = 3; + + if (info != 0) { + BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME)); + *Info = - info; + return 0; + } + + args.alpha = NULL; + args.beta = NULL; + + *Info = 0; + + if (args.m == 0 || args.n == 0) return 0; + + if (diag) { + if (AMIN_K(args.n, args.a, args.lda + 1) == ZERO) { + *Info = IAMIN_K(args.n, args.a, args.lda + 1); + return 0; + } + } + + + IDEBUG_START; + + FUNCTION_PROFILE_START(); + +#ifndef PPC440 + buffer = (FLOAT *)blas_memory_alloc(1); + + sa = (FLOAT *)((BLASLONG)buffer + GEMM_OFFSET_A); + sb = (FLOAT *)(((BLASLONG)sa + ((GEMM_P * GEMM_Q * COMPSIZE * SIZE + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B); +#endif + +#ifdef SMP + args.common = NULL; + args.nthreads = num_cpu_avail(4); + + if (args.nthreads == 1) { +#endif + + (trtrs_single[(uplo << 2) | (trans << 1) | diag])(&args, NULL, NULL, sa, sb, 0); + +#ifdef SMP + } else { + (trtrs_parallel[(uplo << 2) | (trans << 1) | diag])(&args, NULL, NULL, sa, sb, 0); + } +#endif + +#ifndef PPC440 + blas_memory_free(buffer); +#endif + + FUNCTION_PROFILE_END(COMPSIZE * COMPSIZE, args.m * args.n, 2 * args.m * args.m * args.n); + + IDEBUG_END; + + return 0; + +} From 4b21b646ea49c9e00490bb29f978d1a91a4a1192 Mon Sep 17 00:00:00 2001 From: Guillaume Horel Date: Fri, 6 Sep 2019 17:19:40 -0400 Subject: [PATCH 13/19] turn on optimized code --- lapack-netlib/SRC/Makefile | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lapack-netlib/SRC/Makefile b/lapack-netlib/SRC/Makefile index 87a8f51e4..1c276aff6 100644 --- a/lapack-netlib/SRC/Makefile +++ b/lapack-netlib/SRC/Makefile @@ -507,22 +507,22 @@ ALL_AUX_OBJS = xerbla.o ../INSTALL/lsame.o SLAPACKOBJS = \ sgetrf.o sgetrs.o spotrf.o sgetf2.o \ spotf2.o slaswp.o sgesv.o slauu2.o \ - slauum.o strti2.o strtri.o + slauum.o strti2.o strtri.o strtrs.o DLAPACKOBJS = \ dgetrf.o dgetrs.o dpotrf.o dgetf2.o \ dpotf2.o dlaswp.o dgesv.o dlauu2.o \ - dlauum.o dtrti2.o dtrtri.o + dlauum.o dtrti2.o dtrtri.o dtrtrs.o CLAPACKOBJS = \ cgetrf.o cgetrs.o cpotrf.o cgetf2.o \ cpotf2.o claswp.o cgesv.o clauu2.o \ - clauum.o ctrti2.o ctrtri.o + clauum.o ctrti2.o ctrtri.o ctrtrs.o ZLAPACKOBJS = \ zgetrf.o zgetrs.o zpotrf.o zgetf2.o \ zpotf2.o zlaswp.o zgesv.o zlauu2.o \ - zlauum.o ztrti2.o ztrtri.o + zlauum.o ztrti2.o ztrtri.o ztrtrs.o ALLAUX = $(filter-out $(ALL_AUX_OBJS),$(ALLAUX_O)) From 5997b6b491a7fb9b0184f4edfc1c76d16de3affa Mon Sep 17 00:00:00 2001 From: Guillaume Horel Date: Sat, 7 Sep 2019 22:06:27 -0400 Subject: [PATCH 14/19] bugfix --- interface/lapack/ztrtrs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/interface/lapack/ztrtrs.c b/interface/lapack/ztrtrs.c index 4cd423069..0536fc5d3 100644 --- a/interface/lapack/ztrtrs.c +++ b/interface/lapack/ztrtrs.c @@ -150,11 +150,11 @@ int NAME(char *UPLO, char* TRANS, char* DIAG, blasint *N, blasint *NRHS, FLOAT * if (args.nthreads == 1) { #endif - (trtrs_single[(uplo << 2) | (trans << 1) | diag])(&args, NULL, NULL, sa, sb, 0); + (trtrs_single[(uplo << 3) | (trans << 1) | diag])(&args, NULL, NULL, sa, sb, 0); #ifdef SMP } else { - (trtrs_parallel[(uplo << 2) | (trans << 1) | diag])(&args, NULL, NULL, sa, sb, 0); + (trtrs_parallel[(uplo << 3) | (trans << 1) | diag])(&args, NULL, NULL, sa, sb, 0); } #endif From f2becb777a9640225a6ef89b939fb8b0bdbbbc77 Mon Sep 17 00:00:00 2001 From: Guillaume Horel Date: Mon, 9 Sep 2019 11:36:50 -0400 Subject: [PATCH 15/19] fix Makefile --- lapack/trtrs/Makefile | 64 +++++++++++++++++++++---------------------- 1 file changed, 32 insertions(+), 32 deletions(-) diff --git a/lapack/trtrs/Makefile b/lapack/trtrs/Makefile index f9faaf9b9..587d94e3d 100644 --- a/lapack/trtrs/Makefile +++ b/lapack/trtrs/Makefile @@ -257,52 +257,52 @@ ztrtrs_LCU_single.$(SUFFIX) : ztrtrs_single.c ztrtrs_LCN_single.$(SUFFIX) : ztrtrs_single.c $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DUPLO -DTRANS=4 -DDIAG $< -o $(@F) -xtrtrs_UNU_single.$(SUFFIX) : xtrtrs_single.c +xtrtrs_UNU_single.$(SUFFIX) : ztrtrs_single.c $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -UUPLO -DTRANS=1 -UDIAG $< -o $(@F) -xtrtrs_UNN_single.$(SUFFIX) : xtrtrs_single.c +xtrtrs_UNN_single.$(SUFFIX) : ztrtrs_single.c $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -UUPLO -DTRANS=1 -DDIAG $< -o $(@F) -xtrtrs_UTU_single.$(SUFFIX) : xtrtrs_single.c +xtrtrs_UTU_single.$(SUFFIX) : ztrtrs_single.c $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -UUPLO -DTRANS=2 -UDIAG $< -o $(@F) -xtrtrs_UTN_single.$(SUFFIX) : xtrtrs_single.c +xtrtrs_UTN_single.$(SUFFIX) : ztrtrs_single.c $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -UUPLO -DTRANS=2 -DDIAG $< -o $(@F) -xtrtrs_URU_single.$(SUFFIX) : xtrtrs_single.c +xtrtrs_URU_single.$(SUFFIX) : ztrtrs_single.c $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -UUPLO -DTRANS=3 -UDIAG $< -o $(@F) -xtrtrs_URN_single.$(SUFFIX) : xtrtrs_single.c +xtrtrs_URN_single.$(SUFFIX) : ztrtrs_single.c $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -UUPLO -DTRANS=3 -DDIAG $< -o $(@F) -xtrtrs_UCU_single.$(SUFFIX) : xtrtrs_single.c +xtrtrs_UCU_single.$(SUFFIX) : ztrtrs_single.c $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -UUPLO -DTRANS=4 -UDIAG $< -o $(@F) -xtrtrs_UCN_single.$(SUFFIX) : xtrtrs_single.c +xtrtrs_UCN_single.$(SUFFIX) : ztrtrs_single.c $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -UUPLO -DTRANS=4 -DDIAG $< -o $(@F) -xtrtrs_LNU_single.$(SUFFIX) : xtrtrs_single.c +xtrtrs_LNU_single.$(SUFFIX) : ztrtrs_single.c $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DUPLO -DTRANS=1 -UDIAG $< -o $(@F) -xtrtrs_LNN_single.$(SUFFIX) : xtrtrs_single.c +xtrtrs_LNN_single.$(SUFFIX) : ztrtrs_single.c $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DUPLO -DTRANS=1 -DDIAG $< -o $(@F) -xtrtrs_LTU_single.$(SUFFIX) : xtrtrs_single.c +xtrtrs_LTU_single.$(SUFFIX) : ztrtrs_single.c $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DUPLO -DTRANS=2 -UDIAG $< -o $(@F) -xtrtrs_LTN_single.$(SUFFIX) : xtrtrs_single.c +xtrtrs_LTN_single.$(SUFFIX) : ztrtrs_single.c $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DUPLO -DTRANS=2 -DDIAG $< -o $(@F) -xtrtrs_LRU_single.$(SUFFIX) : xtrtrs_single.c +xtrtrs_LRU_single.$(SUFFIX) : ztrtrs_single.c $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DUPLO -DTRANS=3 -UDIAG $< -o $(@F) -xtrtrs_LRN_single.$(SUFFIX) : xtrtrs_single.c +xtrtrs_LRN_single.$(SUFFIX) : ztrtrs_single.c $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DUPLO -DTRANS=3 -DDIAG $< -o $(@F) -xtrtrs_LCU_single.$(SUFFIX) : xtrtrs_single.c +xtrtrs_LCU_single.$(SUFFIX) : ztrtrs_single.c $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DUPLO -DTRANS=4 -UDIAG $< -o $(@F) -xtrtrs_LCN_single.$(SUFFIX) : xtrtrs_single.c +xtrtrs_LCN_single.$(SUFFIX) : ztrtrs_single.c $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DUPLO -DTRANS=4 -DDIAG $< -o $(@F) ctrtrs_UNU_parallel.$(SUFFIX) : ztrtrs_parallel.c @@ -401,52 +401,52 @@ ztrtrs_LCU_parallel.$(SUFFIX) : ztrtrs_parallel.c ztrtrs_LCN_parallel.$(SUFFIX) : ztrtrs_parallel.c $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DUPLO -DTRANS=4 -DDIAG $< -o $(@F) -xtrtrs_UNU_parallel.$(SUFFIX) : xtrtrs_parallel.c +xtrtrs_UNU_parallel.$(SUFFIX) : ztrtrs_parallel.c $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -UUPLO -DTRANS=1 -UDIAG $< -o $(@F) -xtrtrs_UNN_parallel.$(SUFFIX) : xtrtrs_parallel.c +xtrtrs_UNN_parallel.$(SUFFIX) : ztrtrs_parallel.c $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -UUPLO -DTRANS=1 -DDIAG $< -o $(@F) -xtrtrs_UTU_parallel.$(SUFFIX) : xtrtrs_parallel.c +xtrtrs_UTU_parallel.$(SUFFIX) : ztrtrs_parallel.c $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -UUPLO -DTRANS=2 -UDIAG $< -o $(@F) -xtrtrs_UTN_parallel.$(SUFFIX) : xtrtrs_parallel.c +xtrtrs_UTN_parallel.$(SUFFIX) : ztrtrs_parallel.c $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -UUPLO -DTRANS=2 -DDIAG $< -o $(@F) -xtrtrs_URU_parallel.$(SUFFIX) : xtrtrs_parallel.c +xtrtrs_URU_parallel.$(SUFFIX) : ztrtrs_parallel.c $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -UUPLO -DTRANS=3 -UDIAG $< -o $(@F) -xtrtrs_URN_parallel.$(SUFFIX) : xtrtrs_parallel.c +xtrtrs_URN_parallel.$(SUFFIX) : ztrtrs_parallel.c $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -UUPLO -DTRANS=3 -DDIAG $< -o $(@F) -xtrtrs_UCU_parallel.$(SUFFIX) : xtrtrs_parallel.c +xtrtrs_UCU_parallel.$(SUFFIX) : ztrtrs_parallel.c $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -UUPLO -DTRANS=4 -UDIAG $< -o $(@F) -xtrtrs_UCN_parallel.$(SUFFIX) : xtrtrs_parallel.c +xtrtrs_UCN_parallel.$(SUFFIX) : ztrtrs_parallel.c $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -UUPLO -DTRANS=4 -DDIAG $< -o $(@F) -xtrtrs_LNU_parallel.$(SUFFIX) : xtrtrs_parallel.c +xtrtrs_LNU_parallel.$(SUFFIX) : ztrtrs_parallel.c $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DUPLO -DTRANS=1 -UDIAG $< -o $(@F) -xtrtrs_LNN_parallel.$(SUFFIX) : xtrtrs_parallel.c +xtrtrs_LNN_parallel.$(SUFFIX) : ztrtrs_parallel.c $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DUPLO -DTRANS=1 -DDIAG $< -o $(@F) -xtrtrs_LTU_parallel.$(SUFFIX) : xtrtrs_parallel.c +xtrtrs_LTU_parallel.$(SUFFIX) : ztrtrs_parallel.c $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DUPLO -DTRANS=2 -UDIAG $< -o $(@F) -xtrtrs_LTN_parallel.$(SUFFIX) : xtrtrs_parallel.c +xtrtrs_LTN_parallel.$(SUFFIX) : ztrtrs_parallel.c $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DUPLO -DTRANS=2 -DDIAG $< -o $(@F) -xtrtrs_LRU_parallel.$(SUFFIX) : xtrtrs_parallel.c +xtrtrs_LRU_parallel.$(SUFFIX) : ztrtrs_parallel.c $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DUPLO -DTRANS=3 -UDIAG $< -o $(@F) -xtrtrs_LRN_parallel.$(SUFFIX) : xtrtrs_parallel.c +xtrtrs_LRN_parallel.$(SUFFIX) : ztrtrs_parallel.c $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DUPLO -DTRANS=3 -DDIAG $< -o $(@F) -xtrtrs_LCU_parallel.$(SUFFIX) : xtrtrs_parallel.c +xtrtrs_LCU_parallel.$(SUFFIX) : ztrtrs_parallel.c $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DUPLO -DTRANS=4 -UDIAG $< -o $(@F) -xtrtrs_LCN_parallel.$(SUFFIX) : xtrtrs_parallel.c +xtrtrs_LCN_parallel.$(SUFFIX) : ztrtrs_parallel.c $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DUPLO -DTRANS=4 -DDIAG $< -o $(@F) include ../../Makefile.tail From eb45eb6942b9aa35970f4f58ce268130891a79c5 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Tue, 10 Sep 2019 08:27:06 +0200 Subject: [PATCH 16/19] Fix C compiler handling and BINARY=32 mode in CMAKE builds (#2248) * Fix compiler identification and option setting * Handle BINARY=32 option on X86_64 * Add xGEMM3M unroll parameters for crossbuild-target CORE2 * Replace bogus mingw64/32bit CI job with actual 32bit build mingw64 is not multilib-capable, so using an x86_64-mingw with BINARY=32 in the CI was not going to work anyway (but build passed while BINARY=32 was ignored). --- appveyor.yml | 7 ++++--- cmake/cc.cmake | 10 +++++----- cmake/prebuild.cmake | 4 ++++ cmake/system_check.cmake | 31 +++++++++++++++++++++++++++---- 4 files changed, 40 insertions(+), 12 deletions(-) diff --git a/appveyor.yml b/appveyor.yml index 2f9cc7b0b..1936059d5 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -38,7 +38,8 @@ environment: - COMPILER: MinGW64-gcc-7.2.0-mingw DYNAMIC_ARCH: OFF WITH_FORTRAN: ignore - - COMPILER: MinGW64-gcc-7.2.0 + - APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2015 + COMPILER: MinGW-gcc-6.3.0-32 - APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2015 COMPILER: MinGW-gcc-5.3.0 WITH_FORTRAN: ignore @@ -62,10 +63,10 @@ before_build: - set PATH=%PATH:C:\Program Files\Git\usr\bin;=% - if [%COMPILER%]==[MinGW-gcc-5.3.0] set PATH=C:\MinGW\bin;C:\msys64\usr\bin;C:\mingw-w64\x86_64-7.2.0-posix-seh-rt_v5-rev1\mingw64\bin;%PATH% - if [%COMPILER%]==[MinGW64-gcc-7.2.0-mingw] set PATH=C:\MinGW\bin;C:\mingw-w64\x86_64-7.2.0-posix-seh-rt_v5-rev1\mingw64\bin;%PATH% - - if [%COMPILER%]==[MinGW64-gcc-7.2.0] set PATH=C:\msys64\usr\bin;C:\mingw-w64\x86_64-7.2.0-posix-seh-rt_v5-rev1\mingw64\bin;%PATH% + - if [%COMPILER%]==[MinGW-gcc-6.3.0-32] set PATH=C:\msys64\usr\bin;C:\mingw-w64\i686-6.3.0-posix-dwarf-rt_v5-rev1\mingw64\bin;%PATH% - if [%COMPILER%]==[cl] cmake -G "Visual Studio 15 2017 Win64" .. - if [%COMPILER%]==[MinGW64-gcc-7.2.0-mingw] cmake -G "MinGW Makefiles" -DNOFORTRAN=1 .. - - if [%COMPILER%]==[MinGW64-gcc-7.2.0] cmake -G "MSYS Makefiles" -DBINARY=32 -DNOFORTRAN=1 .. + - if [%COMPILER%]==[MinGW-gcc-6.3.0-32] cmake -G "MSYS Makefiles" -DNOFORTRAN=1 .. - if [%COMPILER%]==[MinGW-gcc-5.3.0] cmake -G "MSYS Makefiles" -DNOFORTRAN=1 .. - if [%WITH_FORTRAN%]==[no] cmake -G "Ninja" -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER=clang-cl -DMSVC_STATIC_CRT=ON .. - if [%WITH_FORTRAN%]==[yes] cmake -G "Ninja" -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER=clang-cl -DCMAKE_Fortran_COMPILER=flang -DBUILD_WITHOUT_LAPACK=no -DNOFORTRAN=0 .. diff --git a/cmake/cc.cmake b/cmake/cc.cmake index 98f9298f8..37da0d6ed 100644 --- a/cmake/cc.cmake +++ b/cmake/cc.cmake @@ -3,7 +3,7 @@ ## Description: Ported from portion of OpenBLAS/Makefile.system ## Sets C related variables. -if (${CMAKE_C_COMPILER} STREQUAL "GNU" OR ${CMAKE_C_COMPILER} STREQUAL "LSB" OR ${CMAKE_C_COMPILER} STREQUAL "Clang") +if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU" OR ${CMAKE_C_COMPILER_ID} STREQUAL "LSB" OR ${CMAKE_C_COMPILER_ID} MATCHES "Clang") set(CCOMMON_OPT "${CCOMMON_OPT} -Wall") set(COMMON_PROF "${COMMON_PROF} -fno-inline") @@ -43,7 +43,7 @@ if (${CMAKE_C_COMPILER} STREQUAL "GNU" OR ${CMAKE_C_COMPILER} STREQUAL "LSB" OR endif () endif () -if (${CMAKE_C_COMPILER} STREQUAL "PGI") +if (${CMAKE_C_COMPILER_ID} STREQUAL "PGI") if (BINARY64) set(CCOMMON_OPT "${CCOMMON_OPT} -tp p7-64") else () @@ -51,7 +51,7 @@ if (${CMAKE_C_COMPILER} STREQUAL "PGI") endif () endif () -if (${CMAKE_C_COMPILER} STREQUAL "PATHSCALE") +if (${CMAKE_C_COMPILER_ID} STREQUAL "PATHSCALE") if (BINARY64) set(CCOMMON_OPT "${CCOMMON_OPT} -m64") else () @@ -59,7 +59,7 @@ if (${CMAKE_C_COMPILER} STREQUAL "PATHSCALE") endif () endif () -if (${CMAKE_C_COMPILER} STREQUAL "OPEN64") +if (${CMAKE_C_COMPILER_ID} STREQUAL "OPEN64") if (MIPS64) @@ -87,7 +87,7 @@ if (${CMAKE_C_COMPILER} STREQUAL "OPEN64") endif () endif () -if (${CMAKE_C_COMPILER} STREQUAL "SUN") +if (${CMAKE_C_COMPILER_ID} STREQUAL "SUN") set(CCOMMON_OPT "${CCOMMON_OPT} -w") if (X86) set(CCOMMON_OPT "${CCOMMON_OPT} -m32") diff --git a/cmake/prebuild.cmake b/cmake/prebuild.cmake index da185db5a..086df1943 100644 --- a/cmake/prebuild.cmake +++ b/cmake/prebuild.cmake @@ -133,6 +133,10 @@ if (DEFINED CORE AND CMAKE_CROSSCOMPILING AND NOT (${HOST_OS} STREQUAL "WINDOWSS set(CGEMM_UNROLL_N 2) set(ZGEMM_UNROLL_M 2) set(ZGEMM_UNROLL_N 2) + set(CGEMM3M_UNROLL_M 8) + set(CGEMM3M_UNROLL_N 4) + set(ZGEMM3M_UNROLL_M 4) + set(ZGEMM3M_UNROLL_N 4) elseif ("${TCORE}" STREQUAL "ARMV7") file(APPEND ${TARGET_CONF_TEMP} "#define L1_DATA_SIZE\t65536\n" diff --git a/cmake/system_check.cmake b/cmake/system_check.cmake index 610f689e0..c4a553c5a 100644 --- a/cmake/system_check.cmake +++ b/cmake/system_check.cmake @@ -39,10 +39,18 @@ elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "ppc.*|power.*|Power.*") elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "mips64.*") set(MIPS64 1) elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "amd64.*|x86_64.*|AMD64.*") - if("${CMAKE_SIZEOF_VOID_P}" EQUAL "8") - set(X86_64 1) + if (NOT BINARY) + if("${CMAKE_SIZEOF_VOID_P}" EQUAL "8") + set(X86_64 1) + else() + set(X86 1) + endif() else() - set(X86 1) + if (${BINARY} EQUAL "64") + set(X86_64 1) + else () + set(X86 1) + endif() endif() elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "i686.*|i386.*|x86.*|amd64.*|AMD64.*") set(X86 1) @@ -54,6 +62,22 @@ elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64.*|AARCH64.*)") else() set(ARM 1) endif() +elseif (${CMAKE_CROSSCOMPILING}) + if (${TARGET} STREQUAL "CORE2") + if (NOT BINARY) + set(X86 1) + elseif (${BINARY} EQUAL "64") + set(X86_64 1) + else () + set(X86 1) + endif() + elseif (${TARGET} STREQUAL "ARMV7") + set(ARM 1) + else() + set(ARM64 1) + endif () +else () + message(WARNING "Target ARCH could not be determined, got \"${CMAKE_SYSTEM_PROCESSOR}\"") endif() if (X86_64) @@ -92,4 +116,3 @@ set (CCOMMON_OPT "${CCOMMON_OPT} -DNO_AVX512") endif() file(REMOVE "avx512.tmp" "avx512.o") endif() - From 459bb9291db0a9a97718cb312c77f8ea3dba7c60 Mon Sep 17 00:00:00 2001 From: Guillaume Horel Date: Tue, 10 Sep 2019 17:10:33 -0400 Subject: [PATCH 17/19] fix error codes --- interface/lapack/trtrs.c | 6 +++--- interface/lapack/ztrtrs.c | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/interface/lapack/trtrs.c b/interface/lapack/trtrs.c index 261b07ec6..96dde1618 100644 --- a/interface/lapack/trtrs.c +++ b/interface/lapack/trtrs.c @@ -103,8 +103,8 @@ int NAME(char *UPLO, char* TRANS, char* DIAG, blasint *N, blasint *NRHS, FLOAT * if (diag_arg == 'U') diag = 0; if (diag_arg == 'N') diag = 1; - if (args.ldb < MAX(1, args.m)) info = 7; - if (args.lda < MAX(1, args.m)) info = 9; + if (args.ldb < MAX(1, args.m)) info = 9; + if (args.lda < MAX(1, args.m)) info = 7; if (args.n < 0) info = 5; if (args.m < 0) info = 4; if (trans < 0) info = 2; @@ -112,7 +112,7 @@ int NAME(char *UPLO, char* TRANS, char* DIAG, blasint *N, blasint *NRHS, FLOAT * if (diag < 0) info = 3; if (info != 0) { - BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME)); + BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME) - 1); *Info = - info; return 0; } diff --git a/interface/lapack/ztrtrs.c b/interface/lapack/ztrtrs.c index 0536fc5d3..4ee51435b 100644 --- a/interface/lapack/ztrtrs.c +++ b/interface/lapack/ztrtrs.c @@ -103,8 +103,8 @@ int NAME(char *UPLO, char* TRANS, char* DIAG, blasint *N, blasint *NRHS, FLOAT * if (diag_arg == 'U') diag = 0; if (diag_arg == 'N') diag = 1; - if (args.ldb < MAX(1, args.m)) info = 7; - if (args.lda < MAX(1, args.m)) info = 9; + if (args.ldb < MAX(1, args.m)) info = 9; + if (args.lda < MAX(1, args.m)) info = 7; if (args.n < 0) info = 5; if (args.m < 0) info = 4; if (trans < 0) info = 2; @@ -112,7 +112,7 @@ int NAME(char *UPLO, char* TRANS, char* DIAG, blasint *N, blasint *NRHS, FLOAT * if (diag < 0) info = 3; if (info != 0) { - BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME)); + BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME) - 1); *Info = - info; return 0; } From 6cb47ea3f0a6d4263cca3d2649b8512a6b53192d Mon Sep 17 00:00:00 2001 From: Guillaume Horel Date: Tue, 10 Sep 2019 17:11:01 -0400 Subject: [PATCH 18/19] fix Makefile --- lapack/trtrs/Makefile | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/lapack/trtrs/Makefile b/lapack/trtrs/Makefile index 587d94e3d..a3b8f4322 100644 --- a/lapack/trtrs/Makefile +++ b/lapack/trtrs/Makefile @@ -36,7 +36,7 @@ strtrs_LNN_single.$(SUFFIX) : trtrs_single.c $(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DUPLO -UTRANS -DDIAG $< -o $(@F) strtrs_LTU_single.$(SUFFIX) : trtrs_single.c - $(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DUPLO -DTRANS -DDIAG $< -o $(@F) + $(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DUPLO -DTRANS -UDIAG $< -o $(@F) strtrs_LTN_single.$(SUFFIX) : trtrs_single.c $(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DUPLO -DTRANS -DDIAG $< -o $(@F) @@ -60,7 +60,7 @@ strtrs_LNN_parallel.$(SUFFIX) : trtrs_parallel.c $(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DUPLO -UTRANS -DDIAG $< -o $(@F) strtrs_LTU_parallel.$(SUFFIX) : trtrs_parallel.c - $(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DUPLO -DTRANS -DDIAG $< -o $(@F) + $(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DUPLO -DTRANS -UDIAG $< -o $(@F) strtrs_LTN_parallel.$(SUFFIX) : trtrs_parallel.c $(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DUPLO -DTRANS -DDIAG $< -o $(@F) @@ -84,7 +84,7 @@ dtrtrs_LNN_single.$(SUFFIX) : trtrs_single.c $(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -DUPLO -UTRANS -DDIAG $< -o $(@F) dtrtrs_LTU_single.$(SUFFIX) : trtrs_single.c - $(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -DUPLO -DTRANS -DDIAG $< -o $(@F) + $(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -DUPLO -DTRANS -UDIAG $< -o $(@F) dtrtrs_LTN_single.$(SUFFIX) : trtrs_single.c $(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -DUPLO -DTRANS -DDIAG $< -o $(@F) @@ -108,7 +108,7 @@ dtrtrs_LNN_parallel.$(SUFFIX) : trtrs_parallel.c $(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -DUPLO -UTRANS -DDIAG $< -o $(@F) dtrtrs_LTU_parallel.$(SUFFIX) : trtrs_parallel.c - $(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -DUPLO -DTRANS -DDIAG $< -o $(@F) + $(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -DUPLO -DTRANS -UDIAG $< -o $(@F) dtrtrs_LTN_parallel.$(SUFFIX) : trtrs_parallel.c $(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -DUPLO -DTRANS -DDIAG $< -o $(@F) @@ -132,7 +132,7 @@ qtrtrs_LNN_single.$(SUFFIX) : trtrs_single.c $(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -DUPLO -UTRANS -DDIAG $< -o $(@F) qtrtrs_LTU_single.$(SUFFIX) : trtrs_single.c - $(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -DUPLO -DTRANS -DDIAG $< -o $(@F) + $(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -DUPLO -DTRANS -UDIAG $< -o $(@F) qtrtrs_LTN_single.$(SUFFIX) : trtrs_single.c $(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -DUPLO -DTRANS -DDIAG $< -o $(@F) @@ -156,7 +156,7 @@ qtrtrs_LNN_parallel.$(SUFFIX) : trtrs_parallel.c $(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -DUPLO -UTRANS -DDIAG $< -o $(@F) qtrtrs_LTU_parallel.$(SUFFIX) : trtrs_parallel.c - $(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -DUPLO -DTRANS -DDIAG $< -o $(@F) + $(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -DUPLO -DTRANS -UDIAG $< -o $(@F) qtrtrs_LTN_parallel.$(SUFFIX) : trtrs_parallel.c $(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -DUPLO -DTRANS -DDIAG $< -o $(@F) From 5d6525c87cfc6a7ba69e24eefb8b053d480bc97b Mon Sep 17 00:00:00 2001 From: Guillaume Horel Date: Tue, 10 Sep 2019 17:30:57 -0400 Subject: [PATCH 19/19] more bugfix --- interface/lapack/trtrs.c | 6 +++--- interface/lapack/ztrtrs.c | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/interface/lapack/trtrs.c b/interface/lapack/trtrs.c index 96dde1618..54fbe8394 100644 --- a/interface/lapack/trtrs.c +++ b/interface/lapack/trtrs.c @@ -122,11 +122,11 @@ int NAME(char *UPLO, char* TRANS, char* DIAG, blasint *N, blasint *NRHS, FLOAT * *Info = 0; - if (args.m == 0 || args.n == 0) return 0; + if (args.m == 0) return 0; if (diag) { - if (AMIN_K(args.n, args.a, args.lda + 1) == ZERO) { - *Info = IAMIN_K(args.n, args.a, args.lda + 1); + if (AMIN_K(args.m, args.a, args.lda + 1) == ZERO) { + *Info = IAMIN_K(args.m, args.a, args.lda + 1); return 0; } } diff --git a/interface/lapack/ztrtrs.c b/interface/lapack/ztrtrs.c index 4ee51435b..7f1bd9af4 100644 --- a/interface/lapack/ztrtrs.c +++ b/interface/lapack/ztrtrs.c @@ -122,11 +122,11 @@ int NAME(char *UPLO, char* TRANS, char* DIAG, blasint *N, blasint *NRHS, FLOAT * *Info = 0; - if (args.m == 0 || args.n == 0) return 0; + if (args.m == 0) return 0; if (diag) { - if (AMIN_K(args.n, args.a, args.lda + 1) == ZERO) { - *Info = IAMIN_K(args.n, args.a, args.lda + 1); + if (AMIN_K(args.m, args.a, args.lda + 1) == ZERO) { + *Info = IAMIN_K(args.m, args.a, args.lda + 1); return 0; } }