/***************************************************************************
 * Copyright (c) 2013, The OpenBLAS Project
 * All rights reserved.
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 * 1. Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in
 * the documentation and/or other materials provided with the
 * distribution.
 * 3. Neither the name of the OpenBLAS project nor the names of
 * its contributors may be used to endorse or promote products
 * derived from this software without specific prior written permission.
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
 * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 * *****************************************************************************/

/**************************************************************************************
 * 2014/05/22 Saar
 * TEST double precision unblocked : OK
 * TEST double precision blocked : OK
 * 2014/05/23
 * TEST single precision blocked : OK
 *
 **************************************************************************************/

#include <stdio.h>
|
|
#include "common.h"

// static FLOAT dp1 = 1.;
// static FLOAT dm1 = -1.;

/*
 * Kernel selection.
 *
 * TRTI2, TRMM and TRSM are aliases used by the driver below.  When the
 * file is compiled with UNIT defined they expand to the "..U" suffixed
 * symbols (TRTI2_UU, TRMM_LNUU, TRSM_RNUU); otherwise they expand to the
 * "..N" suffixed symbols (TRTI2_UN, TRMM_LNUN, TRSM_RNUN).
 *
 * NOTE(review): UNIT presumably selects the unit-diagonal variants of the
 * kernels -- the kernels themselves are defined outside this file.
 */
#if defined(UNIT)
#  define TRTI2 TRTI2_UU
#  define TRMM  TRMM_LNUU
#  define TRSM  TRSM_RNUU
#else
#  define TRTI2 TRTI2_UN
#  define TRMM  TRMM_LNUN
#  define TRSM  TRSM_RNUN
#endif
|
|
|
|
|
|
/*
 * Blocked driver built on top of the TRMM, TRSM and TRTI2 kernels.
 *
 * The order n is read from args->n and the panel width from GEMM_Q.
 *
 *  - If n <= GEMM_Q, the whole problem is handed to TRTI2 in a single call
 *    and args is left untouched by this function.
 *  - Otherwise the columns are visited in panels of at most GEMM_Q columns.
 *    For the panel starting at column j (width jb) the function calls, in
 *    this order:
 *      1. TRMM  with args->a = start of the matrix,
 *                    args->b = element (0, j), args->beta = { ONE, ZERO}
 *      2. TRSM  with args->a = element (j, j), args->beta = {-ONE, ZERO}
 *      3. TRTI2 with args->a = element (j, j)
 *    each with args->m = j and args->n = jb.
 *
 * Parameters:
 *   args    - argument block; n, lda and a are read.  On the blocked path
 *             ldb, ldc, alpha, n, m, a, b and beta are overwritten and are
 *             NOT restored before returning.
 *   range_m - not used by this function.
 *   range_n - forwarded unchanged to every TRTI2 call.
 *   sa, sb  - forwarded unchanged to every kernel call.
 *   myid    - not used by this function (kernels are called with 0).
 *
 * Returns 0 in all cases; the return values of the kernels are ignored.
 *
 * NOTE(review): the macro suffixes and the file's test log suggest this is
 * the blocked inverse of an upper triangular matrix; the exact semantics of
 * the three kernels are defined elsewhere -- confirm there.
 */
blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLOAT *sb, BLASLONG myid) {

  /* Two-element scalars installed in args->beta before TRMM / TRSM. */
  FLOAT scale_pos[2] = { ONE, ZERO};
  FLOAT scale_neg[2] = {-ONE, ZERO};

  BLASLONG order     = args -> n;
  BLASLONG panel_max = GEMM_Q;

  BLASLONG stride;          /* args->lda, also copied into ldb / ldc      */
  BLASLONG col;             /* first column of the current panel          */
  BLASLONG width;           /* number of columns in the current panel     */
  FLOAT *base;              /* original args->a                           */
  FLOAT *panel;             /* address of element (0,   col)              */
  FLOAT *diag;              /* address of element (col, col)              */

  /* Small problem: one call to the TRTI2 kernel covers everything. */
  if (order <= panel_max) {
    TRTI2(args, NULL, range_n, sa, sb, 0);
    return 0;
  }

  stride = args -> lda;
  base   = (FLOAT *) args -> a;

  /* Every operand passed to the kernels lives inside the same matrix. */
  args -> ldb   = stride;
  args -> ldc   = stride;
  args -> alpha = NULL;

  col = 0;
  while (col < order) {

    /* The last panel may be narrower than panel_max. */
    width = order - col;
    if (width > panel_max) {
      width = panel_max;
    }

    panel = base + (col * stride) * COMPSIZE;
    diag  = base + (col + col * stride) * COMPSIZE;

    args -> n = width;
    args -> m = col;

    /* Step 1: TRMM on the panel above the diagonal block. */
    args -> a    = base;
    args -> b    = panel;
    args -> beta = scale_pos;
    TRMM(args, NULL, NULL, sa, sb, 0);

    /* Step 2: TRSM against the diagonal block, scalar {-ONE, ZERO}. */
    args -> a    = diag;
    args -> beta = scale_neg;
    TRSM(args, NULL, NULL, sa, sb, 0);

    /* Step 3: TRTI2 on the diagonal block; args->a is set again because
       the previous kernel received a mutable args pointer. */
    args -> a = diag;
    TRTI2(args, NULL, range_n, sa, sb, 0);

    col += panel_max;
  }

  return 0;
}
|