commit
0d1f30a297
|
@ -17,6 +17,7 @@ endif
|
|||
ifeq ($(CORE), POWER9)
|
||||
ifneq ($(C_COMPILER), PGI)
|
||||
CCOMMON_OPT += -Ofast -mvsx -fno-fast-math
|
||||
ifeq ($(C_COMPILER), GCC)
|
||||
ifneq ($(GCCVERSIONGT4), 1)
|
||||
$(warning your compiler is too old to fully support POWER9, getting a newer version of gcc is recommended)
|
||||
CCOMMON_OPT += -mcpu=power8 -mtune=power8
|
||||
|
@ -24,10 +25,14 @@ else
|
|||
CCOMMON_OPT += -mcpu=power9 -mtune=power9
|
||||
endif
|
||||
else
|
||||
CCOMMON_OPT += -mcpu=power9 -mtune=power9
|
||||
endif
|
||||
else
|
||||
CCOMMON_OPT += -fast -Mvect=simd -Mcache_align
|
||||
endif
|
||||
ifneq ($(F_COMPILER), PGI)
|
||||
FCOMMON_OPT += -O2 -frecursive -fno-fast-math
|
||||
ifeq ($(C_COMPILER), GCC)
|
||||
ifneq ($(GCCVERSIONGT4), 1)
|
||||
$(warning your compiler is too old to fully support POWER9, getting a newer version of gcc is recommended)
|
||||
FCOMMON_OPT += -mcpu=power8 -mtune=power8
|
||||
|
@ -35,6 +40,9 @@ else
|
|||
FCOMMON_OPT += -mcpu=power9 -mtune=power9
|
||||
endif
|
||||
else
|
||||
FCOMMON_OPT += -mcpu=power9 -mtune=power9
|
||||
endif
|
||||
else
|
||||
FCOMMON_OPT += -O2 -Mrecursive
|
||||
endif
|
||||
endif
|
||||
|
|
|
@ -526,7 +526,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
|
|||
BLASLONG width, i, j, k;
|
||||
BLASLONG n, n_from, n_to;
|
||||
int mode, mask;
|
||||
double dnum;
|
||||
double dnum, di, dinum;
|
||||
|
||||
if ((nthreads == 1) || (args -> n < nthreads * SWITCH_RATIO)) {
|
||||
SYRK_LOCAL(args, range_m, range_n, sa, sb, 0);
|
||||
|
@ -601,9 +601,14 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
|
|||
|
||||
if (nthreads - num_cpu > 1) {
|
||||
|
||||
double di = (double)i;
|
||||
di = (double)i;
|
||||
|
||||
width = (((BLASLONG)((sqrt(di * di + dnum) - di) + mask)/(mask+1)) * (mask+1) );
|
||||
dinum = di * di + dnum;
|
||||
|
||||
if (dinum > 0)
|
||||
width = (((BLASLONG)((sqrt(dinum) - di) + mask)/(mask+1)) * (mask+1) );
|
||||
else
|
||||
width = (((BLASLONG)(- di + mask)/(mask+1)) * (mask+1) );
|
||||
|
||||
if (num_cpu == 0) width = n - (((n - width)/(mask+1)) * (mask+1) );
|
||||
|
||||
|
@ -643,10 +648,15 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
|
|||
|
||||
if (nthreads - num_cpu > 1) {
|
||||
|
||||
double di = (double)i;
|
||||
di = (double)i;
|
||||
|
||||
width = (((BLASLONG)((sqrt(di * di + dnum) - di) + mask)/(mask+1)) * (mask+1));
|
||||
dinum = di * di +dnum;
|
||||
|
||||
if (dinum > 0)
|
||||
width = (((BLASLONG)((sqrt(di * di + dnum) - di) + mask)/(mask+1)) * (mask+1));
|
||||
else
|
||||
width = (((BLASLONG)(- di + mask)/(mask+1)) * (mask+1));
|
||||
|
||||
if ((width > n - i) || (width < mask)) width = n - i;
|
||||
|
||||
} else {
|
||||
|
|
|
@ -2513,7 +2513,7 @@ void LAPACK_zgesvdq(
|
|||
lapack_complex_double* U, lapack_int const* ldu,
|
||||
lapack_complex_double* V, lapack_int const* ldv, lapack_int* numrank,
|
||||
lapack_int* iwork, lapack_int const* liwork,
|
||||
lapack_complex_float* cwork, lapack_int* lcwork,
|
||||
lapack_complex_double* cwork, lapack_int* lcwork,
|
||||
double* rwork, lapack_int const* lrwork,
|
||||
lapack_int* info );
|
||||
|
||||
|
@ -3650,45 +3650,58 @@ void LAPACK_zggrqf(
|
|||
lapack_int* info );
|
||||
|
||||
#define LAPACK_sggsvd LAPACK_GLOBAL(sggsvd,SGGSVD)
|
||||
lapack_int LAPACK_sggsvd( char const* jobu, char const* jobv, char const* jobq,
|
||||
lapack_int* m, lapack_int* n, lapack_int* p,
|
||||
lapack_int* k, lapack_int* l, float* a,
|
||||
lapack_int* lda, float* b, lapack_int* ldb,
|
||||
float* alpha, float* beta, float* u, lapack_int* ldu,
|
||||
float* v, lapack_int* ldv, float* q, lapack_int* ldq,
|
||||
float* work, lapack_int* iwork, lapack_int* info );
|
||||
lapack_int LAPACK_sggsvd(
|
||||
char const* jobu, char const* jobv, char const* jobq,
|
||||
lapack_int const* m, lapack_int const* n, lapack_int const* p,
|
||||
lapack_int* k, lapack_int* l,
|
||||
float* a, lapack_int const* lda,
|
||||
float* b, lapack_int const* ldb,
|
||||
float* alpha, float* beta,
|
||||
float* u, lapack_int const* ldu,
|
||||
float* v, lapack_int const* ldv,
|
||||
float* q, lapack_int const* ldq,
|
||||
float* work, lapack_int* iwork, lapack_int* info );
|
||||
|
||||
#define LAPACK_dggsvd LAPACK_GLOBAL(dggsvd,DGGSVD)
|
||||
lapack_int LAPACK_dggsvd( char const* jobu, char const* jobv, char const* jobq,
|
||||
lapack_int* m, lapack_int* n, lapack_int* p,
|
||||
lapack_int* k, lapack_int* l, double* a,
|
||||
lapack_int* lda, double* b, lapack_int* ldb,
|
||||
double* alpha, double* beta, double* u,
|
||||
lapack_int* ldu, double* v, lapack_int* ldv, double* q,
|
||||
lapack_int* ldq, float* work, lapack_int* iwork, lapack_int* info );
|
||||
lapack_int LAPACK_dggsvd(
|
||||
char const* jobu, char const* jobv, char const* jobq,
|
||||
lapack_int const* m, lapack_int const* n, lapack_int const* p,
|
||||
lapack_int* k, lapack_int* l,
|
||||
double* a, lapack_int const* lda,
|
||||
double* b, lapack_int const* ldb,
|
||||
double* alpha, double* beta,
|
||||
double* u, lapack_int const* ldu,
|
||||
double* v, lapack_int const* ldv,
|
||||
double* q, lapack_int const* ldq,
|
||||
double* work, lapack_int* iwork, lapack_int* info );
|
||||
|
||||
#define LAPACK_cggsvd LAPACK_GLOBAL(cggsvd,CGGSVD)
|
||||
lapack_int LAPACK_cggsvd( char const* jobu, char const* jobv, char const* jobq,
|
||||
lapack_int* m, lapack_int* n, lapack_int* p,
|
||||
lapack_int* k, lapack_int* l,
|
||||
lapack_complex_float* a, lapack_int* lda,
|
||||
lapack_complex_float* b, lapack_int* ldb,
|
||||
float* alpha, float* beta, lapack_complex_float* u,
|
||||
lapack_int* ldu, lapack_complex_float* v,
|
||||
lapack_int* ldv, lapack_complex_float* q,
|
||||
lapack_int* ldq, float* work, lapack_int* rwork, lapack_int* iwork, lapack_int *info );
|
||||
lapack_int LAPACK_cggsvd(
|
||||
char const* jobu, char const* jobv, char const* jobq,
|
||||
lapack_int const* m, lapack_int const* n, lapack_int const* p,
|
||||
lapack_int* k, lapack_int* l,
|
||||
lapack_complex_float* a, lapack_int const* lda,
|
||||
lapack_complex_float* b, lapack_int const* ldb,
|
||||
float* alpha, float* beta,
|
||||
lapack_complex_float* u, lapack_int const* ldu,
|
||||
lapack_complex_float* v, lapack_int const* ldv,
|
||||
lapack_complex_float* q, lapack_int const* ldq,
|
||||
lapack_complex_float* work, float* rwork,
|
||||
lapack_int* iwork, lapack_int* info );
|
||||
|
||||
#define LAPACK_zggsvd LAPACK_GLOBAL(zggsvd,ZGGSVD)
|
||||
lapack_int LAPACK_zggsvd( char const* jobu, char const* jobv, char const* jobq,
|
||||
lapack_int* m, lapack_int* n, lapack_int* p,
|
||||
lapack_int* k, lapack_int* l,
|
||||
lapack_complex_double* a, lapack_int* lda,
|
||||
lapack_complex_double* b, lapack_int* ldb,
|
||||
double* alpha, double* beta,
|
||||
lapack_complex_double* u, lapack_int* ldu,
|
||||
lapack_complex_double* v, lapack_int* ldv,
|
||||
lapack_complex_double* q, lapack_int* ldq,
|
||||
float* work, lapack_int* rwork, lapack_int* iwork, lapack_int* info );
|
||||
lapack_int LAPACK_zggsvd(
|
||||
char const* jobu, char const* jobv, char const* jobq,
|
||||
lapack_int const* m, lapack_int const* n, lapack_int const* p,
|
||||
lapack_int* k, lapack_int* l,
|
||||
lapack_complex_double* a, lapack_int const* lda,
|
||||
lapack_complex_double* b, lapack_int const* ldb,
|
||||
double* alpha, double* beta,
|
||||
lapack_complex_double* u, lapack_int const* ldu,
|
||||
lapack_complex_double* v, lapack_int const* ldv,
|
||||
lapack_complex_double* q, lapack_int const* ldq,
|
||||
lapack_complex_double* work, double* rwork,
|
||||
lapack_int* iwork, lapack_int* info );
|
||||
|
||||
#define LAPACK_cggsvd3 LAPACK_GLOBAL(cggsvd3,CGGSVD3)
|
||||
void LAPACK_cggsvd3(
|
||||
|
@ -3753,49 +3766,58 @@ void LAPACK_zggsvd3(
|
|||
lapack_int* info );
|
||||
|
||||
#define LAPACK_sggsvp LAPACK_GLOBAL(sggsvp,SGGSVP)
|
||||
lapack_int LAPACK_sggsvp( char const* jobu, char const* jobv, char const* jobq,
|
||||
lapack_int* m, lapack_int* p, lapack_int* n, float* a,
|
||||
lapack_int* lda, float* b, lapack_int* ldb, float* tola,
|
||||
float* tolb, lapack_int* k, lapack_int* l, float* u,
|
||||
lapack_int* ldu, float* v, lapack_int* ldv, float* q,
|
||||
lapack_int* ldq, lapack_int* iwork, float* tau,
|
||||
float* work, lapack_int* info);
|
||||
lapack_int LAPACK_sggsvp(
|
||||
char const* jobu, char const* jobv, char const* jobq,
|
||||
lapack_int const* m, lapack_int const* p, lapack_int const* n,
|
||||
float* a, lapack_int const* lda,
|
||||
float* b, lapack_int const* ldb,
|
||||
float* tola, float* tolb,
|
||||
lapack_int* k, lapack_int* l,
|
||||
float* u, lapack_int const* ldu,
|
||||
float* v, lapack_int const* ldv,
|
||||
float* q, lapack_int const* ldq,
|
||||
lapack_int* iwork, float* tau,
|
||||
float* work, lapack_int* info );
|
||||
|
||||
#define LAPACK_dggsvp LAPACK_GLOBAL(dggsvp,DGGSVP)
|
||||
lapack_int LAPACK_dggsvp( char const* jobu, char const* jobv, char const* jobq,
|
||||
lapack_int* m, lapack_int* p, lapack_int* n, double* a,
|
||||
lapack_int* lda, double* b, lapack_int* ldb,
|
||||
double* tola, double* tolb, lapack_int* k,
|
||||
lapack_int* l, double* u, lapack_int* ldu, double* v,
|
||||
lapack_int* ldv, double* q, lapack_int* ldq,
|
||||
lapack_int* iwork, double* tau, double* work,
|
||||
lapack_int* info);
|
||||
lapack_int LAPACK_dggsvp(
|
||||
char const* jobu, char const* jobv, char const* jobq,
|
||||
lapack_int const* m, lapack_int const* p, lapack_int const* n,
|
||||
double* a, lapack_int const* lda,
|
||||
double* b, lapack_int const* ldb,
|
||||
double* tola, double* tolb,
|
||||
lapack_int* k, lapack_int* l,
|
||||
double* u, lapack_int const* ldu,
|
||||
double* v, lapack_int const* ldv,
|
||||
double* q, lapack_int const* ldq,
|
||||
lapack_int* iwork, double* tau,
|
||||
double* work, lapack_int* info );
|
||||
|
||||
#define LAPACK_cggsvp LAPACK_GLOBAL(cggsvp,CGGSVP)
|
||||
lapack_int LAPACK_cggsvp( char const* jobu, char const* jobv, char const* jobq,
|
||||
lapack_int* m, lapack_int* p, lapack_int* n,
|
||||
lapack_complex_float* a, lapack_int* lda,
|
||||
lapack_complex_float* b, lapack_int* ldb, float* tola,
|
||||
float* tolb, lapack_int* k, lapack_int* l,
|
||||
lapack_complex_float* u, lapack_int* ldu,
|
||||
lapack_complex_float* v, lapack_int* ldv,
|
||||
lapack_complex_float* q, lapack_int* ldq,
|
||||
lapack_int* iwork, lapack_int* rwork,
|
||||
lapack_complex_float* tau, lapack_complex_float* work,
|
||||
lapack_int* info);
|
||||
lapack_int LAPACK_cggsvp(
|
||||
char const* jobu, char const* jobv, char const* jobq,
|
||||
lapack_int const* m, lapack_int const* p, lapack_int const* n,
|
||||
lapack_complex_float* a, lapack_int const* lda,
|
||||
lapack_complex_float* b, lapack_int const* ldb,
|
||||
float* tola, float* tolb, lapack_int* k, lapack_int* l,
|
||||
lapack_complex_float* u, lapack_int const* ldu,
|
||||
lapack_complex_float* v, lapack_int const* ldv,
|
||||
lapack_complex_float* q, lapack_int const* ldq,
|
||||
lapack_int* iwork, float* rwork, lapack_complex_float* tau,
|
||||
lapack_complex_float* work, lapack_int* info );
|
||||
|
||||
#define LAPACK_zggsvp LAPACK_GLOBAL(zggsvp,ZGGSVP)
|
||||
lapack_int LAPACK_zggsvp( char const* jobu, char const* jobv, char const* jobq,
|
||||
lapack_int* m, lapack_int* p, lapack_int* n,
|
||||
lapack_complex_double* a, lapack_int* lda,
|
||||
lapack_complex_double* b, lapack_int* ldb,
|
||||
double* tola, double* tolb, lapack_int* k,
|
||||
lapack_int* l, lapack_complex_double* u,
|
||||
lapack_int* ldu, lapack_complex_double* v,
|
||||
lapack_int* ldv, lapack_complex_double* q,
|
||||
lapack_int* ldq, lapack_int* iwork, lapack_int* rwork,
|
||||
lapack_complex_double* tau, lapack_complex_double* work,
|
||||
lapack_int* info);
|
||||
lapack_int LAPACK_zggsvp(
|
||||
char const* jobu, char const* jobv, char const* jobq,
|
||||
lapack_int const* m, lapack_int const* p, lapack_int const* n,
|
||||
lapack_complex_double* a, lapack_int const* lda,
|
||||
lapack_complex_double* b, lapack_int const* ldb,
|
||||
double* tola, double* tolb, lapack_int* k, lapack_int* l,
|
||||
lapack_complex_double* u, lapack_int const* ldu,
|
||||
lapack_complex_double* v, lapack_int const* ldv,
|
||||
lapack_complex_double* q, lapack_int const* ldq,
|
||||
lapack_int* iwork, double* rwork, lapack_complex_double* tau,
|
||||
lapack_complex_double* work, lapack_int* info );
|
||||
|
||||
#define LAPACK_cggsvp3 LAPACK_GLOBAL(cggsvp3,CGGSVP3)
|
||||
void LAPACK_cggsvp3(
|
||||
|
|
|
@ -47,8 +47,8 @@ lapack_int LAPACKE_cgesvdq( int matrix_layout, char joba, char jobp,
|
|||
lapack_complex_float* cwork = NULL;
|
||||
lapack_complex_float cwork_query;
|
||||
lapack_int lrwork = -1;
|
||||
double* rwork = NULL;
|
||||
double rwork_query;
|
||||
float* rwork = NULL;
|
||||
float rwork_query;
|
||||
lapack_int i;
|
||||
if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) {
|
||||
LAPACKE_xerbla( "LAPACKE_cgesvdq", -1 );
|
||||
|
@ -84,7 +84,7 @@ lapack_int LAPACKE_cgesvdq( int matrix_layout, char joba, char jobp,
|
|||
info = LAPACK_WORK_MEMORY_ERROR;
|
||||
goto exit_level_0;
|
||||
}
|
||||
rwork = (double*)LAPACKE_malloc( sizeof(double) * lrwork );
|
||||
rwork = (float*)LAPACKE_malloc( sizeof(float) * lrwork );
|
||||
if( rwork == NULL ) {
|
||||
info = LAPACK_WORK_MEMORY_ERROR;
|
||||
goto exit_level_0;
|
||||
|
|
|
@ -680,8 +680,8 @@
|
|||
* the one from above. Compare it with D1 computed
|
||||
* using the CHBTRD.
|
||||
*
|
||||
CALL DLASET( 'Full', N, 1, ZERO, ZERO, SD, 1 )
|
||||
CALL DLASET( 'Full', N, 1, ZERO, ZERO, SE, 1 )
|
||||
CALL SLASET( 'Full', N, 1, ZERO, ZERO, SD, 1 )
|
||||
CALL SLASET( 'Full', N, 1, ZERO, ZERO, SE, 1 )
|
||||
CALL CLACPY( ' ', K+1, N, A, LDA, U, LDU )
|
||||
LH = MAX(1, 4*N)
|
||||
LW = LWORK - LH
|
||||
|
@ -753,8 +753,8 @@
|
|||
* the one from above. Compare it with D1 computed
|
||||
* using the CHBTRD.
|
||||
*
|
||||
CALL DLASET( 'Full', N, 1, ZERO, ZERO, SD, 1 )
|
||||
CALL DLASET( 'Full', N, 1, ZERO, ZERO, SE, 1 )
|
||||
CALL SLASET( 'Full', N, 1, ZERO, ZERO, SD, 1 )
|
||||
CALL SLASET( 'Full', N, 1, ZERO, ZERO, SE, 1 )
|
||||
CALL CLACPY( ' ', K+1, N, A, LDA, U, LDU )
|
||||
LH = MAX(1, 4*N)
|
||||
LW = LWORK - LH
|
||||
|
|
|
@ -670,8 +670,8 @@
|
|||
* the one from above. Compare it with D1 computed
|
||||
* using the SSBTRD.
|
||||
*
|
||||
CALL SLASET( 'Full', N, 1, ZERO, ZERO, SD, 1 )
|
||||
CALL SLASET( 'Full', N, 1, ZERO, ZERO, SE, 1 )
|
||||
CALL SLASET( 'Full', N, 1, ZERO, ZERO, SD, N )
|
||||
CALL SLASET( 'Full', N, 1, ZERO, ZERO, SE, N )
|
||||
CALL SLACPY( ' ', K+1, N, A, LDA, U, LDU )
|
||||
LH = MAX(1, 4*N)
|
||||
LW = LWORK - LH
|
||||
|
|
|
@ -999,8 +999,8 @@
|
|||
* the one from above. Compare it with D1 computed
|
||||
* using the 1-stage.
|
||||
*
|
||||
CALL SLASET( 'Full', N, 1, ZERO, ZERO, SD, 1 )
|
||||
CALL SLASET( 'Full', N, 1, ZERO, ZERO, SE, 1 )
|
||||
CALL SLASET( 'Full', N, 1, ZERO, ZERO, SD, N )
|
||||
CALL SLASET( 'Full', N, 1, ZERO, ZERO, SE, N )
|
||||
CALL SLACPY( "U", N, N, A, LDA, V, LDU )
|
||||
LH = MAX(1, 4*N)
|
||||
LW = LWORK - LH
|
||||
|
|
Loading…
Reference in New Issue