Merge pull request #16 from xianyi/develop

rebase
This commit is contained in:
Martin Kroeker 2021-03-11 11:48:37 +01:00 committed by GitHub
commit b1215f2f8c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
17 changed files with 790 additions and 94 deletions

View File

@ -904,8 +904,8 @@ CCOMMON_OPT += -DF_INTERFACE_FLANG
FCOMMON_OPT += -Mrecursive -Kieee FCOMMON_OPT += -Mrecursive -Kieee
ifeq ($(OSNAME), Linux) ifeq ($(OSNAME), Linux)
ifeq ($(ARCH), x86_64) ifeq ($(ARCH), x86_64)
FLANG_VENDOR := $(shell `$(FC) --version|cut -f 1 -d "."|head -1`) FLANG_VENDOR := $(shell $(FC) --version|head -1 |cut -f 1 -d " ")
ifeq ($(FLANG_VENDOR),AOCC) ifeq ($(FLANG_VENDOR), AMD)
FCOMMON_OPT += -fno-unroll-loops FCOMMON_OPT += -fno-unroll-loops
endif endif
endif endif

View File

@ -74,6 +74,9 @@ static void *huge_malloc(BLASLONG size){
#if defined(__WIN32__) || defined(__WIN64__) || !defined(_POSIX_TIMERS) #if defined(__WIN32__) || defined(__WIN64__) || !defined(_POSIX_TIMERS)
struct timeval start, stop; struct timeval start, stop;
#elif defined(__APPLE__)
mach_timebase_info_data_t info;
uint64_t start = 0, stop = 0;
#else #else
struct timespec start = { 0, 0 }, stop = { 0, 0 }; struct timespec start = { 0, 0 }, stop = { 0, 0 };
#endif #endif
@ -82,6 +85,9 @@ double getsec()
{ {
#if defined(__WIN32__) || defined(__WIN64__) || !defined(_POSIX_TIMERS) #if defined(__WIN32__) || defined(__WIN64__) || !defined(_POSIX_TIMERS)
return (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; return (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
#elif defined(__APPLE__)
mach_timebase_info(&info);
return (double)(((stop - start) * info.numer)/info.denom) * 1.e-9;
#else #else
return (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_nsec - start.tv_nsec)) * 1.e-9; return (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_nsec - start.tv_nsec)) * 1.e-9;
#endif #endif
@ -90,6 +96,8 @@ double getsec()
void begin() { void begin() {
#if defined(__WIN32__) || defined(__WIN64__) || !defined(_POSIX_TIMERS) #if defined(__WIN32__) || defined(__WIN64__) || !defined(_POSIX_TIMERS)
gettimeofday( &start, (struct timezone *)0); gettimeofday( &start, (struct timezone *)0);
#elif defined(__APPLE__)
start = clock_gettime_nsec_np(CLOCK_UPTIME_RAW);
#else #else
clock_gettime(CLOCK_REALTIME, &start); clock_gettime(CLOCK_REALTIME, &start);
#endif #endif
@ -98,7 +106,9 @@ void begin() {
void end() { void end() {
#if defined(__WIN32__) || defined(__WIN64__) || !defined(_POSIX_TIMERS) #if defined(__WIN32__) || defined(__WIN64__) || !defined(_POSIX_TIMERS)
gettimeofday( &stop, (struct timezone *)0); gettimeofday( &stop, (struct timezone *)0);
#elif defined(__APPLE__)
stop = clock_gettime_nsec_np(CLOCK_UPTIME_RAW);
#else #else
clock_gettime(CLOCK_REALTIME, &stop); clock_gettime(CLOCK_REALTIME, &stop);
#endif #endif
} }

View File

@ -330,7 +330,7 @@ if ($link ne "") {
$flags =~ s/\@/\,/g; $flags =~ s/\@/\,/g;
$linker_L .= "-Wl,". $flags . " " ; $linker_L .= "-Wl,". $flags . " " ;
} }
if ($flags =~ /-lgomp/ && $CC =~ /clang/) { if ($flags =~ /-lgomp/ && $ENV{"CC"} =~ /clang/) {
$flags = "-lomp"; $flags = "-lomp";
} }

View File

@ -46,9 +46,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif #endif
#if defined(POWER8) || defined(POWER9) || defined(POWER10)
#if defined(__VEC__) || defined(__ALTIVEC__) #if defined(__VEC__) || defined(__ALTIVEC__)
#if defined(POWER8) || defined(POWER9)
#include "dasum_microk_power8.c" #include "dasum_microk_power8.c"
#elif defined(POWER10)
#include "dasum_microk_power10.c"
#endif #endif
#endif #endif
@ -110,6 +112,21 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
if ( inc_x == 1 ) if ( inc_x == 1 )
{ {
#if defined(POWER10)
if ( n >= 16 )
{
BLASLONG align = ((32 - ((uintptr_t)x & (uintptr_t)0x1F)) >> 3) & 0x3;
for (i = 0; i < align; i++) {
sumf += ABS(x[i]);
}
}
n1 = (n-i) & -16;
if ( n1 > 0 )
{
sumf += dasum_kernel_16(n1, &x[i]);
i+=n1;
}
#else
n1 = n & -16; n1 = n & -16;
if ( n1 > 0 ) if ( n1 > 0 )
{ {
@ -117,6 +134,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
sumf = dasum_kernel_16(n1, x); sumf = dasum_kernel_16(n1, x);
i=n1; i=n1;
} }
#endif
while(i < n) while(i < n)
{ {

View File

@ -0,0 +1,152 @@
/***************************************************************************
Copyright (c) 2021, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
#define HAVE_KERNEL_16 1
static double dasum_kernel_16 (long n, double *x)
{
double sum;
__vector double t0;
__vector double t1;
__vector double t2;
__vector double t3;
__asm__
(
"dcbt 0, %2 \n\t"
"xxlxor 32, 32, 32 \n\t"
"xxlxor 33, 33, 33 \n\t"
"xxlxor 34, 34, 34 \n\t"
"xxlxor 35, 35, 35 \n\t"
"xxlxor 36, 36, 36 \n\t"
"xxlxor 37, 37, 37 \n\t"
"xxlxor 38, 38, 38 \n\t"
"xxlxor 39, 39, 39 \n\t"
"lxvp 40, 0(%2) \n\t"
"lxvp 42, 32(%2) \n\t"
"lxvp 44, 64(%2) \n\t"
"lxvp 46, 96(%2) \n\t"
"addi %2, %2, 128 \n\t"
"addic. %1, %1, -16 \n\t"
"ble two%= \n\t"
".align 5 \n"
"one%=: \n\t"
"xvabsdp 48, 40 \n\t"
"xvabsdp 49, 41 \n\t"
"xvabsdp 50, 42 \n\t"
"xvabsdp 51, 43 \n\t"
"lxvp 40, 0(%2) \n\t"
"xvabsdp %x3, 44 \n\t"
"xvabsdp %x4, 45 \n\t"
"lxvp 42, 32(%2) \n\t"
"xvabsdp %x5, 46 \n\t"
"xvabsdp %x6, 47 \n\t"
"lxvp 44, 64(%2) \n\t"
"xvadddp 32, 32, 48 \n\t"
"xvadddp 33, 33, 49 \n\t"
"lxvp 46, 96(%2) \n\t"
"xvadddp 34, 34, 50 \n\t"
"xvadddp 35, 35, 51 \n\t"
"addi %2, %2, 128 \n\t"
"xvadddp 36, 36, %x3 \n\t"
"xvadddp 37, 37, %x4 \n\t"
"addic. %1, %1, -16 \n\t"
"xvadddp 38, 38, %x5 \n\t"
"xvadddp 39, 39, %x6 \n\t"
"bgt one%= \n"
"two%=: \n\t"
"xvabsdp 48, 40 \n\t"
"xvabsdp 49, 41 \n\t"
"xvabsdp 50, 42 \n\t"
"xvabsdp 51, 43 \n\t"
"xvabsdp %x3, 44 \n\t"
"xvabsdp %x4, 45 \n\t"
"xvabsdp %x5, 46 \n\t"
"xvabsdp %x6, 47 \n\t"
"xvadddp 32, 32, 48 \n\t"
"xvadddp 33, 33, 49 \n\t"
"xvadddp 34, 34, 50 \n\t"
"xvadddp 35, 35, 51 \n\t"
"xvadddp 36, 36, %x3 \n\t"
"xvadddp 37, 37, %x4 \n\t"
"xvadddp 38, 38, %x5 \n\t"
"xvadddp 39, 39, %x6 \n\t"
"xvadddp 32, 32, 33 \n\t"
"xvadddp 34, 34, 35 \n\t"
"xvadddp 36, 36, 37 \n\t"
"xvadddp 38, 38, 39 \n\t"
"xvadddp 32, 32, 34 \n\t"
"xvadddp 36, 36, 38 \n\t"
"xvadddp 32, 32, 36 \n\t"
XXSWAPD_S(33,32)
"xsadddp %x0, 32, 33 \n"
"#n=%1 x=%3=%2 sum=%0\n"
"#t0=%x3 t1=%x4 t2=%x5 t3=%x6"
:
"=d" (sum), // 0
"+r" (n), // 1
"+b" (x), // 2
"=wa" (t0), // 3
"=wa" (t1), // 4
"=wa" (t2), // 5
"=wa" (t3) // 6
:
"m" (*x)
:
"cr0",
"vs32","vs33","vs34","vs35","vs36","vs37","vs38","vs39",
"vs40","vs41","vs42","vs43","vs44","vs45","vs46","vs47",
"vs48","vs49","vs50","vs51"
);
return sum;
}

View File

@ -29,7 +29,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
typedef __vector unsigned char vec_t; typedef __vector unsigned char vec_t;
typedef FLOAT v4sf_t __attribute__ ((vector_size (16))); typedef FLOAT v4sf_t __attribute__ ((vector_size (16)));
typedef FLOAT v2sf_t __attribute__ ((vector_size (8))); #if !__has_builtin(__builtin_vsx_assemble_pair)
#define __builtin_vsx_assemble_pair __builtin_mma_assemble_pair
#endif
#if !__has_builtin(__builtin_vsx_disassemble_pair)
#define __builtin_vsx_disassemble_pair __builtin_mma_disassemble_pair
#endif
#ifdef TRMMKERNEL #ifdef TRMMKERNEL
#define SAVE_ACC(ACC, J) \ #define SAVE_ACC(ACC, J) \
@ -186,8 +192,8 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B,
vec_t *rowA = (vec_t *) & AO[0]; vec_t *rowA = (vec_t *) & AO[0];
vec_t *rb = (vec_t *) & BO[0]; vec_t *rb = (vec_t *) & BO[0];
__vector_pair rowB, rowB1; __vector_pair rowB, rowB1;
__builtin_mma_assemble_pair (&rowB, rb[1], rb[0]); __builtin_vsx_assemble_pair (&rowB, rb[1], rb[0]);
__builtin_mma_assemble_pair (&rowB1, rb[3], rb[2]); __builtin_vsx_assemble_pair (&rowB1, rb[3], rb[2]);
__builtin_mma_xvf64ger (&acc0, rowB, rowA[0]); __builtin_mma_xvf64ger (&acc0, rowB, rowA[0]);
__builtin_mma_xvf64ger (&acc1, rowB1, rowA[0]); __builtin_mma_xvf64ger (&acc1, rowB1, rowA[0]);
__builtin_mma_xvf64ger (&acc2, rowB, rowA[1]); __builtin_mma_xvf64ger (&acc2, rowB, rowA[1]);
@ -200,8 +206,8 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B,
{ {
rowA = (vec_t *) & AO[l << 3]; rowA = (vec_t *) & AO[l << 3];
rb = (vec_t *) & BO[l << 3]; rb = (vec_t *) & BO[l << 3];
__builtin_mma_assemble_pair (&rowB, rb[1], rb[0]); __builtin_vsx_assemble_pair (&rowB, rb[1], rb[0]);
__builtin_mma_assemble_pair (&rowB1, rb[3], rb[2]); __builtin_vsx_assemble_pair (&rowB1, rb[3], rb[2]);
__builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]); __builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]);
__builtin_mma_xvf64gerpp (&acc1, rowB1, rowA[0]); __builtin_mma_xvf64gerpp (&acc1, rowB1, rowA[0]);
__builtin_mma_xvf64gerpp (&acc2, rowB, rowA[1]); __builtin_mma_xvf64gerpp (&acc2, rowB, rowA[1]);
@ -242,8 +248,8 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B,
vec_t *rowA = (vec_t *) & AO[0]; vec_t *rowA = (vec_t *) & AO[0];
__vector_pair rowB, rowB1; __vector_pair rowB, rowB1;
vec_t *rb = (vec_t *) & BO[0]; vec_t *rb = (vec_t *) & BO[0];
__builtin_mma_assemble_pair (&rowB, rb[1], rb[0]); __builtin_vsx_assemble_pair (&rowB, rb[1], rb[0]);
__builtin_mma_assemble_pair (&rowB1, rb[3], rb[2]); __builtin_vsx_assemble_pair (&rowB1, rb[3], rb[2]);
__builtin_mma_xvf64ger (&acc0, rowB, rowA[0]); __builtin_mma_xvf64ger (&acc0, rowB, rowA[0]);
__builtin_mma_xvf64ger (&acc1, rowB1, rowA[0]); __builtin_mma_xvf64ger (&acc1, rowB1, rowA[0]);
__builtin_mma_xvf64ger (&acc2, rowB, rowA[1]); __builtin_mma_xvf64ger (&acc2, rowB, rowA[1]);
@ -252,8 +258,8 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B,
{ {
rowA = (vec_t *) & AO[l << 2]; rowA = (vec_t *) & AO[l << 2];
rb = (vec_t *) & BO[l << 3]; rb = (vec_t *) & BO[l << 3];
__builtin_mma_assemble_pair (&rowB, rb[1], rb[0]); __builtin_vsx_assemble_pair (&rowB, rb[1], rb[0]);
__builtin_mma_assemble_pair (&rowB1, rb[3], rb[2]); __builtin_vsx_assemble_pair (&rowB1, rb[3], rb[2]);
__builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]); __builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]);
__builtin_mma_xvf64gerpp (&acc1, rowB1, rowA[0]); __builtin_mma_xvf64gerpp (&acc1, rowB1, rowA[0]);
__builtin_mma_xvf64gerpp (&acc2, rowB, rowA[1]); __builtin_mma_xvf64gerpp (&acc2, rowB, rowA[1]);
@ -286,16 +292,16 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B,
vec_t *rowA = (vec_t *) & AO[0]; vec_t *rowA = (vec_t *) & AO[0];
__vector_pair rowB, rowB1; __vector_pair rowB, rowB1;
vec_t *rb = (vec_t *) & BO[0]; vec_t *rb = (vec_t *) & BO[0];
__builtin_mma_assemble_pair (&rowB, rb[1], rb[0]); __builtin_vsx_assemble_pair (&rowB, rb[1], rb[0]);
__builtin_mma_assemble_pair (&rowB1, rb[3], rb[2]); __builtin_vsx_assemble_pair (&rowB1, rb[3], rb[2]);
__builtin_mma_xvf64ger (&acc0, rowB, rowA[0]); __builtin_mma_xvf64ger (&acc0, rowB, rowA[0]);
__builtin_mma_xvf64ger (&acc1, rowB1, rowA[0]); __builtin_mma_xvf64ger (&acc1, rowB1, rowA[0]);
for (l = 1; l < temp; l++) for (l = 1; l < temp; l++)
{ {
rowA = (vec_t *) & AO[l << 1]; rowA = (vec_t *) & AO[l << 1];
rb = (vec_t *) & BO[l << 3]; rb = (vec_t *) & BO[l << 3];
__builtin_mma_assemble_pair (&rowB, rb[1], rb[0]); __builtin_vsx_assemble_pair (&rowB, rb[1], rb[0]);
__builtin_mma_assemble_pair (&rowB1, rb[3], rb[2]); __builtin_vsx_assemble_pair (&rowB1, rb[3], rb[2]);
__builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]); __builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]);
__builtin_mma_xvf64gerpp (&acc1, rowB1, rowA[0]); __builtin_mma_xvf64gerpp (&acc1, rowB1, rowA[0]);
} }
@ -398,7 +404,7 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B,
vec_t *rowA = (vec_t *) & AO[0]; vec_t *rowA = (vec_t *) & AO[0];
__vector_pair rowB; __vector_pair rowB;
vec_t *rb = (vec_t *) & BO[0]; vec_t *rb = (vec_t *) & BO[0];
__builtin_mma_assemble_pair (&rowB, rb[1], rb[0]); __builtin_vsx_assemble_pair (&rowB, rb[1], rb[0]);
__builtin_mma_xvf64ger (&acc0, rowB, rowA[0]); __builtin_mma_xvf64ger (&acc0, rowB, rowA[0]);
__builtin_mma_xvf64ger (&acc1, rowB, rowA[1]); __builtin_mma_xvf64ger (&acc1, rowB, rowA[1]);
__builtin_mma_xvf64ger (&acc2, rowB, rowA[2]); __builtin_mma_xvf64ger (&acc2, rowB, rowA[2]);
@ -407,7 +413,7 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B,
{ {
rowA = (vec_t *) & AO[l << 3]; rowA = (vec_t *) & AO[l << 3];
rb = (vec_t *) & BO[l << 2]; rb = (vec_t *) & BO[l << 2];
__builtin_mma_assemble_pair (&rowB, rb[1], rb[0]); __builtin_vsx_assemble_pair (&rowB, rb[1], rb[0]);
__builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]); __builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]);
__builtin_mma_xvf64gerpp (&acc1, rowB, rowA[1]); __builtin_mma_xvf64gerpp (&acc1, rowB, rowA[1]);
__builtin_mma_xvf64gerpp (&acc2, rowB, rowA[2]); __builtin_mma_xvf64gerpp (&acc2, rowB, rowA[2]);
@ -440,14 +446,14 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B,
vec_t *rowA = (vec_t *) & AO[0]; vec_t *rowA = (vec_t *) & AO[0];
__vector_pair rowB; __vector_pair rowB;
vec_t *rb = (vec_t *) & BO[0]; vec_t *rb = (vec_t *) & BO[0];
__builtin_mma_assemble_pair (&rowB, rb[1], rb[0]); __builtin_vsx_assemble_pair (&rowB, rb[1], rb[0]);
__builtin_mma_xvf64ger (&acc0, rowB, rowA[0]); __builtin_mma_xvf64ger (&acc0, rowB, rowA[0]);
__builtin_mma_xvf64ger (&acc1, rowB, rowA[1]); __builtin_mma_xvf64ger (&acc1, rowB, rowA[1]);
for (l = 1; l < temp; l++) for (l = 1; l < temp; l++)
{ {
rowA = (vec_t *) & AO[l << 2]; rowA = (vec_t *) & AO[l << 2];
rb = (vec_t *) & BO[l << 2]; rb = (vec_t *) & BO[l << 2];
__builtin_mma_assemble_pair (&rowB, rb[1], rb[0]); __builtin_vsx_assemble_pair (&rowB, rb[1], rb[0]);
__builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]); __builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]);
__builtin_mma_xvf64gerpp (&acc1, rowB, rowA[1]); __builtin_mma_xvf64gerpp (&acc1, rowB, rowA[1]);
} }
@ -476,13 +482,13 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B,
vec_t *rowA = (vec_t *) & AO[0]; vec_t *rowA = (vec_t *) & AO[0];
__vector_pair rowB; __vector_pair rowB;
vec_t *rb = (vec_t *) & BO[0]; vec_t *rb = (vec_t *) & BO[0];
__builtin_mma_assemble_pair (&rowB, rb[1], rb[0]); __builtin_vsx_assemble_pair (&rowB, rb[1], rb[0]);
__builtin_mma_xvf64ger (&acc0, rowB, rowA[0]); __builtin_mma_xvf64ger (&acc0, rowB, rowA[0]);
for (l = 1; l < temp; l++) for (l = 1; l < temp; l++)
{ {
rowA = (vec_t *) & AO[l << 1]; rowA = (vec_t *) & AO[l << 1];
rb = (vec_t *) & BO[l << 2]; rb = (vec_t *) & BO[l << 2];
__builtin_mma_assemble_pair (&rowB, rb[1], rb[0]); __builtin_vsx_assemble_pair (&rowB, rb[1], rb[0]);
__builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]); __builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]);
} }
SAVE_ACC (&acc0, 0); SAVE_ACC (&acc0, 0);
@ -562,11 +568,9 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B,
v4sf_t result[4]; v4sf_t result[4];
__vector_quad acc0, acc1, acc2, acc3; __vector_quad acc0, acc1, acc2, acc3;
BLASLONG l = 0; BLASLONG l = 0;
FLOAT t[4] = { 0, 0, 0, 0 };
t[0] = BO[0], t[1] = BO[1];
__vector_pair rowB; __vector_pair rowB;
vec_t *rb = (vec_t *) & t[0]; vec_t *rb = (vec_t *) & BO[0];
__builtin_mma_assemble_pair (&rowB, rb[1], rb[0]); __builtin_vsx_assemble_pair (&rowB, rb[0], rb[0]);
vec_t *rowA = (vec_t *) & AO[0]; vec_t *rowA = (vec_t *) & AO[0];
__builtin_mma_xvf64ger (&acc0, rowB, rowA[0]); __builtin_mma_xvf64ger (&acc0, rowB, rowA[0]);
__builtin_mma_xvf64ger (&acc1, rowB, rowA[1]); __builtin_mma_xvf64ger (&acc1, rowB, rowA[1]);
@ -574,9 +578,8 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B,
__builtin_mma_xvf64ger (&acc3, rowB, rowA[3]); __builtin_mma_xvf64ger (&acc3, rowB, rowA[3]);
for (l = 1; l < temp; l++) for (l = 1; l < temp; l++)
{ {
t[0] = BO[l << 1], t[1] = BO[(l << 1) + 1]; rb = (vec_t *) & BO[l << 1];
rb = (vec_t *) & t[0]; __builtin_vsx_assemble_pair (&rowB, rb[0], rb[0]);
__builtin_mma_assemble_pair (&rowB, rb[1], rb[0]);
rowA = (vec_t *) & AO[l << 3]; rowA = (vec_t *) & AO[l << 3];
__builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]); __builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]);
__builtin_mma_xvf64gerpp (&acc1, rowB, rowA[1]); __builtin_mma_xvf64gerpp (&acc1, rowB, rowA[1]);
@ -607,19 +610,16 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B,
v4sf_t result[4]; v4sf_t result[4];
__vector_quad acc0, acc1; __vector_quad acc0, acc1;
BLASLONG l = 0; BLASLONG l = 0;
FLOAT t[4] = { 0, 0, 0, 0 };
t[0] = BO[0], t[1] = BO[1];
__vector_pair rowB; __vector_pair rowB;
vec_t *rb = (vec_t *) & t[0]; vec_t *rb = (vec_t *) & BO[0];
__builtin_mma_assemble_pair (&rowB, rb[1], rb[0]); __builtin_vsx_assemble_pair (&rowB, rb[0], rb[0]);
vec_t *rowA = (vec_t *) & AO[0]; vec_t *rowA = (vec_t *) & AO[0];
__builtin_mma_xvf64ger (&acc0, rowB, rowA[0]); __builtin_mma_xvf64ger (&acc0, rowB, rowA[0]);
__builtin_mma_xvf64ger (&acc1, rowB, rowA[1]); __builtin_mma_xvf64ger (&acc1, rowB, rowA[1]);
for (l = 1; l < temp; l++) for (l = 1; l < temp; l++)
{ {
t[0] = BO[l << 1], t[1] = BO[(l << 1) + 1]; rb = (vec_t *) & BO[l << 1];
rb = (vec_t *) & t[0]; __builtin_vsx_assemble_pair (&rowB, rb[0], rb[0]);
__builtin_mma_assemble_pair (&rowB, rb[1], rb[0]);
rowA = (vec_t *) & AO[l << 2]; rowA = (vec_t *) & AO[l << 2];
__builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]); __builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]);
__builtin_mma_xvf64gerpp (&acc1, rowB, rowA[1]); __builtin_mma_xvf64gerpp (&acc1, rowB, rowA[1]);
@ -646,18 +646,15 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B,
v4sf_t result[4]; v4sf_t result[4];
__vector_quad acc0; __vector_quad acc0;
BLASLONG l = 0; BLASLONG l = 0;
FLOAT t[4] = { 0, 0, 0, 0 };
t[0] = BO[0], t[1] = BO[1];
__vector_pair rowB; __vector_pair rowB;
vec_t *rb = (vec_t *) & t[0]; vec_t *rb = (vec_t *) & BO[0];
__builtin_mma_assemble_pair (&rowB, rb[1], rb[0]); __builtin_vsx_assemble_pair (&rowB, rb[0], rb[0]);
vec_t *rowA = (vec_t *) & AO[0]; vec_t *rowA = (vec_t *) & AO[0];
__builtin_mma_xvf64ger (&acc0, rowB, rowA[0]); __builtin_mma_xvf64ger (&acc0, rowB, rowA[0]);
for (l = 1; l < temp; l++) for (l = 1; l < temp; l++)
{ {
t[0] = BO[l << 1], t[1] = BO[(l << 1) + 1]; rb = (vec_t *) & BO[l << 1];
rb = (vec_t *) & t[0]; __builtin_vsx_assemble_pair (&rowB, rb[0], rb[0]);
__builtin_mma_assemble_pair (&rowB, rb[1], rb[0]);
rowA = (vec_t *) & AO[l << 1]; rowA = (vec_t *) & AO[l << 1];
__builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]); __builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]);
} }

View File

@ -46,9 +46,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif #endif
#if defined(POWER8) || defined(POWER9) || defined(POWER10)
#if defined(__VEC__) || defined(__ALTIVEC__) #if defined(__VEC__) || defined(__ALTIVEC__)
#if defined(POWER8) || defined(POWER9)
#include "sasum_microk_power8.c" #include "sasum_microk_power8.c"
#elif defined(POWER10)
#include "sasum_microk_power10.c"
#endif #endif
#endif #endif
@ -110,6 +112,21 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
if ( inc_x == 1 ) if ( inc_x == 1 )
{ {
#if defined(POWER10)
if ( n >= 32 )
{
BLASLONG align = ((32 - ((uintptr_t)x & (uintptr_t)0x1F)) >> 2) & 0x7;
for (i = 0; i < align; i++) {
sumf += ABS(x[i]);
}
}
n1 = (n-i) & -32;
if ( n1 > 0 )
{
sumf += sasum_kernel_32(n1, &x[i]);
i+=n1;
}
#else
n1 = n & -32; n1 = n & -32;
if ( n1 > 0 ) if ( n1 > 0 )
{ {
@ -117,6 +134,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
sumf = sasum_kernel_32(n1, x); sumf = sasum_kernel_32(n1, x);
i=n1; i=n1;
} }
#endif
while(i < n) while(i < n)
{ {

View File

@ -0,0 +1,153 @@
/***************************************************************************
Copyright (c) 2021, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
#define HAVE_KERNEL_32 1
static float sasum_kernel_32 (long n, float *x)
{
float sum;
__vector float t0;
__vector float t1;
__vector float t2;
__vector float t3;
__asm__
(
"dcbt 0, %2 \n\t"
"xxlxor 32, 32, 32 \n\t"
"xxlxor 33, 33, 33 \n\t"
"xxlxor 34, 34, 34 \n\t"
"xxlxor 35, 35, 35 \n\t"
"xxlxor 36, 36, 36 \n\t"
"xxlxor 37, 37, 37 \n\t"
"xxlxor 38, 38, 38 \n\t"
"xxlxor 39, 39, 39 \n\t"
"lxvp 40, 0(%2) \n\t"
"lxvp 42, 32(%2) \n\t"
"lxvp 44, 64(%2) \n\t"
"lxvp 46, 96(%2) \n\t"
"addi %2, %2, 128 \n\t"
"addic. %1, %1, -32 \n\t"
"ble two%= \n\t"
".align 5 \n"
"one%=: \n\t"
"xvabssp 48, 40 \n\t"
"xvabssp 49, 41 \n\t"
"xvabssp 50, 42 \n\t"
"xvabssp 51, 43 \n\t"
"lxvp 40, 0(%2) \n\t"
"xvabssp %x3, 44 \n\t"
"xvabssp %x4, 45 \n\t"
"lxvp 42, 32(%2) \n\t"
"xvabssp %x5, 46 \n\t"
"xvabssp %x6, 47 \n\t"
"lxvp 44, 64(%2) \n\t"
"xvaddsp 32, 32, 48 \n\t"
"xvaddsp 33, 33, 49 \n\t"
"lxvp 46, 96(%2) \n\t"
"xvaddsp 34, 34, 50 \n\t"
"xvaddsp 35, 35, 51 \n\t"
"addi %2, %2, 128 \n\t"
"xvaddsp 36, 36, %x3 \n\t"
"xvaddsp 37, 37, %x4 \n\t"
"addic. %1, %1, -32 \n\t"
"xvaddsp 38, 38, %x5 \n\t"
"xvaddsp 39, 39, %x6 \n\t"
"bgt one%= \n"
"two%=: \n\t"
"xvabssp 48, 40 \n\t"
"xvabssp 49, 41 \n\t"
"xvabssp 50, 42 \n\t"
"xvabssp 51, 43 \n\t"
"xvabssp %x3, 44 \n\t"
"xvabssp %x4, 45 \n\t"
"xvabssp %x5, 46 \n\t"
"xvabssp %x6, 47 \n\t"
"xvaddsp 32, 32, 48 \n\t"
"xvaddsp 33, 33, 49 \n\t"
"xvaddsp 34, 34, 50 \n\t"
"xvaddsp 35, 35, 51 \n\t"
"xvaddsp 36, 36, %x3 \n\t"
"xvaddsp 37, 37, %x4 \n\t"
"xvaddsp 38, 38, %x5 \n\t"
"xvaddsp 39, 39, %x6 \n\t"
"xvaddsp 32, 32, 33 \n\t"
"xvaddsp 34, 34, 35 \n\t"
"xvaddsp 36, 36, 37 \n\t"
"xvaddsp 38, 38, 39 \n\t"
"xvaddsp 32, 32, 34 \n\t"
"xvaddsp 36, 36, 38 \n\t"
"xvaddsp 32, 32, 36 \n\t"
"xxsldwi 33, 32, 32, 2 \n\t"
"xvaddsp 32, 32, 33 \n\t"
"xxsldwi 33, 32, 32, 1 \n\t"
"xvaddsp 32, 32, 33 \n\t"
"xscvspdp %x0, 32 \n"
"#n=%1 x=%3=%2 sum=%0\n"
"#t0=%x3 t1=%x4 t2=%x5 t3=%x6"
:
"=f" (sum), // 0
"+r" (n), // 1
"+b" (x), // 2
"=wa" (t0), // 3
"=wa" (t1), // 4
"=wa" (t2), // 5
"=wa" (t3) // 6
:
"m" (*x)
:
"cr0",
"vs32","vs33","vs34","vs35","vs36","vs37","vs38","vs39",
"vs40","vs41","vs42","vs43","vs44","vs45","vs46","vs47",
"vs48","vs49","vs50","vs51"
);
return sum;
}

View File

@ -45,7 +45,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif #endif
#elif defined(POWER10) #elif defined(POWER10)
#if defined(DOUBLE) #if defined(DOUBLE)
#include "zscal_microk_power8.c" #include "zscal_microk_power10.c"
#else #else
#include "cscal_microk_power10.c" #include "cscal_microk_power10.c"
#endif #endif

View File

@ -0,0 +1,195 @@
/***************************************************************************
Copyright (c) 2021, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
#define HAVE_KERNEL_8 1
static void zscal_kernel_8 (long n, double *x, double alpha_r, double alpha_i)
{
__vector double t0;
__vector double t1;
__vector double t2;
__vector double t3;
__vector double t4;
__vector double t5;
__asm__
(
"dcbt 0, %2 \n\t"
"xsnegdp 33, %x10 \n\t" // -alpha_i
XXSPLTD_S(32,%x9,0) // alpha_r , alpha_r
XXMRGHD_S(33,%x10, 33) // -alpha_i , alpha_i
"lxvp 40, 0(%2) \n\t"
"lxvp 42, 32(%2) \n\t"
"lxvp 44, 64(%2) \n\t"
"lxvp 46, 96(%2) \n\t"
"addic. %1, %1, -8 \n\t"
"ble two%= \n\t"
".align 5 \n"
"one%=: \n\t"
"xvmuldp 48, 40, 32 \n\t" // x0_r * alpha_r, x0_i * alpha_r
"xvmuldp 49, 41, 32 \n\t"
"xvmuldp 50, 42, 32 \n\t"
"xvmuldp 51, 43, 32 \n\t"
"xvmuldp 34, 44, 32 \n\t"
"xvmuldp 35, 45, 32 \n\t"
"xvmuldp 36, 46, 32 \n\t"
"xvmuldp 37, 47, 32 \n\t"
XXSWAPD_S(38,40)
XXSWAPD_S(39,41)
XXSWAPD_S(%x3,42)
XXSWAPD_S(%x4,43)
XXSWAPD_S(%x5,44)
XXSWAPD_S(%x6,45)
XXSWAPD_S(%x7,46)
XXSWAPD_S(%x8,47)
"xvmuldp 38, 38, 33 \n\t" // x0_i * -alpha_i, x0_r * alpha_i
"xvmuldp 39, 39, 33 \n\t"
"xvmuldp %x3, %x3, 33 \n\t"
"xvmuldp %x4, %x4, 33 \n\t"
"lxvp 40, 128(%2) \n\t"
"lxvp 42, 160(%2) \n\t"
"xvmuldp %x5, %x5, 33 \n\t"
"xvmuldp %x6, %x6, 33 \n\t"
"xvmuldp %x7, %x7, 33 \n\t"
"xvmuldp %x8, %x8, 33 \n\t"
"lxvp 44, 192(%2) \n\t"
"lxvp 46, 224(%2) \n\t"
"xvadddp 48, 48, 38 \n\t"
"xvadddp 49, 49, 39 \n\t"
"xvadddp 50, 50, %x3 \n\t"
"xvadddp 51, 51, %x4 \n\t"
"stxv 49, 0(%2) \n\t"
"stxv 48, 16(%2) \n\t"
"stxv 51, 32(%2) \n\t"
"stxv 50, 48(%2) \n\t"
"xvadddp 34, 34, %x5 \n\t"
"xvadddp 35, 35, %x6 \n\t"
"xvadddp 36, 36, %x7 \n\t"
"xvadddp 37, 37, %x8 \n\t"
"stxv 35, 64(%2) \n\t"
"stxv 34, 80(%2) \n\t"
"stxv 37, 96(%2) \n\t"
"stxv 36, 112(%2) \n\t"
"addi %2, %2, 128 \n\t"
"addic. %1, %1, -8 \n\t"
"bgt one%= \n"
"two%=: \n\t"
"xvmuldp 48, 40, 32 \n\t" // x0_r * alpha_r, x0_i * alpha_r
"xvmuldp 49, 41, 32 \n\t"
"xvmuldp 50, 42, 32 \n\t"
"xvmuldp 51, 43, 32 \n\t"
"xvmuldp 34, 44, 32 \n\t"
"xvmuldp 35, 45, 32 \n\t"
"xvmuldp 36, 46, 32 \n\t"
"xvmuldp 37, 47, 32 \n\t"
XXSWAPD_S(38,40)
XXSWAPD_S(39,41)
XXSWAPD_S(%x3,42)
XXSWAPD_S(%x4,43)
XXSWAPD_S(%x5,44)
XXSWAPD_S(%x6,45)
XXSWAPD_S(%x7,46)
XXSWAPD_S(%x8,47)
"xvmuldp 38, 38, 33 \n\t" // x0_i * -alpha_i, x0_r * alpha_i
"xvmuldp 39, 39, 33 \n\t"
"xvmuldp %x3, %x3, 33 \n\t"
"xvmuldp %x4, %x4, 33 \n\t"
"xvmuldp %x5, %x5, 33 \n\t"
"xvmuldp %x6, %x6, 33 \n\t"
"xvmuldp %x7, %x7, 33 \n\t"
"xvmuldp %x8, %x8, 33 \n\t"
"xvadddp 48, 48, 38 \n\t"
"xvadddp 49, 49, 39 \n\t"
"xvadddp 50, 50, %x3 \n\t"
"xvadddp 51, 51, %x4 \n\t"
"stxv 49, 0(%2) \n\t"
"stxv 48, 16(%2) \n\t"
"stxv 51, 32(%2) \n\t"
"stxv 50, 48(%2) \n\t"
"xvadddp 34, 34, %x5 \n\t"
"xvadddp 35, 35, %x6 \n\t"
"xvadddp 36, 36, %x7 \n\t"
"xvadddp 37, 37, %x8 \n\t"
"stxv 35, 64(%2) \n\t"
"stxv 34, 80(%2) \n\t"
"stxv 37, 96(%2) \n\t"
"stxv 36, 112(%2) \n\t"
"#n=%1 x=%0=%2 alpha=(%9,%10) \n"
:
"+m" (*x),
"+r" (n), // 1
"+b" (x), // 2
"=wa" (t0), // 3
"=wa" (t1), // 4
"=wa" (t2), // 5
"=wa" (t3), // 6
"=wa" (t4), // 7
"=wa" (t5) // 8
:
"d" (alpha_r), // 9
"d" (alpha_i) // 10
:
"cr0",
"vs32","vs33","vs34","vs35","vs36","vs37","vs38","vs39",
"vs40","vs41","vs42","vs43","vs44","vs45","vs46","vs47",
"vs48","vs49","vs50","vs51"
);
}

View File

@ -1,5 +1,4 @@
/* need a new enough GCC for avx512 support */ #if defined(HAVE_FMA3) && defined(HAVE_AVX2)
#if (( defined(__GNUC__) && __GNUC__ > 6 && defined(__AVX512CD__)) || (defined(__clang__) && __clang_major__ >= 9))
#define HAVE_SROT_KERNEL 1 #define HAVE_SROT_KERNEL 1

View File

@ -25,7 +25,7 @@ set(AEIGTST
set(SCIGTST slafts.f slahd2.f slasum.f slatb9.f sstech.f sstect.f set(SCIGTST slafts.f slahd2.f slasum.f slatb9.f sstech.f sstect.f
ssvdch.f ssvdct.f ssxt1.f) ssvdch.f ssvdct.f ssxt1.f)
set(SEIGTST schkee.f set(SEIGTST schkee.F
sbdt01.f sbdt02.f sbdt03.f sbdt04.f sbdt05.f sbdt01.f sbdt02.f sbdt03.f sbdt04.f sbdt05.f
schkbb.f schkbd.f schkbk.f schkbl.f schkec.f schkbb.f schkbd.f schkbk.f schkbl.f schkec.f
schkgg.f schkgk.f schkgl.f schkhs.f schksb.f schkst.f schkst2stg.f schksb2stg.f schkgg.f schkgk.f schkgl.f schkhs.f schksb.f schkst.f schkst2stg.f schksb2stg.f
@ -42,7 +42,7 @@ set(SEIGTST schkee.f
sort03.f ssbt21.f ssgt01.f sslect.f sspt21.f sstt21.f sort03.f ssbt21.f ssgt01.f sslect.f sspt21.f sstt21.f
sstt22.f ssyt21.f ssyt22.f) sstt22.f ssyt21.f ssyt22.f)
set(CEIGTST cchkee.f set(CEIGTST cchkee.F
cbdt01.f cbdt02.f cbdt03.f cbdt05.f cbdt01.f cbdt02.f cbdt03.f cbdt05.f
cchkbb.f cchkbd.f cchkbk.f cchkbl.f cchkec.f cchkbb.f cchkbd.f cchkbk.f cchkbl.f cchkec.f
cchkgg.f cchkgk.f cchkgl.f cchkhb.f cchkhs.f cchkst.f cchkst2stg.f cchkhb2stg.f cchkgg.f cchkgk.f cchkgl.f cchkhb.f cchkhs.f cchkst.f cchkst2stg.f cchkhb2stg.f
@ -62,7 +62,7 @@ set(CEIGTST cchkee.f
set(DZIGTST dlafts.f dlahd2.f dlasum.f dlatb9.f dstech.f dstect.f set(DZIGTST dlafts.f dlahd2.f dlasum.f dlatb9.f dstech.f dstect.f
dsvdch.f dsvdct.f dsxt1.f) dsvdch.f dsvdct.f dsxt1.f)
set(DEIGTST dchkee.f set(DEIGTST dchkee.F
dbdt01.f dbdt02.f dbdt03.f dbdt04.f dbdt05.f dbdt01.f dbdt02.f dbdt03.f dbdt04.f dbdt05.f
dchkbb.f dchkbd.f dchkbk.f dchkbl.f dchkec.f dchkbb.f dchkbd.f dchkbk.f dchkbl.f dchkec.f
dchkgg.f dchkgk.f dchkgl.f dchkhs.f dchksb.f dchkst.f dchkst2stg.f dchksb2stg.f dchkgg.f dchkgk.f dchkgl.f dchkhs.f dchksb.f dchkst.f dchkst2stg.f dchksb2stg.f
@ -79,7 +79,7 @@ set(DEIGTST dchkee.f
dort03.f dsbt21.f dsgt01.f dslect.f dspt21.f dstt21.f dort03.f dsbt21.f dsgt01.f dslect.f dspt21.f dstt21.f
dstt22.f dsyt21.f dsyt22.f) dstt22.f dsyt21.f dsyt22.f)
set(ZEIGTST zchkee.f set(ZEIGTST zchkee.F
zbdt01.f zbdt02.f zbdt03.f zbdt05.f zbdt01.f zbdt02.f zbdt03.f zbdt05.f
zchkbb.f zchkbd.f zchkbk.f zchkbl.f zchkec.f zchkbb.f zchkbd.f zchkbk.f zchkbl.f zchkec.f
zchkgg.f zchkgk.f zchkgl.f zchkhb.f zchkhs.f zchkst.f zchkst2stg.f zchkhb2stg.f zchkgg.f zchkgk.f zchkgl.f zchkhb.f zchkhs.f zchkst.f zchkst2stg.f zchkhb2stg.f

View File

@ -157,11 +157,11 @@ cleanobj:
cleanexe: cleanexe:
rm -f xeigtst* rm -f xeigtst*
schkee.o: schkee.f schkee.o: schkee.F
$(FC) $(FFLAGS_DRV) -c -o $@ $< $(FC) $(FFLAGS_DRV) -c -o $@ $<
dchkee.o: dchkee.f dchkee.o: dchkee.F
$(FC) $(FFLAGS_DRV) -c -o $@ $< $(FC) $(FFLAGS_DRV) -c -o $@ $<
cchkee.o: cchkee.f cchkee.o: cchkee.F
$(FC) $(FFLAGS_DRV) -c -o $@ $< $(FC) $(FFLAGS_DRV) -c -o $@ $<
zchkee.o: zchkee.f zchkee.o: zchkee.F
$(FC) $(FFLAGS_DRV) -c -o $@ $< $(FC) $(FFLAGS_DRV) -c -o $@ $<

View File

@ -1034,6 +1034,10 @@
* ===================================================================== * =====================================================================
PROGRAM CCHKEE PROGRAM CCHKEE
* *
#if defined(_OPENMP)
use omp_lib
#endif
*
* -- LAPACK test routine (version 3.7.0) -- * -- LAPACK test routine (version 3.7.0) --
* -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
@ -1071,7 +1075,7 @@
CHARACTER*80 LINE CHARACTER*80 LINE
INTEGER I, I1, IC, INFO, ITMP, K, LENP, MAXTYP, NEWSD, INTEGER I, I1, IC, INFO, ITMP, K, LENP, MAXTYP, NEWSD,
$ NK, NN, NPARMS, NRHS, NTYPES, $ NK, NN, NPARMS, NRHS, NTYPES,
$ VERS_MAJOR, VERS_MINOR, VERS_PATCH $ VERS_MAJOR, VERS_MINOR, VERS_PATCH, N_THREADS
REAL EPS, S1, S2, THRESH, THRSHN REAL EPS, S1, S2, THRESH, THRSHN
* .. * ..
* .. Local Arrays .. * .. Local Arrays ..
@ -1084,12 +1088,16 @@
INTEGER INMIN( MAXIN ), INWIN( MAXIN ), INIBL( MAXIN ), INTEGER INMIN( MAXIN ), INWIN( MAXIN ), INIBL( MAXIN ),
$ ISHFTS( MAXIN ), IACC22( MAXIN ) $ ISHFTS( MAXIN ), IACC22( MAXIN )
REAL ALPHA( NMAX ), BETA( NMAX ), DR( NMAX, 12 ), REAL ALPHA( NMAX ), BETA( NMAX ), DR( NMAX, 12 ),
$ RESULT( 500 ), RWORK( LWORK ), S( NMAX*NMAX ) $ RESULT( 500 )
COMPLEX A( NMAX*NMAX, NEED ), B( NMAX*NMAX, 5 ), COMPLEX DC( NMAX, 6 ), TAUA( NMAX ), TAUB( NMAX ),
$ C( NCMAX*NCMAX, NCMAX*NCMAX ), DC( NMAX, 6 ),
$ TAUA( NMAX ), TAUB( NMAX ), WORK( LWORK ),
$ X( 5*NMAX ) $ X( 5*NMAX )
* .. * ..
* .. Allocatable Arrays ..
INTEGER AllocateStatus
REAL, DIMENSION(:), ALLOCATABLE :: RWORK, S
COMPLEX, DIMENSION(:), ALLOCATABLE :: WORK
COMPLEX, DIMENSION(:,:), ALLOCATABLE :: A, B, C
* ..
* .. External Functions .. * .. External Functions ..
LOGICAL LSAMEN LOGICAL LSAMEN
REAL SECOND, SLAMCH REAL SECOND, SLAMCH
@ -1130,6 +1138,21 @@
DATA INTSTR / '0123456789' / DATA INTSTR / '0123456789' /
DATA IOLDSD / 0, 0, 0, 1 / DATA IOLDSD / 0, 0, 0, 1 /
* .. * ..
* .. Allocate memory dynamically ..
*
ALLOCATE ( S(NMAX*NMAX), STAT = AllocateStatus )
IF (AllocateStatus /= 0) STOP "*** Not enough memory ***"
ALLOCATE ( A(NMAX*NMAX,NEED), STAT = AllocateStatus )
IF (AllocateStatus /= 0) STOP "*** Not enough memory ***"
ALLOCATE ( B(NMAX*NMAX,5), STAT = AllocateStatus )
IF (AllocateStatus /= 0) STOP "*** Not enough memory ***"
ALLOCATE ( C(NCMAX*NCMAX,NCMAX*NCMAX), STAT = AllocateStatus )
IF (AllocateStatus /= 0) STOP "*** Not enough memory ***"
ALLOCATE ( RWORK(LWORK), STAT = AllocateStatus )
IF (AllocateStatus /= 0) STOP "*** Not enough memory ***"
ALLOCATE ( WORK(LWORK), STAT = AllocateStatus )
IF (AllocateStatus /= 0) STOP "*** Not enough memory ***"
* ..
* .. Executable Statements .. * .. Executable Statements ..
* *
A = 0.0 A = 0.0
@ -1846,8 +1869,16 @@
CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT )
CALL XLAENV( 1, 1 ) CALL XLAENV( 1, 1 )
CALL XLAENV( 9, 25 ) CALL XLAENV( 9, 25 )
IF( TSTERR ) IF( TSTERR ) THEN
$ CALL CERRST( 'CST', NOUT ) #if defined(_OPENMP)
N_THREADS = OMP_GET_NUM_THREADS()
CALL OMP_SET_NUM_THREADS(1)
#endif
CALL CERRST( 'CST', NOUT )
#if defined(_OPENMP)
CALL OMP_SET_NUM_THREADS(N_THREADS)
#endif
END IF
DO 290 I = 1, NPARMS DO 290 I = 1, NPARMS
CALL XLAENV( 1, NBVAL( I ) ) CALL XLAENV( 1, NBVAL( I ) )
CALL XLAENV( 2, NBMIN( I ) ) CALL XLAENV( 2, NBMIN( I ) )
@ -2305,8 +2336,16 @@
MAXTYP = 15 MAXTYP = 15
NTYPES = MIN( MAXTYP, NTYPES ) NTYPES = MIN( MAXTYP, NTYPES )
CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT )
IF( TSTERR ) IF( TSTERR ) THEN
$ CALL CERRST( 'CHB', NOUT ) #if defined(_OPENMP)
N_THREADS = OMP_GET_NUM_THREADS()
CALL OMP_SET_NUM_THREADS(1)
#endif
CALL CERRST( 'CHB', NOUT )
#if defined(_OPENMP)
CALL OMP_SET_NUM_THREADS(N_THREADS)
#endif
END IF
* CALL CCHKHB( NN, NVAL, NK, KVAL, MAXTYP, DOTYPE, ISEED, THRESH, * CALL CCHKHB( NN, NVAL, NK, KVAL, MAXTYP, DOTYPE, ISEED, THRESH,
* $ NOUT, A( 1, 1 ), NMAX, DR( 1, 1 ), DR( 1, 2 ), * $ NOUT, A( 1, 1 ), NMAX, DR( 1, 1 ), DR( 1, 2 ),
* $ A( 1, 2 ), NMAX, WORK, LWORK, RWORK, RESULT, * $ A( 1, 2 ), NMAX, WORK, LWORK, RWORK, RESULT,
@ -2436,7 +2475,14 @@
380 CONTINUE 380 CONTINUE
WRITE( NOUT, FMT = 9994 ) WRITE( NOUT, FMT = 9994 )
S2 = SECOND( ) S2 = SECOND( )
WRITE( NOUT, FMT = 9993 )S2 - S1 WRITE( NOUT, FMT = 9993 )S2 - S1
*
DEALLOCATE (S, STAT = AllocateStatus)
DEALLOCATE (A, STAT = AllocateStatus)
DEALLOCATE (B, STAT = AllocateStatus)
DEALLOCATE (C, STAT = AllocateStatus)
DEALLOCATE (RWORK, STAT = AllocateStatus)
DEALLOCATE (WORK, STAT = AllocateStatus)
* *
9999 FORMAT( / ' Execution not attempted due to input errors' ) 9999 FORMAT( / ' Execution not attempted due to input errors' )
9997 FORMAT( / / 1X, A3, ': NB =', I4, ', NBMIN =', I4, ', NX =', I4 ) 9997 FORMAT( / / 1X, A3, ': NB =', I4, ', NBMIN =', I4, ', NX =', I4 )

View File

@ -1038,7 +1038,11 @@
*> \ingroup double_eig *> \ingroup double_eig
* *
* ===================================================================== * =====================================================================
PROGRAM DCHKEE PROGRAM DCHKEE
*
#if defined(_OPENMP)
use omp_lib
#endif
* *
* -- LAPACK test routine (version 3.7.0) -- * -- LAPACK test routine (version 3.7.0) --
* -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- LAPACK is a software package provided by Univ. of Tennessee, --
@ -1077,7 +1081,7 @@
CHARACTER*80 LINE CHARACTER*80 LINE
INTEGER I, I1, IC, INFO, ITMP, K, LENP, MAXTYP, NEWSD, INTEGER I, I1, IC, INFO, ITMP, K, LENP, MAXTYP, NEWSD,
$ NK, NN, NPARMS, NRHS, NTYPES, $ NK, NN, NPARMS, NRHS, NTYPES,
$ VERS_MAJOR, VERS_MINOR, VERS_PATCH $ VERS_MAJOR, VERS_MINOR, VERS_PATCH, N_THREADS
DOUBLE PRECISION EPS, S1, S2, THRESH, THRSHN DOUBLE PRECISION EPS, S1, S2, THRESH, THRSHN
* .. * ..
* .. Local Arrays .. * .. Local Arrays ..
@ -1089,10 +1093,13 @@
$ PVAL( MAXIN ) $ PVAL( MAXIN )
INTEGER INMIN( MAXIN ), INWIN( MAXIN ), INIBL( MAXIN ), INTEGER INMIN( MAXIN ), INWIN( MAXIN ), INIBL( MAXIN ),
$ ISHFTS( MAXIN ), IACC22( MAXIN ) $ ISHFTS( MAXIN ), IACC22( MAXIN )
DOUBLE PRECISION A( NMAX*NMAX, NEED ), B( NMAX*NMAX, 5 ), DOUBLE PRECISION D( NMAX, 12 ), RESULT( 500 ), TAUA( NMAX ),
$ C( NCMAX*NCMAX, NCMAX*NCMAX ), D( NMAX, 12 ), $ TAUB( NMAX ), X( 5*NMAX )
$ RESULT( 500 ), TAUA( NMAX ), TAUB( NMAX ), * ..
$ WORK( LWORK ), X( 5*NMAX ) * .. Allocatable Arrays ..
INTEGER AllocateStatus
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: WORK
DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: A, B, C
* .. * ..
* .. External Functions .. * .. External Functions ..
LOGICAL LSAMEN LOGICAL LSAMEN
@ -1132,7 +1139,18 @@
* .. * ..
* .. Data statements .. * .. Data statements ..
DATA INTSTR / '0123456789' / DATA INTSTR / '0123456789' /
DATA IOLDSD / 0, 0, 0, 1 / DATA IOLDSD / 0, 0, 0, 1 /
* ..
* .. Allocate memory dynamically ..
*
ALLOCATE ( A(NMAX*NMAX,NEED), STAT = AllocateStatus )
IF (AllocateStatus /= 0) STOP "*** Not enough memory ***"
ALLOCATE ( B(NMAX*NMAX,5), STAT = AllocateStatus )
IF (AllocateStatus /= 0) STOP "*** Not enough memory ***"
ALLOCATE ( C(NCMAX*NCMAX,NCMAX*NCMAX), STAT = AllocateStatus )
IF (AllocateStatus /= 0) STOP "*** Not enough memory ***"
ALLOCATE ( WORK(LWORK), STAT = AllocateStatus )
IF (AllocateStatus /= 0) STOP "*** Not enough memory ***"
* .. * ..
* .. Executable Statements .. * .. Executable Statements ..
* *
@ -1856,8 +1874,16 @@
CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT )
CALL XLAENV( 1, 1 ) CALL XLAENV( 1, 1 )
CALL XLAENV( 9, 25 ) CALL XLAENV( 9, 25 )
IF( TSTERR ) IF( TSTERR ) THEN
$ CALL DERRST( 'DST', NOUT ) #if defined(_OPENMP)
N_THREADS = OMP_GET_NUM_THREADS()
CALL OMP_SET_NUM_THREADS(1)
#endif
CALL DERRST( 'DST', NOUT )
#if defined(_OPENMP)
CALL OMP_SET_NUM_THREADS(N_THREADS)
#endif
END IF
DO 290 I = 1, NPARMS DO 290 I = 1, NPARMS
CALL XLAENV( 1, NBVAL( I ) ) CALL XLAENV( 1, NBVAL( I ) )
CALL XLAENV( 2, NBMIN( I ) ) CALL XLAENV( 2, NBMIN( I ) )
@ -2436,7 +2462,12 @@
380 CONTINUE 380 CONTINUE
WRITE( NOUT, FMT = 9994 ) WRITE( NOUT, FMT = 9994 )
S2 = DSECND( ) S2 = DSECND( )
WRITE( NOUT, FMT = 9993 )S2 - S1 WRITE( NOUT, FMT = 9993 )S2 - S1
*
DEALLOCATE (A, STAT = AllocateStatus)
DEALLOCATE (B, STAT = AllocateStatus)
DEALLOCATE (C, STAT = AllocateStatus)
DEALLOCATE (WORK, STAT = AllocateStatus)
* *
9999 FORMAT( / ' Execution not attempted due to input errors' ) 9999 FORMAT( / ' Execution not attempted due to input errors' )
9997 FORMAT( / / 1X, A3, ': NB =', I4, ', NBMIN =', I4, ', NX =', I4 ) 9997 FORMAT( / / 1X, A3, ': NB =', I4, ', NBMIN =', I4, ', NX =', I4 )

View File

@ -1040,6 +1040,10 @@
* ===================================================================== * =====================================================================
PROGRAM SCHKEE PROGRAM SCHKEE
* *
#if defined(_OPENMP)
use omp_lib
#endif
*
* -- LAPACK test routine (version 3.7.0) -- * -- LAPACK test routine (version 3.7.0) --
* -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
@ -1077,7 +1081,7 @@
CHARACTER*80 LINE CHARACTER*80 LINE
INTEGER I, I1, IC, INFO, ITMP, K, LENP, MAXTYP, NEWSD, INTEGER I, I1, IC, INFO, ITMP, K, LENP, MAXTYP, NEWSD,
$ NK, NN, NPARMS, NRHS, NTYPES, $ NK, NN, NPARMS, NRHS, NTYPES,
$ VERS_MAJOR, VERS_MINOR, VERS_PATCH $ VERS_MAJOR, VERS_MINOR, VERS_PATCH, N_THREADS
REAL EPS, S1, S2, THRESH, THRSHN REAL EPS, S1, S2, THRESH, THRSHN
* .. * ..
* .. Local Arrays .. * .. Local Arrays ..
@ -1089,10 +1093,13 @@
$ PVAL( MAXIN ) $ PVAL( MAXIN )
INTEGER INMIN( MAXIN ), INWIN( MAXIN ), INIBL( MAXIN ), INTEGER INMIN( MAXIN ), INWIN( MAXIN ), INIBL( MAXIN ),
$ ISHFTS( MAXIN ), IACC22( MAXIN ) $ ISHFTS( MAXIN ), IACC22( MAXIN )
REAL A( NMAX*NMAX, NEED ), B( NMAX*NMAX, 5 ), REAL D( NMAX, 12 ), RESULT( 500 ), TAUA( NMAX ),
$ C( NCMAX*NCMAX, NCMAX*NCMAX ), D( NMAX, 12 ), $ TAUB( NMAX ), X( 5*NMAX )
$ RESULT( 500 ), TAUA( NMAX ), TAUB( NMAX ), * ..
$ WORK( LWORK ), X( 5*NMAX ) * .. Allocatable Arrays ..
INTEGER AllocateStatus
REAL, DIMENSION(:), ALLOCATABLE :: WORK
REAL, DIMENSION(:,:), ALLOCATABLE :: A, B, C
* .. * ..
* .. External Functions .. * .. External Functions ..
LOGICAL LSAMEN LOGICAL LSAMEN
@ -1132,7 +1139,18 @@
* .. * ..
* .. Data statements .. * .. Data statements ..
DATA INTSTR / '0123456789' / DATA INTSTR / '0123456789' /
DATA IOLDSD / 0, 0, 0, 1 / DATA IOLDSD / 0, 0, 0, 1 /
* ..
* .. Allocate memory dynamically ..
*
ALLOCATE ( A(NMAX*NMAX,NEED), STAT = AllocateStatus )
IF (AllocateStatus /= 0) STOP "*** Not enough memory ***"
ALLOCATE ( B(NMAX*NMAX,5), STAT = AllocateStatus )
IF (AllocateStatus /= 0) STOP "*** Not enough memory ***"
ALLOCATE ( C(NCMAX*NCMAX,NCMAX*NCMAX), STAT = AllocateStatus )
IF (AllocateStatus /= 0) STOP "*** Not enough memory ***"
ALLOCATE ( WORK(LWORK), STAT = AllocateStatus )
IF (AllocateStatus /= 0) STOP "*** Not enough memory ***"
* .. * ..
* .. Executable Statements .. * .. Executable Statements ..
* *
@ -1857,8 +1875,16 @@
CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT )
CALL XLAENV( 1, 1 ) CALL XLAENV( 1, 1 )
CALL XLAENV( 9, 25 ) CALL XLAENV( 9, 25 )
IF( TSTERR ) IF( TSTERR ) THEN
$ CALL SERRST( 'SST', NOUT ) #if defined(_OPENMP)
N_THREADS = OMP_GET_NUM_THREADS()
CALL OMP_SET_NUM_THREADS(1)
#endif
CALL SERRST( 'SST', NOUT )
#if defined(_OPENMP)
CALL OMP_SET_NUM_THREADS(N_THREADS)
#endif
END IF
DO 290 I = 1, NPARMS DO 290 I = 1, NPARMS
CALL XLAENV( 1, NBVAL( I ) ) CALL XLAENV( 1, NBVAL( I ) )
CALL XLAENV( 2, NBMIN( I ) ) CALL XLAENV( 2, NBMIN( I ) )
@ -2440,6 +2466,11 @@
WRITE( NOUT, FMT = 9994 ) WRITE( NOUT, FMT = 9994 )
S2 = SECOND( ) S2 = SECOND( )
WRITE( NOUT, FMT = 9993 )S2 - S1 WRITE( NOUT, FMT = 9993 )S2 - S1
*
DEALLOCATE (A, STAT = AllocateStatus)
DEALLOCATE (B, STAT = AllocateStatus)
DEALLOCATE (C, STAT = AllocateStatus)
DEALLOCATE (WORK, STAT = AllocateStatus)
* *
9999 FORMAT( / ' Execution not attempted due to input errors' ) 9999 FORMAT( / ' Execution not attempted due to input errors' )
9997 FORMAT( / / 1X, A3, ': NB =', I4, ', NBMIN =', I4, ', NX =', I4 ) 9997 FORMAT( / / 1X, A3, ': NB =', I4, ', NBMIN =', I4, ', NX =', I4 )

View File

@ -1034,6 +1034,10 @@
* ===================================================================== * =====================================================================
PROGRAM ZCHKEE PROGRAM ZCHKEE
* *
#if defined(_OPENMP)
use omp_lib
#endif
*
* -- LAPACK test routine (version 3.7.0) -- * -- LAPACK test routine (version 3.7.0) --
* -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
@ -1071,7 +1075,7 @@
CHARACTER*80 LINE CHARACTER*80 LINE
INTEGER I, I1, IC, INFO, ITMP, K, LENP, MAXTYP, NEWSD, INTEGER I, I1, IC, INFO, ITMP, K, LENP, MAXTYP, NEWSD,
$ NK, NN, NPARMS, NRHS, NTYPES, $ NK, NN, NPARMS, NRHS, NTYPES,
$ VERS_MAJOR, VERS_MINOR, VERS_PATCH $ VERS_MAJOR, VERS_MINOR, VERS_PATCH, N_THREADS
DOUBLE PRECISION EPS, S1, S2, THRESH, THRSHN DOUBLE PRECISION EPS, S1, S2, THRESH, THRSHN
* .. * ..
* .. Local Arrays .. * .. Local Arrays ..
@ -1084,12 +1088,16 @@
INTEGER INMIN( MAXIN ), INWIN( MAXIN ), INIBL( MAXIN ), INTEGER INMIN( MAXIN ), INWIN( MAXIN ), INIBL( MAXIN ),
$ ISHFTS( MAXIN ), IACC22( MAXIN ) $ ISHFTS( MAXIN ), IACC22( MAXIN )
DOUBLE PRECISION ALPHA( NMAX ), BETA( NMAX ), DR( NMAX, 12 ), DOUBLE PRECISION ALPHA( NMAX ), BETA( NMAX ), DR( NMAX, 12 ),
$ RESULT( 500 ), RWORK( LWORK ), S( NMAX*NMAX ) $ RESULT( 500 )
COMPLEX*16 A( NMAX*NMAX, NEED ), B( NMAX*NMAX, 5 ), COMPLEX*16 DC( NMAX, 6 ), TAUA( NMAX ), TAUB( NMAX ),
$ C( NCMAX*NCMAX, NCMAX*NCMAX ), DC( NMAX, 6 ),
$ TAUA( NMAX ), TAUB( NMAX ), WORK( LWORK ),
$ X( 5*NMAX ) $ X( 5*NMAX )
* .. * ..
* .. Allocatable Arrays ..
INTEGER AllocateStatus
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: RWORK, S
COMPLEX*16, DIMENSION(:), ALLOCATABLE :: WORK
COMPLEX*16, DIMENSION(:,:), ALLOCATABLE :: A, B, C
* ..
* .. External Functions .. * .. External Functions ..
LOGICAL LSAMEN LOGICAL LSAMEN
DOUBLE PRECISION DLAMCH, DSECND DOUBLE PRECISION DLAMCH, DSECND
@ -1130,6 +1138,21 @@
DATA INTSTR / '0123456789' / DATA INTSTR / '0123456789' /
DATA IOLDSD / 0, 0, 0, 1 / DATA IOLDSD / 0, 0, 0, 1 /
* .. * ..
* .. Allocate memory dynamically ..
*
ALLOCATE ( S(NMAX*NMAX), STAT = AllocateStatus )
IF (AllocateStatus /= 0) STOP "*** Not enough memory ***"
ALLOCATE ( A(NMAX*NMAX,NEED), STAT = AllocateStatus )
IF (AllocateStatus /= 0) STOP "*** Not enough memory ***"
ALLOCATE ( B(NMAX*NMAX,5), STAT = AllocateStatus )
IF (AllocateStatus /= 0) STOP "*** Not enough memory ***"
ALLOCATE ( C(NCMAX*NCMAX,NCMAX*NCMAX), STAT = AllocateStatus )
IF (AllocateStatus /= 0) STOP "*** Not enough memory ***"
ALLOCATE ( RWORK(LWORK), STAT = AllocateStatus )
IF (AllocateStatus /= 0) STOP "*** Not enough memory ***"
ALLOCATE ( WORK(LWORK), STAT = AllocateStatus )
IF (AllocateStatus /= 0) STOP "*** Not enough memory ***"
* ..
* .. Executable Statements .. * .. Executable Statements ..
* *
A = 0.0 A = 0.0
@ -1846,8 +1869,16 @@
CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT )
CALL XLAENV( 1, 1 ) CALL XLAENV( 1, 1 )
CALL XLAENV( 9, 25 ) CALL XLAENV( 9, 25 )
IF( TSTERR ) IF( TSTERR ) THEN
$ CALL ZERRST( 'ZST', NOUT ) #if defined(_OPENMP)
N_THREADS = OMP_GET_NUM_THREADS()
CALL OMP_SET_NUM_THREADS(1)
#endif
CALL ZERRST( 'ZST', NOUT )
#if defined(_OPENMP)
CALL OMP_SET_NUM_THREADS(N_THREADS)
#endif
END IF
DO 290 I = 1, NPARMS DO 290 I = 1, NPARMS
CALL XLAENV( 1, NBVAL( I ) ) CALL XLAENV( 1, NBVAL( I ) )
CALL XLAENV( 2, NBMIN( I ) ) CALL XLAENV( 2, NBMIN( I ) )
@ -2303,8 +2334,16 @@
MAXTYP = 15 MAXTYP = 15
NTYPES = MIN( MAXTYP, NTYPES ) NTYPES = MIN( MAXTYP, NTYPES )
CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT )
IF( TSTERR ) IF( TSTERR ) THEN
$ CALL ZERRST( 'ZHB', NOUT ) #if defined(_OPENMP)
N_THREADS = OMP_GET_NUM_THREADS()
CALL OMP_SET_NUM_THREADS(1)
#endif
CALL ZERRST( 'ZHB', NOUT )
#if defined(_OPENMP)
CALL OMP_SET_NUM_THREADS(N_THREADS)
#endif
END IF
* CALL ZCHKHB( NN, NVAL, NK, KVAL, MAXTYP, DOTYPE, ISEED, THRESH, * CALL ZCHKHB( NN, NVAL, NK, KVAL, MAXTYP, DOTYPE, ISEED, THRESH,
* $ NOUT, A( 1, 1 ), NMAX, DR( 1, 1 ), DR( 1, 2 ), * $ NOUT, A( 1, 1 ), NMAX, DR( 1, 1 ), DR( 1, 2 ),
* $ A( 1, 2 ), NMAX, WORK, LWORK, RWORK, RESULT, * $ A( 1, 2 ), NMAX, WORK, LWORK, RWORK, RESULT,
@ -2435,6 +2474,13 @@
WRITE( NOUT, FMT = 9994 ) WRITE( NOUT, FMT = 9994 )
S2 = DSECND( ) S2 = DSECND( )
WRITE( NOUT, FMT = 9993 )S2 - S1 WRITE( NOUT, FMT = 9993 )S2 - S1
*
DEALLOCATE (S, STAT = AllocateStatus)
DEALLOCATE (A, STAT = AllocateStatus)
DEALLOCATE (B, STAT = AllocateStatus)
DEALLOCATE (C, STAT = AllocateStatus)
DEALLOCATE (RWORK, STAT = AllocateStatus)
DEALLOCATE (WORK, STAT = AllocateStatus)
* *
9999 FORMAT( / ' Execution not attempted due to input errors' ) 9999 FORMAT( / ' Execution not attempted due to input errors' )
9997 FORMAT( / / 1X, A3, ': NB =', I4, ', NBMIN =', I4, ', NX =', I4 ) 9997 FORMAT( / / 1X, A3, ': NB =', I4, ', NBMIN =', I4, ', NX =', I4 )