commit
b1215f2f8c
|
@ -904,8 +904,8 @@ CCOMMON_OPT += -DF_INTERFACE_FLANG
|
||||||
FCOMMON_OPT += -Mrecursive -Kieee
|
FCOMMON_OPT += -Mrecursive -Kieee
|
||||||
ifeq ($(OSNAME), Linux)
|
ifeq ($(OSNAME), Linux)
|
||||||
ifeq ($(ARCH), x86_64)
|
ifeq ($(ARCH), x86_64)
|
||||||
FLANG_VENDOR := $(shell `$(FC) --version|cut -f 1 -d "."|head -1`)
|
FLANG_VENDOR := $(shell $(FC) --version|head -1 |cut -f 1 -d " ")
|
||||||
ifeq ($(FLANG_VENDOR),AOCC)
|
ifeq ($(FLANG_VENDOR), AMD)
|
||||||
FCOMMON_OPT += -fno-unroll-loops
|
FCOMMON_OPT += -fno-unroll-loops
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
|
|
|
@ -74,6 +74,9 @@ static void *huge_malloc(BLASLONG size){
|
||||||
|
|
||||||
#if defined(__WIN32__) || defined(__WIN64__) || !defined(_POSIX_TIMERS)
|
#if defined(__WIN32__) || defined(__WIN64__) || !defined(_POSIX_TIMERS)
|
||||||
struct timeval start, stop;
|
struct timeval start, stop;
|
||||||
|
#elif defined(__APPLE__)
|
||||||
|
mach_timebase_info_data_t info;
|
||||||
|
uint64_t start = 0, stop = 0;
|
||||||
#else
|
#else
|
||||||
struct timespec start = { 0, 0 }, stop = { 0, 0 };
|
struct timespec start = { 0, 0 }, stop = { 0, 0 };
|
||||||
#endif
|
#endif
|
||||||
|
@ -82,6 +85,9 @@ double getsec()
|
||||||
{
|
{
|
||||||
#if defined(__WIN32__) || defined(__WIN64__) || !defined(_POSIX_TIMERS)
|
#if defined(__WIN32__) || defined(__WIN64__) || !defined(_POSIX_TIMERS)
|
||||||
return (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
return (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||||
|
#elif defined(__APPLE__)
|
||||||
|
mach_timebase_info(&info);
|
||||||
|
return (double)(((stop - start) * info.numer)/info.denom) * 1.e-9;
|
||||||
#else
|
#else
|
||||||
return (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_nsec - start.tv_nsec)) * 1.e-9;
|
return (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_nsec - start.tv_nsec)) * 1.e-9;
|
||||||
#endif
|
#endif
|
||||||
|
@ -90,6 +96,8 @@ double getsec()
|
||||||
void begin() {
|
void begin() {
|
||||||
#if defined(__WIN32__) || defined(__WIN64__) || !defined(_POSIX_TIMERS)
|
#if defined(__WIN32__) || defined(__WIN64__) || !defined(_POSIX_TIMERS)
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
gettimeofday( &start, (struct timezone *)0);
|
||||||
|
#elif defined(__APPLE__)
|
||||||
|
start = clock_gettime_nsec_np(CLOCK_UPTIME_RAW);
|
||||||
#else
|
#else
|
||||||
clock_gettime(CLOCK_REALTIME, &start);
|
clock_gettime(CLOCK_REALTIME, &start);
|
||||||
#endif
|
#endif
|
||||||
|
@ -98,6 +106,8 @@ void begin() {
|
||||||
void end() {
|
void end() {
|
||||||
#if defined(__WIN32__) || defined(__WIN64__) || !defined(_POSIX_TIMERS)
|
#if defined(__WIN32__) || defined(__WIN64__) || !defined(_POSIX_TIMERS)
|
||||||
gettimeofday( &stop, (struct timezone *)0);
|
gettimeofday( &stop, (struct timezone *)0);
|
||||||
|
#elif defined(__APPLE__)
|
||||||
|
stop = clock_gettime_nsec_np(CLOCK_UPTIME_RAW);
|
||||||
#else
|
#else
|
||||||
clock_gettime(CLOCK_REALTIME, &stop);
|
clock_gettime(CLOCK_REALTIME, &stop);
|
||||||
#endif
|
#endif
|
||||||
|
|
2
f_check
2
f_check
|
@ -330,7 +330,7 @@ if ($link ne "") {
|
||||||
$flags =~ s/\@/\,/g;
|
$flags =~ s/\@/\,/g;
|
||||||
$linker_L .= "-Wl,". $flags . " " ;
|
$linker_L .= "-Wl,". $flags . " " ;
|
||||||
}
|
}
|
||||||
if ($flags =~ /-lgomp/ && $CC =~ /clang/) {
|
if ($flags =~ /-lgomp/ && $ENV{"CC"} =~ /clang/) {
|
||||||
$flags = "-lomp";
|
$flags = "-lomp";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -46,9 +46,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(POWER8) || defined(POWER9) || defined(POWER10)
|
|
||||||
#if defined(__VEC__) || defined(__ALTIVEC__)
|
#if defined(__VEC__) || defined(__ALTIVEC__)
|
||||||
|
#if defined(POWER8) || defined(POWER9)
|
||||||
#include "dasum_microk_power8.c"
|
#include "dasum_microk_power8.c"
|
||||||
|
#elif defined(POWER10)
|
||||||
|
#include "dasum_microk_power10.c"
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -110,6 +112,21 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
|
||||||
if ( inc_x == 1 )
|
if ( inc_x == 1 )
|
||||||
{
|
{
|
||||||
|
|
||||||
|
#if defined(POWER10)
|
||||||
|
if ( n >= 16 )
|
||||||
|
{
|
||||||
|
BLASLONG align = ((32 - ((uintptr_t)x & (uintptr_t)0x1F)) >> 3) & 0x3;
|
||||||
|
for (i = 0; i < align; i++) {
|
||||||
|
sumf += ABS(x[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
n1 = (n-i) & -16;
|
||||||
|
if ( n1 > 0 )
|
||||||
|
{
|
||||||
|
sumf += dasum_kernel_16(n1, &x[i]);
|
||||||
|
i+=n1;
|
||||||
|
}
|
||||||
|
#else
|
||||||
n1 = n & -16;
|
n1 = n & -16;
|
||||||
if ( n1 > 0 )
|
if ( n1 > 0 )
|
||||||
{
|
{
|
||||||
|
@ -117,6 +134,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
|
||||||
sumf = dasum_kernel_16(n1, x);
|
sumf = dasum_kernel_16(n1, x);
|
||||||
i=n1;
|
i=n1;
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
while(i < n)
|
while(i < n)
|
||||||
{
|
{
|
||||||
|
|
|
@ -0,0 +1,152 @@
|
||||||
|
/***************************************************************************
|
||||||
|
Copyright (c) 2021, The OpenBLAS Project
|
||||||
|
All rights reserved.
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
1. Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in
|
||||||
|
the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
3. Neither the name of the OpenBLAS project nor the names of
|
||||||
|
its contributors may be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written permission.
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||||
|
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||||
|
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*****************************************************************************/
|
||||||
|
|
||||||
|
#define HAVE_KERNEL_16 1
|
||||||
|
|
||||||
|
static double dasum_kernel_16 (long n, double *x)
|
||||||
|
{
|
||||||
|
double sum;
|
||||||
|
__vector double t0;
|
||||||
|
__vector double t1;
|
||||||
|
__vector double t2;
|
||||||
|
__vector double t3;
|
||||||
|
|
||||||
|
__asm__
|
||||||
|
(
|
||||||
|
"dcbt 0, %2 \n\t"
|
||||||
|
|
||||||
|
"xxlxor 32, 32, 32 \n\t"
|
||||||
|
"xxlxor 33, 33, 33 \n\t"
|
||||||
|
"xxlxor 34, 34, 34 \n\t"
|
||||||
|
"xxlxor 35, 35, 35 \n\t"
|
||||||
|
"xxlxor 36, 36, 36 \n\t"
|
||||||
|
"xxlxor 37, 37, 37 \n\t"
|
||||||
|
"xxlxor 38, 38, 38 \n\t"
|
||||||
|
"xxlxor 39, 39, 39 \n\t"
|
||||||
|
|
||||||
|
"lxvp 40, 0(%2) \n\t"
|
||||||
|
"lxvp 42, 32(%2) \n\t"
|
||||||
|
"lxvp 44, 64(%2) \n\t"
|
||||||
|
"lxvp 46, 96(%2) \n\t"
|
||||||
|
|
||||||
|
"addi %2, %2, 128 \n\t"
|
||||||
|
|
||||||
|
"addic. %1, %1, -16 \n\t"
|
||||||
|
"ble two%= \n\t"
|
||||||
|
|
||||||
|
".align 5 \n"
|
||||||
|
"one%=: \n\t"
|
||||||
|
|
||||||
|
"xvabsdp 48, 40 \n\t"
|
||||||
|
"xvabsdp 49, 41 \n\t"
|
||||||
|
"xvabsdp 50, 42 \n\t"
|
||||||
|
"xvabsdp 51, 43 \n\t"
|
||||||
|
"lxvp 40, 0(%2) \n\t"
|
||||||
|
|
||||||
|
|
||||||
|
"xvabsdp %x3, 44 \n\t"
|
||||||
|
"xvabsdp %x4, 45 \n\t"
|
||||||
|
"lxvp 42, 32(%2) \n\t"
|
||||||
|
|
||||||
|
|
||||||
|
"xvabsdp %x5, 46 \n\t"
|
||||||
|
"xvabsdp %x6, 47 \n\t"
|
||||||
|
"lxvp 44, 64(%2) \n\t"
|
||||||
|
|
||||||
|
|
||||||
|
"xvadddp 32, 32, 48 \n\t"
|
||||||
|
"xvadddp 33, 33, 49 \n\t"
|
||||||
|
|
||||||
|
"lxvp 46, 96(%2) \n\t"
|
||||||
|
|
||||||
|
"xvadddp 34, 34, 50 \n\t"
|
||||||
|
"xvadddp 35, 35, 51 \n\t"
|
||||||
|
"addi %2, %2, 128 \n\t"
|
||||||
|
"xvadddp 36, 36, %x3 \n\t"
|
||||||
|
"xvadddp 37, 37, %x4 \n\t"
|
||||||
|
"addic. %1, %1, -16 \n\t"
|
||||||
|
"xvadddp 38, 38, %x5 \n\t"
|
||||||
|
"xvadddp 39, 39, %x6 \n\t"
|
||||||
|
|
||||||
|
"bgt one%= \n"
|
||||||
|
|
||||||
|
"two%=: \n\t"
|
||||||
|
|
||||||
|
"xvabsdp 48, 40 \n\t"
|
||||||
|
"xvabsdp 49, 41 \n\t"
|
||||||
|
"xvabsdp 50, 42 \n\t"
|
||||||
|
"xvabsdp 51, 43 \n\t"
|
||||||
|
"xvabsdp %x3, 44 \n\t"
|
||||||
|
"xvabsdp %x4, 45 \n\t"
|
||||||
|
"xvabsdp %x5, 46 \n\t"
|
||||||
|
"xvabsdp %x6, 47 \n\t"
|
||||||
|
|
||||||
|
"xvadddp 32, 32, 48 \n\t"
|
||||||
|
"xvadddp 33, 33, 49 \n\t"
|
||||||
|
"xvadddp 34, 34, 50 \n\t"
|
||||||
|
"xvadddp 35, 35, 51 \n\t"
|
||||||
|
"xvadddp 36, 36, %x3 \n\t"
|
||||||
|
"xvadddp 37, 37, %x4 \n\t"
|
||||||
|
"xvadddp 38, 38, %x5 \n\t"
|
||||||
|
"xvadddp 39, 39, %x6 \n\t"
|
||||||
|
|
||||||
|
"xvadddp 32, 32, 33 \n\t"
|
||||||
|
"xvadddp 34, 34, 35 \n\t"
|
||||||
|
"xvadddp 36, 36, 37 \n\t"
|
||||||
|
"xvadddp 38, 38, 39 \n\t"
|
||||||
|
|
||||||
|
"xvadddp 32, 32, 34 \n\t"
|
||||||
|
"xvadddp 36, 36, 38 \n\t"
|
||||||
|
|
||||||
|
"xvadddp 32, 32, 36 \n\t"
|
||||||
|
|
||||||
|
XXSWAPD_S(33,32)
|
||||||
|
"xsadddp %x0, 32, 33 \n"
|
||||||
|
|
||||||
|
"#n=%1 x=%3=%2 sum=%0\n"
|
||||||
|
"#t0=%x3 t1=%x4 t2=%x5 t3=%x6"
|
||||||
|
:
|
||||||
|
"=d" (sum), // 0
|
||||||
|
"+r" (n), // 1
|
||||||
|
"+b" (x), // 2
|
||||||
|
"=wa" (t0), // 3
|
||||||
|
"=wa" (t1), // 4
|
||||||
|
"=wa" (t2), // 5
|
||||||
|
"=wa" (t3) // 6
|
||||||
|
:
|
||||||
|
"m" (*x)
|
||||||
|
:
|
||||||
|
"cr0",
|
||||||
|
"vs32","vs33","vs34","vs35","vs36","vs37","vs38","vs39",
|
||||||
|
"vs40","vs41","vs42","vs43","vs44","vs45","vs46","vs47",
|
||||||
|
"vs48","vs49","vs50","vs51"
|
||||||
|
);
|
||||||
|
|
||||||
|
return sum;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
|
@ -29,7 +29,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
typedef __vector unsigned char vec_t;
|
typedef __vector unsigned char vec_t;
|
||||||
typedef FLOAT v4sf_t __attribute__ ((vector_size (16)));
|
typedef FLOAT v4sf_t __attribute__ ((vector_size (16)));
|
||||||
typedef FLOAT v2sf_t __attribute__ ((vector_size (8)));
|
#if !__has_builtin(__builtin_vsx_assemble_pair)
|
||||||
|
#define __builtin_vsx_assemble_pair __builtin_mma_assemble_pair
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if !__has_builtin(__builtin_vsx_disassemble_pair)
|
||||||
|
#define __builtin_vsx_disassemble_pair __builtin_mma_disassemble_pair
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef TRMMKERNEL
|
#ifdef TRMMKERNEL
|
||||||
#define SAVE_ACC(ACC, J) \
|
#define SAVE_ACC(ACC, J) \
|
||||||
|
@ -186,8 +192,8 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B,
|
||||||
vec_t *rowA = (vec_t *) & AO[0];
|
vec_t *rowA = (vec_t *) & AO[0];
|
||||||
vec_t *rb = (vec_t *) & BO[0];
|
vec_t *rb = (vec_t *) & BO[0];
|
||||||
__vector_pair rowB, rowB1;
|
__vector_pair rowB, rowB1;
|
||||||
__builtin_mma_assemble_pair (&rowB, rb[1], rb[0]);
|
__builtin_vsx_assemble_pair (&rowB, rb[1], rb[0]);
|
||||||
__builtin_mma_assemble_pair (&rowB1, rb[3], rb[2]);
|
__builtin_vsx_assemble_pair (&rowB1, rb[3], rb[2]);
|
||||||
__builtin_mma_xvf64ger (&acc0, rowB, rowA[0]);
|
__builtin_mma_xvf64ger (&acc0, rowB, rowA[0]);
|
||||||
__builtin_mma_xvf64ger (&acc1, rowB1, rowA[0]);
|
__builtin_mma_xvf64ger (&acc1, rowB1, rowA[0]);
|
||||||
__builtin_mma_xvf64ger (&acc2, rowB, rowA[1]);
|
__builtin_mma_xvf64ger (&acc2, rowB, rowA[1]);
|
||||||
|
@ -200,8 +206,8 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B,
|
||||||
{
|
{
|
||||||
rowA = (vec_t *) & AO[l << 3];
|
rowA = (vec_t *) & AO[l << 3];
|
||||||
rb = (vec_t *) & BO[l << 3];
|
rb = (vec_t *) & BO[l << 3];
|
||||||
__builtin_mma_assemble_pair (&rowB, rb[1], rb[0]);
|
__builtin_vsx_assemble_pair (&rowB, rb[1], rb[0]);
|
||||||
__builtin_mma_assemble_pair (&rowB1, rb[3], rb[2]);
|
__builtin_vsx_assemble_pair (&rowB1, rb[3], rb[2]);
|
||||||
__builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]);
|
__builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]);
|
||||||
__builtin_mma_xvf64gerpp (&acc1, rowB1, rowA[0]);
|
__builtin_mma_xvf64gerpp (&acc1, rowB1, rowA[0]);
|
||||||
__builtin_mma_xvf64gerpp (&acc2, rowB, rowA[1]);
|
__builtin_mma_xvf64gerpp (&acc2, rowB, rowA[1]);
|
||||||
|
@ -242,8 +248,8 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B,
|
||||||
vec_t *rowA = (vec_t *) & AO[0];
|
vec_t *rowA = (vec_t *) & AO[0];
|
||||||
__vector_pair rowB, rowB1;
|
__vector_pair rowB, rowB1;
|
||||||
vec_t *rb = (vec_t *) & BO[0];
|
vec_t *rb = (vec_t *) & BO[0];
|
||||||
__builtin_mma_assemble_pair (&rowB, rb[1], rb[0]);
|
__builtin_vsx_assemble_pair (&rowB, rb[1], rb[0]);
|
||||||
__builtin_mma_assemble_pair (&rowB1, rb[3], rb[2]);
|
__builtin_vsx_assemble_pair (&rowB1, rb[3], rb[2]);
|
||||||
__builtin_mma_xvf64ger (&acc0, rowB, rowA[0]);
|
__builtin_mma_xvf64ger (&acc0, rowB, rowA[0]);
|
||||||
__builtin_mma_xvf64ger (&acc1, rowB1, rowA[0]);
|
__builtin_mma_xvf64ger (&acc1, rowB1, rowA[0]);
|
||||||
__builtin_mma_xvf64ger (&acc2, rowB, rowA[1]);
|
__builtin_mma_xvf64ger (&acc2, rowB, rowA[1]);
|
||||||
|
@ -252,8 +258,8 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B,
|
||||||
{
|
{
|
||||||
rowA = (vec_t *) & AO[l << 2];
|
rowA = (vec_t *) & AO[l << 2];
|
||||||
rb = (vec_t *) & BO[l << 3];
|
rb = (vec_t *) & BO[l << 3];
|
||||||
__builtin_mma_assemble_pair (&rowB, rb[1], rb[0]);
|
__builtin_vsx_assemble_pair (&rowB, rb[1], rb[0]);
|
||||||
__builtin_mma_assemble_pair (&rowB1, rb[3], rb[2]);
|
__builtin_vsx_assemble_pair (&rowB1, rb[3], rb[2]);
|
||||||
__builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]);
|
__builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]);
|
||||||
__builtin_mma_xvf64gerpp (&acc1, rowB1, rowA[0]);
|
__builtin_mma_xvf64gerpp (&acc1, rowB1, rowA[0]);
|
||||||
__builtin_mma_xvf64gerpp (&acc2, rowB, rowA[1]);
|
__builtin_mma_xvf64gerpp (&acc2, rowB, rowA[1]);
|
||||||
|
@ -286,16 +292,16 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B,
|
||||||
vec_t *rowA = (vec_t *) & AO[0];
|
vec_t *rowA = (vec_t *) & AO[0];
|
||||||
__vector_pair rowB, rowB1;
|
__vector_pair rowB, rowB1;
|
||||||
vec_t *rb = (vec_t *) & BO[0];
|
vec_t *rb = (vec_t *) & BO[0];
|
||||||
__builtin_mma_assemble_pair (&rowB, rb[1], rb[0]);
|
__builtin_vsx_assemble_pair (&rowB, rb[1], rb[0]);
|
||||||
__builtin_mma_assemble_pair (&rowB1, rb[3], rb[2]);
|
__builtin_vsx_assemble_pair (&rowB1, rb[3], rb[2]);
|
||||||
__builtin_mma_xvf64ger (&acc0, rowB, rowA[0]);
|
__builtin_mma_xvf64ger (&acc0, rowB, rowA[0]);
|
||||||
__builtin_mma_xvf64ger (&acc1, rowB1, rowA[0]);
|
__builtin_mma_xvf64ger (&acc1, rowB1, rowA[0]);
|
||||||
for (l = 1; l < temp; l++)
|
for (l = 1; l < temp; l++)
|
||||||
{
|
{
|
||||||
rowA = (vec_t *) & AO[l << 1];
|
rowA = (vec_t *) & AO[l << 1];
|
||||||
rb = (vec_t *) & BO[l << 3];
|
rb = (vec_t *) & BO[l << 3];
|
||||||
__builtin_mma_assemble_pair (&rowB, rb[1], rb[0]);
|
__builtin_vsx_assemble_pair (&rowB, rb[1], rb[0]);
|
||||||
__builtin_mma_assemble_pair (&rowB1, rb[3], rb[2]);
|
__builtin_vsx_assemble_pair (&rowB1, rb[3], rb[2]);
|
||||||
__builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]);
|
__builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]);
|
||||||
__builtin_mma_xvf64gerpp (&acc1, rowB1, rowA[0]);
|
__builtin_mma_xvf64gerpp (&acc1, rowB1, rowA[0]);
|
||||||
}
|
}
|
||||||
|
@ -398,7 +404,7 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B,
|
||||||
vec_t *rowA = (vec_t *) & AO[0];
|
vec_t *rowA = (vec_t *) & AO[0];
|
||||||
__vector_pair rowB;
|
__vector_pair rowB;
|
||||||
vec_t *rb = (vec_t *) & BO[0];
|
vec_t *rb = (vec_t *) & BO[0];
|
||||||
__builtin_mma_assemble_pair (&rowB, rb[1], rb[0]);
|
__builtin_vsx_assemble_pair (&rowB, rb[1], rb[0]);
|
||||||
__builtin_mma_xvf64ger (&acc0, rowB, rowA[0]);
|
__builtin_mma_xvf64ger (&acc0, rowB, rowA[0]);
|
||||||
__builtin_mma_xvf64ger (&acc1, rowB, rowA[1]);
|
__builtin_mma_xvf64ger (&acc1, rowB, rowA[1]);
|
||||||
__builtin_mma_xvf64ger (&acc2, rowB, rowA[2]);
|
__builtin_mma_xvf64ger (&acc2, rowB, rowA[2]);
|
||||||
|
@ -407,7 +413,7 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B,
|
||||||
{
|
{
|
||||||
rowA = (vec_t *) & AO[l << 3];
|
rowA = (vec_t *) & AO[l << 3];
|
||||||
rb = (vec_t *) & BO[l << 2];
|
rb = (vec_t *) & BO[l << 2];
|
||||||
__builtin_mma_assemble_pair (&rowB, rb[1], rb[0]);
|
__builtin_vsx_assemble_pair (&rowB, rb[1], rb[0]);
|
||||||
__builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]);
|
__builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]);
|
||||||
__builtin_mma_xvf64gerpp (&acc1, rowB, rowA[1]);
|
__builtin_mma_xvf64gerpp (&acc1, rowB, rowA[1]);
|
||||||
__builtin_mma_xvf64gerpp (&acc2, rowB, rowA[2]);
|
__builtin_mma_xvf64gerpp (&acc2, rowB, rowA[2]);
|
||||||
|
@ -440,14 +446,14 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B,
|
||||||
vec_t *rowA = (vec_t *) & AO[0];
|
vec_t *rowA = (vec_t *) & AO[0];
|
||||||
__vector_pair rowB;
|
__vector_pair rowB;
|
||||||
vec_t *rb = (vec_t *) & BO[0];
|
vec_t *rb = (vec_t *) & BO[0];
|
||||||
__builtin_mma_assemble_pair (&rowB, rb[1], rb[0]);
|
__builtin_vsx_assemble_pair (&rowB, rb[1], rb[0]);
|
||||||
__builtin_mma_xvf64ger (&acc0, rowB, rowA[0]);
|
__builtin_mma_xvf64ger (&acc0, rowB, rowA[0]);
|
||||||
__builtin_mma_xvf64ger (&acc1, rowB, rowA[1]);
|
__builtin_mma_xvf64ger (&acc1, rowB, rowA[1]);
|
||||||
for (l = 1; l < temp; l++)
|
for (l = 1; l < temp; l++)
|
||||||
{
|
{
|
||||||
rowA = (vec_t *) & AO[l << 2];
|
rowA = (vec_t *) & AO[l << 2];
|
||||||
rb = (vec_t *) & BO[l << 2];
|
rb = (vec_t *) & BO[l << 2];
|
||||||
__builtin_mma_assemble_pair (&rowB, rb[1], rb[0]);
|
__builtin_vsx_assemble_pair (&rowB, rb[1], rb[0]);
|
||||||
__builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]);
|
__builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]);
|
||||||
__builtin_mma_xvf64gerpp (&acc1, rowB, rowA[1]);
|
__builtin_mma_xvf64gerpp (&acc1, rowB, rowA[1]);
|
||||||
}
|
}
|
||||||
|
@ -476,13 +482,13 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B,
|
||||||
vec_t *rowA = (vec_t *) & AO[0];
|
vec_t *rowA = (vec_t *) & AO[0];
|
||||||
__vector_pair rowB;
|
__vector_pair rowB;
|
||||||
vec_t *rb = (vec_t *) & BO[0];
|
vec_t *rb = (vec_t *) & BO[0];
|
||||||
__builtin_mma_assemble_pair (&rowB, rb[1], rb[0]);
|
__builtin_vsx_assemble_pair (&rowB, rb[1], rb[0]);
|
||||||
__builtin_mma_xvf64ger (&acc0, rowB, rowA[0]);
|
__builtin_mma_xvf64ger (&acc0, rowB, rowA[0]);
|
||||||
for (l = 1; l < temp; l++)
|
for (l = 1; l < temp; l++)
|
||||||
{
|
{
|
||||||
rowA = (vec_t *) & AO[l << 1];
|
rowA = (vec_t *) & AO[l << 1];
|
||||||
rb = (vec_t *) & BO[l << 2];
|
rb = (vec_t *) & BO[l << 2];
|
||||||
__builtin_mma_assemble_pair (&rowB, rb[1], rb[0]);
|
__builtin_vsx_assemble_pair (&rowB, rb[1], rb[0]);
|
||||||
__builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]);
|
__builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]);
|
||||||
}
|
}
|
||||||
SAVE_ACC (&acc0, 0);
|
SAVE_ACC (&acc0, 0);
|
||||||
|
@ -562,11 +568,9 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B,
|
||||||
v4sf_t result[4];
|
v4sf_t result[4];
|
||||||
__vector_quad acc0, acc1, acc2, acc3;
|
__vector_quad acc0, acc1, acc2, acc3;
|
||||||
BLASLONG l = 0;
|
BLASLONG l = 0;
|
||||||
FLOAT t[4] = { 0, 0, 0, 0 };
|
|
||||||
t[0] = BO[0], t[1] = BO[1];
|
|
||||||
__vector_pair rowB;
|
__vector_pair rowB;
|
||||||
vec_t *rb = (vec_t *) & t[0];
|
vec_t *rb = (vec_t *) & BO[0];
|
||||||
__builtin_mma_assemble_pair (&rowB, rb[1], rb[0]);
|
__builtin_vsx_assemble_pair (&rowB, rb[0], rb[0]);
|
||||||
vec_t *rowA = (vec_t *) & AO[0];
|
vec_t *rowA = (vec_t *) & AO[0];
|
||||||
__builtin_mma_xvf64ger (&acc0, rowB, rowA[0]);
|
__builtin_mma_xvf64ger (&acc0, rowB, rowA[0]);
|
||||||
__builtin_mma_xvf64ger (&acc1, rowB, rowA[1]);
|
__builtin_mma_xvf64ger (&acc1, rowB, rowA[1]);
|
||||||
|
@ -574,9 +578,8 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B,
|
||||||
__builtin_mma_xvf64ger (&acc3, rowB, rowA[3]);
|
__builtin_mma_xvf64ger (&acc3, rowB, rowA[3]);
|
||||||
for (l = 1; l < temp; l++)
|
for (l = 1; l < temp; l++)
|
||||||
{
|
{
|
||||||
t[0] = BO[l << 1], t[1] = BO[(l << 1) + 1];
|
rb = (vec_t *) & BO[l << 1];
|
||||||
rb = (vec_t *) & t[0];
|
__builtin_vsx_assemble_pair (&rowB, rb[0], rb[0]);
|
||||||
__builtin_mma_assemble_pair (&rowB, rb[1], rb[0]);
|
|
||||||
rowA = (vec_t *) & AO[l << 3];
|
rowA = (vec_t *) & AO[l << 3];
|
||||||
__builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]);
|
__builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]);
|
||||||
__builtin_mma_xvf64gerpp (&acc1, rowB, rowA[1]);
|
__builtin_mma_xvf64gerpp (&acc1, rowB, rowA[1]);
|
||||||
|
@ -607,19 +610,16 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B,
|
||||||
v4sf_t result[4];
|
v4sf_t result[4];
|
||||||
__vector_quad acc0, acc1;
|
__vector_quad acc0, acc1;
|
||||||
BLASLONG l = 0;
|
BLASLONG l = 0;
|
||||||
FLOAT t[4] = { 0, 0, 0, 0 };
|
|
||||||
t[0] = BO[0], t[1] = BO[1];
|
|
||||||
__vector_pair rowB;
|
__vector_pair rowB;
|
||||||
vec_t *rb = (vec_t *) & t[0];
|
vec_t *rb = (vec_t *) & BO[0];
|
||||||
__builtin_mma_assemble_pair (&rowB, rb[1], rb[0]);
|
__builtin_vsx_assemble_pair (&rowB, rb[0], rb[0]);
|
||||||
vec_t *rowA = (vec_t *) & AO[0];
|
vec_t *rowA = (vec_t *) & AO[0];
|
||||||
__builtin_mma_xvf64ger (&acc0, rowB, rowA[0]);
|
__builtin_mma_xvf64ger (&acc0, rowB, rowA[0]);
|
||||||
__builtin_mma_xvf64ger (&acc1, rowB, rowA[1]);
|
__builtin_mma_xvf64ger (&acc1, rowB, rowA[1]);
|
||||||
for (l = 1; l < temp; l++)
|
for (l = 1; l < temp; l++)
|
||||||
{
|
{
|
||||||
t[0] = BO[l << 1], t[1] = BO[(l << 1) + 1];
|
rb = (vec_t *) & BO[l << 1];
|
||||||
rb = (vec_t *) & t[0];
|
__builtin_vsx_assemble_pair (&rowB, rb[0], rb[0]);
|
||||||
__builtin_mma_assemble_pair (&rowB, rb[1], rb[0]);
|
|
||||||
rowA = (vec_t *) & AO[l << 2];
|
rowA = (vec_t *) & AO[l << 2];
|
||||||
__builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]);
|
__builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]);
|
||||||
__builtin_mma_xvf64gerpp (&acc1, rowB, rowA[1]);
|
__builtin_mma_xvf64gerpp (&acc1, rowB, rowA[1]);
|
||||||
|
@ -646,18 +646,15 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B,
|
||||||
v4sf_t result[4];
|
v4sf_t result[4];
|
||||||
__vector_quad acc0;
|
__vector_quad acc0;
|
||||||
BLASLONG l = 0;
|
BLASLONG l = 0;
|
||||||
FLOAT t[4] = { 0, 0, 0, 0 };
|
|
||||||
t[0] = BO[0], t[1] = BO[1];
|
|
||||||
__vector_pair rowB;
|
__vector_pair rowB;
|
||||||
vec_t *rb = (vec_t *) & t[0];
|
vec_t *rb = (vec_t *) & BO[0];
|
||||||
__builtin_mma_assemble_pair (&rowB, rb[1], rb[0]);
|
__builtin_vsx_assemble_pair (&rowB, rb[0], rb[0]);
|
||||||
vec_t *rowA = (vec_t *) & AO[0];
|
vec_t *rowA = (vec_t *) & AO[0];
|
||||||
__builtin_mma_xvf64ger (&acc0, rowB, rowA[0]);
|
__builtin_mma_xvf64ger (&acc0, rowB, rowA[0]);
|
||||||
for (l = 1; l < temp; l++)
|
for (l = 1; l < temp; l++)
|
||||||
{
|
{
|
||||||
t[0] = BO[l << 1], t[1] = BO[(l << 1) + 1];
|
rb = (vec_t *) & BO[l << 1];
|
||||||
rb = (vec_t *) & t[0];
|
__builtin_vsx_assemble_pair (&rowB, rb[0], rb[0]);
|
||||||
__builtin_mma_assemble_pair (&rowB, rb[1], rb[0]);
|
|
||||||
rowA = (vec_t *) & AO[l << 1];
|
rowA = (vec_t *) & AO[l << 1];
|
||||||
__builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]);
|
__builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]);
|
||||||
}
|
}
|
||||||
|
|
|
@ -46,9 +46,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(POWER8) || defined(POWER9) || defined(POWER10)
|
|
||||||
#if defined(__VEC__) || defined(__ALTIVEC__)
|
#if defined(__VEC__) || defined(__ALTIVEC__)
|
||||||
|
#if defined(POWER8) || defined(POWER9)
|
||||||
#include "sasum_microk_power8.c"
|
#include "sasum_microk_power8.c"
|
||||||
|
#elif defined(POWER10)
|
||||||
|
#include "sasum_microk_power10.c"
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -110,6 +112,21 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
|
||||||
if ( inc_x == 1 )
|
if ( inc_x == 1 )
|
||||||
{
|
{
|
||||||
|
|
||||||
|
#if defined(POWER10)
|
||||||
|
if ( n >= 32 )
|
||||||
|
{
|
||||||
|
BLASLONG align = ((32 - ((uintptr_t)x & (uintptr_t)0x1F)) >> 2) & 0x7;
|
||||||
|
for (i = 0; i < align; i++) {
|
||||||
|
sumf += ABS(x[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
n1 = (n-i) & -32;
|
||||||
|
if ( n1 > 0 )
|
||||||
|
{
|
||||||
|
sumf += sasum_kernel_32(n1, &x[i]);
|
||||||
|
i+=n1;
|
||||||
|
}
|
||||||
|
#else
|
||||||
n1 = n & -32;
|
n1 = n & -32;
|
||||||
if ( n1 > 0 )
|
if ( n1 > 0 )
|
||||||
{
|
{
|
||||||
|
@ -117,6 +134,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
|
||||||
sumf = sasum_kernel_32(n1, x);
|
sumf = sasum_kernel_32(n1, x);
|
||||||
i=n1;
|
i=n1;
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
while(i < n)
|
while(i < n)
|
||||||
{
|
{
|
||||||
|
|
|
@ -0,0 +1,153 @@
|
||||||
|
/***************************************************************************
|
||||||
|
Copyright (c) 2021, The OpenBLAS Project
|
||||||
|
All rights reserved.
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
1. Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in
|
||||||
|
the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
3. Neither the name of the OpenBLAS project nor the names of
|
||||||
|
its contributors may be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written permission.
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||||
|
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||||
|
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*****************************************************************************/
|
||||||
|
|
||||||
|
|
||||||
|
#define HAVE_KERNEL_32 1
|
||||||
|
|
||||||
|
static float sasum_kernel_32 (long n, float *x)
|
||||||
|
{
|
||||||
|
float sum;
|
||||||
|
__vector float t0;
|
||||||
|
__vector float t1;
|
||||||
|
__vector float t2;
|
||||||
|
__vector float t3;
|
||||||
|
|
||||||
|
__asm__
|
||||||
|
(
|
||||||
|
"dcbt 0, %2 \n\t"
|
||||||
|
|
||||||
|
"xxlxor 32, 32, 32 \n\t"
|
||||||
|
"xxlxor 33, 33, 33 \n\t"
|
||||||
|
"xxlxor 34, 34, 34 \n\t"
|
||||||
|
"xxlxor 35, 35, 35 \n\t"
|
||||||
|
"xxlxor 36, 36, 36 \n\t"
|
||||||
|
"xxlxor 37, 37, 37 \n\t"
|
||||||
|
"xxlxor 38, 38, 38 \n\t"
|
||||||
|
"xxlxor 39, 39, 39 \n\t"
|
||||||
|
|
||||||
|
"lxvp 40, 0(%2) \n\t"
|
||||||
|
"lxvp 42, 32(%2) \n\t"
|
||||||
|
"lxvp 44, 64(%2) \n\t"
|
||||||
|
"lxvp 46, 96(%2) \n\t"
|
||||||
|
|
||||||
|
"addi %2, %2, 128 \n\t"
|
||||||
|
|
||||||
|
"addic. %1, %1, -32 \n\t"
|
||||||
|
"ble two%= \n\t"
|
||||||
|
|
||||||
|
".align 5 \n"
|
||||||
|
"one%=: \n\t"
|
||||||
|
|
||||||
|
"xvabssp 48, 40 \n\t"
|
||||||
|
"xvabssp 49, 41 \n\t"
|
||||||
|
"xvabssp 50, 42 \n\t"
|
||||||
|
"xvabssp 51, 43 \n\t"
|
||||||
|
"lxvp 40, 0(%2) \n\t"
|
||||||
|
|
||||||
|
"xvabssp %x3, 44 \n\t"
|
||||||
|
"xvabssp %x4, 45 \n\t"
|
||||||
|
"lxvp 42, 32(%2) \n\t"
|
||||||
|
|
||||||
|
"xvabssp %x5, 46 \n\t"
|
||||||
|
"xvabssp %x6, 47 \n\t"
|
||||||
|
"lxvp 44, 64(%2) \n\t"
|
||||||
|
|
||||||
|
"xvaddsp 32, 32, 48 \n\t"
|
||||||
|
"xvaddsp 33, 33, 49 \n\t"
|
||||||
|
|
||||||
|
"lxvp 46, 96(%2) \n\t"
|
||||||
|
|
||||||
|
"xvaddsp 34, 34, 50 \n\t"
|
||||||
|
"xvaddsp 35, 35, 51 \n\t"
|
||||||
|
"addi %2, %2, 128 \n\t"
|
||||||
|
"xvaddsp 36, 36, %x3 \n\t"
|
||||||
|
"xvaddsp 37, 37, %x4 \n\t"
|
||||||
|
"addic. %1, %1, -32 \n\t"
|
||||||
|
"xvaddsp 38, 38, %x5 \n\t"
|
||||||
|
"xvaddsp 39, 39, %x6 \n\t"
|
||||||
|
|
||||||
|
"bgt one%= \n"
|
||||||
|
|
||||||
|
"two%=: \n\t"
|
||||||
|
|
||||||
|
"xvabssp 48, 40 \n\t"
|
||||||
|
"xvabssp 49, 41 \n\t"
|
||||||
|
"xvabssp 50, 42 \n\t"
|
||||||
|
"xvabssp 51, 43 \n\t"
|
||||||
|
"xvabssp %x3, 44 \n\t"
|
||||||
|
"xvabssp %x4, 45 \n\t"
|
||||||
|
"xvabssp %x5, 46 \n\t"
|
||||||
|
"xvabssp %x6, 47 \n\t"
|
||||||
|
|
||||||
|
"xvaddsp 32, 32, 48 \n\t"
|
||||||
|
"xvaddsp 33, 33, 49 \n\t"
|
||||||
|
"xvaddsp 34, 34, 50 \n\t"
|
||||||
|
"xvaddsp 35, 35, 51 \n\t"
|
||||||
|
"xvaddsp 36, 36, %x3 \n\t"
|
||||||
|
"xvaddsp 37, 37, %x4 \n\t"
|
||||||
|
"xvaddsp 38, 38, %x5 \n\t"
|
||||||
|
"xvaddsp 39, 39, %x6 \n\t"
|
||||||
|
|
||||||
|
"xvaddsp 32, 32, 33 \n\t"
|
||||||
|
"xvaddsp 34, 34, 35 \n\t"
|
||||||
|
"xvaddsp 36, 36, 37 \n\t"
|
||||||
|
"xvaddsp 38, 38, 39 \n\t"
|
||||||
|
|
||||||
|
"xvaddsp 32, 32, 34 \n\t"
|
||||||
|
"xvaddsp 36, 36, 38 \n\t"
|
||||||
|
|
||||||
|
"xvaddsp 32, 32, 36 \n\t"
|
||||||
|
|
||||||
|
"xxsldwi 33, 32, 32, 2 \n\t"
|
||||||
|
"xvaddsp 32, 32, 33 \n\t"
|
||||||
|
|
||||||
|
"xxsldwi 33, 32, 32, 1 \n\t"
|
||||||
|
"xvaddsp 32, 32, 33 \n\t"
|
||||||
|
|
||||||
|
"xscvspdp %x0, 32 \n"
|
||||||
|
|
||||||
|
"#n=%1 x=%3=%2 sum=%0\n"
|
||||||
|
"#t0=%x3 t1=%x4 t2=%x5 t3=%x6"
|
||||||
|
:
|
||||||
|
"=f" (sum), // 0
|
||||||
|
"+r" (n), // 1
|
||||||
|
"+b" (x), // 2
|
||||||
|
"=wa" (t0), // 3
|
||||||
|
"=wa" (t1), // 4
|
||||||
|
"=wa" (t2), // 5
|
||||||
|
"=wa" (t3) // 6
|
||||||
|
:
|
||||||
|
"m" (*x)
|
||||||
|
:
|
||||||
|
"cr0",
|
||||||
|
"vs32","vs33","vs34","vs35","vs36","vs37","vs38","vs39",
|
||||||
|
"vs40","vs41","vs42","vs43","vs44","vs45","vs46","vs47",
|
||||||
|
"vs48","vs49","vs50","vs51"
|
||||||
|
);
|
||||||
|
|
||||||
|
return sum;
|
||||||
|
}
|
|
@ -45,7 +45,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#endif
|
#endif
|
||||||
#elif defined(POWER10)
|
#elif defined(POWER10)
|
||||||
#if defined(DOUBLE)
|
#if defined(DOUBLE)
|
||||||
#include "zscal_microk_power8.c"
|
#include "zscal_microk_power10.c"
|
||||||
#else
|
#else
|
||||||
#include "cscal_microk_power10.c"
|
#include "cscal_microk_power10.c"
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -0,0 +1,195 @@
|
||||||
|
/***************************************************************************
|
||||||
|
Copyright (c) 2021, The OpenBLAS Project
|
||||||
|
All rights reserved.
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
1. Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in
|
||||||
|
the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
3. Neither the name of the OpenBLAS project nor the names of
|
||||||
|
its contributors may be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written permission.
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||||
|
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||||
|
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*****************************************************************************/
|
||||||
|
|
||||||
|
#define HAVE_KERNEL_8 1
|
||||||
|
|
||||||
|
static void zscal_kernel_8 (long n, double *x, double alpha_r, double alpha_i)
|
||||||
|
{
|
||||||
|
__vector double t0;
|
||||||
|
__vector double t1;
|
||||||
|
__vector double t2;
|
||||||
|
__vector double t3;
|
||||||
|
__vector double t4;
|
||||||
|
__vector double t5;
|
||||||
|
|
||||||
|
__asm__
|
||||||
|
(
|
||||||
|
"dcbt 0, %2 \n\t"
|
||||||
|
|
||||||
|
"xsnegdp 33, %x10 \n\t" // -alpha_i
|
||||||
|
XXSPLTD_S(32,%x9,0) // alpha_r , alpha_r
|
||||||
|
XXMRGHD_S(33,%x10, 33) // -alpha_i , alpha_i
|
||||||
|
|
||||||
|
"lxvp 40, 0(%2) \n\t"
|
||||||
|
"lxvp 42, 32(%2) \n\t"
|
||||||
|
"lxvp 44, 64(%2) \n\t"
|
||||||
|
"lxvp 46, 96(%2) \n\t"
|
||||||
|
|
||||||
|
"addic. %1, %1, -8 \n\t"
|
||||||
|
"ble two%= \n\t"
|
||||||
|
|
||||||
|
".align 5 \n"
|
||||||
|
"one%=: \n\t"
|
||||||
|
|
||||||
|
"xvmuldp 48, 40, 32 \n\t" // x0_r * alpha_r, x0_i * alpha_r
|
||||||
|
"xvmuldp 49, 41, 32 \n\t"
|
||||||
|
"xvmuldp 50, 42, 32 \n\t"
|
||||||
|
"xvmuldp 51, 43, 32 \n\t"
|
||||||
|
"xvmuldp 34, 44, 32 \n\t"
|
||||||
|
"xvmuldp 35, 45, 32 \n\t"
|
||||||
|
"xvmuldp 36, 46, 32 \n\t"
|
||||||
|
"xvmuldp 37, 47, 32 \n\t"
|
||||||
|
|
||||||
|
XXSWAPD_S(38,40)
|
||||||
|
XXSWAPD_S(39,41)
|
||||||
|
XXSWAPD_S(%x3,42)
|
||||||
|
XXSWAPD_S(%x4,43)
|
||||||
|
XXSWAPD_S(%x5,44)
|
||||||
|
XXSWAPD_S(%x6,45)
|
||||||
|
XXSWAPD_S(%x7,46)
|
||||||
|
XXSWAPD_S(%x8,47)
|
||||||
|
|
||||||
|
"xvmuldp 38, 38, 33 \n\t" // x0_i * -alpha_i, x0_r * alpha_i
|
||||||
|
"xvmuldp 39, 39, 33 \n\t"
|
||||||
|
|
||||||
|
|
||||||
|
"xvmuldp %x3, %x3, 33 \n\t"
|
||||||
|
"xvmuldp %x4, %x4, 33 \n\t"
|
||||||
|
|
||||||
|
|
||||||
|
"lxvp 40, 128(%2) \n\t"
|
||||||
|
"lxvp 42, 160(%2) \n\t"
|
||||||
|
"xvmuldp %x5, %x5, 33 \n\t"
|
||||||
|
"xvmuldp %x6, %x6, 33 \n\t"
|
||||||
|
|
||||||
|
|
||||||
|
"xvmuldp %x7, %x7, 33 \n\t"
|
||||||
|
"xvmuldp %x8, %x8, 33 \n\t"
|
||||||
|
"lxvp 44, 192(%2) \n\t"
|
||||||
|
"lxvp 46, 224(%2) \n\t"
|
||||||
|
|
||||||
|
|
||||||
|
"xvadddp 48, 48, 38 \n\t"
|
||||||
|
"xvadddp 49, 49, 39 \n\t"
|
||||||
|
"xvadddp 50, 50, %x3 \n\t"
|
||||||
|
"xvadddp 51, 51, %x4 \n\t"
|
||||||
|
"stxv 49, 0(%2) \n\t"
|
||||||
|
"stxv 48, 16(%2) \n\t"
|
||||||
|
"stxv 51, 32(%2) \n\t"
|
||||||
|
"stxv 50, 48(%2) \n\t"
|
||||||
|
|
||||||
|
|
||||||
|
"xvadddp 34, 34, %x5 \n\t"
|
||||||
|
"xvadddp 35, 35, %x6 \n\t"
|
||||||
|
|
||||||
|
|
||||||
|
"xvadddp 36, 36, %x7 \n\t"
|
||||||
|
"xvadddp 37, 37, %x8 \n\t"
|
||||||
|
|
||||||
|
"stxv 35, 64(%2) \n\t"
|
||||||
|
"stxv 34, 80(%2) \n\t"
|
||||||
|
"stxv 37, 96(%2) \n\t"
|
||||||
|
"stxv 36, 112(%2) \n\t"
|
||||||
|
|
||||||
|
"addi %2, %2, 128 \n\t"
|
||||||
|
|
||||||
|
"addic. %1, %1, -8 \n\t"
|
||||||
|
"bgt one%= \n"
|
||||||
|
|
||||||
|
"two%=: \n\t"
|
||||||
|
|
||||||
|
"xvmuldp 48, 40, 32 \n\t" // x0_r * alpha_r, x0_i * alpha_r
|
||||||
|
"xvmuldp 49, 41, 32 \n\t"
|
||||||
|
"xvmuldp 50, 42, 32 \n\t"
|
||||||
|
"xvmuldp 51, 43, 32 \n\t"
|
||||||
|
"xvmuldp 34, 44, 32 \n\t"
|
||||||
|
"xvmuldp 35, 45, 32 \n\t"
|
||||||
|
"xvmuldp 36, 46, 32 \n\t"
|
||||||
|
"xvmuldp 37, 47, 32 \n\t"
|
||||||
|
|
||||||
|
XXSWAPD_S(38,40)
|
||||||
|
XXSWAPD_S(39,41)
|
||||||
|
XXSWAPD_S(%x3,42)
|
||||||
|
XXSWAPD_S(%x4,43)
|
||||||
|
XXSWAPD_S(%x5,44)
|
||||||
|
XXSWAPD_S(%x6,45)
|
||||||
|
XXSWAPD_S(%x7,46)
|
||||||
|
XXSWAPD_S(%x8,47)
|
||||||
|
|
||||||
|
|
||||||
|
"xvmuldp 38, 38, 33 \n\t" // x0_i * -alpha_i, x0_r * alpha_i
|
||||||
|
"xvmuldp 39, 39, 33 \n\t"
|
||||||
|
"xvmuldp %x3, %x3, 33 \n\t"
|
||||||
|
"xvmuldp %x4, %x4, 33 \n\t"
|
||||||
|
"xvmuldp %x5, %x5, 33 \n\t"
|
||||||
|
"xvmuldp %x6, %x6, 33 \n\t"
|
||||||
|
"xvmuldp %x7, %x7, 33 \n\t"
|
||||||
|
"xvmuldp %x8, %x8, 33 \n\t"
|
||||||
|
|
||||||
|
"xvadddp 48, 48, 38 \n\t"
|
||||||
|
"xvadddp 49, 49, 39 \n\t"
|
||||||
|
|
||||||
|
"xvadddp 50, 50, %x3 \n\t"
|
||||||
|
"xvadddp 51, 51, %x4 \n\t"
|
||||||
|
"stxv 49, 0(%2) \n\t"
|
||||||
|
"stxv 48, 16(%2) \n\t"
|
||||||
|
"stxv 51, 32(%2) \n\t"
|
||||||
|
"stxv 50, 48(%2) \n\t"
|
||||||
|
|
||||||
|
"xvadddp 34, 34, %x5 \n\t"
|
||||||
|
"xvadddp 35, 35, %x6 \n\t"
|
||||||
|
|
||||||
|
|
||||||
|
"xvadddp 36, 36, %x7 \n\t"
|
||||||
|
"xvadddp 37, 37, %x8 \n\t"
|
||||||
|
|
||||||
|
"stxv 35, 64(%2) \n\t"
|
||||||
|
"stxv 34, 80(%2) \n\t"
|
||||||
|
"stxv 37, 96(%2) \n\t"
|
||||||
|
"stxv 36, 112(%2) \n\t"
|
||||||
|
|
||||||
|
"#n=%1 x=%0=%2 alpha=(%9,%10) \n"
|
||||||
|
:
|
||||||
|
"+m" (*x),
|
||||||
|
"+r" (n), // 1
|
||||||
|
"+b" (x), // 2
|
||||||
|
"=wa" (t0), // 3
|
||||||
|
"=wa" (t1), // 4
|
||||||
|
"=wa" (t2), // 5
|
||||||
|
"=wa" (t3), // 6
|
||||||
|
"=wa" (t4), // 7
|
||||||
|
"=wa" (t5) // 8
|
||||||
|
:
|
||||||
|
"d" (alpha_r), // 9
|
||||||
|
"d" (alpha_i) // 10
|
||||||
|
:
|
||||||
|
"cr0",
|
||||||
|
"vs32","vs33","vs34","vs35","vs36","vs37","vs38","vs39",
|
||||||
|
"vs40","vs41","vs42","vs43","vs44","vs45","vs46","vs47",
|
||||||
|
"vs48","vs49","vs50","vs51"
|
||||||
|
);
|
||||||
|
}
|
|
@ -1,5 +1,4 @@
|
||||||
/* need a new enough GCC for avx512 support */
|
#if defined(HAVE_FMA3) && defined(HAVE_AVX2)
|
||||||
#if (( defined(__GNUC__) && __GNUC__ > 6 && defined(__AVX512CD__)) || (defined(__clang__) && __clang_major__ >= 9))
|
|
||||||
|
|
||||||
#define HAVE_SROT_KERNEL 1
|
#define HAVE_SROT_KERNEL 1
|
||||||
|
|
||||||
|
|
|
@ -25,7 +25,7 @@ set(AEIGTST
|
||||||
set(SCIGTST slafts.f slahd2.f slasum.f slatb9.f sstech.f sstect.f
|
set(SCIGTST slafts.f slahd2.f slasum.f slatb9.f sstech.f sstect.f
|
||||||
ssvdch.f ssvdct.f ssxt1.f)
|
ssvdch.f ssvdct.f ssxt1.f)
|
||||||
|
|
||||||
set(SEIGTST schkee.f
|
set(SEIGTST schkee.F
|
||||||
sbdt01.f sbdt02.f sbdt03.f sbdt04.f sbdt05.f
|
sbdt01.f sbdt02.f sbdt03.f sbdt04.f sbdt05.f
|
||||||
schkbb.f schkbd.f schkbk.f schkbl.f schkec.f
|
schkbb.f schkbd.f schkbk.f schkbl.f schkec.f
|
||||||
schkgg.f schkgk.f schkgl.f schkhs.f schksb.f schkst.f schkst2stg.f schksb2stg.f
|
schkgg.f schkgk.f schkgl.f schkhs.f schksb.f schkst.f schkst2stg.f schksb2stg.f
|
||||||
|
@ -42,7 +42,7 @@ set(SEIGTST schkee.f
|
||||||
sort03.f ssbt21.f ssgt01.f sslect.f sspt21.f sstt21.f
|
sort03.f ssbt21.f ssgt01.f sslect.f sspt21.f sstt21.f
|
||||||
sstt22.f ssyt21.f ssyt22.f)
|
sstt22.f ssyt21.f ssyt22.f)
|
||||||
|
|
||||||
set(CEIGTST cchkee.f
|
set(CEIGTST cchkee.F
|
||||||
cbdt01.f cbdt02.f cbdt03.f cbdt05.f
|
cbdt01.f cbdt02.f cbdt03.f cbdt05.f
|
||||||
cchkbb.f cchkbd.f cchkbk.f cchkbl.f cchkec.f
|
cchkbb.f cchkbd.f cchkbk.f cchkbl.f cchkec.f
|
||||||
cchkgg.f cchkgk.f cchkgl.f cchkhb.f cchkhs.f cchkst.f cchkst2stg.f cchkhb2stg.f
|
cchkgg.f cchkgk.f cchkgl.f cchkhb.f cchkhs.f cchkst.f cchkst2stg.f cchkhb2stg.f
|
||||||
|
@ -62,7 +62,7 @@ set(CEIGTST cchkee.f
|
||||||
set(DZIGTST dlafts.f dlahd2.f dlasum.f dlatb9.f dstech.f dstect.f
|
set(DZIGTST dlafts.f dlahd2.f dlasum.f dlatb9.f dstech.f dstect.f
|
||||||
dsvdch.f dsvdct.f dsxt1.f)
|
dsvdch.f dsvdct.f dsxt1.f)
|
||||||
|
|
||||||
set(DEIGTST dchkee.f
|
set(DEIGTST dchkee.F
|
||||||
dbdt01.f dbdt02.f dbdt03.f dbdt04.f dbdt05.f
|
dbdt01.f dbdt02.f dbdt03.f dbdt04.f dbdt05.f
|
||||||
dchkbb.f dchkbd.f dchkbk.f dchkbl.f dchkec.f
|
dchkbb.f dchkbd.f dchkbk.f dchkbl.f dchkec.f
|
||||||
dchkgg.f dchkgk.f dchkgl.f dchkhs.f dchksb.f dchkst.f dchkst2stg.f dchksb2stg.f
|
dchkgg.f dchkgk.f dchkgl.f dchkhs.f dchksb.f dchkst.f dchkst2stg.f dchksb2stg.f
|
||||||
|
@ -79,7 +79,7 @@ set(DEIGTST dchkee.f
|
||||||
dort03.f dsbt21.f dsgt01.f dslect.f dspt21.f dstt21.f
|
dort03.f dsbt21.f dsgt01.f dslect.f dspt21.f dstt21.f
|
||||||
dstt22.f dsyt21.f dsyt22.f)
|
dstt22.f dsyt21.f dsyt22.f)
|
||||||
|
|
||||||
set(ZEIGTST zchkee.f
|
set(ZEIGTST zchkee.F
|
||||||
zbdt01.f zbdt02.f zbdt03.f zbdt05.f
|
zbdt01.f zbdt02.f zbdt03.f zbdt05.f
|
||||||
zchkbb.f zchkbd.f zchkbk.f zchkbl.f zchkec.f
|
zchkbb.f zchkbd.f zchkbk.f zchkbl.f zchkec.f
|
||||||
zchkgg.f zchkgk.f zchkgl.f zchkhb.f zchkhs.f zchkst.f zchkst2stg.f zchkhb2stg.f
|
zchkgg.f zchkgk.f zchkgl.f zchkhb.f zchkhs.f zchkst.f zchkst2stg.f zchkhb2stg.f
|
||||||
|
|
|
@ -157,11 +157,11 @@ cleanobj:
|
||||||
cleanexe:
|
cleanexe:
|
||||||
rm -f xeigtst*
|
rm -f xeigtst*
|
||||||
|
|
||||||
schkee.o: schkee.f
|
schkee.o: schkee.F
|
||||||
$(FC) $(FFLAGS_DRV) -c -o $@ $<
|
$(FC) $(FFLAGS_DRV) -c -o $@ $<
|
||||||
dchkee.o: dchkee.f
|
dchkee.o: dchkee.F
|
||||||
$(FC) $(FFLAGS_DRV) -c -o $@ $<
|
$(FC) $(FFLAGS_DRV) -c -o $@ $<
|
||||||
cchkee.o: cchkee.f
|
cchkee.o: cchkee.F
|
||||||
$(FC) $(FFLAGS_DRV) -c -o $@ $<
|
$(FC) $(FFLAGS_DRV) -c -o $@ $<
|
||||||
zchkee.o: zchkee.f
|
zchkee.o: zchkee.F
|
||||||
$(FC) $(FFLAGS_DRV) -c -o $@ $<
|
$(FC) $(FFLAGS_DRV) -c -o $@ $<
|
||||||
|
|
|
@ -1034,6 +1034,10 @@
|
||||||
* =====================================================================
|
* =====================================================================
|
||||||
PROGRAM CCHKEE
|
PROGRAM CCHKEE
|
||||||
*
|
*
|
||||||
|
#if defined(_OPENMP)
|
||||||
|
use omp_lib
|
||||||
|
#endif
|
||||||
|
*
|
||||||
* -- LAPACK test routine (version 3.7.0) --
|
* -- LAPACK test routine (version 3.7.0) --
|
||||||
* -- LAPACK is a software package provided by Univ. of Tennessee, --
|
* -- LAPACK is a software package provided by Univ. of Tennessee, --
|
||||||
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
|
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
|
||||||
|
@ -1071,7 +1075,7 @@
|
||||||
CHARACTER*80 LINE
|
CHARACTER*80 LINE
|
||||||
INTEGER I, I1, IC, INFO, ITMP, K, LENP, MAXTYP, NEWSD,
|
INTEGER I, I1, IC, INFO, ITMP, K, LENP, MAXTYP, NEWSD,
|
||||||
$ NK, NN, NPARMS, NRHS, NTYPES,
|
$ NK, NN, NPARMS, NRHS, NTYPES,
|
||||||
$ VERS_MAJOR, VERS_MINOR, VERS_PATCH
|
$ VERS_MAJOR, VERS_MINOR, VERS_PATCH, N_THREADS
|
||||||
REAL EPS, S1, S2, THRESH, THRSHN
|
REAL EPS, S1, S2, THRESH, THRSHN
|
||||||
* ..
|
* ..
|
||||||
* .. Local Arrays ..
|
* .. Local Arrays ..
|
||||||
|
@ -1084,12 +1088,16 @@
|
||||||
INTEGER INMIN( MAXIN ), INWIN( MAXIN ), INIBL( MAXIN ),
|
INTEGER INMIN( MAXIN ), INWIN( MAXIN ), INIBL( MAXIN ),
|
||||||
$ ISHFTS( MAXIN ), IACC22( MAXIN )
|
$ ISHFTS( MAXIN ), IACC22( MAXIN )
|
||||||
REAL ALPHA( NMAX ), BETA( NMAX ), DR( NMAX, 12 ),
|
REAL ALPHA( NMAX ), BETA( NMAX ), DR( NMAX, 12 ),
|
||||||
$ RESULT( 500 ), RWORK( LWORK ), S( NMAX*NMAX )
|
$ RESULT( 500 )
|
||||||
COMPLEX A( NMAX*NMAX, NEED ), B( NMAX*NMAX, 5 ),
|
COMPLEX DC( NMAX, 6 ), TAUA( NMAX ), TAUB( NMAX ),
|
||||||
$ C( NCMAX*NCMAX, NCMAX*NCMAX ), DC( NMAX, 6 ),
|
|
||||||
$ TAUA( NMAX ), TAUB( NMAX ), WORK( LWORK ),
|
|
||||||
$ X( 5*NMAX )
|
$ X( 5*NMAX )
|
||||||
* ..
|
* ..
|
||||||
|
* .. Allocatable Arrays ..
|
||||||
|
INTEGER AllocateStatus
|
||||||
|
REAL, DIMENSION(:), ALLOCATABLE :: RWORK, S
|
||||||
|
COMPLEX, DIMENSION(:), ALLOCATABLE :: WORK
|
||||||
|
COMPLEX, DIMENSION(:,:), ALLOCATABLE :: A, B, C
|
||||||
|
* ..
|
||||||
* .. External Functions ..
|
* .. External Functions ..
|
||||||
LOGICAL LSAMEN
|
LOGICAL LSAMEN
|
||||||
REAL SECOND, SLAMCH
|
REAL SECOND, SLAMCH
|
||||||
|
@ -1130,6 +1138,21 @@
|
||||||
DATA INTSTR / '0123456789' /
|
DATA INTSTR / '0123456789' /
|
||||||
DATA IOLDSD / 0, 0, 0, 1 /
|
DATA IOLDSD / 0, 0, 0, 1 /
|
||||||
* ..
|
* ..
|
||||||
|
* .. Allocate memory dynamically ..
|
||||||
|
*
|
||||||
|
ALLOCATE ( S(NMAX*NMAX), STAT = AllocateStatus )
|
||||||
|
IF (AllocateStatus /= 0) STOP "*** Not enough memory ***"
|
||||||
|
ALLOCATE ( A(NMAX*NMAX,NEED), STAT = AllocateStatus )
|
||||||
|
IF (AllocateStatus /= 0) STOP "*** Not enough memory ***"
|
||||||
|
ALLOCATE ( B(NMAX*NMAX,5), STAT = AllocateStatus )
|
||||||
|
IF (AllocateStatus /= 0) STOP "*** Not enough memory ***"
|
||||||
|
ALLOCATE ( C(NCMAX*NCMAX,NCMAX*NCMAX), STAT = AllocateStatus )
|
||||||
|
IF (AllocateStatus /= 0) STOP "*** Not enough memory ***"
|
||||||
|
ALLOCATE ( RWORK(LWORK), STAT = AllocateStatus )
|
||||||
|
IF (AllocateStatus /= 0) STOP "*** Not enough memory ***"
|
||||||
|
ALLOCATE ( WORK(LWORK), STAT = AllocateStatus )
|
||||||
|
IF (AllocateStatus /= 0) STOP "*** Not enough memory ***"
|
||||||
|
* ..
|
||||||
* .. Executable Statements ..
|
* .. Executable Statements ..
|
||||||
*
|
*
|
||||||
A = 0.0
|
A = 0.0
|
||||||
|
@ -1846,8 +1869,16 @@
|
||||||
CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT )
|
CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT )
|
||||||
CALL XLAENV( 1, 1 )
|
CALL XLAENV( 1, 1 )
|
||||||
CALL XLAENV( 9, 25 )
|
CALL XLAENV( 9, 25 )
|
||||||
IF( TSTERR )
|
IF( TSTERR ) THEN
|
||||||
$ CALL CERRST( 'CST', NOUT )
|
#if defined(_OPENMP)
|
||||||
|
N_THREADS = OMP_GET_NUM_THREADS()
|
||||||
|
CALL OMP_SET_NUM_THREADS(1)
|
||||||
|
#endif
|
||||||
|
CALL CERRST( 'CST', NOUT )
|
||||||
|
#if defined(_OPENMP)
|
||||||
|
CALL OMP_SET_NUM_THREADS(N_THREADS)
|
||||||
|
#endif
|
||||||
|
END IF
|
||||||
DO 290 I = 1, NPARMS
|
DO 290 I = 1, NPARMS
|
||||||
CALL XLAENV( 1, NBVAL( I ) )
|
CALL XLAENV( 1, NBVAL( I ) )
|
||||||
CALL XLAENV( 2, NBMIN( I ) )
|
CALL XLAENV( 2, NBMIN( I ) )
|
||||||
|
@ -2305,8 +2336,16 @@
|
||||||
MAXTYP = 15
|
MAXTYP = 15
|
||||||
NTYPES = MIN( MAXTYP, NTYPES )
|
NTYPES = MIN( MAXTYP, NTYPES )
|
||||||
CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT )
|
CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT )
|
||||||
IF( TSTERR )
|
IF( TSTERR ) THEN
|
||||||
$ CALL CERRST( 'CHB', NOUT )
|
#if defined(_OPENMP)
|
||||||
|
N_THREADS = OMP_GET_NUM_THREADS()
|
||||||
|
CALL OMP_SET_NUM_THREADS(1)
|
||||||
|
#endif
|
||||||
|
CALL CERRST( 'CHB', NOUT )
|
||||||
|
#if defined(_OPENMP)
|
||||||
|
CALL OMP_SET_NUM_THREADS(N_THREADS)
|
||||||
|
#endif
|
||||||
|
END IF
|
||||||
* CALL CCHKHB( NN, NVAL, NK, KVAL, MAXTYP, DOTYPE, ISEED, THRESH,
|
* CALL CCHKHB( NN, NVAL, NK, KVAL, MAXTYP, DOTYPE, ISEED, THRESH,
|
||||||
* $ NOUT, A( 1, 1 ), NMAX, DR( 1, 1 ), DR( 1, 2 ),
|
* $ NOUT, A( 1, 1 ), NMAX, DR( 1, 1 ), DR( 1, 2 ),
|
||||||
* $ A( 1, 2 ), NMAX, WORK, LWORK, RWORK, RESULT,
|
* $ A( 1, 2 ), NMAX, WORK, LWORK, RWORK, RESULT,
|
||||||
|
@ -2437,6 +2476,13 @@
|
||||||
WRITE( NOUT, FMT = 9994 )
|
WRITE( NOUT, FMT = 9994 )
|
||||||
S2 = SECOND( )
|
S2 = SECOND( )
|
||||||
WRITE( NOUT, FMT = 9993 )S2 - S1
|
WRITE( NOUT, FMT = 9993 )S2 - S1
|
||||||
|
*
|
||||||
|
DEALLOCATE (S, STAT = AllocateStatus)
|
||||||
|
DEALLOCATE (A, STAT = AllocateStatus)
|
||||||
|
DEALLOCATE (B, STAT = AllocateStatus)
|
||||||
|
DEALLOCATE (C, STAT = AllocateStatus)
|
||||||
|
DEALLOCATE (RWORK, STAT = AllocateStatus)
|
||||||
|
DEALLOCATE (WORK, STAT = AllocateStatus)
|
||||||
*
|
*
|
||||||
9999 FORMAT( / ' Execution not attempted due to input errors' )
|
9999 FORMAT( / ' Execution not attempted due to input errors' )
|
||||||
9997 FORMAT( / / 1X, A3, ': NB =', I4, ', NBMIN =', I4, ', NX =', I4 )
|
9997 FORMAT( / / 1X, A3, ': NB =', I4, ', NBMIN =', I4, ', NX =', I4 )
|
|
@ -1040,6 +1040,10 @@
|
||||||
* =====================================================================
|
* =====================================================================
|
||||||
PROGRAM DCHKEE
|
PROGRAM DCHKEE
|
||||||
*
|
*
|
||||||
|
#if defined(_OPENMP)
|
||||||
|
use omp_lib
|
||||||
|
#endif
|
||||||
|
*
|
||||||
* -- LAPACK test routine (version 3.7.0) --
|
* -- LAPACK test routine (version 3.7.0) --
|
||||||
* -- LAPACK is a software package provided by Univ. of Tennessee, --
|
* -- LAPACK is a software package provided by Univ. of Tennessee, --
|
||||||
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
|
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
|
||||||
|
@ -1077,7 +1081,7 @@
|
||||||
CHARACTER*80 LINE
|
CHARACTER*80 LINE
|
||||||
INTEGER I, I1, IC, INFO, ITMP, K, LENP, MAXTYP, NEWSD,
|
INTEGER I, I1, IC, INFO, ITMP, K, LENP, MAXTYP, NEWSD,
|
||||||
$ NK, NN, NPARMS, NRHS, NTYPES,
|
$ NK, NN, NPARMS, NRHS, NTYPES,
|
||||||
$ VERS_MAJOR, VERS_MINOR, VERS_PATCH
|
$ VERS_MAJOR, VERS_MINOR, VERS_PATCH, N_THREADS
|
||||||
DOUBLE PRECISION EPS, S1, S2, THRESH, THRSHN
|
DOUBLE PRECISION EPS, S1, S2, THRESH, THRSHN
|
||||||
* ..
|
* ..
|
||||||
* .. Local Arrays ..
|
* .. Local Arrays ..
|
||||||
|
@ -1089,10 +1093,13 @@
|
||||||
$ PVAL( MAXIN )
|
$ PVAL( MAXIN )
|
||||||
INTEGER INMIN( MAXIN ), INWIN( MAXIN ), INIBL( MAXIN ),
|
INTEGER INMIN( MAXIN ), INWIN( MAXIN ), INIBL( MAXIN ),
|
||||||
$ ISHFTS( MAXIN ), IACC22( MAXIN )
|
$ ISHFTS( MAXIN ), IACC22( MAXIN )
|
||||||
DOUBLE PRECISION A( NMAX*NMAX, NEED ), B( NMAX*NMAX, 5 ),
|
DOUBLE PRECISION D( NMAX, 12 ), RESULT( 500 ), TAUA( NMAX ),
|
||||||
$ C( NCMAX*NCMAX, NCMAX*NCMAX ), D( NMAX, 12 ),
|
$ TAUB( NMAX ), X( 5*NMAX )
|
||||||
$ RESULT( 500 ), TAUA( NMAX ), TAUB( NMAX ),
|
* ..
|
||||||
$ WORK( LWORK ), X( 5*NMAX )
|
* .. Allocatable Arrays ..
|
||||||
|
INTEGER AllocateStatus
|
||||||
|
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: WORK
|
||||||
|
DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: A, B, C
|
||||||
* ..
|
* ..
|
||||||
* .. External Functions ..
|
* .. External Functions ..
|
||||||
LOGICAL LSAMEN
|
LOGICAL LSAMEN
|
||||||
|
@ -1134,6 +1141,17 @@
|
||||||
DATA INTSTR / '0123456789' /
|
DATA INTSTR / '0123456789' /
|
||||||
DATA IOLDSD / 0, 0, 0, 1 /
|
DATA IOLDSD / 0, 0, 0, 1 /
|
||||||
* ..
|
* ..
|
||||||
|
* .. Allocate memory dynamically ..
|
||||||
|
*
|
||||||
|
ALLOCATE ( A(NMAX*NMAX,NEED), STAT = AllocateStatus )
|
||||||
|
IF (AllocateStatus /= 0) STOP "*** Not enough memory ***"
|
||||||
|
ALLOCATE ( B(NMAX*NMAX,5), STAT = AllocateStatus )
|
||||||
|
IF (AllocateStatus /= 0) STOP "*** Not enough memory ***"
|
||||||
|
ALLOCATE ( C(NCMAX*NCMAX,NCMAX*NCMAX), STAT = AllocateStatus )
|
||||||
|
IF (AllocateStatus /= 0) STOP "*** Not enough memory ***"
|
||||||
|
ALLOCATE ( WORK(LWORK), STAT = AllocateStatus )
|
||||||
|
IF (AllocateStatus /= 0) STOP "*** Not enough memory ***"
|
||||||
|
* ..
|
||||||
* .. Executable Statements ..
|
* .. Executable Statements ..
|
||||||
*
|
*
|
||||||
A = 0.0
|
A = 0.0
|
||||||
|
@ -1856,8 +1874,16 @@
|
||||||
CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT )
|
CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT )
|
||||||
CALL XLAENV( 1, 1 )
|
CALL XLAENV( 1, 1 )
|
||||||
CALL XLAENV( 9, 25 )
|
CALL XLAENV( 9, 25 )
|
||||||
IF( TSTERR )
|
IF( TSTERR ) THEN
|
||||||
$ CALL DERRST( 'DST', NOUT )
|
#if defined(_OPENMP)
|
||||||
|
N_THREADS = OMP_GET_NUM_THREADS()
|
||||||
|
CALL OMP_SET_NUM_THREADS(1)
|
||||||
|
#endif
|
||||||
|
CALL DERRST( 'DST', NOUT )
|
||||||
|
#if defined(_OPENMP)
|
||||||
|
CALL OMP_SET_NUM_THREADS(N_THREADS)
|
||||||
|
#endif
|
||||||
|
END IF
|
||||||
DO 290 I = 1, NPARMS
|
DO 290 I = 1, NPARMS
|
||||||
CALL XLAENV( 1, NBVAL( I ) )
|
CALL XLAENV( 1, NBVAL( I ) )
|
||||||
CALL XLAENV( 2, NBMIN( I ) )
|
CALL XLAENV( 2, NBMIN( I ) )
|
||||||
|
@ -2437,6 +2463,11 @@
|
||||||
WRITE( NOUT, FMT = 9994 )
|
WRITE( NOUT, FMT = 9994 )
|
||||||
S2 = DSECND( )
|
S2 = DSECND( )
|
||||||
WRITE( NOUT, FMT = 9993 )S2 - S1
|
WRITE( NOUT, FMT = 9993 )S2 - S1
|
||||||
|
*
|
||||||
|
DEALLOCATE (A, STAT = AllocateStatus)
|
||||||
|
DEALLOCATE (B, STAT = AllocateStatus)
|
||||||
|
DEALLOCATE (C, STAT = AllocateStatus)
|
||||||
|
DEALLOCATE (WORK, STAT = AllocateStatus)
|
||||||
*
|
*
|
||||||
9999 FORMAT( / ' Execution not attempted due to input errors' )
|
9999 FORMAT( / ' Execution not attempted due to input errors' )
|
||||||
9997 FORMAT( / / 1X, A3, ': NB =', I4, ', NBMIN =', I4, ', NX =', I4 )
|
9997 FORMAT( / / 1X, A3, ': NB =', I4, ', NBMIN =', I4, ', NX =', I4 )
|
|
@ -1040,6 +1040,10 @@
|
||||||
* =====================================================================
|
* =====================================================================
|
||||||
PROGRAM SCHKEE
|
PROGRAM SCHKEE
|
||||||
*
|
*
|
||||||
|
#if defined(_OPENMP)
|
||||||
|
use omp_lib
|
||||||
|
#endif
|
||||||
|
*
|
||||||
* -- LAPACK test routine (version 3.7.0) --
|
* -- LAPACK test routine (version 3.7.0) --
|
||||||
* -- LAPACK is a software package provided by Univ. of Tennessee, --
|
* -- LAPACK is a software package provided by Univ. of Tennessee, --
|
||||||
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
|
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
|
||||||
|
@ -1077,7 +1081,7 @@
|
||||||
CHARACTER*80 LINE
|
CHARACTER*80 LINE
|
||||||
INTEGER I, I1, IC, INFO, ITMP, K, LENP, MAXTYP, NEWSD,
|
INTEGER I, I1, IC, INFO, ITMP, K, LENP, MAXTYP, NEWSD,
|
||||||
$ NK, NN, NPARMS, NRHS, NTYPES,
|
$ NK, NN, NPARMS, NRHS, NTYPES,
|
||||||
$ VERS_MAJOR, VERS_MINOR, VERS_PATCH
|
$ VERS_MAJOR, VERS_MINOR, VERS_PATCH, N_THREADS
|
||||||
REAL EPS, S1, S2, THRESH, THRSHN
|
REAL EPS, S1, S2, THRESH, THRSHN
|
||||||
* ..
|
* ..
|
||||||
* .. Local Arrays ..
|
* .. Local Arrays ..
|
||||||
|
@ -1089,10 +1093,13 @@
|
||||||
$ PVAL( MAXIN )
|
$ PVAL( MAXIN )
|
||||||
INTEGER INMIN( MAXIN ), INWIN( MAXIN ), INIBL( MAXIN ),
|
INTEGER INMIN( MAXIN ), INWIN( MAXIN ), INIBL( MAXIN ),
|
||||||
$ ISHFTS( MAXIN ), IACC22( MAXIN )
|
$ ISHFTS( MAXIN ), IACC22( MAXIN )
|
||||||
REAL A( NMAX*NMAX, NEED ), B( NMAX*NMAX, 5 ),
|
REAL D( NMAX, 12 ), RESULT( 500 ), TAUA( NMAX ),
|
||||||
$ C( NCMAX*NCMAX, NCMAX*NCMAX ), D( NMAX, 12 ),
|
$ TAUB( NMAX ), X( 5*NMAX )
|
||||||
$ RESULT( 500 ), TAUA( NMAX ), TAUB( NMAX ),
|
* ..
|
||||||
$ WORK( LWORK ), X( 5*NMAX )
|
* .. Allocatable Arrays ..
|
||||||
|
INTEGER AllocateStatus
|
||||||
|
REAL, DIMENSION(:), ALLOCATABLE :: WORK
|
||||||
|
REAL, DIMENSION(:,:), ALLOCATABLE :: A, B, C
|
||||||
* ..
|
* ..
|
||||||
* .. External Functions ..
|
* .. External Functions ..
|
||||||
LOGICAL LSAMEN
|
LOGICAL LSAMEN
|
||||||
|
@ -1134,6 +1141,17 @@
|
||||||
DATA INTSTR / '0123456789' /
|
DATA INTSTR / '0123456789' /
|
||||||
DATA IOLDSD / 0, 0, 0, 1 /
|
DATA IOLDSD / 0, 0, 0, 1 /
|
||||||
* ..
|
* ..
|
||||||
|
* .. Allocate memory dynamically ..
|
||||||
|
*
|
||||||
|
ALLOCATE ( A(NMAX*NMAX,NEED), STAT = AllocateStatus )
|
||||||
|
IF (AllocateStatus /= 0) STOP "*** Not enough memory ***"
|
||||||
|
ALLOCATE ( B(NMAX*NMAX,5), STAT = AllocateStatus )
|
||||||
|
IF (AllocateStatus /= 0) STOP "*** Not enough memory ***"
|
||||||
|
ALLOCATE ( C(NCMAX*NCMAX,NCMAX*NCMAX), STAT = AllocateStatus )
|
||||||
|
IF (AllocateStatus /= 0) STOP "*** Not enough memory ***"
|
||||||
|
ALLOCATE ( WORK(LWORK), STAT = AllocateStatus )
|
||||||
|
IF (AllocateStatus /= 0) STOP "*** Not enough memory ***"
|
||||||
|
* ..
|
||||||
* .. Executable Statements ..
|
* .. Executable Statements ..
|
||||||
*
|
*
|
||||||
A = 0.0
|
A = 0.0
|
||||||
|
@ -1857,8 +1875,16 @@
|
||||||
CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT )
|
CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT )
|
||||||
CALL XLAENV( 1, 1 )
|
CALL XLAENV( 1, 1 )
|
||||||
CALL XLAENV( 9, 25 )
|
CALL XLAENV( 9, 25 )
|
||||||
IF( TSTERR )
|
IF( TSTERR ) THEN
|
||||||
$ CALL SERRST( 'SST', NOUT )
|
#if defined(_OPENMP)
|
||||||
|
N_THREADS = OMP_GET_NUM_THREADS()
|
||||||
|
CALL OMP_SET_NUM_THREADS(1)
|
||||||
|
#endif
|
||||||
|
CALL SERRST( 'SST', NOUT )
|
||||||
|
#if defined(_OPENMP)
|
||||||
|
CALL OMP_SET_NUM_THREADS(N_THREADS)
|
||||||
|
#endif
|
||||||
|
END IF
|
||||||
DO 290 I = 1, NPARMS
|
DO 290 I = 1, NPARMS
|
||||||
CALL XLAENV( 1, NBVAL( I ) )
|
CALL XLAENV( 1, NBVAL( I ) )
|
||||||
CALL XLAENV( 2, NBMIN( I ) )
|
CALL XLAENV( 2, NBMIN( I ) )
|
||||||
|
@ -2440,6 +2466,11 @@
|
||||||
WRITE( NOUT, FMT = 9994 )
|
WRITE( NOUT, FMT = 9994 )
|
||||||
S2 = SECOND( )
|
S2 = SECOND( )
|
||||||
WRITE( NOUT, FMT = 9993 )S2 - S1
|
WRITE( NOUT, FMT = 9993 )S2 - S1
|
||||||
|
*
|
||||||
|
DEALLOCATE (A, STAT = AllocateStatus)
|
||||||
|
DEALLOCATE (B, STAT = AllocateStatus)
|
||||||
|
DEALLOCATE (C, STAT = AllocateStatus)
|
||||||
|
DEALLOCATE (WORK, STAT = AllocateStatus)
|
||||||
*
|
*
|
||||||
9999 FORMAT( / ' Execution not attempted due to input errors' )
|
9999 FORMAT( / ' Execution not attempted due to input errors' )
|
||||||
9997 FORMAT( / / 1X, A3, ': NB =', I4, ', NBMIN =', I4, ', NX =', I4 )
|
9997 FORMAT( / / 1X, A3, ': NB =', I4, ', NBMIN =', I4, ', NX =', I4 )
|
|
@ -1034,6 +1034,10 @@
|
||||||
* =====================================================================
|
* =====================================================================
|
||||||
PROGRAM ZCHKEE
|
PROGRAM ZCHKEE
|
||||||
*
|
*
|
||||||
|
#if defined(_OPENMP)
|
||||||
|
use omp_lib
|
||||||
|
#endif
|
||||||
|
*
|
||||||
* -- LAPACK test routine (version 3.7.0) --
|
* -- LAPACK test routine (version 3.7.0) --
|
||||||
* -- LAPACK is a software package provided by Univ. of Tennessee, --
|
* -- LAPACK is a software package provided by Univ. of Tennessee, --
|
||||||
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
|
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
|
||||||
|
@ -1071,7 +1075,7 @@
|
||||||
CHARACTER*80 LINE
|
CHARACTER*80 LINE
|
||||||
INTEGER I, I1, IC, INFO, ITMP, K, LENP, MAXTYP, NEWSD,
|
INTEGER I, I1, IC, INFO, ITMP, K, LENP, MAXTYP, NEWSD,
|
||||||
$ NK, NN, NPARMS, NRHS, NTYPES,
|
$ NK, NN, NPARMS, NRHS, NTYPES,
|
||||||
$ VERS_MAJOR, VERS_MINOR, VERS_PATCH
|
$ VERS_MAJOR, VERS_MINOR, VERS_PATCH, N_THREADS
|
||||||
DOUBLE PRECISION EPS, S1, S2, THRESH, THRSHN
|
DOUBLE PRECISION EPS, S1, S2, THRESH, THRSHN
|
||||||
* ..
|
* ..
|
||||||
* .. Local Arrays ..
|
* .. Local Arrays ..
|
||||||
|
@ -1084,12 +1088,16 @@
|
||||||
INTEGER INMIN( MAXIN ), INWIN( MAXIN ), INIBL( MAXIN ),
|
INTEGER INMIN( MAXIN ), INWIN( MAXIN ), INIBL( MAXIN ),
|
||||||
$ ISHFTS( MAXIN ), IACC22( MAXIN )
|
$ ISHFTS( MAXIN ), IACC22( MAXIN )
|
||||||
DOUBLE PRECISION ALPHA( NMAX ), BETA( NMAX ), DR( NMAX, 12 ),
|
DOUBLE PRECISION ALPHA( NMAX ), BETA( NMAX ), DR( NMAX, 12 ),
|
||||||
$ RESULT( 500 ), RWORK( LWORK ), S( NMAX*NMAX )
|
$ RESULT( 500 )
|
||||||
COMPLEX*16 A( NMAX*NMAX, NEED ), B( NMAX*NMAX, 5 ),
|
COMPLEX*16 DC( NMAX, 6 ), TAUA( NMAX ), TAUB( NMAX ),
|
||||||
$ C( NCMAX*NCMAX, NCMAX*NCMAX ), DC( NMAX, 6 ),
|
|
||||||
$ TAUA( NMAX ), TAUB( NMAX ), WORK( LWORK ),
|
|
||||||
$ X( 5*NMAX )
|
$ X( 5*NMAX )
|
||||||
* ..
|
* ..
|
||||||
|
* .. Allocatable Arrays ..
|
||||||
|
INTEGER AllocateStatus
|
||||||
|
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: RWORK, S
|
||||||
|
COMPLEX*16, DIMENSION(:), ALLOCATABLE :: WORK
|
||||||
|
COMPLEX*16, DIMENSION(:,:), ALLOCATABLE :: A, B, C
|
||||||
|
* ..
|
||||||
* .. External Functions ..
|
* .. External Functions ..
|
||||||
LOGICAL LSAMEN
|
LOGICAL LSAMEN
|
||||||
DOUBLE PRECISION DLAMCH, DSECND
|
DOUBLE PRECISION DLAMCH, DSECND
|
||||||
|
@ -1130,6 +1138,21 @@
|
||||||
DATA INTSTR / '0123456789' /
|
DATA INTSTR / '0123456789' /
|
||||||
DATA IOLDSD / 0, 0, 0, 1 /
|
DATA IOLDSD / 0, 0, 0, 1 /
|
||||||
* ..
|
* ..
|
||||||
|
* .. Allocate memory dynamically ..
|
||||||
|
*
|
||||||
|
ALLOCATE ( S(NMAX*NMAX), STAT = AllocateStatus )
|
||||||
|
IF (AllocateStatus /= 0) STOP "*** Not enough memory ***"
|
||||||
|
ALLOCATE ( A(NMAX*NMAX,NEED), STAT = AllocateStatus )
|
||||||
|
IF (AllocateStatus /= 0) STOP "*** Not enough memory ***"
|
||||||
|
ALLOCATE ( B(NMAX*NMAX,5), STAT = AllocateStatus )
|
||||||
|
IF (AllocateStatus /= 0) STOP "*** Not enough memory ***"
|
||||||
|
ALLOCATE ( C(NCMAX*NCMAX,NCMAX*NCMAX), STAT = AllocateStatus )
|
||||||
|
IF (AllocateStatus /= 0) STOP "*** Not enough memory ***"
|
||||||
|
ALLOCATE ( RWORK(LWORK), STAT = AllocateStatus )
|
||||||
|
IF (AllocateStatus /= 0) STOP "*** Not enough memory ***"
|
||||||
|
ALLOCATE ( WORK(LWORK), STAT = AllocateStatus )
|
||||||
|
IF (AllocateStatus /= 0) STOP "*** Not enough memory ***"
|
||||||
|
* ..
|
||||||
* .. Executable Statements ..
|
* .. Executable Statements ..
|
||||||
*
|
*
|
||||||
A = 0.0
|
A = 0.0
|
||||||
|
@ -1846,8 +1869,16 @@
|
||||||
CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT )
|
CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT )
|
||||||
CALL XLAENV( 1, 1 )
|
CALL XLAENV( 1, 1 )
|
||||||
CALL XLAENV( 9, 25 )
|
CALL XLAENV( 9, 25 )
|
||||||
IF( TSTERR )
|
IF( TSTERR ) THEN
|
||||||
$ CALL ZERRST( 'ZST', NOUT )
|
#if defined(_OPENMP)
|
||||||
|
N_THREADS = OMP_GET_NUM_THREADS()
|
||||||
|
CALL OMP_SET_NUM_THREADS(1)
|
||||||
|
#endif
|
||||||
|
CALL ZERRST( 'ZST', NOUT )
|
||||||
|
#if defined(_OPENMP)
|
||||||
|
CALL OMP_SET_NUM_THREADS(N_THREADS)
|
||||||
|
#endif
|
||||||
|
END IF
|
||||||
DO 290 I = 1, NPARMS
|
DO 290 I = 1, NPARMS
|
||||||
CALL XLAENV( 1, NBVAL( I ) )
|
CALL XLAENV( 1, NBVAL( I ) )
|
||||||
CALL XLAENV( 2, NBMIN( I ) )
|
CALL XLAENV( 2, NBMIN( I ) )
|
||||||
|
@ -2303,8 +2334,16 @@
|
||||||
MAXTYP = 15
|
MAXTYP = 15
|
||||||
NTYPES = MIN( MAXTYP, NTYPES )
|
NTYPES = MIN( MAXTYP, NTYPES )
|
||||||
CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT )
|
CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT )
|
||||||
IF( TSTERR )
|
IF( TSTERR ) THEN
|
||||||
$ CALL ZERRST( 'ZHB', NOUT )
|
#if defined(_OPENMP)
|
||||||
|
N_THREADS = OMP_GET_NUM_THREADS()
|
||||||
|
CALL OMP_SET_NUM_THREADS(1)
|
||||||
|
#endif
|
||||||
|
CALL ZERRST( 'ZHB', NOUT )
|
||||||
|
#if defined(_OPENMP)
|
||||||
|
CALL OMP_SET_NUM_THREADS(N_THREADS)
|
||||||
|
#endif
|
||||||
|
END IF
|
||||||
* CALL ZCHKHB( NN, NVAL, NK, KVAL, MAXTYP, DOTYPE, ISEED, THRESH,
|
* CALL ZCHKHB( NN, NVAL, NK, KVAL, MAXTYP, DOTYPE, ISEED, THRESH,
|
||||||
* $ NOUT, A( 1, 1 ), NMAX, DR( 1, 1 ), DR( 1, 2 ),
|
* $ NOUT, A( 1, 1 ), NMAX, DR( 1, 1 ), DR( 1, 2 ),
|
||||||
* $ A( 1, 2 ), NMAX, WORK, LWORK, RWORK, RESULT,
|
* $ A( 1, 2 ), NMAX, WORK, LWORK, RWORK, RESULT,
|
||||||
|
@ -2435,6 +2474,13 @@
|
||||||
WRITE( NOUT, FMT = 9994 )
|
WRITE( NOUT, FMT = 9994 )
|
||||||
S2 = DSECND( )
|
S2 = DSECND( )
|
||||||
WRITE( NOUT, FMT = 9993 )S2 - S1
|
WRITE( NOUT, FMT = 9993 )S2 - S1
|
||||||
|
*
|
||||||
|
DEALLOCATE (S, STAT = AllocateStatus)
|
||||||
|
DEALLOCATE (A, STAT = AllocateStatus)
|
||||||
|
DEALLOCATE (B, STAT = AllocateStatus)
|
||||||
|
DEALLOCATE (C, STAT = AllocateStatus)
|
||||||
|
DEALLOCATE (RWORK, STAT = AllocateStatus)
|
||||||
|
DEALLOCATE (WORK, STAT = AllocateStatus)
|
||||||
*
|
*
|
||||||
9999 FORMAT( / ' Execution not attempted due to input errors' )
|
9999 FORMAT( / ' Execution not attempted due to input errors' )
|
||||||
9997 FORMAT( / / 1X, A3, ': NB =', I4, ', NBMIN =', I4, ', NX =', I4 )
|
9997 FORMAT( / / 1X, A3, ': NB =', I4, ', NBMIN =', I4, ', NX =', I4 )
|
Loading…
Reference in New Issue