commit
b1215f2f8c
|
@ -904,8 +904,8 @@ CCOMMON_OPT += -DF_INTERFACE_FLANG
|
|||
FCOMMON_OPT += -Mrecursive -Kieee
|
||||
ifeq ($(OSNAME), Linux)
|
||||
ifeq ($(ARCH), x86_64)
|
||||
FLANG_VENDOR := $(shell `$(FC) --version|cut -f 1 -d "."|head -1`)
|
||||
ifeq ($(FLANG_VENDOR),AOCC)
|
||||
FLANG_VENDOR := $(shell $(FC) --version|head -1 |cut -f 1 -d " ")
|
||||
ifeq ($(FLANG_VENDOR), AMD)
|
||||
FCOMMON_OPT += -fno-unroll-loops
|
||||
endif
|
||||
endif
|
||||
|
|
|
@ -74,6 +74,9 @@ static void *huge_malloc(BLASLONG size){
|
|||
|
||||
#if defined(__WIN32__) || defined(__WIN64__) || !defined(_POSIX_TIMERS)
|
||||
struct timeval start, stop;
|
||||
#elif defined(__APPLE__)
|
||||
mach_timebase_info_data_t info;
|
||||
uint64_t start = 0, stop = 0;
|
||||
#else
|
||||
struct timespec start = { 0, 0 }, stop = { 0, 0 };
|
||||
#endif
|
||||
|
@ -82,6 +85,9 @@ double getsec()
|
|||
{
|
||||
#if defined(__WIN32__) || defined(__WIN64__) || !defined(_POSIX_TIMERS)
|
||||
return (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
#elif defined(__APPLE__)
|
||||
mach_timebase_info(&info);
|
||||
return (double)(((stop - start) * info.numer)/info.denom) * 1.e-9;
|
||||
#else
|
||||
return (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_nsec - start.tv_nsec)) * 1.e-9;
|
||||
#endif
|
||||
|
@ -90,6 +96,8 @@ double getsec()
|
|||
void begin() {
|
||||
#if defined(__WIN32__) || defined(__WIN64__) || !defined(_POSIX_TIMERS)
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
#elif defined(__APPLE__)
|
||||
start = clock_gettime_nsec_np(CLOCK_UPTIME_RAW);
|
||||
#else
|
||||
clock_gettime(CLOCK_REALTIME, &start);
|
||||
#endif
|
||||
|
@ -98,6 +106,8 @@ void begin() {
|
|||
void end() {
|
||||
#if defined(__WIN32__) || defined(__WIN64__) || !defined(_POSIX_TIMERS)
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
#elif defined(__APPLE__)
|
||||
stop = clock_gettime_nsec_np(CLOCK_UPTIME_RAW);
|
||||
#else
|
||||
clock_gettime(CLOCK_REALTIME, &stop);
|
||||
#endif
|
||||
|
|
2
f_check
2
f_check
|
@ -330,7 +330,7 @@ if ($link ne "") {
|
|||
$flags =~ s/\@/\,/g;
|
||||
$linker_L .= "-Wl,". $flags . " " ;
|
||||
}
|
||||
if ($flags =~ /-lgomp/ && $CC =~ /clang/) {
|
||||
if ($flags =~ /-lgomp/ && $ENV{"CC"} =~ /clang/) {
|
||||
$flags = "-lomp";
|
||||
}
|
||||
|
||||
|
|
|
@ -46,9 +46,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#endif
|
||||
|
||||
#if defined(POWER8) || defined(POWER9) || defined(POWER10)
|
||||
#if defined(__VEC__) || defined(__ALTIVEC__)
|
||||
#if defined(POWER8) || defined(POWER9)
|
||||
#include "dasum_microk_power8.c"
|
||||
#elif defined(POWER10)
|
||||
#include "dasum_microk_power10.c"
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
@ -110,6 +112,21 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
|
|||
if ( inc_x == 1 )
|
||||
{
|
||||
|
||||
#if defined(POWER10)
|
||||
if ( n >= 16 )
|
||||
{
|
||||
BLASLONG align = ((32 - ((uintptr_t)x & (uintptr_t)0x1F)) >> 3) & 0x3;
|
||||
for (i = 0; i < align; i++) {
|
||||
sumf += ABS(x[i]);
|
||||
}
|
||||
}
|
||||
n1 = (n-i) & -16;
|
||||
if ( n1 > 0 )
|
||||
{
|
||||
sumf += dasum_kernel_16(n1, &x[i]);
|
||||
i+=n1;
|
||||
}
|
||||
#else
|
||||
n1 = n & -16;
|
||||
if ( n1 > 0 )
|
||||
{
|
||||
|
@ -117,6 +134,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
|
|||
sumf = dasum_kernel_16(n1, x);
|
||||
i=n1;
|
||||
}
|
||||
#endif
|
||||
|
||||
while(i < n)
|
||||
{
|
||||
|
|
|
@ -0,0 +1,152 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2021, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#define HAVE_KERNEL_16 1
|
||||
|
||||
static double dasum_kernel_16 (long n, double *x)
|
||||
{
|
||||
double sum;
|
||||
__vector double t0;
|
||||
__vector double t1;
|
||||
__vector double t2;
|
||||
__vector double t3;
|
||||
|
||||
__asm__
|
||||
(
|
||||
"dcbt 0, %2 \n\t"
|
||||
|
||||
"xxlxor 32, 32, 32 \n\t"
|
||||
"xxlxor 33, 33, 33 \n\t"
|
||||
"xxlxor 34, 34, 34 \n\t"
|
||||
"xxlxor 35, 35, 35 \n\t"
|
||||
"xxlxor 36, 36, 36 \n\t"
|
||||
"xxlxor 37, 37, 37 \n\t"
|
||||
"xxlxor 38, 38, 38 \n\t"
|
||||
"xxlxor 39, 39, 39 \n\t"
|
||||
|
||||
"lxvp 40, 0(%2) \n\t"
|
||||
"lxvp 42, 32(%2) \n\t"
|
||||
"lxvp 44, 64(%2) \n\t"
|
||||
"lxvp 46, 96(%2) \n\t"
|
||||
|
||||
"addi %2, %2, 128 \n\t"
|
||||
|
||||
"addic. %1, %1, -16 \n\t"
|
||||
"ble two%= \n\t"
|
||||
|
||||
".align 5 \n"
|
||||
"one%=: \n\t"
|
||||
|
||||
"xvabsdp 48, 40 \n\t"
|
||||
"xvabsdp 49, 41 \n\t"
|
||||
"xvabsdp 50, 42 \n\t"
|
||||
"xvabsdp 51, 43 \n\t"
|
||||
"lxvp 40, 0(%2) \n\t"
|
||||
|
||||
|
||||
"xvabsdp %x3, 44 \n\t"
|
||||
"xvabsdp %x4, 45 \n\t"
|
||||
"lxvp 42, 32(%2) \n\t"
|
||||
|
||||
|
||||
"xvabsdp %x5, 46 \n\t"
|
||||
"xvabsdp %x6, 47 \n\t"
|
||||
"lxvp 44, 64(%2) \n\t"
|
||||
|
||||
|
||||
"xvadddp 32, 32, 48 \n\t"
|
||||
"xvadddp 33, 33, 49 \n\t"
|
||||
|
||||
"lxvp 46, 96(%2) \n\t"
|
||||
|
||||
"xvadddp 34, 34, 50 \n\t"
|
||||
"xvadddp 35, 35, 51 \n\t"
|
||||
"addi %2, %2, 128 \n\t"
|
||||
"xvadddp 36, 36, %x3 \n\t"
|
||||
"xvadddp 37, 37, %x4 \n\t"
|
||||
"addic. %1, %1, -16 \n\t"
|
||||
"xvadddp 38, 38, %x5 \n\t"
|
||||
"xvadddp 39, 39, %x6 \n\t"
|
||||
|
||||
"bgt one%= \n"
|
||||
|
||||
"two%=: \n\t"
|
||||
|
||||
"xvabsdp 48, 40 \n\t"
|
||||
"xvabsdp 49, 41 \n\t"
|
||||
"xvabsdp 50, 42 \n\t"
|
||||
"xvabsdp 51, 43 \n\t"
|
||||
"xvabsdp %x3, 44 \n\t"
|
||||
"xvabsdp %x4, 45 \n\t"
|
||||
"xvabsdp %x5, 46 \n\t"
|
||||
"xvabsdp %x6, 47 \n\t"
|
||||
|
||||
"xvadddp 32, 32, 48 \n\t"
|
||||
"xvadddp 33, 33, 49 \n\t"
|
||||
"xvadddp 34, 34, 50 \n\t"
|
||||
"xvadddp 35, 35, 51 \n\t"
|
||||
"xvadddp 36, 36, %x3 \n\t"
|
||||
"xvadddp 37, 37, %x4 \n\t"
|
||||
"xvadddp 38, 38, %x5 \n\t"
|
||||
"xvadddp 39, 39, %x6 \n\t"
|
||||
|
||||
"xvadddp 32, 32, 33 \n\t"
|
||||
"xvadddp 34, 34, 35 \n\t"
|
||||
"xvadddp 36, 36, 37 \n\t"
|
||||
"xvadddp 38, 38, 39 \n\t"
|
||||
|
||||
"xvadddp 32, 32, 34 \n\t"
|
||||
"xvadddp 36, 36, 38 \n\t"
|
||||
|
||||
"xvadddp 32, 32, 36 \n\t"
|
||||
|
||||
XXSWAPD_S(33,32)
|
||||
"xsadddp %x0, 32, 33 \n"
|
||||
|
||||
"#n=%1 x=%3=%2 sum=%0\n"
|
||||
"#t0=%x3 t1=%x4 t2=%x5 t3=%x6"
|
||||
:
|
||||
"=d" (sum), // 0
|
||||
"+r" (n), // 1
|
||||
"+b" (x), // 2
|
||||
"=wa" (t0), // 3
|
||||
"=wa" (t1), // 4
|
||||
"=wa" (t2), // 5
|
||||
"=wa" (t3) // 6
|
||||
:
|
||||
"m" (*x)
|
||||
:
|
||||
"cr0",
|
||||
"vs32","vs33","vs34","vs35","vs36","vs37","vs38","vs39",
|
||||
"vs40","vs41","vs42","vs43","vs44","vs45","vs46","vs47",
|
||||
"vs48","vs49","vs50","vs51"
|
||||
);
|
||||
|
||||
return sum;
|
||||
}
|
||||
|
||||
|
|
@ -29,7 +29,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
typedef __vector unsigned char vec_t;
|
||||
typedef FLOAT v4sf_t __attribute__ ((vector_size (16)));
|
||||
typedef FLOAT v2sf_t __attribute__ ((vector_size (8)));
|
||||
#if !__has_builtin(__builtin_vsx_assemble_pair)
|
||||
#define __builtin_vsx_assemble_pair __builtin_mma_assemble_pair
|
||||
#endif
|
||||
|
||||
#if !__has_builtin(__builtin_vsx_disassemble_pair)
|
||||
#define __builtin_vsx_disassemble_pair __builtin_mma_disassemble_pair
|
||||
#endif
|
||||
|
||||
#ifdef TRMMKERNEL
|
||||
#define SAVE_ACC(ACC, J) \
|
||||
|
@ -186,8 +192,8 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B,
|
|||
vec_t *rowA = (vec_t *) & AO[0];
|
||||
vec_t *rb = (vec_t *) & BO[0];
|
||||
__vector_pair rowB, rowB1;
|
||||
__builtin_mma_assemble_pair (&rowB, rb[1], rb[0]);
|
||||
__builtin_mma_assemble_pair (&rowB1, rb[3], rb[2]);
|
||||
__builtin_vsx_assemble_pair (&rowB, rb[1], rb[0]);
|
||||
__builtin_vsx_assemble_pair (&rowB1, rb[3], rb[2]);
|
||||
__builtin_mma_xvf64ger (&acc0, rowB, rowA[0]);
|
||||
__builtin_mma_xvf64ger (&acc1, rowB1, rowA[0]);
|
||||
__builtin_mma_xvf64ger (&acc2, rowB, rowA[1]);
|
||||
|
@ -200,8 +206,8 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B,
|
|||
{
|
||||
rowA = (vec_t *) & AO[l << 3];
|
||||
rb = (vec_t *) & BO[l << 3];
|
||||
__builtin_mma_assemble_pair (&rowB, rb[1], rb[0]);
|
||||
__builtin_mma_assemble_pair (&rowB1, rb[3], rb[2]);
|
||||
__builtin_vsx_assemble_pair (&rowB, rb[1], rb[0]);
|
||||
__builtin_vsx_assemble_pair (&rowB1, rb[3], rb[2]);
|
||||
__builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]);
|
||||
__builtin_mma_xvf64gerpp (&acc1, rowB1, rowA[0]);
|
||||
__builtin_mma_xvf64gerpp (&acc2, rowB, rowA[1]);
|
||||
|
@ -242,8 +248,8 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B,
|
|||
vec_t *rowA = (vec_t *) & AO[0];
|
||||
__vector_pair rowB, rowB1;
|
||||
vec_t *rb = (vec_t *) & BO[0];
|
||||
__builtin_mma_assemble_pair (&rowB, rb[1], rb[0]);
|
||||
__builtin_mma_assemble_pair (&rowB1, rb[3], rb[2]);
|
||||
__builtin_vsx_assemble_pair (&rowB, rb[1], rb[0]);
|
||||
__builtin_vsx_assemble_pair (&rowB1, rb[3], rb[2]);
|
||||
__builtin_mma_xvf64ger (&acc0, rowB, rowA[0]);
|
||||
__builtin_mma_xvf64ger (&acc1, rowB1, rowA[0]);
|
||||
__builtin_mma_xvf64ger (&acc2, rowB, rowA[1]);
|
||||
|
@ -252,8 +258,8 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B,
|
|||
{
|
||||
rowA = (vec_t *) & AO[l << 2];
|
||||
rb = (vec_t *) & BO[l << 3];
|
||||
__builtin_mma_assemble_pair (&rowB, rb[1], rb[0]);
|
||||
__builtin_mma_assemble_pair (&rowB1, rb[3], rb[2]);
|
||||
__builtin_vsx_assemble_pair (&rowB, rb[1], rb[0]);
|
||||
__builtin_vsx_assemble_pair (&rowB1, rb[3], rb[2]);
|
||||
__builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]);
|
||||
__builtin_mma_xvf64gerpp (&acc1, rowB1, rowA[0]);
|
||||
__builtin_mma_xvf64gerpp (&acc2, rowB, rowA[1]);
|
||||
|
@ -286,16 +292,16 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B,
|
|||
vec_t *rowA = (vec_t *) & AO[0];
|
||||
__vector_pair rowB, rowB1;
|
||||
vec_t *rb = (vec_t *) & BO[0];
|
||||
__builtin_mma_assemble_pair (&rowB, rb[1], rb[0]);
|
||||
__builtin_mma_assemble_pair (&rowB1, rb[3], rb[2]);
|
||||
__builtin_vsx_assemble_pair (&rowB, rb[1], rb[0]);
|
||||
__builtin_vsx_assemble_pair (&rowB1, rb[3], rb[2]);
|
||||
__builtin_mma_xvf64ger (&acc0, rowB, rowA[0]);
|
||||
__builtin_mma_xvf64ger (&acc1, rowB1, rowA[0]);
|
||||
for (l = 1; l < temp; l++)
|
||||
{
|
||||
rowA = (vec_t *) & AO[l << 1];
|
||||
rb = (vec_t *) & BO[l << 3];
|
||||
__builtin_mma_assemble_pair (&rowB, rb[1], rb[0]);
|
||||
__builtin_mma_assemble_pair (&rowB1, rb[3], rb[2]);
|
||||
__builtin_vsx_assemble_pair (&rowB, rb[1], rb[0]);
|
||||
__builtin_vsx_assemble_pair (&rowB1, rb[3], rb[2]);
|
||||
__builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]);
|
||||
__builtin_mma_xvf64gerpp (&acc1, rowB1, rowA[0]);
|
||||
}
|
||||
|
@ -398,7 +404,7 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B,
|
|||
vec_t *rowA = (vec_t *) & AO[0];
|
||||
__vector_pair rowB;
|
||||
vec_t *rb = (vec_t *) & BO[0];
|
||||
__builtin_mma_assemble_pair (&rowB, rb[1], rb[0]);
|
||||
__builtin_vsx_assemble_pair (&rowB, rb[1], rb[0]);
|
||||
__builtin_mma_xvf64ger (&acc0, rowB, rowA[0]);
|
||||
__builtin_mma_xvf64ger (&acc1, rowB, rowA[1]);
|
||||
__builtin_mma_xvf64ger (&acc2, rowB, rowA[2]);
|
||||
|
@ -407,7 +413,7 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B,
|
|||
{
|
||||
rowA = (vec_t *) & AO[l << 3];
|
||||
rb = (vec_t *) & BO[l << 2];
|
||||
__builtin_mma_assemble_pair (&rowB, rb[1], rb[0]);
|
||||
__builtin_vsx_assemble_pair (&rowB, rb[1], rb[0]);
|
||||
__builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]);
|
||||
__builtin_mma_xvf64gerpp (&acc1, rowB, rowA[1]);
|
||||
__builtin_mma_xvf64gerpp (&acc2, rowB, rowA[2]);
|
||||
|
@ -440,14 +446,14 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B,
|
|||
vec_t *rowA = (vec_t *) & AO[0];
|
||||
__vector_pair rowB;
|
||||
vec_t *rb = (vec_t *) & BO[0];
|
||||
__builtin_mma_assemble_pair (&rowB, rb[1], rb[0]);
|
||||
__builtin_vsx_assemble_pair (&rowB, rb[1], rb[0]);
|
||||
__builtin_mma_xvf64ger (&acc0, rowB, rowA[0]);
|
||||
__builtin_mma_xvf64ger (&acc1, rowB, rowA[1]);
|
||||
for (l = 1; l < temp; l++)
|
||||
{
|
||||
rowA = (vec_t *) & AO[l << 2];
|
||||
rb = (vec_t *) & BO[l << 2];
|
||||
__builtin_mma_assemble_pair (&rowB, rb[1], rb[0]);
|
||||
__builtin_vsx_assemble_pair (&rowB, rb[1], rb[0]);
|
||||
__builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]);
|
||||
__builtin_mma_xvf64gerpp (&acc1, rowB, rowA[1]);
|
||||
}
|
||||
|
@ -476,13 +482,13 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B,
|
|||
vec_t *rowA = (vec_t *) & AO[0];
|
||||
__vector_pair rowB;
|
||||
vec_t *rb = (vec_t *) & BO[0];
|
||||
__builtin_mma_assemble_pair (&rowB, rb[1], rb[0]);
|
||||
__builtin_vsx_assemble_pair (&rowB, rb[1], rb[0]);
|
||||
__builtin_mma_xvf64ger (&acc0, rowB, rowA[0]);
|
||||
for (l = 1; l < temp; l++)
|
||||
{
|
||||
rowA = (vec_t *) & AO[l << 1];
|
||||
rb = (vec_t *) & BO[l << 2];
|
||||
__builtin_mma_assemble_pair (&rowB, rb[1], rb[0]);
|
||||
__builtin_vsx_assemble_pair (&rowB, rb[1], rb[0]);
|
||||
__builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]);
|
||||
}
|
||||
SAVE_ACC (&acc0, 0);
|
||||
|
@ -562,11 +568,9 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B,
|
|||
v4sf_t result[4];
|
||||
__vector_quad acc0, acc1, acc2, acc3;
|
||||
BLASLONG l = 0;
|
||||
FLOAT t[4] = { 0, 0, 0, 0 };
|
||||
t[0] = BO[0], t[1] = BO[1];
|
||||
__vector_pair rowB;
|
||||
vec_t *rb = (vec_t *) & t[0];
|
||||
__builtin_mma_assemble_pair (&rowB, rb[1], rb[0]);
|
||||
vec_t *rb = (vec_t *) & BO[0];
|
||||
__builtin_vsx_assemble_pair (&rowB, rb[0], rb[0]);
|
||||
vec_t *rowA = (vec_t *) & AO[0];
|
||||
__builtin_mma_xvf64ger (&acc0, rowB, rowA[0]);
|
||||
__builtin_mma_xvf64ger (&acc1, rowB, rowA[1]);
|
||||
|
@ -574,9 +578,8 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B,
|
|||
__builtin_mma_xvf64ger (&acc3, rowB, rowA[3]);
|
||||
for (l = 1; l < temp; l++)
|
||||
{
|
||||
t[0] = BO[l << 1], t[1] = BO[(l << 1) + 1];
|
||||
rb = (vec_t *) & t[0];
|
||||
__builtin_mma_assemble_pair (&rowB, rb[1], rb[0]);
|
||||
rb = (vec_t *) & BO[l << 1];
|
||||
__builtin_vsx_assemble_pair (&rowB, rb[0], rb[0]);
|
||||
rowA = (vec_t *) & AO[l << 3];
|
||||
__builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]);
|
||||
__builtin_mma_xvf64gerpp (&acc1, rowB, rowA[1]);
|
||||
|
@ -607,19 +610,16 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B,
|
|||
v4sf_t result[4];
|
||||
__vector_quad acc0, acc1;
|
||||
BLASLONG l = 0;
|
||||
FLOAT t[4] = { 0, 0, 0, 0 };
|
||||
t[0] = BO[0], t[1] = BO[1];
|
||||
__vector_pair rowB;
|
||||
vec_t *rb = (vec_t *) & t[0];
|
||||
__builtin_mma_assemble_pair (&rowB, rb[1], rb[0]);
|
||||
vec_t *rb = (vec_t *) & BO[0];
|
||||
__builtin_vsx_assemble_pair (&rowB, rb[0], rb[0]);
|
||||
vec_t *rowA = (vec_t *) & AO[0];
|
||||
__builtin_mma_xvf64ger (&acc0, rowB, rowA[0]);
|
||||
__builtin_mma_xvf64ger (&acc1, rowB, rowA[1]);
|
||||
for (l = 1; l < temp; l++)
|
||||
{
|
||||
t[0] = BO[l << 1], t[1] = BO[(l << 1) + 1];
|
||||
rb = (vec_t *) & t[0];
|
||||
__builtin_mma_assemble_pair (&rowB, rb[1], rb[0]);
|
||||
rb = (vec_t *) & BO[l << 1];
|
||||
__builtin_vsx_assemble_pair (&rowB, rb[0], rb[0]);
|
||||
rowA = (vec_t *) & AO[l << 2];
|
||||
__builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]);
|
||||
__builtin_mma_xvf64gerpp (&acc1, rowB, rowA[1]);
|
||||
|
@ -646,18 +646,15 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B,
|
|||
v4sf_t result[4];
|
||||
__vector_quad acc0;
|
||||
BLASLONG l = 0;
|
||||
FLOAT t[4] = { 0, 0, 0, 0 };
|
||||
t[0] = BO[0], t[1] = BO[1];
|
||||
__vector_pair rowB;
|
||||
vec_t *rb = (vec_t *) & t[0];
|
||||
__builtin_mma_assemble_pair (&rowB, rb[1], rb[0]);
|
||||
vec_t *rb = (vec_t *) & BO[0];
|
||||
__builtin_vsx_assemble_pair (&rowB, rb[0], rb[0]);
|
||||
vec_t *rowA = (vec_t *) & AO[0];
|
||||
__builtin_mma_xvf64ger (&acc0, rowB, rowA[0]);
|
||||
for (l = 1; l < temp; l++)
|
||||
{
|
||||
t[0] = BO[l << 1], t[1] = BO[(l << 1) + 1];
|
||||
rb = (vec_t *) & t[0];
|
||||
__builtin_mma_assemble_pair (&rowB, rb[1], rb[0]);
|
||||
rb = (vec_t *) & BO[l << 1];
|
||||
__builtin_vsx_assemble_pair (&rowB, rb[0], rb[0]);
|
||||
rowA = (vec_t *) & AO[l << 1];
|
||||
__builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]);
|
||||
}
|
||||
|
|
|
@ -46,9 +46,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#endif
|
||||
|
||||
#if defined(POWER8) || defined(POWER9) || defined(POWER10)
|
||||
#if defined(__VEC__) || defined(__ALTIVEC__)
|
||||
#if defined(POWER8) || defined(POWER9)
|
||||
#include "sasum_microk_power8.c"
|
||||
#elif defined(POWER10)
|
||||
#include "sasum_microk_power10.c"
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
@ -110,6 +112,21 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
|
|||
if ( inc_x == 1 )
|
||||
{
|
||||
|
||||
#if defined(POWER10)
|
||||
if ( n >= 32 )
|
||||
{
|
||||
BLASLONG align = ((32 - ((uintptr_t)x & (uintptr_t)0x1F)) >> 2) & 0x7;
|
||||
for (i = 0; i < align; i++) {
|
||||
sumf += ABS(x[i]);
|
||||
}
|
||||
}
|
||||
n1 = (n-i) & -32;
|
||||
if ( n1 > 0 )
|
||||
{
|
||||
sumf += sasum_kernel_32(n1, &x[i]);
|
||||
i+=n1;
|
||||
}
|
||||
#else
|
||||
n1 = n & -32;
|
||||
if ( n1 > 0 )
|
||||
{
|
||||
|
@ -117,6 +134,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
|
|||
sumf = sasum_kernel_32(n1, x);
|
||||
i=n1;
|
||||
}
|
||||
#endif
|
||||
|
||||
while(i < n)
|
||||
{
|
||||
|
|
|
@ -0,0 +1,153 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2021, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
|
||||
#define HAVE_KERNEL_32 1
|
||||
|
||||
static float sasum_kernel_32 (long n, float *x)
|
||||
{
|
||||
float sum;
|
||||
__vector float t0;
|
||||
__vector float t1;
|
||||
__vector float t2;
|
||||
__vector float t3;
|
||||
|
||||
__asm__
|
||||
(
|
||||
"dcbt 0, %2 \n\t"
|
||||
|
||||
"xxlxor 32, 32, 32 \n\t"
|
||||
"xxlxor 33, 33, 33 \n\t"
|
||||
"xxlxor 34, 34, 34 \n\t"
|
||||
"xxlxor 35, 35, 35 \n\t"
|
||||
"xxlxor 36, 36, 36 \n\t"
|
||||
"xxlxor 37, 37, 37 \n\t"
|
||||
"xxlxor 38, 38, 38 \n\t"
|
||||
"xxlxor 39, 39, 39 \n\t"
|
||||
|
||||
"lxvp 40, 0(%2) \n\t"
|
||||
"lxvp 42, 32(%2) \n\t"
|
||||
"lxvp 44, 64(%2) \n\t"
|
||||
"lxvp 46, 96(%2) \n\t"
|
||||
|
||||
"addi %2, %2, 128 \n\t"
|
||||
|
||||
"addic. %1, %1, -32 \n\t"
|
||||
"ble two%= \n\t"
|
||||
|
||||
".align 5 \n"
|
||||
"one%=: \n\t"
|
||||
|
||||
"xvabssp 48, 40 \n\t"
|
||||
"xvabssp 49, 41 \n\t"
|
||||
"xvabssp 50, 42 \n\t"
|
||||
"xvabssp 51, 43 \n\t"
|
||||
"lxvp 40, 0(%2) \n\t"
|
||||
|
||||
"xvabssp %x3, 44 \n\t"
|
||||
"xvabssp %x4, 45 \n\t"
|
||||
"lxvp 42, 32(%2) \n\t"
|
||||
|
||||
"xvabssp %x5, 46 \n\t"
|
||||
"xvabssp %x6, 47 \n\t"
|
||||
"lxvp 44, 64(%2) \n\t"
|
||||
|
||||
"xvaddsp 32, 32, 48 \n\t"
|
||||
"xvaddsp 33, 33, 49 \n\t"
|
||||
|
||||
"lxvp 46, 96(%2) \n\t"
|
||||
|
||||
"xvaddsp 34, 34, 50 \n\t"
|
||||
"xvaddsp 35, 35, 51 \n\t"
|
||||
"addi %2, %2, 128 \n\t"
|
||||
"xvaddsp 36, 36, %x3 \n\t"
|
||||
"xvaddsp 37, 37, %x4 \n\t"
|
||||
"addic. %1, %1, -32 \n\t"
|
||||
"xvaddsp 38, 38, %x5 \n\t"
|
||||
"xvaddsp 39, 39, %x6 \n\t"
|
||||
|
||||
"bgt one%= \n"
|
||||
|
||||
"two%=: \n\t"
|
||||
|
||||
"xvabssp 48, 40 \n\t"
|
||||
"xvabssp 49, 41 \n\t"
|
||||
"xvabssp 50, 42 \n\t"
|
||||
"xvabssp 51, 43 \n\t"
|
||||
"xvabssp %x3, 44 \n\t"
|
||||
"xvabssp %x4, 45 \n\t"
|
||||
"xvabssp %x5, 46 \n\t"
|
||||
"xvabssp %x6, 47 \n\t"
|
||||
|
||||
"xvaddsp 32, 32, 48 \n\t"
|
||||
"xvaddsp 33, 33, 49 \n\t"
|
||||
"xvaddsp 34, 34, 50 \n\t"
|
||||
"xvaddsp 35, 35, 51 \n\t"
|
||||
"xvaddsp 36, 36, %x3 \n\t"
|
||||
"xvaddsp 37, 37, %x4 \n\t"
|
||||
"xvaddsp 38, 38, %x5 \n\t"
|
||||
"xvaddsp 39, 39, %x6 \n\t"
|
||||
|
||||
"xvaddsp 32, 32, 33 \n\t"
|
||||
"xvaddsp 34, 34, 35 \n\t"
|
||||
"xvaddsp 36, 36, 37 \n\t"
|
||||
"xvaddsp 38, 38, 39 \n\t"
|
||||
|
||||
"xvaddsp 32, 32, 34 \n\t"
|
||||
"xvaddsp 36, 36, 38 \n\t"
|
||||
|
||||
"xvaddsp 32, 32, 36 \n\t"
|
||||
|
||||
"xxsldwi 33, 32, 32, 2 \n\t"
|
||||
"xvaddsp 32, 32, 33 \n\t"
|
||||
|
||||
"xxsldwi 33, 32, 32, 1 \n\t"
|
||||
"xvaddsp 32, 32, 33 \n\t"
|
||||
|
||||
"xscvspdp %x0, 32 \n"
|
||||
|
||||
"#n=%1 x=%3=%2 sum=%0\n"
|
||||
"#t0=%x3 t1=%x4 t2=%x5 t3=%x6"
|
||||
:
|
||||
"=f" (sum), // 0
|
||||
"+r" (n), // 1
|
||||
"+b" (x), // 2
|
||||
"=wa" (t0), // 3
|
||||
"=wa" (t1), // 4
|
||||
"=wa" (t2), // 5
|
||||
"=wa" (t3) // 6
|
||||
:
|
||||
"m" (*x)
|
||||
:
|
||||
"cr0",
|
||||
"vs32","vs33","vs34","vs35","vs36","vs37","vs38","vs39",
|
||||
"vs40","vs41","vs42","vs43","vs44","vs45","vs46","vs47",
|
||||
"vs48","vs49","vs50","vs51"
|
||||
);
|
||||
|
||||
return sum;
|
||||
}
|
|
@ -45,7 +45,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#endif
|
||||
#elif defined(POWER10)
|
||||
#if defined(DOUBLE)
|
||||
#include "zscal_microk_power8.c"
|
||||
#include "zscal_microk_power10.c"
|
||||
#else
|
||||
#include "cscal_microk_power10.c"
|
||||
#endif
|
||||
|
|
|
@ -0,0 +1,195 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2021, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#define HAVE_KERNEL_8 1
|
||||
|
||||
static void zscal_kernel_8 (long n, double *x, double alpha_r, double alpha_i)
|
||||
{
|
||||
__vector double t0;
|
||||
__vector double t1;
|
||||
__vector double t2;
|
||||
__vector double t3;
|
||||
__vector double t4;
|
||||
__vector double t5;
|
||||
|
||||
__asm__
|
||||
(
|
||||
"dcbt 0, %2 \n\t"
|
||||
|
||||
"xsnegdp 33, %x10 \n\t" // -alpha_i
|
||||
XXSPLTD_S(32,%x9,0) // alpha_r , alpha_r
|
||||
XXMRGHD_S(33,%x10, 33) // -alpha_i , alpha_i
|
||||
|
||||
"lxvp 40, 0(%2) \n\t"
|
||||
"lxvp 42, 32(%2) \n\t"
|
||||
"lxvp 44, 64(%2) \n\t"
|
||||
"lxvp 46, 96(%2) \n\t"
|
||||
|
||||
"addic. %1, %1, -8 \n\t"
|
||||
"ble two%= \n\t"
|
||||
|
||||
".align 5 \n"
|
||||
"one%=: \n\t"
|
||||
|
||||
"xvmuldp 48, 40, 32 \n\t" // x0_r * alpha_r, x0_i * alpha_r
|
||||
"xvmuldp 49, 41, 32 \n\t"
|
||||
"xvmuldp 50, 42, 32 \n\t"
|
||||
"xvmuldp 51, 43, 32 \n\t"
|
||||
"xvmuldp 34, 44, 32 \n\t"
|
||||
"xvmuldp 35, 45, 32 \n\t"
|
||||
"xvmuldp 36, 46, 32 \n\t"
|
||||
"xvmuldp 37, 47, 32 \n\t"
|
||||
|
||||
XXSWAPD_S(38,40)
|
||||
XXSWAPD_S(39,41)
|
||||
XXSWAPD_S(%x3,42)
|
||||
XXSWAPD_S(%x4,43)
|
||||
XXSWAPD_S(%x5,44)
|
||||
XXSWAPD_S(%x6,45)
|
||||
XXSWAPD_S(%x7,46)
|
||||
XXSWAPD_S(%x8,47)
|
||||
|
||||
"xvmuldp 38, 38, 33 \n\t" // x0_i * -alpha_i, x0_r * alpha_i
|
||||
"xvmuldp 39, 39, 33 \n\t"
|
||||
|
||||
|
||||
"xvmuldp %x3, %x3, 33 \n\t"
|
||||
"xvmuldp %x4, %x4, 33 \n\t"
|
||||
|
||||
|
||||
"lxvp 40, 128(%2) \n\t"
|
||||
"lxvp 42, 160(%2) \n\t"
|
||||
"xvmuldp %x5, %x5, 33 \n\t"
|
||||
"xvmuldp %x6, %x6, 33 \n\t"
|
||||
|
||||
|
||||
"xvmuldp %x7, %x7, 33 \n\t"
|
||||
"xvmuldp %x8, %x8, 33 \n\t"
|
||||
"lxvp 44, 192(%2) \n\t"
|
||||
"lxvp 46, 224(%2) \n\t"
|
||||
|
||||
|
||||
"xvadddp 48, 48, 38 \n\t"
|
||||
"xvadddp 49, 49, 39 \n\t"
|
||||
"xvadddp 50, 50, %x3 \n\t"
|
||||
"xvadddp 51, 51, %x4 \n\t"
|
||||
"stxv 49, 0(%2) \n\t"
|
||||
"stxv 48, 16(%2) \n\t"
|
||||
"stxv 51, 32(%2) \n\t"
|
||||
"stxv 50, 48(%2) \n\t"
|
||||
|
||||
|
||||
"xvadddp 34, 34, %x5 \n\t"
|
||||
"xvadddp 35, 35, %x6 \n\t"
|
||||
|
||||
|
||||
"xvadddp 36, 36, %x7 \n\t"
|
||||
"xvadddp 37, 37, %x8 \n\t"
|
||||
|
||||
"stxv 35, 64(%2) \n\t"
|
||||
"stxv 34, 80(%2) \n\t"
|
||||
"stxv 37, 96(%2) \n\t"
|
||||
"stxv 36, 112(%2) \n\t"
|
||||
|
||||
"addi %2, %2, 128 \n\t"
|
||||
|
||||
"addic. %1, %1, -8 \n\t"
|
||||
"bgt one%= \n"
|
||||
|
||||
"two%=: \n\t"
|
||||
|
||||
"xvmuldp 48, 40, 32 \n\t" // x0_r * alpha_r, x0_i * alpha_r
|
||||
"xvmuldp 49, 41, 32 \n\t"
|
||||
"xvmuldp 50, 42, 32 \n\t"
|
||||
"xvmuldp 51, 43, 32 \n\t"
|
||||
"xvmuldp 34, 44, 32 \n\t"
|
||||
"xvmuldp 35, 45, 32 \n\t"
|
||||
"xvmuldp 36, 46, 32 \n\t"
|
||||
"xvmuldp 37, 47, 32 \n\t"
|
||||
|
||||
XXSWAPD_S(38,40)
|
||||
XXSWAPD_S(39,41)
|
||||
XXSWAPD_S(%x3,42)
|
||||
XXSWAPD_S(%x4,43)
|
||||
XXSWAPD_S(%x5,44)
|
||||
XXSWAPD_S(%x6,45)
|
||||
XXSWAPD_S(%x7,46)
|
||||
XXSWAPD_S(%x8,47)
|
||||
|
||||
|
||||
"xvmuldp 38, 38, 33 \n\t" // x0_i * -alpha_i, x0_r * alpha_i
|
||||
"xvmuldp 39, 39, 33 \n\t"
|
||||
"xvmuldp %x3, %x3, 33 \n\t"
|
||||
"xvmuldp %x4, %x4, 33 \n\t"
|
||||
"xvmuldp %x5, %x5, 33 \n\t"
|
||||
"xvmuldp %x6, %x6, 33 \n\t"
|
||||
"xvmuldp %x7, %x7, 33 \n\t"
|
||||
"xvmuldp %x8, %x8, 33 \n\t"
|
||||
|
||||
"xvadddp 48, 48, 38 \n\t"
|
||||
"xvadddp 49, 49, 39 \n\t"
|
||||
|
||||
"xvadddp 50, 50, %x3 \n\t"
|
||||
"xvadddp 51, 51, %x4 \n\t"
|
||||
"stxv 49, 0(%2) \n\t"
|
||||
"stxv 48, 16(%2) \n\t"
|
||||
"stxv 51, 32(%2) \n\t"
|
||||
"stxv 50, 48(%2) \n\t"
|
||||
|
||||
"xvadddp 34, 34, %x5 \n\t"
|
||||
"xvadddp 35, 35, %x6 \n\t"
|
||||
|
||||
|
||||
"xvadddp 36, 36, %x7 \n\t"
|
||||
"xvadddp 37, 37, %x8 \n\t"
|
||||
|
||||
"stxv 35, 64(%2) \n\t"
|
||||
"stxv 34, 80(%2) \n\t"
|
||||
"stxv 37, 96(%2) \n\t"
|
||||
"stxv 36, 112(%2) \n\t"
|
||||
|
||||
"#n=%1 x=%0=%2 alpha=(%9,%10) \n"
|
||||
:
|
||||
"+m" (*x),
|
||||
"+r" (n), // 1
|
||||
"+b" (x), // 2
|
||||
"=wa" (t0), // 3
|
||||
"=wa" (t1), // 4
|
||||
"=wa" (t2), // 5
|
||||
"=wa" (t3), // 6
|
||||
"=wa" (t4), // 7
|
||||
"=wa" (t5) // 8
|
||||
:
|
||||
"d" (alpha_r), // 9
|
||||
"d" (alpha_i) // 10
|
||||
:
|
||||
"cr0",
|
||||
"vs32","vs33","vs34","vs35","vs36","vs37","vs38","vs39",
|
||||
"vs40","vs41","vs42","vs43","vs44","vs45","vs46","vs47",
|
||||
"vs48","vs49","vs50","vs51"
|
||||
);
|
||||
}
|
|
@ -1,5 +1,4 @@
|
|||
/* need a new enough GCC for avx512 support */
|
||||
#if (( defined(__GNUC__) && __GNUC__ > 6 && defined(__AVX512CD__)) || (defined(__clang__) && __clang_major__ >= 9))
|
||||
#if defined(HAVE_FMA3) && defined(HAVE_AVX2)
|
||||
|
||||
#define HAVE_SROT_KERNEL 1
|
||||
|
||||
|
|
|
@ -25,7 +25,7 @@ set(AEIGTST
|
|||
set(SCIGTST slafts.f slahd2.f slasum.f slatb9.f sstech.f sstect.f
|
||||
ssvdch.f ssvdct.f ssxt1.f)
|
||||
|
||||
set(SEIGTST schkee.f
|
||||
set(SEIGTST schkee.F
|
||||
sbdt01.f sbdt02.f sbdt03.f sbdt04.f sbdt05.f
|
||||
schkbb.f schkbd.f schkbk.f schkbl.f schkec.f
|
||||
schkgg.f schkgk.f schkgl.f schkhs.f schksb.f schkst.f schkst2stg.f schksb2stg.f
|
||||
|
@ -42,7 +42,7 @@ set(SEIGTST schkee.f
|
|||
sort03.f ssbt21.f ssgt01.f sslect.f sspt21.f sstt21.f
|
||||
sstt22.f ssyt21.f ssyt22.f)
|
||||
|
||||
set(CEIGTST cchkee.f
|
||||
set(CEIGTST cchkee.F
|
||||
cbdt01.f cbdt02.f cbdt03.f cbdt05.f
|
||||
cchkbb.f cchkbd.f cchkbk.f cchkbl.f cchkec.f
|
||||
cchkgg.f cchkgk.f cchkgl.f cchkhb.f cchkhs.f cchkst.f cchkst2stg.f cchkhb2stg.f
|
||||
|
@ -62,7 +62,7 @@ set(CEIGTST cchkee.f
|
|||
set(DZIGTST dlafts.f dlahd2.f dlasum.f dlatb9.f dstech.f dstect.f
|
||||
dsvdch.f dsvdct.f dsxt1.f)
|
||||
|
||||
set(DEIGTST dchkee.f
|
||||
set(DEIGTST dchkee.F
|
||||
dbdt01.f dbdt02.f dbdt03.f dbdt04.f dbdt05.f
|
||||
dchkbb.f dchkbd.f dchkbk.f dchkbl.f dchkec.f
|
||||
dchkgg.f dchkgk.f dchkgl.f dchkhs.f dchksb.f dchkst.f dchkst2stg.f dchksb2stg.f
|
||||
|
@ -79,7 +79,7 @@ set(DEIGTST dchkee.f
|
|||
dort03.f dsbt21.f dsgt01.f dslect.f dspt21.f dstt21.f
|
||||
dstt22.f dsyt21.f dsyt22.f)
|
||||
|
||||
set(ZEIGTST zchkee.f
|
||||
set(ZEIGTST zchkee.F
|
||||
zbdt01.f zbdt02.f zbdt03.f zbdt05.f
|
||||
zchkbb.f zchkbd.f zchkbk.f zchkbl.f zchkec.f
|
||||
zchkgg.f zchkgk.f zchkgl.f zchkhb.f zchkhs.f zchkst.f zchkst2stg.f zchkhb2stg.f
|
||||
|
|
|
@ -157,11 +157,11 @@ cleanobj:
|
|||
cleanexe:
|
||||
rm -f xeigtst*
|
||||
|
||||
schkee.o: schkee.f
|
||||
schkee.o: schkee.F
|
||||
$(FC) $(FFLAGS_DRV) -c -o $@ $<
|
||||
dchkee.o: dchkee.f
|
||||
dchkee.o: dchkee.F
|
||||
$(FC) $(FFLAGS_DRV) -c -o $@ $<
|
||||
cchkee.o: cchkee.f
|
||||
cchkee.o: cchkee.F
|
||||
$(FC) $(FFLAGS_DRV) -c -o $@ $<
|
||||
zchkee.o: zchkee.f
|
||||
zchkee.o: zchkee.F
|
||||
$(FC) $(FFLAGS_DRV) -c -o $@ $<
|
||||
|
|
|
@ -1034,6 +1034,10 @@
|
|||
* =====================================================================
|
||||
PROGRAM CCHKEE
|
||||
*
|
||||
#if defined(_OPENMP)
|
||||
use omp_lib
|
||||
#endif
|
||||
*
|
||||
* -- LAPACK test routine (version 3.7.0) --
|
||||
* -- LAPACK is a software package provided by Univ. of Tennessee, --
|
||||
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
|
||||
|
@ -1071,7 +1075,7 @@
|
|||
CHARACTER*80 LINE
|
||||
INTEGER I, I1, IC, INFO, ITMP, K, LENP, MAXTYP, NEWSD,
|
||||
$ NK, NN, NPARMS, NRHS, NTYPES,
|
||||
$ VERS_MAJOR, VERS_MINOR, VERS_PATCH
|
||||
$ VERS_MAJOR, VERS_MINOR, VERS_PATCH, N_THREADS
|
||||
REAL EPS, S1, S2, THRESH, THRSHN
|
||||
* ..
|
||||
* .. Local Arrays ..
|
||||
|
@ -1084,12 +1088,16 @@
|
|||
INTEGER INMIN( MAXIN ), INWIN( MAXIN ), INIBL( MAXIN ),
|
||||
$ ISHFTS( MAXIN ), IACC22( MAXIN )
|
||||
REAL ALPHA( NMAX ), BETA( NMAX ), DR( NMAX, 12 ),
|
||||
$ RESULT( 500 ), RWORK( LWORK ), S( NMAX*NMAX )
|
||||
COMPLEX A( NMAX*NMAX, NEED ), B( NMAX*NMAX, 5 ),
|
||||
$ C( NCMAX*NCMAX, NCMAX*NCMAX ), DC( NMAX, 6 ),
|
||||
$ TAUA( NMAX ), TAUB( NMAX ), WORK( LWORK ),
|
||||
$ RESULT( 500 )
|
||||
COMPLEX DC( NMAX, 6 ), TAUA( NMAX ), TAUB( NMAX ),
|
||||
$ X( 5*NMAX )
|
||||
* ..
|
||||
* .. Allocatable Arrays ..
|
||||
INTEGER AllocateStatus
|
||||
REAL, DIMENSION(:), ALLOCATABLE :: RWORK, S
|
||||
COMPLEX, DIMENSION(:), ALLOCATABLE :: WORK
|
||||
COMPLEX, DIMENSION(:,:), ALLOCATABLE :: A, B, C
|
||||
* ..
|
||||
* .. External Functions ..
|
||||
LOGICAL LSAMEN
|
||||
REAL SECOND, SLAMCH
|
||||
|
@ -1130,6 +1138,21 @@
|
|||
DATA INTSTR / '0123456789' /
|
||||
DATA IOLDSD / 0, 0, 0, 1 /
|
||||
* ..
|
||||
* .. Allocate memory dynamically ..
|
||||
*
|
||||
ALLOCATE ( S(NMAX*NMAX), STAT = AllocateStatus )
|
||||
IF (AllocateStatus /= 0) STOP "*** Not enough memory ***"
|
||||
ALLOCATE ( A(NMAX*NMAX,NEED), STAT = AllocateStatus )
|
||||
IF (AllocateStatus /= 0) STOP "*** Not enough memory ***"
|
||||
ALLOCATE ( B(NMAX*NMAX,5), STAT = AllocateStatus )
|
||||
IF (AllocateStatus /= 0) STOP "*** Not enough memory ***"
|
||||
ALLOCATE ( C(NCMAX*NCMAX,NCMAX*NCMAX), STAT = AllocateStatus )
|
||||
IF (AllocateStatus /= 0) STOP "*** Not enough memory ***"
|
||||
ALLOCATE ( RWORK(LWORK), STAT = AllocateStatus )
|
||||
IF (AllocateStatus /= 0) STOP "*** Not enough memory ***"
|
||||
ALLOCATE ( WORK(LWORK), STAT = AllocateStatus )
|
||||
IF (AllocateStatus /= 0) STOP "*** Not enough memory ***"
|
||||
* ..
|
||||
* .. Executable Statements ..
|
||||
*
|
||||
A = 0.0
|
||||
|
@ -1846,8 +1869,16 @@
|
|||
CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT )
|
||||
CALL XLAENV( 1, 1 )
|
||||
CALL XLAENV( 9, 25 )
|
||||
IF( TSTERR )
|
||||
$ CALL CERRST( 'CST', NOUT )
|
||||
IF( TSTERR ) THEN
|
||||
#if defined(_OPENMP)
|
||||
N_THREADS = OMP_GET_NUM_THREADS()
|
||||
CALL OMP_SET_NUM_THREADS(1)
|
||||
#endif
|
||||
CALL CERRST( 'CST', NOUT )
|
||||
#if defined(_OPENMP)
|
||||
CALL OMP_SET_NUM_THREADS(N_THREADS)
|
||||
#endif
|
||||
END IF
|
||||
DO 290 I = 1, NPARMS
|
||||
CALL XLAENV( 1, NBVAL( I ) )
|
||||
CALL XLAENV( 2, NBMIN( I ) )
|
||||
|
@ -2305,8 +2336,16 @@
|
|||
MAXTYP = 15
|
||||
NTYPES = MIN( MAXTYP, NTYPES )
|
||||
CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT )
|
||||
IF( TSTERR )
|
||||
$ CALL CERRST( 'CHB', NOUT )
|
||||
IF( TSTERR ) THEN
|
||||
#if defined(_OPENMP)
|
||||
N_THREADS = OMP_GET_NUM_THREADS()
|
||||
CALL OMP_SET_NUM_THREADS(1)
|
||||
#endif
|
||||
CALL CERRST( 'CHB', NOUT )
|
||||
#if defined(_OPENMP)
|
||||
CALL OMP_SET_NUM_THREADS(N_THREADS)
|
||||
#endif
|
||||
END IF
|
||||
* CALL CCHKHB( NN, NVAL, NK, KVAL, MAXTYP, DOTYPE, ISEED, THRESH,
|
||||
* $ NOUT, A( 1, 1 ), NMAX, DR( 1, 1 ), DR( 1, 2 ),
|
||||
* $ A( 1, 2 ), NMAX, WORK, LWORK, RWORK, RESULT,
|
||||
|
@ -2437,6 +2476,13 @@
|
|||
WRITE( NOUT, FMT = 9994 )
|
||||
S2 = SECOND( )
|
||||
WRITE( NOUT, FMT = 9993 )S2 - S1
|
||||
*
|
||||
DEALLOCATE (S, STAT = AllocateStatus)
|
||||
DEALLOCATE (A, STAT = AllocateStatus)
|
||||
DEALLOCATE (B, STAT = AllocateStatus)
|
||||
DEALLOCATE (C, STAT = AllocateStatus)
|
||||
DEALLOCATE (RWORK, STAT = AllocateStatus)
|
||||
DEALLOCATE (WORK, STAT = AllocateStatus)
|
||||
*
|
||||
9999 FORMAT( / ' Execution not attempted due to input errors' )
|
||||
9997 FORMAT( / / 1X, A3, ': NB =', I4, ', NBMIN =', I4, ', NX =', I4 )
|
|
@ -1040,6 +1040,10 @@
|
|||
* =====================================================================
|
||||
PROGRAM DCHKEE
|
||||
*
|
||||
#if defined(_OPENMP)
|
||||
use omp_lib
|
||||
#endif
|
||||
*
|
||||
* -- LAPACK test routine (version 3.7.0) --
|
||||
* -- LAPACK is a software package provided by Univ. of Tennessee, --
|
||||
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
|
||||
|
@ -1077,7 +1081,7 @@
|
|||
CHARACTER*80 LINE
|
||||
INTEGER I, I1, IC, INFO, ITMP, K, LENP, MAXTYP, NEWSD,
|
||||
$ NK, NN, NPARMS, NRHS, NTYPES,
|
||||
$ VERS_MAJOR, VERS_MINOR, VERS_PATCH
|
||||
$ VERS_MAJOR, VERS_MINOR, VERS_PATCH, N_THREADS
|
||||
DOUBLE PRECISION EPS, S1, S2, THRESH, THRSHN
|
||||
* ..
|
||||
* .. Local Arrays ..
|
||||
|
@ -1089,10 +1093,13 @@
|
|||
$ PVAL( MAXIN )
|
||||
INTEGER INMIN( MAXIN ), INWIN( MAXIN ), INIBL( MAXIN ),
|
||||
$ ISHFTS( MAXIN ), IACC22( MAXIN )
|
||||
DOUBLE PRECISION A( NMAX*NMAX, NEED ), B( NMAX*NMAX, 5 ),
|
||||
$ C( NCMAX*NCMAX, NCMAX*NCMAX ), D( NMAX, 12 ),
|
||||
$ RESULT( 500 ), TAUA( NMAX ), TAUB( NMAX ),
|
||||
$ WORK( LWORK ), X( 5*NMAX )
|
||||
DOUBLE PRECISION D( NMAX, 12 ), RESULT( 500 ), TAUA( NMAX ),
|
||||
$ TAUB( NMAX ), X( 5*NMAX )
|
||||
* ..
|
||||
* .. Allocatable Arrays ..
|
||||
INTEGER AllocateStatus
|
||||
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: WORK
|
||||
DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: A, B, C
|
||||
* ..
|
||||
* .. External Functions ..
|
||||
LOGICAL LSAMEN
|
||||
|
@ -1134,6 +1141,17 @@
|
|||
DATA INTSTR / '0123456789' /
|
||||
DATA IOLDSD / 0, 0, 0, 1 /
|
||||
* ..
|
||||
* .. Allocate memory dynamically ..
|
||||
*
|
||||
ALLOCATE ( A(NMAX*NMAX,NEED), STAT = AllocateStatus )
|
||||
IF (AllocateStatus /= 0) STOP "*** Not enough memory ***"
|
||||
ALLOCATE ( B(NMAX*NMAX,5), STAT = AllocateStatus )
|
||||
IF (AllocateStatus /= 0) STOP "*** Not enough memory ***"
|
||||
ALLOCATE ( C(NCMAX*NCMAX,NCMAX*NCMAX), STAT = AllocateStatus )
|
||||
IF (AllocateStatus /= 0) STOP "*** Not enough memory ***"
|
||||
ALLOCATE ( WORK(LWORK), STAT = AllocateStatus )
|
||||
IF (AllocateStatus /= 0) STOP "*** Not enough memory ***"
|
||||
* ..
|
||||
* .. Executable Statements ..
|
||||
*
|
||||
A = 0.0
|
||||
|
@ -1856,8 +1874,16 @@
|
|||
CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT )
|
||||
CALL XLAENV( 1, 1 )
|
||||
CALL XLAENV( 9, 25 )
|
||||
IF( TSTERR )
|
||||
$ CALL DERRST( 'DST', NOUT )
|
||||
IF( TSTERR ) THEN
|
||||
#if defined(_OPENMP)
|
||||
N_THREADS = OMP_GET_NUM_THREADS()
|
||||
CALL OMP_SET_NUM_THREADS(1)
|
||||
#endif
|
||||
CALL DERRST( 'DST', NOUT )
|
||||
#if defined(_OPENMP)
|
||||
CALL OMP_SET_NUM_THREADS(N_THREADS)
|
||||
#endif
|
||||
END IF
|
||||
DO 290 I = 1, NPARMS
|
||||
CALL XLAENV( 1, NBVAL( I ) )
|
||||
CALL XLAENV( 2, NBMIN( I ) )
|
||||
|
@ -2437,6 +2463,11 @@
|
|||
WRITE( NOUT, FMT = 9994 )
|
||||
S2 = DSECND( )
|
||||
WRITE( NOUT, FMT = 9993 )S2 - S1
|
||||
*
|
||||
DEALLOCATE (A, STAT = AllocateStatus)
|
||||
DEALLOCATE (B, STAT = AllocateStatus)
|
||||
DEALLOCATE (C, STAT = AllocateStatus)
|
||||
DEALLOCATE (WORK, STAT = AllocateStatus)
|
||||
*
|
||||
9999 FORMAT( / ' Execution not attempted due to input errors' )
|
||||
9997 FORMAT( / / 1X, A3, ': NB =', I4, ', NBMIN =', I4, ', NX =', I4 )
|
|
@ -1040,6 +1040,10 @@
|
|||
* =====================================================================
|
||||
PROGRAM SCHKEE
|
||||
*
|
||||
#if defined(_OPENMP)
|
||||
use omp_lib
|
||||
#endif
|
||||
*
|
||||
* -- LAPACK test routine (version 3.7.0) --
|
||||
* -- LAPACK is a software package provided by Univ. of Tennessee, --
|
||||
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
|
||||
|
@ -1077,7 +1081,7 @@
|
|||
CHARACTER*80 LINE
|
||||
INTEGER I, I1, IC, INFO, ITMP, K, LENP, MAXTYP, NEWSD,
|
||||
$ NK, NN, NPARMS, NRHS, NTYPES,
|
||||
$ VERS_MAJOR, VERS_MINOR, VERS_PATCH
|
||||
$ VERS_MAJOR, VERS_MINOR, VERS_PATCH, N_THREADS
|
||||
REAL EPS, S1, S2, THRESH, THRSHN
|
||||
* ..
|
||||
* .. Local Arrays ..
|
||||
|
@ -1089,10 +1093,13 @@
|
|||
$ PVAL( MAXIN )
|
||||
INTEGER INMIN( MAXIN ), INWIN( MAXIN ), INIBL( MAXIN ),
|
||||
$ ISHFTS( MAXIN ), IACC22( MAXIN )
|
||||
REAL A( NMAX*NMAX, NEED ), B( NMAX*NMAX, 5 ),
|
||||
$ C( NCMAX*NCMAX, NCMAX*NCMAX ), D( NMAX, 12 ),
|
||||
$ RESULT( 500 ), TAUA( NMAX ), TAUB( NMAX ),
|
||||
$ WORK( LWORK ), X( 5*NMAX )
|
||||
REAL D( NMAX, 12 ), RESULT( 500 ), TAUA( NMAX ),
|
||||
$ TAUB( NMAX ), X( 5*NMAX )
|
||||
* ..
|
||||
* .. Allocatable Arrays ..
|
||||
INTEGER AllocateStatus
|
||||
REAL, DIMENSION(:), ALLOCATABLE :: WORK
|
||||
REAL, DIMENSION(:,:), ALLOCATABLE :: A, B, C
|
||||
* ..
|
||||
* .. External Functions ..
|
||||
LOGICAL LSAMEN
|
||||
|
@ -1134,6 +1141,17 @@
|
|||
DATA INTSTR / '0123456789' /
|
||||
DATA IOLDSD / 0, 0, 0, 1 /
|
||||
* ..
|
||||
* .. Allocate memory dynamically ..
|
||||
*
|
||||
ALLOCATE ( A(NMAX*NMAX,NEED), STAT = AllocateStatus )
|
||||
IF (AllocateStatus /= 0) STOP "*** Not enough memory ***"
|
||||
ALLOCATE ( B(NMAX*NMAX,5), STAT = AllocateStatus )
|
||||
IF (AllocateStatus /= 0) STOP "*** Not enough memory ***"
|
||||
ALLOCATE ( C(NCMAX*NCMAX,NCMAX*NCMAX), STAT = AllocateStatus )
|
||||
IF (AllocateStatus /= 0) STOP "*** Not enough memory ***"
|
||||
ALLOCATE ( WORK(LWORK), STAT = AllocateStatus )
|
||||
IF (AllocateStatus /= 0) STOP "*** Not enough memory ***"
|
||||
* ..
|
||||
* .. Executable Statements ..
|
||||
*
|
||||
A = 0.0
|
||||
|
@ -1857,8 +1875,16 @@
|
|||
CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT )
|
||||
CALL XLAENV( 1, 1 )
|
||||
CALL XLAENV( 9, 25 )
|
||||
IF( TSTERR )
|
||||
$ CALL SERRST( 'SST', NOUT )
|
||||
IF( TSTERR ) THEN
|
||||
#if defined(_OPENMP)
|
||||
N_THREADS = OMP_GET_NUM_THREADS()
|
||||
CALL OMP_SET_NUM_THREADS(1)
|
||||
#endif
|
||||
CALL SERRST( 'SST', NOUT )
|
||||
#if defined(_OPENMP)
|
||||
CALL OMP_SET_NUM_THREADS(N_THREADS)
|
||||
#endif
|
||||
END IF
|
||||
DO 290 I = 1, NPARMS
|
||||
CALL XLAENV( 1, NBVAL( I ) )
|
||||
CALL XLAENV( 2, NBMIN( I ) )
|
||||
|
@ -2440,6 +2466,11 @@
|
|||
WRITE( NOUT, FMT = 9994 )
|
||||
S2 = SECOND( )
|
||||
WRITE( NOUT, FMT = 9993 )S2 - S1
|
||||
*
|
||||
DEALLOCATE (A, STAT = AllocateStatus)
|
||||
DEALLOCATE (B, STAT = AllocateStatus)
|
||||
DEALLOCATE (C, STAT = AllocateStatus)
|
||||
DEALLOCATE (WORK, STAT = AllocateStatus)
|
||||
*
|
||||
9999 FORMAT( / ' Execution not attempted due to input errors' )
|
||||
9997 FORMAT( / / 1X, A3, ': NB =', I4, ', NBMIN =', I4, ', NX =', I4 )
|
|
@ -1034,6 +1034,10 @@
|
|||
* =====================================================================
|
||||
PROGRAM ZCHKEE
|
||||
*
|
||||
#if defined(_OPENMP)
|
||||
use omp_lib
|
||||
#endif
|
||||
*
|
||||
* -- LAPACK test routine (version 3.7.0) --
|
||||
* -- LAPACK is a software package provided by Univ. of Tennessee, --
|
||||
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
|
||||
|
@ -1071,7 +1075,7 @@
|
|||
CHARACTER*80 LINE
|
||||
INTEGER I, I1, IC, INFO, ITMP, K, LENP, MAXTYP, NEWSD,
|
||||
$ NK, NN, NPARMS, NRHS, NTYPES,
|
||||
$ VERS_MAJOR, VERS_MINOR, VERS_PATCH
|
||||
$ VERS_MAJOR, VERS_MINOR, VERS_PATCH, N_THREADS
|
||||
DOUBLE PRECISION EPS, S1, S2, THRESH, THRSHN
|
||||
* ..
|
||||
* .. Local Arrays ..
|
||||
|
@ -1084,12 +1088,16 @@
|
|||
INTEGER INMIN( MAXIN ), INWIN( MAXIN ), INIBL( MAXIN ),
|
||||
$ ISHFTS( MAXIN ), IACC22( MAXIN )
|
||||
DOUBLE PRECISION ALPHA( NMAX ), BETA( NMAX ), DR( NMAX, 12 ),
|
||||
$ RESULT( 500 ), RWORK( LWORK ), S( NMAX*NMAX )
|
||||
COMPLEX*16 A( NMAX*NMAX, NEED ), B( NMAX*NMAX, 5 ),
|
||||
$ C( NCMAX*NCMAX, NCMAX*NCMAX ), DC( NMAX, 6 ),
|
||||
$ TAUA( NMAX ), TAUB( NMAX ), WORK( LWORK ),
|
||||
$ RESULT( 500 )
|
||||
COMPLEX*16 DC( NMAX, 6 ), TAUA( NMAX ), TAUB( NMAX ),
|
||||
$ X( 5*NMAX )
|
||||
* ..
|
||||
* .. Allocatable Arrays ..
|
||||
INTEGER AllocateStatus
|
||||
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: RWORK, S
|
||||
COMPLEX*16, DIMENSION(:), ALLOCATABLE :: WORK
|
||||
COMPLEX*16, DIMENSION(:,:), ALLOCATABLE :: A, B, C
|
||||
* ..
|
||||
* .. External Functions ..
|
||||
LOGICAL LSAMEN
|
||||
DOUBLE PRECISION DLAMCH, DSECND
|
||||
|
@ -1130,6 +1138,21 @@
|
|||
DATA INTSTR / '0123456789' /
|
||||
DATA IOLDSD / 0, 0, 0, 1 /
|
||||
* ..
|
||||
* .. Allocate memory dynamically ..
|
||||
*
|
||||
ALLOCATE ( S(NMAX*NMAX), STAT = AllocateStatus )
|
||||
IF (AllocateStatus /= 0) STOP "*** Not enough memory ***"
|
||||
ALLOCATE ( A(NMAX*NMAX,NEED), STAT = AllocateStatus )
|
||||
IF (AllocateStatus /= 0) STOP "*** Not enough memory ***"
|
||||
ALLOCATE ( B(NMAX*NMAX,5), STAT = AllocateStatus )
|
||||
IF (AllocateStatus /= 0) STOP "*** Not enough memory ***"
|
||||
ALLOCATE ( C(NCMAX*NCMAX,NCMAX*NCMAX), STAT = AllocateStatus )
|
||||
IF (AllocateStatus /= 0) STOP "*** Not enough memory ***"
|
||||
ALLOCATE ( RWORK(LWORK), STAT = AllocateStatus )
|
||||
IF (AllocateStatus /= 0) STOP "*** Not enough memory ***"
|
||||
ALLOCATE ( WORK(LWORK), STAT = AllocateStatus )
|
||||
IF (AllocateStatus /= 0) STOP "*** Not enough memory ***"
|
||||
* ..
|
||||
* .. Executable Statements ..
|
||||
*
|
||||
A = 0.0
|
||||
|
@ -1846,8 +1869,16 @@
|
|||
CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT )
|
||||
CALL XLAENV( 1, 1 )
|
||||
CALL XLAENV( 9, 25 )
|
||||
IF( TSTERR )
|
||||
$ CALL ZERRST( 'ZST', NOUT )
|
||||
IF( TSTERR ) THEN
|
||||
#if defined(_OPENMP)
|
||||
N_THREADS = OMP_GET_NUM_THREADS()
|
||||
CALL OMP_SET_NUM_THREADS(1)
|
||||
#endif
|
||||
CALL ZERRST( 'ZST', NOUT )
|
||||
#if defined(_OPENMP)
|
||||
CALL OMP_SET_NUM_THREADS(N_THREADS)
|
||||
#endif
|
||||
END IF
|
||||
DO 290 I = 1, NPARMS
|
||||
CALL XLAENV( 1, NBVAL( I ) )
|
||||
CALL XLAENV( 2, NBMIN( I ) )
|
||||
|
@ -2303,8 +2334,16 @@
|
|||
MAXTYP = 15
|
||||
NTYPES = MIN( MAXTYP, NTYPES )
|
||||
CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT )
|
||||
IF( TSTERR )
|
||||
$ CALL ZERRST( 'ZHB', NOUT )
|
||||
IF( TSTERR ) THEN
|
||||
#if defined(_OPENMP)
|
||||
N_THREADS = OMP_GET_NUM_THREADS()
|
||||
CALL OMP_SET_NUM_THREADS(1)
|
||||
#endif
|
||||
CALL ZERRST( 'ZHB', NOUT )
|
||||
#if defined(_OPENMP)
|
||||
CALL OMP_SET_NUM_THREADS(N_THREADS)
|
||||
#endif
|
||||
END IF
|
||||
* CALL ZCHKHB( NN, NVAL, NK, KVAL, MAXTYP, DOTYPE, ISEED, THRESH,
|
||||
* $ NOUT, A( 1, 1 ), NMAX, DR( 1, 1 ), DR( 1, 2 ),
|
||||
* $ A( 1, 2 ), NMAX, WORK, LWORK, RWORK, RESULT,
|
||||
|
@ -2435,6 +2474,13 @@
|
|||
WRITE( NOUT, FMT = 9994 )
|
||||
S2 = DSECND( )
|
||||
WRITE( NOUT, FMT = 9993 )S2 - S1
|
||||
*
|
||||
DEALLOCATE (S, STAT = AllocateStatus)
|
||||
DEALLOCATE (A, STAT = AllocateStatus)
|
||||
DEALLOCATE (B, STAT = AllocateStatus)
|
||||
DEALLOCATE (C, STAT = AllocateStatus)
|
||||
DEALLOCATE (RWORK, STAT = AllocateStatus)
|
||||
DEALLOCATE (WORK, STAT = AllocateStatus)
|
||||
*
|
||||
9999 FORMAT( / ' Execution not attempted due to input errors' )
|
||||
9997 FORMAT( / / 1X, A3, ': NB =', I4, ', NBMIN =', I4, ', NX =', I4 )
|
Loading…
Reference in New Issue