commit
ede3cab6e6
|
@ -25,6 +25,15 @@ matrix:
|
||||||
- TARGET_BOX=LINUX64
|
- TARGET_BOX=LINUX64
|
||||||
- BTYPE="BINARY=64"
|
- BTYPE="BINARY=64"
|
||||||
|
|
||||||
|
- <<: *test-ubuntu
|
||||||
|
os: linux-ppc64le
|
||||||
|
before_script:
|
||||||
|
- COMMON_FLAGS="DYNAMIC_ARCH=1 TARGET=POWER8 NUM_THREADS=32"
|
||||||
|
env:
|
||||||
|
# for matrix annotation only
|
||||||
|
- TARGET_BOX=PPC64LE_LINUX
|
||||||
|
- BTYPE="BINARY=64 USE_OPENMP=1"
|
||||||
|
|
||||||
- <<: *test-ubuntu
|
- <<: *test-ubuntu
|
||||||
env:
|
env:
|
||||||
- TARGET_BOX=LINUX64
|
- TARGET_BOX=LINUX64
|
||||||
|
|
|
@ -6,7 +6,7 @@ cmake_minimum_required(VERSION 2.8.5)
|
||||||
project(OpenBLAS C ASM)
|
project(OpenBLAS C ASM)
|
||||||
set(OpenBLAS_MAJOR_VERSION 0)
|
set(OpenBLAS_MAJOR_VERSION 0)
|
||||||
set(OpenBLAS_MINOR_VERSION 3)
|
set(OpenBLAS_MINOR_VERSION 3)
|
||||||
set(OpenBLAS_PATCH_VERSION 6.dev)
|
set(OpenBLAS_PATCH_VERSION 7.dev)
|
||||||
set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}")
|
set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}")
|
||||||
|
|
||||||
# Adhere to GNU filesystem layout conventions
|
# Adhere to GNU filesystem layout conventions
|
||||||
|
|
|
@ -167,4 +167,7 @@ In chronological order:
|
||||||
* [2017-02-26] ztrmm kernel for IBM z13
|
* [2017-02-26] ztrmm kernel for IBM z13
|
||||||
* [2017-03-13] strmm and ctrmm kernel for IBM z13
|
* [2017-03-13] strmm and ctrmm kernel for IBM z13
|
||||||
* [2017-09-01] initial Blas Level-1,2 (double precision) for IBM z13
|
* [2017-09-01] initial Blas Level-1,2 (double precision) for IBM z13
|
||||||
|
* [2018-03-07] added missing Blas Level 1-2 (double precision) simd codes
|
||||||
|
* [2019-02-01] added missing Blas Level-1,2 (single precision) simd codes
|
||||||
|
* [2019-03-14] power9 dgemm/dtrmm kernel
|
||||||
|
* [2019-04-29] power9 sgemm/strmm kernel
|
||||||
|
|
|
@ -1,4 +1,82 @@
|
||||||
OpenBLAS ChangeLog
|
OpenBLAS ChangeLog
|
||||||
|
====================================================================
|
||||||
|
Version 0.3.6
|
||||||
|
29-Apr-2019
|
||||||
|
|
||||||
|
common:
|
||||||
|
* the build tools now check that a given cpu TARGET is actually valid
|
||||||
|
* the build-time check of system features (c_check) has been made
|
||||||
|
less dependent on particular perl features (this should mainly
|
||||||
|
benefit building on Windows)
|
||||||
|
* several problems with the ReLAPACK integration were fixed,
|
||||||
|
including INTERFACE64 support and building a shared library
|
||||||
|
* building with CMAKE on BSD systems was improved
|
||||||
|
* a non-absolute SUM function was added based on the
|
||||||
|
existing optimized code for ASUM
|
||||||
|
* CBLAS interfaces to the IxMIN and IxMAX functions were added
|
||||||
|
* a name clash between LAPACKE and BOOST headers was resolved
|
||||||
|
* CMAKE builds with OpenMP failed to include the appropriate getrf_parallel
|
||||||
|
kernels
|
||||||
|
* a crash on thread (key) deletion with the USE_TLS=1 memory management
|
||||||
|
option was fixed
|
||||||
|
* restored several earlier fixes, in particular for OpenMP performance,
|
||||||
|
building on BSD, and calling fork on CYGWIN, which had inadvertently
|
||||||
|
been dropped in the 0.3.3 rewrite of the memory management code.
|
||||||
|
|
||||||
|
x86_64:
|
||||||
|
* the AVX512 DGEMM kernel has been disabled again due to unsolved problems
|
||||||
|
* building with old versions of MSVC was fixed
|
||||||
|
* it is now possible to build a static library on Windows with CMAKE
|
||||||
|
* accessing environment variables on CYGWIN at run time was fixed
|
||||||
|
* the CMAKE build system now recognizes 32bit userspace on 64bit hardware
|
||||||
|
* Intel "Denverton" atom and Hygon "Dhyana" zen CPUs are now autodetected
|
||||||
|
* building for DYNAMIC_ARCH with a DYNAMIC_LIST of targets is now supported
|
||||||
|
with CMAKE as well
|
||||||
|
* building for DYNAMIC_ARCH with GENERIC as the default target is now supported
|
||||||
|
* a buffer overflow in the SSE GEMM kernel for Intel Nano targets was fixed
|
||||||
|
* assembly bugs involving undeclared modification of input operands were fixed
|
||||||
|
in the AXPY, DOT, GEMV, GER, SCAL, SYMV and TRSM microkernels for Nehalem,
|
||||||
|
Sandybridge, Haswell, Bulldozer and Piledriver. These would typically cause
|
||||||
|
test failures or segfaults when compiled with recent versions of gcc from 8 onward.
|
||||||
|
* a similar bug was fixed in the blas_quickdivide code used to split workloads
|
||||||
|
in most functions
|
||||||
|
* a bug in the IxMIN implementation for the GENERIC target made it return the result of IxMAX
|
||||||
|
* fixed building on SkylakeX systems when either the compiler or the (emulated) operating
|
||||||
|
environment does not support AVX512
|
||||||
|
* improved GEMM performance on ZEN targets
|
||||||
|
|
||||||
|
x86:
|
||||||
|
* build failures caused by the recently added checks for AVX512 were fixed
|
||||||
|
* an inline assembly bug involving undeclared modification of an input argument was
|
||||||
|
fixed in the blas_quickdivide code used to split workloads in most functions
|
||||||
|
* a bug in the IMIN implementation for the GENERIC target made it return the result of IMAX
|
||||||
|
|
||||||
|
MIPS32:
|
||||||
|
* a bug in the IMIN implementation made it return the result of IMAX
|
||||||
|
|
||||||
|
POWER:
|
||||||
|
* single precision BLAS1/2 functions have received optimized POWER8 kernels
|
||||||
|
* POWER9 is now a separate target, with an optimized DGEMM/DTRMM kernel
|
||||||
|
* building on PPC970 systems under OSX Leopard or Tiger is now supported
|
||||||
|
* out-of-bounds memory accesses in the gemm_beta microkernels were fixed
|
||||||
|
* building a shared library on AIX is now supported for POWER6
|
||||||
|
* DYNAMIC_ARCH support has been added for POWER6 and newer
|
||||||
|
|
||||||
|
ARMv7:
|
||||||
|
* corrected xDOT behaviour with zero INC_X or INC_Y
|
||||||
|
* a bug in the IMIN implementation made it return the result of IMAX
|
||||||
|
|
||||||
|
ARMv8:
|
||||||
|
* added support for HiSilicon TSV110 cpus
|
||||||
|
* the CMAKE build system now recognizes 32bit userspace on 64bit hardware
|
||||||
|
* cross-compilation with CMAKE now works again
|
||||||
|
* a bug in the IMIN implementation made it return the result of IMAX
|
||||||
|
* ARMV8 builds with the BINARY=32 option are now automatically handled as ARMV7
|
||||||
|
|
||||||
|
IBM Z:
|
||||||
|
* optimized microkernels for single precision BLAS1/2 functions have been added
|
||||||
|
for both Z13 and Z14
|
||||||
|
|
||||||
====================================================================
|
====================================================================
|
||||||
Version 0.3.5
|
Version 0.3.5
|
||||||
31-Dec-2018
|
31-Dec-2018
|
||||||
|
|
|
@ -3,7 +3,7 @@
|
||||||
#
|
#
|
||||||
|
|
||||||
# This library's version
|
# This library's version
|
||||||
VERSION = 0.3.6.dev
|
VERSION = 0.3.7.dev
|
||||||
|
|
||||||
# If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a
|
# If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a
|
||||||
# and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library
|
# and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library
|
||||||
|
@ -181,17 +181,17 @@ NO_AFFINITY = 1
|
||||||
# time out to improve performance. This number should be from 4 to 30
|
# time out to improve performance. This number should be from 4 to 30
|
||||||
# which corresponds to (1 << n) cycles. For example, if you set to 26,
|
# which corresponds to (1 << n) cycles. For example, if you set to 26,
|
||||||
# thread will be running for (1 << 26) cycles(about 25ms on 3.0GHz
|
# thread will be running for (1 << 26) cycles(about 25ms on 3.0GHz
|
||||||
# system). Also you can control this mumber by THREAD_TIMEOUT
|
# system). Also you can control this number by THREAD_TIMEOUT
|
||||||
# CCOMMON_OPT += -DTHREAD_TIMEOUT=26
|
# CCOMMON_OPT += -DTHREAD_TIMEOUT=26
|
||||||
|
|
||||||
# Using special device driver for mapping physically contigous memory
|
# Using special device driver for mapping physically contiguous memory
|
||||||
# to the user space. If bigphysarea is enabled, it will use it.
|
# to the user space. If bigphysarea is enabled, it will use it.
|
||||||
# DEVICEDRIVER_ALLOCATION = 1
|
# DEVICEDRIVER_ALLOCATION = 1
|
||||||
|
|
||||||
# If you need to synchronize FP CSR between threads (for x86/x86_64 only).
|
# If you need to synchronize FP CSR between threads (for x86/x86_64 only).
|
||||||
# CONSISTENT_FPCSR = 1
|
# CONSISTENT_FPCSR = 1
|
||||||
|
|
||||||
# If any gemm arguement m, n or k is less or equal this threshold, gemm will be execute
|
# If any gemm argument m, n or k is less than or equal to this threshold, gemm will be executed
|
||||||
# with single thread. (Actually in recent versions this is a factor proportional to the
|
# with single thread. (Actually in recent versions this is a factor proportional to the
|
||||||
# number of floating point operations necessary for the given problem size, no longer
|
# number of floating point operations necessary for the given problem size, no longer
|
||||||
# an individual dimension). You can use this setting to avoid the overhead of multi-
|
# an individual dimension). You can use this setting to avoid the overhead of multi-
|
||||||
|
|
13
README.md
13
README.md
|
@ -10,7 +10,7 @@ AppVeyor: [
|
### Compile with MASS support on Power CPU (optional)
|
||||||
|
|
||||||
The [IBM MASS](http://www-01.ibm.com/software/awdtools/mass/linux/mass-linux.html) library
|
The [IBM MASS](https://www.ibm.com/support/home/product/W511326D80541V01/other_software/mathematical_acceleration_subsystem) library consists of a set of mathematical functions for C, C++, and Fortran applications that are tuned for optimum performance on POWER architectures.
|
||||||
consists of a set of mathematical functions for C, C++, and Fortran applications that are
|
|
||||||
are tuned for optimum performance on POWER architectures.
|
|
||||||
OpenBLAS with MASS requires a 64-bit, little-endian OS on POWER.
|
OpenBLAS with MASS requires a 64-bit, little-endian OS on POWER.
|
||||||
The library can be installed as shown:
|
The library can be installed as shown:
|
||||||
|
|
||||||
|
@ -115,6 +113,7 @@ Please read `GotoBLAS_01Readme.txt`.
|
||||||
- **AMD Bulldozer**: x86-64 ?GEMM FMA4 kernels. (Thanks to Werner Saar)
|
- **AMD Bulldozer**: x86-64 ?GEMM FMA4 kernels. (Thanks to Werner Saar)
|
||||||
- **AMD PILEDRIVER**: Uses Bulldozer codes with some optimizations.
|
- **AMD PILEDRIVER**: Uses Bulldozer codes with some optimizations.
|
||||||
- **AMD STEAMROLLER**: Uses Bulldozer codes with some optimizations.
|
- **AMD STEAMROLLER**: Uses Bulldozer codes with some optimizations.
|
||||||
|
- **AMD ZEN**: Uses Haswell codes with some optimizations.
|
||||||
|
|
||||||
#### MIPS64
|
#### MIPS64
|
||||||
|
|
||||||
|
@ -133,11 +132,13 @@ Please read `GotoBLAS_01Readme.txt`.
|
||||||
|
|
||||||
#### PPC/PPC64
|
#### PPC/PPC64
|
||||||
|
|
||||||
- **POWER8**: Optmized Level-3 BLAS and some Level-1, only with `USE_OPENMP=1`
|
- **POWER8**: Optimized BLAS, only for PPC64LE (Little Endian), only with `USE_OPENMP=1`
|
||||||
|
- **POWER9**: Optimized Level-3 BLAS (real) and some Level-1,2. PPC64LE with OpenMP only.
|
||||||
|
|
||||||
#### IBM zEnterprise System
|
#### IBM zEnterprise System
|
||||||
|
|
||||||
- **Z13**: Optimized Level-3 BLAS and Level-1,2 (double precision)
|
- **Z13**: Optimized Level-3 BLAS and Level-1,2 (double precision)
|
||||||
|
- **Z14**: Optimized Level-3 BLAS and Level-1,2 (single precision)
|
||||||
|
|
||||||
### Supported OS
|
### Supported OS
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,40 @@
|
||||||
|
# Starter pipeline
|
||||||
|
# Start with a minimal pipeline that you can customize to build and deploy your code.
|
||||||
|
# Add steps that build, run tests, deploy, and more:
|
||||||
|
# https://aka.ms/yaml
|
||||||
|
|
||||||
|
trigger:
|
||||||
|
- master
|
||||||
|
|
||||||
|
pool:
|
||||||
|
vmImage: 'ubuntu-latest'
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- script: echo Hello, world!
|
||||||
|
displayName: 'Run a one-line script'
|
||||||
|
|
||||||
|
#- script: |
|
||||||
|
# docker run --rm --privileged multiarch/qemu-user-static:register --reset
|
||||||
|
# ls /proc/sys/fs/binfmt_misc/
|
||||||
|
# condition: not(startsWith(variables['CONFIG'], 'linux_64'))
|
||||||
|
# displayName: 'Configure binfmt_misc'
|
||||||
|
|
||||||
|
- script: |
|
||||||
|
echo "FROM openblas/alpine:arm32
|
||||||
|
COPY . /tmp/openblas
|
||||||
|
RUN mkdir /tmp/openblas/build && \
|
||||||
|
cd /tmp/openblas/build && \
|
||||||
|
CC=gcc cmake -D DYNAMIC_ARCH=OFF \
|
||||||
|
-D TARGET=ARMV6 \
|
||||||
|
-D BUILD_SHARED_LIBS=ON \
|
||||||
|
-D BUILD_WITHOUT_LAPACK=ON \
|
||||||
|
-D BUILD_WITHOUT_CBLAS=ON \
|
||||||
|
-D CMAKE_BUILD_TYPE=Release ../ && \
|
||||||
|
cmake --build ." > Dockerfile
|
||||||
|
docker build .
|
||||||
|
displayName: Run ARMV6 docker build
|
||||||
|
|
||||||
|
#- script: |
|
||||||
|
# echo Add other tasks to build, test, and deploy your project.
|
||||||
|
# echo See https://aka.ms/yaml
|
||||||
|
# displayName: 'Run a multi-line script'
|
|
@ -1,7 +1,7 @@
|
||||||
# helper functions for the kernel CMakeLists.txt
|
# helper functions for the kernel CMakeLists.txt
|
||||||
|
|
||||||
|
|
||||||
# Set the default filenames for L1 objects. Most of these will be overriden by the appropriate KERNEL file.
|
# Set the default filenames for L1 objects. Most of these will be overridden by the appropriate KERNEL file.
|
||||||
macro(SetDefaultL1)
|
macro(SetDefaultL1)
|
||||||
set(SAMAXKERNEL amax.S)
|
set(SAMAXKERNEL amax.S)
|
||||||
set(DAMAXKERNEL amax.S)
|
set(DAMAXKERNEL amax.S)
|
||||||
|
|
|
@ -283,7 +283,7 @@ endif ()
|
||||||
|
|
||||||
set(KERNELDIR "${PROJECT_SOURCE_DIR}/kernel/${ARCH}")
|
set(KERNELDIR "${PROJECT_SOURCE_DIR}/kernel/${ARCH}")
|
||||||
|
|
||||||
# TODO: nead to convert these Makefiles
|
# TODO: need to convert these Makefiles
|
||||||
# include ${PROJECT_SOURCE_DIR}/cmake/${ARCH}.cmake
|
# include ${PROJECT_SOURCE_DIR}/cmake/${ARCH}.cmake
|
||||||
|
|
||||||
if (${CORE} STREQUAL "PPC440")
|
if (${CORE} STREQUAL "PPC440")
|
||||||
|
|
|
@ -89,7 +89,7 @@ function(AllCombinations list_in absent_codes_in)
|
||||||
set(CODES_OUT ${CODES_OUT} PARENT_SCOPE)
|
set(CODES_OUT ${CODES_OUT} PARENT_SCOPE)
|
||||||
endfunction ()
|
endfunction ()
|
||||||
|
|
||||||
# generates object files for each of the sources, using the BLAS naming scheme to pass the funciton name as a preprocessor definition
|
# generates object files for each of the sources, using the BLAS naming scheme to pass the function name as a preprocessor definition
|
||||||
# @param sources_in the source files to build from
|
# @param sources_in the source files to build from
|
||||||
# @param defines_in (optional) preprocessor definitions that will be applied to all objects
|
# @param defines_in (optional) preprocessor definitions that will be applied to all objects
|
||||||
# @param name_in (optional) if this is set this name will be used instead of the filename. Use a * to indicate where the float character should go, if no star the character will be prepended.
|
# @param name_in (optional) if this is set this name will be used instead of the filename. Use a * to indicate where the float character should go, if no star the character will be prepended.
|
||||||
|
|
|
@ -45,7 +45,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
* SIZE must be carefully chosen to be:
|
* SIZE must be carefully chosen to be:
|
||||||
* - as small as possible to maximize the number of stack allocation
|
* - as small as possible to maximize the number of stack allocation
|
||||||
* - large enough to support all architectures and kernel
|
* - large enough to support all architectures and kernel
|
||||||
* Chosing a too small SIZE will lead to a stack smashing.
|
* Choosing a SIZE too small will lead to stack smashing.
|
||||||
*/
|
*/
|
||||||
#define STACK_ALLOC(SIZE, TYPE, BUFFER) \
|
#define STACK_ALLOC(SIZE, TYPE, BUFFER) \
|
||||||
/* make it volatile because some function (ex: dgemv_n.S) */ \
|
/* make it volatile because some function (ex: dgemv_n.S) */ \
|
||||||
|
|
|
@ -214,7 +214,7 @@ static __inline int blas_quickdivide(unsigned int x, unsigned int y){
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(PILEDRIVER) || defined(BULLDOZER) || defined(STEAMROLLER) || defined(EXCAVATOR)
|
#if defined(PILEDRIVER) || defined(BULLDOZER) || defined(STEAMROLLER) || defined(EXCAVATOR)
|
||||||
//Enable some optimazation for barcelona.
|
//Enable some optimization for barcelona.
|
||||||
#define BARCELONA_OPTIMIZATION
|
#define BARCELONA_OPTIMIZATION
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -276,7 +276,7 @@ static __inline int blas_quickdivide(unsigned int x, unsigned int y){
|
||||||
#ifdef ASSEMBLER
|
#ifdef ASSEMBLER
|
||||||
|
|
||||||
#if defined(PILEDRIVER) || defined(BULLDOZER) || defined(STEAMROLLER) || defined(EXCAVATOR)
|
#if defined(PILEDRIVER) || defined(BULLDOZER) || defined(STEAMROLLER) || defined(EXCAVATOR)
|
||||||
//Enable some optimazation for barcelona.
|
//Enable some optimization for barcelona.
|
||||||
#define BARCELONA_OPTIMIZATION
|
#define BARCELONA_OPTIMIZATION
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -577,7 +577,7 @@
|
||||||
SUBROUTINE STEST1(SCOMP1,STRUE1,SSIZE,SFAC)
|
SUBROUTINE STEST1(SCOMP1,STRUE1,SSIZE,SFAC)
|
||||||
* ************************* STEST1 *****************************
|
* ************************* STEST1 *****************************
|
||||||
*
|
*
|
||||||
* THIS IS AN INTERFACE SUBROUTINE TO ACCOMODATE THE FORTRAN
|
* THIS IS AN INTERFACE SUBROUTINE TO ACCOMMODATE THE FORTRAN
|
||||||
* REQUIREMENT THAT WHEN A DUMMY ARGUMENT IS AN ARRAY, THE
|
* REQUIREMENT THAT WHEN A DUMMY ARGUMENT IS AN ARRAY, THE
|
||||||
* ACTUAL ARGUMENT MUST ALSO BE AN ARRAY OR AN ARRAY ELEMENT.
|
* ACTUAL ARGUMENT MUST ALSO BE AN ARRAY OR AN ARRAY ELEMENT.
|
||||||
*
|
*
|
||||||
|
|
|
@ -653,7 +653,7 @@
|
||||||
SUBROUTINE STEST1(SCOMP1,STRUE1,SSIZE,SFAC)
|
SUBROUTINE STEST1(SCOMP1,STRUE1,SSIZE,SFAC)
|
||||||
* ************************* STEST1 *****************************
|
* ************************* STEST1 *****************************
|
||||||
*
|
*
|
||||||
* THIS IS AN INTERFACE SUBROUTINE TO ACCOMODATE THE FORTRAN
|
* THIS IS AN INTERFACE SUBROUTINE TO ACCOMMODATE THE FORTRAN
|
||||||
* REQUIREMENT THAT WHEN A DUMMY ARGUMENT IS AN ARRAY, THE
|
* REQUIREMENT THAT WHEN A DUMMY ARGUMENT IS AN ARRAY, THE
|
||||||
* ACTUAL ARGUMENT MUST ALSO BE AN ARRAY OR AN ARRAY ELEMENT.
|
* ACTUAL ARGUMENT MUST ALSO BE AN ARRAY OR AN ARRAY ELEMENT.
|
||||||
*
|
*
|
||||||
|
|
|
@ -653,7 +653,7 @@
|
||||||
SUBROUTINE STEST1(SCOMP1,STRUE1,SSIZE,SFAC)
|
SUBROUTINE STEST1(SCOMP1,STRUE1,SSIZE,SFAC)
|
||||||
* ************************* STEST1 *****************************
|
* ************************* STEST1 *****************************
|
||||||
*
|
*
|
||||||
* THIS IS AN INTERFACE SUBROUTINE TO ACCOMODATE THE FORTRAN
|
* THIS IS AN INTERFACE SUBROUTINE TO ACCOMMODATE THE FORTRAN
|
||||||
* REQUIREMENT THAT WHEN A DUMMY ARGUMENT IS AN ARRAY, THE
|
* REQUIREMENT THAT WHEN A DUMMY ARGUMENT IS AN ARRAY, THE
|
||||||
* ACTUAL ARGUMENT MUST ALSO BE AN ARRAY OR AN ARRAY ELEMENT.
|
* ACTUAL ARGUMENT MUST ALSO BE AN ARRAY OR AN ARRAY ELEMENT.
|
||||||
*
|
*
|
||||||
|
|
|
@ -577,7 +577,7 @@
|
||||||
SUBROUTINE STEST1(SCOMP1,STRUE1,SSIZE,SFAC)
|
SUBROUTINE STEST1(SCOMP1,STRUE1,SSIZE,SFAC)
|
||||||
* ************************* STEST1 *****************************
|
* ************************* STEST1 *****************************
|
||||||
*
|
*
|
||||||
* THIS IS AN INTERFACE SUBROUTINE TO ACCOMODATE THE FORTRAN
|
* THIS IS AN INTERFACE SUBROUTINE TO ACCOMMODATE THE FORTRAN
|
||||||
* REQUIREMENT THAT WHEN A DUMMY ARGUMENT IS AN ARRAY, THE
|
* REQUIREMENT THAT WHEN A DUMMY ARGUMENT IS AN ARRAY, THE
|
||||||
* ACTUAL ARGUMENT MUST ALSO BE AN ARRAY OR AN ARRAY ELEMENT.
|
* ACTUAL ARGUMENT MUST ALSO BE AN ARRAY OR AN ARRAY ELEMENT.
|
||||||
*
|
*
|
||||||
|
|
|
@ -109,7 +109,7 @@ extern unsigned int openblas_thread_timeout();
|
||||||
/* equal to "OMP_NUM_THREADS - 1" and thread only wakes up when */
|
/* equal to "OMP_NUM_THREADS - 1" and thread only wakes up when */
|
||||||
/* jobs is queued. */
|
/* jobs is queued. */
|
||||||
|
|
||||||
/* We need this grobal for cheking if initialization is finished. */
|
/* We need this global for checking if initialization is finished. */
|
||||||
int blas_server_avail __attribute__((aligned(ATTRIBUTE_SIZE))) = 0;
|
int blas_server_avail __attribute__((aligned(ATTRIBUTE_SIZE))) = 0;
|
||||||
|
|
||||||
/* Local Variables */
|
/* Local Variables */
|
||||||
|
@ -150,8 +150,8 @@ static unsigned int thread_timeout = (1U << (THREAD_TIMEOUT));
|
||||||
|
|
||||||
#ifdef MONITOR
|
#ifdef MONITOR
|
||||||
|
|
||||||
/* Monitor is a function to see thread's status for every seconds. */
|
/* Monitor is a function to see thread's status for every second. */
|
||||||
/* Usually it turns off and it's for debugging. */
|
/* Usually it turns off and it's for debugging. */
|
||||||
|
|
||||||
static pthread_t monitor_thread;
|
static pthread_t monitor_thread;
|
||||||
static int main_status[MAX_CPU_NUMBER];
|
static int main_status[MAX_CPU_NUMBER];
|
||||||
|
|
|
@ -50,7 +50,7 @@
|
||||||
|
|
||||||
/* This is a thread implementation for Win32 lazy implementation */
|
/* This is a thread implementation for Win32 lazy implementation */
|
||||||
|
|
||||||
/* Thread server common infomation */
|
/* Thread server common information */
|
||||||
typedef struct{
|
typedef struct{
|
||||||
CRITICAL_SECTION lock;
|
CRITICAL_SECTION lock;
|
||||||
HANDLE filled;
|
HANDLE filled;
|
||||||
|
@ -61,7 +61,7 @@ typedef struct{
|
||||||
|
|
||||||
} blas_pool_t;
|
} blas_pool_t;
|
||||||
|
|
||||||
/* We need this global for cheking if initialization is finished. */
|
/* We need this global for checking if initialization is finished. */
|
||||||
int blas_server_avail = 0;
|
int blas_server_avail = 0;
|
||||||
|
|
||||||
/* Local Variables */
|
/* Local Variables */
|
||||||
|
|
|
@ -765,7 +765,7 @@ int gotoblas_set_affinity(int pos) {
|
||||||
|
|
||||||
int mynode = 1;
|
int mynode = 1;
|
||||||
|
|
||||||
/* if number of threads is larger than inital condition */
|
/* if number of threads is larger than initial condition */
|
||||||
if (pos < 0) {
|
if (pos < 0) {
|
||||||
sched_setaffinity(0, sizeof(cpu_orig_mask), &cpu_orig_mask[0]);
|
sched_setaffinity(0, sizeof(cpu_orig_mask), &cpu_orig_mask[0]);
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -857,7 +857,14 @@ void gotoblas_affinity_init(void) {
|
||||||
common -> shmid = pshmid;
|
common -> shmid = pshmid;
|
||||||
|
|
||||||
if (common -> magic != SH_MAGIC) {
|
if (common -> magic != SH_MAGIC) {
|
||||||
|
|
||||||
|
#if defined(__GLIBC_PREREQ)
|
||||||
|
#if __GLIBC_PREREQ(2, 7)
|
||||||
cpu_set_t *cpusetp;
|
cpu_set_t *cpusetp;
|
||||||
|
#else
|
||||||
|
cpu_set_t cpuset;
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
int nums;
|
int nums;
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
|
@ -890,7 +897,7 @@ void gotoblas_affinity_init(void) {
|
||||||
}
|
}
|
||||||
CPU_FREE(cpusetp);
|
CPU_FREE(cpusetp);
|
||||||
#else
|
#else
|
||||||
ret = sched_getaffinity(0,sizeof(cpu_set_t), cpusetp);
|
ret = sched_getaffinity(0,sizeof(cpu_set_t), &cpuset);
|
||||||
if (ret!=0) {
|
if (ret!=0) {
|
||||||
common->num_procs = nums;
|
common->num_procs = nums;
|
||||||
} else {
|
} else {
|
||||||
|
@ -898,11 +905,11 @@ void gotoblas_affinity_init(void) {
|
||||||
int i;
|
int i;
|
||||||
int n = 0;
|
int n = 0;
|
||||||
for (i=0;i<nums;i++)
|
for (i=0;i<nums;i++)
|
||||||
if (CPU_ISSET(i,cpusetp)) n++;
|
if (CPU_ISSET(i,&cpuset)) n++;
|
||||||
common->num_procs = n;
|
common->num_procs = n;
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
common->num_procs = CPU_COUNT(sizeof(cpu_set_t),cpusetp);
|
common->num_procs = CPU_COUNT(&cpuset);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -229,7 +229,7 @@ int get_num_procs(void) {
|
||||||
n=0;
|
n=0;
|
||||||
#if !__GLIBC_PREREQ(2, 6)
|
#if !__GLIBC_PREREQ(2, 6)
|
||||||
for (i=0;i<nums;i++)
|
for (i=0;i<nums;i++)
|
||||||
if (CPU_ISSET(i,cpuset)) n++;
|
if (CPU_ISSET(i,&cpuset)) n++;
|
||||||
nums=n;
|
nums=n;
|
||||||
#else
|
#else
|
||||||
nums = CPU_COUNT(sizeof(cpuset),&cpuset);
|
nums = CPU_COUNT(sizeof(cpuset),&cpuset);
|
||||||
|
@ -1772,7 +1772,7 @@ int get_num_procs(void) {
|
||||||
n=0;
|
n=0;
|
||||||
#if !__GLIBC_PREREQ(2, 6)
|
#if !__GLIBC_PREREQ(2, 6)
|
||||||
for (i=0;i<nums;i++)
|
for (i=0;i<nums;i++)
|
||||||
if (CPU_ISSET(i,cpuset)) n++;
|
if (CPU_ISSET(i,&cpuset)) n++;
|
||||||
nums=n;
|
nums=n;
|
||||||
#else
|
#else
|
||||||
nums = CPU_COUNT(sizeof(cpuset),&cpuset);
|
nums = CPU_COUNT(sizeof(cpuset),&cpuset);
|
||||||
|
@ -2751,7 +2751,7 @@ void *blas_memory_alloc(int procpos){
|
||||||
|
|
||||||
#ifdef ALLOC_DEVICEDRIVER
|
#ifdef ALLOC_DEVICEDRIVER
|
||||||
if ((*func == alloc_devicedirver) && (map_address == (void *)-1)) {
|
if ((*func == alloc_devicedirver) && (map_address == (void *)-1)) {
|
||||||
fprintf(stderr, "OpenBLAS Warning ... Physically contigous allocation was failed.\n");
|
fprintf(stderr, "OpenBLAS Warning ... Physically contiguous allocation was failed.\n");
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
2
f_check
2
f_check
|
@ -125,7 +125,7 @@ if ($compiler eq "") {
|
||||||
$openmp = "-openmp";
|
$openmp = "-openmp";
|
||||||
}
|
}
|
||||||
|
|
||||||
# for embeded underscore name, e.g. zho_ge, it may append 2 underscores.
|
# for embedded underscore name, e.g. zho_ge, it may append 2 underscores.
|
||||||
$data = `$compiler -O2 -S ftest3.f > /dev/null 2>&1 && cat ftest3.s && rm -f ftest3.s`;
|
$data = `$compiler -O2 -S ftest3.f > /dev/null 2>&1 && cat ftest3.s && rm -f ftest3.s`;
|
||||||
if ($data =~ / zho_ge__/) {
|
if ($data =~ / zho_ge__/) {
|
||||||
$need2bu = 1;
|
$need2bu = 1;
|
||||||
|
|
|
@ -24,7 +24,7 @@ set(BLAS1_MANGLED_SOURCES
|
||||||
axpby.c
|
axpby.c
|
||||||
)
|
)
|
||||||
|
|
||||||
# TODO: USE_NETLIB_GEMV shoudl switch gemv.c to netlib/*gemv.f
|
# TODO: USE_NETLIB_GEMV should switch gemv.c to netlib/*gemv.f
|
||||||
# these all have 'z' sources for complex versions
|
# these all have 'z' sources for complex versions
|
||||||
set(BLAS2_SOURCES
|
set(BLAS2_SOURCES
|
||||||
gemv.c ger.c
|
gemv.c ger.c
|
||||||
|
|
|
@ -91,7 +91,7 @@ void CNAME(blasint n, FLOAT alpha, FLOAT *x, blasint incx, FLOAT *y, blasint inc
|
||||||
//disable multi-thread when incx==0 or incy==0
|
//disable multi-thread when incx==0 or incy==0
|
||||||
//In that case, the threads would be dependent.
|
//In that case, the threads would be dependent.
|
||||||
//
|
//
|
||||||
//Temporarily work-around the low performance issue with small imput size &
|
//Temporarily work-around the low performance issue with small input size &
|
||||||
//multithreads.
|
//multithreads.
|
||||||
if (incx == 0 || incy == 0 || n <= MULTI_THREAD_MINIMAL)
|
if (incx == 0 || incy == 0 || n <= MULTI_THREAD_MINIMAL)
|
||||||
nthreads = 1;
|
nthreads = 1;
|
||||||
|
|
|
@ -99,7 +99,7 @@ void CNAME(blasint n, FLOAT *ALPHA, FLOAT *x, blasint incx, FLOAT *y, blasint in
|
||||||
//disable multi-thread when incx==0 or incy==0
|
//disable multi-thread when incx==0 or incy==0
|
||||||
//In that case, the threads would be dependent.
|
//In that case, the threads would be dependent.
|
||||||
//
|
//
|
||||||
//Temporarily work-around the low performance issue with small imput size &
|
//Temporarily work-around the low performance issue with small input size &
|
||||||
//multithreads.
|
//multithreads.
|
||||||
if (incx == 0 || incy == 0 || n <= MULTI_THREAD_MINIMAL)
|
if (incx == 0 || incy == 0 || n <= MULTI_THREAD_MINIMAL)
|
||||||
nthreads = 1;
|
nthreads = 1;
|
||||||
|
|
|
@ -3,12 +3,12 @@
|
||||||
#CGEMM_BETA = ../generic/zgemm_beta.c
|
#CGEMM_BETA = ../generic/zgemm_beta.c
|
||||||
#ZGEMM_BETA = ../generic/zgemm_beta.c
|
#ZGEMM_BETA = ../generic/zgemm_beta.c
|
||||||
|
|
||||||
STRMMKERNEL = strmm_kernel_16x8_power8.S
|
STRMMKERNEL = sgemm_kernel_power9.S
|
||||||
DTRMMKERNEL = dgemm_kernel_power9.S
|
DTRMMKERNEL = dgemm_kernel_power9.S
|
||||||
CTRMMKERNEL = ctrmm_kernel_8x4_power8.S
|
CTRMMKERNEL = ctrmm_kernel_8x4_power8.S
|
||||||
ZTRMMKERNEL = ztrmm_kernel_8x2_power8.S
|
ZTRMMKERNEL = ztrmm_kernel_8x2_power8.S
|
||||||
|
|
||||||
SGEMMKERNEL = sgemm_kernel_16x8_power8.S
|
SGEMMKERNEL = sgemm_kernel_power9.S
|
||||||
SGEMMINCOPY = ../generic/gemm_ncopy_16.c
|
SGEMMINCOPY = ../generic/gemm_ncopy_16.c
|
||||||
SGEMMITCOPY = sgemm_tcopy_16_power8.S
|
SGEMMITCOPY = sgemm_tcopy_16_power8.S
|
||||||
SGEMMONCOPY = ../generic/gemm_ncopy_8.c
|
SGEMMONCOPY = ../generic/gemm_ncopy_8.c
|
||||||
|
|
|
@ -75,7 +75,7 @@ static inline __attribute__((always_inline)) __vector float mvec_mergeo(__vector
|
||||||
static BLASLONG ciamax_kernel_32(BLASLONG n, FLOAT *x, FLOAT *maxf) {
|
static BLASLONG ciamax_kernel_32(BLASLONG n, FLOAT *x, FLOAT *maxf) {
|
||||||
|
|
||||||
BLASLONG index;
|
BLASLONG index;
|
||||||
BLASLONG i;
|
BLASLONG i=0;
|
||||||
#if defined(USE_MASK_PERMUTATIONS)
|
#if defined(USE_MASK_PERMUTATIONS)
|
||||||
register __vector unsigned int static_index0 = {0,1,2,3};
|
register __vector unsigned int static_index0 = {0,1,2,3};
|
||||||
#else
|
#else
|
||||||
|
|
|
@ -50,7 +50,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
static BLASLONG ciamin_kernel_32(BLASLONG n, FLOAT *x, FLOAT *minf) {
|
static BLASLONG ciamin_kernel_32(BLASLONG n, FLOAT *x, FLOAT *minf) {
|
||||||
|
|
||||||
BLASLONG index;
|
BLASLONG index;
|
||||||
BLASLONG i;
|
BLASLONG i=0;
|
||||||
register __vector unsigned int static_index0 = {0,1,2,3};
|
register __vector unsigned int static_index0 = {0,1,2,3};
|
||||||
register __vector unsigned int temp0 = {4,4,4, 4}; //temporary vector register
|
register __vector unsigned int temp0 = {4,4,4, 4}; //temporary vector register
|
||||||
register __vector unsigned int temp1= temp0<<1; //{8,8,8,8}
|
register __vector unsigned int temp1= temp0<<1; //{8,8,8,8}
|
||||||
|
|
|
@ -0,0 +1,286 @@
|
||||||
|
/***************************************************************************
|
||||||
|
Copyright (c) 2013-2019, The OpenBLAS Project
|
||||||
|
All rights reserved.
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
1. Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in
|
||||||
|
the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
3. Neither the name of the OpenBLAS project nor the names of
|
||||||
|
its contributors may be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written permission.
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||||
|
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||||
|
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*****************************************************************************/
|
||||||
|
|
||||||
|
#define ASSEMBLER
|
||||||
|
#include "common.h"
|
||||||
|
#include "def_vsx.h"
|
||||||
|
|
||||||
|
|
||||||
|
#define LOAD ld
|
||||||
|
#define STACKSIZE (512 )
|
||||||
|
|
||||||
|
#define M r3
|
||||||
|
#define N r4
|
||||||
|
#define K r5
|
||||||
|
|
||||||
|
|
||||||
|
#define A r7
|
||||||
|
#define B r8
|
||||||
|
#define C r9
|
||||||
|
#define LDC r10
|
||||||
|
#define OFFSET r6
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#define alpha_r vs20
|
||||||
|
#define save_permute_1 vs21
|
||||||
|
#define save_permute_2 vs22
|
||||||
|
#define permute_mask vs23
|
||||||
|
#define o0 0
|
||||||
|
|
||||||
|
|
||||||
|
#define T1 r11
|
||||||
|
#define T2 r12
|
||||||
|
#define T3 r14
|
||||||
|
#define T4 r15
|
||||||
|
#define T5 r16
|
||||||
|
#define T6 r17
|
||||||
|
#define L r18
|
||||||
|
#define T7 r19
|
||||||
|
#define T8 r20
|
||||||
|
#define TEMP_REG r21
|
||||||
|
#define I r22
|
||||||
|
#define J r23
|
||||||
|
#define AO r24
|
||||||
|
#define BO r25
|
||||||
|
#define CO r26
|
||||||
|
#define T9 r27
|
||||||
|
#define T10 r28
|
||||||
|
#define T11 r29
|
||||||
|
|
||||||
|
#define T12 r30
|
||||||
|
#define T13 r31
|
||||||
|
|
||||||
|
#include "sgemm_macros_power9.S"
|
||||||
|
|
||||||
|
.equ perm_const1, 0x0405060700010203
|
||||||
|
.equ perm_const2, 0x0c0d0e0f08090a0b
|
||||||
|
.equ save_permute_11, 0x1415161718191a1b
|
||||||
|
.equ save_permute_12, 0x0405060708090a0b
|
||||||
|
.equ save_permute_21, 0x101112131c1d1e1f
|
||||||
|
.equ save_permute_22, 0x000102030c0d0e0f
|
||||||
|
|
||||||
|
|
||||||
|
#ifndef NEEDPARAM
|
||||||
|
|
||||||
|
PROLOGUE
|
||||||
|
PROFCODE
|
||||||
|
|
||||||
|
addi SP, SP, -STACKSIZE
|
||||||
|
li r0, 0
|
||||||
|
|
||||||
|
stfd f14, 0(SP)
|
||||||
|
stfd f15, 8(SP)
|
||||||
|
stfd f16, 16(SP)
|
||||||
|
stfd f17, 24(SP)
|
||||||
|
|
||||||
|
stfd f18, 32(SP)
|
||||||
|
stfd f19, 40(SP)
|
||||||
|
stfd f20, 48(SP)
|
||||||
|
stfd f21, 56(SP)
|
||||||
|
|
||||||
|
stfd f22, 64(SP)
|
||||||
|
stfd f23, 72(SP)
|
||||||
|
stfd f24, 80(SP)
|
||||||
|
stfd f25, 88(SP)
|
||||||
|
|
||||||
|
stfd f26, 96(SP)
|
||||||
|
stfd f27, 104(SP)
|
||||||
|
stfd f28, 112(SP)
|
||||||
|
stfd f29, 120(SP)
|
||||||
|
|
||||||
|
stfd f30, 128(SP)
|
||||||
|
stfd f31, 136(SP)
|
||||||
|
|
||||||
|
|
||||||
|
std r31, 144(SP)
|
||||||
|
std r30, 152(SP)
|
||||||
|
std r29, 160(SP)
|
||||||
|
std r28, 168(SP)
|
||||||
|
std r27, 176(SP)
|
||||||
|
std r26, 184(SP)
|
||||||
|
std r25, 192(SP)
|
||||||
|
std r24, 200(SP)
|
||||||
|
std r23, 208(SP)
|
||||||
|
std r22, 216(SP)
|
||||||
|
std r21, 224(SP)
|
||||||
|
std r20, 232(SP)
|
||||||
|
std r19, 240(SP)
|
||||||
|
std r18, 248(SP)
|
||||||
|
std r17, 256(SP)
|
||||||
|
std r16, 264(SP)
|
||||||
|
std r15, 272(SP)
|
||||||
|
std r14, 280(SP)
|
||||||
|
|
||||||
|
|
||||||
|
stxv v20, 288(SP)
|
||||||
|
stxv v21, 304(SP)
|
||||||
|
stxv v22, 320(SP)
|
||||||
|
stxv v23, 336(SP)
|
||||||
|
stxv v24, 352(SP)
|
||||||
|
stxv v25, 368(SP)
|
||||||
|
stxv v26, 384(SP)
|
||||||
|
stxv v27, 400(SP)
|
||||||
|
stxv v28, 416(SP)
|
||||||
|
stxv v29, 432(SP)
|
||||||
|
stxv v30, 448(SP)
|
||||||
|
stxv v31, 464(SP)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#if defined(TRMMKERNEL)
|
||||||
|
ld OFFSET, FRAMESLOT(0) + STACKSIZE(SP)
|
||||||
|
#endif
|
||||||
|
slwi LDC, LDC, 2
|
||||||
|
|
||||||
|
|
||||||
|
/* cmpwi cr0, M, 0
|
||||||
|
ble .L999_H1
|
||||||
|
cmpwi cr0, N, 0
|
||||||
|
ble .L999_H1
|
||||||
|
cmpwi cr0, K, 0
|
||||||
|
ble .L999_H1
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
/* alpha arrives in f1 (vs1): convert it to single precision and splat word 0 across alpha_r */
|
||||||
|
xscvdpspn alpha_r,vs1
|
||||||
|
xxspltw alpha_r,alpha_r,0
|
||||||
|
|
||||||
|
|
||||||
|
/*load reverse permute mask for big endian
|
||||||
|
uint128 = 0x0c0d0e0f08090a0b0405060700010203
|
||||||
|
*/
|
||||||
|
|
||||||
|
lis T2, perm_const2@highest
|
||||||
|
ori T2, T2, perm_const2@higher
|
||||||
|
rldicr T2, T2, 32, 31
|
||||||
|
oris T2, T2, perm_const2@h
|
||||||
|
ori T2, T2, perm_const2@l
|
||||||
|
|
||||||
|
lis T1, perm_const1@highest
|
||||||
|
ori T1, T1, perm_const1@higher
|
||||||
|
rldicr T1, T1, 32, 31
|
||||||
|
oris T1, T1, perm_const1@h
|
||||||
|
ori T1, T1, perm_const1@l
|
||||||
|
|
||||||
|
mtvsrdd permute_mask,T2,T1
|
||||||
|
|
||||||
|
lis T2, save_permute_12@highest
|
||||||
|
ori T2, T2, save_permute_12@higher
|
||||||
|
rldicr T2, T2, 32, 31
|
||||||
|
oris T2, T2, save_permute_12@h
|
||||||
|
ori T2, T2, save_permute_12@l
|
||||||
|
|
||||||
|
lis T1, save_permute_11@highest
|
||||||
|
ori T1, T1, save_permute_11@higher
|
||||||
|
rldicr T1, T1, 32, 31
|
||||||
|
oris T1, T1, save_permute_11@h
|
||||||
|
ori T1, T1, save_permute_11@l
|
||||||
|
|
||||||
|
mtvsrdd save_permute_1,T2,T1
|
||||||
|
|
||||||
|
lis T2, save_permute_22@highest
|
||||||
|
ori T2, T2, save_permute_22@higher
|
||||||
|
rldicr T2, T2, 32, 31
|
||||||
|
oris T2, T2, save_permute_22@h
|
||||||
|
ori T2, T2, save_permute_22@l
|
||||||
|
|
||||||
|
lis T1, save_permute_21@highest
|
||||||
|
ori T1, T1, save_permute_21@higher
|
||||||
|
rldicr T1, T1, 32, 31
|
||||||
|
oris T1, T1, save_permute_21@h
|
||||||
|
ori T1, T1, save_permute_21@l
|
||||||
|
|
||||||
|
mtvsrdd save_permute_2,T2,T1
|
||||||
|
|
||||||
|
#include "sgemm_logic_power9.S"
|
||||||
|
|
||||||
|
.L999:
|
||||||
|
addi r3, 0, 0
|
||||||
|
|
||||||
|
lfd f14, 0(SP)
|
||||||
|
lfd f15, 8(SP)
|
||||||
|
lfd f16, 16(SP)
|
||||||
|
lfd f17, 24(SP)
|
||||||
|
|
||||||
|
lfd f18, 32(SP)
|
||||||
|
lfd f19, 40(SP)
|
||||||
|
lfd f20, 48(SP)
|
||||||
|
lfd f21, 56(SP)
|
||||||
|
|
||||||
|
lfd f22, 64(SP)
|
||||||
|
lfd f23, 72(SP)
|
||||||
|
lfd f24, 80(SP)
|
||||||
|
lfd f25, 88(SP)
|
||||||
|
|
||||||
|
lfd f26, 96(SP)
|
||||||
|
lfd f27, 104(SP)
|
||||||
|
lfd f28, 112(SP)
|
||||||
|
lfd f29, 120(SP)
|
||||||
|
|
||||||
|
lfd f30, 128(SP)
|
||||||
|
lfd f31, 136(SP)
|
||||||
|
|
||||||
|
ld r31, 144(SP)
|
||||||
|
ld r30, 152(SP)
|
||||||
|
ld r29, 160(SP)
|
||||||
|
ld r28, 168(SP)
|
||||||
|
ld r27, 176(SP)
|
||||||
|
ld r26, 184(SP)
|
||||||
|
ld r25, 192(SP)
|
||||||
|
ld r24, 200(SP)
|
||||||
|
ld r23, 208(SP)
|
||||||
|
ld r22, 216(SP)
|
||||||
|
ld r21, 224(SP)
|
||||||
|
ld r20, 232(SP)
|
||||||
|
ld r19, 240(SP)
|
||||||
|
ld r18, 248(SP)
|
||||||
|
ld r17, 256(SP)
|
||||||
|
ld r16, 264(SP)
|
||||||
|
ld r15, 272(SP)
|
||||||
|
ld r14, 280(SP)
|
||||||
|
|
||||||
|
lxv v20, 288(SP)
|
||||||
|
lxv v21, 304(SP)
|
||||||
|
lxv v22, 320(SP)
|
||||||
|
lxv v23, 336(SP)
|
||||||
|
lxv v24, 352(SP)
|
||||||
|
lxv v25, 368(SP)
|
||||||
|
lxv v26, 384(SP)
|
||||||
|
lxv v27, 400(SP)
|
||||||
|
lxv v28, 416(SP)
|
||||||
|
lxv v29, 432(SP)
|
||||||
|
lxv v30, 448(SP)
|
||||||
|
lxv v31, 464(SP)
|
||||||
|
|
||||||
|
|
||||||
|
addi SP, SP, STACKSIZE
|
||||||
|
blr
|
||||||
|
|
||||||
|
EPILOGUE
|
||||||
|
#endif
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -9,8 +9,8 @@ SGEMMOTCOPY = ../generic/gemm_tcopy_4.c
|
||||||
|
|
||||||
#DGEMMKERNEL = dgemm_kernel_4x8_skylakex.c
|
#DGEMMKERNEL = dgemm_kernel_4x8_skylakex.c
|
||||||
|
|
||||||
DGEMMINCOPY = dgemm_ncopy_8_skylakex.c
|
#DGEMMINCOPY = dgemm_ncopy_8_skylakex.c
|
||||||
DGEMMITCOPY = dgemm_tcopy_8_skylakex.c
|
#DGEMMITCOPY = dgemm_tcopy_8_skylakex.c
|
||||||
DGEMMONCOPY = dgemm_ncopy_8_skylakex.c
|
DGEMMONCOPY = dgemm_ncopy_8_skylakex.c
|
||||||
DGEMMOTCOPY = dgemm_tcopy_8_skylakex.c
|
DGEMMOTCOPY = dgemm_tcopy_8_skylakex.c
|
||||||
|
|
||||||
|
|
4
param.h
4
param.h
|
@ -2248,12 +2248,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#define ZGEMM_DEFAULT_UNROLL_M 8
|
#define ZGEMM_DEFAULT_UNROLL_M 8
|
||||||
#define ZGEMM_DEFAULT_UNROLL_N 2
|
#define ZGEMM_DEFAULT_UNROLL_N 2
|
||||||
|
|
||||||
#define SGEMM_DEFAULT_P 1280
|
#define SGEMM_DEFAULT_P 640
|
||||||
#define DGEMM_DEFAULT_P 128
|
#define DGEMM_DEFAULT_P 128
|
||||||
#define CGEMM_DEFAULT_P 640
|
#define CGEMM_DEFAULT_P 640
|
||||||
#define ZGEMM_DEFAULT_P 320
|
#define ZGEMM_DEFAULT_P 320
|
||||||
|
|
||||||
#define SGEMM_DEFAULT_Q 640
|
#define SGEMM_DEFAULT_Q 1408
|
||||||
#define DGEMM_DEFAULT_Q 384
|
#define DGEMM_DEFAULT_Q 384
|
||||||
#define CGEMM_DEFAULT_Q 640
|
#define CGEMM_DEFAULT_Q 640
|
||||||
#define ZGEMM_DEFAULT_Q 640
|
#define ZGEMM_DEFAULT_Q 640
|
||||||
|
|
|
@ -36,8 +36,8 @@
|
||||||
// allow malloc in xsygst for improved performance
|
// allow malloc in xsygst for improved performance
|
||||||
#define XSYGST_ALLOW_MALLOC ALLOW_MALLOC
|
#define XSYGST_ALLOW_MALLOC ALLOW_MALLOC
|
||||||
// allow malloc in xsytrf if the passed work buffer is too small
|
// allow malloc in xsytrf if the passed work buffer is too small
|
||||||
#define XSYTRF_ALLOW_MALLOC ALLOW_MALLOC
|
//#define XSYTRF_ALLOW_MALLOC ALLOW_MALLOC
|
||||||
|
#define XSYTRF_ALLOW_MALLOC 0
|
||||||
|
|
||||||
////////////////////////////////
|
////////////////////////////////
|
||||||
// LAPACK routine replacement //
|
// LAPACK routine replacement //
|
||||||
|
|
|
@ -221,7 +221,9 @@ static void RELAPACK_cgbtrf_rec(
|
||||||
}
|
}
|
||||||
|
|
||||||
// recursion(Ab_BR, ipiv_B)
|
// recursion(Ab_BR, ipiv_B)
|
||||||
RELAPACK_cgbtrf_rec(&m2, &n2, kl, ku, Ab_BR, ldAb, ipiv_B, Workl, ldWorkl, Worku, ldWorku, info);
|
//RELAPACK_cgbtrf_rec(&m2, &n2, kl, ku, Ab_BR, ldAb, ipiv_B, Workl, ldWorkl, Worku, ldWorku, info);
|
||||||
|
LAPACK(cgbtf2)(&m2, &n2, kl, ku, Ab_BR, ldAb, ipiv_B, info);
|
||||||
|
|
||||||
if (*info)
|
if (*info)
|
||||||
*info += n1;
|
*info += n1;
|
||||||
// shift pivots
|
// shift pivots
|
||||||
|
|
|
@ -22,7 +22,7 @@ void RELAPACK_cgetrf(
|
||||||
*info = -1;
|
*info = -1;
|
||||||
else if (*n < 0)
|
else if (*n < 0)
|
||||||
*info = -2;
|
*info = -2;
|
||||||
else if (*ldA < MAX(1, *n))
|
else if (*ldA < MAX(1, *m))
|
||||||
*info = -4;
|
*info = -4;
|
||||||
if (*info) {
|
if (*info) {
|
||||||
const blasint minfo = -*info;
|
const blasint minfo = -*info;
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
#include "relapack.h"
|
#include "relapack.h"
|
||||||
#include "stdlib.h"
|
#include <stdlib.h>
|
||||||
|
#include <stdio.h>
|
||||||
static void RELAPACK_dgbtrf_rec(const blasint *, const blasint *, const blasint *,
|
static void RELAPACK_dgbtrf_rec(const blasint *, const blasint *, const blasint *,
|
||||||
const blasint *, double *, const blasint *, blasint *, double *, const blasint *, double *,
|
const blasint *, double *, const blasint *, blasint *, double *, const blasint *, double *,
|
||||||
const blasint *, blasint *);
|
const blasint *, blasint *);
|
||||||
|
@ -218,7 +219,8 @@ static void RELAPACK_dgbtrf_rec(
|
||||||
}
|
}
|
||||||
|
|
||||||
// recursion(Ab_BR, ipiv_B)
|
// recursion(Ab_BR, ipiv_B)
|
||||||
RELAPACK_dgbtrf_rec(&m2, &n2, kl, ku, Ab_BR, ldAb, ipiv_B, Workl, ldWorkl, Worku, ldWorku, info);
|
// RELAPACK_dgbtrf_rec(&m2, &n2, kl, ku, Ab_BR, ldAb, ipiv_B, Workl, ldWorkl, Worku, ldWorku, info);
|
||||||
|
LAPACK(dgbtf2)(&m2, &n2, kl, ku, Ab_BR, ldAb, ipiv_B, info);
|
||||||
if (*info)
|
if (*info)
|
||||||
*info += n1;
|
*info += n1;
|
||||||
// shift pivots
|
// shift pivots
|
||||||
|
|
|
@ -15,16 +15,15 @@ void RELAPACK_dgetrf(
|
||||||
double *A, const blasint *ldA, blasint *ipiv,
|
double *A, const blasint *ldA, blasint *ipiv,
|
||||||
blasint *info
|
blasint *info
|
||||||
) {
|
) {
|
||||||
|
|
||||||
// Check arguments
|
// Check arguments
|
||||||
*info = 0;
|
*info = 0;
|
||||||
if (*m < 0)
|
if (*m < 0)
|
||||||
*info = -1;
|
*info = -1;
|
||||||
else if (*n < 0)
|
else if (*n < 0)
|
||||||
*info = -2;
|
*info = -2;
|
||||||
else if (*ldA < MAX(1, *n))
|
else if (*ldA < MAX(1, *m))
|
||||||
*info = -4;
|
*info = -4;
|
||||||
if (*info) {
|
if (*info!=0) {
|
||||||
const blasint minfo = -*info;
|
const blasint minfo = -*info;
|
||||||
LAPACK(xerbla)("DGETRF", &minfo, strlen("DGETRF"));
|
LAPACK(xerbla)("DGETRF", &minfo, strlen("DGETRF"));
|
||||||
return;
|
return;
|
||||||
|
|
|
@ -27,7 +27,7 @@ void RELAPACK_sgbtrf(
|
||||||
*info = -3;
|
*info = -3;
|
||||||
else if (*ku < 0)
|
else if (*ku < 0)
|
||||||
*info = -4;
|
*info = -4;
|
||||||
else if (*ldAb < 2 * *kl + *ku + 1)
|
else if (*ldAb < 2 * *kl + *ku + 1)
|
||||||
*info = -6;
|
*info = -6;
|
||||||
if (*info) {
|
if (*info) {
|
||||||
const blasint minfo = -*info;
|
const blasint minfo = -*info;
|
||||||
|
@ -55,15 +55,16 @@ void RELAPACK_sgbtrf(
|
||||||
|
|
||||||
// Allocate work space
|
// Allocate work space
|
||||||
const blasint n1 = SREC_SPLIT(*n);
|
const blasint n1 = SREC_SPLIT(*n);
|
||||||
const blasint mWorkl = (kv > n1) ? MAX(1, *m - *kl) : kv;
|
const blasint mWorkl = abs( (kv > n1) ? MAX(1, *m - *kl) : kv );
|
||||||
const blasint nWorkl = (kv > n1) ? n1 : kv;
|
const blasint nWorkl = abs( (kv > n1) ? n1 : kv );
|
||||||
const blasint mWorku = (*kl > n1) ? n1 : *kl;
|
const blasint mWorku = abs( (*kl > n1) ? n1 : *kl );
|
||||||
const blasint nWorku = (*kl > n1) ? MAX(0, *n - *kl) : *kl;
|
const blasint nWorku = abs( (*kl > n1) ? MAX(0, *n - *kl) : *kl );
|
||||||
float *Workl = malloc(mWorkl * nWorkl * sizeof(float));
|
float *Workl = malloc(mWorkl * nWorkl * sizeof(float));
|
||||||
float *Worku = malloc(mWorku * nWorku * sizeof(float));
|
float *Worku = malloc(mWorku * nWorku * sizeof(float));
|
||||||
LAPACK(slaset)("L", &mWorkl, &nWorkl, ZERO, ZERO, Workl, &mWorkl);
|
LAPACK(slaset)("L", &mWorkl, &nWorkl, ZERO, ZERO, Workl, &mWorkl);
|
||||||
LAPACK(slaset)("U", &mWorku, &nWorku, ZERO, ZERO, Worku, &mWorku);
|
LAPACK(slaset)("U", &mWorku, &nWorku, ZERO, ZERO, Worku, &mWorku);
|
||||||
|
|
||||||
|
|
||||||
// Recursive kernel
|
// Recursive kernel
|
||||||
RELAPACK_sgbtrf_rec(m, n, kl, ku, Ab, ldAb, ipiv, Workl, &mWorkl, Worku, &mWorku, info);
|
RELAPACK_sgbtrf_rec(m, n, kl, ku, Ab, ldAb, ipiv, Workl, &mWorkl, Worku, &mWorku, info);
|
||||||
|
|
||||||
|
@ -81,6 +82,7 @@ static void RELAPACK_sgbtrf_rec(
|
||||||
blasint *info
|
blasint *info
|
||||||
) {
|
) {
|
||||||
|
|
||||||
|
|
||||||
if (*n <= MAX(CROSSOVER_SGBTRF, 1)) {
|
if (*n <= MAX(CROSSOVER_SGBTRF, 1)) {
|
||||||
// Unblocked
|
// Unblocked
|
||||||
LAPACK(sgbtf2)(m, n, kl, ku, Ab, ldAb, ipiv, info);
|
LAPACK(sgbtf2)(m, n, kl, ku, Ab, ldAb, ipiv, info);
|
||||||
|
@ -127,7 +129,7 @@ static void RELAPACK_sgbtrf_rec(
|
||||||
float *const A_BR = A + *ldA * n1 + m1;
|
float *const A_BR = A + *ldA * n1 + m1;
|
||||||
|
|
||||||
// ipiv_T
|
// ipiv_T
|
||||||
// ipiv_B
|
// ipiv_B
|
||||||
blasint *const ipiv_T = ipiv;
|
blasint *const ipiv_T = ipiv;
|
||||||
blasint *const ipiv_B = ipiv + n1;
|
blasint *const ipiv_B = ipiv + n1;
|
||||||
|
|
||||||
|
@ -155,6 +157,7 @@ static void RELAPACK_sgbtrf_rec(
|
||||||
float *const A_BRbl = A_BR + m21;
|
float *const A_BRbl = A_BR + m21;
|
||||||
float *const A_BRbr = A_BR + *ldA * n21 + m21;
|
float *const A_BRbr = A_BR + *ldA * n21 + m21;
|
||||||
|
|
||||||
|
|
||||||
// recursion(Ab_L, ipiv_T)
|
// recursion(Ab_L, ipiv_T)
|
||||||
RELAPACK_sgbtrf_rec(m, &n1, kl, ku, Ab_L, ldAb, ipiv_T, Workl, ldWorkl, Worku, ldWorku, info);
|
RELAPACK_sgbtrf_rec(m, &n1, kl, ku, Ab_L, ldAb, ipiv_T, Workl, ldWorkl, Worku, ldWorku, info);
|
||||||
|
|
||||||
|
@ -216,8 +219,11 @@ static void RELAPACK_sgbtrf_rec(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// recursion(Ab_BR, ipiv_B)
|
// recursion(Ab_BR, ipiv_B)
|
||||||
RELAPACK_sgbtrf_rec(&m2, &n2, kl, ku, Ab_BR, ldAb, ipiv_B, Workl, ldWorkl, Worku, ldWorku, info);
|
// NOTE(review): the recursive call below is suspected of causing infinite recursion (unconfirmed)
|
||||||
|
// RELAPACK_sgbtrf_rec(&m2, &n2, kl, ku, Ab_BR, ldAb, ipiv_B, Workl, ldWorkl, Worku, ldWorku, info);
|
||||||
|
LAPACK(sgbtf2)(&m2, &n2, kl, ku, Ab_BR, ldAb, ipiv_B, info);
|
||||||
if (*info)
|
if (*info)
|
||||||
*info += n1;
|
*info += n1;
|
||||||
// shift pivots
|
// shift pivots
|
||||||
|
|
|
@ -1,5 +1,4 @@
|
||||||
#include "relapack.h"
|
#include "relapack.h"
|
||||||
|
|
||||||
static void RELAPACK_sgetrf_rec(const blasint *, const blasint *, float *, const blasint *,
|
static void RELAPACK_sgetrf_rec(const blasint *, const blasint *, float *, const blasint *,
|
||||||
blasint *, blasint *);
|
blasint *, blasint *);
|
||||||
|
|
||||||
|
@ -22,16 +21,14 @@ void RELAPACK_sgetrf(
|
||||||
*info = -1;
|
*info = -1;
|
||||||
else if (*n < 0)
|
else if (*n < 0)
|
||||||
*info = -2;
|
*info = -2;
|
||||||
else if (*ldA < MAX(1, *n))
|
else if (*ldA < MAX(1, *m))
|
||||||
*info = -4;
|
*info = -4;
|
||||||
if (*info) {
|
if (*info) {
|
||||||
const blasint minfo = -*info;
|
const blasint minfo = -*info;
|
||||||
LAPACK(xerbla)("SGETRF", &minfo, strlen("SGETRF"));
|
LAPACK(xerbla)("SGETRF", &minfo, strlen("SGETRF"));
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
const blasint sn = MIN(*m, *n);
|
const blasint sn = MIN(*m, *n);
|
||||||
|
|
||||||
RELAPACK_sgetrf_rec(m, &sn, A, ldA, ipiv, info);
|
RELAPACK_sgetrf_rec(m, &sn, A, ldA, ipiv, info);
|
||||||
|
|
||||||
// Right remainder
|
// Right remainder
|
||||||
|
@ -61,7 +58,6 @@ static void RELAPACK_sgetrf_rec(
|
||||||
float *A, const blasint *ldA, blasint *ipiv,
|
float *A, const blasint *ldA, blasint *ipiv,
|
||||||
blasint *info
|
blasint *info
|
||||||
) {
|
) {
|
||||||
|
|
||||||
if (*n <= MAX(CROSSOVER_SGETRF, 1)) {
|
if (*n <= MAX(CROSSOVER_SGETRF, 1)) {
|
||||||
// Unblocked
|
// Unblocked
|
||||||
LAPACK(sgetf2)(m, n, A, ldA, ipiv, info);
|
LAPACK(sgetf2)(m, n, A, ldA, ipiv, info);
|
||||||
|
@ -77,7 +73,6 @@ static void RELAPACK_sgetrf_rec(
|
||||||
const blasint n1 = SREC_SPLIT(*n);
|
const blasint n1 = SREC_SPLIT(*n);
|
||||||
const blasint n2 = *n - n1;
|
const blasint n2 = *n - n1;
|
||||||
const blasint m2 = *m - n1;
|
const blasint m2 = *m - n1;
|
||||||
|
|
||||||
// A_L A_R
|
// A_L A_R
|
||||||
float *const A_L = A;
|
float *const A_L = A;
|
||||||
float *const A_R = A + *ldA * n1;
|
float *const A_R = A + *ldA * n1;
|
||||||
|
|
|
@ -56,10 +56,10 @@ void RELAPACK_zgbtrf(
|
||||||
|
|
||||||
// Allocate work space
|
// Allocate work space
|
||||||
const blasint n1 = ZREC_SPLIT(*n);
|
const blasint n1 = ZREC_SPLIT(*n);
|
||||||
const blasint mWorkl = (kv > n1) ? MAX(1, *m - *kl) : kv;
|
const blasint mWorkl = abs ( (kv > n1) ? MAX(1, *m - *kl) : kv);
|
||||||
const blasint nWorkl = (kv > n1) ? n1 : kv;
|
const blasint nWorkl = abs ( (kv > n1) ? n1 : kv);
|
||||||
const blasint mWorku = (*kl > n1) ? n1 : *kl;
|
const blasint mWorku = abs ( (*kl > n1) ? n1 : *kl);
|
||||||
const blasint nWorku = (*kl > n1) ? MAX(0, *n - *kl) : *kl;
|
const blasint nWorku = abs ( (*kl > n1) ? MAX(0, *n - *kl) : *kl);
|
||||||
double *Workl = malloc(mWorkl * nWorkl * 2 * sizeof(double));
|
double *Workl = malloc(mWorkl * nWorkl * 2 * sizeof(double));
|
||||||
double *Worku = malloc(mWorku * nWorku * 2 * sizeof(double));
|
double *Worku = malloc(mWorku * nWorku * 2 * sizeof(double));
|
||||||
LAPACK(zlaset)("L", &mWorkl, &nWorkl, ZERO, ZERO, Workl, &mWorkl);
|
LAPACK(zlaset)("L", &mWorkl, &nWorkl, ZERO, ZERO, Workl, &mWorkl);
|
||||||
|
@ -221,7 +221,9 @@ static void RELAPACK_zgbtrf_rec(
|
||||||
}
|
}
|
||||||
|
|
||||||
// recursion(Ab_BR, ipiv_B)
|
// recursion(Ab_BR, ipiv_B)
|
||||||
RELAPACK_zgbtrf_rec(&m2, &n2, kl, ku, Ab_BR, ldAb, ipiv_B, Workl, ldWorkl, Worku, ldWorku, info);
|
// RELAPACK_zgbtrf_rec(&m2, &n2, kl, ku, Ab_BR, ldAb, ipiv_B, Workl, ldWorkl, Worku, ldWorku, info);
|
||||||
|
LAPACK(zgbtf2)(&m2, &n2, kl, ku, Ab_BR, ldAb, ipiv_B, info);
|
||||||
|
|
||||||
if (*info)
|
if (*info)
|
||||||
*info += n1;
|
*info += n1;
|
||||||
// shift pivots
|
// shift pivots
|
||||||
|
|
|
@ -22,7 +22,7 @@ void RELAPACK_zgetrf(
|
||||||
*info = -1;
|
*info = -1;
|
||||||
else if (*n < 0)
|
else if (*n < 0)
|
||||||
*info = -2;
|
*info = -2;
|
||||||
else if (*ldA < MAX(1, *n))
|
else if (*ldA < MAX(1, *m))
|
||||||
*info = -4;
|
*info = -4;
|
||||||
if (*info) {
|
if (*info) {
|
||||||
const blasint minfo = -*info;
|
const blasint minfo = -*info;
|
||||||
|
|
|
@ -576,7 +576,7 @@
|
||||||
SUBROUTINE STEST1(SCOMP1,STRUE1,SSIZE,SFAC)
|
SUBROUTINE STEST1(SCOMP1,STRUE1,SSIZE,SFAC)
|
||||||
* ************************* STEST1 *****************************
|
* ************************* STEST1 *****************************
|
||||||
*
|
*
|
||||||
* THIS IS AN INTERFACE SUBROUTINE TO ACCOMODATE THE FORTRAN
|
* THIS IS AN INTERFACE SUBROUTINE TO ACCOMMODATE THE FORTRAN
|
||||||
* REQUIREMENT THAT WHEN A DUMMY ARGUMENT IS AN ARRAY, THE
|
* REQUIREMENT THAT WHEN A DUMMY ARGUMENT IS AN ARRAY, THE
|
||||||
* ACTUAL ARGUMENT MUST ALSO BE AN ARRAY OR AN ARRAY ELEMENT.
|
* ACTUAL ARGUMENT MUST ALSO BE AN ARRAY OR AN ARRAY ELEMENT.
|
||||||
*
|
*
|
||||||
|
|
|
@ -991,7 +991,7 @@
|
||||||
SUBROUTINE STEST1(SCOMP1,STRUE1,SSIZE,SFAC)
|
SUBROUTINE STEST1(SCOMP1,STRUE1,SSIZE,SFAC)
|
||||||
* ************************* STEST1 *****************************
|
* ************************* STEST1 *****************************
|
||||||
*
|
*
|
||||||
* THIS IS AN INTERFACE SUBROUTINE TO ACCOMODATE THE FORTRAN
|
* THIS IS AN INTERFACE SUBROUTINE TO ACCOMMODATE THE FORTRAN
|
||||||
* REQUIREMENT THAT WHEN A DUMMY ARGUMENT IS AN ARRAY, THE
|
* REQUIREMENT THAT WHEN A DUMMY ARGUMENT IS AN ARRAY, THE
|
||||||
* ACTUAL ARGUMENT MUST ALSO BE AN ARRAY OR AN ARRAY ELEMENT.
|
* ACTUAL ARGUMENT MUST ALSO BE AN ARRAY OR AN ARRAY ELEMENT.
|
||||||
*
|
*
|
||||||
|
|
|
@ -946,7 +946,7 @@
|
||||||
SUBROUTINE STEST1(SCOMP1,STRUE1,SSIZE,SFAC)
|
SUBROUTINE STEST1(SCOMP1,STRUE1,SSIZE,SFAC)
|
||||||
* ************************* STEST1 *****************************
|
* ************************* STEST1 *****************************
|
||||||
*
|
*
|
||||||
* THIS IS AN INTERFACE SUBROUTINE TO ACCOMODATE THE FORTRAN
|
* THIS IS AN INTERFACE SUBROUTINE TO ACCOMMODATE THE FORTRAN
|
||||||
* REQUIREMENT THAT WHEN A DUMMY ARGUMENT IS AN ARRAY, THE
|
* REQUIREMENT THAT WHEN A DUMMY ARGUMENT IS AN ARRAY, THE
|
||||||
* ACTUAL ARGUMENT MUST ALSO BE AN ARRAY OR AN ARRAY ELEMENT.
|
* ACTUAL ARGUMENT MUST ALSO BE AN ARRAY OR AN ARRAY ELEMENT.
|
||||||
*
|
*
|
||||||
|
|
|
@ -576,7 +576,7 @@
|
||||||
SUBROUTINE STEST1(SCOMP1,STRUE1,SSIZE,SFAC)
|
SUBROUTINE STEST1(SCOMP1,STRUE1,SSIZE,SFAC)
|
||||||
* ************************* STEST1 *****************************
|
* ************************* STEST1 *****************************
|
||||||
*
|
*
|
||||||
* THIS IS AN INTERFACE SUBROUTINE TO ACCOMODATE THE FORTRAN
|
* THIS IS AN INTERFACE SUBROUTINE TO ACCOMMODATE THE FORTRAN
|
||||||
* REQUIREMENT THAT WHEN A DUMMY ARGUMENT IS AN ARRAY, THE
|
* REQUIREMENT THAT WHEN A DUMMY ARGUMENT IS AN ARRAY, THE
|
||||||
* ACTUAL ARGUMENT MUST ALSO BE AN ARRAY OR AN ARRAY ELEMENT.
|
* ACTUAL ARGUMENT MUST ALSO BE AN ARRAY OR AN ARRAY ELEMENT.
|
||||||
*
|
*
|
||||||
|
|
Loading…
Reference in New Issue