Merge pull request #4316 from OpenMathLib/develop
Merge develop into release-0.3.0 for 0.3.25
This commit is contained in:
commit
5e1a429eab
10
.cirrus.yml
10
.cirrus.yml
|
|
@ -148,6 +148,16 @@ FreeBSD_task:
|
|||
- ls -l /usr/local/lib
|
||||
- gmake CC=gcc INTERFACE64=1
|
||||
|
||||
FreeBSD_task:
|
||||
name: FreeBSD-clang-openmp
|
||||
freebsd_instance:
|
||||
image_family: freebsd-13-2
|
||||
install_script:
|
||||
- pkg update -f && pkg upgrade -y && pkg install -y gmake gcc
|
||||
- ln -s /usr/local/lib/gcc12/libgfortran.so.5.0.0 /usr/lib/libgfortran.so
|
||||
compile_script:
|
||||
- gmake CC=clang FC=gfortran USE_OPENMP=1 CPP_THREAD_SAFETY_TEST=1
|
||||
|
||||
#task:
|
||||
# name: Windows/LLVM16 --- too slow ---
|
||||
# windows_container:
|
||||
|
|
|
|||
|
|
@ -0,0 +1,16 @@
|
|||
# Self-Hosted Github Action Runners on AWS via Cirun.io
|
||||
# Reference: https://docs.cirun.io/reference/yaml
|
||||
runners:
|
||||
- name: "aws-runner-graviton"
|
||||
# Cloud Provider: AWS
|
||||
cloud: "aws"
|
||||
region: "us-east-1"
|
||||
# Cheapest VM on AWS
|
||||
instance_type: "c7g.large"
|
||||
# Ubuntu-22.04, ami image
|
||||
machine_image: "ami-0a0c8eebcdd6dcbd0"
|
||||
preemptible: false
|
||||
# Add this label in the "runs-on" param in .github/workflows/<workflow-name>.yml
|
||||
# So that this runner is created for running the workflow
|
||||
labels:
|
||||
- "cirun-aws-runner-graviton"
|
||||
|
|
@ -0,0 +1,139 @@
|
|||
name: arm64 graviton cirun
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- develop
|
||||
- release-**
|
||||
pull_request:
|
||||
branches:
|
||||
- develop
|
||||
- release-**
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
|
||||
cancel-in-progress: true
|
||||
|
||||
permissions:
|
||||
contents: read # to fetch code (actions/checkout)
|
||||
|
||||
jobs:
|
||||
build:
|
||||
if: "github.repository == 'OpenMathLib/OpenBLAS'"
|
||||
runs-on: "cirun-aws-runner-graviton--${{ github.run_id }}"
|
||||
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
fortran: [gfortran]
|
||||
build: [cmake, make]
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v3
|
||||
|
||||
- name: Print system information
|
||||
run: |
|
||||
if [ "$RUNNER_OS" == "Linux" ]; then
|
||||
cat /proc/cpuinfo
|
||||
else
|
||||
echo "::error::$RUNNER_OS not supported"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
- name: Install Dependencies
|
||||
run: |
|
||||
if [ "$RUNNER_OS" == "Linux" ]; then
|
||||
sudo apt update
|
||||
sudo apt-get install -y gfortran cmake ccache libtinfo5
|
||||
else
|
||||
echo "::error::$RUNNER_OS not supported"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
- name: Compilation cache
|
||||
uses: actions/cache@v3
|
||||
with:
|
||||
path: ~/.ccache
|
||||
# We include the commit sha in the cache key, as new cache entries are
|
||||
# only created if there is no existing entry for the key yet.
|
||||
# GNU make and cmake call the compilers differently. It looks like
|
||||
# that causes the cache to mismatch. Keep the ccache for both build
|
||||
# tools separate to avoid polluting each other.
|
||||
key: ccache-${{ runner.os }}-${{ matrix.build }}-${{ matrix.fortran }}-${{ github.ref }}-${{ github.sha }}
|
||||
# Restore a matching ccache cache entry. Prefer same branch and same Fortran compiler.
|
||||
restore-keys: |
|
||||
ccache-${{ runner.os }}-${{ matrix.build }}-${{ matrix.fortran }}-${{ github.ref }}
|
||||
ccache-${{ runner.os }}-${{ matrix.build }}-${{ matrix.fortran }}
|
||||
ccache-${{ runner.os }}-${{ matrix.build }}
|
||||
|
||||
- name: Configure ccache
|
||||
run: |
|
||||
if [ "${{ matrix.build }}" = "make" ]; then
|
||||
# Add ccache to path
|
||||
if [ "$RUNNER_OS" = "Linux" ]; then
|
||||
echo "/usr/lib/ccache" >> $GITHUB_PATH
|
||||
else
|
||||
echo "::error::$RUNNER_OS not supported"
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
# Limit the maximum size and switch on compression to avoid exceeding the total disk or cache quota (5 GB).
|
||||
test -d ~/.ccache || mkdir -p ~/.ccache
|
||||
echo "max_size = 300M" > ~/.ccache/ccache.conf
|
||||
echo "compression = true" >> ~/.ccache/ccache.conf
|
||||
ccache -s
|
||||
|
||||
- name: Build OpenBLAS
|
||||
run: |
|
||||
case "${{ matrix.build }}" in
|
||||
"make")
|
||||
make -j$(nproc) DYNAMIC_ARCH=1 USE_OPENMP=0 FC="ccache ${{ matrix.fortran }}"
|
||||
;;
|
||||
"cmake")
|
||||
mkdir build && cd build
|
||||
cmake -DDYNAMIC_ARCH=1 \
|
||||
-DNOFORTRAN=0 \
|
||||
-DBUILD_WITHOUT_LAPACK=0 \
|
||||
-DCMAKE_VERBOSE_MAKEFILE=ON \
|
||||
-DCMAKE_BUILD_TYPE=Release \
|
||||
-DCMAKE_Fortran_COMPILER=${{ matrix.fortran }} \
|
||||
-DCMAKE_C_COMPILER_LAUNCHER=ccache \
|
||||
-DCMAKE_Fortran_COMPILER_LAUNCHER=ccache \
|
||||
..
|
||||
cmake --build .
|
||||
;;
|
||||
*)
|
||||
echo "::error::Configuration not supported"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
|
||||
- name: Show ccache status
|
||||
continue-on-error: true
|
||||
run: ccache -s
|
||||
|
||||
- name: Run tests
|
||||
timeout-minutes: 60
|
||||
run: |
|
||||
case "${{ matrix.build }}" in
|
||||
"make")
|
||||
MAKE_FLAGS='DYNAMIC_ARCH=1 USE_OPENMP=0'
|
||||
echo "::group::Tests in 'test' directory"
|
||||
make -C test $MAKE_FLAGS FC="ccache ${{ matrix.fortran }}"
|
||||
echo "::endgroup::"
|
||||
echo "::group::Tests in 'ctest' directory"
|
||||
make -C ctest $MAKE_FLAGS FC="ccache ${{ matrix.fortran }}"
|
||||
echo "::endgroup::"
|
||||
echo "::group::Tests in 'utest' directory"
|
||||
make -C utest $MAKE_FLAGS FC="ccache ${{ matrix.fortran }}"
|
||||
echo "::endgroup::"
|
||||
;;
|
||||
"cmake")
|
||||
cd build && ctest
|
||||
;;
|
||||
*)
|
||||
echo "::error::Configuration not supported"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
|
|
@ -2,11 +2,16 @@ name: c910v qemu test
|
|||
|
||||
on: [push, pull_request]
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
|
||||
cancel-in-progress: true
|
||||
|
||||
permissions:
|
||||
contents: read # to fetch code (actions/checkout)
|
||||
|
||||
jobs:
|
||||
TEST:
|
||||
if: "github.repository == 'OpenMathLib/OpenBLAS'"
|
||||
runs-on: ubuntu-latest
|
||||
env:
|
||||
xuetie_toolchain: https://occ-oss-prod.oss-cn-hangzhou.aliyuncs.com/resource//1663142514282
|
||||
|
|
|
|||
|
|
@ -2,11 +2,16 @@ name: continuous build
|
|||
|
||||
on: [push, pull_request]
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
|
||||
cancel-in-progress: true
|
||||
|
||||
permissions:
|
||||
contents: read # to fetch code (actions/checkout)
|
||||
|
||||
jobs:
|
||||
build:
|
||||
if: "github.repository == 'OpenMathLib/OpenBLAS'"
|
||||
runs-on: ${{ matrix.os }}
|
||||
|
||||
strategy:
|
||||
|
|
@ -146,18 +151,19 @@ jobs:
|
|||
|
||||
|
||||
msys2:
|
||||
if: "github.repository == 'OpenMathLib/OpenBLAS'"
|
||||
runs-on: windows-latest
|
||||
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
msystem: [MINGW64, MINGW32, CLANG64, CLANG32]
|
||||
msystem: [UCRT64, MINGW32, CLANG64, CLANG32]
|
||||
idx: [int32, int64]
|
||||
build-type: [Release]
|
||||
include:
|
||||
- msystem: MINGW64
|
||||
- msystem: UCRT64
|
||||
idx: int32
|
||||
target-prefix: mingw-w64-x86_64
|
||||
target-prefix: mingw-w64-ucrt-x86_64
|
||||
fc-pkg: fc
|
||||
- msystem: MINGW32
|
||||
idx: int32
|
||||
|
|
@ -175,10 +181,10 @@ jobs:
|
|||
target-prefix: mingw-w64-clang-i686
|
||||
fc-pkg: cc
|
||||
c-lapack-flags: -DC_LAPACK=ON
|
||||
- msystem: MINGW64
|
||||
- msystem: UCRT64
|
||||
idx: int64
|
||||
idx64-flags: -DBINARY=64 -DINTERFACE64=1
|
||||
target-prefix: mingw-w64-x86_64
|
||||
target-prefix: mingw-w64-ucrt-x86_64
|
||||
fc-pkg: fc
|
||||
- msystem: CLANG64
|
||||
idx: int64
|
||||
|
|
@ -188,9 +194,9 @@ jobs:
|
|||
# Compiling with Flang 16 seems to cause test errors on machines
|
||||
# with AVX512 instructions. Revisit after MSYS2 distributes Flang 17.
|
||||
no-avx512-flags: -DNO_AVX512=1
|
||||
- msystem: MINGW64
|
||||
- msystem: UCRT64
|
||||
idx: int32
|
||||
target-prefix: mingw-w64-x86_64
|
||||
target-prefix: mingw-w64-ucrt-x86_64
|
||||
fc-pkg: fc
|
||||
build-type: None
|
||||
exclude:
|
||||
|
|
@ -312,6 +318,7 @@ jobs:
|
|||
|
||||
|
||||
cross_build:
|
||||
if: "github.repository == 'OpenMathLib/OpenBLAS'"
|
||||
runs-on: ubuntu-22.04
|
||||
|
||||
strategy:
|
||||
|
|
|
|||
|
|
@ -2,8 +2,13 @@ name: loongarch64 qemu test
|
|||
|
||||
on: [push, pull_request]
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
TEST:
|
||||
if: "github.repository == 'OpenMathLib/OpenBLAS'"
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
fail-fast: false
|
||||
|
|
@ -18,6 +23,9 @@ jobs:
|
|||
- target: LOONGSON2K1000
|
||||
triple: loongarch64-unknown-linux-gnu
|
||||
opts: NO_SHARED=1 TARGET=LOONGSON2K1000
|
||||
- target: DYNAMIC_ARCH
|
||||
triple: loongarch64-unknown-linux-gnu
|
||||
opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=GENERIC
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
|
|
|
|||
|
|
@ -2,11 +2,16 @@ name: mips64 qemu test
|
|||
|
||||
on: [push, pull_request]
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
|
||||
cancel-in-progress: true
|
||||
|
||||
permissions:
|
||||
contents: read # to fetch code (actions/checkout)
|
||||
|
||||
jobs:
|
||||
TEST:
|
||||
if: "github.repository == 'OpenMathLib/OpenBLAS'"
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
fail-fast: false
|
||||
|
|
|
|||
|
|
@ -18,11 +18,16 @@ on:
|
|||
|
||||
name: Nightly-Homebrew-Build
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
|
||||
cancel-in-progress: true
|
||||
|
||||
permissions:
|
||||
contents: read # to fetch code (actions/checkout)
|
||||
|
||||
jobs:
|
||||
build-OpenBLAS-with-Homebrew:
|
||||
if: "github.repository == 'OpenMathLib/OpenBLAS'"
|
||||
runs-on: macos-latest
|
||||
env:
|
||||
DEVELOPER_DIR: /Applications/Xcode_11.4.1.app/Contents/Developer
|
||||
|
|
|
|||
|
|
@ -1,4 +1,50 @@
|
|||
OpenBLAS ChangeLog
|
||||
====================================================================
|
||||
Version 0.3.25
|
||||
12-Nov-2023
|
||||
|
||||
general:
|
||||
- improved the error message shown on exceeding the maximum thread count
|
||||
- improved the code to add supplementary thread buffers in case of overflow
|
||||
- fixed a potential division by zero in ?ROTG
|
||||
- improved the ?MATCOPY functions to accept zero-sized rows or columns
|
||||
- corrected empty prototypes in function declarations
|
||||
- cleaned up unused declarations in the f2c-converted versions of the LAPACK sources
|
||||
- fixed compilation with the Cray CCE Compiler suite
|
||||
- improved link line rewriting to avoid mixed libgomp/libomp builds with clang&gfortran
|
||||
- worked around OPENMP builds with LLVM14's libomp hanging on FreeBSD
|
||||
- improved the Makefiles to require less option duplication on "make install"
|
||||
- imported the following changes from the upcoming release 3.12 of Reference-LAPACK
|
||||
- deprecate utility functions ?GELQS and ?GEQRS (LAPACK PR 900)
|
||||
- apply rounding up to workspace calculations done in floating point (LAPACK PR 904)
|
||||
- avoid overflow in STGEX2/DTGEX2 (LAPACK PR 907)
|
||||
- fix accumulation in ?LASSQ (LAPACK PR 909)
|
||||
- fix handling of NaN values in ?GECON (LAPACK PR 926)
|
||||
- avoid overflow in CBDSQR/ZBDSQR (LAPACK PR 927)
|
||||
- fix poor vector orthogonalizations in ?ORBDB5/?UNBDB5 (LAPACK PR 928 & 930)
|
||||
|
||||
x86-64:
|
||||
- fixed compile-time autodetection of AMD Ryzen3 and Ryzen4 cpus
|
||||
- fixed capability-based fallback selection for unknown cpus in DYNAMIC_ARCH
|
||||
- added AVX512 optimizations for ?ASUM on Sapphire Rapids and Cooper Lake
|
||||
|
||||
ARM64:
|
||||
- fixed building on Apple with homebrew gcc
|
||||
- fixed building with XCODE 15
|
||||
- fixed building on A64FX and Cortex A710/X1/X2
|
||||
- increased the default buffer size for recent ARM server cpus
|
||||
|
||||
POWER:
|
||||
- fixed building with the IBM xlf 16.1.1 compiler
|
||||
- fixed building with IBM XL C
|
||||
- added support for DYNAMIC_ARCH builds with clang
|
||||
- fixed union declaration in the BFLOAT16 test case
|
||||
- enable optimizations for the AIX assembler on POWER10
|
||||
|
||||
LOONGARCH64:
|
||||
- added an optimized SGEMV kernel
|
||||
- added an optimized DTRSM kernel
|
||||
|
||||
====================================================================
|
||||
Version 0.3.24
|
||||
03-Sep-2023
|
||||
|
|
|
|||
20
Makefile
20
Makefile
|
|
@ -35,7 +35,11 @@ export NO_LAPACK
|
|||
export C_LAPACK
|
||||
endif
|
||||
|
||||
ifeq ($(F_COMPILER),CRAY)
|
||||
LAPACK_NOOPT := $(filter-out -O0 -O1 -O2 -O3 -Ofast -Og -Os,$(LAPACK_FFLAGS))
|
||||
else
|
||||
LAPACK_NOOPT := $(filter-out -O0 -O1 -O2 -O3 -Ofast -O -Og -Os,$(LAPACK_FFLAGS))
|
||||
endif
|
||||
|
||||
SUBDIRS_ALL = $(SUBDIRS) test ctest utest exports benchmark ../laswp ../bench cpp_thread_test
|
||||
|
||||
|
|
@ -206,9 +210,25 @@ ifeq ($(DYNAMIC_OLDER), 1)
|
|||
@echo DYNAMIC_OLDER=1 >> Makefile.conf_last
|
||||
endif
|
||||
endif
|
||||
@echo TARGET=$(CORE) >> Makefile.conf_last
|
||||
ifdef USE_THREAD
|
||||
@echo USE_THREAD=$(USE_THREAD) >> Makefile.conf_last
|
||||
endif
|
||||
ifdef SMP
|
||||
ifdef NUM_THREADS
|
||||
@echo NUM_THREADS=$(NUM_THREADS) >> Makefile.conf_last
|
||||
else
|
||||
@echo NUM_THREADS=$(NUM_CORES) >> Makefile.conf_last
|
||||
endif
|
||||
endif
|
||||
ifeq ($(USE_OPENMP),1)
|
||||
@echo USE_OPENMP=1 >> Makefile.conf_last
|
||||
endif
|
||||
ifeq ($(INTERFACE64),1)
|
||||
@echo INTERFACE64=1 >> Makefile.conf_last
|
||||
endif
|
||||
@echo THELIBNAME=$(LIBNAME) >> Makefile.conf_last
|
||||
@echo THELIBSONAME=$(LIBSONAME) >> Makefile.conf_last
|
||||
@-ln -fs $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX)
|
||||
@touch lib.grd
|
||||
|
||||
|
|
|
|||
5089
Makefile.L3
5089
Makefile.L3
File diff suppressed because it is too large
Load Diff
|
|
@ -3,6 +3,14 @@ export GOTOBLAS_MAKEFILE = 1
|
|||
-include $(TOPDIR)/Makefile.conf_last
|
||||
include ./Makefile.system
|
||||
|
||||
ifdef THELIBNAME
|
||||
LIBNAME=$(THELIBNAME)
|
||||
LIBSONAME=$(THELIBSONAME)
|
||||
endif
|
||||
ifeq ($(INTERFACE64),1)
|
||||
USE_64BITINT=1
|
||||
endif
|
||||
|
||||
PREFIX ?= /opt/OpenBLAS
|
||||
|
||||
OPENBLAS_INCLUDE_DIR := $(PREFIX)/include
|
||||
|
|
|
|||
|
|
@ -13,9 +13,9 @@ ifeq ($(CORE), POWER10)
|
|||
ifneq ($(C_COMPILER), PGI)
|
||||
CCOMMON_OPT += -Ofast -mcpu=power10 -mtune=power10 -mvsx -fno-fast-math
|
||||
ifeq ($(F_COMPILER), IBM)
|
||||
FCOMMON_OPT += -O2 -qrecur -qnosave
|
||||
FCOMMON_OPT += -O2 -qrecur -qnosave -qarch=pwr10 -qtune=pwr10 -qfloat=nomaf -qzerosize
|
||||
else
|
||||
FCOMMON_OPT += -O2 -frecursive -mcpu=power10 -mtune=power10 -fno-fast-math
|
||||
FCOMMON_OPT += -O2 -frecursive -mcpu=power10 -mtune=power10 -fno-fast-math
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
|
@ -38,19 +38,18 @@ CCOMMON_OPT += -fast -Mvect=simd -Mcache_align
|
|||
endif
|
||||
ifneq ($(F_COMPILER), PGI)
|
||||
ifeq ($(F_COMPILER), IBM)
|
||||
FCOMMON_OPT += -O2 -qrecur -qnosave
|
||||
FCOMMON_OPT += -O2 -qrecur -qnosave -qarch=pwr9 -qtune=pwr9 -qfloat=nomaf -qzerosize
|
||||
else
|
||||
FCOMMON_OPT += -O2 -frecursive -fno-fast-math
|
||||
FCOMMON_OPT += -O2 -frecursive -fno-fast-math -mcpu=power9 -mtune=power9
|
||||
endif
|
||||
ifeq ($(C_COMPILER), GCC)
|
||||
|
||||
ifeq ($(F_COMPILER), GFORTRAN)
|
||||
ifneq ($(GCCVERSIONGT4), 1)
|
||||
$(warning your compiler is too old to fully support POWER9, getting a newer version of gcc is recommended)
|
||||
FCOMMON_OPT += -mcpu=power8 -mtune=power8
|
||||
else
|
||||
FCOMMON_OPT += -mcpu=power9 -mtune=power9
|
||||
endif
|
||||
else
|
||||
FCOMMON_OPT += -mcpu=power9 -mtune=power9
|
||||
endif
|
||||
else
|
||||
FCOMMON_OPT += -O2 -Mrecursive
|
||||
|
|
@ -66,12 +65,16 @@ endif
|
|||
ifneq ($(F_COMPILER), PGI)
|
||||
ifeq ($(OSNAME), AIX)
|
||||
ifeq ($(F_COMPILER), IBM)
|
||||
FCOMMON_OPT += -O2 -qrecur -qnosave
|
||||
FCOMMON_OPT += -O2 -qrecur -qnosave -qarch=pwr8 -qtune=pwr8 -qfloat=nomaf -qzerosize
|
||||
else
|
||||
FCOMMON_OPT += -O1 -frecursive -mcpu=power8 -mtune=power8 -fno-fast-math
|
||||
FCOMMON_OPT += -O1 -frecursive -mcpu=power8 -mtune=power8 -fno-fast-math
|
||||
endif
|
||||
else
|
||||
FCOMMON_OPT += -O2 -frecursive -mcpu=power8 -mtune=power8 -fno-fast-math
|
||||
ifeq ($(F_COMPILER), IBM)
|
||||
FCOMMON_OPT += -O2 -qrecur -qnosave -qarch=pwr8 -qtune=pwr8 -qfloat=nomaf -qzerosize
|
||||
else
|
||||
FCOMMON_OPT += -O2 -frecursive -mcpu=power8 -mtune=power8 -fno-fast-math
|
||||
endif
|
||||
endif
|
||||
else
|
||||
FCOMMON_OPT += -O2 -Mrecursive
|
||||
|
|
@ -84,13 +87,20 @@ CCOMMON_OPT += -DUSE_OPENMP -fopenmp
|
|||
else
|
||||
CCOMMON_OPT += -DUSE_OPENMP -mp
|
||||
endif
|
||||
ifeq ($(F_COMPILER), IBM)
|
||||
FCOMMON_OPT += -DUSE_OPENMP
|
||||
else
|
||||
ifneq ($(F_COMPILER), PGI)
|
||||
FCOMMON_OPT += -DUSE_OPENMP -fopenmp
|
||||
else
|
||||
FCOMMON_OPT += -DUSE_OPENMP -mp
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(C_COMPILER), CLANG)
|
||||
CCOMMON_OPT += -fno-integrated-as
|
||||
endif
|
||||
# workaround for C->FORTRAN ABI violation in LAPACKE
|
||||
ifeq ($(F_COMPILER), GFORTRAN)
|
||||
FCOMMON_OPT += -fno-optimize-sibling-calls
|
||||
|
|
@ -125,8 +135,19 @@ endif
|
|||
ifdef BINARY64
|
||||
|
||||
|
||||
ifeq ($(C_COMPILER)$(F_COMPILER)$(OSNAME), GCCIBMAIX)
|
||||
$(error Using GCC and XLF on AIX is not a supported combination.)
|
||||
endif
|
||||
ifeq ($(C_COMPILER)$(F_COMPILER)$(OSNAME), CLANGGFORTRANAIX)
|
||||
$(error Using Clang and gFortran on AIX is not a supported combination.)
|
||||
endif
|
||||
|
||||
ifeq ($(OSNAME), AIX)
|
||||
ifeq ($(C_COMPILER), GCC)
|
||||
CCOMMON_OPT += -mpowerpc64 -maix64
|
||||
else
|
||||
CCOMMON_OPT += -m64
|
||||
endif
|
||||
ifeq ($(COMPILER_F77), g77)
|
||||
FCOMMON_OPT += -mpowerpc64 -maix64
|
||||
endif
|
||||
|
|
|
|||
|
|
@ -277,10 +277,6 @@ endif
|
|||
ifndef GOTOBLAS_MAKEFILE
|
||||
export GOTOBLAS_MAKEFILE = 1
|
||||
|
||||
# Determine if the assembler is GNU Assembler
|
||||
HAVE_GAS := $(shell $(AS) -v < /dev/null 2>&1 | grep GNU 2>&1 >/dev/null ; echo $$?)
|
||||
GETARCH_FLAGS += -DHAVE_GAS=$(HAVE_GAS)
|
||||
|
||||
# Generating Makefile.conf and config.h
|
||||
DUMMY := $(shell $(MAKE) -C $(TOPDIR) -f Makefile.prebuild CC="$(CC)" FC="$(FC)" HOSTCC="$(HOSTCC)" HOST_CFLAGS="$(GETARCH_FLAGS)" CFLAGS="$(CFLAGS)" BINARY=$(BINARY) USE_OPENMP=$(USE_OPENMP) DYNAMIC_ARCH=$(DYNAMIC_ARCH) TARGET_CORE=$(TARGET_CORE) ONLY_CBLAS=$(ONLY_CBLAS) TARGET=$(TARGET) all)
|
||||
|
||||
|
|
@ -397,11 +393,21 @@ ifeq ($(OSNAME), Darwin)
|
|||
ifndef MACOSX_DEPLOYMENT_TARGET
|
||||
ifeq ($(ARCH), arm64)
|
||||
export MACOSX_DEPLOYMENT_TARGET=11.0
|
||||
ifeq ($(C_COMPILER), GCC)
|
||||
export NO_SVE = 1
|
||||
endif
|
||||
else
|
||||
export MACOSX_DEPLOYMENT_TARGET=10.8
|
||||
endif
|
||||
endif
|
||||
MD5SUM = md5 -r
|
||||
XCVER = $(shell pkgutil --pkg-info=com.apple.pkg.Xcode |awk '/version:/ {print $2}'|cut -d: -f2|cut -f1 -d.)
|
||||
ifeq (x$(XCVER)x,xx)
|
||||
XCVER = $(shell pkgutil --pkg-info=com.apple.pkg.CLTools_Executables |awk '/version:/ {print $2}'|cut -d: -f2|cut -f1 -d.)
|
||||
endif
|
||||
ifeq (x$(XCVER), x 15)
|
||||
CCOMMON_OPT += -Wl,-ld_classic
|
||||
endif
|
||||
endif
|
||||
|
||||
ifneq (,$(findstring $(OSNAME), FreeBSD OpenBSD DragonFly))
|
||||
|
|
@ -602,6 +608,9 @@ endif
|
|||
|
||||
ifeq ($(C_COMPILER), CLANG)
|
||||
CCOMMON_OPT += -fopenmp
|
||||
ifeq ($(F_COMPILER), GFORTRAN)
|
||||
FEXTRALIB := $(subst -lgomp,-lomp,$(FEXTRALIB))
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(C_COMPILER), INTEL)
|
||||
|
|
@ -750,7 +759,11 @@ DYNAMIC_CORE += POWER9
|
|||
else
|
||||
$(info, OpenBLAS: Your gcc version is too old to build the POWER9 kernels.)
|
||||
endif
|
||||
ifeq ($(OSNAME), AIX)
|
||||
LDVERSIONGTEQ35 := 1
|
||||
else
|
||||
LDVERSIONGTEQ35 := $(shell expr `$(CC) -Wl,--version 2> /dev/null | head -1 | cut -f2 -d "." | cut -f1 -d "-"` \>= 35)
|
||||
endif
|
||||
ifeq ($(GCCVERSIONGTEQ11)$(LDVERSIONGTEQ35), 11)
|
||||
DYNAMIC_CORE += POWER10
|
||||
CCOMMON_OPT += -DHAVE_P10_SUPPORT
|
||||
|
|
@ -1164,6 +1177,10 @@ endif
|
|||
|
||||
ifeq ($(F_COMPILER), IBM)
|
||||
CCOMMON_OPT += -DF_INTERFACE_IBM
|
||||
FEXTRALIB += -lxlf90
|
||||
ifeq ($(C_COMPILER), $(filter $(C_COMPILER),GCC CLANG))
|
||||
FCOMMON_OPT += -qextname
|
||||
endif
|
||||
# FCOMMON_OPT += -qarch=440
|
||||
ifdef BINARY64
|
||||
FCOMMON_OPT += -q64
|
||||
|
|
@ -1360,6 +1377,8 @@ ifeq ($(F_COMPILER), SUN)
|
|||
FCOMMON_OPT += -pic
|
||||
else ifeq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -PIC
|
||||
else ifeq ($(F_COMPILER), IBM)
|
||||
FCOMMON_OPT += -qpic=large
|
||||
else
|
||||
FCOMMON_OPT += -fPIC
|
||||
endif
|
||||
|
|
@ -1612,9 +1631,11 @@ override FPFLAGS += $(FCOMMON_OPT) $(COMMON_PROF)
|
|||
|
||||
ifeq ($(NEED_PIC), 1)
|
||||
ifeq (,$(findstring PIC,$(FFLAGS)))
|
||||
ifneq ($(F_COMPILER),IBM)
|
||||
override FFLAGS += -fPIC
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
#For LAPACK Fortran codes.
|
||||
#Disable -fopenmp for LAPACK Fortran codes on Windows.
|
||||
|
|
@ -1628,11 +1649,11 @@ endif
|
|||
|
||||
ifeq ($(F_COMPILER),NAG)
|
||||
LAPACK_FFLAGS := $(filter-out -msse3 -mssse3 -msse4.1 -mavx -mavx2 -mskylake-avx512 ,$(FFLAGS))
|
||||
FFLAGS := $(filter-out -msse3 -mssse3 -msse4.1 -mavx -mavx2 -mskylake-avx512 ,$(FFLAGS))
|
||||
override FFLAGS := $(filter-out -msse3 -mssse3 -msse4.1 -mavx -mavx2 -mskylake-avx512 ,$(FFLAGS))
|
||||
endif
|
||||
ifeq ($(F_COMPILER),CRAY)
|
||||
LAPACK_FFLAGS := $(filter-out -msse3 -mssse3 -msse4.1 -mavx -mavx2 -mskylake-avx512 ,$(FFLAGS))
|
||||
FFLAGS := $(filter-out -msse3 -mssse3 -msse4.1 -mavx -mavx2 -mskylake-avx512 ,$(FFLAGS))
|
||||
override FFLAGS := $(filter-out -msse3 -mssse3 -msse4.1 -mavx -mavx2 -mskylake-avx512 ,$(FFLAGS))
|
||||
endif
|
||||
|
||||
LAPACK_CFLAGS = $(CFLAGS)
|
||||
|
|
|
|||
24
README.md
24
README.md
|
|
@ -54,10 +54,15 @@ Building OpenBLAS requires the following to be installed:
|
|||
|
||||
Simply invoking `make` (or `gmake` on BSD) will detect the CPU automatically.
|
||||
To set a specific target CPU, use `make TARGET=xxx`, e.g. `make TARGET=NEHALEM`.
|
||||
The full target list is in the file `TargetList.txt`. For building with `cmake`, the
|
||||
usual conventions apply, i.e. create a build directory either underneath the toplevel
|
||||
OpenBLAS source directory or separate from it, and invoke `cmake` there with the path
|
||||
to the source tree and any build options you plan to set.
|
||||
The full target list is in the file `TargetList.txt`, other build optionss are documented in Makefile.rule and
|
||||
can either be set there (typically by removing the comment character from the respective line), or used on the
|
||||
`make` command line.
|
||||
Note that when you run `make install` after building, you need to repeat all command line options you provided to `make`
|
||||
in the build step, as some settings like the supported maximum number of threads are automatically derived from the
|
||||
build host by default, which might not be what you want.
|
||||
For building with `cmake`, the usual conventions apply, i.e. create a build directory either underneath the toplevel
|
||||
OpenBLAS source directory or separate from it, and invoke `cmake` there with the path to the source tree and any
|
||||
build options you plan to set.
|
||||
|
||||
### Cross compile
|
||||
|
||||
|
|
@ -117,7 +122,7 @@ Use `PREFIX=` when invoking `make`, for example
|
|||
```sh
|
||||
make install PREFIX=your_installation_directory
|
||||
```
|
||||
|
||||
(along with all options you added on the `make` command line in the preceding build step)
|
||||
The default installation directory is `/opt/OpenBLAS`.
|
||||
|
||||
## Supported CPUs and Operating Systems
|
||||
|
|
@ -137,7 +142,7 @@ Please read `GotoBLAS_01Readme.txt` for older CPU models already supported by th
|
|||
- **AMD Bulldozer**: x86-64 ?GEMM FMA4 kernels. (Thanks to Werner Saar)
|
||||
- **AMD PILEDRIVER**: Uses Bulldozer codes with some optimizations.
|
||||
- **AMD STEAMROLLER**: Uses Bulldozer codes with some optimizations.
|
||||
- **AMD ZEN**: Uses Haswell codes with some optimizations.
|
||||
- **AMD ZEN**: Uses Haswell codes with some optimizations for Zen 2/3 (use SkylakeX for Zen4)
|
||||
|
||||
#### MIPS32
|
||||
|
||||
|
|
@ -169,13 +174,16 @@ Please read `GotoBLAS_01Readme.txt` for older CPU models already supported by th
|
|||
- **TSV110**: Optimized some Level-3 helper functions
|
||||
- **EMAG 8180**: preliminary support based on A57
|
||||
- **Neoverse N1**: (AWS Graviton2) preliminary support
|
||||
- **Apple Vortex**: preliminary support based on ARMV8
|
||||
- **Neoverse V1**: (AWS Graviton3) optimized Level-3 BLAS
|
||||
- **Apple Vortex**: preliminary support based on ThunderX2/3
|
||||
- **A64FX**: preliminary support, optimized Level-3 BLAS
|
||||
- **ARMV8SVE**: any ARMV8 cpu with SVE extensions
|
||||
|
||||
#### PPC/PPC64
|
||||
|
||||
- **POWER8**: Optimized BLAS, only for PPC64LE (Little Endian), only with `USE_OPENMP=1`
|
||||
- **POWER9**: Optimized Level-3 BLAS (real) and some Level-1,2. PPC64LE with OpenMP only.
|
||||
- **POWER10**:
|
||||
- **POWER10**: Optimized Level-3 BLAS including SBGEMM and some Level-1,2.
|
||||
|
||||
#### IBM zEnterprise System
|
||||
|
||||
|
|
|
|||
|
|
@ -167,11 +167,10 @@ jobs:
|
|||
|
||||
- job: OSX_OpenMP_Clang
|
||||
pool:
|
||||
vmImage: 'macOS-11'
|
||||
vmImage: 'macOS-latest'
|
||||
variables:
|
||||
LD_LIBRARY_PATH: /usr/local/opt/llvm/lib
|
||||
LIBRARY_PATH: /usr/local/opt/llvm/lib
|
||||
MACOSX_DEPLOYMENT_TARGET: 11.0
|
||||
steps:
|
||||
- script: |
|
||||
brew update
|
||||
|
|
@ -180,7 +179,7 @@ jobs:
|
|||
|
||||
- job: OSX_OpenMP_Clang_cmake
|
||||
pool:
|
||||
vmImage: 'macOS-11'
|
||||
vmImage: 'macOS-latest'
|
||||
variables:
|
||||
LD_LIBRARY_PATH: /usr/local/opt/llvm/lib
|
||||
LIBRARY_PATH: /usr/local/opt/llvm/lib
|
||||
|
|
@ -210,7 +209,7 @@ jobs:
|
|||
|
||||
- job: OSX_Ifort_Clang
|
||||
pool:
|
||||
vmImage: 'macOS-11'
|
||||
vmImage: 'macOS-latest'
|
||||
variables:
|
||||
LD_LIBRARY_PATH: /usr/local/opt/llvm/lib
|
||||
MACOS_HPCKIT_URL: https://registrationcenter-download.intel.com/akdlm/irc_nas/17643/m_HPCKit_p_2021.2.0.2903_offline.dmg
|
||||
|
|
|
|||
18
c_check
18
c_check
|
|
@ -96,11 +96,19 @@ esac
|
|||
defined=0
|
||||
|
||||
if [ "$os" = "AIX" ]; then
|
||||
case "$BINARY" in
|
||||
32) compiler_name="$compiler_name -maix32" ;;
|
||||
64) compiler_name="$compiler_name -maix64" ;;
|
||||
esac
|
||||
defined=1
|
||||
if [ "$compiler" = "GCC" ]; then
|
||||
case "$BINARY" in
|
||||
32) compiler_name="$compiler_name -maix32" ;;
|
||||
64) compiler_name="$compiler_name -maix64" ;;
|
||||
esac
|
||||
defined=1
|
||||
else
|
||||
case "$BINARY" in
|
||||
32) compiler_name="$compiler_name -m32" ;;
|
||||
64) compiler_name="$compiler_name -m64" ;;
|
||||
esac
|
||||
defined=1
|
||||
fi
|
||||
fi
|
||||
|
||||
case "$architecture" in
|
||||
|
|
|
|||
|
|
@ -438,15 +438,19 @@ endif()
|
|||
|
||||
if(BUILD_LAPACK_DEPRECATED)
|
||||
list(APPEND SLASRC DEPRECATED/sgegs.f DEPRECATED/sgegv.f
|
||||
DEPRECATED/sgelqs.f DEPRECATED/sgeqrs.f
|
||||
DEPRECATED/sgeqpf.f DEPRECATED/sgelsx.f DEPRECATED/sggsvd.f
|
||||
DEPRECATED/sggsvp.f DEPRECATED/slahrd.f DEPRECATED/slatzm.f DEPRECATED/stzrqf.f)
|
||||
list(APPEND DLASRC DEPRECATED/dgegs.f DEPRECATED/dgegv.f
|
||||
DEPRECATED/dgelqs.f DEPRECATED/dgeqrs.f
|
||||
DEPRECATED/dgeqpf.f DEPRECATED/dgelsx.f DEPRECATED/dggsvd.f
|
||||
DEPRECATED/dggsvp.f DEPRECATED/dlahrd.f DEPRECATED/dlatzm.f DEPRECATED/dtzrqf.f)
|
||||
list(APPEND CLASRC DEPRECATED/cgegs.f DEPRECATED/cgegv.f
|
||||
DEPRECATED/cgelqs.f DEPRECATED/cgeqrs.f
|
||||
DEPRECATED/cgeqpf.f DEPRECATED/cgelsx.f DEPRECATED/cggsvd.f
|
||||
DEPRECATED/cggsvp.f DEPRECATED/clahrd.f DEPRECATED/clatzm.f DEPRECATED/ctzrqf.f)
|
||||
list(APPEND ZLASRC DEPRECATED/zgegs.f DEPRECATED/zgegv.f
|
||||
DEPRECATED/zgelqs.f DEPRECATED/zgeqrs.f
|
||||
DEPRECATED/zgeqpf.f DEPRECATED/zgelsx.f DEPRECATED/zggsvd.f
|
||||
DEPRECATED/zggsvp.f DEPRECATED/zlahrd.f DEPRECATED/zlatzm.f DEPRECATED/ztzrqf.f)
|
||||
message(STATUS "Building deprecated routines")
|
||||
|
|
@ -935,15 +939,19 @@ endif()
|
|||
|
||||
if(BUILD_LAPACK_DEPRECATED)
|
||||
list(APPEND SLASRC DEPRECATED/sgegs.c DEPRECATED/sgegv.c
|
||||
DEPRECATED/sgelqs.c DEPRECATED/sgeqrs.c
|
||||
DEPRECATED/sgeqpf.c DEPRECATED/sgelsx.c DEPRECATED/sggsvd.c
|
||||
DEPRECATED/sggsvp.c DEPRECATED/slahrd.c DEPRECATED/slatzm.c DEPRECATED/stzrqf.c)
|
||||
list(APPEND DLASRC DEPRECATED/dgegs.c DEPRECATED/dgegv.c
|
||||
DEPRECATED/dgelqs.c DEPRECATED/dgeqrs.c
|
||||
DEPRECATED/dgeqpf.c DEPRECATED/dgelsx.c DEPRECATED/dggsvd.c
|
||||
DEPRECATED/dggsvp.c DEPRECATED/dlahrd.c DEPRECATED/dlatzm.c DEPRECATED/dtzrqf.c)
|
||||
list(APPEND CLASRC DEPRECATED/cgegs.c DEPRECATED/cgegv.c
|
||||
DEPRECATED/cgelqs.c DEPRECATED/cgeqrs.c
|
||||
DEPRECATED/cgeqpf.c DEPRECATED/cgelsx.c DEPRECATED/cggsvd.c
|
||||
DEPRECATED/cggsvp.c DEPRECATED/clahrd.c DEPRECATED/clatzm.c DEPRECATED/ctzrqf.c)
|
||||
list(APPEND ZLASRC DEPRECATED/zgegs.c DEPRECATED/zgegv.c
|
||||
DEPRECATED/zgelqs.c DEPRECATED/zgeqrs.c
|
||||
DEPRECATED/zgeqpf.c DEPRECATED/zgelsx.c DEPRECATED/zggsvd.c
|
||||
DEPRECATED/zggsvp.c DEPRECATED/zlahrd.c DEPRECATED/zlatzm.c DEPRECATED/ztzrqf.c)
|
||||
message(STATUS "Building deprecated routines")
|
||||
|
|
|
|||
|
|
@ -162,7 +162,11 @@ REALNAME:
|
|||
#define HUGE_PAGESIZE ( 4 << 20)
|
||||
|
||||
#ifndef BUFFERSIZE
|
||||
#if defined(NEOVERSEN1) || defined(NEOVERSEN2) || defined(NEOVERSEV1) || defined(A64FX) || defined(ARMV8SVE)
|
||||
#define BUFFER_SIZE (32 << 22)
|
||||
#else
|
||||
#define BUFFER_SIZE (32 << 20)
|
||||
#endif
|
||||
#else
|
||||
#define BUFFER_SIZE (32 << BUFFERSIZE)
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -192,27 +192,27 @@ int exec_blas(BLASLONG num_cpu, blas_param_t *param, void *buffer);
|
|||
int blas_level1_thread(int mode, BLASLONG m, BLASLONG n, BLASLONG k, void *alpha,
|
||||
void *a, BLASLONG lda,
|
||||
void *b, BLASLONG ldb,
|
||||
void *c, BLASLONG ldc, int (*function)(), int threads);
|
||||
void *c, BLASLONG ldc, int (*function)(void), int threads);
|
||||
|
||||
int gemm_thread_m (int mode, blas_arg_t *, BLASLONG *, BLASLONG *, int (*function)(), void *, void *, BLASLONG);
|
||||
int gemm_thread_m (int mode, blas_arg_t *, BLASLONG *, BLASLONG *, int (*function)(blas_arg_t*, BLASLONG*, BLASLONG*,FLOAT *, FLOAT *, BLASLONG ), void *, void *, BLASLONG);
|
||||
|
||||
int gemm_thread_n (int mode, blas_arg_t *, BLASLONG *, BLASLONG *, int (*function)(), void *, void *, BLASLONG);
|
||||
int gemm_thread_n (int mode, blas_arg_t *, BLASLONG *, BLASLONG *, int (*function)(blas_arg_t*, BLASLONG*, BLASLONG*,FLOAT*, FLOAT*, BLASLONG), void *, void *, BLASLONG);
|
||||
|
||||
int gemm_thread_mn(int mode, blas_arg_t *, BLASLONG *, BLASLONG *, int (*function)(), void *, void *, BLASLONG);
|
||||
int gemm_thread_mn(int mode, blas_arg_t *, BLASLONG *, BLASLONG *, int (*function)(blas_arg_t*, BLASLONG*, BLASLONG*,FLOAT *, FLOAT *, BLASLONG), void *, void *, BLASLONG);
|
||||
|
||||
int gemm_thread_variable(int mode, blas_arg_t *, BLASLONG *, BLASLONG *, int (*function)(), void *, void *, BLASLONG, BLASLONG);
|
||||
int gemm_thread_variable(int mode, blas_arg_t *, BLASLONG *, BLASLONG *, int (*function)(blas_arg_t*, BLASLONG*, BLASLONG*,FLOAT *, FLOAT *, BLASLONG), void *, void *, BLASLONG, BLASLONG);
|
||||
|
||||
int trsm_thread(int mode, BLASLONG m, BLASLONG n,
|
||||
double alpha_r, double alpha_i,
|
||||
void *a, BLASLONG lda,
|
||||
void *c, BLASLONG ldc, int (*function)(), void *buffer);
|
||||
void *c, BLASLONG ldc, int (*function)(void), void *buffer);
|
||||
|
||||
int syrk_thread(int mode, blas_arg_t *, BLASLONG *, BLASLONG *, int (*function)(), void *, void *, BLASLONG);
|
||||
int syrk_thread(int mode, blas_arg_t *, BLASLONG *, BLASLONG *, int (*function)(blas_arg_t*, BLASLONG*, BLASLONG*, FLOAT *, FLOAT *, BLASLONG), void*, void*, BLASLONG);
|
||||
|
||||
int getrf_thread(int mode, BLASLONG m, BLASLONG n, BLASLONG k,
|
||||
void *offsetA, BLASLONG lda,
|
||||
void *offsetB, BLASLONG jb,
|
||||
void *ipiv, BLASLONG offset, int (*function)(), void *buffer);
|
||||
void *ipiv, BLASLONG offset, int (*function)(void), void *buffer);
|
||||
|
||||
#endif /* ENDIF ASSEMBLER */
|
||||
|
||||
|
|
|
|||
|
|
@ -270,6 +270,7 @@ int detect(void)
|
|||
sysctlbyname("hw.cpufamily",&value64,&length64,NULL,0);
|
||||
if (value64 ==131287967|| value64 == 458787763 ) return CPU_VORTEX; //A12/M1
|
||||
if (value64 == 3660830781) return CPU_VORTEX; //A15/M2
|
||||
if (value64 == 2271604202) return CPU_VORTEX; //A16/M3
|
||||
#endif
|
||||
return CPU_ARMV8;
|
||||
#endif
|
||||
|
|
|
|||
24
cpuid_x86.c
24
cpuid_x86.c
|
|
@ -194,7 +194,7 @@ static C_INLINE void xgetbv(int op, int * eax, int * edx){
|
|||
}
|
||||
#endif
|
||||
|
||||
int support_avx(){
|
||||
int support_avx(void){
|
||||
#ifndef NO_AVX
|
||||
int eax, ebx, ecx, edx;
|
||||
int ret=0;
|
||||
|
|
@ -212,7 +212,7 @@ int support_avx(){
|
|||
#endif
|
||||
}
|
||||
|
||||
int support_avx2(){
|
||||
int support_avx2(void){
|
||||
#ifndef NO_AVX2
|
||||
int eax, ebx, ecx=0, edx;
|
||||
int ret=0;
|
||||
|
|
@ -228,7 +228,7 @@ int support_avx2(){
|
|||
#endif
|
||||
}
|
||||
|
||||
int support_avx512(){
|
||||
int support_avx512(void){
|
||||
#if !defined(NO_AVX) && !defined(NO_AVX512)
|
||||
int eax, ebx, ecx, edx;
|
||||
int ret=0;
|
||||
|
|
@ -250,7 +250,7 @@ int support_avx512(){
|
|||
#endif
|
||||
}
|
||||
|
||||
int support_avx512_bf16(){
|
||||
int support_avx512_bf16(void){
|
||||
#if !defined(NO_AVX) && !defined(NO_AVX512)
|
||||
int eax, ebx, ecx, edx;
|
||||
int ret=0;
|
||||
|
|
@ -271,7 +271,7 @@ int support_avx512_bf16(){
|
|||
#define BIT_AMX_BF16 0x00400000
|
||||
#define BIT_AMX_ENBD 0x00060000
|
||||
|
||||
int support_amx_bf16() {
|
||||
int support_amx_bf16(void) {
|
||||
#if !defined(NO_AVX) && !defined(NO_AVX512)
|
||||
int eax, ebx, ecx, edx;
|
||||
int ret=0;
|
||||
|
|
@ -1660,7 +1660,13 @@ int get_cpuname(void){
|
|||
else
|
||||
return CPUTYPE_BARCELONA;
|
||||
}
|
||||
case 10: // Zen3
|
||||
case 10: // Zen3/4
|
||||
#ifndef NO_AVX512
|
||||
if(support_avx512_bf16())
|
||||
return CPUTYPE_COOPERLAKE;
|
||||
if(support_avx512())
|
||||
return CPUTYPE_SKYLAKEX;
|
||||
#endif
|
||||
if(support_avx())
|
||||
#ifndef NO_AVX2
|
||||
return CPUTYPE_ZEN;
|
||||
|
|
@ -2438,6 +2444,12 @@ int get_coretype(void){
|
|||
// Ryzen 2
|
||||
default:
|
||||
// Matisse,Renoir Ryzen2 models
|
||||
#ifndef NO_AVX512
|
||||
if(support_avx512_bf16())
|
||||
return CORE_COOPERLAKE;
|
||||
if(support_avx512())
|
||||
return CORE_SKYLAKEX;
|
||||
#endif
|
||||
if(support_avx())
|
||||
#ifndef NO_AVX2
|
||||
return CORE_ZEN;
|
||||
|
|
|
|||
|
|
@ -214,6 +214,11 @@ endif
|
|||
ifeq ($(F_COMPILER), NAG)
|
||||
CEXTRALIB = -lgomp
|
||||
endif
|
||||
ifeq ($(F_COMPILER), IBM)
|
||||
ifeq ($(C_COMPILER), GCC)
|
||||
CEXTRALIB += -lgomp
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(BUILD_SINGLE),1)
|
||||
|
|
|
|||
|
|
@ -242,251 +242,6 @@ typedef struct Namelist Namelist;
|
|||
/* procedure parameter types for -A and -C++ */
|
||||
|
||||
#define F2C_proc_par_types 1
|
||||
#ifdef __cplusplus
|
||||
typedef logical (*L_fp)(...);
|
||||
#else
|
||||
typedef logical (*L_fp)();
|
||||
#endif
|
||||
|
||||
#if 0
|
||||
static float spow_ui(float x, integer n) {
|
||||
float pow=1.0; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x = 1/x;
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow *= x;
|
||||
if(u >>= 1) x *= x;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
return pow;
|
||||
}
|
||||
static double dpow_ui(double x, integer n) {
|
||||
double pow=1.0; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x = 1/x;
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow *= x;
|
||||
if(u >>= 1) x *= x;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
return pow;
|
||||
}
|
||||
#ifdef _MSC_VER
|
||||
static _Fcomplex cpow_ui(complex x, integer n) {
|
||||
complex pow={1.0,0.0}; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i;
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow.r *= x.r, pow.i *= x.i;
|
||||
if(u >>= 1) x.r *= x.r, x.i *= x.i;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
_Fcomplex p={pow.r, pow.i};
|
||||
return p;
|
||||
}
|
||||
#else
|
||||
static _Complex float cpow_ui(_Complex float x, integer n) {
|
||||
_Complex float pow=1.0; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x = 1/x;
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow *= x;
|
||||
if(u >>= 1) x *= x;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
return pow;
|
||||
}
|
||||
#endif
|
||||
#ifdef _MSC_VER
|
||||
static _Dcomplex zpow_ui(_Dcomplex x, integer n) {
|
||||
_Dcomplex pow={1.0,0.0}; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1];
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1];
|
||||
if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1];
|
||||
else break;
|
||||
}
|
||||
}
|
||||
_Dcomplex p = {pow._Val[0], pow._Val[1]};
|
||||
return p;
|
||||
}
|
||||
#else
|
||||
static _Complex double zpow_ui(_Complex double x, integer n) {
|
||||
_Complex double pow=1.0; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x = 1/x;
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow *= x;
|
||||
if(u >>= 1) x *= x;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
return pow;
|
||||
}
|
||||
#endif
|
||||
static integer pow_ii(integer x, integer n) {
|
||||
integer pow; unsigned long int u;
|
||||
if (n <= 0) {
|
||||
if (n == 0 || x == 1) pow = 1;
|
||||
else if (x != -1) pow = x == 0 ? 1/x : 0;
|
||||
else n = -n;
|
||||
}
|
||||
if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
|
||||
u = n;
|
||||
for(pow = 1; ; ) {
|
||||
if(u & 01) pow *= x;
|
||||
if(u >>= 1) x *= x;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
return pow;
|
||||
}
|
||||
static integer dmaxloc_(double *w, integer s, integer e, integer *n)
|
||||
{
|
||||
double m; integer i, mi;
|
||||
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
|
||||
if (w[i-1]>m) mi=i ,m=w[i-1];
|
||||
return mi-s+1;
|
||||
}
|
||||
static integer smaxloc_(float *w, integer s, integer e, integer *n)
|
||||
{
|
||||
float m; integer i, mi;
|
||||
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
|
||||
if (w[i-1]>m) mi=i ,m=w[i-1];
|
||||
return mi-s+1;
|
||||
}
|
||||
#endif
|
||||
#if 0
|
||||
static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
|
||||
integer n = *n_, incx = *incx_, incy = *incy_, i;
|
||||
#ifdef _MSC_VER
|
||||
_Fcomplex zdotc = {0.0, 0.0};
|
||||
if (incx == 1 && incy == 1) {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc._Val[0] += conjf(Cf(&x[i]))._Val[0] * Cf(&y[i])._Val[0];
|
||||
zdotc._Val[1] += conjf(Cf(&x[i]))._Val[1] * Cf(&y[i])._Val[1];
|
||||
}
|
||||
} else {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc._Val[0] += conjf(Cf(&x[i*incx]))._Val[0] * Cf(&y[i*incy])._Val[0];
|
||||
zdotc._Val[1] += conjf(Cf(&x[i*incx]))._Val[1] * Cf(&y[i*incy])._Val[1];
|
||||
}
|
||||
}
|
||||
pCf(z) = zdotc;
|
||||
}
|
||||
#else
|
||||
_Complex float zdotc = 0.0;
|
||||
if (incx == 1 && incy == 1) {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
|
||||
}
|
||||
} else {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
|
||||
}
|
||||
}
|
||||
pCf(z) = zdotc;
|
||||
}
|
||||
#endif
|
||||
static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
|
||||
integer n = *n_, incx = *incx_, incy = *incy_, i;
|
||||
#ifdef _MSC_VER
|
||||
_Dcomplex zdotc = {0.0, 0.0};
|
||||
if (incx == 1 && incy == 1) {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc._Val[0] += conj(Cd(&x[i]))._Val[0] * Cd(&y[i])._Val[0];
|
||||
zdotc._Val[1] += conj(Cd(&x[i]))._Val[1] * Cd(&y[i])._Val[1];
|
||||
}
|
||||
} else {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc._Val[0] += conj(Cd(&x[i*incx]))._Val[0] * Cd(&y[i*incy])._Val[0];
|
||||
zdotc._Val[1] += conj(Cd(&x[i*incx]))._Val[1] * Cd(&y[i*incy])._Val[1];
|
||||
}
|
||||
}
|
||||
pCd(z) = zdotc;
|
||||
}
|
||||
#else
|
||||
_Complex double zdotc = 0.0;
|
||||
if (incx == 1 && incy == 1) {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
|
||||
}
|
||||
} else {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
|
||||
}
|
||||
}
|
||||
pCd(z) = zdotc;
|
||||
}
|
||||
#endif
|
||||
static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
|
||||
integer n = *n_, incx = *incx_, incy = *incy_, i;
|
||||
#ifdef _MSC_VER
|
||||
_Fcomplex zdotc = {0.0, 0.0};
|
||||
if (incx == 1 && incy == 1) {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc._Val[0] += Cf(&x[i])._Val[0] * Cf(&y[i])._Val[0];
|
||||
zdotc._Val[1] += Cf(&x[i])._Val[1] * Cf(&y[i])._Val[1];
|
||||
}
|
||||
} else {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc._Val[0] += Cf(&x[i*incx])._Val[0] * Cf(&y[i*incy])._Val[0];
|
||||
zdotc._Val[1] += Cf(&x[i*incx])._Val[1] * Cf(&y[i*incy])._Val[1];
|
||||
}
|
||||
}
|
||||
pCf(z) = zdotc;
|
||||
}
|
||||
#else
|
||||
_Complex float zdotc = 0.0;
|
||||
if (incx == 1 && incy == 1) {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc += Cf(&x[i]) * Cf(&y[i]);
|
||||
}
|
||||
} else {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
|
||||
}
|
||||
}
|
||||
pCf(z) = zdotc;
|
||||
}
|
||||
#endif
|
||||
static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
|
||||
integer n = *n_, incx = *incx_, incy = *incy_, i;
|
||||
#ifdef _MSC_VER
|
||||
_Dcomplex zdotc = {0.0, 0.0};
|
||||
if (incx == 1 && incy == 1) {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc._Val[0] += Cd(&x[i])._Val[0] * Cd(&y[i])._Val[0];
|
||||
zdotc._Val[1] += Cd(&x[i])._Val[1] * Cd(&y[i])._Val[1];
|
||||
}
|
||||
} else {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc._Val[0] += Cd(&x[i*incx])._Val[0] * Cd(&y[i*incy])._Val[0];
|
||||
zdotc._Val[1] += Cd(&x[i*incx])._Val[1] * Cd(&y[i*incy])._Val[1];
|
||||
}
|
||||
}
|
||||
pCd(z) = zdotc;
|
||||
}
|
||||
#else
|
||||
_Complex double zdotc = 0.0;
|
||||
if (incx == 1 && incy == 1) {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc += Cd(&x[i]) * Cd(&y[i]);
|
||||
}
|
||||
} else {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
|
||||
}
|
||||
}
|
||||
pCd(z) = zdotc;
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* Common Block Declarations */
|
||||
|
||||
|
|
@ -503,16 +258,16 @@ static integer c__1 = 1;
|
|||
static integer c__5 = 5;
|
||||
static real c_b43 = (float)1.;
|
||||
|
||||
/* Main program */ int main()
|
||||
/* Main program */ int main(void)
|
||||
{
|
||||
/* Initialized data */
|
||||
|
||||
static real sfac = (float)9.765625e-4;
|
||||
|
||||
/* Local variables */
|
||||
extern /* Subroutine */ int check1_(), check2_();
|
||||
extern /* Subroutine */ int check1_(real*), check2_(real*);
|
||||
static integer ic;
|
||||
extern /* Subroutine */ int header_();
|
||||
extern /* Subroutine */ int header_(void);
|
||||
|
||||
/* Test program for the COMPLEX Level 1 CBLAS. */
|
||||
/* Based upon the original CBLAS test routine together with: */
|
||||
|
|
@ -553,7 +308,7 @@ static real c_b43 = (float)1.;
|
|||
|
||||
} /* MAIN__ */
|
||||
|
||||
/* Subroutine */ int header_()
|
||||
/* Subroutine */ int header_(void)
|
||||
{
|
||||
/* Initialized data */
|
||||
|
||||
|
|
@ -564,7 +319,7 @@ static real c_b43 = (float)1.;
|
|||
/* Format strings */
|
||||
|
||||
/* Builtin functions */
|
||||
integer s_wsfe(), do_fio(), e_wsfe();
|
||||
integer s_wsfe(void), do_fio(void), e_wsfe(void);
|
||||
|
||||
/* .. Parameters .. */
|
||||
/* .. Scalars in Common .. */
|
||||
|
|
@ -577,8 +332,7 @@ static real c_b43 = (float)1.;
|
|||
|
||||
} /* header_ */
|
||||
|
||||
/* Subroutine */ int check1_(sfac)
|
||||
real *sfac;
|
||||
/* Subroutine */ int check1_(real* sfac)
|
||||
{
|
||||
/* Initialized data */
|
||||
|
||||
|
|
@ -683,15 +437,15 @@ real *sfac;
|
|||
|
||||
/* Local variables */
|
||||
static integer i__;
|
||||
extern /* Subroutine */ int ctest_();
|
||||
extern /* Subroutine */ int ctest_(integer*, complex*, complex*, complex*, real*);
|
||||
static complex mwpcs[5], mwpct[5];
|
||||
extern /* Subroutine */ int itest1_(), stest1_();
|
||||
extern /* Subroutine */ int itest1_(integer*, integer*), stest1_(real*,real*,real*,real*);
|
||||
static complex cx[8];
|
||||
extern real scnrm2test_();
|
||||
extern real scnrm2test_(integer*, complex*, integer*);
|
||||
static integer np1;
|
||||
extern integer icamaxtest_();
|
||||
extern /* Subroutine */ int csscaltest_();
|
||||
extern real scasumtest_();
|
||||
extern integer icamaxtest_(integer*, complex*, integer*);
|
||||
extern /* Subroutine */ int csscaltest_(integer*, real*, complex*, integer*);
|
||||
extern real scasumtest_(integer*, complex*, integer*);
|
||||
static integer len;
|
||||
|
||||
/* .. Parameters .. */
|
||||
|
|
@ -808,8 +562,7 @@ real *sfac;
|
|||
return 0;
|
||||
} /* check1_ */
|
||||
|
||||
/* Subroutine */ int check2_(sfac)
|
||||
real *sfac;
|
||||
/* Subroutine */ int check2_(real* sfac)
|
||||
{
|
||||
/* Initialized data */
|
||||
|
||||
|
|
@ -981,10 +734,10 @@ real *sfac;
|
|||
static complex cdot[1];
|
||||
static integer lenx, leny, i__;
|
||||
static complex ctemp;
|
||||
extern /* Subroutine */ int ctest_();
|
||||
extern /* Subroutine */ int ctest_(integer*, complex*, complex*, complex*, real*);
|
||||
static integer ksize;
|
||||
extern /* Subroutine */ int cdotctest_(), ccopytest_(), cdotutest_(),
|
||||
cswaptest_(), caxpytest_();
|
||||
extern /* Subroutine */ int cdotctest_(integer*, complex*, integer*, complex*, integer*,complex*), ccopytest_(integer*, complex*, integer*, complex*, integer*), cdotutest_(integer*, complex*, integer*, complex*, integer*, complex*),
|
||||
cswaptest_(integer*, complex*, integer*, complex*, integer*), caxpytest_(integer*, complex*, complex*, integer*, complex*, integer*);
|
||||
static integer ki, kn;
|
||||
static complex cx[7], cy[7];
|
||||
static integer mx, my;
|
||||
|
|
@ -1067,9 +820,7 @@ real *sfac;
|
|||
return 0;
|
||||
} /* check2_ */
|
||||
|
||||
/* Subroutine */ int stest_(len, scomp, strue, ssize, sfac)
|
||||
integer *len;
|
||||
real *scomp, *strue, *ssize, *sfac;
|
||||
/* Subroutine */ int stest_(integer* len, real* scomp, real* strue, real* ssize,real* sfac)
|
||||
{
|
||||
/* System generated locals */
|
||||
integer i__1;
|
||||
|
|
@ -1077,7 +828,7 @@ real *scomp, *strue, *ssize, *sfac;
|
|||
|
||||
/* Local variables */
|
||||
static integer i__;
|
||||
extern doublereal sdiff_();
|
||||
extern doublereal sdiff_(real*, real*);
|
||||
static real sd;
|
||||
|
||||
/* ********************************* STEST ************************** */
|
||||
|
|
@ -1133,11 +884,10 @@ L40:
|
|||
|
||||
} /* stest_ */
|
||||
|
||||
/* Subroutine */ int stest1_(scomp1, strue1, ssize, sfac)
|
||||
real *scomp1, *strue1, *ssize, *sfac;
|
||||
/* Subroutine */ int stest1_(real* scomp1, real* strue1, real* ssize, real* sfac)
|
||||
{
|
||||
static real scomp[1], strue[1];
|
||||
extern /* Subroutine */ int stest_();
|
||||
extern /* Subroutine */ int stest_(integer*, real*, real*, real*, real*);
|
||||
|
||||
/* ************************* STEST1 ***************************** */
|
||||
|
||||
|
|
@ -1164,8 +914,7 @@ real *scomp1, *strue1, *ssize, *sfac;
|
|||
return 0;
|
||||
} /* stest1_ */
|
||||
|
||||
doublereal sdiff_(sa, sb)
|
||||
real *sa, *sb;
|
||||
doublereal sdiff_(real* sa, real* sb)
|
||||
{
|
||||
/* System generated locals */
|
||||
real ret_val;
|
||||
|
|
@ -1179,10 +928,7 @@ real *sa, *sb;
|
|||
return ret_val;
|
||||
} /* sdiff_ */
|
||||
|
||||
/* Subroutine */ int ctest_(len, ccomp, ctrue, csize, sfac)
|
||||
integer *len;
|
||||
complex *ccomp, *ctrue, *csize;
|
||||
real *sfac;
|
||||
/* Subroutine */ int ctest_(integer* len, complex* ccomp, complex* ctrue, complex* csize, real* sfac)
|
||||
{
|
||||
/* System generated locals */
|
||||
integer i__1, i__2;
|
||||
|
|
@ -1193,7 +939,7 @@ real *sfac;
|
|||
/* Local variables */
|
||||
static integer i__;
|
||||
static real scomp[20], ssize[20], strue[20];
|
||||
extern /* Subroutine */ int stest_();
|
||||
extern /* Subroutine */ int stest_(integer*, real*,real*,real*,real*);
|
||||
|
||||
/* **************************** CTEST ***************************** */
|
||||
|
||||
|
|
@ -1231,8 +977,7 @@ real *sfac;
|
|||
return 0;
|
||||
} /* ctest_ */
|
||||
|
||||
/* Subroutine */ int itest1_(icomp, itrue)
|
||||
integer *icomp, *itrue;
|
||||
/* Subroutine */ int itest1_(integer* icomp, integer* itrue)
|
||||
{
|
||||
/* Local variables */
|
||||
static integer id;
|
||||
|
|
|
|||
|
|
@ -242,129 +242,6 @@ typedef struct Namelist Namelist;
|
|||
/* procedure parameter types for -A and -C++ */
|
||||
|
||||
#define F2C_proc_par_types 1
|
||||
#ifdef __cplusplus
|
||||
typedef logical (*L_fp)(...);
|
||||
#else
|
||||
typedef logical (*L_fp)();
|
||||
#endif
|
||||
#if 0
|
||||
static float spow_ui(float x, integer n) {
|
||||
float pow=1.0; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x = 1/x;
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow *= x;
|
||||
if(u >>= 1) x *= x;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
return pow;
|
||||
}
|
||||
static double dpow_ui(double x, integer n) {
|
||||
double pow=1.0; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x = 1/x;
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow *= x;
|
||||
if(u >>= 1) x *= x;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
return pow;
|
||||
}
|
||||
#ifdef _MSC_VER
|
||||
static _Fcomplex cpow_ui(complex x, integer n) {
|
||||
complex pow={1.0,0.0}; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i;
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow.r *= x.r, pow.i *= x.i;
|
||||
if(u >>= 1) x.r *= x.r, x.i *= x.i;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
_Fcomplex p={pow.r, pow.i};
|
||||
return p;
|
||||
}
|
||||
#else
|
||||
static _Complex float cpow_ui(_Complex float x, integer n) {
|
||||
_Complex float pow=1.0; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x = 1/x;
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow *= x;
|
||||
if(u >>= 1) x *= x;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
return pow;
|
||||
}
|
||||
#endif
|
||||
#ifdef _MSC_VER
|
||||
static _Dcomplex zpow_ui(_Dcomplex x, integer n) {
|
||||
_Dcomplex pow={1.0,0.0}; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1];
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1];
|
||||
if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1];
|
||||
else break;
|
||||
}
|
||||
}
|
||||
_Dcomplex p = {pow._Val[0], pow._Val[1]};
|
||||
return p;
|
||||
}
|
||||
#else
|
||||
static _Complex double zpow_ui(_Complex double x, integer n) {
|
||||
_Complex double pow=1.0; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x = 1/x;
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow *= x;
|
||||
if(u >>= 1) x *= x;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
return pow;
|
||||
}
|
||||
#endif
|
||||
static integer pow_ii(integer x, integer n) {
|
||||
integer pow; unsigned long int u;
|
||||
if (n <= 0) {
|
||||
if (n == 0 || x == 1) pow = 1;
|
||||
else if (x != -1) pow = x == 0 ? 1/x : 0;
|
||||
else n = -n;
|
||||
}
|
||||
if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
|
||||
u = n;
|
||||
for(pow = 1; ; ) {
|
||||
if(u & 01) pow *= x;
|
||||
if(u >>= 1) x *= x;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
return pow;
|
||||
}
|
||||
static integer dmaxloc_(double *w, integer s, integer e, integer *n)
|
||||
{
|
||||
double m; integer i, mi;
|
||||
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
|
||||
if (w[i-1]>m) mi=i ,m=w[i-1];
|
||||
return mi-s+1;
|
||||
}
|
||||
static integer smaxloc_(float *w, integer s, integer e, integer *n)
|
||||
{
|
||||
float m; integer i, mi;
|
||||
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
|
||||
if (w[i-1]>m) mi=i ,m=w[i-1];
|
||||
return mi-s+1;
|
||||
}
|
||||
#endif
|
||||
/* -- translated by f2c (version 20000121).
|
||||
You must link the resulting object file with the libraries:
|
||||
-lf2c -lm (in that order)
|
||||
*/
|
||||
|
||||
|
||||
|
||||
/* Common Block Declarations */
|
||||
|
|
@ -396,7 +273,7 @@ static integer c_n1 = -1;
|
|||
static integer c__0 = 0;
|
||||
static logical c_false = FALSE_;
|
||||
|
||||
/* Main program */ int main()
|
||||
/* Main program */ int main(void)
|
||||
{
|
||||
/* Initialized data */
|
||||
|
||||
|
|
@ -414,17 +291,21 @@ static logical c_false = FALSE_;
|
|||
static logical same;
|
||||
static integer ninc, nbet, ntra;
|
||||
static logical rewi;
|
||||
extern /* Subroutine */ int cchk1_(), cchk2_(), cchk3_(), cchk4_(),
|
||||
cchk5_(), cchk6_();
|
||||
extern /* Subroutine */ int cchk1_(char*, real*, real*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, integer*, integer*, complex*, integer*, complex*, integer*, integer*, integer*, integer*, complex*, complex*, complex*, complex*, complex*, complex*, complex*, complex*, complex*, complex*, real*, integer*, ftnlen);
|
||||
extern /* Subroutine */ int cchk2_(char*, real*, real*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, integer*, integer*, complex*, integer*, complex*, integer*, integer*, integer*, integer*, complex*, complex*, complex*, complex*, complex*, complex*, complex*, complex*, complex*, complex*, real*, integer*, ftnlen);
|
||||
extern /* Subroutine */ int cchk3_(char*, real*, real*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, integer*, integer*, integer*, integer*, integer*, complex*, complex*, complex*, complex*, complex*, complex*, complex*, real*, complex*, integer*, ftnlen);
|
||||
extern /* Subroutine */ int cchk4_(char*, real*, real*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, complex*, integer*, integer*, integer*, integer*, complex*, complex*, complex*, complex*, complex*, complex*, complex*, complex*, complex*, complex*, real*, complex*, integer*, ftnlen);
|
||||
extern /* Subroutine */ int cchk5_(char*, real*, real*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, complex*, integer*, integer*, integer*, integer*, complex*, complex*, complex*, complex*, complex*, complex*, complex*, complex*, complex*, complex*, real*, complex*, integer*, ftnlen);
|
||||
extern /* Subroutine */ int cchk6_(char*, real*, real*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, complex*, integer*, integer*, integer*, integer*, complex*, complex*, complex*, complex*, complex*, complex*, complex*, complex*, complex*, complex*, real*, complex*, integer*, ftnlen);
|
||||
static complex a[4225] /* was [65][65] */;
|
||||
static real g[65];
|
||||
static integer i__, j, n;
|
||||
static logical fatal;
|
||||
static complex x[65], y[65], z__[130];
|
||||
extern doublereal sdiff_();
|
||||
extern doublereal sdiff_(real*, real*);
|
||||
static logical trace;
|
||||
static integer nidim;
|
||||
extern /* Subroutine */ int cmvch_();
|
||||
extern /* Subroutine */ int cmvch_(char*, integer*, integer*, complex*, complex*, integer*, complex*, integer*, complex*, complex*, integer*, complex*, real*, complex*, real*, real*, logical*, integer*, logical*, ftnlen);
|
||||
static char snaps[32], trans[1];
|
||||
static integer isnum;
|
||||
static logical ltest[17];
|
||||
|
|
@ -438,11 +319,11 @@ static logical c_false = FALSE_;
|
|||
static char snamet[12];
|
||||
static real thresh;
|
||||
static logical rorder;
|
||||
extern /* Subroutine */ int cc2chke_();
|
||||
extern /* Subroutine */ void cc2chke_(char*, ftnlen);
|
||||
static integer layout;
|
||||
static logical ltestt, tsterr;
|
||||
static complex alf[7];
|
||||
extern logical lce_();
|
||||
extern logical lce_(complex*, complex*, integer*);
|
||||
static integer inc[7], nkb;
|
||||
static complex bet[7];
|
||||
static real eps, err;
|
||||
|
|
@ -983,22 +864,7 @@ L240:
|
|||
|
||||
} /* MAIN__ */
|
||||
|
||||
/* Subroutine */ int cchk1_(sname, eps, thresh, nout, ntra, trace, rewi,
|
||||
fatal, nidim, idim, nkb, kb, nalf, alf, nbet, bet, ninc, inc, nmax,
|
||||
incmax, a, aa, as, x, xx, xs, y, yy, ys, yt, g, iorder, sname_len)
|
||||
char *sname;
|
||||
real *eps, *thresh;
|
||||
integer *nout, *ntra;
|
||||
logical *trace, *rewi, *fatal;
|
||||
integer *nidim, *idim, *nkb, *kb, *nalf;
|
||||
complex *alf;
|
||||
integer *nbet;
|
||||
complex *bet;
|
||||
integer *ninc, *inc, *nmax, *incmax;
|
||||
complex *a, *aa, *as, *x, *xx, *xs, *y, *yy, *ys, *yt;
|
||||
real *g;
|
||||
integer *iorder;
|
||||
ftnlen sname_len;
|
||||
/* Subroutine */ int cchk1_(char* sname, real* eps, real* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nkb, integer* kb, integer* nalf, complex* alf, integer* nbet, complex* bet, integer* ninc, integer* inc, integer* nmax, integer* incmax, complex* a, complex* aa, complex* as, complex* x, complex* xx, complex* xs, complex* y, complex* yy, complex* ys, complex* yt, real* g, integer* iorder, ftnlen sname_len)
|
||||
{
|
||||
/* Initialized data */
|
||||
|
||||
|
|
@ -1015,10 +881,10 @@ ftnlen sname_len;
|
|||
static integer incx, incy;
|
||||
static logical full, tran, null;
|
||||
static integer i__, m, n;
|
||||
extern /* Subroutine */ int cmake_();
|
||||
extern /* Subroutine */ int cmake_(char*, char*, char*, integer*, integer*, complex*, integer*, complex*, integer*, integer*, integer*, logical*, complex*, ftnlen, ftnlen, ftnlen);
|
||||
static complex alpha;
|
||||
static logical isame[13];
|
||||
extern /* Subroutine */ int cmvch_();
|
||||
extern /* Subroutine */ int cmvch_(char*, integer*, integer*, complex*, complex*, integer*, complex*, integer*, complex*, complex*, integer*, complex*, real*, complex*, real*, real*, logical*, integer*, logical*, ftnlen);
|
||||
static integer nargs;
|
||||
static logical reset;
|
||||
static integer incxs, incys;
|
||||
|
|
@ -1026,14 +892,15 @@ ftnlen sname_len;
|
|||
static integer ia, ib, ic;
|
||||
static logical banded;
|
||||
static integer nc, nd, im, in, kl, ml, nk, nl, ku, ix, iy, ms, lx, ly, ns;
|
||||
extern /* Subroutine */ int ccgbmv_(), ccgemv_();
|
||||
extern logical lceres_();
|
||||
extern /* Subroutine */ int ccgbmv_(integer*, char*, integer*, integer*, integer*, integer*, complex*, complex*, integer*, complex*, integer*, complex*, complex*, integer*, ftnlen);
|
||||
extern /* Subroutine */ void ccgemv_(integer*, char*, integer*, integer*, complex*, complex*, integer*, complex*, integer*, complex*, complex*, integer*, ftnlen);
|
||||
extern logical lceres_(char*, char*, integer*, integer*, complex*, complex*, integer*, ftnlen, ftnlen);
|
||||
static char ctrans[14];
|
||||
static real errmax;
|
||||
static complex transl;
|
||||
static char transs[1];
|
||||
static integer laa, lda;
|
||||
extern logical lce_();
|
||||
extern logical lce_(complex*, complex*, integer*);
|
||||
static complex als, bls;
|
||||
static real err;
|
||||
static integer iku, kls, kus;
|
||||
|
|
@ -1448,22 +1315,7 @@ L140:
|
|||
|
||||
} /* cchk1_ */
|
||||
|
||||
/* Subroutine */ int cchk2_(sname, eps, thresh, nout, ntra, trace, rewi,
|
||||
fatal, nidim, idim, nkb, kb, nalf, alf, nbet, bet, ninc, inc, nmax,
|
||||
incmax, a, aa, as, x, xx, xs, y, yy, ys, yt, g, iorder, sname_len)
|
||||
char *sname;
|
||||
real *eps, *thresh;
|
||||
integer *nout, *ntra;
|
||||
logical *trace, *rewi, *fatal;
|
||||
integer *nidim, *idim, *nkb, *kb, *nalf;
|
||||
complex *alf;
|
||||
integer *nbet;
|
||||
complex *bet;
|
||||
integer *ninc, *inc, *nmax, *incmax;
|
||||
complex *a, *aa, *as, *x, *xx, *xs, *y, *yy, *ys, *yt;
|
||||
real *g;
|
||||
integer *iorder;
|
||||
ftnlen sname_len;
|
||||
/* Subroutine */ int cchk2_(char* sname, real* eps, real* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nkb, integer* kb, integer* nalf, complex* alf, integer* nbet, complex* bet, integer* ninc, integer* inc, integer* nmax, integer* incmax, complex* a, complex* aa, complex* as, complex* x, complex* xx, complex* xs, complex* y, complex* yy, complex* ys, complex* yt, real* g, integer* iorder, ftnlen sname_len)
|
||||
{
|
||||
/* Initialized data */
|
||||
|
||||
|
|
@ -1481,10 +1333,10 @@ ftnlen sname_len;
|
|||
static logical full, null;
|
||||
static char uplo[1];
|
||||
static integer i__, k, n;
|
||||
extern /* Subroutine */ int cmake_();
|
||||
extern /* Subroutine */ int cmake_(char*, char*, char*, integer*, integer*, complex*, integer*, complex*, integer*, integer*, integer*, logical*, complex*, ftnlen, ftnlen, ftnlen);
|
||||
static complex alpha;
|
||||
static logical isame[13];
|
||||
extern /* Subroutine */ int cmvch_();
|
||||
extern /* Subroutine */ int cmvch_(char*, integer*, integer*, complex*, complex*, integer*, complex*, integer*, complex*, complex*, integer*, complex*, real*, complex*, real*, real*, logical*, integer*, logical*, ftnlen);
|
||||
static integer nargs;
|
||||
static logical reset;
|
||||
static char cuplo[14];
|
||||
|
|
@ -1495,13 +1347,14 @@ ftnlen sname_len;
|
|||
static integer nc, ik, in;
|
||||
static logical packed;
|
||||
static integer nk, ks, ix, iy, ns, lx, ly;
|
||||
extern /* Subroutine */ int cchbmv_(), cchemv_();
|
||||
extern logical lceres_();
|
||||
extern /* Subroutine */ int cchpmv_();
|
||||
extern /* Subroutine */ void cchbmv_(integer*, char*, integer*, integer*, complex*, complex*, integer*, complex*, integer*, complex*, complex*, integer*, ftnlen);
|
||||
extern /* Subroutine */ void cchemv_(integer*, char*, integer*, complex*, complex*, integer*, complex*, integer*, complex*, complex*, integer*, ftnlen);
|
||||
extern logical lceres_(char*, char*, integer*, integer*, complex*, complex*, integer*, ftnlen, ftnlen);
|
||||
extern /* Subroutine */ void cchpmv_(integer*, char*, integer*, complex*, complex*, complex*, integer*, complex*, complex*, integer*, ftnlen);
|
||||
static real errmax;
|
||||
static complex transl;
|
||||
static integer laa, lda;
|
||||
extern logical lce_();
|
||||
extern logical lce_(complex*, complex*, integer*);
|
||||
static complex als, bls;
|
||||
static real err;
|
||||
|
||||
|
|
@ -1906,19 +1759,7 @@ L130:
|
|||
|
||||
} /* cchk2_ */
|
||||
|
||||
/* Subroutine */ int cchk3_(sname, eps, thresh, nout, ntra, trace, rewi,
|
||||
fatal, nidim, idim, nkb, kb, ninc, inc, nmax, incmax, a, aa, as, x,
|
||||
xx, xs, xt, g, z__, iorder, sname_len)
|
||||
char *sname;
|
||||
real *eps, *thresh;
|
||||
integer *nout, *ntra;
|
||||
logical *trace, *rewi, *fatal;
|
||||
integer *nidim, *idim, *nkb, *kb, *ninc, *inc, *nmax, *incmax;
|
||||
complex *a, *aa, *as, *x, *xx, *xs, *xt;
|
||||
real *g;
|
||||
complex *z__;
|
||||
integer *iorder;
|
||||
ftnlen sname_len;
|
||||
/* Subroutine */ int cchk3_(char* sname, real* eps, real* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nkb, integer* kb, integer* ninc, integer* inc, integer* nmax, integer* incmax, complex* a, complex* aa, complex* as, complex* x, complex* xx, complex* xs, complex* xt, real* g, complex* z__, integer* iorder, ftnlen sname_len)
|
||||
{
|
||||
/* Initialized data */
|
||||
|
||||
|
|
@ -1937,10 +1778,10 @@ ftnlen sname_len;
|
|||
static logical full, null;
|
||||
static char uplo[1], cdiag[14];
|
||||
static integer i__, k, n;
|
||||
extern /* Subroutine */ int cmake_();
|
||||
extern /* Subroutine */ int cmake_(char*, char*, char*, integer*, integer*, complex*, integer*, complex*, integer*, integer*, integer*, logical*, complex*, ftnlen, ftnlen, ftnlen);
|
||||
static char diags[1];
|
||||
static logical isame[13];
|
||||
extern /* Subroutine */ int cmvch_();
|
||||
extern /* Subroutine */ int cmvch_(char*, integer*, integer*, complex*, complex*, integer*, complex*, integer*, complex*, complex*, integer*, complex*, real*, complex*, real*, real*, logical*, integer*, logical*, ftnlen);
|
||||
static integer nargs;
|
||||
static logical reset;
|
||||
static char cuplo[14];
|
||||
|
|
@ -1950,17 +1791,19 @@ ftnlen sname_len;
|
|||
static integer nc, ik, in;
|
||||
static logical packed;
|
||||
static integer nk, ks, ix, ns, lx;
|
||||
extern logical lceres_();
|
||||
extern /* Subroutine */ int cctbmv_(), cctbsv_();
|
||||
extern logical lceres_(char*, char*, integer*, integer*, complex*, complex*, integer*, ftnlen, ftnlen);
|
||||
extern /* Subroutine */ void cctbmv_(integer*, char*, char*, char*, integer*, integer*, complex*, integer*, complex*, integer*, ftnlen, ftnlen, ftnlen);
|
||||
extern /* Subroutine */ void cctbsv_(integer*, char*, char*, char*, integer*, integer*, complex*, integer*, complex*, integer*, ftnlen, ftnlen, ftnlen);
|
||||
static char ctrans[14];
|
||||
extern /* Subroutine */ int cctpmv_();
|
||||
extern /* Subroutine */ void cctpmv_(integer*, char*, char*, char*, integer*, complex*, complex*, integer*, ftnlen, ftnlen, ftnlen);
|
||||
static real errmax;
|
||||
extern /* Subroutine */ int cctrmv_(), cctpsv_();
|
||||
extern /* Subroutine */ void cctrmv_(integer*, char*, char*, char*, integer*, complex*, integer*, complex*, integer*, ftnlen, ftnlen, ftnlen);
|
||||
extern /* Subroutine */ void cctpsv_(integer*, char*, char*, char*, integer*, complex*, complex*, integer*, ftnlen, ftnlen, ftnlen);
|
||||
static complex transl;
|
||||
extern /* Subroutine */ int cctrsv_();
|
||||
extern /* Subroutine */ void cctrsv_(integer*, char*, char*, char*, integer*, complex*, integer*, complex*, integer*, ftnlen, ftnlen, ftnlen);
|
||||
static char transs[1];
|
||||
static integer laa, icd, lda;
|
||||
extern logical lce_();
|
||||
extern logical lce_(complex*, complex*, integer*);
|
||||
static integer ict, icu;
|
||||
static real err;
|
||||
|
||||
|
|
@ -2418,21 +2261,7 @@ L130:
|
|||
|
||||
} /* cchk3_ */
|
||||
|
||||
/* Subroutine */ int cchk4_(sname, eps, thresh, nout, ntra, trace, rewi,
|
||||
fatal, nidim, idim, nalf, alf, ninc, inc, nmax, incmax, a, aa, as, x,
|
||||
xx, xs, y, yy, ys, yt, g, z__, iorder, sname_len)
|
||||
char *sname;
|
||||
real *eps, *thresh;
|
||||
integer *nout, *ntra;
|
||||
logical *trace, *rewi, *fatal;
|
||||
integer *nidim, *idim, *nalf;
|
||||
complex *alf;
|
||||
integer *ninc, *inc, *nmax, *incmax;
|
||||
complex *a, *aa, *as, *x, *xx, *xs, *y, *yy, *ys, *yt;
|
||||
real *g;
|
||||
complex *z__;
|
||||
integer *iorder;
|
||||
ftnlen sname_len;
|
||||
/* Subroutine */ int cchk4_(char* sname, real* eps, real* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, complex* alf, integer* ninc, integer* inc, integer* nmax, integer* incmax, complex* a, complex* aa, complex* as, complex* x, complex* xx, complex* xs, complex* y, complex* yy, complex* ys, complex* yt, real* g, complex* z__, integer* iorder, ftnlen sname_len)
|
||||
{
|
||||
/* System generated locals */
|
||||
integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7;
|
||||
|
|
@ -2444,21 +2273,21 @@ ftnlen sname_len;
|
|||
static integer incx, incy;
|
||||
static logical null;
|
||||
static integer i__, j, m, n;
|
||||
extern /* Subroutine */ int cmake_();
|
||||
extern /* Subroutine */ int cmake_(char*, char*, char*, integer*, integer*, complex*, integer*, complex*, integer*, integer*, integer*, logical*, complex*, ftnlen, ftnlen, ftnlen);
|
||||
static complex alpha, w[1];
|
||||
static logical isame[13];
|
||||
extern /* Subroutine */ int cmvch_();
|
||||
extern /* Subroutine */ int cmvch_(char*, integer*, integer*, complex*, complex*, integer*, complex*, integer*, complex*, complex*, integer*, complex*, real*, complex*, real*, real*, logical*, integer*, logical*, ftnlen);
|
||||
static integer nargs;
|
||||
static logical reset;
|
||||
static integer incxs, incys, ia, nc, nd, im, in;
|
||||
extern /* Subroutine */ int ccgerc_();
|
||||
extern /* Subroutine */ void ccgerc_(integer*, integer*, integer*, complex*, complex*, integer*, complex*, integer*, complex*, integer*);
|
||||
static integer ms, ix, iy, ns, lx, ly;
|
||||
extern /* Subroutine */ int ccgeru_();
|
||||
extern logical lceres_();
|
||||
extern /* Subroutine */ void ccgeru_(integer*, integer*, integer*, complex*, complex*, integer*, complex*, integer*, complex*, integer*);
|
||||
extern logical lceres_(char*, char*, integer*, integer*, complex*, complex*, integer*, ftnlen, ftnlen);
|
||||
static real errmax;
|
||||
static complex transl;
|
||||
static integer laa, lda;
|
||||
extern logical lce_();
|
||||
extern logical lce_(complex*, complex*, integer*);
|
||||
static complex als;
|
||||
static real err;
|
||||
|
||||
|
|
@ -2786,21 +2615,7 @@ L150:
|
|||
|
||||
} /* cchk4_ */
|
||||
|
||||
/* Subroutine */ int cchk5_(sname, eps, thresh, nout, ntra, trace, rewi,
|
||||
fatal, nidim, idim, nalf, alf, ninc, inc, nmax, incmax, a, aa, as, x,
|
||||
xx, xs, y, yy, ys, yt, g, z__, iorder, sname_len)
|
||||
char *sname;
|
||||
real *eps, *thresh;
|
||||
integer *nout, *ntra;
|
||||
logical *trace, *rewi, *fatal;
|
||||
integer *nidim, *idim, *nalf;
|
||||
complex *alf;
|
||||
integer *ninc, *inc, *nmax, *incmax;
|
||||
complex *a, *aa, *as, *x, *xx, *xs, *y, *yy, *ys, *yt;
|
||||
real *g;
|
||||
complex *z__;
|
||||
integer *iorder;
|
||||
ftnlen sname_len;
|
||||
/* Subroutine */ int cchk5_(char* sname, real* eps, real* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, complex* alf, integer* ninc, integer* inc, integer* nmax, integer* incmax, complex* a, complex* aa, complex* as, complex* x, complex* xx, complex* xs, complex* y, complex* yy, complex* ys, complex* yt, real* g, complex* z__, integer* iorder, ftnlen sname_len)
|
||||
{
|
||||
/* Initialized data */
|
||||
|
||||
|
|
@ -2818,10 +2633,12 @@ ftnlen sname_len;
|
|||
static logical full, null;
|
||||
static char uplo[1];
|
||||
static integer i__, j, n;
|
||||
extern /* Subroutine */ int cmake_(), ccher_();
|
||||
extern /* Subroutine */ int cmake_(char*, char*, char*, integer*, integer*, complex*, integer*, complex*, integer*, integer*, integer*, logical*, complex*, ftnlen, ftnlen, ftnlen);
|
||||
extern /* Subroutine */ void ccher_(integer*, char*, integer*, real*, complex*, integer*, complex*, integer*, ftnlen);
|
||||
static complex alpha, w[1];
|
||||
static logical isame[13];
|
||||
extern /* Subroutine */ int cchpr_(), cmvch_();
|
||||
extern /* Subroutine */ void cchpr_(integer*, char*, integer*, real*, complex*, integer*, complex*, ftnlen);
|
||||
extern /* Subroutine */ int cmvch_(char*, integer*, integer*, complex*, complex*, integer*, complex*, integer*, complex*, complex*, integer*, complex*, real*, complex*, real*, real*, logical*, integer*, logical*, ftnlen);
|
||||
static integer nargs;
|
||||
static logical reset;
|
||||
static char cuplo[14];
|
||||
|
|
@ -2832,11 +2649,11 @@ ftnlen sname_len;
|
|||
static logical packed;
|
||||
static integer ix, ns, lx;
|
||||
static real ralpha;
|
||||
extern logical lceres_();
|
||||
extern logical lceres_(char*, char*, integer*, integer*, complex*, complex*, integer*, ftnlen, ftnlen);
|
||||
static real errmax;
|
||||
static complex transl;
|
||||
static integer laa, lda;
|
||||
extern logical lce_();
|
||||
extern logical lce_(complex*, complex*, integer*);
|
||||
static real err;
|
||||
|
||||
/* Tests CHER and CHPR. */
|
||||
|
|
@ -3160,21 +2977,7 @@ L130:
|
|||
|
||||
} /* cchk5_ */
|
||||
|
||||
/* Subroutine */ int cchk6_(sname, eps, thresh, nout, ntra, trace, rewi,
|
||||
fatal, nidim, idim, nalf, alf, ninc, inc, nmax, incmax, a, aa, as, x,
|
||||
xx, xs, y, yy, ys, yt, g, z__, iorder, sname_len)
|
||||
char *sname;
|
||||
real *eps, *thresh;
|
||||
integer *nout, *ntra;
|
||||
logical *trace, *rewi, *fatal;
|
||||
integer *nidim, *idim, *nalf;
|
||||
complex *alf;
|
||||
integer *ninc, *inc, *nmax, *incmax;
|
||||
complex *a, *aa, *as, *x, *xx, *xs, *y, *yy, *ys, *yt;
|
||||
real *g;
|
||||
complex *z__;
|
||||
integer *iorder;
|
||||
ftnlen sname_len;
|
||||
/* Subroutine */ int cchk6_(char* sname, real* eps, real* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, complex* alf, integer* ninc, integer* inc, integer* nmax, integer* incmax, complex* a, complex* aa, complex* as, complex* x, complex* xx, complex* xs, complex* y, complex* yy, complex* ys, complex* yt, real* g, complex* z__, integer* iorder, ftnlen sname_len)
|
||||
{
|
||||
/* Initialized data */
|
||||
|
||||
|
|
@ -3192,25 +2995,26 @@ ftnlen sname_len;
|
|||
static logical full, null;
|
||||
static char uplo[1];
|
||||
static integer i__, j, n;
|
||||
extern /* Subroutine */ int cmake_();
|
||||
extern /* Subroutine */ int cmake_(char*, char*, char*, integer*, integer*, complex*, integer*, complex*, integer*, integer*, integer*, logical*, complex*, ftnlen, ftnlen, ftnlen);
|
||||
static complex alpha, w[2];
|
||||
static logical isame[13];
|
||||
extern /* Subroutine */ int cmvch_();
|
||||
extern /* Subroutine */ int cmvch_(char*, integer*, integer*, complex*, complex*, integer*, complex*, integer*, complex*, complex*, integer*, complex*, real*, complex*, real*, real*, logical*, integer*, logical*, ftnlen);
|
||||
static integer nargs;
|
||||
static logical reset;
|
||||
static char cuplo[14];
|
||||
static integer incxs, incys;
|
||||
static logical upper;
|
||||
static char uplos[1];
|
||||
extern /* Subroutine */ int ccher2_(), cchpr2_();
|
||||
extern /* Subroutine */ void ccher2_(integer*, char*, integer*, complex*, complex*, integer*, complex*, integer*, complex*, integer*, ftnlen);
|
||||
extern /* Subroutine */ void cchpr2_(integer*, char*, integer*, complex*, complex*, integer*, complex*, integer*, complex*, ftnlen);
|
||||
static integer ia, ja, ic, nc, jj, lj, in;
|
||||
static logical packed;
|
||||
static integer ix, iy, ns, lx, ly;
|
||||
extern logical lceres_();
|
||||
extern logical lceres_(char*, char*, integer*, integer*, complex*, complex*, integer*, ftnlen, ftnlen);
|
||||
static real errmax;
|
||||
static complex transl;
|
||||
static integer laa, lda;
|
||||
extern logical lce_();
|
||||
extern logical lce_(complex*, complex*, integer*);
|
||||
static complex als;
|
||||
static real err;
|
||||
|
||||
|
|
@ -3597,24 +3401,7 @@ L170:
|
|||
|
||||
} /* cchk6_ */
|
||||
|
||||
/* Subroutine */ int cmvch_(trans, m, n, alpha, a, nmax, x, incx, beta, y,
|
||||
incy, yt, g, yy, eps, err, fatal, nout, mv, trans_len)
|
||||
char *trans;
|
||||
integer *m, *n;
|
||||
complex *alpha, *a;
|
||||
integer *nmax;
|
||||
complex *x;
|
||||
integer *incx;
|
||||
complex *beta, *y;
|
||||
integer *incy;
|
||||
complex *yt;
|
||||
real *g;
|
||||
complex *yy;
|
||||
real *eps, *err;
|
||||
logical *fatal;
|
||||
integer *nout;
|
||||
logical *mv;
|
||||
ftnlen trans_len;
|
||||
/* Subroutine */ int cmvch_(char* trans, integer* m, integer* n, complex* alpha, complex* a, integer* nmax, complex* x, integer* incx, complex* beta, complex* y, integer* incy, complex* yt, real* g, complex* yy, real* eps, real* err, logical* fatal, integer* nout, logical* mv, ftnlen trans_len)
|
||||
{
|
||||
|
||||
/* System generated locals */
|
||||
|
|
@ -3812,9 +3599,7 @@ L80:
|
|||
|
||||
} /* cmvch_ */
|
||||
|
||||
logical lce_(ri, rj, lr)
|
||||
complex *ri, *rj;
|
||||
integer *lr;
|
||||
logical lce_(complex* ri, complex* rj, integer* lr)
|
||||
{
|
||||
/* System generated locals */
|
||||
integer i__1, i__2, i__3;
|
||||
|
|
@ -3861,13 +3646,7 @@ L30:
|
|||
|
||||
} /* lce_ */
|
||||
|
||||
logical lceres_(type__, uplo, m, n, aa, as, lda, type_len, uplo_len)
|
||||
char *type__, *uplo;
|
||||
integer *m, *n;
|
||||
complex *aa, *as;
|
||||
integer *lda;
|
||||
ftnlen type_len;
|
||||
ftnlen uplo_len;
|
||||
logical lceres_(char* type__, char* uplo, integer* m, integer* n, complex* aa, complex* as, integer* lda, ftnlen type_len, ftnlen uplo_len)
|
||||
{
|
||||
/* System generated locals */
|
||||
integer aa_dim1, aa_offset, as_dim1, as_offset, i__1, i__2, i__3, i__4;
|
||||
|
|
@ -3960,9 +3739,7 @@ L80:
|
|||
|
||||
} /* lceres_ */
|
||||
|
||||
/* Complex */ VOID cbeg_( ret_val, reset)
|
||||
complex * ret_val;
|
||||
logical *reset;
|
||||
/* Complex */ VOID cbeg_(complex* ret_val, logical* reset)
|
||||
{
|
||||
/* System generated locals */
|
||||
real r__1, r__2;
|
||||
|
|
@ -4023,8 +3800,7 @@ L10:
|
|||
|
||||
} /* cbeg_ */
|
||||
|
||||
doublereal sdiff_(x, y)
|
||||
real *x, *y;
|
||||
doublereal sdiff_(real* x, real* y)
|
||||
{
|
||||
/* System generated locals */
|
||||
real ret_val;
|
||||
|
|
@ -4044,19 +3820,7 @@ real *x, *y;
|
|||
|
||||
} /* sdiff_ */
|
||||
|
||||
/* Subroutine */ int cmake_(type__, uplo, diag, m, n, a, nmax, aa, lda, kl,
|
||||
ku, reset, transl, type_len, uplo_len, diag_len)
|
||||
char *type__, *uplo, *diag;
|
||||
integer *m, *n;
|
||||
complex *a;
|
||||
integer *nmax;
|
||||
complex *aa;
|
||||
integer *lda, *kl, *ku;
|
||||
logical *reset;
|
||||
complex *transl;
|
||||
ftnlen type_len;
|
||||
ftnlen uplo_len;
|
||||
ftnlen diag_len;
|
||||
/* Subroutine */ int cmake_(char* type__, char* uplo, char* diag, integer* m, integer* n, complex* a, integer* nmax, complex* aa, integer* lda, integer* kl, integer* ku, logical* reset, complex* transl, ftnlen type_len, ftnlen uplo_len, ftnlen diag_len)
|
||||
{
|
||||
/* System generated locals */
|
||||
integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
|
||||
|
|
@ -4064,7 +3828,7 @@ ftnlen diag_len;
|
|||
complex q__1, q__2;
|
||||
|
||||
/* Local variables */
|
||||
extern /* Complex */ VOID cbeg_();
|
||||
extern /* Complex */ VOID cbeg_(complex*, logical*);
|
||||
static integer ibeg, iend, ioff;
|
||||
static logical unit;
|
||||
static integer i__, j;
|
||||
|
|
|
|||
|
|
@ -242,130 +242,6 @@ typedef struct Namelist Namelist;
|
|||
/* procedure parameter types for -A and -C++ */
|
||||
|
||||
#define F2C_proc_par_types 1
|
||||
#ifdef __cplusplus
|
||||
typedef logical (*L_fp)(...);
|
||||
#else
|
||||
typedef logical (*L_fp)();
|
||||
#endif
|
||||
#if 0
|
||||
static float spow_ui(float x, integer n) {
|
||||
float pow=1.0; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x = 1/x;
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow *= x;
|
||||
if(u >>= 1) x *= x;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
return pow;
|
||||
}
|
||||
static double dpow_ui(double x, integer n) {
|
||||
double pow=1.0; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x = 1/x;
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow *= x;
|
||||
if(u >>= 1) x *= x;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
return pow;
|
||||
}
|
||||
#ifdef _MSC_VER
|
||||
static _Fcomplex cpow_ui(complex x, integer n) {
|
||||
complex pow={1.0,0.0}; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i;
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow.r *= x.r, pow.i *= x.i;
|
||||
if(u >>= 1) x.r *= x.r, x.i *= x.i;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
_Fcomplex p={pow.r, pow.i};
|
||||
return p;
|
||||
}
|
||||
#else
|
||||
static _Complex float cpow_ui(_Complex float x, integer n) {
|
||||
_Complex float pow=1.0; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x = 1/x;
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow *= x;
|
||||
if(u >>= 1) x *= x;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
return pow;
|
||||
}
|
||||
#endif
|
||||
#ifdef _MSC_VER
|
||||
static _Dcomplex zpow_ui(_Dcomplex x, integer n) {
|
||||
_Dcomplex pow={1.0,0.0}; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1];
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1];
|
||||
if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1];
|
||||
else break;
|
||||
}
|
||||
}
|
||||
_Dcomplex p = {pow._Val[0], pow._Val[1]};
|
||||
return p;
|
||||
}
|
||||
#else
|
||||
static _Complex double zpow_ui(_Complex double x, integer n) {
|
||||
_Complex double pow=1.0; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x = 1/x;
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow *= x;
|
||||
if(u >>= 1) x *= x;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
return pow;
|
||||
}
|
||||
#endif
|
||||
static integer pow_ii(integer x, integer n) {
|
||||
integer pow; unsigned long int u;
|
||||
if (n <= 0) {
|
||||
if (n == 0 || x == 1) pow = 1;
|
||||
else if (x != -1) pow = x == 0 ? 1/x : 0;
|
||||
else n = -n;
|
||||
}
|
||||
if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
|
||||
u = n;
|
||||
for(pow = 1; ; ) {
|
||||
if(u & 01) pow *= x;
|
||||
if(u >>= 1) x *= x;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
return pow;
|
||||
}
|
||||
static integer dmaxloc_(double *w, integer s, integer e, integer *n)
|
||||
{
|
||||
double m; integer i, mi;
|
||||
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
|
||||
if (w[i-1]>m) mi=i ,m=w[i-1];
|
||||
return mi-s+1;
|
||||
}
|
||||
static integer smaxloc_(float *w, integer s, integer e, integer *n)
|
||||
{
|
||||
float m; integer i, mi;
|
||||
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
|
||||
if (w[i-1]>m) mi=i ,m=w[i-1];
|
||||
return mi-s+1;
|
||||
}
|
||||
#endif
|
||||
/* -- translated by f2c (version 20000121).
|
||||
You must link the resulting object file with the libraries:
|
||||
-lf2c -lm (in that order)
|
||||
*/
|
||||
|
||||
|
||||
|
||||
|
||||
/* Common Block Declarations */
|
||||
|
|
|
|||
|
|
@ -21,19 +21,6 @@ typedef float real;
|
|||
typedef double doublereal;
|
||||
typedef struct { real r, i; } complex;
|
||||
typedef struct { doublereal r, i; } doublecomplex;
|
||||
#ifdef _MSC_VER
|
||||
static inline _Fcomplex Cf(complex *z) {_Fcomplex zz={z->r , z->i}; return zz;}
|
||||
static inline _Dcomplex Cd(doublecomplex *z) {_Dcomplex zz={z->r , z->i};return zz;}
|
||||
static inline _Fcomplex * _pCf(complex *z) {return (_Fcomplex*)z;}
|
||||
static inline _Dcomplex * _pCd(doublecomplex *z) {return (_Dcomplex*)z;}
|
||||
#else
|
||||
static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
|
||||
static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
|
||||
static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
|
||||
static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
|
||||
#endif
|
||||
#define pCf(z) (*_pCf(z))
|
||||
#define pCd(z) (*_pCd(z))
|
||||
typedef int logical;
|
||||
typedef short int shortlogical;
|
||||
typedef char logical1;
|
||||
|
|
@ -242,124 +229,6 @@ typedef struct Namelist Namelist;
|
|||
/* procedure parameter types for -A and -C++ */
|
||||
|
||||
#define F2C_proc_par_types 1
|
||||
#ifdef __cplusplus
|
||||
typedef logical (*L_fp)(...);
|
||||
#else
|
||||
typedef logical (*L_fp)();
|
||||
#endif
|
||||
#if 0
|
||||
static float spow_ui(float x, integer n) {
|
||||
float pow=1.0; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x = 1/x;
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow *= x;
|
||||
if(u >>= 1) x *= x;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
return pow;
|
||||
}
|
||||
static double dpow_ui(double x, integer n) {
|
||||
double pow=1.0; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x = 1/x;
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow *= x;
|
||||
if(u >>= 1) x *= x;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
return pow;
|
||||
}
|
||||
#ifdef _MSC_VER
|
||||
static _Fcomplex cpow_ui(complex x, integer n) {
|
||||
complex pow={1.0,0.0}; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i;
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow.r *= x.r, pow.i *= x.i;
|
||||
if(u >>= 1) x.r *= x.r, x.i *= x.i;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
_Fcomplex p={pow.r, pow.i};
|
||||
return p;
|
||||
}
|
||||
#else
|
||||
static _Complex float cpow_ui(_Complex float x, integer n) {
|
||||
_Complex float pow=1.0; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x = 1/x;
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow *= x;
|
||||
if(u >>= 1) x *= x;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
return pow;
|
||||
}
|
||||
#endif
|
||||
#ifdef _MSC_VER
|
||||
static _Dcomplex zpow_ui(_Dcomplex x, integer n) {
|
||||
_Dcomplex pow={1.0,0.0}; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1];
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1];
|
||||
if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1];
|
||||
else break;
|
||||
}
|
||||
}
|
||||
_Dcomplex p = {pow._Val[0], pow._Val[1]};
|
||||
return p;
|
||||
}
|
||||
#else
|
||||
static _Complex double zpow_ui(_Complex double x, integer n) {
|
||||
_Complex double pow=1.0; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x = 1/x;
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow *= x;
|
||||
if(u >>= 1) x *= x;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
return pow;
|
||||
}
|
||||
#endif
|
||||
static integer pow_ii(integer x, integer n) {
|
||||
integer pow; unsigned long int u;
|
||||
if (n <= 0) {
|
||||
if (n == 0 || x == 1) pow = 1;
|
||||
else if (x != -1) pow = x == 0 ? 1/x : 0;
|
||||
else n = -n;
|
||||
}
|
||||
if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
|
||||
u = n;
|
||||
for(pow = 1; ; ) {
|
||||
if(u & 01) pow *= x;
|
||||
if(u >>= 1) x *= x;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
return pow;
|
||||
}
|
||||
static integer dmaxloc_(double *w, integer s, integer e, integer *n)
|
||||
{
|
||||
double m; integer i, mi;
|
||||
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
|
||||
if (w[i-1]>m) mi=i ,m=w[i-1];
|
||||
return mi-s+1;
|
||||
}
|
||||
static integer smaxloc_(float *w, integer s, integer e, integer *n)
|
||||
{
|
||||
float m; integer i, mi;
|
||||
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
|
||||
if (w[i-1]>m) mi=i ,m=w[i-1];
|
||||
return mi-s+1;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Common Block Declarations */
|
||||
|
||||
|
|
@ -375,16 +244,16 @@ struct {
|
|||
static integer c__1 = 1;
|
||||
static doublereal c_b34 = 1.;
|
||||
|
||||
/* Main program */ int main()
|
||||
/* Main program */ int main(void)
|
||||
{
|
||||
/* Initialized data */
|
||||
|
||||
static doublereal sfac = 9.765625e-4;
|
||||
|
||||
/* Local variables */
|
||||
extern /* Subroutine */ int check0_(), check1_(), check2_(), check3_();
|
||||
extern /* Subroutine */ int check0_(doublereal*), check1_(doublereal*), check2_(doublereal*), check3_(doublereal*);
|
||||
static integer ic;
|
||||
extern /* Subroutine */ int header_();
|
||||
extern /* Subroutine */ int header_(void);
|
||||
|
||||
/* Test program for the DOUBLE PRECISION Level 1 CBLAS. */
|
||||
/* Based upon the original CBLAS test routine together with: */
|
||||
|
|
@ -431,7 +300,7 @@ static doublereal c_b34 = 1.;
|
|||
|
||||
} /* MAIN__ */
|
||||
|
||||
/* Subroutine */ int header_()
|
||||
/* Subroutine */ int header_(void)
|
||||
{
|
||||
/* Initialized data */
|
||||
|
||||
|
|
@ -450,8 +319,7 @@ static doublereal c_b34 = 1.;
|
|||
|
||||
} /* header_ */
|
||||
|
||||
/* Subroutine */ int check0_(sfac)
|
||||
doublereal *sfac;
|
||||
/* Subroutine */ int check0_(doublereal* sfac)
|
||||
{
|
||||
/* Initialized data */
|
||||
|
||||
|
|
@ -464,7 +332,7 @@ doublereal *sfac;
|
|||
|
||||
/* Local variables */
|
||||
static integer k;
|
||||
extern /* Subroutine */ int drotgtest_(), stest1_();
|
||||
extern /* Subroutine */ int drotgtest_(doublereal*,doublereal*,doublereal*,doublereal*), stest1_(doublereal*,doublereal*,doublereal*,doublereal*);
|
||||
static doublereal sa, sb, sc, ss;
|
||||
|
||||
/* .. Parameters .. */
|
||||
|
|
@ -509,8 +377,7 @@ L40:
|
|||
return 0;
|
||||
} /* check0_ */
|
||||
|
||||
/* Subroutine */ int check1_(sfac)
|
||||
doublereal *sfac;
|
||||
/* Subroutine */ int check1_(doublereal* sfac)
|
||||
{
|
||||
/* Initialized data */
|
||||
|
||||
|
|
@ -535,14 +402,14 @@ doublereal *sfac;
|
|||
|
||||
/* Local variables */
|
||||
static integer i__;
|
||||
extern doublereal dnrm2test_();
|
||||
extern doublereal dnrm2test_(integer*, doublereal*, integer*);
|
||||
static doublereal stemp[1], strue[8];
|
||||
extern /* Subroutine */ int stest_(), dscaltest_();
|
||||
extern doublereal dasumtest_();
|
||||
extern /* Subroutine */ int itest1_(), stest1_();
|
||||
extern /* Subroutine */ int stest_(integer*,doublereal*,doublereal*,doublereal*,doublereal*), dscaltest_(integer*,doublereal*,doublereal*,integer*);
|
||||
extern doublereal dasumtest_(integer*,doublereal*,integer*);
|
||||
extern /* Subroutine */ int itest1_(integer*,integer*), stest1_(doublereal*,doublereal*,doublereal*,doublereal*);
|
||||
static doublereal sx[8];
|
||||
static integer np1;
|
||||
extern integer idamaxtest_();
|
||||
extern integer idamaxtest_(integer*,doublereal*,integer*);
|
||||
static integer len;
|
||||
|
||||
/* .. Parameters .. */
|
||||
|
|
@ -603,8 +470,7 @@ doublereal *sfac;
|
|||
return 0;
|
||||
} /* check1_ */
|
||||
|
||||
/* Subroutine */ int check2_(sfac)
|
||||
doublereal *sfac;
|
||||
/* Subroutine */ int check2_(doublereal* sfac)
|
||||
{
|
||||
/* Initialized data */
|
||||
|
||||
|
|
@ -649,10 +515,10 @@ doublereal *sfac;
|
|||
|
||||
/* Local variables */
|
||||
static integer lenx, leny;
|
||||
extern doublereal ddottest_();
|
||||
extern doublereal ddottest_(integer*,doublereal*,integer*,doublereal*,integer*);
|
||||
static integer i__, j, ksize;
|
||||
extern /* Subroutine */ int stest_(), dcopytest_(), dswaptest_(),
|
||||
daxpytest_(), stest1_();
|
||||
extern /* Subroutine */ int stest_(integer*,doublereal*,doublereal*,doublereal*,doublereal*), dcopytest_(integer*,doublereal*,integer*,doublereal*,integer*), dswaptest_(integer*,doublereal*,integer*,doublereal*,integer*),
|
||||
daxpytest_(integer*,doublereal*,doublereal*,integer*,doublereal*,integer*), stest1_(doublereal*,doublereal*,doublereal*,doublereal*);
|
||||
static integer ki, kn, mx, my;
|
||||
static doublereal sx[7], sy[7], stx[7], sty[7];
|
||||
|
||||
|
|
@ -733,8 +599,7 @@ doublereal *sfac;
|
|||
return 0;
|
||||
} /* check2_ */
|
||||
|
||||
/* Subroutine */ int check3_(sfac)
|
||||
doublereal *sfac;
|
||||
/* Subroutine */ int check3_(doublereal* sfac)
|
||||
{
|
||||
/* Initialized data */
|
||||
|
||||
|
|
@ -753,9 +618,9 @@ doublereal *sfac;
|
|||
;
|
||||
|
||||
/* Local variables */
|
||||
extern /* Subroutine */ int drottest_();
|
||||
extern /* Subroutine */ int drottest_(integer*,doublereal*,integer*,doublereal*,integer*,doublereal*,doublereal*);
|
||||
static integer i__, k, ksize;
|
||||
extern /* Subroutine */int stest_(), drotmtest_();
|
||||
extern /* Subroutine */int stest_(integer*,doublereal*,doublereal*,doublereal*,doublereal*), drotmtest_(integer*,doublereal*,integer*,doublereal*,integer*,doublereal*);
|
||||
static integer ki, kn;
|
||||
static doublereal dparam[5], sx[10], sy[10], stx[10], sty[10];
|
||||
|
||||
|
|
@ -826,9 +691,7 @@ doublereal *sfac;
|
|||
return 0;
|
||||
} /* check3_ */
|
||||
|
||||
/* Subroutine */ int stest_(len, scomp, strue, ssize, sfac)
|
||||
integer *len;
|
||||
doublereal *scomp, *strue, *ssize, *sfac;
|
||||
/* Subroutine */ int stest_(integer* len, doublereal* scomp, doublereal* strue, doublereal* ssize, doublereal* sfac)
|
||||
{
|
||||
/* System generated locals */
|
||||
integer i__1;
|
||||
|
|
@ -836,7 +699,7 @@ doublereal *scomp, *strue, *ssize, *sfac;
|
|||
|
||||
/* Local variables */
|
||||
static integer i__;
|
||||
extern doublereal sdiff_();
|
||||
extern doublereal sdiff_(doublereal*,doublereal*);
|
||||
static doublereal sd;
|
||||
|
||||
/* ********************************* STEST ************************** */
|
||||
|
|
@ -892,11 +755,10 @@ L40:
|
|||
|
||||
} /* stest_ */
|
||||
|
||||
/* Subroutine */ int stest1_(scomp1, strue1, ssize, sfac)
|
||||
doublereal *scomp1, *strue1, *ssize, *sfac;
|
||||
/* Subroutine */ int stest1_(doublereal* scomp1, doublereal* strue1, doublereal* ssize, doublereal* sfac)
|
||||
{
|
||||
static doublereal scomp[1], strue[1];
|
||||
extern /* Subroutine */ int stest_();
|
||||
extern /* Subroutine */ int stest_(integer*, doublereal*, doublereal*, doublereal*, doublereal*);
|
||||
|
||||
/* ************************* STEST1 ***************************** */
|
||||
|
||||
|
|
@ -923,8 +785,7 @@ doublereal *scomp1, *strue1, *ssize, *sfac;
|
|||
return 0;
|
||||
} /* stest1_ */
|
||||
|
||||
doublereal sdiff_(sa, sb)
|
||||
doublereal *sa, *sb;
|
||||
doublereal sdiff_(doublereal* sa, doublereal* sb)
|
||||
{
|
||||
/* System generated locals */
|
||||
doublereal ret_val;
|
||||
|
|
@ -938,8 +799,7 @@ doublereal *sa, *sb;
|
|||
return ret_val;
|
||||
} /* sdiff_ */
|
||||
|
||||
/* Subroutine */ int itest1_(icomp, itrue)
|
||||
integer *icomp, *itrue;
|
||||
/* Subroutine */ int itest1_(integer* icomp, integer* itrue)
|
||||
{
|
||||
/* Local variables */
|
||||
static integer id;
|
||||
|
|
|
|||
|
|
@ -242,129 +242,6 @@ typedef struct Namelist Namelist;
|
|||
/* procedure parameter types for -A and -C++ */
|
||||
|
||||
#define F2C_proc_par_types 1
|
||||
#ifdef __cplusplus
|
||||
typedef logical (*L_fp)(...);
|
||||
#else
|
||||
typedef logical (*L_fp)();
|
||||
#endif
|
||||
#if 0
|
||||
static float spow_ui(float x, integer n) {
|
||||
float pow=1.0; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x = 1/x;
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow *= x;
|
||||
if(u >>= 1) x *= x;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
return pow;
|
||||
}
|
||||
static double dpow_ui(double x, integer n) {
|
||||
double pow=1.0; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x = 1/x;
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow *= x;
|
||||
if(u >>= 1) x *= x;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
return pow;
|
||||
}
|
||||
#ifdef _MSC_VER
|
||||
static _Fcomplex cpow_ui(complex x, integer n) {
|
||||
complex pow={1.0,0.0}; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i;
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow.r *= x.r, pow.i *= x.i;
|
||||
if(u >>= 1) x.r *= x.r, x.i *= x.i;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
_Fcomplex p={pow.r, pow.i};
|
||||
return p;
|
||||
}
|
||||
#else
|
||||
static _Complex float cpow_ui(_Complex float x, integer n) {
|
||||
_Complex float pow=1.0; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x = 1/x;
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow *= x;
|
||||
if(u >>= 1) x *= x;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
return pow;
|
||||
}
|
||||
#endif
|
||||
#ifdef _MSC_VER
|
||||
static _Dcomplex zpow_ui(_Dcomplex x, integer n) {
|
||||
_Dcomplex pow={1.0,0.0}; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1];
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1];
|
||||
if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1];
|
||||
else break;
|
||||
}
|
||||
}
|
||||
_Dcomplex p = {pow._Val[0], pow._Val[1]};
|
||||
return p;
|
||||
}
|
||||
#else
|
||||
static _Complex double zpow_ui(_Complex double x, integer n) {
|
||||
_Complex double pow=1.0; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x = 1/x;
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow *= x;
|
||||
if(u >>= 1) x *= x;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
return pow;
|
||||
}
|
||||
#endif
|
||||
static integer pow_ii(integer x, integer n) {
|
||||
integer pow; unsigned long int u;
|
||||
if (n <= 0) {
|
||||
if (n == 0 || x == 1) pow = 1;
|
||||
else if (x != -1) pow = x == 0 ? 1/x : 0;
|
||||
else n = -n;
|
||||
}
|
||||
if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
|
||||
u = n;
|
||||
for(pow = 1; ; ) {
|
||||
if(u & 01) pow *= x;
|
||||
if(u >>= 1) x *= x;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
return pow;
|
||||
}
|
||||
static integer dmaxloc_(double *w, integer s, integer e, integer *n)
|
||||
{
|
||||
double m; integer i, mi;
|
||||
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
|
||||
if (w[i-1]>m) mi=i ,m=w[i-1];
|
||||
return mi-s+1;
|
||||
}
|
||||
static integer smaxloc_(float *w, integer s, integer e, integer *n)
|
||||
{
|
||||
float m; integer i, mi;
|
||||
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
|
||||
if (w[i-1]>m) mi=i ,m=w[i-1];
|
||||
return mi-s+1;
|
||||
}
|
||||
#endif
|
||||
/* -- translated by f2c (version 20000121).
|
||||
You must link the resulting object file with the libraries:
|
||||
-lf2c -lm (in that order)
|
||||
*/
|
||||
|
||||
|
||||
|
||||
/* Common Block Declarations */
|
||||
|
|
@ -395,7 +272,7 @@ static integer c_n1 = -1;
|
|||
static integer c__0 = 0;
|
||||
static logical c_false = FALSE_;
|
||||
|
||||
/* Main program */ int main()
|
||||
/* Main program */ int main(void)
|
||||
{
|
||||
/* Initialized data */
|
||||
|
||||
|
|
@ -413,17 +290,21 @@ static logical c_false = FALSE_;
|
|||
static logical same;
|
||||
static integer ninc, nbet, ntra;
|
||||
static logical rewi;
|
||||
extern /* Subroutine */ int dchk1_(), dchk2_(), dchk3_(), dchk4_(),
|
||||
dchk5_(), dchk6_();
|
||||
extern /* Subroutine */ int dchk1_(char*, doublereal*, doublereal*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, integer*, integer*, doublereal*, integer*, doublereal*, integer*, integer*, integer*, integer*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, integer*, ftnlen);
|
||||
extern /* Subroutine */ int dchk2_(char*, doublereal*, doublereal*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, integer*, integer*, doublereal*, integer*, doublereal*, integer*, integer*, integer*, integer*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, integer*, ftnlen);
|
||||
extern /* Subroutine */ int dchk3_(char*, doublereal*, doublereal*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, integer*, integer*, integer*, integer*, integer*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, integer*, ftnlen);
|
||||
extern /* Subroutine */ int dchk4_(char*, doublereal*, doublereal*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, doublereal*, integer*, integer*, integer*, integer*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, integer*, ftnlen);
|
||||
extern /* Subroutine */ int dchk5_(char*, doublereal*, doublereal*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, doublereal*, integer*, integer*, integer*, integer*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, integer*, ftnlen);
|
||||
extern /* Subroutine */ int dchk6_(char*, doublereal*, doublereal*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, doublereal*, integer*, integer*, integer*, integer*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, integer*, ftnlen);
|
||||
static doublereal a[4225] /* was [65][65] */, g[65];
|
||||
static integer i__, j;
|
||||
extern doublereal ddiff_();
|
||||
extern doublereal ddiff_(doublereal*, doublereal*);
|
||||
static integer n;
|
||||
static logical fatal;
|
||||
static doublereal x[65], y[65], z__[130];
|
||||
static logical trace;
|
||||
static integer nidim;
|
||||
extern /* Subroutine */ int dmvch_();
|
||||
extern /* Subroutine */ int dmvch_(char*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, integer*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen);
|
||||
static char snaps[32], trans[1];
|
||||
static integer isnum;
|
||||
static logical ltest[16];
|
||||
|
|
@ -437,11 +318,11 @@ static logical c_false = FALSE_;
|
|||
static char snamet[12];
|
||||
static doublereal thresh;
|
||||
static logical rorder;
|
||||
extern /* Subroutine */ int cd2chke_();
|
||||
extern /* Subroutine */ void cd2chke_(char*, ftnlen);
|
||||
static integer layout;
|
||||
static logical ltestt, tsterr;
|
||||
static doublereal alf[7];
|
||||
extern logical lde_();
|
||||
extern logical lde_(doublereal*, doublereal*, integer*);
|
||||
static integer inc[7], nkb;
|
||||
static doublereal bet[7],eps,err;
|
||||
char tmpchar;
|
||||
|
|
@ -977,21 +858,7 @@ L240:
|
|||
|
||||
} /* MAIN__ */
|
||||
|
||||
/* Subroutine */ int dchk1_(sname, eps, thresh, nout, ntra, trace, rewi,
|
||||
fatal, nidim, idim, nkb, kb, nalf, alf, nbet, bet, ninc, inc, nmax,
|
||||
incmax, a, aa, as, x, xx, xs, y, yy, ys, yt, g, iorder, sname_len)
|
||||
char *sname;
|
||||
doublereal *eps, *thresh;
|
||||
integer *nout, *ntra;
|
||||
logical *trace, *rewi, *fatal;
|
||||
integer *nidim, *idim, *nkb, *kb, *nalf;
|
||||
doublereal *alf;
|
||||
integer *nbet;
|
||||
doublereal *bet;
|
||||
integer *ninc, *inc, *nmax, *incmax;
|
||||
doublereal *a, *aa, *as, *x, *xx, *xs, *y, *yy, *ys, *yt, *g;
|
||||
integer *iorder;
|
||||
ftnlen sname_len;
|
||||
/* Subroutine */ int dchk1_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nkb, integer* kb, integer* nalf, doublereal* alf, integer* nbet, doublereal* bet, integer* ninc, integer* inc, integer* nmax, integer* incmax, doublereal* a, doublereal* aa, doublereal* as, doublereal* x, doublereal* xx, doublereal* xs, doublereal* y, doublereal* yy, doublereal* ys, doublereal* yt, doublereal* g, integer* iorder, ftnlen sname_len)
|
||||
{
|
||||
/* Initialized data */
|
||||
|
||||
|
|
@ -1007,10 +874,10 @@ ftnlen sname_len;
|
|||
static integer incx, incy;
|
||||
static logical full, tran, null;
|
||||
static integer i__, m, n;
|
||||
extern /* Subroutine */ int dmake_();
|
||||
extern /* Subroutine */ int dmake_(char* , char*, char*, integer*, integer*, doublereal*, integer*, doublereal*, integer*, integer*, integer*, logical*, doublereal*, ftnlen, ftnlen, ftnlen);
|
||||
static doublereal alpha;
|
||||
static logical isame[13];
|
||||
extern /* Subroutine */ int dmvch_();
|
||||
extern /* Subroutine */ int dmvch_(char*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, integer*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen);
|
||||
static integer nargs;
|
||||
static logical reset;
|
||||
static integer incxs, incys;
|
||||
|
|
@ -1018,13 +885,14 @@ ftnlen sname_len;
|
|||
static integer ia, ib, ic;
|
||||
static logical banded;
|
||||
static integer nc, nd, im, in, kl, ml, nk, nl, ku, ix, iy, ms, lx, ly, ns;
|
||||
extern /* Subroutine */ int cdgbmv_(), cdgemv_();
|
||||
extern logical lderes_();
|
||||
extern /* Subroutine */ void cdgbmv_(integer*, char*, integer*, integer*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, integer*, ftnlen);
|
||||
extern /* Subroutine */ void cdgemv_(integer*, char*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, integer*, ftnlen);
|
||||
extern logical lderes_(char*, char*, integer*, integer*, doublereal*, doublereal*, integer*, ftnlen, ftnlen);
|
||||
static char ctrans[14];
|
||||
static doublereal errmax, transl;
|
||||
static char transs[1];
|
||||
static integer laa, lda;
|
||||
extern logical lde_();
|
||||
extern logical lde_(doublereal*, doublereal*, integer*);
|
||||
static doublereal als, bls, err;
|
||||
static integer iku, kls, kus;
|
||||
|
||||
|
|
@ -1429,21 +1297,7 @@ L140:
|
|||
|
||||
} /* dchk1_ */
|
||||
|
||||
/* Subroutine */ int dchk2_(sname, eps, thresh, nout, ntra, trace, rewi,
|
||||
fatal, nidim, idim, nkb, kb, nalf, alf, nbet, bet, ninc, inc, nmax,
|
||||
incmax, a, aa, as, x, xx, xs, y, yy, ys, yt, g, iorder, sname_len)
|
||||
char *sname;
|
||||
doublereal *eps, *thresh;
|
||||
integer *nout, *ntra;
|
||||
logical *trace, *rewi, *fatal;
|
||||
integer *nidim, *idim, *nkb, *kb, *nalf;
|
||||
doublereal *alf;
|
||||
integer *nbet;
|
||||
doublereal *bet;
|
||||
integer *ninc, *inc, *nmax, *incmax;
|
||||
doublereal *a, *aa, *as, *x, *xx, *xs, *y, *yy, *ys, *yt, *g;
|
||||
integer *iorder;
|
||||
ftnlen sname_len;
|
||||
/* Subroutine */ int dchk2_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nkb, integer* kb, integer* nalf, doublereal* alf, integer* nbet, doublereal* bet, integer* ninc, integer* inc, integer* nmax, integer* incmax, doublereal* a, doublereal* aa, doublereal* as, doublereal* x, doublereal* xx, doublereal* xs, doublereal* y, doublereal* yy, doublereal* ys, doublereal* yt, doublereal* g, integer* iorder, ftnlen sname_len)
|
||||
{
|
||||
/* Initialized data */
|
||||
|
||||
|
|
@ -1460,10 +1314,10 @@ ftnlen sname_len;
|
|||
static logical full, null;
|
||||
static char uplo[1];
|
||||
static integer i__, k, n;
|
||||
extern /* Subroutine */ int dmake_();
|
||||
extern /* Subroutine */ int dmake_(char* , char*, char*, integer*, integer*, doublereal*, integer*, doublereal*, integer*, integer*, integer*, logical*, doublereal*, ftnlen, ftnlen, ftnlen);
|
||||
static doublereal alpha;
|
||||
static logical isame[13];
|
||||
extern /* Subroutine */ int dmvch_();
|
||||
extern /* Subroutine */ int dmvch_(char*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, integer*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen);
|
||||
static integer nargs;
|
||||
static logical reset;
|
||||
static char cuplo[14];
|
||||
|
|
@ -1474,12 +1328,13 @@ ftnlen sname_len;
|
|||
static integer nc, ik, in;
|
||||
static logical packed;
|
||||
static integer nk, ks, ix, iy, ns, lx, ly;
|
||||
extern logical lderes_();
|
||||
extern /* Subroutine */ int cdsbmv_(), cdspmv_();
|
||||
extern logical lderes_(char*, char*, integer*, integer*, doublereal*, doublereal*, integer*, ftnlen, ftnlen);
|
||||
extern /* Subroutine */ void cdsbmv_(integer*, char*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, integer*, ftnlen);
|
||||
extern /* Subroutine */ void cdspmv_(integer*, char*, integer*, doublereal*, doublereal*, doublereal*, integer*, doublereal*, doublereal*, integer*, ftnlen);
|
||||
static doublereal errmax, transl;
|
||||
extern /* Subroutine */ int cdsymv_();
|
||||
extern /* Subroutine */ void cdsymv_(integer*, char*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, integer*, ftnlen);
|
||||
static integer laa, lda;
|
||||
extern logical lde_();
|
||||
extern logical lde_(doublereal*, doublereal*, integer*);
|
||||
static doublereal als, bls, err;
|
||||
|
||||
|
||||
|
|
@ -1882,17 +1737,7 @@ L130:
|
|||
|
||||
} /* dchk2_ */
|
||||
|
||||
/* Subroutine */ int dchk3_(sname, eps, thresh, nout, ntra, trace, rewi,
|
||||
fatal, nidim, idim, nkb, kb, ninc, inc, nmax, incmax, a, aa, as, x,
|
||||
xx, xs, xt, g, z__, iorder, sname_len)
|
||||
char *sname;
|
||||
doublereal *eps, *thresh;
|
||||
integer *nout, *ntra;
|
||||
logical *trace, *rewi, *fatal;
|
||||
integer *nidim, *idim, *nkb, *kb, *ninc, *inc, *nmax, *incmax;
|
||||
doublereal *a, *aa, *as, *x, *xx, *xs, *xt, *g, *z__;
|
||||
integer *iorder;
|
||||
ftnlen sname_len;
|
||||
/* Subroutine */ int dchk3_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nkb, integer* kb, integer* ninc, integer* inc, integer* nmax, integer* incmax, doublereal* a, doublereal* aa, doublereal* as, doublereal* x, doublereal* xx, doublereal* xs, doublereal* xt, doublereal* g, doublereal* z__, integer* iorder, ftnlen sname_len)
|
||||
{
|
||||
/* Initialized data */
|
||||
|
||||
|
|
@ -1911,10 +1756,10 @@ ftnlen sname_len;
|
|||
static logical full, null;
|
||||
static char uplo[1], cdiag[14];
|
||||
static integer i__, k, n;
|
||||
extern /* Subroutine */ int dmake_();
|
||||
extern /* Subroutine */ int dmake_(char* , char*, char*, integer*, integer*, doublereal*, integer*, doublereal*, integer*, integer*, integer*, logical*, doublereal*, ftnlen, ftnlen, ftnlen);
|
||||
static char diags[1];
|
||||
static logical isame[13];
|
||||
extern /* Subroutine */ int dmvch_();
|
||||
extern /* Subroutine */ int dmvch_(char*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, integer*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen);
|
||||
static integer nargs;
|
||||
static logical reset;
|
||||
static char cuplo[14];
|
||||
|
|
@ -1924,16 +1769,19 @@ ftnlen sname_len;
|
|||
static integer nc, ik, in;
|
||||
static logical packed;
|
||||
static integer nk, ks, ix, ns, lx;
|
||||
extern logical lderes_();
|
||||
extern /* Subroutine */ int cdtbmv_(), cdtbsv_();
|
||||
extern logical lderes_(char*, char*, integer*, integer*, doublereal*, doublereal*, integer*, ftnlen, ftnlen);
|
||||
extern /* Subroutine */ void cdtbmv_(integer*, char*, char*, char*, integer*, integer*, doublereal*, integer*, doublereal*, integer*, ftnlen, ftnlen, ftnlen);
|
||||
extern /* Subroutine */ void cdtbsv_(integer*, char*, char*, char*, integer*, integer*, doublereal*, integer*, doublereal*, integer*, ftnlen, ftnlen, ftnlen);
|
||||
static char ctrans[14];
|
||||
static doublereal errmax;
|
||||
extern /* Subroutine */ int cdtpmv_(), cdtrmv_();
|
||||
extern /* Subroutine */ void cdtpmv_(integer*, char*, char*, char*, integer*, doublereal*, doublereal*, integer*, ftnlen, ftnlen, ftnlen);
|
||||
extern /* Subroutine */ void cdtrmv_(integer*, char*, char*, char*, integer*, doublereal*, integer*, doublereal*, integer*, ftnlen, ftnlen, ftnlen);
|
||||
static doublereal transl;
|
||||
extern /* Subroutine */ int cdtpsv_(), cdtrsv_();
|
||||
extern /* Subroutine */ void cdtpsv_(integer*, char*, char*, char*, integer*, doublereal*, doublereal*, integer*, ftnlen, ftnlen, ftnlen);
|
||||
extern /* Subroutine */ void cdtrsv_(integer*, char*, char*, char*, integer*, doublereal*, integer*, doublereal*, integer*, ftnlen, ftnlen, ftnlen);
|
||||
static char transs[1];
|
||||
static integer laa, icd, lda;
|
||||
extern logical lde_();
|
||||
extern logical lde_(doublereal*, doublereal*, integer*);
|
||||
static integer ict, icu;
|
||||
static doublereal err;
|
||||
|
||||
|
|
@ -2388,19 +2236,7 @@ L130:
|
|||
|
||||
} /* dchk3_ */
|
||||
|
||||
/* Subroutine */ int dchk4_(sname, eps, thresh, nout, ntra, trace, rewi,
|
||||
fatal, nidim, idim, nalf, alf, ninc, inc, nmax, incmax, a, aa, as, x,
|
||||
xx, xs, y, yy, ys, yt, g, z__, iorder, sname_len)
|
||||
char *sname;
|
||||
doublereal *eps, *thresh;
|
||||
integer *nout, *ntra;
|
||||
logical *trace, *rewi, *fatal;
|
||||
integer *nidim, *idim, *nalf;
|
||||
doublereal *alf;
|
||||
integer *ninc, *inc, *nmax, *incmax;
|
||||
doublereal *a, *aa, *as, *x, *xx, *xs, *y, *yy, *ys, *yt, *g, *z__;
|
||||
integer *iorder;
|
||||
ftnlen sname_len;
|
||||
/* Subroutine */ int dchk4_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, doublereal* alf, integer* ninc, integer* inc, integer* nmax, integer* incmax, doublereal* a, doublereal* aa, doublereal* as, doublereal* x, doublereal* xx, doublereal* xs, doublereal* y, doublereal* yy, doublereal* ys, doublereal* yt, doublereal* g, doublereal* z__, integer* iorder, ftnlen sname_len)
|
||||
{
|
||||
/* System generated locals */
|
||||
integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6;
|
||||
|
|
@ -2411,17 +2247,18 @@ ftnlen sname_len;
|
|||
static integer incx, incy;
|
||||
static logical null;
|
||||
static integer i__, j, m, n;
|
||||
extern /* Subroutine */ int dmake_(), cdger_();
|
||||
extern /* Subroutine */ void cdger_(integer*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, integer*);
|
||||
extern /* Subroutine */ int dmake_(char* , char*, char*, integer*, integer*, doublereal*, integer*, doublereal*, integer*, integer*, integer*, logical*, doublereal*, ftnlen, ftnlen, ftnlen);
|
||||
static doublereal alpha, w[1];
|
||||
static logical isame[13];
|
||||
extern /* Subroutine */ int dmvch_();
|
||||
extern /* Subroutine */ int dmvch_(char*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, integer*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen);
|
||||
static integer nargs;
|
||||
static logical reset;
|
||||
static integer incxs, incys, ia, nc, nd, im, in, ms, ix, iy, ns, lx, ly;
|
||||
extern logical lderes_();
|
||||
extern logical lderes_(char*, char*, integer*, integer*, doublereal*, doublereal*, integer*, ftnlen, ftnlen);
|
||||
static doublereal errmax, transl;
|
||||
static integer laa, lda;
|
||||
extern logical lde_();
|
||||
extern logical lde_(doublereal*, doublereal*, integer*);
|
||||
static doublereal als, err;
|
||||
|
||||
|
||||
|
|
@ -2727,19 +2564,7 @@ L150:
|
|||
|
||||
} /* dchk4_ */
|
||||
|
||||
/* Subroutine */ int dchk5_(sname, eps, thresh, nout, ntra, trace, rewi,
|
||||
fatal, nidim, idim, nalf, alf, ninc, inc, nmax, incmax, a, aa, as, x,
|
||||
xx, xs, y, yy, ys, yt, g, z__, iorder, sname_len)
|
||||
char *sname;
|
||||
doublereal *eps, *thresh;
|
||||
integer *nout, *ntra;
|
||||
logical *trace, *rewi, *fatal;
|
||||
integer *nidim, *idim, *nalf;
|
||||
doublereal *alf;
|
||||
integer *ninc, *inc, *nmax, *incmax;
|
||||
doublereal *a, *aa, *as, *x, *xx, *xs, *y, *yy, *ys, *yt, *g, *z__;
|
||||
integer *iorder;
|
||||
ftnlen sname_len;
|
||||
/* Subroutine */ int dchk5_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, doublereal* alf, integer* ninc, integer* inc, integer* nmax, integer* incmax, doublereal* a, doublereal* aa, doublereal* as, doublereal* x, doublereal* xx, doublereal* xs, doublereal* y, doublereal* yy, doublereal* ys, doublereal* yt, doublereal* g, doublereal* z__, integer* iorder, ftnlen sname_len)
|
||||
{
|
||||
/* Initialized data */
|
||||
|
||||
|
|
@ -2757,25 +2582,25 @@ ftnlen sname_len;
|
|||
static logical full, null;
|
||||
static char uplo[1];
|
||||
static integer i__, j, n;
|
||||
extern /* Subroutine */ int dmake_();
|
||||
extern /* Subroutine */ int dmake_(char* , char*, char*, integer*, integer*, doublereal*, integer*, doublereal*, integer*, integer*, integer*, logical*, doublereal*, ftnlen, ftnlen, ftnlen);
|
||||
static doublereal alpha, w[1];
|
||||
static logical isame[13];
|
||||
extern /* Subroutine */ int dmvch_();
|
||||
extern /* Subroutine */ int dmvch_(char*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, integer*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen);
|
||||
static integer nargs;
|
||||
extern /* Subroutine */ int cdspr_();
|
||||
extern /* Subroutine */ void cdspr_(integer*, char*, integer*, doublereal*, doublereal*, integer*, doublereal*, ftnlen);
|
||||
static logical reset;
|
||||
static char cuplo[14];
|
||||
static integer incxs;
|
||||
extern /* Subroutine */ int cdsyr_();
|
||||
extern /* Subroutine */ void cdsyr_(integer*, char*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, ftnlen);
|
||||
static logical upper;
|
||||
static char uplos[1];
|
||||
static integer ia, ja, ic, nc, jj, lj, in;
|
||||
static logical packed;
|
||||
static integer ix, ns, lx;
|
||||
extern logical lderes_();
|
||||
extern logical lderes_(char*, char*, integer*, integer*, doublereal*, doublereal*, integer*, ftnlen, ftnlen);
|
||||
static doublereal errmax, transl;
|
||||
static integer laa, lda;
|
||||
extern logical lde_();
|
||||
extern logical lde_(doublereal*, doublereal*, integer*);
|
||||
static doublereal als, err;
|
||||
|
||||
|
||||
|
|
@ -3096,19 +2921,7 @@ L130:
|
|||
|
||||
} /* dchk5_ */
|
||||
|
||||
/* Subroutine */ int dchk6_(sname, eps, thresh, nout, ntra, trace, rewi,
|
||||
fatal, nidim, idim, nalf, alf, ninc, inc, nmax, incmax, a, aa, as, x,
|
||||
xx, xs, y, yy, ys, yt, g, z__, iorder, sname_len)
|
||||
char *sname;
|
||||
doublereal *eps, *thresh;
|
||||
integer *nout, *ntra;
|
||||
logical *trace, *rewi, *fatal;
|
||||
integer *nidim, *idim, *nalf;
|
||||
doublereal *alf;
|
||||
integer *ninc, *inc, *nmax, *incmax;
|
||||
doublereal *a, *aa, *as, *x, *xx, *xs, *y, *yy, *ys, *yt, *g, *z__;
|
||||
integer *iorder;
|
||||
ftnlen sname_len;
|
||||
/* Subroutine */ int dchk6_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, doublereal* alf, integer* ninc, integer* inc, integer* nmax, integer* incmax, doublereal* a, doublereal* aa, doublereal* as, doublereal* x, doublereal* xx, doublereal* xs, doublereal* y, doublereal* yy, doublereal* ys, doublereal* yt, doublereal* g, doublereal* z__, integer* iorder, ftnlen sname_len)
|
||||
{
|
||||
/* Initialized data */
|
||||
|
||||
|
|
@ -3125,24 +2938,25 @@ ftnlen sname_len;
|
|||
static logical full, null;
|
||||
static char uplo[1];
|
||||
static integer i__, j, n;
|
||||
extern /* Subroutine */ int dmake_();
|
||||
extern /* Subroutine */ int dmake_(char* , char*, char*, integer*, integer*, doublereal*, integer*, doublereal*, integer*, integer*, integer*, logical*, doublereal*, ftnlen, ftnlen, ftnlen);
|
||||
static doublereal alpha, w[2];
|
||||
static logical isame[13];
|
||||
extern /* Subroutine */ int dmvch_();
|
||||
extern /* Subroutine */ int dmvch_(char*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, integer*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen);
|
||||
static integer nargs;
|
||||
static logical reset;
|
||||
static char cuplo[14];
|
||||
static integer incxs, incys;
|
||||
static logical upper;
|
||||
static char uplos[1];
|
||||
extern /* Subroutine */ int cdspr2_(), cdsyr2_();
|
||||
extern /* Subroutine */ void cdspr2_(integer*, char*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, ftnlen);
|
||||
extern /* Subroutine */ void cdsyr2_(integer*, char*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, integer*, ftnlen);
|
||||
static integer ia, ja, ic, nc, jj, lj, in;
|
||||
static logical packed;
|
||||
static integer ix, iy, ns, lx, ly;
|
||||
extern logical lderes_();
|
||||
extern logical lderes_(char*, char*, integer*, integer*, doublereal*, doublereal*, integer*, ftnlen, ftnlen);
|
||||
static doublereal errmax, transl;
|
||||
static integer laa, lda;
|
||||
extern logical lde_();
|
||||
extern logical lde_(doublereal*, doublereal*, integer*);
|
||||
static doublereal als, err;
|
||||
|
||||
/* Tests DSYR2 and DSPR2. */
|
||||
|
|
@ -3508,25 +3322,13 @@ L170:
|
|||
|
||||
} /* dchk6_ */
|
||||
|
||||
/* Subroutine */ int dmake_(type__, uplo, diag, m, n, a, nmax, aa, lda, kl,
|
||||
ku, reset, transl, type_len, uplo_len, diag_len)
|
||||
char *type__, *uplo, *diag;
|
||||
integer *m, *n;
|
||||
doublereal *a;
|
||||
integer *nmax;
|
||||
doublereal *aa;
|
||||
integer *lda, *kl, *ku;
|
||||
logical *reset;
|
||||
doublereal *transl;
|
||||
ftnlen type_len;
|
||||
ftnlen uplo_len;
|
||||
ftnlen diag_len;
|
||||
/* Subroutine */ int dmake_(char* type__, char* uplo, char* diag, integer* m, integer* n, doublereal* a, integer* nmax, doublereal* aa, integer* lda, integer* kl, integer* ku, logical* reset, doublereal* transl, ftnlen type_len, ftnlen uplo_len, ftnlen diag_len)
|
||||
{
|
||||
/* System generated locals */
|
||||
integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
|
||||
|
||||
/* Local variables */
|
||||
extern doublereal dbeg_();
|
||||
extern doublereal dbeg_(logical* );
|
||||
static integer ibeg, iend, ioff;
|
||||
static logical unit;
|
||||
static integer i__, j;
|
||||
|
|
@ -3752,28 +3554,14 @@ ftnlen diag_len;
|
|||
|
||||
} /* dmake_ */
|
||||
|
||||
/* Subroutine */ int dmvch_(trans, m, n, alpha, a, nmax, x, incx, beta, y,
|
||||
incy, yt, g, yy, eps, err, fatal, nout, mv, trans_len)
|
||||
char *trans;
|
||||
integer *m, *n;
|
||||
doublereal *alpha, *a;
|
||||
integer *nmax;
|
||||
doublereal *x;
|
||||
integer *incx;
|
||||
doublereal *beta, *y;
|
||||
integer *incy;
|
||||
doublereal *yt, *g, *yy, *eps, *err;
|
||||
logical *fatal;
|
||||
integer *nout;
|
||||
logical *mv;
|
||||
ftnlen trans_len;
|
||||
/* Subroutine */ int dmvch_(char* trans, integer* m, integer* n, doublereal* alpha, doublereal* a, integer* nmax, doublereal* x, integer* incx, doublereal* beta, doublereal* y, integer* incy, doublereal* yt, doublereal* g, doublereal* yy, doublereal* eps, doublereal* err, logical* fatal, integer* nout, logical* mv, ftnlen trans_len)
|
||||
{
|
||||
/* System generated locals */
|
||||
integer a_dim1, a_offset, i__1, i__2;
|
||||
doublereal d__1;
|
||||
|
||||
/* Builtin functions */
|
||||
double sqrt();
|
||||
double sqrt(double);
|
||||
|
||||
/* Local variables */
|
||||
static doublereal erri;
|
||||
|
|
@ -3902,9 +3690,7 @@ L70:
|
|||
|
||||
} /* dmvch_ */
|
||||
|
||||
logical lde_(ri, rj, lr)
|
||||
doublereal *ri, *rj;
|
||||
integer *lr;
|
||||
logical lde_(doublereal* ri, doublereal* rj, integer* lr)
|
||||
{
|
||||
/* System generated locals */
|
||||
integer i__1;
|
||||
|
|
@ -3949,13 +3735,7 @@ L30:
|
|||
|
||||
} /* lde_ */
|
||||
|
||||
logical lderes_(type__, uplo, m, n, aa, as, lda, type_len, uplo_len)
|
||||
char *type__, *uplo;
|
||||
integer *m, *n;
|
||||
doublereal *aa, *as;
|
||||
integer *lda;
|
||||
ftnlen type_len;
|
||||
ftnlen uplo_len;
|
||||
logical lderes_(char* type__, char* uplo, integer* m, integer* n, doublereal* aa, doublereal* as, integer* lda, ftnlen type_len, ftnlen uplo_len)
|
||||
{
|
||||
/* System generated locals */
|
||||
integer aa_dim1, aa_offset, as_dim1, as_offset, i__1, i__2;
|
||||
|
|
@ -4042,8 +3822,7 @@ L80:
|
|||
|
||||
} /* lderes_ */
|
||||
|
||||
doublereal dbeg_(reset)
|
||||
logical *reset;
|
||||
doublereal dbeg_(logical* reset)
|
||||
{
|
||||
/* System generated locals */
|
||||
doublereal ret_val;
|
||||
|
|
@ -4094,8 +3873,7 @@ L10:
|
|||
|
||||
} /* dbeg_ */
|
||||
|
||||
doublereal ddiff_(x, y)
|
||||
doublereal *x, *y;
|
||||
doublereal ddiff_(doublereal* x, doublereal* y)
|
||||
{
|
||||
/* System generated locals */
|
||||
doublereal ret_val;
|
||||
|
|
|
|||
|
|
@ -242,129 +242,6 @@ typedef struct Namelist Namelist;
|
|||
/* procedure parameter types for -A and -C++ */
|
||||
|
||||
#define F2C_proc_par_types 1
|
||||
#ifdef __cplusplus
|
||||
typedef logical (*L_fp)(...);
|
||||
#else
|
||||
typedef logical (*L_fp)();
|
||||
#endif
|
||||
#if 0
|
||||
static float spow_ui(float x, integer n) {
|
||||
float pow=1.0; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x = 1/x;
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow *= x;
|
||||
if(u >>= 1) x *= x;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
return pow;
|
||||
}
|
||||
static double dpow_ui(double x, integer n) {
|
||||
double pow=1.0; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x = 1/x;
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow *= x;
|
||||
if(u >>= 1) x *= x;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
return pow;
|
||||
}
|
||||
#ifdef _MSC_VER
|
||||
static _Fcomplex cpow_ui(complex x, integer n) {
|
||||
complex pow={1.0,0.0}; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i;
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow.r *= x.r, pow.i *= x.i;
|
||||
if(u >>= 1) x.r *= x.r, x.i *= x.i;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
_Fcomplex p={pow.r, pow.i};
|
||||
return p;
|
||||
}
|
||||
#else
|
||||
static _Complex float cpow_ui(_Complex float x, integer n) {
|
||||
_Complex float pow=1.0; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x = 1/x;
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow *= x;
|
||||
if(u >>= 1) x *= x;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
return pow;
|
||||
}
|
||||
#endif
|
||||
#ifdef _MSC_VER
|
||||
static _Dcomplex zpow_ui(_Dcomplex x, integer n) {
|
||||
_Dcomplex pow={1.0,0.0}; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1];
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1];
|
||||
if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1];
|
||||
else break;
|
||||
}
|
||||
}
|
||||
_Dcomplex p = {pow._Val[0], pow._Val[1]};
|
||||
return p;
|
||||
}
|
||||
#else
|
||||
static _Complex double zpow_ui(_Complex double x, integer n) {
|
||||
_Complex double pow=1.0; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x = 1/x;
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow *= x;
|
||||
if(u >>= 1) x *= x;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
return pow;
|
||||
}
|
||||
#endif
|
||||
static integer pow_ii(integer x, integer n) {
|
||||
integer pow; unsigned long int u;
|
||||
if (n <= 0) {
|
||||
if (n == 0 || x == 1) pow = 1;
|
||||
else if (x != -1) pow = x == 0 ? 1/x : 0;
|
||||
else n = -n;
|
||||
}
|
||||
if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
|
||||
u = n;
|
||||
for(pow = 1; ; ) {
|
||||
if(u & 01) pow *= x;
|
||||
if(u >>= 1) x *= x;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
return pow;
|
||||
}
|
||||
static integer dmaxloc_(double *w, integer s, integer e, integer *n)
|
||||
{
|
||||
double m; integer i, mi;
|
||||
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
|
||||
if (w[i-1]>m) mi=i ,m=w[i-1];
|
||||
return mi-s+1;
|
||||
}
|
||||
static integer smaxloc_(float *w, integer s, integer e, integer *n)
|
||||
{
|
||||
float m; integer i, mi;
|
||||
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
|
||||
if (w[i-1]>m) mi=i ,m=w[i-1];
|
||||
return mi-s+1;
|
||||
}
|
||||
#endif
|
||||
/* -- translated by f2c (version 20000121).
|
||||
You must link the resulting object file with the libraries:
|
||||
-lf2c -lm (in that order)
|
||||
*/
|
||||
|
||||
|
||||
|
||||
/* Common Block Declarations */
|
||||
|
|
@ -393,7 +270,7 @@ static logical c_true = TRUE_;
|
|||
static integer c__0 = 0;
|
||||
static logical c_false = FALSE_;
|
||||
|
||||
/* Main program MAIN__() */ int main()
|
||||
/* Main program MAIN__() */ int main(void)
|
||||
{
|
||||
/* Initialized data */
|
||||
|
||||
|
|
@ -403,25 +280,24 @@ static logical c_false = FALSE_;
|
|||
integer i__1, i__2, i__3;
|
||||
doublereal d__1;
|
||||
|
||||
/* Builtin functions */
|
||||
integer s_rsle(), do_lio(), e_rsle(), f_open(), s_wsfe(), do_fio(),
|
||||
e_wsfe(), s_wsle(), e_wsle(), s_rsfe(), e_rsfe();
|
||||
integer f_clos();
|
||||
|
||||
/* Local variables */
|
||||
static integer nalf, idim[9];
|
||||
static logical same;
|
||||
static integer nbet, ntra;
|
||||
static logical rewi;
|
||||
extern /* Subroutine */ int dchk1_(), dchk2_(), dchk3_(), dchk4_(),
|
||||
dchk5_();
|
||||
extern /* Subroutine */ int dchk1_(char*, doublereal*, doublereal*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, integer*, ftnlen);
|
||||
extern /* Subroutine */ int dchk2_(char*, doublereal*, doublereal*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, integer*, ftnlen);
|
||||
extern /* Subroutine */ int dchk3_(char*, doublereal*, doublereal*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, doublereal*, integer*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, integer*, ftnlen);
|
||||
extern /* Subroutine */ int dchk4_(char*, doublereal*, doublereal*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, integer*, ftnlen);
|
||||
/* Subroutine */ int dchk5_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, doublereal* alf, integer* nbet, doublereal* bet, integer* nmax, doublereal* ab, doublereal* aa, doublereal* as, doublereal* bb, doublereal* bs, doublereal* c__, doublereal* cc, doublereal* cs, doublereal* ct, doublereal* g, doublereal* w, integer* iorder, ftnlen sname_len);
|
||||
static doublereal c__[4225] /* was [65][65] */, g[65];
|
||||
static integer i__, j;
|
||||
extern doublereal ddiff_();
|
||||
extern doublereal ddiff_(doublereal*, doublereal*);
|
||||
static integer n;
|
||||
static logical fatal;
|
||||
static doublereal w[130];
|
||||
extern /* Subroutine */ int dmmch_();
|
||||
extern /* Subroutine */ int dmmch_(char*, char*, integer*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, integer*, doublereal*, doublereal*, doublereal*, integer*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen, ftnlen);
|
||||
static logical trace;
|
||||
static integer nidim;
|
||||
static char snaps[32];
|
||||
|
|
@ -433,11 +309,11 @@ static logical c_false = FALSE_;
|
|||
static char snamet[12], transa[1], transb[1];
|
||||
static doublereal thresh;
|
||||
static logical rorder;
|
||||
extern /* Subroutine */ int cd3chke_();
|
||||
extern /* Subroutine */ void cd3chke_(char*, ftnlen);
|
||||
static integer layout;
|
||||
static logical ltestt, tsterr;
|
||||
static doublereal alf[7];
|
||||
extern logical lde_();
|
||||
extern logical lde_(doublereal*, doublereal*, integer*);
|
||||
static doublereal bet[7], eps, err;
|
||||
char tmpchar;
|
||||
|
||||
|
|
@ -907,21 +783,7 @@ L230:
|
|||
|
||||
} /* MAIN__ */
|
||||
|
||||
/* Subroutine */ int dchk1_(sname, eps, thresh, nout, ntra, trace, rewi,
|
||||
fatal, nidim, idim, nalf, alf, nbet, bet, nmax, a, aa, as, b, bb, bs,
|
||||
c__, cc, cs, ct, g, iorder, sname_len)
|
||||
char *sname;
|
||||
doublereal *eps, *thresh;
|
||||
integer *nout, *ntra;
|
||||
logical *trace, *rewi, *fatal;
|
||||
integer *nidim, *idim, *nalf;
|
||||
doublereal *alf;
|
||||
integer *nbet;
|
||||
doublereal *bet;
|
||||
integer *nmax;
|
||||
doublereal *a, *aa, *as, *b, *bb, *bs, *c__, *cc, *cs, *ct, *g;
|
||||
integer *iorder;
|
||||
ftnlen sname_len;
|
||||
/* Subroutine */ int dchk1_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, doublereal* alf, integer* nbet, doublereal* bet, integer* nmax, doublereal* a, doublereal* aa, doublereal* as, doublereal* b, doublereal* bb, doublereal* bs, doublereal* c__, doublereal* cc, doublereal* cs, doublereal* ct, doublereal* g, integer* iorder, ftnlen sname_len)
|
||||
{
|
||||
/* Initialized data */
|
||||
|
||||
|
|
@ -931,29 +793,27 @@ ftnlen sname_len;
|
|||
integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2,
|
||||
i__3, i__4, i__5, i__6;
|
||||
|
||||
/* Builtin functions */
|
||||
integer f_rew(), s_wsfe(), e_wsfe(), do_fio();
|
||||
|
||||
/* Local variables */
|
||||
static doublereal beta;
|
||||
static integer ldas, ldbs, ldcs;
|
||||
static logical same, null;
|
||||
static integer i__, k, m, n;
|
||||
extern /* Subroutine */ int dmake_();
|
||||
extern /* Subroutine */ int dmake_(char*, char*, char*, integer*, integer*, doublereal*, integer*, doublereal*, integer*, logical*, doublereal*, ftnlen, ftnlen, ftnlen);
|
||||
static doublereal alpha;
|
||||
extern /* Subroutine */ int dmmch_();
|
||||
extern /* Subroutine */ int dmmch_(char*, char*, integer*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, integer*, doublereal*, doublereal*, doublereal*, integer*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen, ftnlen);
|
||||
static logical isame[13], trana, tranb;
|
||||
static integer nargs;
|
||||
static logical reset;
|
||||
extern /* Subroutine */ void dprcn1_();
|
||||
extern /* Subroutine */ void dprcn1_(integer*, integer*, char*, integer*, char*, char*, integer*, integer*, integer*, doublereal*, integer*, integer*, doublereal*, integer*, ftnlen, ftnlen, ftnlen);
|
||||
static integer ia, ib, ma, mb, na, nb, nc, ik, im, in;
|
||||
extern /* Subroutine */ int cdgemm_();
|
||||
extern /* Subroutine */ void cdgemm_(integer*, char*, char*, integer*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, integer*, ftnlen, ftnlen);
|
||||
static integer ks, ms, ns;
|
||||
extern logical lderes_();
|
||||
extern logical lderes_(char*, char*, integer*, integer*, doublereal*, doublereal*, integer*, ftnlen, ftnlen);
|
||||
static char tranas[1], tranbs[1], transa[1], transb[1];
|
||||
static doublereal errmax;
|
||||
static integer ica, icb, laa, lbb, lda, lcc, ldb, ldc;
|
||||
extern logical lde_();
|
||||
extern logical lde_(doublereal*, doublereal*, integer*);
|
||||
static doublereal als, bls, err;
|
||||
|
||||
/* Tests DGEMM. */
|
||||
|
|
@ -1283,23 +1143,8 @@ L130:
|
|||
|
||||
} /* dchk1_ */
|
||||
|
||||
/* Subroutine */ void dprcn1_(nout, nc, sname, iorder, transa, transb, m, n, k,
|
||||
alpha, lda, ldb, beta, ldc, sname_len, transa_len, transb_len)
|
||||
integer *nout, *nc;
|
||||
char *sname;
|
||||
integer *iorder;
|
||||
char *transa, *transb;
|
||||
integer *m, *n, *k;
|
||||
doublereal *alpha;
|
||||
integer *lda, *ldb;
|
||||
doublereal *beta;
|
||||
integer *ldc;
|
||||
ftnlen sname_len;
|
||||
ftnlen transa_len;
|
||||
ftnlen transb_len;
|
||||
/* Subroutine */ void dprcn1_(integer* nout, integer* nc, char* sname, integer* iorder, char* transa, char* transb, integer* m, integer* n, integer* k, doublereal* alpha, integer* lda, integer* ldb, doublereal* beta, integer* ldc, ftnlen sname_len, ftnlen transa_len, ftnlen transb_len)
|
||||
{
|
||||
/* Builtin functions */
|
||||
integer s_wsfe(), do_fio(), e_wsfe();
|
||||
|
||||
/* Local variables */
|
||||
static char crc[14], cta[14], ctb[14];
|
||||
|
|
@ -1328,21 +1173,7 @@ ftnlen transb_len;
|
|||
} /* dprcn1_ */
|
||||
|
||||
|
||||
/* Subroutine */ int dchk2_(sname, eps, thresh, nout, ntra, trace, rewi,
|
||||
fatal, nidim, idim, nalf, alf, nbet, bet, nmax, a, aa, as, b, bb, bs,
|
||||
c__, cc, cs, ct, g, iorder, sname_len)
|
||||
char *sname;
|
||||
doublereal *eps, *thresh;
|
||||
integer *nout, *ntra;
|
||||
logical *trace, *rewi, *fatal;
|
||||
integer *nidim, *idim, *nalf;
|
||||
doublereal *alf;
|
||||
integer *nbet;
|
||||
doublereal *bet;
|
||||
integer *nmax;
|
||||
doublereal *a, *aa, *as, *b, *bb, *bs, *c__, *cc, *cs, *ct, *g;
|
||||
integer *iorder;
|
||||
ftnlen sname_len;
|
||||
/* Subroutine */ int dchk2_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, doublereal* alf, integer* nbet, doublereal* bet, integer* nmax, doublereal* a, doublereal* aa, doublereal* as, doublereal* b, doublereal* bb, doublereal* bs, doublereal* c__, doublereal* cc, doublereal* cs, doublereal* ct, doublereal* g, integer* iorder, ftnlen sname_len)
|
||||
{
|
||||
/* Initialized data */
|
||||
|
||||
|
|
@ -1353,8 +1184,6 @@ ftnlen sname_len;
|
|||
integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2,
|
||||
i__3, i__4, i__5;
|
||||
|
||||
/* Builtin functions */
|
||||
integer f_rew(), s_wsfe(), e_wsfe(), do_fio();
|
||||
|
||||
/* Local variables */
|
||||
static doublereal beta;
|
||||
|
|
@ -1364,21 +1193,21 @@ ftnlen sname_len;
|
|||
static logical left, null;
|
||||
static char uplo[1];
|
||||
static integer i__, m, n;
|
||||
extern /* Subroutine */ int dmake_();
|
||||
extern /* Subroutine */ int dmake_(char*, char*, char*, integer*, integer*, doublereal*, integer*, doublereal*, integer*, logical*, doublereal*, ftnlen, ftnlen, ftnlen);
|
||||
static doublereal alpha;
|
||||
extern /* Subroutine */ int dmmch_();
|
||||
extern /* Subroutine */ int dmmch_(char*, char*, integer*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, integer*, doublereal*, doublereal*, doublereal*, integer*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen, ftnlen);
|
||||
static logical isame[13];
|
||||
static char sides[1];
|
||||
static integer nargs;
|
||||
static logical reset;
|
||||
static char uplos[1];
|
||||
extern /* Subroutine */ void dprcn2_();
|
||||
extern /* Subroutine */ void dprcn2_(integer*, integer*, char*, integer*, char*, char*, integer*, integer*, doublereal*, integer*, integer*, doublereal*, integer*, ftnlen, ftnlen, ftnlen);
|
||||
static integer ia, ib, na, nc, im, in, ms, ns;
|
||||
extern logical lderes_();
|
||||
extern /* Subroutine */ int cdsymm_();
|
||||
extern logical lderes_(char*, char*, integer*, integer*, doublereal*, doublereal*, integer*, ftnlen, ftnlen);
|
||||
extern /* Subroutine */ void cdsymm_(integer*, char*, char*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, integer*, ftnlen, ftnlen);
|
||||
static doublereal errmax;
|
||||
static integer laa, lbb, lda, lcc, ldb, ldc;
|
||||
extern logical lde_();
|
||||
extern logical lde_(doublereal*, doublereal*, integer*);
|
||||
static integer ics;
|
||||
static doublereal als, bls;
|
||||
static integer icu;
|
||||
|
|
@ -1692,23 +1521,8 @@ L120:
|
|||
} /* dchk2_ */
|
||||
|
||||
|
||||
/* Subroutine */ void dprcn2_(nout, nc, sname, iorder, side, uplo, m, n, alpha,
|
||||
lda, ldb, beta, ldc, sname_len, side_len, uplo_len)
|
||||
integer *nout, *nc;
|
||||
char *sname;
|
||||
integer *iorder;
|
||||
char *side, *uplo;
|
||||
integer *m, *n;
|
||||
doublereal *alpha;
|
||||
integer *lda, *ldb;
|
||||
doublereal *beta;
|
||||
integer *ldc;
|
||||
ftnlen sname_len;
|
||||
ftnlen side_len;
|
||||
ftnlen uplo_len;
|
||||
/* Subroutine */ void dprcn2_(integer* nout, integer* nc, char* sname, integer* iorder, char* side, char* uplo, integer* m, integer* n, doublereal* alpha, integer* lda, integer* ldb, doublereal* beta, integer* ldc, ftnlen sname_len, ftnlen side_len, ftnlen uplo_len)
|
||||
{
|
||||
/* Builtin functions */
|
||||
integer s_wsfe(), do_fio(), e_wsfe();
|
||||
|
||||
/* Local variables */
|
||||
static char cs[14], cu[14], crc[14];
|
||||
|
|
@ -1733,19 +1547,7 @@ ftnlen uplo_len;
|
|||
} /* dprcn2_ */
|
||||
|
||||
|
||||
/* Subroutine */ int dchk3_(sname, eps, thresh, nout, ntra, trace, rewi,
|
||||
fatal, nidim, idim, nalf, alf, nmax, a, aa, as, b, bb, bs, ct, g, c__,
|
||||
iorder, sname_len)
|
||||
char *sname;
|
||||
doublereal *eps, *thresh;
|
||||
integer *nout, *ntra;
|
||||
logical *trace, *rewi, *fatal;
|
||||
integer *nidim, *idim, *nalf;
|
||||
doublereal *alf;
|
||||
integer *nmax;
|
||||
doublereal *a, *aa, *as, *b, *bb, *bs, *ct, *g, *c__;
|
||||
integer *iorder;
|
||||
ftnlen sname_len;
|
||||
/* Subroutine */ int dchk3_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, doublereal* alf, integer* nmax, doublereal* a, doublereal* aa, doublereal* as, doublereal* b, doublereal* bb, doublereal* bs, doublereal* ct, doublereal* g, doublereal* c__, integer* iorder, ftnlen sname_len)
|
||||
{
|
||||
/* Initialized data */
|
||||
|
||||
|
|
@ -1766,24 +1568,24 @@ ftnlen sname_len;
|
|||
static logical left, null;
|
||||
static char uplo[1];
|
||||
static integer i__, j, m, n;
|
||||
extern /* Subroutine */ int dmake_();
|
||||
extern /* Subroutine */ int dmake_(char*, char*, char*, integer*, integer*, doublereal*, integer*, doublereal*, integer*, logical*, doublereal*, ftnlen, ftnlen, ftnlen);
|
||||
static doublereal alpha;
|
||||
static char diags[1];
|
||||
extern /* Subroutine */ int dmmch_();
|
||||
extern /* Subroutine */ int dmmch_(char*, char*, integer*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, integer*, doublereal*, doublereal*, doublereal*, integer*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen, ftnlen);
|
||||
static logical isame[13];
|
||||
static char sides[1];
|
||||
static integer nargs;
|
||||
static logical reset;
|
||||
static char uplos[1];
|
||||
extern /* Subroutine */ void dprcn3_();
|
||||
extern /* Subroutine */ void dprcn3_(integer*, integer*, char*, integer*, char*, char*, char*, char*, integer*, integer*, doublereal*, integer*, integer*, ftnlen, ftnlen, ftnlen, ftnlen, ftnlen);
|
||||
static integer ia, na, nc, im, in, ms, ns;
|
||||
extern logical lderes_();
|
||||
extern /* Subroutine */ int cdtrmm_();
|
||||
extern logical lderes_(char*, char*, integer*, integer*, doublereal*, doublereal*, integer*, ftnlen, ftnlen);
|
||||
extern /* Subroutine */ void cdtrmm_(integer*, char*, char*, char*, char*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, ftnlen, ftnlen, ftnlen, ftnlen);
|
||||
static char tranas[1], transa[1];
|
||||
extern /* Subroutine */ int cdtrsm_();
|
||||
extern /* Subroutine */ void cdtrsm_(integer*, char*, char*, char*, char*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, ftnlen, ftnlen, ftnlen, ftnlen);
|
||||
static doublereal errmax;
|
||||
static integer laa, icd, lbb, lda, ldb;
|
||||
extern logical lde_();
|
||||
extern logical lde_(doublereal*, doublereal*, integer*);
|
||||
static integer ics;
|
||||
static doublereal als;
|
||||
static integer ict, icu;
|
||||
|
|
@ -2165,24 +1967,8 @@ L160:
|
|||
} /* dchk3_ */
|
||||
|
||||
|
||||
/* Subroutine */ void dprcn3_(nout, nc, sname, iorder, side, uplo, transa,
|
||||
diag, m, n, alpha, lda, ldb, sname_len, side_len, uplo_len,
|
||||
transa_len, diag_len)
|
||||
integer *nout, *nc;
|
||||
char *sname;
|
||||
integer *iorder;
|
||||
char *side, *uplo, *transa, *diag;
|
||||
integer *m, *n;
|
||||
doublereal *alpha;
|
||||
integer *lda, *ldb;
|
||||
ftnlen sname_len;
|
||||
ftnlen side_len;
|
||||
ftnlen uplo_len;
|
||||
ftnlen transa_len;
|
||||
ftnlen diag_len;
|
||||
/* Subroutine */ void dprcn3_(integer* nout, integer* nc, char* sname, integer* iorder, char* side, char* uplo, char* transa, char* diag, integer* m, integer* n, doublereal* alpha, integer* lda, integer* ldb, ftnlen sname_len, ftnlen side_len, ftnlen uplo_len, ftnlen transa_len, ftnlen diag_len)
|
||||
{
|
||||
/* Builtin functions */
|
||||
integer s_wsfe(), do_fio(), e_wsfe();
|
||||
|
||||
/* Local variables */
|
||||
static char ca[14], cd[14], cs[14], cu[14], crc[14];
|
||||
|
|
@ -2219,21 +2005,7 @@ ftnlen diag_len;
|
|||
} /* dprcn3_ */
|
||||
|
||||
|
||||
/* Subroutine */ int dchk4_(sname, eps, thresh, nout, ntra, trace, rewi,
|
||||
fatal, nidim, idim, nalf, alf, nbet, bet, nmax, a, aa, as, b, bb, bs,
|
||||
c__, cc, cs, ct, g, iorder, sname_len)
|
||||
char *sname;
|
||||
doublereal *eps, *thresh;
|
||||
integer *nout, *ntra;
|
||||
logical *trace, *rewi, *fatal;
|
||||
integer *nidim, *idim, *nalf;
|
||||
doublereal *alf;
|
||||
integer *nbet;
|
||||
doublereal *bet;
|
||||
integer *nmax;
|
||||
doublereal *a, *aa, *as, *b, *bb, *bs, *c__, *cc, *cs, *ct, *g;
|
||||
integer *iorder;
|
||||
ftnlen sname_len;
|
||||
/* Subroutine */ int dchk4_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, doublereal* alf, integer* nbet, doublereal* bet, integer* nmax, doublereal* a, doublereal* aa, doublereal* as, doublereal* b, doublereal* bb, doublereal* bs, doublereal* c__, doublereal* cc, doublereal* cs, doublereal* ct, doublereal* g, integer* iorder, ftnlen sname_len)
|
||||
{
|
||||
/* Initialized data */
|
||||
|
||||
|
|
@ -2244,8 +2016,6 @@ ftnlen sname_len;
|
|||
integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2,
|
||||
i__3, i__4, i__5;
|
||||
|
||||
/* Builtin functions */
|
||||
integer f_rew(), s_wsfe(), e_wsfe(), do_fio();
|
||||
|
||||
/* Local variables */
|
||||
static doublereal beta;
|
||||
|
|
@ -2255,23 +2025,23 @@ ftnlen sname_len;
|
|||
static logical tran, null;
|
||||
static char uplo[1];
|
||||
static integer i__, j, k, n;
|
||||
extern /* Subroutine */ int dmake_();
|
||||
extern /* Subroutine */ int dmake_(char*, char*, char*, integer*, integer*, doublereal*, integer*, doublereal*, integer*, logical*, doublereal*, ftnlen, ftnlen, ftnlen);
|
||||
static doublereal alpha;
|
||||
extern /* Subroutine */ int dmmch_();
|
||||
extern /* Subroutine */ int dmmch_(char*, char*, integer*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, integer*, doublereal*, doublereal*, doublereal*, integer*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen, ftnlen);
|
||||
static logical isame[13];
|
||||
static integer nargs;
|
||||
static logical reset;
|
||||
static char trans[1];
|
||||
static logical upper;
|
||||
static char uplos[1];
|
||||
extern /* Subroutine */ void dprcn4_();
|
||||
extern /* Subroutine */ void dprcn4_(integer*, integer*, char*, integer*, char*, char*, integer*, integer*, doublereal*, integer*, doublereal*, integer*, ftnlen, ftnlen, ftnlen);
|
||||
static integer ia, ib, jc, ma, na, nc, ik, in, jj, lj, ks, ns;
|
||||
extern logical lderes_();
|
||||
extern logical lderes_(char*, char*, integer*, integer*, doublereal*, doublereal*, integer*, ftnlen, ftnlen);
|
||||
static doublereal errmax;
|
||||
extern /* Subroutine */ int cdsyrk_();
|
||||
extern /* Subroutine */ void cdsyrk_(integer*, char*, char*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, doublereal*, integer*, ftnlen, ftnlen);
|
||||
static char transs[1];
|
||||
static integer laa, lda, lcc, ldc;
|
||||
extern logical lde_();
|
||||
extern logical lde_(doublereal*, doublereal*, integer*);
|
||||
static doublereal als;
|
||||
static integer ict, icu;
|
||||
static doublereal err;
|
||||
|
|
@ -2586,23 +2356,8 @@ L130:
|
|||
} /* dchk4_ */
|
||||
|
||||
|
||||
/* Subroutine */ void dprcn4_(nout, nc, sname, iorder, uplo, transa, n, k,
|
||||
alpha, lda, beta, ldc, sname_len, uplo_len, transa_len)
|
||||
integer *nout, *nc;
|
||||
char *sname;
|
||||
integer *iorder;
|
||||
char *uplo, *transa;
|
||||
integer *n, *k;
|
||||
doublereal *alpha;
|
||||
integer *lda;
|
||||
doublereal *beta;
|
||||
integer *ldc;
|
||||
ftnlen sname_len;
|
||||
ftnlen uplo_len;
|
||||
ftnlen transa_len;
|
||||
/* Subroutine */ void dprcn4_(integer* nout, integer* nc, char* sname, integer* iorder, char* uplo, char* transa, integer* n, integer* k, doublereal* alpha, integer* lda, doublereal* beta, integer* ldc, ftnlen sname_len, ftnlen uplo_len, ftnlen transa_len)
|
||||
{
|
||||
/* Builtin functions */
|
||||
integer s_wsfe(), do_fio(), e_wsfe();
|
||||
|
||||
/* Local variables */
|
||||
static char ca[14], cu[14], crc[14];
|
||||
|
|
@ -2629,21 +2384,7 @@ ftnlen transa_len;
|
|||
} /* dprcn4_ */
|
||||
|
||||
|
||||
/* Subroutine */ int dchk5_(sname, eps, thresh, nout, ntra, trace, rewi,
|
||||
fatal, nidim, idim, nalf, alf, nbet, bet, nmax, ab, aa, as, bb, bs,
|
||||
c__, cc, cs, ct, g, w, iorder, sname_len)
|
||||
char *sname;
|
||||
doublereal *eps, *thresh;
|
||||
integer *nout, *ntra;
|
||||
logical *trace, *rewi, *fatal;
|
||||
integer *nidim, *idim, *nalf;
|
||||
doublereal *alf;
|
||||
integer *nbet;
|
||||
doublereal *bet;
|
||||
integer *nmax;
|
||||
doublereal *ab, *aa, *as, *bb, *bs, *c__, *cc, *cs, *ct, *g, *w;
|
||||
integer *iorder;
|
||||
ftnlen sname_len;
|
||||
/* Subroutine */ int dchk5_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, doublereal* alf, integer* nbet, doublereal* bet, integer* nmax, doublereal* ab, doublereal* aa, doublereal* as, doublereal* bb, doublereal* bs, doublereal* c__, doublereal* cc, doublereal* cs, doublereal* ct, doublereal* g, doublereal* w, integer* iorder, ftnlen sname_len)
|
||||
{
|
||||
/* Initialized data */
|
||||
|
||||
|
|
@ -2653,8 +2394,6 @@ ftnlen sname_len;
|
|||
/* System generated locals */
|
||||
integer c_dim1, c_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7, i__8;
|
||||
|
||||
/* Builtin functions */
|
||||
integer f_rew(), s_wsfe(), e_wsfe(), do_fio();
|
||||
|
||||
/* Local variables */
|
||||
static integer jjab;
|
||||
|
|
@ -2665,23 +2404,23 @@ ftnlen sname_len;
|
|||
static logical tran, null;
|
||||
static char uplo[1];
|
||||
static integer i__, j, k, n;
|
||||
extern /* Subroutine */ int dmake_();
|
||||
extern /* Subroutine */ int dmake_(char*, char*, char*, integer*, integer*, doublereal*, integer*, doublereal*, integer*, logical*, doublereal*, ftnlen, ftnlen, ftnlen);
|
||||
static doublereal alpha;
|
||||
extern /* Subroutine */ int dmmch_();
|
||||
extern /* Subroutine */ int dmmch_(char*, char*, integer*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, integer*, doublereal*, doublereal*, doublereal*, integer*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen, ftnlen);
|
||||
static logical isame[13];
|
||||
static integer nargs;
|
||||
static logical reset;
|
||||
static char trans[1];
|
||||
static logical upper;
|
||||
static char uplos[1];
|
||||
extern /* Subroutine */ void dprcn5_();
|
||||
extern /* Subroutine */ void dprcn5_(integer*, integer*, char*, integer*, char*, char*, integer*, integer*, doublereal*, integer*, integer*, doublereal*, integer*, ftnlen, ftnlen, ftnlen);
|
||||
static integer ia, ib, jc, ma, na, nc, ik, in, jj, lj, ks, ns;
|
||||
extern logical lderes_();
|
||||
extern logical lderes_(char*, char*, integer*, integer*, doublereal*, doublereal*, integer*, ftnlen, ftnlen);
|
||||
static doublereal errmax;
|
||||
static char transs[1];
|
||||
static integer laa, lbb, lda, lcc, ldb, ldc;
|
||||
extern logical lde_();
|
||||
extern /* Subroutine */ int cdsyr2k_();
|
||||
extern logical lde_(doublereal*, doublereal*, integer*);
|
||||
extern /* Subroutine */ void cdsyr2k_(integer*, char*, char*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, integer*, ftnlen, ftnlen);
|
||||
static doublereal als;
|
||||
static integer ict, icu;
|
||||
static doublereal err;
|
||||
|
|
@ -3048,23 +2787,8 @@ L160:
|
|||
} /* dchk5_ */
|
||||
|
||||
|
||||
/* Subroutine */ void dprcn5_(nout, nc, sname, iorder, uplo, transa, n, k,
|
||||
alpha, lda, ldb, beta, ldc, sname_len, uplo_len, transa_len)
|
||||
integer *nout, *nc;
|
||||
char *sname;
|
||||
integer *iorder;
|
||||
char *uplo, *transa;
|
||||
integer *n, *k;
|
||||
doublereal *alpha;
|
||||
integer *lda, *ldb;
|
||||
doublereal *beta;
|
||||
integer *ldc;
|
||||
ftnlen sname_len;
|
||||
ftnlen uplo_len;
|
||||
ftnlen transa_len;
|
||||
/* Subroutine */ void dprcn5_(integer* nout, integer* nc, char* sname, integer* iorder, char* uplo, char* transa, integer* n, integer* k, doublereal* alpha, integer* lda, integer* ldb, doublereal* beta, integer* ldc, ftnlen sname_len, ftnlen uplo_len, ftnlen transa_len)
|
||||
{
|
||||
/* Builtin functions */
|
||||
integer s_wsfe(), do_fio(), e_wsfe();
|
||||
|
||||
/* Local variables */
|
||||
static char ca[14], cu[14], crc[14];
|
||||
|
|
@ -3091,25 +2815,13 @@ ftnlen transa_len;
|
|||
} /* dprcn5_ */
|
||||
|
||||
|
||||
/* Subroutine */ int dmake_(type__, uplo, diag, m, n, a, nmax, aa, lda, reset,
|
||||
transl, type_len, uplo_len, diag_len)
|
||||
char *type__, *uplo, *diag;
|
||||
integer *m, *n;
|
||||
doublereal *a;
|
||||
integer *nmax;
|
||||
doublereal *aa;
|
||||
integer *lda;
|
||||
logical *reset;
|
||||
doublereal *transl;
|
||||
ftnlen type_len;
|
||||
ftnlen uplo_len;
|
||||
ftnlen diag_len;
|
||||
/* Subroutine */ int dmake_(char* type__, char* uplo, char* diag, integer* m, integer* n, doublereal* a, integer* nmax, doublereal* aa, integer* lda, logical* reset, doublereal* transl, ftnlen type_len, ftnlen uplo_len, ftnlen diag_len)
|
||||
{
|
||||
/* System generated locals */
|
||||
integer a_dim1, a_offset, i__1, i__2;
|
||||
|
||||
/* Local variables */
|
||||
extern doublereal dbeg_();
|
||||
extern doublereal dbeg_(logical*);
|
||||
static integer ibeg, iend;
|
||||
static logical unit;
|
||||
static integer i__, j;
|
||||
|
|
@ -3241,25 +2953,7 @@ ftnlen diag_len;
|
|||
|
||||
} /* dmake_ */
|
||||
|
||||
/* Subroutine */ int dmmch_(transa, transb, m, n, kk, alpha, a, lda, b, ldb,
|
||||
beta, c__, ldc, ct, g, cc, ldcc, eps, err, fatal, nout, mv,
|
||||
transa_len, transb_len)
|
||||
char *transa, *transb;
|
||||
integer *m, *n, *kk;
|
||||
doublereal *alpha, *a;
|
||||
integer *lda;
|
||||
doublereal *b;
|
||||
integer *ldb;
|
||||
doublereal *beta, *c__;
|
||||
integer *ldc;
|
||||
doublereal *ct, *g, *cc;
|
||||
integer *ldcc;
|
||||
doublereal *eps, *err;
|
||||
logical *fatal;
|
||||
integer *nout;
|
||||
logical *mv;
|
||||
ftnlen transa_len;
|
||||
ftnlen transb_len;
|
||||
/* Subroutine */ int dmmch_(char* transa, char* transb, integer* m, integer* n, integer* kk, doublereal* alpha, doublereal* a, integer* lda, doublereal* b, integer* ldb, doublereal* beta, doublereal* c__, integer* ldc, doublereal* ct, doublereal* g, doublereal* cc, integer* ldcc, doublereal* eps, doublereal* err, logical* fatal, integer* nout, logical* mv, ftnlen transa_len, ftnlen transb_len)
|
||||
{
|
||||
/* System generated locals */
|
||||
integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, cc_dim1,
|
||||
|
|
@ -3267,8 +2961,7 @@ ftnlen transb_len;
|
|||
doublereal d__1, d__2;
|
||||
|
||||
/* Builtin functions */
|
||||
double sqrt();
|
||||
integer s_wsfe(), e_wsfe(), do_fio();
|
||||
double sqrt(double);
|
||||
|
||||
/* Local variables */
|
||||
static doublereal erri;
|
||||
|
|
@ -3432,9 +3125,7 @@ L150:
|
|||
|
||||
} /* dmmch_ */
|
||||
|
||||
logical lde_(ri, rj, lr)
|
||||
doublereal *ri, *rj;
|
||||
integer *lr;
|
||||
logical lde_(doublereal* ri, doublereal* rj, integer* lr)
|
||||
{
|
||||
/* System generated locals */
|
||||
integer i__1;
|
||||
|
|
@ -3481,13 +3172,7 @@ L30:
|
|||
|
||||
} /* lde_ */
|
||||
|
||||
logical lderes_(type__, uplo, m, n, aa, as, lda, type_len, uplo_len)
|
||||
char *type__, *uplo;
|
||||
integer *m, *n;
|
||||
doublereal *aa, *as;
|
||||
integer *lda;
|
||||
ftnlen type_len;
|
||||
ftnlen uplo_len;
|
||||
logical lderes_(char* type__, char* uplo, integer* m, integer* n, doublereal* aa, doublereal* as, integer* lda, ftnlen type_len, ftnlen uplo_len)
|
||||
{
|
||||
/* System generated locals */
|
||||
integer aa_dim1, aa_offset, as_dim1, as_offset, i__1, i__2;
|
||||
|
|
@ -3576,8 +3261,7 @@ L80:
|
|||
|
||||
} /* lderes_ */
|
||||
|
||||
doublereal dbeg_(reset)
|
||||
logical *reset;
|
||||
doublereal dbeg_(logical* reset)
|
||||
{
|
||||
/* System generated locals */
|
||||
doublereal ret_val;
|
||||
|
|
@ -3629,8 +3313,7 @@ L10:
|
|||
|
||||
} /* dbeg_ */
|
||||
|
||||
doublereal ddiff_(x, y)
|
||||
doublereal *x, *y;
|
||||
doublereal ddiff_(doublereal* x, doublereal* y)
|
||||
{
|
||||
/* System generated locals */
|
||||
doublereal ret_val;
|
||||
|
|
|
|||
|
|
@ -21,19 +21,6 @@ typedef float real;
|
|||
typedef double doublereal;
|
||||
typedef struct { real r, i; } complex;
|
||||
typedef struct { doublereal r, i; } doublecomplex;
|
||||
#ifdef _MSC_VER
|
||||
static inline _Fcomplex Cf(complex *z) {_Fcomplex zz={z->r , z->i}; return zz;}
|
||||
static inline _Dcomplex Cd(doublecomplex *z) {_Dcomplex zz={z->r , z->i};return zz;}
|
||||
static inline _Fcomplex * _pCf(complex *z) {return (_Fcomplex*)z;}
|
||||
static inline _Dcomplex * _pCd(doublecomplex *z) {return (_Dcomplex*)z;}
|
||||
#else
|
||||
static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
|
||||
static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
|
||||
static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
|
||||
static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
|
||||
#endif
|
||||
#define pCf(z) (*_pCf(z))
|
||||
#define pCd(z) (*_pCd(z))
|
||||
typedef int logical;
|
||||
typedef short int shortlogical;
|
||||
typedef char logical1;
|
||||
|
|
@ -242,250 +229,6 @@ typedef struct Namelist Namelist;
|
|||
/* procedure parameter types for -A and -C++ */
|
||||
|
||||
#define F2C_proc_par_types 1
|
||||
#ifdef __cplusplus
|
||||
typedef logical (*L_fp)(...);
|
||||
#else
|
||||
typedef logical (*L_fp)();
|
||||
#endif
|
||||
#if 0
|
||||
static float spow_ui(float x, integer n) {
|
||||
float pow=1.0; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x = 1/x;
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow *= x;
|
||||
if(u >>= 1) x *= x;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
return pow;
|
||||
}
|
||||
static double dpow_ui(double x, integer n) {
|
||||
double pow=1.0; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x = 1/x;
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow *= x;
|
||||
if(u >>= 1) x *= x;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
return pow;
|
||||
}
|
||||
#ifdef _MSC_VER
|
||||
static _Fcomplex cpow_ui(complex x, integer n) {
|
||||
complex pow={1.0,0.0}; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i;
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow.r *= x.r, pow.i *= x.i;
|
||||
if(u >>= 1) x.r *= x.r, x.i *= x.i;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
_Fcomplex p={pow.r, pow.i};
|
||||
return p;
|
||||
}
|
||||
#else
|
||||
static _Complex float cpow_ui(_Complex float x, integer n) {
|
||||
_Complex float pow=1.0; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x = 1/x;
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow *= x;
|
||||
if(u >>= 1) x *= x;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
return pow;
|
||||
}
|
||||
#endif
|
||||
#ifdef _MSC_VER
|
||||
static _Dcomplex zpow_ui(_Dcomplex x, integer n) {
|
||||
_Dcomplex pow={1.0,0.0}; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1];
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1];
|
||||
if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1];
|
||||
else break;
|
||||
}
|
||||
}
|
||||
_Dcomplex p = {pow._Val[0], pow._Val[1]};
|
||||
return p;
|
||||
}
|
||||
#else
|
||||
static _Complex double zpow_ui(_Complex double x, integer n) {
|
||||
_Complex double pow=1.0; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x = 1/x;
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow *= x;
|
||||
if(u >>= 1) x *= x;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
return pow;
|
||||
}
|
||||
#endif
|
||||
static integer pow_ii(integer x, integer n) {
|
||||
integer pow; unsigned long int u;
|
||||
if (n <= 0) {
|
||||
if (n == 0 || x == 1) pow = 1;
|
||||
else if (x != -1) pow = x == 0 ? 1/x : 0;
|
||||
else n = -n;
|
||||
}
|
||||
if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
|
||||
u = n;
|
||||
for(pow = 1; ; ) {
|
||||
if(u & 01) pow *= x;
|
||||
if(u >>= 1) x *= x;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
return pow;
|
||||
}
|
||||
static integer dmaxloc_(double *w, integer s, integer e, integer *n)
|
||||
{
|
||||
double m; integer i, mi;
|
||||
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
|
||||
if (w[i-1]>m) mi=i ,m=w[i-1];
|
||||
return mi-s+1;
|
||||
}
|
||||
static integer smaxloc_(float *w, integer s, integer e, integer *n)
|
||||
{
|
||||
float m; integer i, mi;
|
||||
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
|
||||
if (w[i-1]>m) mi=i ,m=w[i-1];
|
||||
return mi-s+1;
|
||||
}
|
||||
#endif
|
||||
#if 0
|
||||
static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
|
||||
integer n = *n_, incx = *incx_, incy = *incy_, i;
|
||||
#ifdef _MSC_VER
|
||||
_Fcomplex zdotc = {0.0, 0.0};
|
||||
if (incx == 1 && incy == 1) {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc._Val[0] += conjf(Cf(&x[i]))._Val[0] * Cf(&y[i])._Val[0];
|
||||
zdotc._Val[1] += conjf(Cf(&x[i]))._Val[1] * Cf(&y[i])._Val[1];
|
||||
}
|
||||
} else {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc._Val[0] += conjf(Cf(&x[i*incx]))._Val[0] * Cf(&y[i*incy])._Val[0];
|
||||
zdotc._Val[1] += conjf(Cf(&x[i*incx]))._Val[1] * Cf(&y[i*incy])._Val[1];
|
||||
}
|
||||
}
|
||||
pCf(z) = zdotc;
|
||||
}
|
||||
#else
|
||||
_Complex float zdotc = 0.0;
|
||||
if (incx == 1 && incy == 1) {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
|
||||
}
|
||||
} else {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
|
||||
}
|
||||
}
|
||||
pCf(z) = zdotc;
|
||||
}
|
||||
#endif
|
||||
static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
|
||||
integer n = *n_, incx = *incx_, incy = *incy_, i;
|
||||
#ifdef _MSC_VER
|
||||
_Dcomplex zdotc = {0.0, 0.0};
|
||||
if (incx == 1 && incy == 1) {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc._Val[0] += conj(Cd(&x[i]))._Val[0] * Cd(&y[i])._Val[0];
|
||||
zdotc._Val[1] += conj(Cd(&x[i]))._Val[1] * Cd(&y[i])._Val[1];
|
||||
}
|
||||
} else {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc._Val[0] += conj(Cd(&x[i*incx]))._Val[0] * Cd(&y[i*incy])._Val[0];
|
||||
zdotc._Val[1] += conj(Cd(&x[i*incx]))._Val[1] * Cd(&y[i*incy])._Val[1];
|
||||
}
|
||||
}
|
||||
pCd(z) = zdotc;
|
||||
}
|
||||
#else
|
||||
_Complex double zdotc = 0.0;
|
||||
if (incx == 1 && incy == 1) {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
|
||||
}
|
||||
} else {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
|
||||
}
|
||||
}
|
||||
pCd(z) = zdotc;
|
||||
}
|
||||
#endif
|
||||
static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
|
||||
integer n = *n_, incx = *incx_, incy = *incy_, i;
|
||||
#ifdef _MSC_VER
|
||||
_Fcomplex zdotc = {0.0, 0.0};
|
||||
if (incx == 1 && incy == 1) {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc._Val[0] += Cf(&x[i])._Val[0] * Cf(&y[i])._Val[0];
|
||||
zdotc._Val[1] += Cf(&x[i])._Val[1] * Cf(&y[i])._Val[1];
|
||||
}
|
||||
} else {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc._Val[0] += Cf(&x[i*incx])._Val[0] * Cf(&y[i*incy])._Val[0];
|
||||
zdotc._Val[1] += Cf(&x[i*incx])._Val[1] * Cf(&y[i*incy])._Val[1];
|
||||
}
|
||||
}
|
||||
pCf(z) = zdotc;
|
||||
}
|
||||
#else
|
||||
_Complex float zdotc = 0.0;
|
||||
if (incx == 1 && incy == 1) {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc += Cf(&x[i]) * Cf(&y[i]);
|
||||
}
|
||||
} else {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
|
||||
}
|
||||
}
|
||||
pCf(z) = zdotc;
|
||||
}
|
||||
#endif
|
||||
static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
|
||||
integer n = *n_, incx = *incx_, incy = *incy_, i;
|
||||
#ifdef _MSC_VER
|
||||
_Dcomplex zdotc = {0.0, 0.0};
|
||||
if (incx == 1 && incy == 1) {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc._Val[0] += Cd(&x[i])._Val[0] * Cd(&y[i])._Val[0];
|
||||
zdotc._Val[1] += Cd(&x[i])._Val[1] * Cd(&y[i])._Val[1];
|
||||
}
|
||||
} else {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc._Val[0] += Cd(&x[i*incx])._Val[0] * Cd(&y[i*incy])._Val[0];
|
||||
zdotc._Val[1] += Cd(&x[i*incx])._Val[1] * Cd(&y[i*incy])._Val[1];
|
||||
}
|
||||
}
|
||||
pCd(z) = zdotc;
|
||||
}
|
||||
#else
|
||||
_Complex double zdotc = 0.0;
|
||||
if (incx == 1 && incy == 1) {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc += Cd(&x[i]) * Cd(&y[i]);
|
||||
}
|
||||
} else {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
|
||||
}
|
||||
}
|
||||
pCd(z) = zdotc;
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
/* Common Block Declarations */
|
||||
|
|
@ -502,16 +245,16 @@ struct {
|
|||
static integer c__1 = 1;
|
||||
static real c_b34 = (float)1.;
|
||||
|
||||
/* Main program */ int main ()
|
||||
/* Main program */ int main (void)
|
||||
{
|
||||
/* Initialized data */
|
||||
|
||||
static real sfac = (float)9.765625e-4;
|
||||
|
||||
/* Local variables */
|
||||
extern /* Subroutine */ int check0_(), check1_(), check2_(), check3_();
|
||||
extern /* Subroutine */ int check0_(real*), check1_(real*), check2_(real*), check3_(real*);
|
||||
static integer ic;
|
||||
extern /* Subroutine */ int header_();
|
||||
extern /* Subroutine */ int header_(void);
|
||||
|
||||
/* Test program for the REAL Level 1 CBLAS. */
|
||||
/* Based upon the original CBLAS test routine together with: */
|
||||
|
|
@ -557,7 +300,7 @@ static real c_b34 = (float)1.;
|
|||
exit(0);
|
||||
} /* MAIN__ */
|
||||
|
||||
/* Subroutine */ int header_()
|
||||
/* Subroutine */ int header_(void)
|
||||
{
|
||||
/* Initialized data */
|
||||
|
||||
|
|
@ -580,8 +323,7 @@ static real c_b34 = (float)1.;
|
|||
|
||||
} /* header_ */
|
||||
|
||||
/* Subroutine */ int check0_(sfac)
|
||||
real *sfac;
|
||||
/* Subroutine */ int check0_(real *sfac)
|
||||
{
|
||||
/* Initialized data */
|
||||
|
||||
|
|
@ -600,7 +342,7 @@ real *sfac;
|
|||
|
||||
/* Local variables */
|
||||
static integer k;
|
||||
extern /* Subroutine */ int srotgtest_(), stest1_();
|
||||
extern /* Subroutine */ int srotgtest_(real*,real*,real*,real*), stest1_(real*,real*,real*,real*);
|
||||
static real sa, sb, sc, ss;
|
||||
|
||||
/* .. Parameters .. */
|
||||
|
|
@ -645,8 +387,7 @@ L40:
|
|||
return 0;
|
||||
} /* check0_ */
|
||||
|
||||
/* Subroutine */ int check1_(sfac)
|
||||
real *sfac;
|
||||
/* Subroutine */ int check1_(real* sfac)
|
||||
{
|
||||
/* Initialized data */
|
||||
|
||||
|
|
@ -692,14 +433,14 @@ real *sfac;
|
|||
|
||||
/* Local variables */
|
||||
static integer i__;
|
||||
extern real snrm2test_();
|
||||
extern real snrm2test_(integer*,real*,integer*);
|
||||
static real stemp[1], strue[8];
|
||||
extern /* Subroutine */ int stest_(), sscaltest_();
|
||||
extern real sasumtest_();
|
||||
extern /* Subroutine */ int itest1_(), stest1_();
|
||||
extern /* Subroutine */ int stest_(integer*, real*,real*,real*,real*), sscaltest_(integer*,real*,real*,integer*);
|
||||
extern real sasumtest_(integer*,real*,integer*);
|
||||
extern /* Subroutine */ int itest1_(integer*,integer*), stest1_(real*,real*,real*,real*);
|
||||
static real sx[8];
|
||||
static integer np1;
|
||||
extern integer isamaxtest_();
|
||||
extern integer isamaxtest_(integer*,real*,integer*);
|
||||
static integer len;
|
||||
|
||||
|
||||
|
|
@ -761,8 +502,7 @@ real *sfac;
|
|||
return 0;
|
||||
} /* check1_ */
|
||||
|
||||
/* Subroutine */ int check2_(sfac)
|
||||
real *sfac;
|
||||
/* Subroutine */ int check2_(real* sfac)
|
||||
{
|
||||
/* Initialized data */
|
||||
|
||||
|
|
@ -850,12 +590,12 @@ real *sfac;
|
|||
|
||||
/* Local variables */
|
||||
static integer lenx, leny;
|
||||
extern real sdottest_();
|
||||
extern real sdottest_(integer*,real*,integer*,real*,integer*);
|
||||
static integer i__, j, ksize;
|
||||
extern /* Subroutine */ int stest_(), scopytest_(), sswaptest_(),
|
||||
saxpytest_();
|
||||
extern /* Subroutine */ int stest_(integer*,real*,real*,real*,real*), scopytest_(integer*,real*,integer*,real*,integer*), sswaptest_(integer*,real*,integer*,real*,integer*),
|
||||
saxpytest_(integer*,real*,real*,integer*,real*,integer*);
|
||||
static integer ki;
|
||||
extern /* Subroutine */ int stest1_();
|
||||
extern /* Subroutine */ int stest1_(real*,real*,real*,real*);
|
||||
static integer kn, mx, my;
|
||||
static real sx[7], sy[7], stx[7], sty[7];
|
||||
|
||||
|
|
@ -936,8 +676,7 @@ real *sfac;
|
|||
return 0;
|
||||
} /* check2_ */
|
||||
|
||||
/* Subroutine */ int check3_(sfac)
|
||||
real *sfac;
|
||||
/* Subroutine */ int check3_(real* sfac)
|
||||
{
|
||||
/* Initialized data */
|
||||
|
||||
|
|
@ -969,9 +708,9 @@ real *sfac;
|
|||
1.17 };
|
||||
|
||||
/* Local variables */
|
||||
extern /* Subroutine */ void srottest_();
|
||||
extern /* Subroutine */ void srottest_(integer*,real*,integer*,real*,integer*,real*,real*);
|
||||
static integer i__, k, ksize;
|
||||
extern /* Subroutine */ int stest_(), srotmtest_();
|
||||
extern /* Subroutine */ int stest_(integer*,real*,real*,real*,real*), srotmtest_(integer*,real*,integer*,real*,integer*,real*);
|
||||
static integer ki, kn;
|
||||
static real sx[19], sy[19], sparam[5], stx[19], sty[19];
|
||||
|
||||
|
|
@ -1042,16 +781,14 @@ real *sfac;
|
|||
return 0;
|
||||
} /* check3_ */
|
||||
|
||||
/* Subroutine */ int stest_(len, scomp, strue, ssize, sfac)
|
||||
integer *len;
|
||||
real *scomp, *strue, *ssize, *sfac;
|
||||
/* Subroutine */ int stest_(integer* len, real* scomp, real* strue, real* ssize, real* sfac)
|
||||
{
|
||||
integer i__1;
|
||||
real r__1, r__2, r__3, r__4, r__5;
|
||||
|
||||
/* Local variables */
|
||||
static integer i__;
|
||||
extern doublereal sdiff_();
|
||||
extern doublereal sdiff_(real*,real*);
|
||||
static real sd;
|
||||
|
||||
/* ********************************* STEST ************************** */
|
||||
|
|
@ -1107,11 +844,10 @@ L40:
|
|||
|
||||
} /* stest_ */
|
||||
|
||||
/* Subroutine */ int stest1_(scomp1, strue1, ssize, sfac)
|
||||
real *scomp1, *strue1, *ssize, *sfac;
|
||||
/* Subroutine */ int stest1_(real* scomp1, real* strue1, real* ssize, real* sfac)
|
||||
{
|
||||
static real scomp[1], strue[1];
|
||||
extern /* Subroutine */ int stest_();
|
||||
extern /* Subroutine */ int stest_(integer*,real*,real*,real*,real*);
|
||||
|
||||
/* ************************* STEST1 ***************************** */
|
||||
|
||||
|
|
@ -1138,8 +874,7 @@ real *scomp1, *strue1, *ssize, *sfac;
|
|||
return 0;
|
||||
} /* stest1_ */
|
||||
|
||||
doublereal sdiff_(sa, sb)
|
||||
real *sa, *sb;
|
||||
doublereal sdiff_(real* sa, real* sb)
|
||||
{
|
||||
/* System generated locals */
|
||||
real ret_val;
|
||||
|
|
@ -1153,8 +888,7 @@ real *sa, *sb;
|
|||
return ret_val;
|
||||
} /* sdiff_ */
|
||||
|
||||
/* Subroutine */ int itest1_(icomp, itrue)
|
||||
integer *icomp, *itrue;
|
||||
/* Subroutine */ int itest1_(integer* icomp, integer* itrue)
|
||||
{
|
||||
/* Local variables */
|
||||
static integer id;
|
||||
|
|
|
|||
|
|
@ -242,255 +242,6 @@ typedef struct Namelist Namelist;
|
|||
/* procedure parameter types for -A and -C++ */
|
||||
|
||||
#define F2C_proc_par_types 1
|
||||
#ifdef __cplusplus
|
||||
typedef logical (*L_fp)(...);
|
||||
#else
|
||||
typedef logical (*L_fp)();
|
||||
#endif
|
||||
#if 0
|
||||
static float spow_ui(float x, integer n) {
|
||||
float pow=1.0; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x = 1/x;
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow *= x;
|
||||
if(u >>= 1) x *= x;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
return pow;
|
||||
}
|
||||
static double dpow_ui(double x, integer n) {
|
||||
double pow=1.0; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x = 1/x;
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow *= x;
|
||||
if(u >>= 1) x *= x;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
return pow;
|
||||
}
|
||||
#ifdef _MSC_VER
|
||||
static _Fcomplex cpow_ui(complex x, integer n) {
|
||||
complex pow={1.0,0.0}; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i;
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow.r *= x.r, pow.i *= x.i;
|
||||
if(u >>= 1) x.r *= x.r, x.i *= x.i;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
_Fcomplex p={pow.r, pow.i};
|
||||
return p;
|
||||
}
|
||||
#else
|
||||
static _Complex float cpow_ui(_Complex float x, integer n) {
|
||||
_Complex float pow=1.0; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x = 1/x;
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow *= x;
|
||||
if(u >>= 1) x *= x;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
return pow;
|
||||
}
|
||||
#endif
|
||||
#ifdef _MSC_VER
|
||||
static _Dcomplex zpow_ui(_Dcomplex x, integer n) {
|
||||
_Dcomplex pow={1.0,0.0}; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1];
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1];
|
||||
if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1];
|
||||
else break;
|
||||
}
|
||||
}
|
||||
_Dcomplex p = {pow._Val[0], pow._Val[1]};
|
||||
return p;
|
||||
}
|
||||
#else
|
||||
static _Complex double zpow_ui(_Complex double x, integer n) {
|
||||
_Complex double pow=1.0; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x = 1/x;
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow *= x;
|
||||
if(u >>= 1) x *= x;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
return pow;
|
||||
}
|
||||
#endif
|
||||
static integer pow_ii(integer x, integer n) {
|
||||
integer pow; unsigned long int u;
|
||||
if (n <= 0) {
|
||||
if (n == 0 || x == 1) pow = 1;
|
||||
else if (x != -1) pow = x == 0 ? 1/x : 0;
|
||||
else n = -n;
|
||||
}
|
||||
if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
|
||||
u = n;
|
||||
for(pow = 1; ; ) {
|
||||
if(u & 01) pow *= x;
|
||||
if(u >>= 1) x *= x;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
return pow;
|
||||
}
|
||||
static integer dmaxloc_(double *w, integer s, integer e, integer *n)
|
||||
{
|
||||
double m; integer i, mi;
|
||||
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
|
||||
if (w[i-1]>m) mi=i ,m=w[i-1];
|
||||
return mi-s+1;
|
||||
}
|
||||
static integer smaxloc_(float *w, integer s, integer e, integer *n)
|
||||
{
|
||||
float m; integer i, mi;
|
||||
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
|
||||
if (w[i-1]>m) mi=i ,m=w[i-1];
|
||||
return mi-s+1;
|
||||
}
|
||||
#endif
|
||||
#if 0
|
||||
static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
|
||||
integer n = *n_, incx = *incx_, incy = *incy_, i;
|
||||
#ifdef _MSC_VER
|
||||
_Fcomplex zdotc = {0.0, 0.0};
|
||||
if (incx == 1 && incy == 1) {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc._Val[0] += conjf(Cf(&x[i]))._Val[0] * Cf(&y[i])._Val[0];
|
||||
zdotc._Val[1] += conjf(Cf(&x[i]))._Val[1] * Cf(&y[i])._Val[1];
|
||||
}
|
||||
} else {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc._Val[0] += conjf(Cf(&x[i*incx]))._Val[0] * Cf(&y[i*incy])._Val[0];
|
||||
zdotc._Val[1] += conjf(Cf(&x[i*incx]))._Val[1] * Cf(&y[i*incy])._Val[1];
|
||||
}
|
||||
}
|
||||
pCf(z) = zdotc;
|
||||
}
|
||||
#else
|
||||
_Complex float zdotc = 0.0;
|
||||
if (incx == 1 && incy == 1) {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
|
||||
}
|
||||
} else {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
|
||||
}
|
||||
}
|
||||
pCf(z) = zdotc;
|
||||
}
|
||||
#endif
|
||||
static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
|
||||
integer n = *n_, incx = *incx_, incy = *incy_, i;
|
||||
#ifdef _MSC_VER
|
||||
_Dcomplex zdotc = {0.0, 0.0};
|
||||
if (incx == 1 && incy == 1) {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc._Val[0] += conj(Cd(&x[i]))._Val[0] * Cd(&y[i])._Val[0];
|
||||
zdotc._Val[1] += conj(Cd(&x[i]))._Val[1] * Cd(&y[i])._Val[1];
|
||||
}
|
||||
} else {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc._Val[0] += conj(Cd(&x[i*incx]))._Val[0] * Cd(&y[i*incy])._Val[0];
|
||||
zdotc._Val[1] += conj(Cd(&x[i*incx]))._Val[1] * Cd(&y[i*incy])._Val[1];
|
||||
}
|
||||
}
|
||||
pCd(z) = zdotc;
|
||||
}
|
||||
#else
|
||||
_Complex double zdotc = 0.0;
|
||||
if (incx == 1 && incy == 1) {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
|
||||
}
|
||||
} else {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
|
||||
}
|
||||
}
|
||||
pCd(z) = zdotc;
|
||||
}
|
||||
#endif
|
||||
static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
|
||||
integer n = *n_, incx = *incx_, incy = *incy_, i;
|
||||
#ifdef _MSC_VER
|
||||
_Fcomplex zdotc = {0.0, 0.0};
|
||||
if (incx == 1 && incy == 1) {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc._Val[0] += Cf(&x[i])._Val[0] * Cf(&y[i])._Val[0];
|
||||
zdotc._Val[1] += Cf(&x[i])._Val[1] * Cf(&y[i])._Val[1];
|
||||
}
|
||||
} else {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc._Val[0] += Cf(&x[i*incx])._Val[0] * Cf(&y[i*incy])._Val[0];
|
||||
zdotc._Val[1] += Cf(&x[i*incx])._Val[1] * Cf(&y[i*incy])._Val[1];
|
||||
}
|
||||
}
|
||||
pCf(z) = zdotc;
|
||||
}
|
||||
#else
|
||||
_Complex float zdotc = 0.0;
|
||||
if (incx == 1 && incy == 1) {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc += Cf(&x[i]) * Cf(&y[i]);
|
||||
}
|
||||
} else {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
|
||||
}
|
||||
}
|
||||
pCf(z) = zdotc;
|
||||
}
|
||||
#endif
|
||||
static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
|
||||
integer n = *n_, incx = *incx_, incy = *incy_, i;
|
||||
#ifdef _MSC_VER
|
||||
_Dcomplex zdotc = {0.0, 0.0};
|
||||
if (incx == 1 && incy == 1) {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc._Val[0] += Cd(&x[i])._Val[0] * Cd(&y[i])._Val[0];
|
||||
zdotc._Val[1] += Cd(&x[i])._Val[1] * Cd(&y[i])._Val[1];
|
||||
}
|
||||
} else {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc._Val[0] += Cd(&x[i*incx])._Val[0] * Cd(&y[i*incy])._Val[0];
|
||||
zdotc._Val[1] += Cd(&x[i*incx])._Val[1] * Cd(&y[i*incy])._Val[1];
|
||||
}
|
||||
}
|
||||
pCd(z) = zdotc;
|
||||
}
|
||||
#else
|
||||
_Complex double zdotc = 0.0;
|
||||
if (incx == 1 && incy == 1) {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc += Cd(&x[i]) * Cd(&y[i]);
|
||||
}
|
||||
} else {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
|
||||
}
|
||||
}
|
||||
pCd(z) = zdotc;
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
/* -- translated by f2c (version 20000121).
|
||||
You must link the resulting object file with the libraries:
|
||||
-lf2c -lm (in that order)
|
||||
*/
|
||||
|
||||
|
||||
|
||||
/* Common Block Declarations */
|
||||
|
|
@ -521,7 +272,7 @@ static integer c_n1 = -1;
|
|||
static integer c__0 = 0;
|
||||
static logical c_false = FALSE_;
|
||||
|
||||
/* Main program */ int main()
|
||||
/* Main program */ int main(void)
|
||||
{
|
||||
/* Initialized data */
|
||||
|
||||
|
|
@ -539,16 +290,20 @@ static logical c_false = FALSE_;
|
|||
static logical same;
|
||||
static integer ninc, nbet, ntra;
|
||||
static logical rewi;
|
||||
extern /* Subroutine */ int schk1_(), schk2_(), schk3_(), schk4_(),
|
||||
schk5_(), schk6_();
|
||||
extern /* Subroutine */ int schk1_(char*, real*, real*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, integer*, integer*, real*, integer*, real*, integer*, integer*, integer*, integer*, real*, real*, real*, real*, real*, real*, real*, real*, real*, real*, real*, integer*, ftnlen);
|
||||
extern /* Subroutine */ int schk2_(char*, real*, real*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, integer*, integer*, real*, integer*, real*, integer*, integer*, integer*, integer*, real*, real*, real*, real*, real*, real*, real*, real*, real*, real*, real*, integer*, ftnlen);
|
||||
extern /* Subroutine */ int schk3_(char*, real*, real*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, integer*, integer*, integer*, integer*, integer*, real*, real*, real*, real*, real*, real*, real*, real*, real*, integer*, ftnlen);
|
||||
extern /* Subroutine */ int schk4_(char*, real*, real*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, real*, integer*, integer*, integer*, integer*, real*, real*, real*, real*, real*, real*, real*, real*, real*, real*, real*, real*, integer*, ftnlen);
|
||||
extern /* Subroutine */ int schk5_(char*, real*, real*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, real*, integer*, integer*, integer*, integer*, real*, real*, real*, real*, real*, real*, real*, real*, real*, real*, real*, real*, integer*, ftnlen);
|
||||
extern /* Subroutine */ int schk6_(char* sname, real* eps, real* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, real* alf, integer* ninc, integer* inc, integer* nmax, integer* incmax, real* a, real* aa, real* as, real* x, real* xx, real* xs, real* y, real* yy, real* ys, real* yt, real* g, real* z__, integer* iorder, ftnlen sname_len);
|
||||
static real a[4225] /* was [65][65] */, g[65];
|
||||
static integer i__, j, n;
|
||||
static logical fatal;
|
||||
static real x[65], y[65], z__[130];
|
||||
extern doublereal sdiff_();
|
||||
extern doublereal sdiff_(real*, real*);
|
||||
static logical trace;
|
||||
static integer nidim;
|
||||
extern /* Subroutine */ int smvch_();
|
||||
extern /* Subroutine */ int smvch_(char*, integer*, integer*, real*, real*, integer*, real*, integer*, real*, real*, integer*, real*, real*, real*, real*, real*, logical*, integer*, logical*, ftnlen);
|
||||
static char snaps[32], trans[1];
|
||||
static integer isnum;
|
||||
static logical ltest[16];
|
||||
|
|
@ -564,12 +319,12 @@ static logical c_false = FALSE_;
|
|||
static logical rorder;
|
||||
static integer layout;
|
||||
static logical ltestt;
|
||||
extern /* Subroutine */ int cs2chke_();
|
||||
extern /* Subroutine */ int cs2chke_(char*, ftnlen);
|
||||
static logical tsterr;
|
||||
static real alf[7];
|
||||
static integer inc[7], nkb;
|
||||
static real bet[7];
|
||||
extern logical lse_();
|
||||
extern logical lse_(real*, real*, integer*);
|
||||
static real eps, err;
|
||||
char tmpchar;
|
||||
|
||||
|
|
@ -1098,21 +853,7 @@ L240:
|
|||
|
||||
} /* MAIN__ */
|
||||
|
||||
/* Subroutine */ int schk1_(sname, eps, thresh, nout, ntra, trace, rewi,
|
||||
fatal, nidim, idim, nkb, kb, nalf, alf, nbet, bet, ninc, inc, nmax,
|
||||
incmax, a, aa, as, x, xx, xs, y, yy, ys, yt, g, iorder, sname_len)
|
||||
char *sname;
|
||||
real *eps, *thresh;
|
||||
integer *nout, *ntra;
|
||||
logical *trace, *rewi, *fatal;
|
||||
integer *nidim, *idim, *nkb, *kb, *nalf;
|
||||
real *alf;
|
||||
integer *nbet;
|
||||
real *bet;
|
||||
integer *ninc, *inc, *nmax, *incmax;
|
||||
real *a, *aa, *as, *x, *xx, *xs, *y, *yy, *ys, *yt, *g;
|
||||
integer *iorder;
|
||||
ftnlen sname_len;
|
||||
/* Subroutine */ int schk1_(char* sname, real* eps, real* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nkb, integer* kb, integer* nalf, real* alf, integer* nbet, real* bet, integer* ninc, integer* inc, integer* nmax, integer* incmax, real* a, real* aa, real* as, real* x, real* xx, real* xs, real* y, real* yy, real* ys, real* yt, real* g, integer* iorder, ftnlen sname_len)
|
||||
{
|
||||
/* Initialized data */
|
||||
|
||||
|
|
@ -1130,24 +871,25 @@ ftnlen sname_len;
|
|||
static integer i__, m, n;
|
||||
static real alpha;
|
||||
static logical isame[13];
|
||||
extern /* Subroutine */ int smake_();
|
||||
extern /* Subroutine */ int smake_(char*, char*, char*, integer*, integer*, real*, integer*, real*, integer*, integer*, integer*, logical*, real*, ftnlen, ftnlen, ftnlen);
|
||||
static integer nargs;
|
||||
extern /* Subroutine */ int smvch_();
|
||||
extern /* Subroutine */ int smvch_(char*, integer*, integer*, real*, real*, integer*, real*, integer*, real*, real*, integer*, real*, real*, real*, real*, real*, logical*, integer*, logical*, ftnlen);
|
||||
static logical reset;
|
||||
static integer incxs, incys;
|
||||
static char trans[1];
|
||||
static integer ia, ib, ic;
|
||||
static logical banded;
|
||||
static integer nc, nd, im, in, kl, ml, nk, nl, ku, ix, iy, ms, lx, ly, ns;
|
||||
extern /* Subroutine */ int csgbmv_(), csgemv_();
|
||||
extern /* Subroutine */ void csgbmv_(integer*, char*, integer*, integer*, integer*, integer*, real*, real*, integer*, real*, integer*, real*, real*, integer*, ftnlen);
|
||||
extern /* Subroutine */ void csgemv_(integer*, char*, integer*, integer*, real*, real*, integer*, real*, integer*, real*, real*, integer*, ftnlen);
|
||||
static char ctrans[14];
|
||||
static real errmax;
|
||||
extern logical lseres_();
|
||||
extern logical lseres_(char* type__, char* uplo, integer* m, integer* n, real* aa, real* as, integer* lda, ftnlen ltype_len, ftnlen uplo_len);
|
||||
static real transl;
|
||||
static char transs[1];
|
||||
static integer laa, lda;
|
||||
static real als, bls;
|
||||
extern logical lse_();
|
||||
extern logical lse_(real*, real*, integer*);
|
||||
static real err;
|
||||
static integer iku, kls, kus;
|
||||
|
||||
|
|
@ -1552,21 +1294,7 @@ L140:
|
|||
|
||||
} /* schk1_ */
|
||||
|
||||
/* Subroutine */ int schk2_(sname, eps, thresh, nout, ntra, trace, rewi,
|
||||
fatal, nidim, idim, nkb, kb, nalf, alf, nbet, bet, ninc, inc, nmax,
|
||||
incmax, a, aa, as, x, xx, xs, y, yy, ys, yt, g, iorder, sname_len)
|
||||
char *sname;
|
||||
real *eps, *thresh;
|
||||
integer *nout, *ntra;
|
||||
logical *trace, *rewi, *fatal;
|
||||
integer *nidim, *idim, *nkb, *kb, *nalf;
|
||||
real *alf;
|
||||
integer *nbet;
|
||||
real *bet;
|
||||
integer *ninc, *inc, *nmax, *incmax;
|
||||
real *a, *aa, *as, *x, *xx, *xs, *y, *yy, *ys, *yt, *g;
|
||||
integer *iorder;
|
||||
ftnlen sname_len;
|
||||
/* Subroutine */ int schk2_(char* sname, real* eps, real* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nkb, integer* kb, integer* nalf, real* alf, integer* nbet, real* bet, integer* ninc, integer* inc, integer* nmax, integer* incmax, real* a, real* aa, real* as, real* x, real* xx, real* xs, real* y, real* yy, real* ys, real* yt, real* g, integer* iorder, ftnlen sname_len)
|
||||
{
|
||||
/* Initialized data */
|
||||
|
||||
|
|
@ -1585,9 +1313,9 @@ ftnlen sname_len;
|
|||
static integer i__, k, n;
|
||||
static real alpha;
|
||||
static logical isame[13];
|
||||
extern /* Subroutine */ int smake_();
|
||||
extern /* Subroutine */ int smake_(char*, char*, char*, integer*, integer*, real*, integer*, real*, integer*, integer*, integer*, logical*, real*, ftnlen, ftnlen, ftnlen);
|
||||
static integer nargs;
|
||||
extern /* Subroutine */ int smvch_();
|
||||
extern /* Subroutine */ int smvch_(char*, integer*, integer*, real*, real*, integer*, real*, integer*, real*, real*, integer*, real*, real*, real*, real*, real*, logical*, integer*, logical*, ftnlen);
|
||||
static logical reset;
|
||||
static char cuplo[14];
|
||||
static integer incxs, incys;
|
||||
|
|
@ -1598,13 +1326,14 @@ ftnlen sname_len;
|
|||
static logical packed;
|
||||
static integer nk, ks, ix, iy, ns, lx, ly;
|
||||
static real errmax;
|
||||
extern logical lseres_();
|
||||
extern /* Subroutine */ int cssbmv_();
|
||||
extern logical lseres_(char* , char*, integer*, integer*, real*, real*, integer*, ftnlen, ftnlen);
|
||||
extern /* Subroutine */ void cssbmv_(integer*, char*, integer*, integer*, real*, real*, integer*, real*, integer*, real*, real*, integer*, ftnlen);
|
||||
static real transl;
|
||||
extern /* Subroutine */ int csspmv_(), cssymv_();
|
||||
extern /* Subroutine */ void csspmv_(integer*, char*, integer*, real*, real*, real*, integer*, real*, real*, integer*, ftnlen);
|
||||
extern /* Subroutine */ void cssymv_(integer*, char*, integer*, real*, real*, integer*, real*, integer*, real*, real*, integer*, ftnlen);
|
||||
static integer laa, lda;
|
||||
static real als, bls;
|
||||
extern logical lse_();
|
||||
extern logical lse_(real*, real*, integer*);
|
||||
static real err;
|
||||
|
||||
/* Tests SSYMV, SSBMV and SSPMV. */
|
||||
|
|
@ -2003,17 +1732,7 @@ L130:
|
|||
|
||||
} /* schk2_ */
|
||||
|
||||
/* Subroutine */ int schk3_(sname, eps, thresh, nout, ntra, trace, rewi,
|
||||
fatal, nidim, idim, nkb, kb, ninc, inc, nmax, incmax, a, aa, as, x,
|
||||
xx, xs, xt, g, z__, iorder, sname_len)
|
||||
char *sname;
|
||||
real *eps, *thresh;
|
||||
integer *nout, *ntra;
|
||||
logical *trace, *rewi, *fatal;
|
||||
integer *nidim, *idim, *nkb, *kb, *ninc, *inc, *nmax, *incmax;
|
||||
real *a, *aa, *as, *x, *xx, *xs, *xt, *g, *z__;
|
||||
integer *iorder;
|
||||
ftnlen sname_len;
|
||||
/* Subroutine */ int schk3_(char* sname, real* eps, real* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nkb, integer* kb, integer* ninc, integer* inc, integer* nmax, integer* incmax, real* a, real* aa, real* as, real* x, real* xx, real* xs, real* xt, real* g, real* z__, integer* iorder, ftnlen sname_len)
|
||||
{
|
||||
/* Initialized data */
|
||||
|
||||
|
|
@ -2034,9 +1753,9 @@ ftnlen sname_len;
|
|||
static integer i__, k, n;
|
||||
static char diags[1];
|
||||
static logical isame[13];
|
||||
extern /* Subroutine */ int smake_();
|
||||
extern /* Subroutine */ int smake_(char*, char*, char*, integer*, integer*, real*, integer*, real*, integer*, integer*, integer*, logical*, real*, ftnlen, ftnlen, ftnlen);
|
||||
static integer nargs;
|
||||
extern /* Subroutine */ int smvch_();
|
||||
extern /* Subroutine */ int smvch_(char*, integer*, integer*, real*, real*, integer*, real*, integer*, real*, real*, integer*, real*, real*, real*, real*, real*, logical*, integer*, logical*, ftnlen);
|
||||
static logical reset;
|
||||
static char cuplo[14];
|
||||
static integer incxs;
|
||||
|
|
@ -2047,14 +1766,17 @@ ftnlen sname_len;
|
|||
static integer nk, ks, ix, ns, lx;
|
||||
static char ctrans[14];
|
||||
static real errmax;
|
||||
extern logical lseres_();
|
||||
extern /* Subroutine */ int cstbmv_();
|
||||
extern logical lseres_(char*, char*, integer*, integer*, real*, real*, integer*, ftnlen, ftnlen);
|
||||
extern /* Subroutine */ void cstbmv_(integer*, char*, char*, char*, integer*, integer*, real*, integer*, real*, integer*, ftnlen, ftnlen, ftnlen);
|
||||
static real transl;
|
||||
extern /* Subroutine */ int cstbsv_();
|
||||
extern /* Subroutine */ void cstbsv_(integer*, char*, char*, char*, integer*, integer*, real*, integer*, real*, integer*, ftnlen, ftnlen, ftnlen);
|
||||
static char transs[1];
|
||||
extern /* Subroutine */ int cstpmv_(), cstrmv_(), cstpsv_(), cstrsv_();
|
||||
extern /* Subroutine */ void cstpmv_(integer*, char*, char*, char*, integer*, real*, real*, integer*, ftnlen, ftnlen, ftnlen);
|
||||
extern /* Subroutine */ void cstrmv_(integer*, char*, char*, char*, integer*, real*, integer*, real*, integer*, ftnlen, ftnlen, ftnlen);
|
||||
extern /* Subroutine */ void cstpsv_(integer*, char*, char*, char*, integer*, real*, real*, integer*, ftnlen, ftnlen, ftnlen);
|
||||
extern /* Subroutine */ void cstrsv_(integer*, char*, char*, char*, integer*, real*, integer*, real*, integer*, ftnlen, ftnlen, ftnlen);
|
||||
static integer laa, icd, lda, ict, icu;
|
||||
extern logical lse_();
|
||||
extern logical lse_(real*, real*, integer*);
|
||||
static real err;
|
||||
|
||||
/* Tests STRMV, STBMV, STPMV, STRSV, STBSV and STPSV. */
|
||||
|
|
@ -2508,19 +2230,7 @@ L130:
|
|||
|
||||
} /* schk3_ */
|
||||
|
||||
/* Subroutine */ int schk4_(sname, eps, thresh, nout, ntra, trace, rewi,
|
||||
fatal, nidim, idim, nalf, alf, ninc, inc, nmax, incmax, a, aa, as, x,
|
||||
xx, xs, y, yy, ys, yt, g, z__, iorder, sname_len)
|
||||
char *sname;
|
||||
real *eps, *thresh;
|
||||
integer *nout, *ntra;
|
||||
logical *trace, *rewi, *fatal;
|
||||
integer *nidim, *idim, *nalf;
|
||||
real *alf;
|
||||
integer *ninc, *inc, *nmax, *incmax;
|
||||
real *a, *aa, *as, *x, *xx, *xs, *y, *yy, *ys, *yt, *g, *z__;
|
||||
integer *iorder;
|
||||
ftnlen sname_len;
|
||||
/* Subroutine */ int schk4_(char* sname, real* eps, real* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, real* alf, integer* ninc, integer* inc, integer* nmax, integer* incmax, real* a, real* aa, real* as, real* x, real* xx, real* xs, real* y, real* yy, real* ys, real* yt, real* g, real* z__, integer* iorder, ftnlen sname_len)
|
||||
{
|
||||
/* System generated locals */
|
||||
integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6;
|
||||
|
|
@ -2533,17 +2243,18 @@ ftnlen sname_len;
|
|||
static integer i__, j, m, n;
|
||||
static real alpha, w[1];
|
||||
static logical isame[13];
|
||||
extern /* Subroutine */ int smake_(), csger_();
|
||||
/* Subroutine */ int smake_(char*, char*, char*, integer*, integer*, real*, integer*, real*, integer*, integer*, integer*, logical*, real*, ftnlen, ftnlen, ftnlen);
|
||||
extern /* Subroutine */ void csger_(integer*, integer*, integer*, real*, real*, integer*, real*, integer*, real*, integer*);
|
||||
static integer nargs;
|
||||
extern /* Subroutine */ int smvch_();
|
||||
extern /* Subroutine */ int smvch_(char*, integer*, integer*, real*, real*, integer*, real*, integer*, real*, real*, integer*, real*, real*, real*, real*, real*, logical*, integer*, logical*, ftnlen);
|
||||
static logical reset;
|
||||
static integer incxs, incys, ia, nc, nd, im, in, ms, ix, iy, ns, lx, ly;
|
||||
static real errmax;
|
||||
extern logical lseres_();
|
||||
extern logical lseres_(char* , char*, integer*, integer*, real*, real*, integer*, ftnlen, ftnlen);
|
||||
static real transl;
|
||||
static integer laa, lda;
|
||||
static real als;
|
||||
extern logical lse_();
|
||||
extern logical lse_(real*, real*, integer*);
|
||||
static real err;
|
||||
|
||||
/* Tests SGER. */
|
||||
|
|
@ -2848,19 +2559,7 @@ L150:
|
|||
|
||||
} /* schk4_ */
|
||||
|
||||
/* Subroutine */ int schk5_(sname, eps, thresh, nout, ntra, trace, rewi,
|
||||
fatal, nidim, idim, nalf, alf, ninc, inc, nmax, incmax, a, aa, as, x,
|
||||
xx, xs, y, yy, ys, yt, g, z__, iorder, sname_len)
|
||||
char *sname;
|
||||
real *eps, *thresh;
|
||||
integer *nout, *ntra;
|
||||
logical *trace, *rewi, *fatal;
|
||||
integer *nidim, *idim, *nalf;
|
||||
real *alf;
|
||||
integer *ninc, *inc, *nmax, *incmax;
|
||||
real *a, *aa, *as, *x, *xx, *xs, *y, *yy, *ys, *yt, *g, *z__;
|
||||
integer *iorder;
|
||||
ftnlen sname_len;
|
||||
/* Subroutine */ int schk5_(char* sname, real* eps, real* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, real* alf, integer* ninc, integer* inc, integer* nmax, integer* incmax, real* a, real* aa, real* as, real* x, real* xx, real* xs, real* y, real* yy, real* ys, real* yt, real* g, real* z__, integer* iorder, ftnlen sname_len)
|
||||
{
|
||||
/* Initialized data */
|
||||
|
||||
|
|
@ -2880,25 +2579,25 @@ ftnlen sname_len;
|
|||
static integer i__, j, n;
|
||||
static real alpha, w[1];
|
||||
static logical isame[13];
|
||||
extern /* Subroutine */ int smake_();
|
||||
extern /* Subroutine */ int smake_(char*, char*, char*, integer*, integer*, real*, integer*, real*, integer*, integer*, integer*, logical*, real*, ftnlen, ftnlen, ftnlen);
|
||||
static integer nargs;
|
||||
extern /* Subroutine */ int smvch_();
|
||||
extern /* Subroutine */ int smvch_(char*, integer*, integer*, real*, real*, integer*, real*, integer*, real*, real*, integer*, real*, real*, real*, real*, real*, logical*, integer*, logical*, ftnlen);
|
||||
static logical reset;
|
||||
static char cuplo[14];
|
||||
static integer incxs;
|
||||
extern /* Subroutine */ int csspr_();
|
||||
extern /* Subroutine */ void csspr_(integer*, char*, integer*, real*, real*, integer*, real*, ftnlen);
|
||||
static logical upper;
|
||||
static char uplos[1];
|
||||
extern /* Subroutine */ int cssyr_();
|
||||
extern /* Subroutine */ void cssyr_(integer*, char*, integer*, real*, real*, integer*, real*, integer*, ftnlen);
|
||||
static integer ia, ja, ic, nc, jj, lj, in;
|
||||
static logical packed;
|
||||
static integer ix, ns, lx;
|
||||
static real errmax;
|
||||
extern logical lseres_();
|
||||
extern logical lseres_(char*, char*, integer*, integer*, real*, real*, integer*, ftnlen, ftnlen);
|
||||
static real transl;
|
||||
static integer laa, lda;
|
||||
static real als;
|
||||
extern logical lse_();
|
||||
extern logical lse_(real*, real*, integer*);
|
||||
static real err;
|
||||
|
||||
/* Tests SSYR and SSPR. */
|
||||
|
|
@ -3218,19 +2917,7 @@ L130:
|
|||
|
||||
} /* schk5_ */
|
||||
|
||||
/* Subroutine */ int schk6_(sname, eps, thresh, nout, ntra, trace, rewi,
|
||||
fatal, nidim, idim, nalf, alf, ninc, inc, nmax, incmax, a, aa, as, x,
|
||||
xx, xs, y, yy, ys, yt, g, z__, iorder, sname_len)
|
||||
char *sname;
|
||||
real *eps, *thresh;
|
||||
integer *nout, *ntra;
|
||||
logical *trace, *rewi, *fatal;
|
||||
integer *nidim, *idim, *nalf;
|
||||
real *alf;
|
||||
integer *ninc, *inc, *nmax, *incmax;
|
||||
real *a, *aa, *as, *x, *xx, *xs, *y, *yy, *ys, *yt, *g, *z__;
|
||||
integer *iorder;
|
||||
ftnlen sname_len;
|
||||
/* Subroutine */ int schk6_(char* sname, real* eps, real* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, real* alf, integer* ninc, integer* inc, integer* nmax, integer* incmax, real* a, real* aa, real* as, real* x, real* xx, real* xs, real* y, real* yy, real* ys, real* yt, real* g, real* z__, integer* iorder, ftnlen sname_len)
|
||||
{
|
||||
/* Initialized data */
|
||||
|
||||
|
|
@ -3249,26 +2936,26 @@ ftnlen sname_len;
|
|||
static integer i__, j, n;
|
||||
static real alpha, w[2];
|
||||
static logical isame[13];
|
||||
extern /* Subroutine */ int smake_();
|
||||
extern /* Subroutine */ int smake_(char*, char*, char*, integer*, integer*, real*, integer*, real*, integer*, integer*, integer*, logical*, real*, ftnlen, ftnlen, ftnlen);
|
||||
static integer nargs;
|
||||
extern /* Subroutine */ int smvch_();
|
||||
extern /* Subroutine */ int smvch_(char*, integer*, integer*, real*, real*, integer*, real*, integer*, real*, real*, integer*, real*, real*, real*, real*, real*, logical*, integer*, logical*, ftnlen);
|
||||
static logical reset;
|
||||
static char cuplo[14];
|
||||
static integer incxs, incys;
|
||||
static logical upper;
|
||||
static char uplos[1];
|
||||
static integer ia, ja, ic;
|
||||
extern /* Subroutine */ int csspr2_();
|
||||
extern /* Subroutine */ void csspr2_(integer*, char*, integer*, real*, real*, integer*, real*, integer*, real*, ftnlen);
|
||||
static integer nc, jj, lj, in;
|
||||
static logical packed;
|
||||
extern /* Subroutine */ int cssyr2_();
|
||||
extern /* Subroutine */ void cssyr2_(integer*, char*, integer*, real*, real*, integer*, real*, integer*, real*, integer*, ftnlen);
|
||||
static integer ix, iy, ns, lx, ly;
|
||||
static real errmax;
|
||||
extern logical lseres_();
|
||||
extern logical lseres_(char* type__, char* uplo, integer* m, integer* n, real* aa, real* as, integer* lda, ftnlen ltype_len, ftnlen uplo_len);
|
||||
static real transl;
|
||||
static integer laa, lda;
|
||||
static real als;
|
||||
extern logical lse_();
|
||||
extern logical lse_(real*, real*, integer*);
|
||||
static real err;
|
||||
|
||||
/* Tests SSYR2 and SSPR2. */
|
||||
|
|
@ -3634,26 +3321,14 @@ L170:
|
|||
|
||||
} /* schk6_ */
|
||||
|
||||
/* Subroutine */ int smake_(type__, uplo, diag, m, n, a, nmax, aa, lda, kl,
|
||||
ku, reset, transl, type_len, uplo_len, diag_len)
|
||||
char *type__, *uplo, *diag;
|
||||
integer *m, *n;
|
||||
real *a;
|
||||
integer *nmax;
|
||||
real *aa;
|
||||
integer *lda, *kl, *ku;
|
||||
logical *reset;
|
||||
real *transl;
|
||||
ftnlen type_len;
|
||||
ftnlen uplo_len;
|
||||
ftnlen diag_len;
|
||||
{
|
||||
/* Subroutine */ int smake_(char* type__, char* uplo, char* diag, integer* m, integer* n, real* a, integer* nmax, real* aa, integer* lda, integer* kl, integer* ku, logical* reset, real* transl, ftnlen type_len, ftnlen uplo_len, ftnlen diag_len)
|
||||
{
|
||||
/* System generated locals */
|
||||
integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
|
||||
|
||||
/* Local variables */
|
||||
static integer ibeg, iend;
|
||||
extern doublereal sbeg_();
|
||||
extern doublereal sbeg_(logical*);
|
||||
static integer ioff;
|
||||
static logical unit;
|
||||
static integer i__, j;
|
||||
|
|
@ -3879,28 +3554,14 @@ ftnlen diag_len;
|
|||
|
||||
} /* smake_ */
|
||||
|
||||
/* Subroutine */ int smvch_(trans, m, n, alpha, a, nmax, x, incx, beta, y,
|
||||
incy, yt, g, yy, eps, err, fatal, nout, mv, trans_len)
|
||||
char *trans;
|
||||
integer *m, *n;
|
||||
real *alpha, *a;
|
||||
integer *nmax;
|
||||
real *x;
|
||||
integer *incx;
|
||||
real *beta, *y;
|
||||
integer *incy;
|
||||
real *yt, *g, *yy, *eps, *err;
|
||||
logical *fatal;
|
||||
integer *nout;
|
||||
logical *mv;
|
||||
ftnlen trans_len;
|
||||
/* Subroutine */ int smvch_(char* trans, integer* m, integer* n, real* alpha, real* a, integer* nmax, real* x, integer* incx, real* beta, real* y, integer* incy, real* yt, real* g, real* yy, real* eps, real* err, logical* fatal, integer* nout, logical* mv, ftnlen trans_len)
|
||||
{
|
||||
/* System generated locals */
|
||||
integer a_dim1, a_offset, i__1, i__2;
|
||||
real r__1;
|
||||
|
||||
/* Builtin functions */
|
||||
double sqrt();
|
||||
double sqrt(double);
|
||||
|
||||
/* Local variables */
|
||||
static real erri;
|
||||
|
|
@ -4029,9 +3690,7 @@ L70:
|
|||
|
||||
} /* smvch_ */
|
||||
|
||||
logical lse_(ri, rj, lr)
|
||||
real *ri, *rj;
|
||||
integer *lr;
|
||||
logical lse_(real* ri, real* rj, integer* lr)
|
||||
{
|
||||
/* System generated locals */
|
||||
integer i__1;
|
||||
|
|
@ -4076,13 +3735,7 @@ L30:
|
|||
|
||||
} /* lse_ */
|
||||
|
||||
logical lseres_(type__, uplo, m, n, aa, as, lda, type_len, uplo_len)
|
||||
char *type__, *uplo;
|
||||
integer *m, *n;
|
||||
real *aa, *as;
|
||||
integer *lda;
|
||||
ftnlen type_len;
|
||||
ftnlen uplo_len;
|
||||
logical lseres_(char* type__, char* uplo, integer* m, integer* n, real* aa, real* as, integer* lda, ftnlen ltype_len, ftnlen uplo_len)
|
||||
{
|
||||
/* System generated locals */
|
||||
integer aa_dim1, aa_offset, as_dim1, as_offset, i__1, i__2;
|
||||
|
|
@ -4169,8 +3822,7 @@ L80:
|
|||
|
||||
} /* lseres_ */
|
||||
|
||||
doublereal sbeg_(reset)
|
||||
logical *reset;
|
||||
doublereal sbeg_(logical* reset)
|
||||
{
|
||||
/* System generated locals */
|
||||
real ret_val;
|
||||
|
|
@ -4221,8 +3873,7 @@ L10:
|
|||
|
||||
} /* sbeg_ */
|
||||
|
||||
doublereal sdiff_(x, y)
|
||||
real *x, *y;
|
||||
doublereal sdiff_(real* x, real* y)
|
||||
{
|
||||
/* System generated locals */
|
||||
real ret_val;
|
||||
|
|
|
|||
|
|
@ -242,129 +242,6 @@ typedef struct Namelist Namelist;
|
|||
/* procedure parameter types for -A and -C++ */
|
||||
|
||||
#define F2C_proc_par_types 1
|
||||
#ifdef __cplusplus
|
||||
typedef logical (*L_fp)(...);
|
||||
#else
|
||||
typedef logical (*L_fp)();
|
||||
#endif
|
||||
#if 0
|
||||
static float spow_ui(float x, integer n) {
|
||||
float pow=1.0; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x = 1/x;
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow *= x;
|
||||
if(u >>= 1) x *= x;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
return pow;
|
||||
}
|
||||
static double dpow_ui(double x, integer n) {
|
||||
double pow=1.0; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x = 1/x;
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow *= x;
|
||||
if(u >>= 1) x *= x;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
return pow;
|
||||
}
|
||||
#ifdef _MSC_VER
|
||||
static _Fcomplex cpow_ui(complex x, integer n) {
|
||||
complex pow={1.0,0.0}; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i;
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow.r *= x.r, pow.i *= x.i;
|
||||
if(u >>= 1) x.r *= x.r, x.i *= x.i;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
_Fcomplex p={pow.r, pow.i};
|
||||
return p;
|
||||
}
|
||||
#else
|
||||
static _Complex float cpow_ui(_Complex float x, integer n) {
|
||||
_Complex float pow=1.0; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x = 1/x;
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow *= x;
|
||||
if(u >>= 1) x *= x;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
return pow;
|
||||
}
|
||||
#endif
|
||||
#ifdef _MSC_VER
|
||||
static _Dcomplex zpow_ui(_Dcomplex x, integer n) {
|
||||
_Dcomplex pow={1.0,0.0}; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1];
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1];
|
||||
if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1];
|
||||
else break;
|
||||
}
|
||||
}
|
||||
_Dcomplex p = {pow._Val[0], pow._Val[1]};
|
||||
return p;
|
||||
}
|
||||
#else
|
||||
static _Complex double zpow_ui(_Complex double x, integer n) {
|
||||
_Complex double pow=1.0; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x = 1/x;
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow *= x;
|
||||
if(u >>= 1) x *= x;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
return pow;
|
||||
}
|
||||
#endif
|
||||
static integer pow_ii(integer x, integer n) {
|
||||
integer pow; unsigned long int u;
|
||||
if (n <= 0) {
|
||||
if (n == 0 || x == 1) pow = 1;
|
||||
else if (x != -1) pow = x == 0 ? 1/x : 0;
|
||||
else n = -n;
|
||||
}
|
||||
if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
|
||||
u = n;
|
||||
for(pow = 1; ; ) {
|
||||
if(u & 01) pow *= x;
|
||||
if(u >>= 1) x *= x;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
return pow;
|
||||
}
|
||||
static integer dmaxloc_(double *w, integer s, integer e, integer *n)
|
||||
{
|
||||
double m; integer i, mi;
|
||||
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
|
||||
if (w[i-1]>m) mi=i ,m=w[i-1];
|
||||
return mi-s+1;
|
||||
}
|
||||
static integer smaxloc_(float *w, integer s, integer e, integer *n)
|
||||
{
|
||||
float m; integer i, mi;
|
||||
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
|
||||
if (w[i-1]>m) mi=i ,m=w[i-1];
|
||||
return mi-s+1;
|
||||
}
|
||||
#endif
|
||||
/* -- translated by f2c (version 20000121).
|
||||
You must link the resulting object file with the libraries:
|
||||
-lf2c -lm (in that order)
|
||||
*/
|
||||
|
||||
|
||||
|
||||
/* Common Block Declarations */
|
||||
|
|
@ -393,7 +270,7 @@ static logical c_true = TRUE_;
|
|||
static integer c__0 = 0;
|
||||
static logical c_false = FALSE_;
|
||||
|
||||
/* Main program MAIN__() */ int main()
|
||||
/* Main program MAIN__() */ int main(void)
|
||||
{
|
||||
/* Initialized data */
|
||||
|
||||
|
|
@ -402,26 +279,25 @@ static logical c_false = FALSE_;
|
|||
/* System generated locals */
|
||||
integer i__1, i__2, i__3;
|
||||
real r__1;
|
||||
/* Builtin functions */
|
||||
integer s_rsle(), do_lio(), e_rsle(), f_open(), s_wsfe(), do_fio(),
|
||||
e_wsfe(), s_wsle(), e_wsle(), s_rsfe(), e_rsfe();
|
||||
integer f_clos();
|
||||
|
||||
/* Local variables */
|
||||
static integer nalf, idim[9];
|
||||
static logical same;
|
||||
static integer nbet, ntra;
|
||||
static logical rewi;
|
||||
extern /* Subroutine */ int schk1_(), schk2_(), schk3_(), schk4_(),
|
||||
schk5_();
|
||||
extern /* Subroutine */ int schk1_(char*, real*, real*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, real*, integer*, real*, integer*, real*, real*, real*, real*, real*, real*, real*, real*, real*, real*, real*, integer*, ftnlen);
|
||||
extern /* Subroutine */ int schk2_(char*, real*, real*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, real*, integer*, real*, integer*, real*, real*, real*, real*, real*, real*, real*, real*, real*, real*, real*, integer*, ftnlen);
|
||||
extern /* Subroutine */ int schk3_(char*, real*, real*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, real*, integer*, real*, real*, real*, real*, real*, real*, real*, real*, real*, integer*, ftnlen);
|
||||
extern /* Subroutine */ int schk4_(char*, real*, real*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, real*, integer*, real*, integer*, real*, real*, real*, real*, real*, real*, real*, real*, real*, real*, real*, integer*, ftnlen);
|
||||
extern /* Subroutine */ int schk5_(char*, real*, real*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, real*, integer*, real*, integer*, real*, real*, real*, real*, real*, real*, real*, real*, real*, real*, real*, integer*, ftnlen);
|
||||
static real c__[4225] /* was [65][65] */, g[65];
|
||||
static integer i__, j, n;
|
||||
static logical fatal;
|
||||
static real w[130];
|
||||
extern doublereal sdiff_();
|
||||
extern doublereal sdiff_(real*, real*);
|
||||
static logical trace;
|
||||
static integer nidim;
|
||||
extern /* Subroutine */ int smmch_();
|
||||
extern /* Subroutine */ int smmch_(char*, char*, integer*, integer*, integer*, real*, real*, integer*, real*, integer*, real*, real*, integer*, real*, real*, real*, integer*, real*, real*, logical*, integer*, logical*, ftnlen, ftnlen);
|
||||
static char snaps[32];
|
||||
static integer isnum;
|
||||
static logical ltest[6];
|
||||
|
|
@ -433,9 +309,9 @@ static logical c_false = FALSE_;
|
|||
static logical rorder;
|
||||
static integer layout;
|
||||
static logical ltestt, tsterr;
|
||||
extern /* Subroutine */ int cs3chke_();
|
||||
extern /* Subroutine */ void cs3chke_(char*, ftnlen);
|
||||
static real alf[7], bet[7];
|
||||
extern logical lse_();
|
||||
extern logical lse_(real*, real*, integer*);
|
||||
static real eps, err;
|
||||
char tmpchar;
|
||||
|
||||
|
|
@ -899,21 +775,7 @@ L230:
|
|||
|
||||
} /* MAIN__ */
|
||||
|
||||
/* Subroutine */ int schk1_(sname, eps, thresh, nout, ntra, trace, rewi,
|
||||
fatal, nidim, idim, nalf, alf, nbet, bet, nmax, a, aa, as, b, bb, bs,
|
||||
c__, cc, cs, ct, g, iorder, sname_len)
|
||||
char *sname;
|
||||
real *eps, *thresh;
|
||||
integer *nout, *ntra;
|
||||
logical *trace, *rewi, *fatal;
|
||||
integer *nidim, *idim, *nalf;
|
||||
real *alf;
|
||||
integer *nbet;
|
||||
real *bet;
|
||||
integer *nmax;
|
||||
real *a, *aa, *as, *b, *bb, *bs, *c__, *cc, *cs, *ct, *g;
|
||||
integer *iorder;
|
||||
ftnlen sname_len;
|
||||
/* Subroutine */ int schk1_(char* sname, real* eps, real* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, real* alf, integer* nbet, real* bet, integer* nmax, real* a, real* aa, real* as, real* b, real* bb, real* bs, real* c__, real* cc, real* cs, real* ct, real* g, integer* iorder, ftnlen sname_len)
|
||||
{
|
||||
/* Initialized data */
|
||||
|
||||
|
|
@ -923,8 +785,6 @@ ftnlen sname_len;
|
|||
integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2,
|
||||
i__3, i__4, i__5, i__6;
|
||||
|
||||
/* Builtin functions */
|
||||
integer f_rew(), s_wsfe(), e_wsfe(), do_fio();
|
||||
|
||||
/* Local variables */
|
||||
static real beta;
|
||||
|
|
@ -936,18 +796,17 @@ ftnlen sname_len;
|
|||
static logical trana, tranb;
|
||||
static integer nargs;
|
||||
static logical reset;
|
||||
extern /* Subroutine */ void sprcn1_();
|
||||
extern /* Subroutine */ int smake_();
|
||||
extern /* Subroutine */ int smmch_();
|
||||
extern /* Subroutine */ void sprcn1_(integer*, integer*, char*, integer*, char*, char*, integer*, integer*, integer*, real*, integer*, integer*, real*, integer*, ftnlen, ftnlen, ftnlen);
|
||||
extern /* Subroutine */ int smake_(char*, char*, char*, integer*, integer*, real*, integer*, real*, integer*, logical*, real*, ftnlen, ftnlen, ftnlen);
|
||||
extern /* Subroutine */ int smmch_(char*, char*, integer*, integer*, integer*, real*, real*, integer*, real*, integer*, real*, real*, integer*, real*, real*, real*, integer*, real*, real*, logical*, integer*, logical*, ftnlen, ftnlen);
|
||||
static integer ia, ib, ma, mb, na, nb, nc, ik, im, in, ks, ms, ns;
|
||||
extern /* Subroutine */ int csgemm_();
|
||||
extern /* Subroutine */ void csgemm_(integer*, char*, char*, integer*, integer*, integer*, real*, real*, integer*, real*, integer*, real*, real*, integer*, ftnlen, ftnlen);
|
||||
static char tranas[1], tranbs[1], transa[1], transb[1];
|
||||
static real errmax;
|
||||
extern logical lseres_();
|
||||
extern logical lse_();
|
||||
extern logical lseres_(char*, char*, integer*, integer*, real*, real*, integer*, ftnlen, ftnlen);
|
||||
extern logical lse_(real*, real*, integer*);
|
||||
static integer ica, icb, laa, lbb, lda, lcc, ldb, ldc;
|
||||
static real als, bls;
|
||||
extern logical lse_();
|
||||
static real err;
|
||||
|
||||
/* Tests SGEMM. */
|
||||
|
|
@ -1278,23 +1137,8 @@ L130:
|
|||
|
||||
|
||||
|
||||
/* Subroutine */ void sprcn1_(nout, nc, sname, iorder, transa, transb, m, n, k,
|
||||
alpha, lda, ldb, beta, ldc, sname_len, transa_len, transb_len)
|
||||
integer *nout, *nc;
|
||||
char *sname;
|
||||
integer *iorder;
|
||||
char *transa, *transb;
|
||||
integer *m, *n, *k;
|
||||
real *alpha;
|
||||
integer *lda, *ldb;
|
||||
real *beta;
|
||||
integer *ldc;
|
||||
ftnlen sname_len;
|
||||
ftnlen transa_len;
|
||||
ftnlen transb_len;
|
||||
/* Subroutine */ void sprcn1_(integer* nout, integer* nc, char* sname, integer* iorder, char* transa, char* transb, integer* m, integer* n, integer* k, real* alpha, integer* lda, integer* ldb, real* beta, integer* ldc, ftnlen sname_len, ftnlen transa_len, ftnlen transb_len)
|
||||
{
|
||||
/* Builtin functions */
|
||||
integer s_wsfe(), do_fio(), e_wsfe();
|
||||
|
||||
/* Local variables */
|
||||
static char crc[14], cta[14], ctb[14];
|
||||
|
|
@ -1324,21 +1168,7 @@ ftnlen transb_len;
|
|||
} /* sprcn1_ */
|
||||
|
||||
|
||||
/* Subroutine */ int schk2_(sname, eps, thresh, nout, ntra, trace, rewi,
|
||||
fatal, nidim, idim, nalf, alf, nbet, bet, nmax, a, aa, as, b, bb, bs,
|
||||
c__, cc, cs, ct, g, iorder, sname_len)
|
||||
char *sname;
|
||||
real *eps, *thresh;
|
||||
integer *nout, *ntra;
|
||||
logical *trace, *rewi, *fatal;
|
||||
integer *nidim, *idim, *nalf;
|
||||
real *alf;
|
||||
integer *nbet;
|
||||
real *bet;
|
||||
integer *nmax;
|
||||
real *a, *aa, *as, *b, *bb, *bs, *c__, *cc, *cs, *ct, *g;
|
||||
integer *iorder;
|
||||
ftnlen sname_len;
|
||||
/* Subroutine */ int schk2_(char* sname, real* eps, real* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, real* alf, integer* nbet, real* bet, integer* nmax, real* a, real* aa, real* as, real* b, real* bb, real* bs, real* c__, real* cc, real* cs, real* ct, real* g, integer* iorder, ftnlen sname_len)
|
||||
{
|
||||
/* Initialized data */
|
||||
|
||||
|
|
@ -1349,8 +1179,6 @@ ftnlen sname_len;
|
|||
integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2,
|
||||
i__3, i__4, i__5;
|
||||
|
||||
/* Builtin functions */
|
||||
integer f_rew(), s_wsfe(), e_wsfe(), do_fio();
|
||||
|
||||
/* Local variables */
|
||||
static real beta;
|
||||
|
|
@ -1368,15 +1196,15 @@ ftnlen sname_len;
|
|||
static char uplos[1];
|
||||
static integer ia, ib, na, nc, im, in, ms, ns;
|
||||
static real errmax;
|
||||
extern logical lseres_();
|
||||
extern /* Subroutine */ int cssymm_();
|
||||
extern void sprcn2_();
|
||||
extern int smake_();
|
||||
extern int smmch_();
|
||||
extern logical lseres_(char*, char*, integer*, integer*, real*, real*, integer*, ftnlen, ftnlen);
|
||||
extern /* Subroutine */ void cssymm_(integer*, char*, char*, integer*, integer*, real*, real*, integer*, real*, integer*, real*, real*, integer*, ftnlen, ftnlen);
|
||||
extern void sprcn2_(integer*, integer*, char*, integer*, char*, char*, integer*, integer*, real*, integer*, integer*, real*, integer*, ftnlen, ftnlen, ftnlen);
|
||||
extern /* Subroutine */ int smake_(char*, char*, char*, integer*, integer*, real*, integer*, real*, integer*, logical*, real*, ftnlen, ftnlen, ftnlen);
|
||||
extern /* Subroutine */ int smmch_(char*, char*, integer*, integer*, integer*, real*, real*, integer*, real*, integer*, real*, real*, integer*, real*, real*, real*, integer*, real*, real*, logical*, integer*, logical*, ftnlen, ftnlen);
|
||||
static integer laa, lbb, lda, lcc, ldb, ldc, ics;
|
||||
static real als, bls;
|
||||
static integer icu;
|
||||
extern logical lse_();
|
||||
extern logical lse_(real*, real*, integer*);
|
||||
static real err;
|
||||
|
||||
/* Tests SSYMM. */
|
||||
|
|
@ -1685,23 +1513,8 @@ L120:
|
|||
} /* schk2_ */
|
||||
|
||||
|
||||
/* Subroutine */ void sprcn2_(nout, nc, sname, iorder, side, uplo, m, n, alpha,
|
||||
lda, ldb, beta, ldc, sname_len, side_len, uplo_len)
|
||||
integer *nout, *nc;
|
||||
char *sname;
|
||||
integer *iorder;
|
||||
char *side, *uplo;
|
||||
integer *m, *n;
|
||||
real *alpha;
|
||||
integer *lda, *ldb;
|
||||
real *beta;
|
||||
integer *ldc;
|
||||
ftnlen sname_len;
|
||||
ftnlen side_len;
|
||||
ftnlen uplo_len;
|
||||
/* Subroutine */ void sprcn2_(integer* nout, integer* nc, char* sname, integer* iorder, char* side, char* uplo, integer* m, integer* n, real* alpha, integer* lda, integer* ldb, real* beta, integer* ldc, ftnlen sname_len, ftnlen side_len, ftnlen uplo_len)
|
||||
{
|
||||
/* Builtin functions */
|
||||
integer s_wsfe(), do_fio(), e_wsfe();
|
||||
|
||||
/* Local variables */
|
||||
static char cs[14], cu[14], crc[14];
|
||||
|
|
@ -1726,19 +1539,7 @@ ftnlen uplo_len;
|
|||
} /* sprcn2_ */
|
||||
|
||||
|
||||
/* Subroutine */ int schk3_(sname, eps, thresh, nout, ntra, trace, rewi,
|
||||
fatal, nidim, idim, nalf, alf, nmax, a, aa, as, b, bb, bs, ct, g, c__,
|
||||
iorder, sname_len)
|
||||
char *sname;
|
||||
real *eps, *thresh;
|
||||
integer *nout, *ntra;
|
||||
logical *trace, *rewi, *fatal;
|
||||
integer *nidim, *idim, *nalf;
|
||||
real *alf;
|
||||
integer *nmax;
|
||||
real *a, *aa, *as, *b, *bb, *bs, *ct, *g, *c__;
|
||||
integer *iorder;
|
||||
ftnlen sname_len;
|
||||
/* Subroutine */ int schk3_(char* sname, real* eps, real* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, real* alf, integer* nmax, real* a, real* aa, real* as, real* b, real* bb, real* bs, real* ct, real* g, real* c__, integer* iorder, ftnlen sname_len)
|
||||
{
|
||||
/* Initialized data */
|
||||
|
||||
|
|
@ -1751,8 +1552,6 @@ ftnlen sname_len;
|
|||
integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2,
|
||||
i__3, i__4, i__5;
|
||||
|
||||
/* Builtin functions */
|
||||
integer f_rew(), s_wsfe(), e_wsfe(), do_fio();
|
||||
|
||||
/* Local variables */
|
||||
static char diag[1];
|
||||
|
|
@ -1769,18 +1568,19 @@ ftnlen sname_len;
|
|||
static integer nargs;
|
||||
static logical reset;
|
||||
static char uplos[1];
|
||||
extern /* Subroutine */ void sprcn3_();
|
||||
extern /* Subroutine */ void sprcn3_(integer*, integer*, char*, integer*, char*, char*, char*, char*, integer*, integer*, real*, integer*, integer*, ftnlen , ftnlen, ftnlen, ftnlen, ftnlen);
|
||||
static integer ia, na, nc, im, in, ms, ns;
|
||||
static char tranas[1], transa[1];
|
||||
static real errmax;
|
||||
extern int smake_();
|
||||
extern int smmch_();
|
||||
extern logical lseres_();
|
||||
extern /* Subroutine */ int cstrmm_(), cstrsm_();
|
||||
extern /* Subroutine */ int smake_(char*, char*, char*, integer*, integer*, real*, integer*, real*, integer*, logical*, real*, ftnlen, ftnlen, ftnlen);
|
||||
extern /* Subroutine */ int smmch_(char*, char*, integer*, integer*, integer*, real*, real*, integer*, real*, integer*, real*, real*, integer*, real*, real*, real*, integer*, real*, real*, logical*, integer*, logical*, ftnlen, ftnlen);
|
||||
extern logical lseres_(char*, char*, integer*, integer*, real*, real*, integer*, ftnlen, ftnlen);
|
||||
extern /* Subroutine */ void cstrmm_(integer*, char*, char*, char*, char*, integer*, integer*, real*, real*, integer*, real*, integer*, ftnlen, ftnlen, ftnlen, ftnlen);
|
||||
extern /* Subroutine */ void cstrsm_(integer*, char*, char*, char*, char*, integer*, integer*, real*, real*, integer*, real*, integer*, ftnlen, ftnlen, ftnlen, ftnlen);
|
||||
static integer laa, icd, lbb, lda, ldb, ics;
|
||||
static real als;
|
||||
static integer ict, icu;
|
||||
extern logical lse_();
|
||||
extern logical lse_(real*, real*, integer*);
|
||||
static real err;
|
||||
|
||||
/* Tests STRMM and STRSM. */
|
||||
|
|
@ -2155,24 +1955,8 @@ L160:
|
|||
} /* schk3_ */
|
||||
|
||||
|
||||
/* Subroutine */ void sprcn3_(nout, nc, sname, iorder, side, uplo, transa,
|
||||
diag, m, n, alpha, lda, ldb, sname_len, side_len, uplo_len,
|
||||
transa_len, diag_len)
|
||||
integer *nout, *nc;
|
||||
char *sname;
|
||||
integer *iorder;
|
||||
char *side, *uplo, *transa, *diag;
|
||||
integer *m, *n;
|
||||
real *alpha;
|
||||
integer *lda, *ldb;
|
||||
ftnlen sname_len;
|
||||
ftnlen side_len;
|
||||
ftnlen uplo_len;
|
||||
ftnlen transa_len;
|
||||
ftnlen diag_len;
|
||||
/* Subroutine */ void sprcn3_(integer* nout, integer* nc, char* sname, integer* iorder, char* side, char* uplo, char* transa, char* diag, integer* m, integer* n, real* alpha, integer* lda, integer* ldb, ftnlen sname_len, ftnlen side_len, ftnlen uplo_len, ftnlen transa_len, ftnlen diag_len)
|
||||
{
|
||||
/* Builtin functions */
|
||||
integer s_wsfe(), do_fio(), e_wsfe();
|
||||
|
||||
/* Local variables */
|
||||
static char ca[14], cd[14], cs[14], cu[14], crc[14];
|
||||
|
|
@ -2210,21 +1994,7 @@ ftnlen diag_len;
|
|||
} /* sprcn3_ */
|
||||
|
||||
|
||||
/* Subroutine */ int schk4_(sname, eps, thresh, nout, ntra, trace, rewi,
|
||||
fatal, nidim, idim, nalf, alf, nbet, bet, nmax, a, aa, as, b, bb, bs,
|
||||
c__, cc, cs, ct, g, iorder, sname_len)
|
||||
char *sname;
|
||||
real *eps, *thresh;
|
||||
integer *nout, *ntra;
|
||||
logical *trace, *rewi, *fatal;
|
||||
integer *nidim, *idim, *nalf;
|
||||
real *alf;
|
||||
integer *nbet;
|
||||
real *bet;
|
||||
integer *nmax;
|
||||
real *a, *aa, *as, *b, *bb, *bs, *c__, *cc, *cs, *ct, *g;
|
||||
integer *iorder;
|
||||
ftnlen sname_len;
|
||||
/* Subroutine */ int schk4_(char* sname, real* eps, real* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, real* alf, integer* nbet, real* bet, integer* nmax, real* a, real* aa, real* as, real* b, real* bb, real* bs, real* c__, real* cc, real* cs, real* ct, real* g, integer* iorder, ftnlen sname_len)
|
||||
{
|
||||
/* Initialized data */
|
||||
|
||||
|
|
@ -2235,8 +2005,6 @@ ftnlen sname_len;
|
|||
integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2,
|
||||
i__3, i__4, i__5;
|
||||
|
||||
/* Builtin functions */
|
||||
integer f_rew(), s_wsfe(), e_wsfe(), do_fio();
|
||||
|
||||
/* Local variables */
|
||||
static real beta;
|
||||
|
|
@ -2253,18 +2021,18 @@ ftnlen sname_len;
|
|||
static char trans[1];
|
||||
static logical upper;
|
||||
static char uplos[1];
|
||||
extern /* Subroutine */ void sprcn4_();
|
||||
extern /* Subroutine */ int smake_();
|
||||
extern /* Subroutine */ int smmch_();
|
||||
extern /* Subroutine */ void sprcn4_(integer*, integer*, char*, integer*, char*, char*, integer*, integer*, real*, integer*, real*, integer*, ftnlen, ftnlen, ftnlen);
|
||||
extern /* Subroutine */ int smake_(char*, char*, char*, integer*, integer*, real*, integer*, real*, integer*, logical*, real*, ftnlen, ftnlen, ftnlen);
|
||||
extern /* Subroutine */ int smmch_(char*, char*, integer*, integer*, integer*, real*, real*, integer*, real*, integer*, real*, real*, integer*, real*, real*, real*, integer*, real*, real*, logical*, integer*, logical*, ftnlen, ftnlen);
|
||||
static integer ia, ib, jc, ma, na, nc, ik, in, jj, lj, ks, ns;
|
||||
static real errmax;
|
||||
extern logical lseres_();
|
||||
extern logical lseres_(char*, char*, integer*, integer*, real*, real*, integer*, ftnlen, ftnlen);
|
||||
static char transs[1];
|
||||
extern /* Subroutine */ int cssyrk_();
|
||||
extern /* Subroutine */ void cssyrk_(integer*, char*, char*, integer*, integer*, real*, real*, integer*, real*, real*, integer*, ftnlen, ftnlen);
|
||||
static integer laa, lda, lcc, ldc;
|
||||
static real als;
|
||||
static integer ict, icu;
|
||||
extern logical lse_();
|
||||
extern logical lse_(real*, real*, integer*);
|
||||
static real err;
|
||||
|
||||
/* Tests SSYRK. */
|
||||
|
|
@ -2575,23 +2343,8 @@ L130:
|
|||
} /* schk4_ */
|
||||
|
||||
|
||||
/* Subroutine */ void sprcn4_(nout, nc, sname, iorder, uplo, transa, n, k,
|
||||
alpha, lda, beta, ldc, sname_len, uplo_len, transa_len)
|
||||
integer *nout, *nc;
|
||||
char *sname;
|
||||
integer *iorder;
|
||||
char *uplo, *transa;
|
||||
integer *n, *k;
|
||||
real *alpha;
|
||||
integer *lda;
|
||||
real *beta;
|
||||
integer *ldc;
|
||||
ftnlen sname_len;
|
||||
ftnlen uplo_len;
|
||||
ftnlen transa_len;
|
||||
/* Subroutine */ void sprcn4_(integer* nout, integer* nc, char* sname, integer* iorder, char* uplo, char* transa, integer* n, integer* k, real* alpha, integer* lda, real* beta, integer* ldc, ftnlen sname_len, ftnlen uplo_len, ftnlen transa_len)
|
||||
{
|
||||
/* Builtin functions */
|
||||
integer s_wsfe(), do_fio(), e_wsfe();
|
||||
|
||||
/* Local variables */
|
||||
static char ca[14], cu[14], crc[14];
|
||||
|
|
@ -2619,21 +2372,7 @@ ftnlen transa_len;
|
|||
} /* sprcn4_ */
|
||||
|
||||
|
||||
/* Subroutine */ int schk5_(sname, eps, thresh, nout, ntra, trace, rewi,
|
||||
fatal, nidim, idim, nalf, alf, nbet, bet, nmax, ab, aa, as, bb, bs,
|
||||
c__, cc, cs, ct, g, w, iorder, sname_len)
|
||||
char *sname;
|
||||
real *eps, *thresh;
|
||||
integer *nout, *ntra;
|
||||
logical *trace, *rewi, *fatal;
|
||||
integer *nidim, *idim, *nalf;
|
||||
real *alf;
|
||||
integer *nbet;
|
||||
real *bet;
|
||||
integer *nmax;
|
||||
real *ab, *aa, *as, *bb, *bs, *c__, *cc, *cs, *ct, *g, *w;
|
||||
integer *iorder;
|
||||
ftnlen sname_len;
|
||||
/* Subroutine */ int schk5_(char* sname, real* eps, real* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, real* alf, integer* nbet, real* bet, integer* nmax, real* ab, real* aa, real* as, real* bb, real* bs, real* c__, real* cc, real* cs, real* ct, real* g, real* w, integer* iorder, ftnlen sname_len)
|
||||
{
|
||||
/* Initialized data */
|
||||
|
||||
|
|
@ -2643,8 +2382,6 @@ ftnlen sname_len;
|
|||
/* System generated locals */
|
||||
integer c_dim1, c_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7, i__8;
|
||||
|
||||
/* Builtin functions */
|
||||
integer f_rew(), s_wsfe(), e_wsfe(), do_fio();
|
||||
|
||||
/* Local variables */
|
||||
static integer jjab;
|
||||
|
|
@ -2663,18 +2400,18 @@ ftnlen sname_len;
|
|||
static logical upper;
|
||||
static char uplos[1];
|
||||
static integer ia, ib;
|
||||
extern /* Subroutine */ void sprcn5_();
|
||||
extern /* Subroutine */ void sprcn5_(integer*, integer*, char*, integer*, char*, char*, integer*, integer*, real*, integer*, integer*, real*, integer*, ftnlen, ftnlen, ftnlen);
|
||||
static integer jc, ma, na, nc, ik, in, jj, lj, ks, ns;
|
||||
static real errmax;
|
||||
extern logical lseres_();
|
||||
extern int smake_();
|
||||
extern logical lseres_(char*, char*, integer*, integer*, real*, real*, integer*, ftnlen, ftnlen);
|
||||
extern /* Subroutine */ int smake_(char*, char*, char*, integer*, integer*, real*, integer*, real*, integer*, logical*, real*, ftnlen, ftnlen, ftnlen);
|
||||
static char transs[1];
|
||||
static integer laa, lbb, lda, lcc, ldb, ldc;
|
||||
static real als;
|
||||
static integer ict, icu;
|
||||
extern /* Subroutine */ int cssyr2k_();
|
||||
extern logical lse_();
|
||||
extern int smmch_();
|
||||
extern /* Subroutine */ void cssyr2k_(integer*, char*, char*, integer*, integer*, real*, real*, integer*, real*, integer*, real*, real*, integer*, ftnlen, ftnlen);
|
||||
extern logical lse_(real*, real*, integer*);
|
||||
extern /* Subroutine */ int smmch_(char*, char*, integer*, integer*, integer*, real*, real*, integer*, real*, integer*, real*, real*, integer*, real*, real*, real*, integer*, real*, real*, logical*, integer*, logical*, ftnlen, ftnlen);
|
||||
static real err;
|
||||
|
||||
/* Tests SSYR2K. */
|
||||
|
|
@ -3037,23 +2774,8 @@ L160:
|
|||
} /* schk5_ */
|
||||
|
||||
|
||||
/* Subroutine */ void sprcn5_(nout, nc, sname, iorder, uplo, transa, n, k,
|
||||
alpha, lda, ldb, beta, ldc, sname_len, uplo_len, transa_len)
|
||||
integer *nout, *nc;
|
||||
char *sname;
|
||||
integer *iorder;
|
||||
char *uplo, *transa;
|
||||
integer *n, *k;
|
||||
real *alpha;
|
||||
integer *lda, *ldb;
|
||||
real *beta;
|
||||
integer *ldc;
|
||||
ftnlen sname_len;
|
||||
ftnlen uplo_len;
|
||||
ftnlen transa_len;
|
||||
/* Subroutine */ void sprcn5_(integer* nout, integer* nc, char* sname, integer* iorder, char* uplo, char* transa, integer* n, integer* k, real* alpha, integer* lda, integer* ldb, real* beta, integer* ldc, ftnlen sname_len, ftnlen uplo_len, ftnlen transa_len)
|
||||
{
|
||||
/* Builtin functions */
|
||||
integer s_wsfe(), do_fio(), e_wsfe();
|
||||
|
||||
/* Local variables */
|
||||
static char ca[14], cu[14], crc[14];
|
||||
|
|
@ -3081,19 +2803,7 @@ ftnlen transa_len;
|
|||
} /* sprcn5_ */
|
||||
|
||||
|
||||
/* Subroutine */ int smake_(type__, uplo, diag, m, n, a, nmax, aa, lda, reset,
|
||||
transl, type_len, uplo_len, diag_len)
|
||||
char *type__, *uplo, *diag;
|
||||
integer *m, *n;
|
||||
real *a;
|
||||
integer *nmax;
|
||||
real *aa;
|
||||
integer *lda;
|
||||
logical *reset;
|
||||
real *transl;
|
||||
ftnlen type_len;
|
||||
ftnlen uplo_len;
|
||||
ftnlen diag_len;
|
||||
/* Subroutine */ int smake_(char* type__, char* uplo, char* diag, integer* m, integer* n, real* a, integer* nmax, real* aa, integer* lda, logical* reset, real* transl, ftnlen type_len, ftnlen uplo_len, ftnlen diag_len)
|
||||
{
|
||||
/* System generated locals */
|
||||
integer a_dim1, a_offset, i__1, i__2;
|
||||
|
|
@ -3102,7 +2812,7 @@ ftnlen diag_len;
|
|||
|
||||
/* Local variables */
|
||||
static integer ibeg, iend;
|
||||
extern doublereal sbeg_();
|
||||
extern doublereal sbeg_(logical*);
|
||||
static logical unit;
|
||||
static integer i__, j;
|
||||
static logical lower, upper, gen, tri, sym;
|
||||
|
|
@ -3233,25 +2943,7 @@ ftnlen diag_len;
|
|||
|
||||
} /* smake_ */
|
||||
|
||||
/* Subroutine */ int smmch_(transa, transb, m, n, kk, alpha, a, lda, b, ldb,
|
||||
beta, c__, ldc, ct, g, cc, ldcc, eps, err, fatal, nout, mv,
|
||||
transa_len, transb_len)
|
||||
char *transa, *transb;
|
||||
integer *m, *n, *kk;
|
||||
real *alpha, *a;
|
||||
integer *lda;
|
||||
real *b;
|
||||
integer *ldb;
|
||||
real *beta, *c__;
|
||||
integer *ldc;
|
||||
real *ct, *g, *cc;
|
||||
integer *ldcc;
|
||||
real *eps, *err;
|
||||
logical *fatal;
|
||||
integer *nout;
|
||||
logical *mv;
|
||||
ftnlen transa_len;
|
||||
ftnlen transb_len;
|
||||
/* Subroutine */ int smmch_(char* transa, char* transb, integer* m, integer* n, integer* kk, real* alpha, real* a, integer* lda, real* b, integer* ldb, real* beta, real* c__, integer* ldc, real* ct, real* g, real* cc, integer* ldcc, real* eps, real* err, logical* fatal, integer* nout, logical* mv, ftnlen transa_len, ftnlen transb_len)
|
||||
{
|
||||
|
||||
/* System generated locals */
|
||||
|
|
@ -3260,8 +2952,7 @@ ftnlen transb_len;
|
|||
real r__1, r__2;
|
||||
|
||||
/* Builtin functions */
|
||||
double sqrt();
|
||||
integer s_wsfe(), e_wsfe(), do_fio();
|
||||
double sqrt(double);
|
||||
|
||||
/* Local variables */
|
||||
static real erri;
|
||||
|
|
@ -3426,9 +3117,7 @@ L150:
|
|||
|
||||
} /* smmch_ */
|
||||
|
||||
logical lse_(ri, rj, lr)
|
||||
real *ri, *rj;
|
||||
integer *lr;
|
||||
logical lse_(real* ri, real* rj, integer* lr)
|
||||
{
|
||||
/* System generated locals */
|
||||
integer i__1;
|
||||
|
|
@ -3475,13 +3164,7 @@ L30:
|
|||
|
||||
} /* lse_ */
|
||||
|
||||
logical lseres_(type__, uplo, m, n, aa, as, lda, type_len, uplo_len)
|
||||
char *type__, *uplo;
|
||||
integer *m, *n;
|
||||
real *aa, *as;
|
||||
integer *lda;
|
||||
ftnlen type_len;
|
||||
ftnlen uplo_len;
|
||||
logical lseres_(char* type__, char* uplo, integer* m, integer* n, real* aa, real* as, integer* lda, ftnlen type_len, ftnlen uplo_len)
|
||||
{
|
||||
/* System generated locals */
|
||||
integer aa_dim1, aa_offset, as_dim1, as_offset, i__1, i__2;
|
||||
|
|
@ -3572,8 +3255,7 @@ L80:
|
|||
|
||||
} /* lseres_ */
|
||||
|
||||
doublereal sbeg_(reset)
|
||||
logical *reset;
|
||||
doublereal sbeg_(logical* reset)
|
||||
{
|
||||
/* System generated locals */
|
||||
real ret_val;
|
||||
|
|
@ -3625,8 +3307,7 @@ L10:
|
|||
|
||||
} /* sbeg_ */
|
||||
|
||||
doublereal sdiff_(x, y)
|
||||
real *x, *y;
|
||||
doublereal sdiff_(real* x, real* y)
|
||||
{
|
||||
/* System generated locals */
|
||||
real ret_val;
|
||||
|
|
|
|||
|
|
@ -242,250 +242,6 @@ typedef struct Namelist Namelist;
|
|||
/* procedure parameter types for -A and -C++ */
|
||||
|
||||
#define F2C_proc_par_types 1
|
||||
#ifdef __cplusplus
|
||||
typedef logical (*L_fp)(...);
|
||||
#else
|
||||
typedef logical (*L_fp)();
|
||||
#endif
|
||||
#if 0
|
||||
static float spow_ui(float x, integer n) {
|
||||
float pow=1.0; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x = 1/x;
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow *= x;
|
||||
if(u >>= 1) x *= x;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
return pow;
|
||||
}
|
||||
static double dpow_ui(double x, integer n) {
|
||||
double pow=1.0; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x = 1/x;
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow *= x;
|
||||
if(u >>= 1) x *= x;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
return pow;
|
||||
}
|
||||
#ifdef _MSC_VER
|
||||
static _Fcomplex cpow_ui(complex x, integer n) {
|
||||
complex pow={1.0,0.0}; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i;
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow.r *= x.r, pow.i *= x.i;
|
||||
if(u >>= 1) x.r *= x.r, x.i *= x.i;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
_Fcomplex p={pow.r, pow.i};
|
||||
return p;
|
||||
}
|
||||
#else
|
||||
static _Complex float cpow_ui(_Complex float x, integer n) {
|
||||
_Complex float pow=1.0; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x = 1/x;
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow *= x;
|
||||
if(u >>= 1) x *= x;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
return pow;
|
||||
}
|
||||
#endif
|
||||
#ifdef _MSC_VER
|
||||
static _Dcomplex zpow_ui(_Dcomplex x, integer n) {
|
||||
_Dcomplex pow={1.0,0.0}; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1];
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1];
|
||||
if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1];
|
||||
else break;
|
||||
}
|
||||
}
|
||||
_Dcomplex p = {pow._Val[0], pow._Val[1]};
|
||||
return p;
|
||||
}
|
||||
#else
|
||||
static _Complex double zpow_ui(_Complex double x, integer n) {
|
||||
_Complex double pow=1.0; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x = 1/x;
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow *= x;
|
||||
if(u >>= 1) x *= x;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
return pow;
|
||||
}
|
||||
#endif
|
||||
static integer pow_ii(integer x, integer n) {
|
||||
integer pow; unsigned long int u;
|
||||
if (n <= 0) {
|
||||
if (n == 0 || x == 1) pow = 1;
|
||||
else if (x != -1) pow = x == 0 ? 1/x : 0;
|
||||
else n = -n;
|
||||
}
|
||||
if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
|
||||
u = n;
|
||||
for(pow = 1; ; ) {
|
||||
if(u & 01) pow *= x;
|
||||
if(u >>= 1) x *= x;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
return pow;
|
||||
}
|
||||
static integer dmaxloc_(double *w, integer s, integer e, integer *n)
|
||||
{
|
||||
double m; integer i, mi;
|
||||
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
|
||||
if (w[i-1]>m) mi=i ,m=w[i-1];
|
||||
return mi-s+1;
|
||||
}
|
||||
static integer smaxloc_(float *w, integer s, integer e, integer *n)
|
||||
{
|
||||
float m; integer i, mi;
|
||||
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
|
||||
if (w[i-1]>m) mi=i ,m=w[i-1];
|
||||
return mi-s+1;
|
||||
}
|
||||
#endif
|
||||
#if 0
|
||||
static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
|
||||
integer n = *n_, incx = *incx_, incy = *incy_, i;
|
||||
#ifdef _MSC_VER
|
||||
_Fcomplex zdotc = {0.0, 0.0};
|
||||
if (incx == 1 && incy == 1) {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc._Val[0] += conjf(Cf(&x[i]))._Val[0] * Cf(&y[i])._Val[0];
|
||||
zdotc._Val[1] += conjf(Cf(&x[i]))._Val[1] * Cf(&y[i])._Val[1];
|
||||
}
|
||||
} else {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc._Val[0] += conjf(Cf(&x[i*incx]))._Val[0] * Cf(&y[i*incy])._Val[0];
|
||||
zdotc._Val[1] += conjf(Cf(&x[i*incx]))._Val[1] * Cf(&y[i*incy])._Val[1];
|
||||
}
|
||||
}
|
||||
pCf(z) = zdotc;
|
||||
}
|
||||
#else
|
||||
_Complex float zdotc = 0.0;
|
||||
if (incx == 1 && incy == 1) {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
|
||||
}
|
||||
} else {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
|
||||
}
|
||||
}
|
||||
pCf(z) = zdotc;
|
||||
}
|
||||
#endif
|
||||
static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
|
||||
integer n = *n_, incx = *incx_, incy = *incy_, i;
|
||||
#ifdef _MSC_VER
|
||||
_Dcomplex zdotc = {0.0, 0.0};
|
||||
if (incx == 1 && incy == 1) {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc._Val[0] += conj(Cd(&x[i]))._Val[0] * Cd(&y[i])._Val[0];
|
||||
zdotc._Val[1] += conj(Cd(&x[i]))._Val[1] * Cd(&y[i])._Val[1];
|
||||
}
|
||||
} else {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc._Val[0] += conj(Cd(&x[i*incx]))._Val[0] * Cd(&y[i*incy])._Val[0];
|
||||
zdotc._Val[1] += conj(Cd(&x[i*incx]))._Val[1] * Cd(&y[i*incy])._Val[1];
|
||||
}
|
||||
}
|
||||
pCd(z) = zdotc;
|
||||
}
|
||||
#else
|
||||
_Complex double zdotc = 0.0;
|
||||
if (incx == 1 && incy == 1) {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
|
||||
}
|
||||
} else {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
|
||||
}
|
||||
}
|
||||
pCd(z) = zdotc;
|
||||
}
|
||||
#endif
|
||||
static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
|
||||
integer n = *n_, incx = *incx_, incy = *incy_, i;
|
||||
#ifdef _MSC_VER
|
||||
_Fcomplex zdotc = {0.0, 0.0};
|
||||
if (incx == 1 && incy == 1) {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc._Val[0] += Cf(&x[i])._Val[0] * Cf(&y[i])._Val[0];
|
||||
zdotc._Val[1] += Cf(&x[i])._Val[1] * Cf(&y[i])._Val[1];
|
||||
}
|
||||
} else {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc._Val[0] += Cf(&x[i*incx])._Val[0] * Cf(&y[i*incy])._Val[0];
|
||||
zdotc._Val[1] += Cf(&x[i*incx])._Val[1] * Cf(&y[i*incy])._Val[1];
|
||||
}
|
||||
}
|
||||
pCf(z) = zdotc;
|
||||
}
|
||||
#else
|
||||
_Complex float zdotc = 0.0;
|
||||
if (incx == 1 && incy == 1) {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc += Cf(&x[i]) * Cf(&y[i]);
|
||||
}
|
||||
} else {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
|
||||
}
|
||||
}
|
||||
pCf(z) = zdotc;
|
||||
}
|
||||
#endif
|
||||
static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
|
||||
integer n = *n_, incx = *incx_, incy = *incy_, i;
|
||||
#ifdef _MSC_VER
|
||||
_Dcomplex zdotc = {0.0, 0.0};
|
||||
if (incx == 1 && incy == 1) {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc._Val[0] += Cd(&x[i])._Val[0] * Cd(&y[i])._Val[0];
|
||||
zdotc._Val[1] += Cd(&x[i])._Val[1] * Cd(&y[i])._Val[1];
|
||||
}
|
||||
} else {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc._Val[0] += Cd(&x[i*incx])._Val[0] * Cd(&y[i*incy])._Val[0];
|
||||
zdotc._Val[1] += Cd(&x[i*incx])._Val[1] * Cd(&y[i*incy])._Val[1];
|
||||
}
|
||||
}
|
||||
pCd(z) = zdotc;
|
||||
}
|
||||
#else
|
||||
_Complex double zdotc = 0.0;
|
||||
if (incx == 1 && incy == 1) {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc += Cd(&x[i]) * Cd(&y[i]);
|
||||
}
|
||||
} else {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
|
||||
}
|
||||
}
|
||||
pCd(z) = zdotc;
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* Common Block Declarations */
|
||||
|
||||
|
|
@ -502,16 +258,16 @@ static integer c__1 = 1;
|
|||
static integer c__5 = 5;
|
||||
static doublereal c_b43 = 1.;
|
||||
|
||||
/* Main program */ int main()
|
||||
/* Main program */ int main(void)
|
||||
{
|
||||
/* Initialized data */
|
||||
|
||||
static doublereal sfac = 9.765625e-4;
|
||||
|
||||
/* Local variables */
|
||||
extern /* Subroutine */ int check1_(), check2_();
|
||||
extern /* Subroutine */ int check1_(doublereal*), check2_(doublereal*);
|
||||
static integer ic;
|
||||
extern /* Subroutine */ int header_();
|
||||
extern /* Subroutine */ int header_(void);
|
||||
|
||||
/* Test program for the COMPLEX*16 Level 1 CBLAS. */
|
||||
/* Based upon the original CBLAS test routine together with: */
|
||||
|
|
@ -551,7 +307,7 @@ static doublereal c_b43 = 1.;
|
|||
exit(0);
|
||||
} /* MAIN__ */
|
||||
|
||||
/* Subroutine */ int header_()
|
||||
/* Subroutine */ int header_(void)
|
||||
{
|
||||
/* Initialized data */
|
||||
|
||||
|
|
@ -570,8 +326,7 @@ static doublereal c_b43 = 1.;
|
|||
|
||||
} /* header_ */
|
||||
|
||||
/* Subroutine */ int check1_(sfac)
|
||||
doublereal *sfac;
|
||||
/* Subroutine */ int check1_(doublereal* sfac)
|
||||
{
|
||||
/* Initialized data */
|
||||
|
||||
|
|
@ -623,15 +378,15 @@ doublereal *sfac;
|
|||
|
||||
/* Local variables */
|
||||
static integer i__;
|
||||
extern /* Subroutine */ int ctest_();
|
||||
extern /* Subroutine */ int ctest_(integer*, doublecomplex*, doublecomplex*, doublecomplex*, doublereal*);
|
||||
static doublecomplex mwpcs[5], mwpct[5];
|
||||
extern /* Subroutine */ int zscaltest_(), itest1_(), stest1_();
|
||||
extern /* Subroutine */ int zscaltest_(integer*, doublereal*, doublecomplex*, integer*), itest1_(integer*, integer*), stest1_(doublereal*, doublereal*, doublereal*, doublereal*);
|
||||
static doublecomplex cx[8];
|
||||
extern doublereal dznrm2test_();
|
||||
extern doublereal dznrm2test_(integer*, doublecomplex*, integer*);
|
||||
static integer np1;
|
||||
extern /* Subroutine */ int zdscaltest_();
|
||||
extern integer izamaxtest_();
|
||||
extern doublereal dzasumtest_();
|
||||
extern /* Subroutine */ int zdscaltest_(integer*, doublereal*, doublecomplex*, integer*);
|
||||
extern integer izamaxtest_(integer*, doublecomplex*, integer*);
|
||||
extern doublereal dzasumtest_(integer*, doublecomplex*, integer*);
|
||||
static integer len;
|
||||
|
||||
/* .. Parameters .. */
|
||||
|
|
@ -748,8 +503,7 @@ doublereal *sfac;
|
|||
return 0;
|
||||
} /* check1_ */
|
||||
|
||||
/* Subroutine */ int check2_(sfac)
|
||||
doublereal *sfac;
|
||||
/* Subroutine */ int check2_(doublereal* sfac)
|
||||
{
|
||||
/* Initialized data */
|
||||
|
||||
|
|
@ -834,14 +588,14 @@ doublereal *sfac;
|
|||
/* Local variables */
|
||||
static doublecomplex cdot[1];
|
||||
static integer lenx, leny, i__;
|
||||
extern /* Subroutine */ int ctest_();
|
||||
extern /* Subroutine */ int ctest_(integer*, doublecomplex*, doublecomplex*, doublecomplex*, doublereal*);
|
||||
static integer ksize;
|
||||
static doublecomplex ztemp;
|
||||
extern /* Subroutine */ int zdotctest_(), zcopytest_();
|
||||
extern /* Subroutine */ int zdotctest_(integer*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*), zcopytest_(integer*, doublecomplex*, integer*, doublecomplex*, integer*);
|
||||
static integer ki;
|
||||
extern /* Subroutine */ int zdotutest_(), zswaptest_();
|
||||
extern /* Subroutine */ int zdotutest_(integer*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*), zswaptest_(integer*, doublecomplex*, integer*, doublecomplex*, integer*);
|
||||
static integer kn;
|
||||
extern /* Subroutine */ int zaxpytest_();
|
||||
extern /* Subroutine */ int zaxpytest_(integer*, doublereal*, doublecomplex*, integer*, doublecomplex*, integer*);
|
||||
static doublecomplex cx[7], cy[7];
|
||||
static integer mx, my;
|
||||
|
||||
|
|
@ -923,20 +677,18 @@ doublereal *sfac;
|
|||
return 0;
|
||||
} /* check2_ */
|
||||
|
||||
/* Subroutine */ int stest_(len, scomp, strue, ssize, sfac)
|
||||
integer *len;
|
||||
doublereal *scomp, *strue, *ssize, *sfac;
|
||||
/* Subroutine */ int stest_(integer* len, doublereal* scomp, doublereal* strue, doublereal* ssize, doublereal* sfac)
|
||||
{
|
||||
/* System generated locals */
|
||||
integer i__1;
|
||||
doublereal d__1, d__2, d__3, d__4, d__5;
|
||||
|
||||
/* Builtin functions */
|
||||
integer s_wsfe(), e_wsfe(), do_fio();
|
||||
integer s_wsfe(void), e_wsfe(void), do_fio(void);
|
||||
|
||||
/* Local variables */
|
||||
static integer i__;
|
||||
extern doublereal sdiff_();
|
||||
extern doublereal sdiff_(doublereal*, doublereal*);
|
||||
static doublereal sd;
|
||||
|
||||
/* ********************************* STEST ************************** */
|
||||
|
|
@ -992,11 +744,10 @@ L40:
|
|||
|
||||
} /* stest_ */
|
||||
|
||||
/* Subroutine */ int stest1_(scomp1, strue1, ssize, sfac)
|
||||
doublereal *scomp1, *strue1, *ssize, *sfac;
|
||||
/* Subroutine */ int stest1_(doublereal* scomp1, doublereal* strue1, doublereal* ssize, doublereal* sfac)
|
||||
{
|
||||
static doublereal scomp[1], strue[1];
|
||||
extern /* Subroutine */ int stest_();
|
||||
extern /* Subroutine */ int stest_(integer*,doublereal*, doublereal*, doublereal*, doublereal*);
|
||||
|
||||
/* ************************* STEST1 ***************************** */
|
||||
|
||||
|
|
@ -1023,8 +774,7 @@ doublereal *scomp1, *strue1, *ssize, *sfac;
|
|||
return 0;
|
||||
} /* stest1_ */
|
||||
|
||||
doublereal sdiff_(sa, sb)
|
||||
doublereal *sa, *sb;
|
||||
doublereal sdiff_(doublereal* sa, doublereal* sb)
|
||||
{
|
||||
/* System generated locals */
|
||||
doublereal ret_val;
|
||||
|
|
@ -1038,10 +788,7 @@ doublereal *sa, *sb;
|
|||
return ret_val;
|
||||
} /* sdiff_ */
|
||||
|
||||
/* Subroutine */ int ctest_(len, ccomp, ctrue, csize, sfac)
|
||||
integer *len;
|
||||
doublecomplex *ccomp, *ctrue, *csize;
|
||||
doublereal *sfac;
|
||||
/* Subroutine */ int ctest_(integer* len, doublecomplex* ccomp, doublecomplex* ctrue, doublecomplex* csize, doublereal* sfac)
|
||||
{
|
||||
/* System generated locals */
|
||||
integer i__1, i__2;
|
||||
|
|
@ -1049,7 +796,7 @@ doublereal *sfac;
|
|||
/* Local variables */
|
||||
static integer i__;
|
||||
static doublereal scomp[20], ssize[20], strue[20];
|
||||
extern /* Subroutine */ int stest_();
|
||||
extern /* Subroutine */ int stest_(integer*, doublereal*, doublereal*, doublereal*, doublereal*);
|
||||
|
||||
/* **************************** CTEST ***************************** */
|
||||
|
||||
|
|
@ -1087,8 +834,7 @@ doublereal *sfac;
|
|||
return 0;
|
||||
} /* ctest_ */
|
||||
|
||||
/* Subroutine */ int itest1_(icomp, itrue)
|
||||
integer *icomp, *itrue;
|
||||
/* Subroutine */ int itest1_(integer* icomp, integer* itrue)
|
||||
{
|
||||
static integer id;
|
||||
|
||||
|
|
|
|||
|
|
@ -242,129 +242,6 @@ typedef struct Namelist Namelist;
|
|||
/* procedure parameter types for -A and -C++ */
|
||||
|
||||
#define F2C_proc_par_types 1
|
||||
#ifdef __cplusplus
|
||||
typedef logical (*L_fp)(...);
|
||||
#else
|
||||
typedef logical (*L_fp)();
|
||||
#endif
|
||||
#if 0
|
||||
static float spow_ui(float x, integer n) {
|
||||
float pow=1.0; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x = 1/x;
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow *= x;
|
||||
if(u >>= 1) x *= x;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
return pow;
|
||||
}
|
||||
static double dpow_ui(double x, integer n) {
|
||||
double pow=1.0; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x = 1/x;
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow *= x;
|
||||
if(u >>= 1) x *= x;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
return pow;
|
||||
}
|
||||
#ifdef _MSC_VER
|
||||
static _Fcomplex cpow_ui(complex x, integer n) {
|
||||
complex pow={1.0,0.0}; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i;
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow.r *= x.r, pow.i *= x.i;
|
||||
if(u >>= 1) x.r *= x.r, x.i *= x.i;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
_Fcomplex p={pow.r, pow.i};
|
||||
return p;
|
||||
}
|
||||
#else
|
||||
static _Complex float cpow_ui(_Complex float x, integer n) {
|
||||
_Complex float pow=1.0; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x = 1/x;
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow *= x;
|
||||
if(u >>= 1) x *= x;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
return pow;
|
||||
}
|
||||
#endif
|
||||
#ifdef _MSC_VER
|
||||
static _Dcomplex zpow_ui(_Dcomplex x, integer n) {
|
||||
_Dcomplex pow={1.0,0.0}; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1];
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1];
|
||||
if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1];
|
||||
else break;
|
||||
}
|
||||
}
|
||||
_Dcomplex p = {pow._Val[0], pow._Val[1]};
|
||||
return p;
|
||||
}
|
||||
#else
|
||||
static _Complex double zpow_ui(_Complex double x, integer n) {
|
||||
_Complex double pow=1.0; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x = 1/x;
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow *= x;
|
||||
if(u >>= 1) x *= x;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
return pow;
|
||||
}
|
||||
#endif
|
||||
static integer pow_ii(integer x, integer n) {
|
||||
integer pow; unsigned long int u;
|
||||
if (n <= 0) {
|
||||
if (n == 0 || x == 1) pow = 1;
|
||||
else if (x != -1) pow = x == 0 ? 1/x : 0;
|
||||
else n = -n;
|
||||
}
|
||||
if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
|
||||
u = n;
|
||||
for(pow = 1; ; ) {
|
||||
if(u & 01) pow *= x;
|
||||
if(u >>= 1) x *= x;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
return pow;
|
||||
}
|
||||
static integer dmaxloc_(double *w, integer s, integer e, integer *n)
|
||||
{
|
||||
double m; integer i, mi;
|
||||
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
|
||||
if (w[i-1]>m) mi=i ,m=w[i-1];
|
||||
return mi-s+1;
|
||||
}
|
||||
static integer smaxloc_(float *w, integer s, integer e, integer *n)
|
||||
{
|
||||
float m; integer i, mi;
|
||||
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
|
||||
if (w[i-1]>m) mi=i ,m=w[i-1];
|
||||
return mi-s+1;
|
||||
}
|
||||
#endif
|
||||
/* -- translated by f2c (version 20000121).
|
||||
You must link the resulting object file with the libraries:
|
||||
-lf2c -lm (in that order)
|
||||
*/
|
||||
|
||||
|
||||
|
||||
/* Common Block Declarations */
|
||||
|
|
@ -396,7 +273,7 @@ static integer c_n1 = -1;
|
|||
static integer c__0 = 0;
|
||||
static logical c_false = FALSE_;
|
||||
|
||||
/* Main program */ int main()
|
||||
/* Main program */ int main(void)
|
||||
{
|
||||
/* Initialized data */
|
||||
|
||||
|
|
@ -414,19 +291,23 @@ static logical c_false = FALSE_;
|
|||
static logical same;
|
||||
static integer ninc, nbet, ntra;
|
||||
static logical rewi;
|
||||
extern /* Subroutine */ int zchk1_(), zchk2_(), zchk3_(), zchk4_(),
|
||||
zchk5_(), zchk6_();
|
||||
extern /* Subroutine */ int zchk1_(char*, doublereal*, doublereal*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, integer*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, integer*, integer*, integer*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublereal*, integer*, ftnlen);
|
||||
extern /* Subroutine */ int zchk2_(char*, doublereal*, doublereal*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, integer*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, integer*, integer*, integer*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublereal*, integer*, ftnlen);
|
||||
extern /* Subroutine */ int zchk3_(char*, doublereal*, doublereal*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, integer*, integer*, integer*, integer*, integer*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublereal*, doublecomplex*, integer*, ftnlen);
|
||||
extern /* Subroutine */ int zchk4_(char*, doublereal*, doublereal*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, doublecomplex*, integer*, integer*, integer*, integer*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublereal*, doublecomplex*, integer*, ftnlen);
|
||||
extern /* Subroutine */ int zchk5_(char*, doublereal*, doublereal*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, doublecomplex*, integer*, integer*, integer*, integer*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublereal*, doublecomplex*, integer*, ftnlen);
|
||||
extern /* Subroutine */ int zchk6_(char*, doublereal*, doublereal*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, doublecomplex*, integer*, integer*, integer*, integer*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublereal*, doublecomplex*, integer*, ftnlen);
|
||||
static doublecomplex a[4225] /* was [65][65] */;
|
||||
static doublereal g[65];
|
||||
static integer i__, j;
|
||||
extern doublereal ddiff_();
|
||||
extern doublereal ddiff_(doublereal*, doublereal*);
|
||||
static integer n;
|
||||
static logical fatal;
|
||||
static doublecomplex x[65], y[65], z__[130];
|
||||
static logical trace;
|
||||
static integer nidim;
|
||||
static char snaps[32], trans[1];
|
||||
extern /* Subroutine */ int zmvch_();
|
||||
extern /* Subroutine */ int zmvch_(char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, doublereal*, doublecomplex*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen);
|
||||
static integer isnum;
|
||||
static logical ltest[17];
|
||||
static doublecomplex aa[4225];
|
||||
|
|
@ -441,12 +322,12 @@ static logical c_false = FALSE_;
|
|||
static logical rorder;
|
||||
static integer layout;
|
||||
static logical ltestt, tsterr;
|
||||
extern /* Subroutine */ int cz2chke_();
|
||||
extern /* Subroutine */ void cz2chke_(char*, ftnlen);
|
||||
static doublecomplex alf[7];
|
||||
static integer inc[7], nkb;
|
||||
static doublecomplex bet[7];
|
||||
static doublereal eps, err;
|
||||
extern logical lze_();
|
||||
extern logical lze_(doublecomplex*, doublecomplex*, integer*);
|
||||
char tmpchar;
|
||||
|
||||
/* Test program for the DOUBLE PRECISION COMPLEX Level 2 Blas. */
|
||||
|
|
@ -984,22 +865,7 @@ L240:
|
|||
|
||||
} /* MAIN__ */
|
||||
|
||||
/* Subroutine */ int zchk1_(sname, eps, thresh, nout, ntra, trace, rewi,
|
||||
fatal, nidim, idim, nkb, kb, nalf, alf, nbet, bet, ninc, inc, nmax,
|
||||
incmax, a, aa, as, x, xx, xs, y, yy, ys, yt, g, iorder, sname_len)
|
||||
char *sname;
|
||||
doublereal *eps, *thresh;
|
||||
integer *nout, *ntra;
|
||||
logical *trace, *rewi, *fatal;
|
||||
integer *nidim, *idim, *nkb, *kb, *nalf;
|
||||
doublecomplex *alf;
|
||||
integer *nbet;
|
||||
doublecomplex *bet;
|
||||
integer *ninc, *inc, *nmax, *incmax;
|
||||
doublecomplex *a, *aa, *as, *x, *xx, *xs, *y, *yy, *ys, *yt;
|
||||
doublereal *g;
|
||||
integer *iorder;
|
||||
ftnlen sname_len;
|
||||
/* Subroutine */ int zchk1_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nkb, integer* kb, integer* nalf, doublecomplex* alf, integer* nbet, doublecomplex* bet, integer* ninc, integer* inc, integer* nmax, integer* incmax, doublecomplex* a, doublecomplex* aa, doublecomplex* as, doublecomplex* x, doublecomplex* xx, doublecomplex* xs, doublecomplex* y, doublecomplex* yy, doublecomplex* ys, doublecomplex* yt, doublereal* g, integer* iorder, ftnlen sname_len)
|
||||
{
|
||||
/* Initialized data */
|
||||
|
||||
|
|
@ -1018,27 +884,27 @@ ftnlen sname_len;
|
|||
static integer i__, m, n;
|
||||
static doublecomplex alpha;
|
||||
static logical isame[13];
|
||||
extern /* Subroutine */ int zmake_();
|
||||
extern /* Subroutine */ int zmake_(char*, char*, char*, integer*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, integer*, integer*, logical*, doublecomplex*, ftnlen, ftnlen, ftnlen);
|
||||
static integer nargs;
|
||||
static logical reset;
|
||||
static integer incxs, incys;
|
||||
static char trans[1];
|
||||
extern /* Subroutine */ int zmvch_();
|
||||
extern /* Subroutine */ int zmvch_(char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, doublereal*, doublecomplex*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen);
|
||||
static integer ia, ib, ic;
|
||||
static logical banded;
|
||||
static integer nc, nd, im, in, kl, ml, nk, nl, ku, ix, iy, ms, lx, ly, ns;
|
||||
extern /* Subroutine */ int czgbmv_();
|
||||
extern /* Subroutine */ void czgbmv_(integer*, char*, integer*, integer*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen);
|
||||
static char ctrans[14];
|
||||
extern /* Subroutine */ int czgemv_();
|
||||
extern /* Subroutine */ void czgemv_(integer*, char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen);
|
||||
static doublereal errmax;
|
||||
static doublecomplex transl;
|
||||
extern logical lzeres_();
|
||||
extern logical lzeres_(char*, char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen, ftnlen);
|
||||
static char transs[1];
|
||||
static integer laa, lda;
|
||||
static doublecomplex als, bls;
|
||||
static doublereal err;
|
||||
static integer iku, kls;
|
||||
extern logical lze_();
|
||||
extern logical lze_(doublecomplex*, doublecomplex*, integer*);
|
||||
static integer kus;
|
||||
|
||||
|
||||
|
|
@ -1451,22 +1317,7 @@ L140:
|
|||
|
||||
} /* zchk1_ */
|
||||
|
||||
/* Subroutine */ int zchk2_(sname, eps, thresh, nout, ntra, trace, rewi,
|
||||
fatal, nidim, idim, nkb, kb, nalf, alf, nbet, bet, ninc, inc, nmax,
|
||||
incmax, a, aa, as, x, xx, xs, y, yy, ys, yt, g, iorder, sname_len)
|
||||
char *sname;
|
||||
doublereal *eps, *thresh;
|
||||
integer *nout, *ntra;
|
||||
logical *trace, *rewi, *fatal;
|
||||
integer *nidim, *idim, *nkb, *kb, *nalf;
|
||||
doublecomplex *alf;
|
||||
integer *nbet;
|
||||
doublecomplex *bet;
|
||||
integer *ninc, *inc, *nmax, *incmax;
|
||||
doublecomplex *a, *aa, *as, *x, *xx, *xs, *y, *yy, *ys, *yt;
|
||||
doublereal *g;
|
||||
integer *iorder;
|
||||
ftnlen sname_len;
|
||||
/* Subroutine */ int zchk2_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nkb, integer* kb, integer* nalf, doublecomplex* alf, integer* nbet, doublecomplex* bet, integer* ninc, integer* inc, integer* nmax, integer* incmax, doublecomplex* a, doublecomplex* aa, doublecomplex* as, doublecomplex* x, doublecomplex* xx, doublecomplex* xs, doublecomplex* y, doublecomplex* yy, doublecomplex* ys, doublecomplex* yt, doublereal* g, integer* iorder, ftnlen sname_len)
|
||||
{
|
||||
/* Initialized data */
|
||||
|
||||
|
|
@ -1486,27 +1337,28 @@ ftnlen sname_len;
|
|||
static integer i__, k, n;
|
||||
static doublecomplex alpha;
|
||||
static logical isame[13];
|
||||
extern /* Subroutine */ int zmake_();
|
||||
extern /* Subroutine */ int zmake_(char*, char*, char*, integer*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, integer*, integer*, logical*, doublecomplex*, ftnlen, ftnlen, ftnlen);
|
||||
static integer nargs;
|
||||
static logical reset;
|
||||
static char cuplo[14];
|
||||
static integer incxs, incys;
|
||||
extern /* Subroutine */ int zmvch_();
|
||||
extern /* Subroutine */ int zmvch_(char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, doublereal*, doublecomplex*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen);
|
||||
static char uplos[1];
|
||||
static integer ia, ib, ic;
|
||||
static logical banded;
|
||||
static integer nc, ik, in;
|
||||
static logical packed;
|
||||
static integer nk, ks, ix, iy, ns, lx, ly;
|
||||
extern /* Subroutine */ int czhbmv_(), czhemv_();
|
||||
extern /* Subroutine */ void czhbmv_(integer*, char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen);
|
||||
extern /* Subroutine */ void czhemv_(integer*, char*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen);
|
||||
static doublereal errmax;
|
||||
static doublecomplex transl;
|
||||
extern logical lzeres_();
|
||||
extern /* Subroutine */ int czhpmv_();
|
||||
extern logical lzeres_(char*, char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen, ftnlen);
|
||||
extern /* Subroutine */ void czhpmv_(integer*, char*, integer*, doublecomplex*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen);
|
||||
static integer laa, lda;
|
||||
static doublecomplex als, bls;
|
||||
static doublereal err;
|
||||
extern logical lze_();
|
||||
extern logical lze_(doublecomplex*, doublecomplex*, integer*);
|
||||
|
||||
/* Tests CHEMV, CHBMV and CHPMV. */
|
||||
|
||||
|
|
@ -1909,19 +1761,7 @@ L130:
|
|||
|
||||
} /* zchk2_ */
|
||||
|
||||
/* Subroutine */ int zchk3_(sname, eps, thresh, nout, ntra, trace, rewi,
|
||||
fatal, nidim, idim, nkb, kb, ninc, inc, nmax, incmax, a, aa, as, x,
|
||||
xx, xs, xt, g, z__, iorder, sname_len)
|
||||
char *sname;
|
||||
doublereal *eps, *thresh;
|
||||
integer *nout, *ntra;
|
||||
logical *trace, *rewi, *fatal;
|
||||
integer *nidim, *idim, *nkb, *kb, *ninc, *inc, *nmax, *incmax;
|
||||
doublecomplex *a, *aa, *as, *x, *xx, *xs, *xt;
|
||||
doublereal *g;
|
||||
doublecomplex *z__;
|
||||
integer *iorder;
|
||||
ftnlen sname_len;
|
||||
/* Subroutine */ int zchk3_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nkb, integer* kb, integer* ninc, integer* inc, integer* nmax, integer* incmax, doublecomplex* a, doublecomplex* aa, doublecomplex* as, doublecomplex* x, doublecomplex* xx, doublecomplex* xs, doublecomplex* xt, doublereal* g, doublecomplex* z__, integer* iorder, ftnlen sname_len)
|
||||
{
|
||||
/* Initialized data */
|
||||
|
||||
|
|
@ -1942,13 +1782,13 @@ ftnlen sname_len;
|
|||
static integer i__, k, n;
|
||||
static char diags[1];
|
||||
static logical isame[13];
|
||||
extern /* Subroutine */ int zmake_();
|
||||
extern /* Subroutine */ int zmake_(char*, char*, char*, integer*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, integer*, integer*, logical*, doublecomplex*, ftnlen, ftnlen, ftnlen);
|
||||
static integer nargs;
|
||||
static logical reset;
|
||||
static char cuplo[14];
|
||||
static integer incxs;
|
||||
static char trans[1];
|
||||
extern /* Subroutine */ int zmvch_();
|
||||
extern /* Subroutine */ int zmvch_(char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, doublereal*, doublecomplex*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen);
|
||||
static char uplos[1];
|
||||
static logical banded;
|
||||
static integer nc, ik, in;
|
||||
|
|
@ -1957,14 +1797,17 @@ ftnlen sname_len;
|
|||
static char ctrans[14];
|
||||
static doublereal errmax;
|
||||
static doublecomplex transl;
|
||||
extern logical lzeres_();
|
||||
extern /* Subroutine */ int cztbmv_();
|
||||
extern logical lzeres_(char*, char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen, ftnlen);
|
||||
extern /* Subroutine */ void cztbmv_(integer*, char*, char*, char*, integer*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, ftnlen, ftnlen, ftnlen);
|
||||
static char transs[1];
|
||||
extern /* Subroutine */ int cztbsv_(), cztpmv_(), cztrmv_(), cztpsv_(),
|
||||
cztrsv_();
|
||||
extern /* Subroutine */ void cztbsv_(integer*, char*, char*, char*, integer*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, ftnlen, ftnlen, ftnlen);
|
||||
extern /* Subroutine */ void cztpmv_(integer*, char*, char*, char*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen, ftnlen, ftnlen);
|
||||
extern /* Subroutine */ void cztpsv_(integer*, char*, char*, char*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen, ftnlen, ftnlen);
|
||||
extern /* Subroutine */ void cztrmv_(integer*, char*, char*, char*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, ftnlen, ftnlen, ftnlen);
|
||||
extern /* Subroutine */ void cztrsv_(integer*, char*, char*, char*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, ftnlen, ftnlen, ftnlen);
|
||||
static integer laa, icd, lda, ict, icu;
|
||||
static doublereal err;
|
||||
extern logical lze_();
|
||||
extern logical lze_(doublecomplex*, doublecomplex*, integer*);
|
||||
|
||||
|
||||
|
||||
|
|
@ -2422,21 +2265,7 @@ L130:
|
|||
|
||||
} /* zchk3_ */
|
||||
|
||||
/* Subroutine */ int zchk4_(sname, eps, thresh, nout, ntra, trace, rewi,
|
||||
fatal, nidim, idim, nalf, alf, ninc, inc, nmax, incmax, a, aa, as, x,
|
||||
xx, xs, y, yy, ys, yt, g, z__, iorder, sname_len)
|
||||
char *sname;
|
||||
doublereal *eps, *thresh;
|
||||
integer *nout, *ntra;
|
||||
logical *trace, *rewi, *fatal;
|
||||
integer *nidim, *idim, *nalf;
|
||||
doublecomplex *alf;
|
||||
integer *ninc, *inc, *nmax, *incmax;
|
||||
doublecomplex *a, *aa, *as, *x, *xx, *xs, *y, *yy, *ys, *yt;
|
||||
doublereal *g;
|
||||
doublecomplex *z__;
|
||||
integer *iorder;
|
||||
ftnlen sname_len;
|
||||
/* Subroutine */ int zchk4_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, doublecomplex* alf, integer* ninc, integer* inc, integer* nmax, integer* incmax, doublecomplex* a, doublecomplex* aa, doublecomplex* as, doublecomplex* x, doublecomplex* xx, doublecomplex* xs, doublecomplex* y, doublecomplex* yy, doublecomplex* ys, doublecomplex* yt, doublereal* g, doublecomplex* z__, integer* iorder, ftnlen sname_len)
|
||||
{
|
||||
/* System generated locals */
|
||||
integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7;
|
||||
|
|
@ -2450,21 +2279,21 @@ ftnlen sname_len;
|
|||
static integer i__, j, m, n;
|
||||
static doublecomplex alpha, w[1];
|
||||
static logical isame[13];
|
||||
extern /* Subroutine */ int zmake_();
|
||||
extern /* Subroutine */ int zmake_(char*, char*, char*, integer*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, integer*, integer*, logical*, doublecomplex*, ftnlen, ftnlen, ftnlen);
|
||||
static integer nargs;
|
||||
static logical reset;
|
||||
static integer incxs, incys;
|
||||
extern /* Subroutine */ int zmvch_();
|
||||
extern /* Subroutine */ int zmvch_(char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, doublereal*, doublecomplex*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen);
|
||||
static integer ia, nc, nd, im, in, ms, ix, iy, ns, lx, ly;
|
||||
extern /* Subroutine */ int czgerc_();
|
||||
extern /* Subroutine */ void czgerc_(integer*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, integer*);
|
||||
static doublereal errmax;
|
||||
extern /* Subroutine */ int czgeru_();
|
||||
extern /* Subroutine */ void czgeru_(integer*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, integer*);
|
||||
static doublecomplex transl;
|
||||
extern logical lzeres_();
|
||||
extern logical lzeres_(char*, char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen, ftnlen);
|
||||
static integer laa, lda;
|
||||
static doublecomplex als;
|
||||
static doublereal err;
|
||||
extern logical lze_();
|
||||
extern logical lze_(doublecomplex*, doublecomplex*, integer*);
|
||||
|
||||
|
||||
|
||||
|
|
@ -2793,21 +2622,7 @@ L150:
|
|||
|
||||
} /* zchk4_ */
|
||||
|
||||
/* Subroutine */ int zchk5_(sname, eps, thresh, nout, ntra, trace, rewi,
|
||||
fatal, nidim, idim, nalf, alf, ninc, inc, nmax, incmax, a, aa, as, x,
|
||||
xx, xs, y, yy, ys, yt, g, z__, iorder, sname_len)
|
||||
char *sname;
|
||||
doublereal *eps, *thresh;
|
||||
integer *nout, *ntra;
|
||||
logical *trace, *rewi, *fatal;
|
||||
integer *nidim, *idim, *nalf;
|
||||
doublecomplex *alf;
|
||||
integer *ninc, *inc, *nmax, *incmax;
|
||||
doublecomplex *a, *aa, *as, *x, *xx, *xs, *y, *yy, *ys, *yt;
|
||||
doublereal *g;
|
||||
doublecomplex *z__;
|
||||
integer *iorder;
|
||||
ftnlen sname_len;
|
||||
/* Subroutine */ int zchk5_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, doublecomplex* alf, integer* ninc, integer* inc, integer* nmax, integer* incmax, doublecomplex* a, doublecomplex* aa, doublecomplex* as, doublecomplex* x, doublecomplex* xx, doublecomplex* xs, doublecomplex* y, doublecomplex* yy, doublecomplex* ys, doublecomplex* yt, doublereal* g, doublecomplex* z__, integer* iorder, ftnlen sname_len)
|
||||
{
|
||||
/* Initialized data */
|
||||
|
||||
|
|
@ -2827,13 +2642,14 @@ ftnlen sname_len;
|
|||
static integer i__, j, n;
|
||||
static doublecomplex alpha, w[1];
|
||||
static logical isame[13];
|
||||
extern /* Subroutine */ int zmake_();
|
||||
extern /* Subroutine */ int zmake_(char*, char*, char*, integer*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, integer*, integer*, logical*, doublecomplex*, ftnlen, ftnlen, ftnlen);
|
||||
static integer nargs;
|
||||
extern /* Subroutine */ int czher_();
|
||||
extern /* Subroutine */ void czher_(integer*, char*, integer*, doublereal*, doublecomplex*, integer*, doublecomplex*, integer*, ftnlen);
|
||||
static logical reset;
|
||||
static char cuplo[14];
|
||||
static integer incxs;
|
||||
extern /* Subroutine */ int czhpr_(), zmvch_();
|
||||
extern /* Subroutine */ void czhpr_(integer*, char*, integer*, doublereal*, doublecomplex*, integer*, doublecomplex*, ftnlen);
|
||||
extern /* Subroutine */ int zmvch_(char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, doublereal*, doublecomplex*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen);
|
||||
static logical upper;
|
||||
static char uplos[1];
|
||||
static integer ia, ja, ic, nc, jj, lj, in;
|
||||
|
|
@ -2841,10 +2657,10 @@ ftnlen sname_len;
|
|||
static integer ix, ns, lx;
|
||||
static doublereal ralpha, errmax;
|
||||
static doublecomplex transl;
|
||||
extern logical lzeres_();
|
||||
extern logical lzeres_(char*, char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen, ftnlen);
|
||||
static integer laa, lda;
|
||||
static doublereal err;
|
||||
extern logical lze_();
|
||||
extern logical lze_(doublecomplex*, doublecomplex*, integer*);
|
||||
|
||||
/* Tests ZHER and ZHPR. */
|
||||
|
||||
|
|
@ -3167,21 +2983,7 @@ L130:
|
|||
|
||||
} /* zchk5_ */
|
||||
|
||||
/* Subroutine */ int zchk6_(sname, eps, thresh, nout, ntra, trace, rewi,
|
||||
fatal, nidim, idim, nalf, alf, ninc, inc, nmax, incmax, a, aa, as, x,
|
||||
xx, xs, y, yy, ys, yt, g, z__, iorder, sname_len)
|
||||
char *sname;
|
||||
doublereal *eps, *thresh;
|
||||
integer *nout, *ntra;
|
||||
logical *trace, *rewi, *fatal;
|
||||
integer *nidim, *idim, *nalf;
|
||||
doublecomplex *alf;
|
||||
integer *ninc, *inc, *nmax, *incmax;
|
||||
doublecomplex *a, *aa, *as, *x, *xx, *xs, *y, *yy, *ys, *yt;
|
||||
doublereal *g;
|
||||
doublecomplex *z__;
|
||||
integer *iorder;
|
||||
ftnlen sname_len;
|
||||
/* Subroutine */ int zchk6_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, doublecomplex* alf, integer* ninc, integer* inc, integer* nmax, integer* incmax, doublecomplex* a, doublecomplex* aa, doublecomplex* as, doublecomplex* x, doublecomplex* xx, doublecomplex* xs, doublecomplex* y, doublecomplex* yy, doublecomplex* ys, doublecomplex* yt, doublereal* g, doublecomplex* z__, integer* iorder, ftnlen sname_len)
|
||||
{
|
||||
/* Initialized data */
|
||||
|
||||
|
|
@ -3201,25 +3003,26 @@ ftnlen sname_len;
|
|||
static integer i__, j, n;
|
||||
static doublecomplex alpha, w[2];
|
||||
static logical isame[13];
|
||||
extern /* Subroutine */ int zmake_();
|
||||
extern /* Subroutine */ int zmake_(char*, char*, char*, integer*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, integer*, integer*, logical*, doublecomplex*, ftnlen, ftnlen, ftnlen);
|
||||
static integer nargs;
|
||||
static logical reset;
|
||||
static char cuplo[14];
|
||||
static integer incxs, incys;
|
||||
extern /* Subroutine */ int zmvch_();
|
||||
extern /* Subroutine */ int zmvch_(char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, doublereal*, doublecomplex*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen);
|
||||
static logical upper;
|
||||
static char uplos[1];
|
||||
extern /* Subroutine */ int czher2_(), czhpr2_();
|
||||
extern /* Subroutine */ void czher2_(integer*, char*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, ftnlen);
|
||||
extern /* Subroutine */ void czhpr2_(integer*, char*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, ftnlen);
|
||||
static integer ia, ja, ic, nc, jj, lj, in;
|
||||
static logical packed;
|
||||
static integer ix, iy, ns, lx, ly;
|
||||
static doublereal errmax;
|
||||
static doublecomplex transl;
|
||||
extern logical lzeres_();
|
||||
extern logical lzeres_(char*, char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen, ftnlen);
|
||||
static integer laa, lda;
|
||||
static doublecomplex als;
|
||||
static doublereal err;
|
||||
extern logical lze_();
|
||||
extern logical lze_(doublecomplex*, doublecomplex*, integer*);
|
||||
|
||||
/* Tests ZHER2 and ZHPR2. */
|
||||
|
||||
|
|
@ -3604,24 +3407,7 @@ L170:
|
|||
|
||||
} /* zchk6_ */
|
||||
|
||||
/* Subroutine */ int zmvch_(trans, m, n, alpha, a, nmax, x, incx, beta, y,
|
||||
incy, yt, g, yy, eps, err, fatal, nout, mv, trans_len)
|
||||
char *trans;
|
||||
integer *m, *n;
|
||||
doublecomplex *alpha, *a;
|
||||
integer *nmax;
|
||||
doublecomplex *x;
|
||||
integer *incx;
|
||||
doublecomplex *beta, *y;
|
||||
integer *incy;
|
||||
doublecomplex *yt;
|
||||
doublereal *g;
|
||||
doublecomplex *yy;
|
||||
doublereal *eps, *err;
|
||||
logical *fatal;
|
||||
integer *nout;
|
||||
logical *mv;
|
||||
ftnlen trans_len;
|
||||
/* Subroutine */ int zmvch_(char* trans, integer* m, integer* n, doublecomplex* alpha, doublecomplex* a, integer* nmax, doublecomplex* x, integer* incx, doublecomplex* beta, doublecomplex* y, integer* incy, doublecomplex* yt, doublereal* g, doublecomplex* yy, doublereal* eps, doublereal* err, logical* fatal, integer* nout, logical* mv, ftnlen trans_len)
|
||||
{
|
||||
|
||||
/* System generated locals */
|
||||
|
|
@ -3819,9 +3605,7 @@ L80:
|
|||
|
||||
} /* zmvch_ */
|
||||
|
||||
logical lze_(ri, rj, lr)
|
||||
doublecomplex *ri, *rj;
|
||||
integer *lr;
|
||||
logical lze_(doublecomplex* ri, doublecomplex* rj, integer* lr)
|
||||
{
|
||||
/* System generated locals */
|
||||
integer i__1, i__2, i__3;
|
||||
|
|
@ -3868,13 +3652,7 @@ L30:
|
|||
|
||||
} /* lze_ */
|
||||
|
||||
logical lzeres_(type__, uplo, m, n, aa, as, lda, type_len, uplo_len)
|
||||
char *type__, *uplo;
|
||||
integer *m, *n;
|
||||
doublecomplex *aa, *as;
|
||||
integer *lda;
|
||||
ftnlen type_len;
|
||||
ftnlen uplo_len;
|
||||
logical lzeres_(char* type__, char* uplo, integer* m, integer* n, doublecomplex* aa, doublecomplex* as, integer* lda, ftnlen type_len, ftnlen uplo_len)
|
||||
{
|
||||
/* System generated locals */
|
||||
integer aa_dim1, aa_offset, as_dim1, as_offset, i__1, i__2, i__3, i__4;
|
||||
|
|
@ -3967,9 +3745,7 @@ L80:
|
|||
|
||||
} /* lzeres_ */
|
||||
|
||||
/* Double Complex */ VOID zbeg_( ret_val, reset)
|
||||
doublecomplex * ret_val;
|
||||
logical *reset;
|
||||
/* Double Complex */ VOID zbeg_( doublecomplex* ret_val, logical* reset)
|
||||
{
|
||||
/* System generated locals */
|
||||
doublereal d__1, d__2;
|
||||
|
|
@ -4030,8 +3806,7 @@ L10:
|
|||
|
||||
} /* zbeg_ */
|
||||
|
||||
doublereal ddiff_(x, y)
|
||||
doublereal *x, *y;
|
||||
doublereal ddiff_(doublereal* x, doublereal* y)
|
||||
{
|
||||
/* System generated locals */
|
||||
doublereal ret_val;
|
||||
|
|
@ -4051,19 +3826,7 @@ doublereal *x, *y;
|
|||
|
||||
} /* ddiff_ */
|
||||
|
||||
/* Subroutine */ int zmake_(type__, uplo, diag, m, n, a, nmax, aa, lda, kl,
|
||||
ku, reset, transl, type_len, uplo_len, diag_len)
|
||||
char *type__, *uplo, *diag;
|
||||
integer *m, *n;
|
||||
doublecomplex *a;
|
||||
integer *nmax;
|
||||
doublecomplex *aa;
|
||||
integer *lda, *kl, *ku;
|
||||
logical *reset;
|
||||
doublecomplex *transl;
|
||||
ftnlen type_len;
|
||||
ftnlen uplo_len;
|
||||
ftnlen diag_len;
|
||||
/* Subroutine */ int zmake_(char* type__, char* uplo, char* diag, integer* m, integer* n, doublecomplex* a, integer* nmax, doublecomplex* aa, integer* lda, integer* kl, integer* ku, logical* reset, doublecomplex* transl, ftnlen type_len, ftnlen uplo_len, ftnlen diag_len)
|
||||
{
|
||||
/* System generated locals */
|
||||
integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
|
||||
|
|
@ -4072,7 +3835,7 @@ ftnlen diag_len;
|
|||
|
||||
/* Local variables */
|
||||
static integer ibeg, iend, ioff;
|
||||
extern /* Double Complex */ VOID zbeg_();
|
||||
extern /* Double Complex */ VOID zbeg_(doublecomplex*, logical*);
|
||||
static logical unit;
|
||||
static integer i__, j;
|
||||
static logical lower;
|
||||
|
|
|
|||
|
|
@ -22,14 +22,11 @@ typedef double doublereal;
|
|||
typedef struct { real r, i; } complex;
|
||||
typedef struct { doublereal r, i; } doublecomplex;
|
||||
#ifdef _MSC_VER
|
||||
static inline _Fcomplex Cf(complex *z) {_Fcomplex zz={z->r , z->i}; return zz;}
|
||||
static inline _Dcomplex Cd(doublecomplex *z) {_Dcomplex zz={z->r , z->i};return zz;}
|
||||
static inline _Fcomplex * _pCf(complex *z) {return (_Fcomplex*)z;}
|
||||
static inline _Dcomplex * _pCd(doublecomplex *z) {return (_Dcomplex*)z;}
|
||||
#else
|
||||
static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
|
||||
static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
|
||||
static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
|
||||
static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
|
||||
#endif
|
||||
#define pCf(z) (*_pCf(z))
|
||||
|
|
@ -242,124 +239,7 @@ typedef struct Namelist Namelist;
|
|||
/* procedure parameter types for -A and -C++ */
|
||||
|
||||
#define F2C_proc_par_types 1
|
||||
#ifdef __cplusplus
|
||||
typedef logical (*L_fp)(...);
|
||||
#else
|
||||
typedef logical (*L_fp)();
|
||||
#endif
|
||||
#if 0
|
||||
static float spow_ui(float x, integer n) {
|
||||
float pow=1.0; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x = 1/x;
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow *= x;
|
||||
if(u >>= 1) x *= x;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
return pow;
|
||||
}
|
||||
static double dpow_ui(double x, integer n) {
|
||||
double pow=1.0; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x = 1/x;
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow *= x;
|
||||
if(u >>= 1) x *= x;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
return pow;
|
||||
}
|
||||
#ifdef _MSC_VER
|
||||
static _Fcomplex cpow_ui(complex x, integer n) {
|
||||
complex pow={1.0,0.0}; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i;
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow.r *= x.r, pow.i *= x.i;
|
||||
if(u >>= 1) x.r *= x.r, x.i *= x.i;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
_Fcomplex p={pow.r, pow.i};
|
||||
return p;
|
||||
}
|
||||
#else
|
||||
static _Complex float cpow_ui(_Complex float x, integer n) {
|
||||
_Complex float pow=1.0; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x = 1/x;
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow *= x;
|
||||
if(u >>= 1) x *= x;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
return pow;
|
||||
}
|
||||
#endif
|
||||
#ifdef _MSC_VER
|
||||
static _Dcomplex zpow_ui(_Dcomplex x, integer n) {
|
||||
_Dcomplex pow={1.0,0.0}; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1];
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1];
|
||||
if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1];
|
||||
else break;
|
||||
}
|
||||
}
|
||||
_Dcomplex p = {pow._Val[0], pow._Val[1]};
|
||||
return p;
|
||||
}
|
||||
#else
|
||||
static _Complex double zpow_ui(_Complex double x, integer n) {
|
||||
_Complex double pow=1.0; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x = 1/x;
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow *= x;
|
||||
if(u >>= 1) x *= x;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
return pow;
|
||||
}
|
||||
#endif
|
||||
static integer pow_ii(integer x, integer n) {
|
||||
integer pow; unsigned long int u;
|
||||
if (n <= 0) {
|
||||
if (n == 0 || x == 1) pow = 1;
|
||||
else if (x != -1) pow = x == 0 ? 1/x : 0;
|
||||
else n = -n;
|
||||
}
|
||||
if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
|
||||
u = n;
|
||||
for(pow = 1; ; ) {
|
||||
if(u & 01) pow *= x;
|
||||
if(u >>= 1) x *= x;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
return pow;
|
||||
}
|
||||
static integer dmaxloc_(double *w, integer s, integer e, integer *n)
|
||||
{
|
||||
double m; integer i, mi;
|
||||
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
|
||||
if (w[i-1]>m) mi=i ,m=w[i-1];
|
||||
return mi-s+1;
|
||||
}
|
||||
static integer smaxloc_(float *w, integer s, integer e, integer *n)
|
||||
{
|
||||
float m; integer i, mi;
|
||||
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
|
||||
if (w[i-1]>m) mi=i ,m=w[i-1];
|
||||
return mi-s+1;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
/* Common Block Declarations */
|
||||
|
||||
|
|
@ -388,7 +268,7 @@ static logical c_true = TRUE_;
|
|||
static integer c__0 = 0;
|
||||
static logical c_false = FALSE_;
|
||||
|
||||
/* Main program MAIN__() */ int main()
|
||||
/* Main program MAIN__() */ int main(void)
|
||||
{
|
||||
/* Initialized data */
|
||||
|
||||
|
|
@ -400,26 +280,29 @@ static logical c_false = FALSE_;
|
|||
doublereal d__1;
|
||||
|
||||
/* Builtin functions */
|
||||
integer s_rsle(), do_lio(), e_rsle(), f_open(), s_wsfe(), do_fio(),
|
||||
e_wsfe(), s_wsle(), e_wsle(), s_rsfe(), e_rsfe();
|
||||
integer s_rsle(void), do_lio(void), e_rsle(void), f_open(void), s_wsfe(void), do_fio(void),
|
||||
e_wsfe(void), s_wsle(void), e_wsle(void), s_rsfe(void), e_rsfe(void);
|
||||
|
||||
/* Local variables */
|
||||
static integer nalf, idim[9];
|
||||
static logical same;
|
||||
static integer nbet, ntra;
|
||||
static logical rewi;
|
||||
extern /* Subroutine */ int zchk1_(), zchk2_(), zchk3_(), zchk4_(),
|
||||
zchk5_();
|
||||
extern /* Subroutine */ int zchk1_(char*, doublereal*, doublereal*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublereal*, integer*, ftnlen);
|
||||
extern /* Subroutine */ int zchk2_(char*, doublereal*, doublereal*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublereal*, integer*, ftnlen);
|
||||
extern /* Subroutine */ int zchk3_(char*, doublereal*, doublereal*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublereal*, doublecomplex*, integer*, ftnlen);
|
||||
extern /* Subroutine */ int zchk4_(char*, doublereal*, doublereal*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublereal*, integer*, ftnlen);
|
||||
extern /* Subroutine */ int zchk5_(char*, doublereal*, doublereal*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublereal*, doublecomplex*, integer*, ftnlen);
|
||||
static doublecomplex c__[4225] /* was [65][65] */;
|
||||
static doublereal g[65];
|
||||
static integer i__, j;
|
||||
extern doublereal ddiff_();
|
||||
extern doublereal ddiff_(doublereal*, doublereal*);
|
||||
static integer n;
|
||||
static logical fatal;
|
||||
static doublecomplex w[130];
|
||||
static logical trace;
|
||||
static integer nidim;
|
||||
extern /* Subroutine */ int zmmch_();
|
||||
extern /* Subroutine */ int zmmch_(char*, char*, integer*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, doublereal*, doublecomplex*, integer*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen, ftnlen);
|
||||
static char snaps[32];
|
||||
static integer isnum;
|
||||
static logical ltest[9];
|
||||
|
|
@ -431,10 +314,10 @@ static logical c_false = FALSE_;
|
|||
static logical rorder;
|
||||
static integer layout;
|
||||
static logical ltestt, tsterr;
|
||||
extern /* Subroutine */ int cz3chke_();
|
||||
extern /* Subroutine */ int cz3chke_(char*, ftnlen);
|
||||
static doublecomplex alf[7], bet[7];
|
||||
static doublereal eps, err;
|
||||
extern logical lze_();
|
||||
extern logical lze_(doublecomplex*, doublecomplex*, integer*);
|
||||
char tmpchar;
|
||||
|
||||
/* Test program for the COMPLEX*16 Level 3 Blas. */
|
||||
|
|
@ -924,22 +807,7 @@ L230:
|
|||
|
||||
} /* MAIN__ */
|
||||
|
||||
/* Subroutine */ int zchk1_(sname, eps, thresh, nout, ntra, trace, rewi,
|
||||
fatal, nidim, idim, nalf, alf, nbet, bet, nmax, a, aa, as, b, bb, bs,
|
||||
c__, cc, cs, ct, g, iorder, sname_len)
|
||||
char *sname;
|
||||
doublereal *eps, *thresh;
|
||||
integer *nout, *ntra;
|
||||
logical *trace, *rewi, *fatal;
|
||||
integer *nidim, *idim, *nalf;
|
||||
doublecomplex *alf;
|
||||
integer *nbet;
|
||||
doublecomplex *bet;
|
||||
integer *nmax;
|
||||
doublecomplex *a, *aa, *as, *b, *bb, *bs, *c__, *cc, *cs, *ct;
|
||||
doublereal *g;
|
||||
integer *iorder;
|
||||
ftnlen sname_len;
|
||||
/* Subroutine */ int zchk1_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, doublecomplex* alf, integer* nbet, doublecomplex* bet, integer* nmax, doublecomplex* a, doublecomplex* aa, doublecomplex* as, doublecomplex* b, doublecomplex* bb, doublecomplex* bs, doublecomplex* c__, doublecomplex* cc, doublecomplex* cs, doublecomplex* ct, doublereal* g, integer* iorder, ftnlen sname_len)
|
||||
{
|
||||
/* Initialized data */
|
||||
|
||||
|
|
@ -956,21 +824,21 @@ ftnlen sname_len;
|
|||
static integer i__, k, m, n;
|
||||
static doublecomplex alpha;
|
||||
static logical isame[13], trana, tranb;
|
||||
extern /* Subroutine */ int zmake_();
|
||||
extern /* Subroutine */ int zmake_(char*, char*, char*, integer*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, logical*, doublecomplex*, ftnlen, ftnlen, ftnlen);
|
||||
static integer nargs;
|
||||
extern /* Subroutine */ int zmmch_();
|
||||
extern /* Subroutine */ int zmmch_(char*, char*, integer*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, doublereal*, doublecomplex*, integer*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen, ftnlen);
|
||||
static logical reset;
|
||||
static integer ia, ib;
|
||||
extern /* Subroutine */ int zprcn1_();
|
||||
extern /* Subroutine */ int zprcn1_(integer*, integer*, char*, integer*, char*, char*, integer*, integer*, integer*, doublecomplex*, integer*, integer*, doublecomplex*, integer*, ftnlen, ftnlen, ftnlen);
|
||||
static integer ma, mb, na, nb, nc, ik, im, in, ks, ms, ns;
|
||||
extern /* Subroutine */ int czgemm_();
|
||||
extern /* Subroutine */ void czgemm_(integer*, char*, char*, integer*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen, ftnlen);
|
||||
static char tranas[1], tranbs[1], transa[1], transb[1];
|
||||
static doublereal errmax;
|
||||
extern logical lzeres_();
|
||||
extern logical lzeres_(char*, char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen, ftnlen);
|
||||
static integer ica, icb, laa, lbb, lda, lcc, ldb, ldc;
|
||||
static doublecomplex als, bls;
|
||||
static doublereal err;
|
||||
extern logical lze_();
|
||||
extern logical lze_(doublecomplex*, doublecomplex*, integer*);
|
||||
|
||||
/* Tests ZGEMM. */
|
||||
|
||||
|
|
@ -1313,20 +1181,7 @@ L130:
|
|||
} /* zchk1_ */
|
||||
|
||||
|
||||
/* Subroutine */ int zprcn1_(nout, nc, sname, iorder, transa, transb, m, n, k,
|
||||
alpha, lda, ldb, beta, ldc, sname_len, transa_len, transb_len)
|
||||
integer *nout, *nc;
|
||||
char *sname;
|
||||
integer *iorder;
|
||||
char *transa, *transb;
|
||||
integer *m, *n, *k;
|
||||
doublecomplex *alpha;
|
||||
integer *lda, *ldb;
|
||||
doublecomplex *beta;
|
||||
integer *ldc;
|
||||
ftnlen sname_len;
|
||||
ftnlen transa_len;
|
||||
ftnlen transb_len;
|
||||
/* Subroutine */ int zprcn1_(integer* nout, integer* nc, char* sname, integer* iorder, char* transa, char* transb, integer* m, integer* n, integer* k, doublecomplex* alpha, integer* lda, integer* ldb, doublecomplex* beta, integer* ldc, ftnlen sname_len, ftnlen transa_len, ftnlen transb_len)
|
||||
{
|
||||
/* Local variables */
|
||||
static char crc[14], cta[14], ctb[14];
|
||||
|
|
@ -1357,22 +1212,7 @@ return 0;
|
|||
} /* zprcn1_ */
|
||||
|
||||
|
||||
/* Subroutine */ int zchk2_(sname, eps, thresh, nout, ntra, trace, rewi,
|
||||
fatal, nidim, idim, nalf, alf, nbet, bet, nmax, a, aa, as, b, bb, bs,
|
||||
c__, cc, cs, ct, g, iorder, sname_len)
|
||||
char *sname;
|
||||
doublereal *eps, *thresh;
|
||||
integer *nout, *ntra;
|
||||
logical *trace, *rewi, *fatal;
|
||||
integer *nidim, *idim, *nalf;
|
||||
doublecomplex *alf;
|
||||
integer *nbet;
|
||||
doublecomplex *bet;
|
||||
integer *nmax;
|
||||
doublecomplex *a, *aa, *as, *b, *bb, *bs, *c__, *cc, *cs, *ct;
|
||||
doublereal *g;
|
||||
integer *iorder;
|
||||
ftnlen sname_len;
|
||||
/* Subroutine */ int zchk2_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, doublecomplex* alf, integer* nbet, doublecomplex* bet, integer* nmax, doublecomplex* a, doublecomplex* aa, doublecomplex* as, doublecomplex* b, doublecomplex* bb, doublecomplex* bs, doublecomplex* c__, doublecomplex* cc, doublecomplex* cs, doublecomplex* ct, doublereal* g, integer* iorder, ftnlen sname_len)
|
||||
{
|
||||
/* Initialized data */
|
||||
|
||||
|
|
@ -1394,23 +1234,23 @@ ftnlen sname_len;
|
|||
static doublecomplex alpha;
|
||||
static logical isame[13];
|
||||
static char sides[1];
|
||||
extern /* Subroutine */ int zmake_();
|
||||
extern /* Subroutine */ int zmake_(char*, char*, char*, integer*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, logical*, doublecomplex*, ftnlen, ftnlen, ftnlen);
|
||||
static integer nargs;
|
||||
extern /* Subroutine */ int zmmch_();
|
||||
extern /* Subroutine */ int zmmch_(char*, char*, integer*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, doublereal*, doublecomplex*, integer*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen, ftnlen);
|
||||
static logical reset;
|
||||
static char uplos[1];
|
||||
static integer ia, ib;
|
||||
extern /* Subroutine */ int zprcn2_();
|
||||
extern /* Subroutine */ int zprcn2_(integer*, integer*, char*, integer*, char*, char*, integer*, integer*, doublecomplex*, integer*, integer*, doublecomplex*, integer*, ftnlen, ftnlen, ftnlen);
|
||||
static integer na, nc, im, in, ms, ns;
|
||||
extern /* Subroutine */ int czhemm_();
|
||||
extern /* Subroutine */ void czhemm_(integer*, char*, char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen, ftnlen);
|
||||
static doublereal errmax;
|
||||
extern logical lzeres_();
|
||||
extern /* Subroutine */ int czsymm_();
|
||||
extern logical lzeres_(char*, char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen, ftnlen);
|
||||
extern /* Subroutine */ void czsymm_(integer*, char*, char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen, ftnlen);
|
||||
static integer laa, lbb, lda, lcc, ldb, ldc, ics;
|
||||
static doublecomplex als, bls;
|
||||
static integer icu;
|
||||
static doublereal err;
|
||||
extern logical lze_();
|
||||
extern logical lze_(doublecomplex*, doublecomplex*, integer*);
|
||||
|
||||
/* Tests ZHEMM and ZSYMM. */
|
||||
|
||||
|
|
@ -1737,20 +1577,7 @@ L120:
|
|||
} /* zchk2_ */
|
||||
|
||||
|
||||
/* Subroutine */ int zprcn2_(nout, nc, sname, iorder, side, uplo, m, n, alpha,
|
||||
lda, ldb, beta, ldc, sname_len, side_len, uplo_len)
|
||||
integer *nout, *nc;
|
||||
char *sname;
|
||||
integer *iorder;
|
||||
char *side, *uplo;
|
||||
integer *m, *n;
|
||||
doublecomplex *alpha;
|
||||
integer *lda, *ldb;
|
||||
doublecomplex *beta;
|
||||
integer *ldc;
|
||||
ftnlen sname_len;
|
||||
ftnlen side_len;
|
||||
ftnlen uplo_len;
|
||||
/* Subroutine */ int zprcn2_(integer* nout, integer* nc, char* sname, integer* iorder, char* side, char* uplo, integer* m, integer* n, doublecomplex* alpha, integer* lda, integer* ldb, doublecomplex* beta, integer* ldc, ftnlen sname_len, ftnlen side_len, ftnlen uplo_len)
|
||||
{
|
||||
/* Local variables */
|
||||
static char cs[14], cu[14], crc[14];
|
||||
|
|
@ -1777,21 +1604,7 @@ return 0;
|
|||
} /* zprcn2_ */
|
||||
|
||||
|
||||
/* Subroutine */ int zchk3_(sname, eps, thresh, nout, ntra, trace, rewi,
|
||||
fatal, nidim, idim, nalf, alf, nmax, a, aa, as, b, bb, bs, ct, g, c__,
|
||||
iorder, sname_len)
|
||||
char *sname;
|
||||
doublereal *eps, *thresh;
|
||||
integer *nout, *ntra;
|
||||
logical *trace, *rewi, *fatal;
|
||||
integer *nidim, *idim, *nalf;
|
||||
doublecomplex *alf;
|
||||
integer *nmax;
|
||||
doublecomplex *a, *aa, *as, *b, *bb, *bs, *ct;
|
||||
doublereal *g;
|
||||
doublecomplex *c__;
|
||||
integer *iorder;
|
||||
ftnlen sname_len;
|
||||
/* Subroutine */ int zchk3_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, doublecomplex* alf, integer* nmax, doublecomplex* a, doublecomplex* aa, doublecomplex* as, doublecomplex* b, doublecomplex* bb, doublecomplex* bs, doublecomplex* ct, doublereal* g, doublecomplex* c__, integer* iorder, ftnlen sname_len)
|
||||
{
|
||||
/* Initialized data */
|
||||
|
||||
|
|
@ -1817,23 +1630,24 @@ ftnlen sname_len;
|
|||
static char diags[1];
|
||||
static logical isame[13];
|
||||
static char sides[1];
|
||||
extern /* Subroutine */ int zmake_();
|
||||
extern /* Subroutine */ int zmake_(char*, char*, char*, integer*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, logical*, doublecomplex*, ftnlen, ftnlen, ftnlen);
|
||||
static integer nargs;
|
||||
extern /* Subroutine */ int zmmch_();
|
||||
extern /* Subroutine */ int zmmch_(char*, char*, integer*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, doublereal*, doublecomplex*, integer*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen, ftnlen);
|
||||
static logical reset;
|
||||
static char uplos[1];
|
||||
static integer ia, na;
|
||||
extern /* Subroutine */ int zprcn3_();
|
||||
extern /* Subroutine */ int zprcn3_(integer*, integer*, char*, integer*, char*, char*, char*, char*, integer*, integer*, doublecomplex*, integer*, integer*, ftnlen, ftnlen, ftnlen, ftnlen, ftnlen);
|
||||
static integer nc, im, in, ms, ns;
|
||||
static char tranas[1], transa[1];
|
||||
static doublereal errmax;
|
||||
extern logical lzeres_();
|
||||
extern /* Subroutine */ int cztrmm_(), cztrsm_();
|
||||
extern logical lzeres_(char*, char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen, ftnlen);
|
||||
extern /* Subroutine */ void cztrmm_(integer*, char*, char*, char*, char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, ftnlen, ftnlen, ftnlen, ftnlen);
|
||||
extern /* Subroutine */ void cztrsm_(integer*, char*, char*, char*, char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, ftnlen, ftnlen, ftnlen, ftnlen);
|
||||
static integer laa, icd, lbb, lda, ldb, ics;
|
||||
static doublecomplex als;
|
||||
static integer ict, icu;
|
||||
static doublereal err;
|
||||
extern logical lze_();
|
||||
extern logical lze_(doublecomplex*, doublecomplex*, integer*);
|
||||
|
||||
/* Tests ZTRMM and ZTRSM. */
|
||||
|
||||
|
|
@ -2227,21 +2041,7 @@ L160:
|
|||
} /* zchk3_ */
|
||||
|
||||
|
||||
/* Subroutine */ int zprcn3_(nout, nc, sname, iorder, side, uplo, transa,
|
||||
diag, m, n, alpha, lda, ldb, sname_len, side_len, uplo_len,
|
||||
transa_len, diag_len)
|
||||
integer *nout, *nc;
|
||||
char *sname;
|
||||
integer *iorder;
|
||||
char *side, *uplo, *transa, *diag;
|
||||
integer *m, *n;
|
||||
doublecomplex *alpha;
|
||||
integer *lda, *ldb;
|
||||
ftnlen sname_len;
|
||||
ftnlen side_len;
|
||||
ftnlen uplo_len;
|
||||
ftnlen transa_len;
|
||||
ftnlen diag_len;
|
||||
/* Subroutine */ int zprcn3_(integer* nout, integer* nc, char* sname, integer* iorder, char* side, char* uplo, char* transa, char* diag, integer* m, integer* n, doublecomplex* alpha, integer* lda, integer* ldb, ftnlen sname_len, ftnlen side_len, ftnlen uplo_len, ftnlen transa_len, ftnlen diag_len)
|
||||
{
|
||||
|
||||
/* Local variables */
|
||||
|
|
@ -2281,22 +2081,7 @@ return 0;
|
|||
} /* zprcn3_ */
|
||||
|
||||
|
||||
/* Subroutine */ int zchk4_(sname, eps, thresh, nout, ntra, trace, rewi,
|
||||
fatal, nidim, idim, nalf, alf, nbet, bet, nmax, a, aa, as, b, bb, bs,
|
||||
c__, cc, cs, ct, g, iorder, sname_len)
|
||||
char *sname;
|
||||
doublereal *eps, *thresh;
|
||||
integer *nout, *ntra;
|
||||
logical *trace, *rewi, *fatal;
|
||||
integer *nidim, *idim, *nalf;
|
||||
doublecomplex *alf;
|
||||
integer *nbet;
|
||||
doublecomplex *bet;
|
||||
integer *nmax;
|
||||
doublecomplex *a, *aa, *as, *b, *bb, *bs, *c__, *cc, *cs, *ct;
|
||||
doublereal *g;
|
||||
integer *iorder;
|
||||
ftnlen sname_len;
|
||||
/* Subroutine */ int zchk4_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, doublecomplex* alf, integer* nbet, doublecomplex* bet, integer* nmax, doublecomplex* a, doublecomplex* aa, doublecomplex* as, doublecomplex* b, doublecomplex* bb, doublecomplex* bs, doublecomplex* c__, doublecomplex* cc, doublecomplex* cs, doublecomplex* ct, doublereal* g, integer* iorder, ftnlen sname_len)
|
||||
{
|
||||
/* Initialized data */
|
||||
|
||||
|
|
@ -2320,30 +2105,30 @@ ftnlen sname_len;
|
|||
static doublecomplex alpha;
|
||||
static doublereal rbeta;
|
||||
static logical isame[13];
|
||||
extern /* Subroutine */ int zmake_();
|
||||
extern /* Subroutine */ int zmake_(char*, char*, char*, integer*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, logical*, doublecomplex*, ftnlen, ftnlen, ftnlen);
|
||||
static integer nargs;
|
||||
extern /* Subroutine */ int zmmch_();
|
||||
extern /* Subroutine */ int zmmch_(char*, char*, integer*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, doublereal*, doublecomplex*, integer*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen, ftnlen);
|
||||
static doublereal rbets;
|
||||
static logical reset;
|
||||
static char trans[1];
|
||||
static logical upper;
|
||||
static char uplos[1];
|
||||
static integer ia, ib, jc, ma, na;
|
||||
extern /* Subroutine */ int zprcn4_();
|
||||
extern /* Subroutine */ int zprcn4_(integer*, integer*, char*, integer*, char*, char*, integer*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, ftnlen, ftnlen, ftnlen);
|
||||
static integer nc;
|
||||
extern /* Subroutine */ int zprcn6_();
|
||||
extern /* Subroutine */ int zprcn6_(integer*, integer*, char*, integer*, char*, char*, integer*, integer*, doublereal*, integer*, doublereal*, integer*, ftnlen, ftnlen, ftnlen);
|
||||
static integer ik, in, jj, lj, ks, ns;
|
||||
static doublereal ralpha;
|
||||
extern /* Subroutine */ int czherk_();
|
||||
extern /* Subroutine */ int czherk_(integer*, char*, char*, integer*, integer*, doublereal*, doublecomplex*, integer*, doublereal*, doublecomplex*, integer*, ftnlen, ftnlen);
|
||||
static doublereal errmax;
|
||||
extern logical lzeres_();
|
||||
extern logical lzeres_(char*, char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen, ftnlen);
|
||||
static char transs[1], transt[1];
|
||||
extern /* Subroutine */ int czsyrk_();
|
||||
extern /* Subroutine */ int czsyrk_(integer*, char*, char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen, ftnlen);
|
||||
static integer laa, lda, lcc, ldc;
|
||||
static doublecomplex als;
|
||||
static integer ict, icu;
|
||||
static doublereal err;
|
||||
extern logical lze_();
|
||||
extern logical lze_(doublecomplex*, doublecomplex*, integer*);
|
||||
|
||||
/* Tests ZHERK and ZSYRK. */
|
||||
|
||||
|
|
@ -2732,20 +2517,7 @@ L130:
|
|||
} /* zchk4_ */
|
||||
|
||||
|
||||
/* Subroutine */ int zprcn4_(nout, nc, sname, iorder, uplo, transa, n, k,
|
||||
alpha, lda, beta, ldc, sname_len, uplo_len, transa_len)
|
||||
integer *nout, *nc;
|
||||
char *sname;
|
||||
integer *iorder;
|
||||
char *uplo, *transa;
|
||||
integer *n, *k;
|
||||
doublecomplex *alpha;
|
||||
integer *lda;
|
||||
doublecomplex *beta;
|
||||
integer *ldc;
|
||||
ftnlen sname_len;
|
||||
ftnlen uplo_len;
|
||||
ftnlen transa_len;
|
||||
/* Subroutine */ int zprcn4_(integer* nout, integer* nc, char* sname, integer* iorder, char* uplo, char* transa, integer* n, integer* k, doublecomplex* alpha, integer* lda, doublecomplex* beta, integer* ldc, ftnlen sname_len, ftnlen uplo_len, ftnlen transa_len)
|
||||
{
|
||||
/* Local variables */
|
||||
static char ca[14], cu[14], crc[14];
|
||||
|
|
@ -2775,20 +2547,7 @@ return 0;
|
|||
|
||||
|
||||
|
||||
/* Subroutine */ int zprcn6_(nout, nc, sname, iorder, uplo, transa, n, k,
|
||||
alpha, lda, beta, ldc, sname_len, uplo_len, transa_len)
|
||||
integer *nout, *nc;
|
||||
char *sname;
|
||||
integer *iorder;
|
||||
char *uplo, *transa;
|
||||
integer *n, *k;
|
||||
doublereal *alpha;
|
||||
integer *lda;
|
||||
doublereal *beta;
|
||||
integer *ldc;
|
||||
ftnlen sname_len;
|
||||
ftnlen uplo_len;
|
||||
ftnlen transa_len;
|
||||
/* Subroutine */ int zprcn6_(integer* nout, integer* nc, char* sname, integer* iorder, char* uplo, char* transa, integer* n, integer* k, doublereal* alpha, integer* lda, doublereal* beta, integer* ldc, ftnlen sname_len, ftnlen uplo_len, ftnlen transa_len)
|
||||
{
|
||||
|
||||
/* Local variables */
|
||||
|
|
@ -2818,23 +2577,7 @@ return 0;
|
|||
} /* zprcn6_ */
|
||||
|
||||
|
||||
/* Subroutine */ int zchk5_(sname, eps, thresh, nout, ntra, trace, rewi,
|
||||
fatal, nidim, idim, nalf, alf, nbet, bet, nmax, ab, aa, as, bb, bs,
|
||||
c__, cc, cs, ct, g, w, iorder, sname_len)
|
||||
char *sname;
|
||||
doublereal *eps, *thresh;
|
||||
integer *nout, *ntra;
|
||||
logical *trace, *rewi, *fatal;
|
||||
integer *nidim, *idim, *nalf;
|
||||
doublecomplex *alf;
|
||||
integer *nbet;
|
||||
doublecomplex *bet;
|
||||
integer *nmax;
|
||||
doublecomplex *ab, *aa, *as, *bb, *bs, *c__, *cc, *cs, *ct;
|
||||
doublereal *g;
|
||||
doublecomplex *w;
|
||||
integer *iorder;
|
||||
ftnlen sname_len;
|
||||
/* Subroutine */ int zchk5_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, doublecomplex* alf, integer* nbet, doublecomplex* bet, integer* nmax, doublecomplex* ab, doublecomplex* aa, doublecomplex* as, doublecomplex* bb, doublecomplex* bs, doublecomplex* c__, doublecomplex* cc, doublecomplex* cs, doublecomplex* ct, doublereal* g, doublecomplex* w, integer* iorder, ftnlen sname_len)
|
||||
{
|
||||
/* Initialized data */
|
||||
|
||||
|
|
@ -2857,27 +2600,28 @@ ftnlen sname_len;
|
|||
static doublecomplex alpha;
|
||||
static doublereal rbeta;
|
||||
static logical isame[13];
|
||||
extern /* Subroutine */ int zmake_();
|
||||
extern /* Subroutine */ int zmake_(char*, char*, char*, integer*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, logical*, doublecomplex*, ftnlen, ftnlen, ftnlen);
|
||||
static integer nargs;
|
||||
extern /* Subroutine */ int zmmch_();
|
||||
extern /* Subroutine */ int zmmch_(char*, char*, integer*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, doublereal*, doublecomplex*, integer*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen, ftnlen);
|
||||
static doublereal rbets;
|
||||
static logical reset;
|
||||
static char trans[1];
|
||||
static logical upper;
|
||||
static char uplos[1];
|
||||
static integer ia, ib, jc, ma, na, nc;
|
||||
extern /* Subroutine */ int zprcn5_(), zprcn7_();
|
||||
extern /* Subroutine */ int zprcn5_(integer*, integer*, char*, integer*, char*, char*, integer*, integer*, doublecomplex*, integer*, integer*, doublecomplex*, integer*, ftnlen, ftnlen, ftnlen);
|
||||
extern /* Subroutine */ int zprcn7_(integer*, integer*, char*, integer*, char*, char*, integer*, integer*, doublecomplex*, integer*, integer*, doublereal*, integer*, ftnlen, ftnlen, ftnlen);
|
||||
static integer ik, in, jj, lj, ks, ns;
|
||||
static doublereal errmax;
|
||||
extern logical lzeres_();
|
||||
extern logical lzeres_(char*, char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen, ftnlen);
|
||||
static char transs[1], transt[1];
|
||||
extern /* Subroutine */ int czher2k_();
|
||||
extern /* Subroutine */ int czher2k_(integer*, char*, char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublereal*, doublecomplex*, integer*, ftnlen, ftnlen);
|
||||
static integer laa, lbb, lda, lcc, ldb, ldc;
|
||||
static doublecomplex als;
|
||||
static integer ict, icu;
|
||||
extern /* Subroutine */ int czsyr2k_();
|
||||
extern /* Subroutine */ int czsyr2k_(integer*, char*, char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen, ftnlen);
|
||||
static doublereal err;
|
||||
extern logical lze_();
|
||||
extern logical lze_(doublecomplex*, doublecomplex*, integer*);
|
||||
|
||||
/* Tests ZHER2K and ZSYR2K. */
|
||||
|
||||
|
|
@ -3349,20 +3093,7 @@ L160:
|
|||
} /* zchk5_ */
|
||||
|
||||
|
||||
/* Subroutine */ int zprcn5_(nout, nc, sname, iorder, uplo, transa, n, k,
|
||||
alpha, lda, ldb, beta, ldc, sname_len, uplo_len, transa_len)
|
||||
integer *nout, *nc;
|
||||
char *sname;
|
||||
integer *iorder;
|
||||
char *uplo, *transa;
|
||||
integer *n, *k;
|
||||
doublecomplex *alpha;
|
||||
integer *lda, *ldb;
|
||||
doublecomplex *beta;
|
||||
integer *ldc;
|
||||
ftnlen sname_len;
|
||||
ftnlen uplo_len;
|
||||
ftnlen transa_len;
|
||||
/* Subroutine */ int zprcn5_(integer* nout, integer* nc, char* sname, integer* iorder, char* uplo, char* transa, integer* n, integer* k, doublecomplex* alpha, integer* lda, integer* ldb, doublecomplex* beta, integer* ldc, ftnlen sname_len, ftnlen uplo_len, ftnlen transa_len)
|
||||
{
|
||||
/* Local variables */
|
||||
static char ca[14], cu[14], crc[14];
|
||||
|
|
@ -3392,20 +3123,7 @@ return 0;
|
|||
|
||||
|
||||
|
||||
/* Subroutine */ int zprcn7_(nout, nc, sname, iorder, uplo, transa, n, k,
|
||||
alpha, lda, ldb, beta, ldc, sname_len, uplo_len, transa_len)
|
||||
integer *nout, *nc;
|
||||
char *sname;
|
||||
integer *iorder;
|
||||
char *uplo, *transa;
|
||||
integer *n, *k;
|
||||
doublecomplex *alpha;
|
||||
integer *lda, *ldb;
|
||||
doublereal *beta;
|
||||
integer *ldc;
|
||||
ftnlen sname_len;
|
||||
ftnlen uplo_len;
|
||||
ftnlen transa_len;
|
||||
/* Subroutine */ int zprcn7_(integer* nout, integer* nc, char* sname, integer* iorder, char* uplo, char* transa, integer* n, integer* k, doublecomplex* alpha, integer* lda, integer* ldb, doublereal* beta, integer* ldc, ftnlen sname_len, ftnlen uplo_len, ftnlen transa_len)
|
||||
{
|
||||
|
||||
/* Local variables */
|
||||
|
|
@ -3435,19 +3153,7 @@ return 0;
|
|||
} /* zprcn7_ */
|
||||
|
||||
|
||||
/* Subroutine */ int zmake_(type__, uplo, diag, m, n, a, nmax, aa, lda, reset,
|
||||
transl, type_len, uplo_len, diag_len)
|
||||
char *type__, *uplo, *diag;
|
||||
integer *m, *n;
|
||||
doublecomplex *a;
|
||||
integer *nmax;
|
||||
doublecomplex *aa;
|
||||
integer *lda;
|
||||
logical *reset;
|
||||
doublecomplex *transl;
|
||||
ftnlen type_len;
|
||||
ftnlen uplo_len;
|
||||
ftnlen diag_len;
|
||||
/* Subroutine */ int zmake_(char* type__, char* uplo, char* diag, integer* m, integer* n, doublecomplex* a, integer* nmax, doublecomplex* aa, integer* lda, logical* reset, doublecomplex* transl, ftnlen type_len, ftnlen uplo_len, ftnlen diag_len)
|
||||
{
|
||||
/* System generated locals */
|
||||
integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
|
||||
|
|
@ -3456,7 +3162,7 @@ ftnlen diag_len;
|
|||
|
||||
/* Local variables */
|
||||
static integer ibeg, iend;
|
||||
extern /* Double Complex */ VOID zbeg_();
|
||||
extern /* Double Complex */ VOID zbeg_(doublecomplex*, logical*);
|
||||
static logical unit;
|
||||
static integer i__, j;
|
||||
static logical lower, upper;
|
||||
|
|
@ -3629,27 +3335,7 @@ ftnlen diag_len;
|
|||
|
||||
} /* zmake_ */
|
||||
|
||||
/* Subroutine */ int zmmch_(transa, transb, m, n, kk, alpha, a, lda, b, ldb,
|
||||
beta, c__, ldc, ct, g, cc, ldcc, eps, err, fatal, nout, mv,
|
||||
transa_len, transb_len)
|
||||
char *transa, *transb;
|
||||
integer *m, *n, *kk;
|
||||
doublecomplex *alpha, *a;
|
||||
integer *lda;
|
||||
doublecomplex *b;
|
||||
integer *ldb;
|
||||
doublecomplex *beta, *c__;
|
||||
integer *ldc;
|
||||
doublecomplex *ct;
|
||||
doublereal *g;
|
||||
doublecomplex *cc;
|
||||
integer *ldcc;
|
||||
doublereal *eps, *err;
|
||||
logical *fatal;
|
||||
integer *nout;
|
||||
logical *mv;
|
||||
ftnlen transa_len;
|
||||
ftnlen transb_len;
|
||||
/* Subroutine */ int zmmch_(char* transa, char* transb, integer* m, integer* n, integer* kk, doublecomplex* alpha, doublecomplex* a, integer* lda, doublecomplex* b, integer* ldb, doublecomplex* beta, doublecomplex* c__, integer* ldc, doublecomplex* ct, doublereal* g, doublecomplex* cc, integer* ldcc, doublereal* eps, doublereal* err, logical* fatal, integer* nout, logical* mv, ftnlen transa_len, ftnlen transb_len)
|
||||
{
|
||||
|
||||
/* System generated locals */
|
||||
|
|
@ -3658,7 +3344,7 @@ ftnlen transb_len;
|
|||
doublereal d__1, d__2, d__3, d__4, d__5, d__6;
|
||||
doublecomplex z__1, z__2, z__3, z__4;
|
||||
|
||||
double sqrt();
|
||||
double sqrt(double);
|
||||
/* Local variables */
|
||||
static doublereal erri;
|
||||
static integer i__, j, k;
|
||||
|
|
@ -4031,9 +3717,7 @@ L250:
|
|||
|
||||
} /* zmmch_ */
|
||||
|
||||
logical lze_(ri, rj, lr)
|
||||
doublecomplex *ri, *rj;
|
||||
integer *lr;
|
||||
logical lze_(doublecomplex* ri, doublecomplex* rj, integer* lr)
|
||||
{
|
||||
/* System generated locals */
|
||||
integer i__1, i__2, i__3;
|
||||
|
|
@ -4082,13 +3766,7 @@ L30:
|
|||
|
||||
} /* lze_ */
|
||||
|
||||
logical lzeres_(type__, uplo, m, n, aa, as, lda, type_len, uplo_len)
|
||||
char *type__, *uplo;
|
||||
integer *m, *n;
|
||||
doublecomplex *aa, *as;
|
||||
integer *lda;
|
||||
ftnlen type_len;
|
||||
ftnlen uplo_len;
|
||||
logical lzeres_(char* type__, char* uplo, integer* m, integer* n, doublecomplex *aa, doublecomplex* as, integer* lda, ftnlen type_len, ftnlen uplo_len)
|
||||
{
|
||||
/* System generated locals */
|
||||
integer aa_dim1, aa_offset, as_dim1, as_offset, i__1, i__2, i__3, i__4;
|
||||
|
|
@ -4184,9 +3862,7 @@ L80:
|
|||
|
||||
} /* lzeres_ */
|
||||
|
||||
/* Double Complex */ VOID zbeg_( ret_val, reset)
|
||||
doublecomplex * ret_val;
|
||||
logical *reset;
|
||||
/* Double Complex */ VOID zbeg_(doublecomplex* ret_val, logical* reset)
|
||||
{
|
||||
/* System generated locals */
|
||||
doublereal d__1, d__2;
|
||||
|
|
@ -4249,8 +3925,7 @@ L10:
|
|||
|
||||
} /* zbeg_ */
|
||||
|
||||
doublereal ddiff_(x, y)
|
||||
doublereal *x, *y;
|
||||
doublereal ddiff_(doublereal* x, doublereal* y)
|
||||
{
|
||||
/* System generated locals */
|
||||
doublereal ret_val;
|
||||
|
|
|
|||
|
|
@ -40,7 +40,7 @@
|
|||
#include <stdlib.h>
|
||||
#include "common.h"
|
||||
|
||||
int CNAME(int mode, blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, int (*function)(), void *sa, void *sb, BLASLONG nthreads) {
|
||||
int CNAME(int mode, blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, int (*function)(blas_arg_t*, BLASLONG*, BLASLONG*,FLOAT *, FLOAT *, BLASLONG ), void *sa, void *sb, BLASLONG nthreads) {
|
||||
|
||||
blas_queue_t queue[MAX_CPU_NUMBER];
|
||||
BLASLONG range[MAX_CPU_NUMBER + 1];
|
||||
|
|
|
|||
|
|
@ -60,7 +60,7 @@ static const int divide_rule[][2] =
|
|||
{ 1, 61}, { 2, 31}, { 7, 9}, { 8, 8},
|
||||
};
|
||||
|
||||
int CNAME(int mode, blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, int (*function)(), void *sa, void *sb, BLASLONG nthreads) {
|
||||
int CNAME(int mode, blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, int (*function)(blas_arg_t*, BLASLONG*, BLASLONG*,FLOAT *, FLOAT *, BLASLONG ), void *sa, void *sb, BLASLONG nthreads) {
|
||||
|
||||
blas_queue_t queue[MAX_CPU_NUMBER];
|
||||
|
||||
|
|
|
|||
|
|
@ -40,7 +40,7 @@
|
|||
#include <stdlib.h>
|
||||
#include "common.h"
|
||||
|
||||
int CNAME(int mode, blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, int (*function)(), void *sa, void *sb, BLASLONG nthreads) {
|
||||
int CNAME(int mode, blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, int (*function)(blas_arg_t*, BLASLONG*, BLASLONG*,FLOAT *, FLOAT *, BLASLONG), void *sa, void *sb, BLASLONG nthreads) {
|
||||
|
||||
blas_queue_t queue[MAX_CPU_NUMBER];
|
||||
BLASLONG range[MAX_CPU_NUMBER + 1];
|
||||
|
|
|
|||
|
|
@ -42,7 +42,7 @@
|
|||
|
||||
int CNAME(int mode,
|
||||
blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n,
|
||||
int (*function)(), void *sa, void *sb, BLASLONG divM, BLASLONG divN) {
|
||||
int (*function)(blas_arg_t*, BLASLONG*, BLASLONG*,FLOAT *, FLOAT *, BLASLONG ), void *sa, void *sb, BLASLONG divM, BLASLONG divN) {
|
||||
|
||||
blas_queue_t queue[MAX_CPU_NUMBER];
|
||||
|
||||
|
|
|
|||
|
|
@ -41,7 +41,7 @@
|
|||
#include <math.h>
|
||||
#include "common.h"
|
||||
|
||||
int CNAME(int mode, blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, int (*function)(), void *sa, void *sb, BLASLONG nthreads) {
|
||||
int CNAME(int mode, blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, int (*function)(blas_arg_t*, BLASLONG*, BLASLONG*, FLOAT *, FLOAT *, BLASLONG), void *sa, void *sb, BLASLONG nthreads) {
|
||||
|
||||
blas_queue_t queue[MAX_CPU_NUMBER];
|
||||
BLASLONG range[MAX_CPU_NUMBER + 1];
|
||||
|
|
|
|||
|
|
@ -43,7 +43,7 @@
|
|||
int blas_level1_thread(int mode, BLASLONG m, BLASLONG n, BLASLONG k, void *alpha,
|
||||
void *a, BLASLONG lda,
|
||||
void *b, BLASLONG ldb,
|
||||
void *c, BLASLONG ldc, int (*function)(), int nthreads){
|
||||
void *c, BLASLONG ldc, int (*function)(void), int nthreads){
|
||||
|
||||
blas_queue_t queue[MAX_CPU_NUMBER];
|
||||
blas_arg_t args [MAX_CPU_NUMBER];
|
||||
|
|
@ -141,7 +141,7 @@ int blas_level1_thread(int mode, BLASLONG m, BLASLONG n, BLASLONG k, void *alpha
|
|||
int blas_level1_thread_with_return_value(int mode, BLASLONG m, BLASLONG n, BLASLONG k, void *alpha,
|
||||
void *a, BLASLONG lda,
|
||||
void *b, BLASLONG ldb,
|
||||
void *c, BLASLONG ldc, int (*function)(), int nthreads){
|
||||
void *c, BLASLONG ldc, int (*function)(void), int nthreads){
|
||||
|
||||
blas_queue_t queue[MAX_CPU_NUMBER];
|
||||
blas_arg_t args [MAX_CPU_NUMBER];
|
||||
|
|
|
|||
|
|
@ -93,7 +93,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#endif
|
||||
#endif
|
||||
|
||||
extern unsigned int openblas_thread_timeout();
|
||||
extern unsigned int openblas_thread_timeout(void);
|
||||
|
||||
#ifdef SMP_SERVER
|
||||
|
||||
|
|
|
|||
|
|
@ -70,7 +70,7 @@
|
|||
int blas_server_avail = 0;
|
||||
int blas_omp_number_max = 0;
|
||||
|
||||
extern int openblas_omp_adaptive_env();
|
||||
extern int openblas_omp_adaptive_env(void);
|
||||
|
||||
static void * blas_thread_buffer[MAX_PARALLEL_NUMBER][MAX_CPU_NUMBER];
|
||||
#ifdef HAVE_C11
|
||||
|
|
@ -79,7 +79,7 @@ static atomic_bool blas_buffer_inuse[MAX_PARALLEL_NUMBER];
|
|||
static _Bool blas_buffer_inuse[MAX_PARALLEL_NUMBER];
|
||||
#endif
|
||||
|
||||
static void adjust_thread_buffers() {
|
||||
static void adjust_thread_buffers(void) {
|
||||
|
||||
int i=0, j=0;
|
||||
|
||||
|
|
@ -124,8 +124,17 @@ void openblas_set_num_threads(int num_threads) {
|
|||
}
|
||||
|
||||
int blas_thread_init(void){
|
||||
if(blas_omp_number_max <= 0)
|
||||
blas_omp_number_max = omp_get_max_threads();
|
||||
|
||||
#if defined(__FreeBSD__) && defined(__clang__)
|
||||
extern int openblas_omp_num_threads_env(void);
|
||||
|
||||
if(blas_omp_number_max <= 0)
|
||||
blas_omp_number_max= openblas_omp_num_threads_env();
|
||||
if (blas_omp_number_max <= 0)
|
||||
blas_omp_number_max=MAX_CPU_NUMBER;
|
||||
#else
|
||||
blas_omp_number_max = omp_get_max_threads();
|
||||
#endif
|
||||
|
||||
blas_get_cpu_number();
|
||||
|
||||
|
|
|
|||
|
|
@ -805,7 +805,8 @@ static gotoblas_t *get_coretype(void){
|
|||
}
|
||||
return NULL;
|
||||
}
|
||||
case 0xf:
|
||||
break;
|
||||
case 0xf:
|
||||
if (model <= 0x2) return &gotoblas_NORTHWOOD;
|
||||
return &gotoblas_PRESCOTT;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@
|
|||
|
||||
extern gotoblas_t gotoblas_POWER6;
|
||||
extern gotoblas_t gotoblas_POWER8;
|
||||
#if (!defined __GNUC__) || ( __GNUC__ >= 6)
|
||||
#if ((!defined __GNUC__) || ( __GNUC__ >= 6)) || defined(__clang__)
|
||||
extern gotoblas_t gotoblas_POWER9;
|
||||
#endif
|
||||
#ifdef HAVE_P10_SUPPORT
|
||||
|
|
@ -20,14 +20,14 @@ static char *corename[] = {
|
|||
"POWER10"
|
||||
};
|
||||
|
||||
#define NUM_CORETYPES 4
|
||||
#define NUM_CORETYPES 5
|
||||
|
||||
char *gotoblas_corename(void) {
|
||||
#ifndef C_PGI
|
||||
if (gotoblas == &gotoblas_POWER6) return corename[1];
|
||||
#endif
|
||||
if (gotoblas == &gotoblas_POWER8) return corename[2];
|
||||
#if (!defined __GNUC__) || ( __GNUC__ >= 6)
|
||||
#if ((!defined __GNUC__) || ( __GNUC__ >= 6)) || defined(__clang__)
|
||||
if (gotoblas == &gotoblas_POWER9) return corename[3];
|
||||
#endif
|
||||
#ifdef HAVE_P10_SUPPORT
|
||||
|
|
@ -36,13 +36,37 @@ char *gotoblas_corename(void) {
|
|||
return corename[0];
|
||||
}
|
||||
|
||||
#if defined(__clang__)
|
||||
static int __builtin_cpu_supports(char* arg)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
#define CPU_UNKNOWN 0
|
||||
#define CPU_POWER5 5
|
||||
#define CPU_POWER6 6
|
||||
#define CPU_POWER8 8
|
||||
#define CPU_POWER9 9
|
||||
#define CPU_POWER10 10
|
||||
|
||||
#ifdef _AIX
|
||||
#include <sys/systemcfg.h>
|
||||
|
||||
static int cpuid(void)
|
||||
{
|
||||
int arch = _system_configuration.implementation;
|
||||
#ifdef POWER_6
|
||||
if (arch == POWER_6) return CPU_POWER6;
|
||||
#endif
|
||||
#ifdef POWER_7
|
||||
else if (arch == POWER_7) return CPU_POWER6;
|
||||
#endif
|
||||
#ifdef POWER_8
|
||||
else if (arch == POWER_8) return CPU_POWER8;
|
||||
#endif
|
||||
#ifdef POWER_9
|
||||
else if (arch == POWER_9) return CPU_POWER9;
|
||||
#endif
|
||||
#ifdef POWER_10
|
||||
else if (arch == POWER_10) return CPU_POWER10;
|
||||
#endif
|
||||
return CPU_UNKNOWN;
|
||||
}
|
||||
#else
|
||||
#if defined(C_PGI) || defined(__clang__)
|
||||
/*
|
||||
* NV HPC compilers do not yet implement __builtin_cpu_is().
|
||||
|
|
@ -53,21 +77,12 @@ static int __builtin_cpu_supports(char* arg)
|
|||
* what was requested.
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
|
||||
/*
|
||||
* Define POWER processor version table.
|
||||
*
|
||||
* NOTE NV HPC SDK compilers only support POWER8 and POWER9 at this time
|
||||
*/
|
||||
|
||||
#define CPU_UNKNOWN 0
|
||||
#define CPU_POWER5 5
|
||||
#define CPU_POWER6 6
|
||||
#define CPU_POWER8 8
|
||||
#define CPU_POWER9 9
|
||||
#define CPU_POWER10 10
|
||||
|
||||
static struct {
|
||||
uint32_t pvr_mask;
|
||||
uint32_t pvr_value;
|
||||
|
|
@ -160,7 +175,8 @@ static struct {
|
|||
},
|
||||
};
|
||||
|
||||
static int __builtin_cpu_is(const char *cpu) {
|
||||
static int cpuid(void)
|
||||
{
|
||||
int i;
|
||||
uint32_t pvr;
|
||||
uint32_t cpu_type;
|
||||
|
|
@ -178,15 +194,46 @@ static int __builtin_cpu_is(const char *cpu) {
|
|||
pvrPOWER[i].cpu_name, pvrPOWER[i].cpu_type);
|
||||
#endif
|
||||
cpu_type = pvrPOWER[i].cpu_type;
|
||||
|
||||
if (!strcmp(cpu, "power8"))
|
||||
return cpu_type == CPU_POWER8;
|
||||
if (!strcmp(cpu, "power9"))
|
||||
return cpu_type == CPU_POWER9;
|
||||
return 0;
|
||||
return (int)(cpu_type);
|
||||
}
|
||||
|
||||
#endif /* C_PGI */
|
||||
#endif /* _AIX */
|
||||
|
||||
#ifndef __BUILTIN_CPU_SUPPORTS__
|
||||
#include <string.h>
|
||||
|
||||
#if defined(_AIX) || (defined(__has_builtin) && !__has_builtin(__builtin_cpu_is))
|
||||
static int __builtin_cpu_is(const char *arg)
|
||||
{
|
||||
static int ipinfo = -1;
|
||||
if (ipinfo < 0) {
|
||||
ipinfo = cpuid();
|
||||
}
|
||||
#ifdef HAVE_P10_SUPPORT
|
||||
if (ipinfo == CPU_POWER10) {
|
||||
if (!strcmp(arg, "power10")) return 1;
|
||||
}
|
||||
#endif
|
||||
if (ipinfo == CPU_POWER9) {
|
||||
if (!strcmp(arg, "power9")) return 1;
|
||||
} else if (ipinfo == CPU_POWER8) {
|
||||
if (!strcmp(arg, "power8")) return 1;
|
||||
#ifndef C_PGI
|
||||
} else if (ipinfo == CPU_POWER6) {
|
||||
if (!strcmp(arg, "power6")) return 1;
|
||||
#endif
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(_AIX) || (defined(__has_builtin) && !__has_builtin(__builtin_cpu_supports))
|
||||
static int __builtin_cpu_supports(const char *arg)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
static gotoblas_t *get_coretype(void) {
|
||||
|
||||
|
|
@ -196,12 +243,16 @@ static gotoblas_t *get_coretype(void) {
|
|||
#endif
|
||||
if (__builtin_cpu_is("power8"))
|
||||
return &gotoblas_POWER8;
|
||||
#if (!defined __GNUC__) || ( __GNUC__ >= 6)
|
||||
#if ((!defined __GNUC__) || ( __GNUC__ >= 6)) || defined(__clang__)
|
||||
if (__builtin_cpu_is("power9"))
|
||||
return &gotoblas_POWER9;
|
||||
#endif
|
||||
#ifdef HAVE_P10_SUPPORT
|
||||
#if defined(_AIX) || defined(__clang__)
|
||||
if (__builtin_cpu_is("power10"))
|
||||
#else
|
||||
if (__builtin_cpu_supports ("arch_3_1") && __builtin_cpu_supports ("mma"))
|
||||
#endif
|
||||
return &gotoblas_POWER10;
|
||||
#endif
|
||||
/* Fall back to the POWER9 implementation if the toolchain is too old or the MMA feature is not set */
|
||||
|
|
@ -233,7 +284,7 @@ static gotoblas_t *force_coretype(char * coretype) {
|
|||
case 1: return (&gotoblas_POWER6);
|
||||
#endif
|
||||
case 2: return (&gotoblas_POWER8);
|
||||
#if (!defined __GNUC__) || ( __GNUC__ >= 6)
|
||||
#if ((!defined __GNUC__) || ( __GNUC__ >= 6)) || defined(__clang__)
|
||||
case 3: return (&gotoblas_POWER9);
|
||||
#endif
|
||||
#ifdef HAVE_P10_SUPPORT
|
||||
|
|
|
|||
|
|
@ -13,7 +13,7 @@ extern gotoblas_t gotoblas_Z14;
|
|||
|
||||
#define NUM_CORETYPES 4
|
||||
|
||||
extern int openblas_verbose();
|
||||
extern int openblas_verbose(void);
|
||||
extern void openblas_warning(int verbose, const char* msg);
|
||||
|
||||
char* gotoblas_corename(void) {
|
||||
|
|
|
|||
|
|
@ -73,6 +73,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#include "common.h"
|
||||
|
||||
#define NEW_BUFFERS 512
|
||||
#ifndef likely
|
||||
#ifdef __GNUC__
|
||||
#define likely(x) __builtin_expect(!!(x), 1)
|
||||
|
|
@ -426,9 +427,9 @@ int goto_get_num_procs (void) {
|
|||
return blas_cpu_number;
|
||||
}
|
||||
|
||||
static void blas_memory_init();
|
||||
static void blas_memory_init(void);
|
||||
|
||||
void openblas_fork_handler()
|
||||
void openblas_fork_handler(void)
|
||||
{
|
||||
// This handler shuts down the OpenBLAS-managed PTHREAD pool when OpenBLAS is
|
||||
// built with "make USE_OPENMP=0".
|
||||
|
|
@ -445,9 +446,9 @@ void openblas_fork_handler()
|
|||
#endif
|
||||
}
|
||||
|
||||
extern int openblas_num_threads_env();
|
||||
extern int openblas_goto_num_threads_env();
|
||||
extern int openblas_omp_num_threads_env();
|
||||
extern int openblas_num_threads_env(void);
|
||||
extern int openblas_goto_num_threads_env(void);
|
||||
extern int openblas_omp_num_threads_env(void);
|
||||
|
||||
int blas_get_cpu_number(void){
|
||||
#if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) || defined(OS_ANDROID) || defined(OS_HAIKU)
|
||||
|
|
@ -591,7 +592,7 @@ static BLASULONG key_lock = 0UL;
|
|||
#endif
|
||||
|
||||
/* Returns a pointer to the start of the per-thread memory allocation data */
|
||||
static __inline struct alloc_t ** get_memory_table() {
|
||||
static __inline struct alloc_t ** get_memory_table(void) {
|
||||
#if defined(SMP)
|
||||
LOCK_COMMAND(&key_lock);
|
||||
lsk=local_storage_key;
|
||||
|
|
@ -1144,7 +1145,7 @@ static void blas_memory_cleanup(void* ptr){
|
|||
}
|
||||
}
|
||||
|
||||
static void blas_memory_init(){
|
||||
static void blas_memory_init(void){
|
||||
#if defined(SMP)
|
||||
# if defined(OS_WINDOWS)
|
||||
local_storage_key = TlsAlloc();
|
||||
|
|
@ -1501,7 +1502,7 @@ static void gotoblas_memory_init(void) {
|
|||
/* Initialization for all function; this function should be called before main */
|
||||
|
||||
static int gotoblas_initialized = 0;
|
||||
extern void openblas_read_env();
|
||||
extern void openblas_read_env(void);
|
||||
|
||||
void CONSTRUCTOR gotoblas_init(void) {
|
||||
|
||||
|
|
@ -1998,7 +1999,7 @@ int goto_get_num_procs (void) {
|
|||
return blas_cpu_number;
|
||||
}
|
||||
|
||||
void openblas_fork_handler()
|
||||
void openblas_fork_handler(void)
|
||||
{
|
||||
// This handler shuts down the OpenBLAS-managed PTHREAD pool when OpenBLAS is
|
||||
// built with "make USE_OPENMP=0".
|
||||
|
|
@ -2015,9 +2016,9 @@ void openblas_fork_handler()
|
|||
#endif
|
||||
}
|
||||
|
||||
extern int openblas_num_threads_env();
|
||||
extern int openblas_goto_num_threads_env();
|
||||
extern int openblas_omp_num_threads_env();
|
||||
extern int openblas_num_threads_env(void);
|
||||
extern int openblas_goto_num_threads_env(void);
|
||||
extern int openblas_omp_num_threads_env(void);
|
||||
|
||||
int blas_get_cpu_number(void){
|
||||
#if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) || defined(OS_ANDROID) || defined(OS_HAIKU)
|
||||
|
|
@ -2897,7 +2898,7 @@ void *blas_memory_alloc(int procpos){
|
|||
#endif
|
||||
position ++;
|
||||
|
||||
} while (position < 512+NUM_BUFFERS);
|
||||
} while (position < NEW_BUFFERS + NUM_BUFFERS);
|
||||
}
|
||||
#if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP)
|
||||
UNLOCK_COMMAND(&alloc_lock);
|
||||
|
|
@ -3012,11 +3013,12 @@ void *blas_memory_alloc(int procpos){
|
|||
if (memory_overflowed) goto terminate;
|
||||
fprintf(stderr,"OpenBLAS warning: precompiled NUM_THREADS exceeded, adding auxiliary array for thread metadata.\n");
|
||||
fprintf(stderr,"To avoid this warning, please rebuild your copy of OpenBLAS with a larger NUM_THREADS setting\n");
|
||||
fprintf(stderr,"or set the environment variable OPENBLAS_NUM_THREADS to %d or lower\n", NUM_BUFFERS);
|
||||
fprintf(stderr,"or set the environment variable OPENBLAS_NUM_THREADS to %d or lower\n", MAX_CPU_NUMBER);
|
||||
memory_overflowed=1;
|
||||
new_release_info = (struct release_t*) malloc(512*sizeof(struct release_t));
|
||||
newmemory = (struct newmemstruct*) malloc(512*sizeof(struct newmemstruct));
|
||||
for (i = 0; i < 512; i++) {
|
||||
MB;
|
||||
new_release_info = (struct release_t*) malloc(NEW_BUFFERS * sizeof(struct release_t));
|
||||
newmemory = (struct newmemstruct*) malloc(NEW_BUFFERS * sizeof(struct newmemstruct));
|
||||
for (i = 0; i < NEW_BUFFERS; i++) {
|
||||
newmemory[i].addr = (void *)0;
|
||||
#if defined(WHEREAMI) && !defined(USE_OPENMP)
|
||||
newmemory[i].pos = -1;
|
||||
|
|
@ -3129,12 +3131,12 @@ void blas_memory_free(void *free_area){
|
|||
printf(" Position : %d\n", position);
|
||||
#endif
|
||||
if (unlikely(memory_overflowed && position >= NUM_BUFFERS)) {
|
||||
while ((position < NUM_BUFFERS+512) && (newmemory[position-NUM_BUFFERS].addr != free_area))
|
||||
while ((position < NUM_BUFFERS+NEW_BUFFERS) && (newmemory[position-NUM_BUFFERS].addr != free_area))
|
||||
position++;
|
||||
// arm: ensure all writes are finished before other thread takes this memory
|
||||
WMB;
|
||||
|
||||
newmemory[position].used = 0;
|
||||
if (position - NUM_BUFFERS >= NEW_BUFFERS) goto error;
|
||||
newmemory[position-NUM_BUFFERS].used = 0;
|
||||
#if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP)
|
||||
UNLOCK_COMMAND(&alloc_lock);
|
||||
#endif
|
||||
|
|
@ -3213,7 +3215,7 @@ void blas_shutdown(void){
|
|||
memory[pos].lock = 0;
|
||||
}
|
||||
if (memory_overflowed)
|
||||
for (pos = 0; pos < 512; pos ++){
|
||||
for (pos = 0; pos < NEW_BUFFERS; pos ++){
|
||||
newmemory[pos].addr = (void *)0;
|
||||
newmemory[pos].used = 0;
|
||||
#if defined(WHEREAMI) && !defined(USE_OPENMP)
|
||||
|
|
@ -3337,7 +3339,7 @@ static void gotoblas_memory_init(void) {
|
|||
/* Initialization for all function; this function should be called before main */
|
||||
|
||||
static int gotoblas_initialized = 0;
|
||||
extern void openblas_read_env();
|
||||
extern void openblas_read_env(void);
|
||||
|
||||
void CONSTRUCTOR gotoblas_init(void) {
|
||||
|
||||
|
|
|
|||
|
|
@ -288,7 +288,7 @@ int goto_get_num_procs (void) {
|
|||
return blas_cpu_number;
|
||||
}
|
||||
|
||||
void openblas_fork_handler()
|
||||
void openblas_fork_handler(void)
|
||||
{
|
||||
// This handler shuts down the OpenBLAS-managed PTHREAD pool when OpenBLAS is
|
||||
// built with "make USE_OPENMP=0".
|
||||
|
|
@ -305,9 +305,9 @@ void openblas_fork_handler()
|
|||
#endif
|
||||
}
|
||||
|
||||
extern int openblas_num_threads_env();
|
||||
extern int openblas_goto_num_threads_env();
|
||||
extern int openblas_omp_num_threads_env();
|
||||
extern int openblas_num_threads_env(void);
|
||||
extern int openblas_goto_num_threads_env(void);
|
||||
extern int openblas_omp_num_threads_env(void);
|
||||
|
||||
int blas_get_cpu_number(void){
|
||||
#if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) || defined(OS_ANDROID)
|
||||
|
|
|
|||
|
|
@ -41,15 +41,15 @@ static int openblas_env_goto_num_threads=0;
|
|||
static int openblas_env_omp_num_threads=0;
|
||||
static int openblas_env_omp_adaptive=0;
|
||||
|
||||
int openblas_verbose() { return openblas_env_verbose;}
|
||||
unsigned int openblas_thread_timeout() { return openblas_env_thread_timeout;}
|
||||
int openblas_block_factor() { return openblas_env_block_factor;}
|
||||
int openblas_num_threads_env() { return openblas_env_openblas_num_threads;}
|
||||
int openblas_goto_num_threads_env() { return openblas_env_goto_num_threads;}
|
||||
int openblas_omp_num_threads_env() { return openblas_env_omp_num_threads;}
|
||||
int openblas_omp_adaptive_env() { return openblas_env_omp_adaptive;}
|
||||
int openblas_verbose(void) { return openblas_env_verbose;}
|
||||
unsigned int openblas_thread_timeout(void) { return openblas_env_thread_timeout;}
|
||||
int openblas_block_factor(void) { return openblas_env_block_factor;}
|
||||
int openblas_num_threads_env(void) { return openblas_env_openblas_num_threads;}
|
||||
int openblas_goto_num_threads_env(void) { return openblas_env_goto_num_threads;}
|
||||
int openblas_omp_num_threads_env(void) { return openblas_env_omp_num_threads;}
|
||||
int openblas_omp_adaptive_env(void) { return openblas_env_omp_adaptive;}
|
||||
|
||||
void openblas_read_env() {
|
||||
void openblas_read_env(void) {
|
||||
int ret=0;
|
||||
env_var_t p;
|
||||
if (readenv(p,"OPENBLAS_VERBOSE")) ret = atoi(p);
|
||||
|
|
|
|||
|
|
@ -33,7 +33,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#include "common.h"
|
||||
|
||||
extern int openblas_verbose();
|
||||
extern int openblas_verbose(void);
|
||||
|
||||
void openblas_warning(int verbose, const char * msg) {
|
||||
int current_verbose;
|
||||
|
|
|
|||
|
|
@ -69,13 +69,13 @@ static char* openblas_config_str=""
|
|||
;
|
||||
|
||||
#ifdef DYNAMIC_ARCH
|
||||
char *gotoblas_corename();
|
||||
char *gotoblas_corename(void);
|
||||
#endif
|
||||
|
||||
static char tmp_config_str[256];
|
||||
int openblas_get_parallel();
|
||||
int openblas_get_parallel(void);
|
||||
|
||||
char* CNAME() {
|
||||
char* CNAME(void) {
|
||||
char tmpstr[20];
|
||||
strcpy(tmp_config_str, openblas_config_str);
|
||||
#ifdef DYNAMIC_ARCH
|
||||
|
|
@ -90,7 +90,7 @@ char tmpstr[20];
|
|||
}
|
||||
|
||||
|
||||
char* openblas_get_corename() {
|
||||
char* openblas_get_corename(void) {
|
||||
#ifndef DYNAMIC_ARCH
|
||||
return CHAR_CORENAME;
|
||||
#else
|
||||
|
|
|
|||
|
|
@ -42,17 +42,17 @@ static int parallel = 0;
|
|||
|
||||
|
||||
#ifdef NEEDBUNDERSCORE
|
||||
int CNAME() {
|
||||
int CNAME(void) {
|
||||
return parallel;
|
||||
}
|
||||
|
||||
int NAME() {
|
||||
int NAME(void) {
|
||||
return parallel;
|
||||
}
|
||||
|
||||
#else
|
||||
//The CNAME and NAME are the same.
|
||||
int NAME() {
|
||||
int NAME(void) {
|
||||
return parallel;
|
||||
}
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -40,7 +40,7 @@
|
|||
#include <string.h>
|
||||
#include "common.h"
|
||||
|
||||
extern int openblas_block_factor();
|
||||
extern int openblas_block_factor(void);
|
||||
int get_L2_size(void);
|
||||
|
||||
#define DEFAULT_GEMM_P 128
|
||||
|
|
|
|||
|
|
@ -272,23 +272,23 @@ static : ../$(LIBNAME)
|
|||
rm -f goto.$(SUFFIX)
|
||||
|
||||
osx.def : $(GENSYM) ../Makefile.system ../getarch.c
|
||||
./$(GENSYM) osx $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > $(@F)
|
||||
./$(GENSYM) osx $(ARCH) "$(BU)" $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > $(@F)
|
||||
|
||||
aix.def : $(GENSYM) ../Makefile.system ../getarch.c
|
||||
./$(GENSYM) aix $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > $(@F)
|
||||
./$(GENSYM) aix $(ARCH) "$(BU)" $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > $(@F)
|
||||
|
||||
objcopy.def : $(GENSYM) ../Makefile.system ../getarch.c
|
||||
./$(GENSYM) objcopy $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > $(@F)
|
||||
./$(GENSYM) objcopy $(ARCH) "$(BU)" $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > $(@F)
|
||||
|
||||
objconv.def : $(GENSYM) ../Makefile.system ../getarch.c
|
||||
./$(GENSYM) objconv $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > $(@F)
|
||||
./$(GENSYM) objconv $(ARCH) "$(BU)" $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > $(@F)
|
||||
|
||||
test : linktest.c
|
||||
$(CC) $(CFLAGS) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) -lm && echo OK.
|
||||
rm -f linktest
|
||||
|
||||
linktest.c : $(GENSYM) ../Makefile.system ../getarch.c
|
||||
./$(GENSYM) linktest $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > linktest.c
|
||||
./$(GENSYM) linktest $(ARCH) "$(BU)" $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > linktest.c
|
||||
|
||||
clean ::
|
||||
@rm -f *.def *.dylib __.SYMDEF* *.renamed
|
||||
|
|
|
|||
18
f_check
18
f_check
|
|
@ -117,6 +117,9 @@ else
|
|||
vendor=PGI
|
||||
openmp='-mp'
|
||||
;;
|
||||
*xlf*)
|
||||
vendor=IBM
|
||||
;;
|
||||
*)
|
||||
vendor=G77
|
||||
openmp=''
|
||||
|
|
@ -155,6 +158,10 @@ else
|
|||
*'IBM XL'*)
|
||||
vendor=IBM
|
||||
openmp='-openmp'
|
||||
case "$CC" in *gcc*)
|
||||
bu=_
|
||||
;;
|
||||
esac
|
||||
;;
|
||||
*NAG*)
|
||||
vendor=NAG
|
||||
|
|
@ -223,6 +230,10 @@ else
|
|||
*ppuf*|*xlf*)
|
||||
vendor=IBM
|
||||
openmp='-openmp'
|
||||
case "$CC" in *gcc*)
|
||||
bu=_
|
||||
;;
|
||||
esac
|
||||
;;
|
||||
*open64*)
|
||||
vendor=OPEN64
|
||||
|
|
@ -362,13 +373,6 @@ if [ -n "$link" ]; then
|
|||
;;
|
||||
esac
|
||||
|
||||
case "$flag" in *-lgomp*)
|
||||
case "$CC" in *clang*)
|
||||
flag="-lomp"
|
||||
;;
|
||||
esac
|
||||
esac
|
||||
|
||||
case "$flag" in -l*)
|
||||
case "$flag" in
|
||||
*ibrary*|*gfortranbegin*|*flangmain*|*frtbegin*|*pathfstart*|\
|
||||
|
|
|
|||
|
|
@ -1301,7 +1301,7 @@ xhpr2.$(SUFFIX) xhpr2.$(PSUFFIX) : zhpr2.c
|
|||
ifeq ($(BUILD_BFLOAT16),1)
|
||||
sbgemm.$(SUFFIX) sbgemm.$(PSUFFIX) : gemm.c ../param.h
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
sbgemmt.$(SUFFIX) sbgemm.$(PSUFFIX) : gemmt.c ../param.h
|
||||
sbgemmt.$(SUFFIX) sbgemmt.$(PSUFFIX) : gemmt.c ../param.h
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
endif
|
||||
|
||||
|
|
|
|||
|
|
@ -100,27 +100,29 @@ void CNAME( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows,
|
|||
|
||||
if ( order == BlasColMajor)
|
||||
{
|
||||
if ( trans == BlasNoTrans && *ldb < *rows ) info = 8;
|
||||
if ( trans == BlasTrans && *ldb < *cols ) info = 8;
|
||||
if ( trans == BlasNoTrans && *ldb < MAX(1,*rows) ) info = 8;
|
||||
if ( trans == BlasTrans && *ldb < MAX(1,*cols) ) info = 8;
|
||||
}
|
||||
if ( order == BlasRowMajor)
|
||||
{
|
||||
if ( trans == BlasNoTrans && *ldb < *cols ) info = 8;
|
||||
if ( trans == BlasTrans && *ldb < *rows ) info = 8;
|
||||
if ( trans == BlasNoTrans && *ldb < MAX(1,*cols) ) info = 8;
|
||||
if ( trans == BlasTrans && *ldb < MAX(1,*rows) ) info = 8;
|
||||
}
|
||||
|
||||
if ( order == BlasColMajor && *lda < *rows ) info = 7;
|
||||
if ( order == BlasRowMajor && *lda < *cols ) info = 7;
|
||||
if ( *cols <= 0 ) info = 4;
|
||||
if ( *rows <= 0 ) info = 3;
|
||||
if ( trans < 0 ) info = 2;
|
||||
if ( order < 0 ) info = 1;
|
||||
if ( order == BlasColMajor && *lda < MAX(1,*rows) ) info = 7;
|
||||
if ( order == BlasRowMajor && *lda < MAX(1,*cols) ) info = 7;
|
||||
if ( *cols < 0 ) info = 4;
|
||||
if ( *rows < 0 ) info = 3;
|
||||
if ( trans < 0 ) info = 2;
|
||||
if ( order < 0 ) info = 1;
|
||||
|
||||
if (info >= 0) {
|
||||
BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
|
||||
return;
|
||||
}
|
||||
|
||||
if ((*rows == 0) || (*cols == 0)) return;
|
||||
|
||||
#ifdef NEW_IMATCOPY
|
||||
if ( *lda == *ldb ) {
|
||||
if ( order == BlasColMajor )
|
||||
|
|
|
|||
|
|
@ -97,7 +97,7 @@ int NAME(blasint *N, FLOAT *a, blasint *LDA, blasint *K1, blasint *K2, blasint *
|
|||
|
||||
blas_level1_thread(mode, n, k1, k2, dummyalpha,
|
||||
a, lda, NULL, 0, ipiv, incx,
|
||||
(int(*)())laswp[flag], nthreads);
|
||||
(int(*)(void))laswp[flag], nthreads);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
|
|
|||
|
|
@ -96,7 +96,7 @@ int NAME(blasint *N, FLOAT *a, blasint *LDA, blasint *K1, blasint *K2, blasint *
|
|||
mode = BLAS_SINGLE | BLAS_COMPLEX;
|
||||
#endif
|
||||
|
||||
blas_level1_thread(mode, n, k1, k2, dummyalpha, a, lda, NULL, 0, ipiv, incx, (int(*)())laswp[flag], nthreads);
|
||||
blas_level1_thread(mode, n, k1, k2, dummyalpha, a, lda, NULL, 0, ipiv, incx, (int(*)(void))laswp[flag], nthreads);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
|
|
|||
|
|
@ -90,27 +90,29 @@ void CNAME(enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows,
|
|||
#endif
|
||||
if ( order == BlasColMajor)
|
||||
{
|
||||
if ( trans == BlasNoTrans && *ldb < *rows ) info = 9;
|
||||
if ( trans == BlasTrans && *ldb < *cols ) info = 9;
|
||||
if ( trans == BlasNoTrans && *ldb < MAX(1,*rows) ) info = 9;
|
||||
if ( trans == BlasTrans && *ldb < MAX(1,*cols) ) info = 9;
|
||||
}
|
||||
if ( order == BlasRowMajor)
|
||||
{
|
||||
if ( trans == BlasNoTrans && *ldb < *cols ) info = 9;
|
||||
if ( trans == BlasTrans && *ldb < *rows ) info = 9;
|
||||
if ( trans == BlasNoTrans && *ldb < MAX(1,*cols) ) info = 9;
|
||||
if ( trans == BlasTrans && *ldb < MAX(1,*rows) ) info = 9;
|
||||
}
|
||||
|
||||
if ( order == BlasColMajor && *lda < *rows ) info = 7;
|
||||
if ( order == BlasRowMajor && *lda < *cols ) info = 7;
|
||||
if ( *cols <= 0 ) info = 4;
|
||||
if ( *rows <= 0 ) info = 3;
|
||||
if ( trans < 0 ) info = 2;
|
||||
if ( order < 0 ) info = 1;
|
||||
if ( order == BlasColMajor && *lda < MAX(1,*rows) ) info = 7;
|
||||
if ( order == BlasRowMajor && *lda < MAX(1,*cols) ) info = 7;
|
||||
if ( *cols < 0 ) info = 4;
|
||||
if ( *rows < 0 ) info = 3;
|
||||
if ( trans < 0 ) info = 2;
|
||||
if ( order < 0 ) info = 1;
|
||||
|
||||
if (info >= 0) {
|
||||
BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
|
||||
return;
|
||||
}
|
||||
|
||||
if ((*rows == 0) || (*cols == 0)) return;
|
||||
|
||||
if ( order == BlasColMajor )
|
||||
{
|
||||
if ( trans == BlasNoTrans )
|
||||
|
|
|
|||
|
|
@ -66,13 +66,8 @@ void CNAME(FLOAT *DA, FLOAT *DB, FLOAT *C, FLOAT *S){
|
|||
c = da / r;
|
||||
s = db / r;
|
||||
z = ONE;
|
||||
if (da != ZERO) {
|
||||
if (ada > adb){
|
||||
z = s;
|
||||
} else {
|
||||
z = ONE / c;
|
||||
}
|
||||
}
|
||||
if (ada > adb) z = s;
|
||||
if ((ada <= adb) && (c != ZERO)) z = ONE / c;
|
||||
|
||||
*C = c;
|
||||
*S = s;
|
||||
|
|
|
|||
|
|
@ -101,31 +101,33 @@ void CNAME( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows,
|
|||
|
||||
if ( order == BlasColMajor)
|
||||
{
|
||||
if ( trans == BlasNoTrans && *ldb < *rows ) info = 9;
|
||||
if ( trans == BlasConj && *ldb < *rows ) info = 9;
|
||||
if ( trans == BlasTrans && *ldb < *cols ) info = 9;
|
||||
if ( trans == BlasTransConj && *ldb < *cols ) info = 9;
|
||||
if ( trans == BlasNoTrans && *ldb < MAX(1,*rows) ) info = 9;
|
||||
if ( trans == BlasConj && *ldb < MAX(1,*rows) ) info = 9;
|
||||
if ( trans == BlasTrans && *ldb < MAX(1,*cols) ) info = 9;
|
||||
if ( trans == BlasTransConj && *ldb < MAX(1,*cols) ) info = 9;
|
||||
}
|
||||
if ( order == BlasRowMajor)
|
||||
{
|
||||
if ( trans == BlasNoTrans && *ldb < *cols ) info = 9;
|
||||
if ( trans == BlasConj && *ldb < *cols ) info = 9;
|
||||
if ( trans == BlasTrans && *ldb < *rows ) info = 9;
|
||||
if ( trans == BlasTransConj && *ldb < *rows ) info = 9;
|
||||
if ( trans == BlasNoTrans && *ldb < MAX(1,*cols) ) info = 9;
|
||||
if ( trans == BlasConj && *ldb < MAX(1,*cols) ) info = 9;
|
||||
if ( trans == BlasTrans && *ldb < MAX(1,*rows) ) info = 9;
|
||||
if ( trans == BlasTransConj && *ldb < MAX(1,*rows) ) info = 9;
|
||||
}
|
||||
|
||||
if ( order == BlasColMajor && *lda < *rows ) info = 7;
|
||||
if ( order == BlasRowMajor && *lda < *cols ) info = 7;
|
||||
if ( *cols <= 0 ) info = 4;
|
||||
if ( *rows <= 0 ) info = 3;
|
||||
if ( trans < 0 ) info = 2;
|
||||
if ( order < 0 ) info = 1;
|
||||
if ( order == BlasColMajor && *lda < MAX(1,*rows) ) info = 7;
|
||||
if ( order == BlasRowMajor && *lda < MAX(1,*cols) ) info = 7;
|
||||
if ( *cols < 0 ) info = 4;
|
||||
if ( *rows < 0 ) info = 3;
|
||||
if ( trans < 0 ) info = 2;
|
||||
if ( order < 0 ) info = 1;
|
||||
|
||||
if (info >= 0) {
|
||||
BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
|
||||
return;
|
||||
}
|
||||
|
||||
if ((*rows == 0) || (*cols == 0)) return;
|
||||
|
||||
#ifdef NEW_IMATCOPY
|
||||
if (*lda == *ldb ) {
|
||||
if ( order == BlasColMajor )
|
||||
|
|
|
|||
|
|
@ -92,31 +92,33 @@ void CNAME(enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows,
|
|||
#endif
|
||||
if ( order == BlasColMajor)
|
||||
{
|
||||
if ( trans == BlasNoTrans && *ldb < *rows ) info = 9;
|
||||
if ( trans == BlasConj && *ldb < *rows ) info = 9;
|
||||
if ( trans == BlasTrans && *ldb < *cols ) info = 9;
|
||||
if ( trans == BlasTransConj && *ldb < *cols ) info = 9;
|
||||
if ( trans == BlasNoTrans && *ldb < MAX(1,*rows) ) info = 9;
|
||||
if ( trans == BlasConj && *ldb < MAX(1,*rows) ) info = 9;
|
||||
if ( trans == BlasTrans && *ldb < MAX(1,*cols) ) info = 9;
|
||||
if ( trans == BlasTransConj && *ldb < MAX(1,*cols) ) info = 9;
|
||||
}
|
||||
if ( order == BlasRowMajor)
|
||||
{
|
||||
if ( trans == BlasNoTrans && *ldb < *cols ) info = 9;
|
||||
if ( trans == BlasConj && *ldb < *cols ) info = 9;
|
||||
if ( trans == BlasTrans && *ldb < *rows ) info = 9;
|
||||
if ( trans == BlasTransConj && *ldb < *rows ) info = 9;
|
||||
if ( trans == BlasNoTrans && *ldb < MAX(1,*cols) ) info = 9;
|
||||
if ( trans == BlasConj && *ldb < MAX(1,*cols) ) info = 9;
|
||||
if ( trans == BlasTrans && *ldb < MAX(1,*rows) ) info = 9;
|
||||
if ( trans == BlasTransConj && *ldb < MAX(1,*rows) ) info = 9;
|
||||
}
|
||||
|
||||
if ( order == BlasColMajor && *lda < *rows ) info = 7;
|
||||
if ( order == BlasRowMajor && *lda < *cols ) info = 7;
|
||||
if ( *cols <= 0 ) info = 4;
|
||||
if ( *rows <= 0 ) info = 3;
|
||||
if ( trans < 0 ) info = 2;
|
||||
if ( order < 0 ) info = 1;
|
||||
if ( order == BlasColMajor && *lda < MAX(1,*rows) ) info = 7;
|
||||
if ( order == BlasRowMajor && *lda < MAX(1,*cols) ) info = 7;
|
||||
if ( *cols < 0 ) info = 4;
|
||||
if ( *rows < 0 ) info = 3;
|
||||
if ( trans < 0 ) info = 2;
|
||||
if ( order < 0 ) info = 1;
|
||||
|
||||
if (info >= 0) {
|
||||
BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
|
||||
return;
|
||||
}
|
||||
|
||||
if ((*rows == 0) || (*cols == 0)) return;
|
||||
|
||||
if ( order == BlasColMajor )
|
||||
{
|
||||
|
||||
|
|
|
|||
|
|
@ -30,14 +30,12 @@ void CNAME(void *VDA, void *VDB, FLOAT *C, void *VS) {
|
|||
FLOAT db_r = *(DB+0);
|
||||
FLOAT db_i = *(DB+1);
|
||||
//long double r;
|
||||
FLOAT *r, *S1=(FLOAT *)malloc(2*sizeof(FLOAT));
|
||||
FLOAT *R=(FLOAT *)malloc(2*sizeof(FLOAT));
|
||||
FLOAT S1[2];
|
||||
FLOAT R[2];
|
||||
long double d;
|
||||
|
||||
FLOAT ada = da_r * da_r + da_i * da_i;
|
||||
FLOAT adb = db_r * db_r + db_i * db_i;
|
||||
FLOAT adart = sqrt( da_r * da_r + da_i * da_i);
|
||||
FLOAT adbrt = sqrt( db_r * db_r + db_i * db_i);
|
||||
|
||||
PRINT_DEBUG_NAME;
|
||||
|
||||
|
|
@ -64,13 +62,13 @@ void CNAME(void *VDA, void *VDB, FLOAT *C, void *VS) {
|
|||
*C = ZERO;
|
||||
if (db_r == ZERO) {
|
||||
(*DA) = fabsl(db_i);
|
||||
*S = *S1 /da_r;
|
||||
*(S+1) = *(S1+1) /da_r;
|
||||
*S = *S1 /(*DA);
|
||||
*(S+1) = *(S1+1) /(*DA);
|
||||
return;
|
||||
} else if ( db_i == ZERO) {
|
||||
*DA = fabsl(db_r);
|
||||
*S = *S1 /da_r;
|
||||
*(S+1) = *(S1+1) /da_r;
|
||||
*S = *S1 /(*DA);
|
||||
*(S+1) = *(S1+1) /(*DA);
|
||||
return;
|
||||
} else {
|
||||
long double g1 = MAX( fabsl(db_r), fabsl(db_i));
|
||||
|
|
@ -115,10 +113,13 @@ void CNAME(void *VDA, void *VDB, FLOAT *C, void *VS) {
|
|||
}
|
||||
} else {
|
||||
*C = ada / adahsq;
|
||||
if (*C >= safmin)
|
||||
if (*C >= safmin) {
|
||||
*R = *DA / *C;
|
||||
else
|
||||
*(R+1) = *(DA+1) / *(C+1);
|
||||
} else {
|
||||
*R = *DA * (h / adahsq);
|
||||
*(R+1) = *(DA+1) * (h / adahsq);
|
||||
}
|
||||
*S = *S1 * ada / adahsq;
|
||||
*(S+1) = *(S1+1) * ada / adahsq;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -5,12 +5,6 @@ endif
|
|||
TOPDIR = ..
|
||||
include $(TOPDIR)/Makefile.system
|
||||
|
||||
ifeq ($(ARCH), power)
|
||||
ifeq ($(C_COMPILER), CLANG)
|
||||
override CFLAGS += -fno-integrated-as
|
||||
endif
|
||||
endif
|
||||
|
||||
AVX2OPT =
|
||||
ifeq ($(C_COMPILER), GCC)
|
||||
# AVX2 support was added in 4.7.0
|
||||
|
|
|
|||
|
|
@ -61,6 +61,15 @@ ifeq ($(CORE), ZEN)
|
|||
USE_TRMM = 1
|
||||
endif
|
||||
|
||||
ifeq ($(OS), AIX)
|
||||
M4VERSION := $(shell m4 --version < /dev/null 2>&1 | grep GNU 2>&1 >/dev/null ; echo $$?)
|
||||
ifeq ($(M4VERSION), 0)
|
||||
M4_AIX := m4 -l16384
|
||||
else
|
||||
M4_AIX := m4 -B16384
|
||||
endif
|
||||
$(info $$var is [${$(M4_AIX)}])
|
||||
endif
|
||||
ifeq ($(CORE), POWER8)
|
||||
ifeq ($(BINARY64),1)
|
||||
USE_TRMM = 1
|
||||
|
|
@ -173,7 +182,7 @@ ifeq ($(BUILD_BFLOAT16),1)
|
|||
SBBLASOBJS += sbgemm_beta$(TSUFFIX).$(SUFFIX)
|
||||
endif
|
||||
|
||||
ifneq "$(or $(BUILD_SINGLE),$(BUILD_DOUBLE))" ""
|
||||
ifneq "$(or $(BUILD_SINGLE),$(BUILD_DOUBLE),$(BUILD_COMPLEX))" ""
|
||||
SBLASOBJS += \
|
||||
sgemm_beta$(TSUFFIX).$(SUFFIX) \
|
||||
strmm_kernel_LN$(TSUFFIX).$(SUFFIX) strmm_kernel_LT$(TSUFFIX).$(SUFFIX) \
|
||||
|
|
@ -182,7 +191,7 @@ SBLASOBJS += \
|
|||
strsm_kernel_RN$(TSUFFIX).$(SUFFIX) strsm_kernel_RT$(TSUFFIX).$(SUFFIX)
|
||||
endif
|
||||
|
||||
ifeq ($(BUILD_DOUBLE),1)
|
||||
ifneq "$(or $(BUILD_DOUBLE),$(BUILD_COMPLEX16))" ""
|
||||
DBLASOBJS += \
|
||||
dgemm_beta$(TSUFFIX).$(SUFFIX) \
|
||||
dtrmm_kernel_LN$(TSUFFIX).$(SUFFIX) dtrmm_kernel_LT$(TSUFFIX).$(SUFFIX) \
|
||||
|
|
@ -198,7 +207,7 @@ QBLASOBJS += \
|
|||
qtrsm_kernel_LN$(TSUFFIX).$(SUFFIX) qtrsm_kernel_LT$(TSUFFIX).$(SUFFIX) \
|
||||
qtrsm_kernel_RN$(TSUFFIX).$(SUFFIX) qtrsm_kernel_RT$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
ifeq ($(BUILD_COMPLEX),1)
|
||||
ifneq "$(or $(BUILD_COMPLEX),$(BUILD_COMPLEX16))" ""
|
||||
CBLASOBJS += \
|
||||
ctrmm_kernel_LN$(TSUFFIX).$(SUFFIX) ctrmm_kernel_LT$(TSUFFIX).$(SUFFIX) \
|
||||
ctrmm_kernel_LR$(TSUFFIX).$(SUFFIX) ctrmm_kernel_LC$(TSUFFIX).$(SUFFIX) \
|
||||
|
|
@ -246,7 +255,7 @@ XBLASOBJS += xgemm3m_kernel$(TSUFFIX).$(SUFFIX)
|
|||
|
||||
endif
|
||||
|
||||
ifeq ($(BUILD_SINGLE),1)
|
||||
ifneq "$(or $(BUILD_SINGLE),$(BUILD_DOUBLE),$(BUILD_COMPLEX))" ""
|
||||
SBLASOBJS += \
|
||||
strmm_iunucopy$(TSUFFIX).$(SUFFIX) strmm_iunncopy$(TSUFFIX).$(SUFFIX) \
|
||||
strmm_ilnucopy$(TSUFFIX).$(SUFFIX) strmm_ilnncopy$(TSUFFIX).$(SUFFIX) \
|
||||
|
|
@ -255,10 +264,7 @@ SBLASOBJS += \
|
|||
strmm_ounucopy$(TSUFFIX).$(SUFFIX) strmm_ounncopy$(TSUFFIX).$(SUFFIX) \
|
||||
strmm_olnucopy$(TSUFFIX).$(SUFFIX) strmm_olnncopy$(TSUFFIX).$(SUFFIX) \
|
||||
strmm_outucopy$(TSUFFIX).$(SUFFIX) strmm_outncopy$(TSUFFIX).$(SUFFIX) \
|
||||
strmm_oltucopy$(TSUFFIX).$(SUFFIX) strmm_oltncopy$(TSUFFIX).$(SUFFIX)
|
||||
endif
|
||||
ifneq "$(or $(BUILD_SINGLE),$(BUILD_DOUBLE))" ""
|
||||
SBLASOBJS += \
|
||||
strmm_oltucopy$(TSUFFIX).$(SUFFIX) strmm_oltncopy$(TSUFFIX).$(SUFFIX) \
|
||||
strsm_iunucopy$(TSUFFIX).$(SUFFIX) strsm_iunncopy$(TSUFFIX).$(SUFFIX) \
|
||||
strsm_ilnucopy$(TSUFFIX).$(SUFFIX) strsm_ilnncopy$(TSUFFIX).$(SUFFIX) \
|
||||
strsm_iutucopy$(TSUFFIX).$(SUFFIX) strsm_iutncopy$(TSUFFIX).$(SUFFIX) \
|
||||
|
|
@ -266,10 +272,7 @@ SBLASOBJS += \
|
|||
strsm_ounucopy$(TSUFFIX).$(SUFFIX) strsm_ounncopy$(TSUFFIX).$(SUFFIX) \
|
||||
strsm_olnucopy$(TSUFFIX).$(SUFFIX) strsm_olnncopy$(TSUFFIX).$(SUFFIX) \
|
||||
strsm_outucopy$(TSUFFIX).$(SUFFIX) strsm_outncopy$(TSUFFIX).$(SUFFIX) \
|
||||
strsm_oltucopy$(TSUFFIX).$(SUFFIX) strsm_oltncopy$(TSUFFIX).$(SUFFIX)
|
||||
endif
|
||||
ifeq ($(BUILD_SINGLE),1)
|
||||
SBLASOBJS += \
|
||||
strsm_oltucopy$(TSUFFIX).$(SUFFIX) strsm_oltncopy$(TSUFFIX).$(SUFFIX) \
|
||||
ssymm_iutcopy$(TSUFFIX).$(SUFFIX) ssymm_iltcopy$(TSUFFIX).$(SUFFIX) \
|
||||
ssymm_outcopy$(TSUFFIX).$(SUFFIX) ssymm_oltcopy$(TSUFFIX).$(SUFFIX)
|
||||
endif
|
||||
|
|
@ -391,7 +394,7 @@ XBLASOBJS += \
|
|||
|
||||
ifeq ($(USE_GEMM3M), 1)
|
||||
|
||||
ifeq ($(BUILD_COMPLEX),1)
|
||||
ifneq "$(or $(BUILD_COMPLEX),$(BUILD_COMPLEX16))" ""
|
||||
CBLASOBJS += \
|
||||
cgemm3m_incopyb$(TSUFFIX).$(SUFFIX) cgemm3m_itcopyb$(TSUFFIX).$(SUFFIX) \
|
||||
cgemm3m_incopyr$(TSUFFIX).$(SUFFIX) cgemm3m_itcopyr$(TSUFFIX).$(SUFFIX) \
|
||||
|
|
@ -634,15 +637,7 @@ $(KDIR)$(SBGEMMONCOPYOBJ) : $(KERNELDIR)/$(SBGEMMONCOPY)
|
|||
$(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@
|
||||
|
||||
$(KDIR)$(SBGEMMOTCOPYOBJ) : $(KERNELDIR)/$(SBGEMMOTCOPY)
|
||||
|
||||
ifeq ($(OS), AIX)
|
||||
$(CC) $(CFLAGS) -S -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o - > sbgemmotcopy.s
|
||||
m4 sbgemmotcopy.s > sbgemmotcopy_nomacros.s
|
||||
$(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX sbgemmotcopy_nomacros.s -o $@
|
||||
rm sbgemmotcopy.s sbgemmotcopy_nomacros.s
|
||||
else
|
||||
$(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@
|
||||
endif
|
||||
|
||||
ifneq ($(SBGEMM_UNROLL_M), $(SBGEMM_UNROLL_N))
|
||||
|
||||
|
|
@ -650,14 +645,7 @@ $(KDIR)$(SBGEMMINCOPYOBJ) : $(KERNELDIR)/$(SBGEMMINCOPY)
|
|||
$(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@
|
||||
|
||||
$(KDIR)$(SBGEMMITCOPYOBJ) : $(KERNELDIR)/$(SBGEMMITCOPY)
|
||||
ifeq ($(OS), AIX)
|
||||
$(CC) $(CFLAGS) -S -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o - > sbgemmitcopy.s
|
||||
m4 sbgemmitcopy.s > sbgemmitcopy_nomacros.s
|
||||
$(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX sbgemmitcopy_nomacros.s -o $@
|
||||
rm sbgemmitcopy.s sbgemmitcopy_nomacros.s
|
||||
else
|
||||
$(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@
|
||||
endif
|
||||
|
||||
endif
|
||||
endif
|
||||
|
|
@ -668,7 +656,7 @@ $(KDIR)$(SGEMMONCOPYOBJ) : $(KERNELDIR)/$(SGEMMONCOPY)
|
|||
$(KDIR)$(SGEMMOTCOPYOBJ) : $(KERNELDIR)/$(SGEMMOTCOPY)
|
||||
ifeq ($(OS), AIX)
|
||||
$(CC) $(CFLAGS) -S -UDOUBLE -UCOMPLEX $< -o - > sgemmotcopy.s
|
||||
m4 sgemmotcopy.s > sgemmotcopy_nomacros.s
|
||||
$(M4_AIX) sgemmotcopy.s > sgemmotcopy_nomacros.s
|
||||
$(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX sgemmotcopy_nomacros.s -o $@
|
||||
rm sgemmotcopy.s sgemmotcopy_nomacros.s
|
||||
else
|
||||
|
|
@ -684,7 +672,7 @@ $(KDIR)$(SGEMMINCOPYOBJ) : $(KERNELDIR)/$(SGEMMINCOPY)
|
|||
$(KDIR)$(SGEMMITCOPYOBJ) : $(KERNELDIR)/$(SGEMMITCOPY)
|
||||
ifeq ($(OS), AIX)
|
||||
$(CC) $(CFLAGS) -S -UDOUBLE -UCOMPLEX $< -o - > sgemmitcopy.s
|
||||
m4 sgemmitcopy.s > sgemmitcopy_nomacros.s
|
||||
$(M4_AIX) sgemmitcopy.s > sgemmitcopy_nomacros.s
|
||||
$(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX sgemmitcopy_nomacros.s -o $@
|
||||
rm sgemmitcopy.s sgemmitcopy_nomacros.s
|
||||
else
|
||||
|
|
@ -696,7 +684,7 @@ endif
|
|||
$(KDIR)$(DGEMMONCOPYOBJ) : $(KERNELDIR)/$(DGEMMONCOPY)
|
||||
ifeq ($(OS), AIX)
|
||||
$(CC) $(CFLAGS) -S -DDOUBLE -UCOMPLEX $< -o - > dgemm_ncopy.s
|
||||
m4 dgemm_ncopy.s > dgemm_ncopy_nomacros.s
|
||||
$(M4_AIX) dgemm_ncopy.s > dgemm_ncopy_nomacros.s
|
||||
$(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX dgemm_ncopy_nomacros.s -o $@
|
||||
rm dgemm_ncopy.s dgemm_ncopy_nomacros.s
|
||||
else
|
||||
|
|
@ -714,7 +702,7 @@ $(KDIR)$(DGEMMINCOPYOBJ) : $(KERNELDIR)/$(DGEMMINCOPY)
|
|||
$(KDIR)$(DGEMMITCOPYOBJ) : $(KERNELDIR)/$(DGEMMITCOPY)
|
||||
ifeq ($(OS), AIX)
|
||||
$(CC) $(CFLAGS) -S -DDOUBLE -UCOMPLEX $< -o - > dgemm_itcopy.s
|
||||
m4 dgemm_itcopy.s > dgemm_itcopy_nomacros.s
|
||||
$(M4_AIX) dgemm_itcopy.s > dgemm_itcopy_nomacros.s
|
||||
$(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX dgemm_itcopy_nomacros.s -o $@
|
||||
rm dgemm_itcopy.s dgemm_itcopy_nomacros.s
|
||||
else
|
||||
|
|
@ -757,7 +745,7 @@ $(KDIR)$(CGEMMINCOPYOBJ) : $(KERNELDIR)/$(CGEMMINCOPY)
|
|||
$(KDIR)$(CGEMMITCOPYOBJ) : $(KERNELDIR)/$(CGEMMITCOPY)
|
||||
ifeq ($(OS), AIX)
|
||||
$(CC) $(CFLAGS) -UDOUBLE -UCOMPLEX -S $< -o - > cgemm_itcopy.s
|
||||
m4 cgemm_itcopy.s > cgemm_itcopy_nomacros.s
|
||||
$(M4_AIX) cgemm_itcopy.s > cgemm_itcopy_nomacros.s
|
||||
$(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX cgemm_itcopy_nomacros.s -o $@
|
||||
rm cgemm_itcopy.s cgemm_itcopy_nomacros.s
|
||||
else
|
||||
|
|
@ -780,7 +768,7 @@ $(KDIR)$(ZGEMMINCOPYOBJ) : $(KERNELDIR)/$(ZGEMMINCOPY)
|
|||
$(KDIR)$(ZGEMMITCOPYOBJ) : $(KERNELDIR)/$(ZGEMMITCOPY)
|
||||
ifeq ($(OS), AIX)
|
||||
$(CC) $(CFLAGS) -S -DDOUBLE -UCOMPLEX $< -o - > zgemm_itcopy.s
|
||||
m4 zgemm_itcopy.s > zgemm_itcopy_nomacros.s
|
||||
$(M4_AIX) zgemm_itcopy.s > zgemm_itcopy_nomacros.s
|
||||
$(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX zgemm_itcopy_nomacros.s -o $@
|
||||
rm zgemm_itcopy.s zgemm_itcopy_nomacros.s
|
||||
else
|
||||
|
|
@ -812,7 +800,7 @@ endif
|
|||
$(KDIR)sgemm_kernel$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEMMKERNEL) $(SGEMMDEPEND)
|
||||
ifeq ($(OS), AIX)
|
||||
$(CC) $(CFLAGS) -S -UDOUBLE -UCOMPLEX $< -o - > sgemm_kernel$(TSUFFIX).s
|
||||
m4 sgemm_kernel$(TSUFFIX).s > sgemm_kernel$(TSUFFIX)_nomacros.s
|
||||
$(M4_AIX) sgemm_kernel$(TSUFFIX).s > sgemm_kernel$(TSUFFIX)_nomacros.s
|
||||
$(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX sgemm_kernel$(TSUFFIX)_nomacros.s -o $@
|
||||
rm sgemm_kernel$(TSUFFIX).s sgemm_kernel$(TSUFFIX)_nomacros.s
|
||||
else
|
||||
|
|
@ -829,20 +817,13 @@ endif
|
|||
ifeq ($(BUILD_BFLOAT16), 1)
|
||||
|
||||
$(KDIR)sbgemm_kernel$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SBGEMMKERNEL) $(SBGEMMDEPEND)
|
||||
ifeq ($(OS), AIX)
|
||||
$(CC) $(CFLAGS) -S -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o - > sbgemm_kernel$(TSUFFIX).s
|
||||
m4 sbgemm_kernel$(TSUFFIX).s > sbgemm_kernel$(TSUFFIX)_nomacros.s
|
||||
$(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX sbgemm_kernel$(TSUFFIX)_nomacros.s -o $@
|
||||
rm sbgemm_kernel$(TSUFFIX).s sbgemm_kernel$(TSUFFIX)_nomacros.s
|
||||
else
|
||||
$(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@
|
||||
endif
|
||||
endif
|
||||
|
||||
$(KDIR)dgemm_kernel$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DGEMMKERNEL) $(DGEMMDEPEND)
|
||||
ifeq ($(OS), AIX)
|
||||
$(CC) $(CFLAGS) -S -DDOUBLE -UCOMPLEX $< -o - > dgemm_kernel$(TSUFFIX).s
|
||||
m4 dgemm_kernel$(TSUFFIX).s > dgemm_kernel$(TSUFFIX)_nomacros.s
|
||||
$(M4_AIX) dgemm_kernel$(TSUFFIX).s > dgemm_kernel$(TSUFFIX)_nomacros.s
|
||||
$(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX dgemm_kernel$(TSUFFIX)_nomacros.s -o $@
|
||||
rm dgemm_kernel$(TSUFFIX).s dgemm_kernel$(TSUFFIX)_nomacros.s
|
||||
else
|
||||
|
|
@ -855,7 +836,7 @@ $(KDIR)qgemm_kernel$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(QGEMMKERNEL) $(QGEMMDEP
|
|||
$(KDIR)cgemm_kernel_n$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMMKERNEL) $(CGEMMDEPEND)
|
||||
ifeq ($(OS), AIX)
|
||||
$(CC) $(CFLAGS) -S -UDOUBLE -DCOMPLEX -DNN $< -o - > cgemm_kernel_n.s
|
||||
m4 cgemm_kernel_n.s > cgemm_kernel_n_nomacros.s
|
||||
$(M4_AIX) cgemm_kernel_n.s > cgemm_kernel_n_nomacros.s
|
||||
$(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DNN cgemm_kernel_n_nomacros.s -o $@
|
||||
rm cgemm_kernel_n.s cgemm_kernel_n_nomacros.s
|
||||
else
|
||||
|
|
@ -865,7 +846,7 @@ endif
|
|||
$(KDIR)cgemm_kernel_l$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMMKERNEL) $(CGEMMDEPEND)
|
||||
ifeq ($(OS), AIX)
|
||||
$(CC) $(CFLAGS) -S -UDOUBLE -DCOMPLEX -DCN $< -o - > cgemm_kernel_l.s
|
||||
m4 cgemm_kernel_l.s > cgemm_kernel_l_nomacros.s
|
||||
$(M4_AIX) cgemm_kernel_l.s > cgemm_kernel_l_nomacros.s
|
||||
$(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DCN cgemm_kernel_l_nomacros.s -o $@
|
||||
rm cgemm_kernel_l.s cgemm_kernel_l_nomacros.s
|
||||
else
|
||||
|
|
@ -875,7 +856,7 @@ endif
|
|||
$(KDIR)cgemm_kernel_r$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMMKERNEL) $(CGEMMDEPEND)
|
||||
ifeq ($(OS), AIX)
|
||||
$(CC) $(CFLAGS) -S -UDOUBLE -DCOMPLEX -DNC $< -o - > cgemm_kernel_r.s
|
||||
m4 cgemm_kernel_r.s > cgemm_kernel_r_nomacros.s
|
||||
$(M4_AIX) cgemm_kernel_r.s > cgemm_kernel_r_nomacros.s
|
||||
$(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DNC cgemm_kernel_r_nomacros.s -o $@
|
||||
rm cgemm_kernel_r.s cgemm_kernel_r_nomacros.s
|
||||
else
|
||||
|
|
@ -885,7 +866,7 @@ endif
|
|||
$(KDIR)cgemm_kernel_b$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMMKERNEL) $(CGEMMDEPEND)
|
||||
ifeq ($(OS), AIX)
|
||||
$(CC) $(CFLAGS) -S -UDOUBLE -DCOMPLEX -DCC $< -o - > cgemm_kernel_b.s
|
||||
m4 cgemm_kernel_b.s > cgemm_kernel_b_nomacros.s
|
||||
$(M4_AIX) cgemm_kernel_b.s > cgemm_kernel_b_nomacros.s
|
||||
$(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DCC cgemm_kernel_b_nomacros.s -o $@
|
||||
rm cgemm_kernel_b.s cgemm_kernel_b_nomacros.s
|
||||
else
|
||||
|
|
@ -895,7 +876,7 @@ endif
|
|||
$(KDIR)zgemm_kernel_n$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMMKERNEL) $(ZGEMMDEPEND)
|
||||
ifeq ($(OS), AIX)
|
||||
$(CC) $(CFLAGS) -S -DDOUBLE -DCOMPLEX -DNN $< -o - > zgemm_kernel_n.s
|
||||
m4 zgemm_kernel_n.s > zgemm_kernel_n_nomacros.s
|
||||
$(M4_AIX) zgemm_kernel_n.s > zgemm_kernel_n_nomacros.s
|
||||
$(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DNN zgemm_kernel_n_nomacros.s -o $@
|
||||
rm zgemm_kernel_n.s zgemm_kernel_n_nomacros.s
|
||||
else ifeq ($(CORE),SANDYBRIDGE)
|
||||
|
|
@ -907,7 +888,7 @@ endif
|
|||
$(KDIR)zgemm_kernel_l$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMMKERNEL) $(ZGEMMDEPEND)
|
||||
ifeq ($(OS), AIX)
|
||||
$(CC) $(CFLAGS) -S -DDOUBLE -DCOMPLEX -DCN $< -o - > zgemm_kernel_l.s
|
||||
m4 zgemm_kernel_l.s > zgemm_kernel_l_nomacros.s
|
||||
$(M4_AIX) zgemm_kernel_l.s > zgemm_kernel_l_nomacros.s
|
||||
$(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DCN zgemm_kernel_l_nomacros.s -o $@
|
||||
rm zgemm_kernel_l.s zgemm_kernel_l_nomacros.s
|
||||
else ifeq ($(CORE),SANDYBRIDGE)
|
||||
|
|
@ -919,7 +900,7 @@ endif
|
|||
$(KDIR)zgemm_kernel_r$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMMKERNEL) $(ZGEMMDEPEND)
|
||||
ifeq ($(OS), AIX)
|
||||
$(CC) $(CFLAGS) -S -DDOUBLE -DCOMPLEX -DNC $< -o - > zgemm_kernel_r.s
|
||||
m4 zgemm_kernel_r.s > zgemm_kernel_r_nomacros.s
|
||||
$(M4_AIX) zgemm_kernel_r.s > zgemm_kernel_r_nomacros.s
|
||||
$(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DNC zgemm_kernel_r_nomacros.s -o $@
|
||||
rm zgemm_kernel_r.s zgemm_kernel_r_nomacros.s
|
||||
else ifeq ($(CORE),SANDYBRIDGE)
|
||||
|
|
@ -931,7 +912,7 @@ endif
|
|||
$(KDIR)zgemm_kernel_b$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMMKERNEL) $(ZGEMMDEPEND)
|
||||
ifeq ($(OS), AIX)
|
||||
$(CC) $(CFLAGS) -S -DDOUBLE -DCOMPLEX -DCC $< -o - > zgemm_kernel_b.s
|
||||
m4 zgemm_kernel_b.s > zgemm_kernel_b_nomacros.s
|
||||
$(M4_AIX) zgemm_kernel_b.s > zgemm_kernel_b_nomacros.s
|
||||
$(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DCC zgemm_kernel_b_nomacros.s -o $@
|
||||
rm zgemm_kernel_b.s zgemm_kernel_b_nomacros.s
|
||||
else ifeq ($(CORE),SANDYBRIDGE)
|
||||
|
|
@ -957,7 +938,7 @@ ifdef USE_TRMM
|
|||
$(KDIR)strmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(STRMMKERNEL)
|
||||
ifeq ($(OS), AIX)
|
||||
$(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -UCOMPLEX -DLEFT -UTRANSA $< -o - > strmmkernel_ln.s
|
||||
m4 strmmkernel_ln.s > strmmkernel_ln_nomacros.s
|
||||
$(M4_AIX) strmmkernel_ln.s > strmmkernel_ln_nomacros.s
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -DLEFT -UTRANSA strmmkernel_ln_nomacros.s -o $@
|
||||
rm strmmkernel_ln.s strmmkernel_ln_nomacros.s
|
||||
else
|
||||
|
|
@ -967,7 +948,7 @@ endif
|
|||
$(KDIR)strmm_kernel_LT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(STRMMKERNEL)
|
||||
ifeq ($(OS), AIX)
|
||||
$(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -UCOMPLEX -DLEFT -DTRANSA $< -o - > strmmkernel_lt.s
|
||||
m4 strmmkernel_lt.s > strmmkernel_lt_nomacros.s
|
||||
$(M4_AIX) strmmkernel_lt.s > strmmkernel_lt_nomacros.s
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -DLEFT -DTRANSA strmmkernel_lt_nomacros.s -o $@
|
||||
rm strmmkernel_lt.s strmmkernel_lt_nomacros.s
|
||||
else
|
||||
|
|
@ -977,7 +958,7 @@ endif
|
|||
$(KDIR)strmm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(STRMMKERNEL)
|
||||
ifeq ($(OS), AIX)
|
||||
$(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -UTRANSA $< -o - > strmmkernel_rn.s
|
||||
m4 strmmkernel_rn.s > strmmkernel_rn_nomacros.s
|
||||
$(M4_AIX) strmmkernel_rn.s > strmmkernel_rn_nomacros.s
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -UTRANSA strmmkernel_rn_nomacros.s -o $@
|
||||
rm strmmkernel_rn.s strmmkernel_rn_nomacros.s
|
||||
else
|
||||
|
|
@ -987,7 +968,7 @@ endif
|
|||
$(KDIR)strmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(STRMMKERNEL)
|
||||
ifeq ($(OS), AIX)
|
||||
$(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o - > strmm_kernel_rt.s
|
||||
m4 strmm_kernel_rt.s > strmm_kernel_rt_nomacros.s
|
||||
$(M4_AIX) strmm_kernel_rt.s > strmm_kernel_rt_nomacros.s
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -DTRANSA strmm_kernel_rt_nomacros.s -o $@
|
||||
rm strmm_kernel_rt.s strmm_kernel_rt_nomacros.s
|
||||
else
|
||||
|
|
@ -997,7 +978,7 @@ endif
|
|||
$(KDIR)dtrmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DTRMMKERNEL)
|
||||
ifeq ($(OS), AIX)
|
||||
$(CC) $(CFLAGS) -S -DTRMMKERNEL -DDOUBLE -UCOMPLEX -DLEFT -UTRANSA $< -o - > dtrmm_kernel_ln.s
|
||||
m4 dtrmm_kernel_ln.s > dtrmm_kernel_ln_nomacros.s
|
||||
$(M4_AIX) dtrmm_kernel_ln.s > dtrmm_kernel_ln_nomacros.s
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -DLEFT -UTRANSA dtrmm_kernel_ln_nomacros.s -o $@
|
||||
rm dtrmm_kernel_ln.s dtrmm_kernel_ln_nomacros.s
|
||||
else
|
||||
|
|
@ -1007,7 +988,7 @@ endif
|
|||
$(KDIR)dtrmm_kernel_LT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DTRMMKERNEL)
|
||||
ifeq ($(OS), AIX)
|
||||
$(CC) $(CFLAGS) -S -DTRMMKERNEL -DDOUBLE -UCOMPLEX -DLEFT -DTRANSA $< -o - > dtrmm_kernel_lt.s
|
||||
m4 dtrmm_kernel_lt.s > dtrmm_kernel_lt_nomacros.s
|
||||
$(M4_AIX) dtrmm_kernel_lt.s > dtrmm_kernel_lt_nomacros.s
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -DLEFT -DTRANSA dtrmm_kernel_lt_nomacros.s -o $@
|
||||
rm dtrmm_kernel_lt.s dtrmm_kernel_lt_nomacros.s
|
||||
else
|
||||
|
|
@ -1017,7 +998,7 @@ endif
|
|||
$(KDIR)dtrmm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DTRMMKERNEL)
|
||||
ifeq ($(OS), AIX)
|
||||
$(CC) $(CFLAGS) -S -DTRMMKERNEL -DDOUBLE -UCOMPLEX -ULEFT -UTRANSA $< -o - > dtrmm_kernel_rn.s
|
||||
m4 dtrmm_kernel_rn.s > dtrmm_kernel_rn_nomacros.s
|
||||
$(M4_AIX) dtrmm_kernel_rn.s > dtrmm_kernel_rn_nomacros.s
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -ULEFT -UTRANSA dtrmm_kernel_rn_nomacros.s -o $@
|
||||
rm dtrmm_kernel_rn.s dtrmm_kernel_rn_nomacros.s
|
||||
else
|
||||
|
|
@ -1027,7 +1008,7 @@ endif
|
|||
$(KDIR)dtrmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DTRMMKERNEL)
|
||||
ifeq ($(OS), AIX)
|
||||
$(CC) $(CFLAGS) -S -DTRMMKERNEL -DDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o - > dtrmm_kernel_rt.s
|
||||
m4 dtrmm_kernel_rt.s > dtrmm_kernel_rt_nomacros.s
|
||||
$(M4_AIX) dtrmm_kernel_rt.s > dtrmm_kernel_rt_nomacros.s
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -ULEFT -DTRANSA dtrmm_kernel_rt_nomacros.s -o $@
|
||||
rm dtrmm_kernel_rt.s dtrmm_kernel_rt_nomacros.s
|
||||
else
|
||||
|
|
@ -1049,7 +1030,7 @@ $(KDIR)qtrmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(QGEMMKERNEL)
|
|||
$(KDIR)ctrmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL)
|
||||
ifeq ($(OS), AIX)
|
||||
$(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN $< -o - > ctrmm_kernel_ln.s
|
||||
m4 ctrmm_kernel_ln.s > ctrmm_kernel_ln_nomacros.s
|
||||
$(M4_AIX) ctrmm_kernel_ln.s > ctrmm_kernel_ln_nomacros.s
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN ctrmm_kernel_ln_nomacros.s -o $@
|
||||
rm ctrmm_kernel_ln.s ctrmm_kernel_ln_nomacros.s
|
||||
else
|
||||
|
|
@ -1059,7 +1040,7 @@ endif
|
|||
$(KDIR)ctrmm_kernel_LT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL)
|
||||
ifeq ($(OS), AIX)
|
||||
$(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -DTRANSA -UCONJ -DNN $< -o - > ctrmm_kernel_lt.s
|
||||
m4 ctrmm_kernel_lt.s > ctrmm_kernel_lt_nomacros.s
|
||||
$(M4_AIX) ctrmm_kernel_lt.s > ctrmm_kernel_lt_nomacros.s
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -DTRANSA -UCONJ -DNN ctrmm_kernel_lt_nomacros.s -o $@
|
||||
rm ctrmm_kernel_lt.s ctrmm_kernel_lt_nomacros.s
|
||||
else
|
||||
|
|
@ -1069,7 +1050,7 @@ endif
|
|||
$(KDIR)ctrmm_kernel_LR$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL)
|
||||
ifeq ($(OS), AIX)
|
||||
$(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -UTRANSA -DCONJ -DCN $< -o - > ctrmm_kernel_lr.s
|
||||
m4 ctrmm_kernel_lr.s > ctrmm_kernel_lr_nomacros.s
|
||||
$(M4_AIX) ctrmm_kernel_lr.s > ctrmm_kernel_lr_nomacros.s
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -UTRANSA -DCONJ -DCN ctrmm_kernel_lr_nomacros.s -o $@
|
||||
rm ctrmm_kernel_lr.s ctrmm_kernel_lr_nomacros.s
|
||||
else
|
||||
|
|
@ -1079,7 +1060,7 @@ endif
|
|||
$(KDIR)ctrmm_kernel_LC$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL)
|
||||
ifeq ($(OS), AIX)
|
||||
$(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -DTRANSA -DCONJ -DCN $< -o - > ctrmm_kernel_lc.s
|
||||
m4 ctrmm_kernel_lc.s > ctrmm_kernel_lc_nomacros.s
|
||||
$(M4_AIX) ctrmm_kernel_lc.s > ctrmm_kernel_lc_nomacros.s
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -DTRANSA -DCONJ -DCN ctrmm_kernel_lc_nomacros.s -o $@
|
||||
rm ctrmm_kernel_lc_nomacros.s ctrmm_kernel_lc.s
|
||||
else
|
||||
|
|
@ -1089,7 +1070,7 @@ endif
|
|||
$(KDIR)ctrmm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL)
|
||||
ifeq ($(OS), AIX)
|
||||
$(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -UTRANSA -UCONJ -DNN $< -o - > ctrmm_kernel_rn.s
|
||||
m4 ctrmm_kernel_rn.s > ctrmm_kernel_rn_nomacros.s
|
||||
$(M4_AIX) ctrmm_kernel_rn.s > ctrmm_kernel_rn_nomacros.s
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -UTRANSA -UCONJ -DNN ctrmm_kernel_rn_nomacros.s -o $@
|
||||
rm ctrmm_kernel_rn.s ctrmm_kernel_rn_nomacros.s
|
||||
else
|
||||
|
|
@ -1099,7 +1080,7 @@ endif
|
|||
$(KDIR)ctrmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL)
|
||||
ifeq ($(OS), AIX)
|
||||
$(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -DTRANSA -UCONJ -DNN $< -o - > ctrmm_kernel_rt.s
|
||||
m4 ctrmm_kernel_rt.s > ctrmm_kernel_rt_nomacros.s
|
||||
$(M4_AIX) ctrmm_kernel_rt.s > ctrmm_kernel_rt_nomacros.s
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -DTRANSA -UCONJ -DNN ctrmm_kernel_rt_nomacros.s -o $@
|
||||
rm ctrmm_kernel_rt.s ctrmm_kernel_rt_nomacros.s
|
||||
else
|
||||
|
|
@ -1109,7 +1090,7 @@ endif
|
|||
$(KDIR)ctrmm_kernel_RR$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL)
|
||||
ifeq ($(OS), AIX)
|
||||
$(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -UTRANSA -DCONJ -DNC $< -o - > ctrmm_kernel_rr.s
|
||||
m4 ctrmm_kernel_rr.s > ctrmm_kernel_rr_nomacros.s
|
||||
$(M4_AIX) ctrmm_kernel_rr.s > ctrmm_kernel_rr_nomacros.s
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -UTRANSA -DCONJ -DNC ctrmm_kernel_rr_nomacros.s -o $@
|
||||
rm ctrmm_kernel_rr.s ctrmm_kernel_rr_nomacros.s
|
||||
else
|
||||
|
|
@ -1119,7 +1100,7 @@ endif
|
|||
$(KDIR)ctrmm_kernel_RC$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL)
|
||||
ifeq ($(OS), AIX)
|
||||
$(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC $< -o - > ctrmm_kernel_RC.s
|
||||
m4 ctrmm_kernel_RC.s > ctrmm_kernel_RC_nomacros.s
|
||||
$(M4_AIX) ctrmm_kernel_RC.s > ctrmm_kernel_RC_nomacros.s
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC ctrmm_kernel_RC_nomacros.s -o $@
|
||||
rm ctrmm_kernel_RC.s ctrmm_kernel_RC_nomacros.s
|
||||
else
|
||||
|
|
@ -1129,7 +1110,7 @@ endif
|
|||
$(KDIR)ztrmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL)
|
||||
ifeq ($(OS), AIX)
|
||||
$(CC) $(CFLAGS) -S -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN $< -o - > ztrmm_kernel_ln.s
|
||||
m4 ztrmm_kernel_ln.s > ztrmm_kernel_ln_nomacros.s
|
||||
$(M4_AIX) ztrmm_kernel_ln.s > ztrmm_kernel_ln_nomacros.s
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN ztrmm_kernel_ln_nomacros.s -o $@
|
||||
rm ztrmm_kernel_ln.s ztrmm_kernel_ln_nomacros.s
|
||||
else ifeq ($(CORE), SANDYBRIDGE)
|
||||
|
|
@ -1141,7 +1122,7 @@ endif
|
|||
$(KDIR)ztrmm_kernel_LT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL)
|
||||
ifeq ($(OS), AIX)
|
||||
$(CC) $(CFLAGS) -S -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -UCONJ -DNN $< -o - > ztrmm_kernel_lt.s
|
||||
m4 ztrmm_kernel_lt.s > ztrmm_kernel_lt_nomacros.s
|
||||
$(M4_AIX) ztrmm_kernel_lt.s > ztrmm_kernel_lt_nomacros.s
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -UCONJ -DNN ztrmm_kernel_lt_nomacros.s -o $@
|
||||
rm ztrmm_kernel_lt.s ztrmm_kernel_lt_nomacros.s
|
||||
else ifeq ($(CORE), SANDYBRIDGE)
|
||||
|
|
@ -1153,7 +1134,7 @@ endif
|
|||
$(KDIR)ztrmm_kernel_LR$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL)
|
||||
ifeq ($(OS), AIX)
|
||||
$(CC) $(CFLAGS) -S -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -DCONJ -DCN $< -o - > ztrmm_kernel_lr.s
|
||||
m4 ztrmm_kernel_lr.s > ztrmm_kernel_lr_nomacros.s
|
||||
$(M4_AIX) ztrmm_kernel_lr.s > ztrmm_kernel_lr_nomacros.s
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -DCONJ -DCN ztrmm_kernel_lr_nomacros.s -o $@
|
||||
rm ztrmm_kernel_lr.s ztrmm_kernel_lr_nomacros.s
|
||||
else ifeq ($(CORE), SANDYBRIDGE)
|
||||
|
|
@ -1165,7 +1146,7 @@ endif
|
|||
$(KDIR)ztrmm_kernel_LC$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL)
|
||||
ifeq ($(OS), AIX)
|
||||
$(CC) $(CFLAGS) -S -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -DCONJ -DCN $< -o - > ztrmm_kernel_lc.s
|
||||
m4 ztrmm_kernel_lc.s >ztrmm_kernel_lc_nomacros.s
|
||||
$(M4_AIX) ztrmm_kernel_lc.s >ztrmm_kernel_lc_nomacros.s
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -DCONJ -DCN ztrmm_kernel_lc_nomacros.s -o $@
|
||||
rm ztrmm_kernel_lc.s ztrmm_kernel_lc_nomacros.s
|
||||
else ifeq ($(CORE), SANDYBRIDGE)
|
||||
|
|
@ -1177,7 +1158,7 @@ endif
|
|||
$(KDIR)ztrmm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL)
|
||||
ifeq ($(OS), AIX)
|
||||
$(CC) $(CFLAGS) -S -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -UCONJ -DNN $< -o - > ztrmm_kernel_rn.s
|
||||
m4 ztrmm_kernel_rn.s > ztrmm_kernel_rn_nomacros.s
|
||||
$(M4_AIX) ztrmm_kernel_rn.s > ztrmm_kernel_rn_nomacros.s
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -UCONJ -DNN ztrmm_kernel_rn_nomacros.s -o $@
|
||||
rm ztrmm_kernel_rn.s ztrmm_kernel_rn_nomacros.s
|
||||
else ifeq ($(CORE), SANDYBRIDGE)
|
||||
|
|
@ -1189,7 +1170,7 @@ endif
|
|||
$(KDIR)ztrmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL)
|
||||
ifeq ($(OS), AIX)
|
||||
$(CC) $(CFLAGS) -S -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -UCONJ -DNN $< -o - > ztrmm_kernel_rt.s
|
||||
m4 ztrmm_kernel_rt.s > ztrmm_kernel_rt_nomacros.s
|
||||
$(M4_AIX) ztrmm_kernel_rt.s > ztrmm_kernel_rt_nomacros.s
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -UCONJ -DNN ztrmm_kernel_rt_nomacros.s -o $@
|
||||
rm ztrmm_kernel_rt.s ztrmm_kernel_rt_nomacros.s
|
||||
else ifeq ($(CORE), SANDYBRIDGE)
|
||||
|
|
@ -1201,7 +1182,7 @@ endif
|
|||
$(KDIR)ztrmm_kernel_RR$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL)
|
||||
ifeq ($(OS), AIX)
|
||||
$(CC) $(CFLAGS) -S -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -DCONJ -DNC $< -o - > ztrmm_kernel_rr.s
|
||||
m4 ztrmm_kernel_rr.s > ztrmm_kernel_rr_nomacros.s
|
||||
$(M4_AIX) ztrmm_kernel_rr.s > ztrmm_kernel_rr_nomacros.s
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -DCONJ -DNC ztrmm_kernel_rr_nomacros.s -o $@
|
||||
rm ztrmm_kernel_rr.s ztrmm_kernel_rr_nomacros.s
|
||||
else ifeq ($(CORE), SANDYBRIDGE)
|
||||
|
|
@ -1213,7 +1194,7 @@ endif
|
|||
$(KDIR)ztrmm_kernel_RC$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL)
|
||||
ifeq ($(OS), AIX)
|
||||
$(CC) $(CFLAGS) -S -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC $< -o - > ztrmm_kernel_rc.s
|
||||
m4 ztrmm_kernel_rc.s > ztrmm_kernel_rc_nomacros.s
|
||||
$(M4_AIX) ztrmm_kernel_rc.s > ztrmm_kernel_rc_nomacros.s
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC ztrmm_kernel_rc_nomacros.s -o $@
|
||||
rm ztrmm_kernel_rc.s ztrmm_kernel_rc_nomacros.s
|
||||
else ifeq ($(CORE), SANDYBRIDGE)
|
||||
|
|
@ -1235,7 +1216,7 @@ $(KDIR)strmm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEMMKERNEL)
|
|||
$(KDIR)strmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEMMKERNEL)
|
||||
ifeq ($(OS), AIX)
|
||||
$(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o - > strmm_kernel_rt.s
|
||||
m4 strmm_kernel_rt.s > strmm_kernel_rt_nomacros.s
|
||||
$(M4_AIX) strmm_kernel_rt.s > strmm_kernel_rt_nomacros.s
|
||||
$(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -DTRANSA strmm_kernel_rt_nomacros.s -o $@
|
||||
rm strmm_kernel_rt.s strmm_kernel_rt_nomacros.s
|
||||
else
|
||||
|
|
@ -1395,7 +1376,7 @@ $(KDIR)dtrsm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DTRSMKERNEL_LN) $(DT
|
|||
$(KDIR)dtrsm_kernel_LT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DTRSMKERNEL_LT) $(DTRSMDEPEND)
|
||||
ifeq ($(OS), AIX)
|
||||
$(CC) $(CFLAGS) -S -DTRSMKERNEL -UCOMPLEX -DDOUBLE -UUPPER -DLT -UCONJ $< -o - > dtrsm_kernel_lt.s
|
||||
m4 dtrsm_kernel_lt.s > dtrsm_kernel_lt_nomacros.s
|
||||
$(M4_AIX) dtrsm_kernel_lt.s > dtrsm_kernel_lt_nomacros.s
|
||||
$(CC) -c $(CFLAGS) -DTRSMKERNEL -UCOMPLEX -DDOUBLE -UUPPER -DLT -UCONJ dtrsm_kernel_lt_nomacros.s -o $@
|
||||
rm dtrsm_kernel_lt.s dtrsm_kernel_lt_nomacros.s
|
||||
else
|
||||
|
|
@ -2987,7 +2968,7 @@ $(KDIR)cgemm_kernel_l$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(CGEMMKERNEL) $(CGEMM
|
|||
$(KDIR)cgemm_kernel_r$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(CGEMMKERNEL) $(CGEMMDEPEND)
|
||||
ifeq ($(OS), AIX)
|
||||
$(CC) $(PFLAGS) -S -UDOUBLE -DCOMPLEX -DNC $< -o - > cgemm_kernel_r.s
|
||||
m4 cgemm_kernel_r.s > cgemm_kernel_r_nomacros.s
|
||||
$(M4_AIX) cgemm_kernel_r.s > cgemm_kernel_r_nomacros.s
|
||||
$(CC) $(PFLAGS) -c -UDOUBLE -DCOMPLEX -DNC cgemm_kernel_r_nomacros.s -o $@
|
||||
rm cgemm_kernel_r.s cgemm_kernel_r_nomacros.s
|
||||
else
|
||||
|
|
@ -3033,7 +3014,7 @@ $(KDIR)strmm_kernel_RN$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SGEMMKERNEL)
|
|||
$(KDIR)strmm_kernel_RT$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SGEMMKERNEL)
|
||||
ifeq ($(OS), AIX)
|
||||
$(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o - > strmm_kernel_rt.s
|
||||
m4 strmmkernel_rn.s > strmm_kernel_rt_nomacros.s
|
||||
$(M4_AIX) strmmkernel_rn.s > strmm_kernel_rt_nomacros.s
|
||||
$(CC) $(PFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -DTRANSA strmm_kernel_rt_nomacros.s -o $@
|
||||
rm strmm_kernel_rt.s strmm_kernel_rt_nomacros.s
|
||||
else
|
||||
|
|
|
|||
|
|
@ -57,7 +57,7 @@ CAMAXKERNEL = zamax.S
|
|||
ZAMAXKERNEL = zamax.S
|
||||
|
||||
SAXPYKERNEL = axpy.S
|
||||
DAXPYKERNEL = axpy.S
|
||||
DAXPYKERNEL = daxpy_thunderx2t99.S
|
||||
CAXPYKERNEL = zaxpy.S
|
||||
ZAXPYKERNEL = zaxpy.S
|
||||
|
||||
|
|
@ -81,45 +81,35 @@ DGEMVTKERNEL = gemv_t.S
|
|||
CGEMVTKERNEL = zgemv_t.S
|
||||
ZGEMVTKERNEL = zgemv_t.S
|
||||
|
||||
SASUMKERNEL = sasum_thunderx2t99.c
|
||||
DASUMKERNEL = dasum_thunderx2t99.c
|
||||
CASUMKERNEL = casum_thunderx2t99.c
|
||||
ZASUMKERNEL = zasum_thunderx2t99.c
|
||||
|
||||
SASUMKERNEL = asum.S
|
||||
DASUMKERNEL = asum.S
|
||||
CASUMKERNEL = casum.S
|
||||
ZASUMKERNEL = zasum.S
|
||||
SCOPYKERNEL = copy_thunderx2t99.c
|
||||
DCOPYKERNEL = copy_thunderx2t99.c
|
||||
CCOPYKERNEL = copy_thunderx2t99.c
|
||||
ZCOPYKERNEL = copy_thunderx2t99.c
|
||||
|
||||
SCOPYKERNEL = copy.S
|
||||
DCOPYKERNEL = copy.S
|
||||
CCOPYKERNEL = copy.S
|
||||
ZCOPYKERNEL = copy.S
|
||||
SSWAPKERNEL = swap_thunderx2t99.S
|
||||
DSWAPKERNEL = swap_thunderx2t99.S
|
||||
CSWAPKERNEL = swap_thunderx2t99.S
|
||||
ZSWAPKERNEL = swap_thunderx2t99.S
|
||||
|
||||
SSWAPKERNEL = swap.S
|
||||
DSWAPKERNEL = swap.S
|
||||
CSWAPKERNEL = swap.S
|
||||
ZSWAPKERNEL = swap.S
|
||||
ISAMAXKERNEL = iamax_thunderx2t99.c
|
||||
IDAMAXKERNEL = iamax_thunderx2t99.c
|
||||
ICAMAXKERNEL = izamax_thunderx2t99.c
|
||||
IZAMAXKERNEL = izamax_thunderx2t99.c
|
||||
|
||||
ISAMAXKERNEL = iamax.S
|
||||
IDAMAXKERNEL = iamax.S
|
||||
ICAMAXKERNEL = izamax.S
|
||||
IZAMAXKERNEL = izamax.S
|
||||
SNRM2KERNEL = scnrm2_thunderx2t99.c
|
||||
DNRM2KERNEL = dznrm2_thunderx2t99.c
|
||||
CNRM2KERNEL = scnrm2_thunderx2t99.c
|
||||
ZNRM2KERNEL = dznrm2_thunderx2t99.c
|
||||
|
||||
SNRM2KERNEL = nrm2.S
|
||||
DNRM2KERNEL = nrm2.S
|
||||
CNRM2KERNEL = znrm2.S
|
||||
ZNRM2KERNEL = znrm2.S
|
||||
|
||||
DDOTKERNEL = dot.S
|
||||
ifneq ($(C_COMPILER), PGI)
|
||||
SDOTKERNEL = ../generic/dot.c
|
||||
else
|
||||
SDOTKERNEL = dot.S
|
||||
endif
|
||||
ifneq ($(C_COMPILER), PGI)
|
||||
CDOTKERNEL = zdot.S
|
||||
ZDOTKERNEL = zdot.S
|
||||
else
|
||||
CDOTKERNEL = ../arm/zdot.c
|
||||
ZDOTKERNEL = ../arm/zdot.c
|
||||
endif
|
||||
DDOTKERNEL = dot.c
|
||||
SDOTKERNEL = dot.c
|
||||
CDOTKERNEL = zdot_thunderx2t99.c
|
||||
ZDOTKERNEL = zdot_thunderx2t99.c
|
||||
DSDOTKERNEL = dot.S
|
||||
|
||||
DGEMM_BETA = dgemm_beta.S
|
||||
|
|
@ -128,10 +118,10 @@ SGEMM_BETA = sgemm_beta.S
|
|||
SGEMMKERNEL = sgemm_kernel_sve_v2x$(SGEMM_UNROLL_N).S
|
||||
STRMMKERNEL = strmm_kernel_sve_v1x$(SGEMM_UNROLL_N).S
|
||||
|
||||
SGEMMINCOPY = sgemm_ncopy_sve_v1.c
|
||||
SGEMMITCOPY = sgemm_tcopy_sve_v1.c
|
||||
SGEMMONCOPY = sgemm_ncopy_$(DGEMM_UNROLL_N).S
|
||||
SGEMMOTCOPY = sgemm_tcopy_$(DGEMM_UNROLL_N).S
|
||||
SGEMMINCOPY = gemm_ncopy_sve_v1x$(SGEMM_UNROLL_N).c
|
||||
SGEMMITCOPY = gemm_tcopy_sve_v1x$(SGEMM_UNROLL_N).c
|
||||
SGEMMONCOPY = sgemm_ncopy_$(SGEMM_UNROLL_N).S
|
||||
SGEMMOTCOPY = sgemm_tcopy_$(SGEMM_UNROLL_N).S
|
||||
|
||||
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||
|
|
@ -149,8 +139,8 @@ SSYMMLCOPY_M = symm_lcopy_sve.c
|
|||
DGEMMKERNEL = dgemm_kernel_sve_v2x$(DGEMM_UNROLL_N).S
|
||||
DTRMMKERNEL = dtrmm_kernel_sve_v1x$(DGEMM_UNROLL_N).S
|
||||
|
||||
DGEMMINCOPY = dgemm_ncopy_sve_v1.c
|
||||
DGEMMITCOPY = dgemm_tcopy_sve_v1.c
|
||||
DGEMMINCOPY = gemm_ncopy_sve_v1x$(DGEMM_UNROLL_N).c
|
||||
DGEMMITCOPY = gemm_tcopy_sve_v1x$(DGEMM_UNROLL_N).c
|
||||
DGEMMONCOPY = dgemm_ncopy_$(DGEMM_UNROLL_N).S
|
||||
DGEMMOTCOPY = dgemm_tcopy_$(DGEMM_UNROLL_N).S
|
||||
|
||||
|
|
@ -170,8 +160,8 @@ DSYMMLCOPY_M = symm_lcopy_sve.c
|
|||
CGEMMKERNEL = cgemm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S
|
||||
CTRMMKERNEL = ctrmm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S
|
||||
|
||||
CGEMMINCOPY = cgemm_ncopy_sve_v1.c
|
||||
CGEMMITCOPY = cgemm_tcopy_sve_v1.c
|
||||
CGEMMINCOPY = gemm_ncopy_complex_sve_v1x$(ZGEMM_UNROLL_N).c
|
||||
CGEMMITCOPY = gemm_tcopy_complex_sve_v1x$(ZGEMM_UNROLL_N).c
|
||||
CGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c
|
||||
CGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c
|
||||
|
||||
|
|
@ -194,8 +184,8 @@ CSYMMLCOPY_M = zsymm_lcopy_sve.c
|
|||
ZGEMMKERNEL = zgemm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S
|
||||
ZTRMMKERNEL = ztrmm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S
|
||||
|
||||
ZGEMMINCOPY = zgemm_ncopy_sve_v1.c
|
||||
ZGEMMITCOPY = zgemm_tcopy_sve_v1.c
|
||||
ZGEMMINCOPY = gemm_ncopy_complex_sve_v1x$(ZGEMM_UNROLL_N).c
|
||||
ZGEMMITCOPY = gemm_tcopy_complex_sve_v1x$(ZGEMM_UNROLL_N).c
|
||||
ZGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c
|
||||
ZGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c
|
||||
|
||||
|
|
|
|||
|
|
@ -1,216 +1 @@
|
|||
SAMINKERNEL = ../arm/amin.c
|
||||
DAMINKERNEL = ../arm/amin.c
|
||||
CAMINKERNEL = ../arm/zamin.c
|
||||
ZAMINKERNEL = ../arm/zamin.c
|
||||
|
||||
SMAXKERNEL = ../arm/max.c
|
||||
DMAXKERNEL = ../arm/max.c
|
||||
|
||||
SMINKERNEL = ../arm/min.c
|
||||
DMINKERNEL = ../arm/min.c
|
||||
|
||||
ISAMINKERNEL = ../arm/iamin.c
|
||||
IDAMINKERNEL = ../arm/iamin.c
|
||||
ICAMINKERNEL = ../arm/izamin.c
|
||||
IZAMINKERNEL = ../arm/izamin.c
|
||||
|
||||
ISMAXKERNEL = ../arm/imax.c
|
||||
IDMAXKERNEL = ../arm/imax.c
|
||||
|
||||
ISMINKERNEL = ../arm/imin.c
|
||||
IDMINKERNEL = ../arm/imin.c
|
||||
|
||||
STRSMKERNEL_LN = trsm_kernel_LN_sve.c
|
||||
STRSMKERNEL_LT = trsm_kernel_LT_sve.c
|
||||
STRSMKERNEL_RN = trsm_kernel_RN_sve.c
|
||||
STRSMKERNEL_RT = trsm_kernel_RT_sve.c
|
||||
|
||||
DTRSMKERNEL_LN = trsm_kernel_LN_sve.c
|
||||
DTRSMKERNEL_LT = trsm_kernel_LT_sve.c
|
||||
DTRSMKERNEL_RN = trsm_kernel_RN_sve.c
|
||||
DTRSMKERNEL_RT = trsm_kernel_RT_sve.c
|
||||
|
||||
TRSMCOPYLN_M = trsm_lncopy_sve.c
|
||||
TRSMCOPYLT_M = trsm_ltcopy_sve.c
|
||||
TRSMCOPYUN_M = trsm_uncopy_sve.c
|
||||
TRSMCOPYUT_M = trsm_utcopy_sve.c
|
||||
|
||||
CTRSMKERNEL_LN = trsm_kernel_LN_sve.c
|
||||
CTRSMKERNEL_LT = trsm_kernel_LT_sve.c
|
||||
CTRSMKERNEL_RN = trsm_kernel_RN_sve.c
|
||||
CTRSMKERNEL_RT = trsm_kernel_RT_sve.c
|
||||
|
||||
ZTRSMKERNEL_LN = trsm_kernel_LN_sve.c
|
||||
ZTRSMKERNEL_LT = trsm_kernel_LT_sve.c
|
||||
ZTRSMKERNEL_RN = trsm_kernel_RN_sve.c
|
||||
ZTRSMKERNEL_RT = trsm_kernel_RT_sve.c
|
||||
|
||||
ZTRSMCOPYLN_M = ztrsm_lncopy_sve.c
|
||||
ZTRSMCOPYLT_M = ztrsm_ltcopy_sve.c
|
||||
ZTRSMCOPYUN_M = ztrsm_uncopy_sve.c
|
||||
ZTRSMCOPYUT_M = ztrsm_utcopy_sve.c
|
||||
|
||||
|
||||
SAMAXKERNEL = amax.S
|
||||
DAMAXKERNEL = amax.S
|
||||
CAMAXKERNEL = zamax.S
|
||||
ZAMAXKERNEL = zamax.S
|
||||
|
||||
SAXPYKERNEL = axpy.S
|
||||
DAXPYKERNEL = axpy.S
|
||||
CAXPYKERNEL = zaxpy.S
|
||||
ZAXPYKERNEL = zaxpy.S
|
||||
|
||||
SROTKERNEL = rot.S
|
||||
DROTKERNEL = rot.S
|
||||
CROTKERNEL = zrot.S
|
||||
ZROTKERNEL = zrot.S
|
||||
|
||||
SSCALKERNEL = scal.S
|
||||
DSCALKERNEL = scal.S
|
||||
CSCALKERNEL = zscal.S
|
||||
ZSCALKERNEL = zscal.S
|
||||
|
||||
SGEMVNKERNEL = gemv_n.S
|
||||
DGEMVNKERNEL = gemv_n.S
|
||||
CGEMVNKERNEL = zgemv_n.S
|
||||
ZGEMVNKERNEL = zgemv_n.S
|
||||
|
||||
SGEMVTKERNEL = gemv_t.S
|
||||
DGEMVTKERNEL = gemv_t.S
|
||||
CGEMVTKERNEL = zgemv_t.S
|
||||
ZGEMVTKERNEL = zgemv_t.S
|
||||
|
||||
|
||||
SASUMKERNEL = asum.S
|
||||
DASUMKERNEL = asum.S
|
||||
CASUMKERNEL = casum.S
|
||||
ZASUMKERNEL = zasum.S
|
||||
|
||||
SCOPYKERNEL = copy.S
|
||||
DCOPYKERNEL = copy.S
|
||||
CCOPYKERNEL = copy.S
|
||||
ZCOPYKERNEL = copy.S
|
||||
|
||||
SSWAPKERNEL = swap.S
|
||||
DSWAPKERNEL = swap.S
|
||||
CSWAPKERNEL = swap.S
|
||||
ZSWAPKERNEL = swap.S
|
||||
|
||||
ISAMAXKERNEL = iamax.S
|
||||
IDAMAXKERNEL = iamax.S
|
||||
ICAMAXKERNEL = izamax.S
|
||||
IZAMAXKERNEL = izamax.S
|
||||
|
||||
SNRM2KERNEL = nrm2.S
|
||||
DNRM2KERNEL = nrm2.S
|
||||
CNRM2KERNEL = znrm2.S
|
||||
ZNRM2KERNEL = znrm2.S
|
||||
|
||||
DDOTKERNEL = dot.S
|
||||
ifneq ($(C_COMPILER), PGI)
|
||||
SDOTKERNEL = ../generic/dot.c
|
||||
else
|
||||
SDOTKERNEL = dot.S
|
||||
endif
|
||||
ifneq ($(C_COMPILER), PGI)
|
||||
CDOTKERNEL = zdot.S
|
||||
ZDOTKERNEL = zdot.S
|
||||
else
|
||||
CDOTKERNEL = ../arm/zdot.c
|
||||
ZDOTKERNEL = ../arm/zdot.c
|
||||
endif
|
||||
DSDOTKERNEL = dot.S
|
||||
|
||||
DGEMM_BETA = dgemm_beta.S
|
||||
SGEMM_BETA = sgemm_beta.S
|
||||
|
||||
SGEMMKERNEL = sgemm_kernel_sve_v2x$(SGEMM_UNROLL_N).S
|
||||
STRMMKERNEL = strmm_kernel_sve_v1x$(SGEMM_UNROLL_N).S
|
||||
|
||||
SGEMMINCOPY = sgemm_ncopy_sve_v1.c
|
||||
SGEMMITCOPY = sgemm_tcopy_sve_v1.c
|
||||
SGEMMONCOPY = sgemm_ncopy_$(DGEMM_UNROLL_N).S
|
||||
SGEMMOTCOPY = sgemm_tcopy_$(DGEMM_UNROLL_N).S
|
||||
|
||||
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
STRMMUNCOPY_M = trmm_uncopy_sve_v1.c
|
||||
STRMMLNCOPY_M = trmm_lncopy_sve_v1.c
|
||||
STRMMUTCOPY_M = trmm_utcopy_sve_v1.c
|
||||
STRMMLTCOPY_M = trmm_ltcopy_sve_v1.c
|
||||
|
||||
SSYMMUCOPY_M = symm_ucopy_sve.c
|
||||
SSYMMLCOPY_M = symm_lcopy_sve.c
|
||||
|
||||
DGEMMKERNEL = dgemm_kernel_sve_v2x$(DGEMM_UNROLL_N).S
|
||||
DTRMMKERNEL = dtrmm_kernel_sve_v1x$(DGEMM_UNROLL_N).S
|
||||
|
||||
DGEMMINCOPY = dgemm_ncopy_sve_v1.c
|
||||
DGEMMITCOPY = dgemm_tcopy_sve_v1.c
|
||||
DGEMMONCOPY = dgemm_ncopy_$(DGEMM_UNROLL_N).S
|
||||
DGEMMOTCOPY = dgemm_tcopy_$(DGEMM_UNROLL_N).S
|
||||
|
||||
DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
DTRMMUNCOPY_M = trmm_uncopy_sve_v1.c
|
||||
DTRMMLNCOPY_M = trmm_lncopy_sve_v1.c
|
||||
DTRMMUTCOPY_M = trmm_utcopy_sve_v1.c
|
||||
DTRMMLTCOPY_M = trmm_ltcopy_sve_v1.c
|
||||
|
||||
DSYMMUCOPY_M = symm_ucopy_sve.c
|
||||
DSYMMLCOPY_M = symm_lcopy_sve.c
|
||||
|
||||
CGEMMKERNEL = cgemm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S
|
||||
CTRMMKERNEL = ctrmm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S
|
||||
|
||||
CGEMMINCOPY = cgemm_ncopy_sve_v1.c
|
||||
CGEMMITCOPY = cgemm_tcopy_sve_v1.c
|
||||
CGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c
|
||||
CGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c
|
||||
|
||||
CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||
CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
CTRMMUNCOPY_M = ztrmm_uncopy_sve_v1.c
|
||||
CTRMMLNCOPY_M = ztrmm_lncopy_sve_v1.c
|
||||
CTRMMUTCOPY_M = ztrmm_utcopy_sve_v1.c
|
||||
CTRMMLTCOPY_M = ztrmm_ltcopy_sve_v1.c
|
||||
|
||||
CHEMMLTCOPY_M = zhemm_ltcopy_sve.c
|
||||
CHEMMUTCOPY_M = zhemm_utcopy_sve.c
|
||||
|
||||
CSYMMUCOPY_M = zsymm_ucopy_sve.c
|
||||
CSYMMLCOPY_M = zsymm_lcopy_sve.c
|
||||
|
||||
ZGEMMKERNEL = zgemm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S
|
||||
ZTRMMKERNEL = ztrmm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S
|
||||
|
||||
ZGEMMINCOPY = zgemm_ncopy_sve_v1.c
|
||||
ZGEMMITCOPY = zgemm_tcopy_sve_v1.c
|
||||
ZGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c
|
||||
ZGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c
|
||||
|
||||
ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||
ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||
ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
ZTRMMUNCOPY_M = ztrmm_uncopy_sve_v1.c
|
||||
ZTRMMLNCOPY_M = ztrmm_lncopy_sve_v1.c
|
||||
ZTRMMUTCOPY_M = ztrmm_utcopy_sve_v1.c
|
||||
ZTRMMLTCOPY_M = ztrmm_ltcopy_sve_v1.c
|
||||
|
||||
ZHEMMLTCOPY_M = zhemm_ltcopy_sve.c
|
||||
ZHEMMUTCOPY_M = zhemm_utcopy_sve.c
|
||||
|
||||
ZSYMMUCOPY_M = zsymm_ucopy_sve.c
|
||||
ZSYMMLCOPY_M = zsymm_lcopy_sve.c
|
||||
include $(KERNELDIR)/KERNEL.ARMV8SVE
|
||||
|
|
|
|||
|
|
@ -1,216 +1 @@
|
|||
SAMINKERNEL = ../arm/amin.c
|
||||
DAMINKERNEL = ../arm/amin.c
|
||||
CAMINKERNEL = ../arm/zamin.c
|
||||
ZAMINKERNEL = ../arm/zamin.c
|
||||
|
||||
SMAXKERNEL = ../arm/max.c
|
||||
DMAXKERNEL = ../arm/max.c
|
||||
|
||||
SMINKERNEL = ../arm/min.c
|
||||
DMINKERNEL = ../arm/min.c
|
||||
|
||||
ISAMINKERNEL = ../arm/iamin.c
|
||||
IDAMINKERNEL = ../arm/iamin.c
|
||||
ICAMINKERNEL = ../arm/izamin.c
|
||||
IZAMINKERNEL = ../arm/izamin.c
|
||||
|
||||
ISMAXKERNEL = ../arm/imax.c
|
||||
IDMAXKERNEL = ../arm/imax.c
|
||||
|
||||
ISMINKERNEL = ../arm/imin.c
|
||||
IDMINKERNEL = ../arm/imin.c
|
||||
|
||||
STRSMKERNEL_LN = trsm_kernel_LN_sve.c
|
||||
STRSMKERNEL_LT = trsm_kernel_LT_sve.c
|
||||
STRSMKERNEL_RN = trsm_kernel_RN_sve.c
|
||||
STRSMKERNEL_RT = trsm_kernel_RT_sve.c
|
||||
|
||||
DTRSMKERNEL_LN = trsm_kernel_LN_sve.c
|
||||
DTRSMKERNEL_LT = trsm_kernel_LT_sve.c
|
||||
DTRSMKERNEL_RN = trsm_kernel_RN_sve.c
|
||||
DTRSMKERNEL_RT = trsm_kernel_RT_sve.c
|
||||
|
||||
TRSMCOPYLN_M = trsm_lncopy_sve.c
|
||||
TRSMCOPYLT_M = trsm_ltcopy_sve.c
|
||||
TRSMCOPYUN_M = trsm_uncopy_sve.c
|
||||
TRSMCOPYUT_M = trsm_utcopy_sve.c
|
||||
|
||||
CTRSMKERNEL_LN = trsm_kernel_LN_sve.c
|
||||
CTRSMKERNEL_LT = trsm_kernel_LT_sve.c
|
||||
CTRSMKERNEL_RN = trsm_kernel_RN_sve.c
|
||||
CTRSMKERNEL_RT = trsm_kernel_RT_sve.c
|
||||
|
||||
ZTRSMKERNEL_LN = trsm_kernel_LN_sve.c
|
||||
ZTRSMKERNEL_LT = trsm_kernel_LT_sve.c
|
||||
ZTRSMKERNEL_RN = trsm_kernel_RN_sve.c
|
||||
ZTRSMKERNEL_RT = trsm_kernel_RT_sve.c
|
||||
|
||||
ZTRSMCOPYLN_M = ztrsm_lncopy_sve.c
|
||||
ZTRSMCOPYLT_M = ztrsm_ltcopy_sve.c
|
||||
ZTRSMCOPYUN_M = ztrsm_uncopy_sve.c
|
||||
ZTRSMCOPYUT_M = ztrsm_utcopy_sve.c
|
||||
|
||||
|
||||
SAMAXKERNEL = amax.S
|
||||
DAMAXKERNEL = amax.S
|
||||
CAMAXKERNEL = zamax.S
|
||||
ZAMAXKERNEL = zamax.S
|
||||
|
||||
SAXPYKERNEL = axpy.S
|
||||
DAXPYKERNEL = axpy.S
|
||||
CAXPYKERNEL = zaxpy.S
|
||||
ZAXPYKERNEL = zaxpy.S
|
||||
|
||||
SROTKERNEL = rot.S
|
||||
DROTKERNEL = rot.S
|
||||
CROTKERNEL = zrot.S
|
||||
ZROTKERNEL = zrot.S
|
||||
|
||||
SSCALKERNEL = scal.S
|
||||
DSCALKERNEL = scal.S
|
||||
CSCALKERNEL = zscal.S
|
||||
ZSCALKERNEL = zscal.S
|
||||
|
||||
SGEMVNKERNEL = gemv_n.S
|
||||
DGEMVNKERNEL = gemv_n.S
|
||||
CGEMVNKERNEL = zgemv_n.S
|
||||
ZGEMVNKERNEL = zgemv_n.S
|
||||
|
||||
SGEMVTKERNEL = gemv_t.S
|
||||
DGEMVTKERNEL = gemv_t.S
|
||||
CGEMVTKERNEL = zgemv_t.S
|
||||
ZGEMVTKERNEL = zgemv_t.S
|
||||
|
||||
|
||||
SASUMKERNEL = asum.S
|
||||
DASUMKERNEL = asum.S
|
||||
CASUMKERNEL = casum.S
|
||||
ZASUMKERNEL = zasum.S
|
||||
|
||||
SCOPYKERNEL = copy.S
|
||||
DCOPYKERNEL = copy.S
|
||||
CCOPYKERNEL = copy.S
|
||||
ZCOPYKERNEL = copy.S
|
||||
|
||||
SSWAPKERNEL = swap.S
|
||||
DSWAPKERNEL = swap.S
|
||||
CSWAPKERNEL = swap.S
|
||||
ZSWAPKERNEL = swap.S
|
||||
|
||||
ISAMAXKERNEL = iamax.S
|
||||
IDAMAXKERNEL = iamax.S
|
||||
ICAMAXKERNEL = izamax.S
|
||||
IZAMAXKERNEL = izamax.S
|
||||
|
||||
SNRM2KERNEL = nrm2.S
|
||||
DNRM2KERNEL = nrm2.S
|
||||
CNRM2KERNEL = znrm2.S
|
||||
ZNRM2KERNEL = znrm2.S
|
||||
|
||||
DDOTKERNEL = dot.S
|
||||
ifneq ($(C_COMPILER), PGI)
|
||||
SDOTKERNEL = ../generic/dot.c
|
||||
else
|
||||
SDOTKERNEL = dot.S
|
||||
endif
|
||||
ifneq ($(C_COMPILER), PGI)
|
||||
CDOTKERNEL = zdot.S
|
||||
ZDOTKERNEL = zdot.S
|
||||
else
|
||||
CDOTKERNEL = ../arm/zdot.c
|
||||
ZDOTKERNEL = ../arm/zdot.c
|
||||
endif
|
||||
DSDOTKERNEL = dot.S
|
||||
|
||||
DGEMM_BETA = dgemm_beta.S
|
||||
SGEMM_BETA = sgemm_beta.S
|
||||
|
||||
SGEMMKERNEL = sgemm_kernel_sve_v2x$(SGEMM_UNROLL_N).S
|
||||
STRMMKERNEL = strmm_kernel_sve_v1x$(SGEMM_UNROLL_N).S
|
||||
|
||||
SGEMMINCOPY = sgemm_ncopy_sve_v1.c
|
||||
SGEMMITCOPY = sgemm_tcopy_sve_v1.c
|
||||
SGEMMONCOPY = sgemm_ncopy_$(DGEMM_UNROLL_N).S
|
||||
SGEMMOTCOPY = sgemm_tcopy_$(DGEMM_UNROLL_N).S
|
||||
|
||||
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
STRMMUNCOPY_M = trmm_uncopy_sve_v1.c
|
||||
STRMMLNCOPY_M = trmm_lncopy_sve_v1.c
|
||||
STRMMUTCOPY_M = trmm_utcopy_sve_v1.c
|
||||
STRMMLTCOPY_M = trmm_ltcopy_sve_v1.c
|
||||
|
||||
SSYMMUCOPY_M = symm_ucopy_sve.c
|
||||
SSYMMLCOPY_M = symm_lcopy_sve.c
|
||||
|
||||
DGEMMKERNEL = dgemm_kernel_sve_v2x$(DGEMM_UNROLL_N).S
|
||||
DTRMMKERNEL = dtrmm_kernel_sve_v1x$(DGEMM_UNROLL_N).S
|
||||
|
||||
DGEMMINCOPY = dgemm_ncopy_sve_v1.c
|
||||
DGEMMITCOPY = dgemm_tcopy_sve_v1.c
|
||||
DGEMMONCOPY = dgemm_ncopy_$(DGEMM_UNROLL_N).S
|
||||
DGEMMOTCOPY = dgemm_tcopy_$(DGEMM_UNROLL_N).S
|
||||
|
||||
DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
DTRMMUNCOPY_M = trmm_uncopy_sve_v1.c
|
||||
DTRMMLNCOPY_M = trmm_lncopy_sve_v1.c
|
||||
DTRMMUTCOPY_M = trmm_utcopy_sve_v1.c
|
||||
DTRMMLTCOPY_M = trmm_ltcopy_sve_v1.c
|
||||
|
||||
DSYMMUCOPY_M = symm_ucopy_sve.c
|
||||
DSYMMLCOPY_M = symm_lcopy_sve.c
|
||||
|
||||
CGEMMKERNEL = cgemm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S
|
||||
CTRMMKERNEL = ctrmm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S
|
||||
|
||||
CGEMMINCOPY = cgemm_ncopy_sve_v1.c
|
||||
CGEMMITCOPY = cgemm_tcopy_sve_v1.c
|
||||
CGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c
|
||||
CGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c
|
||||
|
||||
CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||
CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
CTRMMUNCOPY_M = ztrmm_uncopy_sve_v1.c
|
||||
CTRMMLNCOPY_M = ztrmm_lncopy_sve_v1.c
|
||||
CTRMMUTCOPY_M = ztrmm_utcopy_sve_v1.c
|
||||
CTRMMLTCOPY_M = ztrmm_ltcopy_sve_v1.c
|
||||
|
||||
CHEMMLTCOPY_M = zhemm_ltcopy_sve.c
|
||||
CHEMMUTCOPY_M = zhemm_utcopy_sve.c
|
||||
|
||||
CSYMMUCOPY_M = zsymm_ucopy_sve.c
|
||||
CSYMMLCOPY_M = zsymm_lcopy_sve.c
|
||||
|
||||
ZGEMMKERNEL = zgemm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S
|
||||
ZTRMMKERNEL = ztrmm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S
|
||||
|
||||
ZGEMMINCOPY = zgemm_ncopy_sve_v1.c
|
||||
ZGEMMITCOPY = zgemm_tcopy_sve_v1.c
|
||||
ZGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c
|
||||
ZGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c
|
||||
|
||||
ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||
ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||
ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
ZTRMMUNCOPY_M = ztrmm_uncopy_sve_v1.c
|
||||
ZTRMMLNCOPY_M = ztrmm_lncopy_sve_v1.c
|
||||
ZTRMMUTCOPY_M = ztrmm_utcopy_sve_v1.c
|
||||
ZTRMMLTCOPY_M = ztrmm_ltcopy_sve_v1.c
|
||||
|
||||
ZHEMMLTCOPY_M = zhemm_ltcopy_sve.c
|
||||
ZHEMMUTCOPY_M = zhemm_utcopy_sve.c
|
||||
|
||||
ZSYMMUCOPY_M = zsymm_ucopy_sve.c
|
||||
ZSYMMLCOPY_M = zsymm_lcopy_sve.c
|
||||
include $(KERNELDIR)/KERNEL.ARMV8SVE
|
||||
|
|
|
|||
|
|
@ -1,216 +1 @@
|
|||
SAMINKERNEL = ../arm/amin.c
|
||||
DAMINKERNEL = ../arm/amin.c
|
||||
CAMINKERNEL = ../arm/zamin.c
|
||||
ZAMINKERNEL = ../arm/zamin.c
|
||||
|
||||
SMAXKERNEL = ../arm/max.c
|
||||
DMAXKERNEL = ../arm/max.c
|
||||
|
||||
SMINKERNEL = ../arm/min.c
|
||||
DMINKERNEL = ../arm/min.c
|
||||
|
||||
ISAMINKERNEL = ../arm/iamin.c
|
||||
IDAMINKERNEL = ../arm/iamin.c
|
||||
ICAMINKERNEL = ../arm/izamin.c
|
||||
IZAMINKERNEL = ../arm/izamin.c
|
||||
|
||||
ISMAXKERNEL = ../arm/imax.c
|
||||
IDMAXKERNEL = ../arm/imax.c
|
||||
|
||||
ISMINKERNEL = ../arm/imin.c
|
||||
IDMINKERNEL = ../arm/imin.c
|
||||
|
||||
STRSMKERNEL_LN = trsm_kernel_LN_sve.c
|
||||
STRSMKERNEL_LT = trsm_kernel_LT_sve.c
|
||||
STRSMKERNEL_RN = trsm_kernel_RN_sve.c
|
||||
STRSMKERNEL_RT = trsm_kernel_RT_sve.c
|
||||
|
||||
DTRSMKERNEL_LN = trsm_kernel_LN_sve.c
|
||||
DTRSMKERNEL_LT = trsm_kernel_LT_sve.c
|
||||
DTRSMKERNEL_RN = trsm_kernel_RN_sve.c
|
||||
DTRSMKERNEL_RT = trsm_kernel_RT_sve.c
|
||||
|
||||
TRSMCOPYLN_M = trsm_lncopy_sve.c
|
||||
TRSMCOPYLT_M = trsm_ltcopy_sve.c
|
||||
TRSMCOPYUN_M = trsm_uncopy_sve.c
|
||||
TRSMCOPYUT_M = trsm_utcopy_sve.c
|
||||
|
||||
CTRSMKERNEL_LN = trsm_kernel_LN_sve.c
|
||||
CTRSMKERNEL_LT = trsm_kernel_LT_sve.c
|
||||
CTRSMKERNEL_RN = trsm_kernel_RN_sve.c
|
||||
CTRSMKERNEL_RT = trsm_kernel_RT_sve.c
|
||||
|
||||
ZTRSMKERNEL_LN = trsm_kernel_LN_sve.c
|
||||
ZTRSMKERNEL_LT = trsm_kernel_LT_sve.c
|
||||
ZTRSMKERNEL_RN = trsm_kernel_RN_sve.c
|
||||
ZTRSMKERNEL_RT = trsm_kernel_RT_sve.c
|
||||
|
||||
ZTRSMCOPYLN_M = ztrsm_lncopy_sve.c
|
||||
ZTRSMCOPYLT_M = ztrsm_ltcopy_sve.c
|
||||
ZTRSMCOPYUN_M = ztrsm_uncopy_sve.c
|
||||
ZTRSMCOPYUT_M = ztrsm_utcopy_sve.c
|
||||
|
||||
|
||||
SAMAXKERNEL = amax.S
|
||||
DAMAXKERNEL = amax.S
|
||||
CAMAXKERNEL = zamax.S
|
||||
ZAMAXKERNEL = zamax.S
|
||||
|
||||
SAXPYKERNEL = axpy.S
|
||||
DAXPYKERNEL = axpy.S
|
||||
CAXPYKERNEL = zaxpy.S
|
||||
ZAXPYKERNEL = zaxpy.S
|
||||
|
||||
SROTKERNEL = rot.S
|
||||
DROTKERNEL = rot.S
|
||||
CROTKERNEL = zrot.S
|
||||
ZROTKERNEL = zrot.S
|
||||
|
||||
SSCALKERNEL = scal.S
|
||||
DSCALKERNEL = scal.S
|
||||
CSCALKERNEL = zscal.S
|
||||
ZSCALKERNEL = zscal.S
|
||||
|
||||
SGEMVNKERNEL = gemv_n.S
|
||||
DGEMVNKERNEL = gemv_n.S
|
||||
CGEMVNKERNEL = zgemv_n.S
|
||||
ZGEMVNKERNEL = zgemv_n.S
|
||||
|
||||
SGEMVTKERNEL = gemv_t.S
|
||||
DGEMVTKERNEL = gemv_t.S
|
||||
CGEMVTKERNEL = zgemv_t.S
|
||||
ZGEMVTKERNEL = zgemv_t.S
|
||||
|
||||
|
||||
SASUMKERNEL = asum.S
|
||||
DASUMKERNEL = asum.S
|
||||
CASUMKERNEL = casum.S
|
||||
ZASUMKERNEL = zasum.S
|
||||
|
||||
SCOPYKERNEL = copy.S
|
||||
DCOPYKERNEL = copy.S
|
||||
CCOPYKERNEL = copy.S
|
||||
ZCOPYKERNEL = copy.S
|
||||
|
||||
SSWAPKERNEL = swap.S
|
||||
DSWAPKERNEL = swap.S
|
||||
CSWAPKERNEL = swap.S
|
||||
ZSWAPKERNEL = swap.S
|
||||
|
||||
ISAMAXKERNEL = iamax.S
|
||||
IDAMAXKERNEL = iamax.S
|
||||
ICAMAXKERNEL = izamax.S
|
||||
IZAMAXKERNEL = izamax.S
|
||||
|
||||
SNRM2KERNEL = nrm2.S
|
||||
DNRM2KERNEL = nrm2.S
|
||||
CNRM2KERNEL = znrm2.S
|
||||
ZNRM2KERNEL = znrm2.S
|
||||
|
||||
DDOTKERNEL = dot.S
|
||||
ifneq ($(C_COMPILER), PGI)
|
||||
SDOTKERNEL = ../generic/dot.c
|
||||
else
|
||||
SDOTKERNEL = dot.S
|
||||
endif
|
||||
ifneq ($(C_COMPILER), PGI)
|
||||
CDOTKERNEL = zdot.S
|
||||
ZDOTKERNEL = zdot.S
|
||||
else
|
||||
CDOTKERNEL = ../arm/zdot.c
|
||||
ZDOTKERNEL = ../arm/zdot.c
|
||||
endif
|
||||
DSDOTKERNEL = dot.S
|
||||
|
||||
DGEMM_BETA = dgemm_beta.S
|
||||
SGEMM_BETA = sgemm_beta.S
|
||||
|
||||
SGEMMKERNEL = sgemm_kernel_sve_v2x$(SGEMM_UNROLL_N).S
|
||||
STRMMKERNEL = strmm_kernel_sve_v1x$(SGEMM_UNROLL_N).S
|
||||
|
||||
SGEMMINCOPY = sgemm_ncopy_sve_v1.c
|
||||
SGEMMITCOPY = sgemm_tcopy_sve_v1.c
|
||||
SGEMMONCOPY = sgemm_ncopy_$(DGEMM_UNROLL_N).S
|
||||
SGEMMOTCOPY = sgemm_tcopy_$(DGEMM_UNROLL_N).S
|
||||
|
||||
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
STRMMUNCOPY_M = trmm_uncopy_sve_v1.c
|
||||
STRMMLNCOPY_M = trmm_lncopy_sve_v1.c
|
||||
STRMMUTCOPY_M = trmm_utcopy_sve_v1.c
|
||||
STRMMLTCOPY_M = trmm_ltcopy_sve_v1.c
|
||||
|
||||
SSYMMUCOPY_M = symm_ucopy_sve.c
|
||||
SSYMMLCOPY_M = symm_lcopy_sve.c
|
||||
|
||||
DGEMMKERNEL = dgemm_kernel_sve_v2x$(DGEMM_UNROLL_N).S
|
||||
DTRMMKERNEL = dtrmm_kernel_sve_v1x$(DGEMM_UNROLL_N).S
|
||||
|
||||
DGEMMINCOPY = dgemm_ncopy_sve_v1.c
|
||||
DGEMMITCOPY = dgemm_tcopy_sve_v1.c
|
||||
DGEMMONCOPY = dgemm_ncopy_$(DGEMM_UNROLL_N).S
|
||||
DGEMMOTCOPY = dgemm_tcopy_$(DGEMM_UNROLL_N).S
|
||||
|
||||
DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
DTRMMUNCOPY_M = trmm_uncopy_sve_v1.c
|
||||
DTRMMLNCOPY_M = trmm_lncopy_sve_v1.c
|
||||
DTRMMUTCOPY_M = trmm_utcopy_sve_v1.c
|
||||
DTRMMLTCOPY_M = trmm_ltcopy_sve_v1.c
|
||||
|
||||
DSYMMUCOPY_M = symm_ucopy_sve.c
|
||||
DSYMMLCOPY_M = symm_lcopy_sve.c
|
||||
|
||||
CGEMMKERNEL = cgemm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S
|
||||
CTRMMKERNEL = ctrmm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S
|
||||
|
||||
CGEMMINCOPY = cgemm_ncopy_sve_v1.c
|
||||
CGEMMITCOPY = cgemm_tcopy_sve_v1.c
|
||||
CGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c
|
||||
CGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c
|
||||
|
||||
CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||
CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
CTRMMUNCOPY_M = ztrmm_uncopy_sve_v1.c
|
||||
CTRMMLNCOPY_M = ztrmm_lncopy_sve_v1.c
|
||||
CTRMMUTCOPY_M = ztrmm_utcopy_sve_v1.c
|
||||
CTRMMLTCOPY_M = ztrmm_ltcopy_sve_v1.c
|
||||
|
||||
CHEMMLTCOPY_M = zhemm_ltcopy_sve.c
|
||||
CHEMMUTCOPY_M = zhemm_utcopy_sve.c
|
||||
|
||||
CSYMMUCOPY_M = zsymm_ucopy_sve.c
|
||||
CSYMMLCOPY_M = zsymm_lcopy_sve.c
|
||||
|
||||
ZGEMMKERNEL = zgemm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S
|
||||
ZTRMMKERNEL = ztrmm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S
|
||||
|
||||
ZGEMMINCOPY = zgemm_ncopy_sve_v1.c
|
||||
ZGEMMITCOPY = zgemm_tcopy_sve_v1.c
|
||||
ZGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c
|
||||
ZGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c
|
||||
|
||||
ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||
ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||
ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
ZTRMMUNCOPY_M = ztrmm_uncopy_sve_v1.c
|
||||
ZTRMMLNCOPY_M = ztrmm_lncopy_sve_v1.c
|
||||
ZTRMMUTCOPY_M = ztrmm_utcopy_sve_v1.c
|
||||
ZTRMMLTCOPY_M = ztrmm_ltcopy_sve_v1.c
|
||||
|
||||
ZHEMMLTCOPY_M = zhemm_ltcopy_sve.c
|
||||
ZHEMMUTCOPY_M = zhemm_utcopy_sve.c
|
||||
|
||||
ZSYMMUCOPY_M = zsymm_ucopy_sve.c
|
||||
ZSYMMLCOPY_M = zsymm_lcopy_sve.c
|
||||
include $(KERNELDIR)/KERNEL.ARMV8SVE
|
||||
|
|
|
|||
|
|
@ -21,12 +21,15 @@ SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
|
|||
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
endif
|
||||
|
||||
DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||
DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||
DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||
DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||
SGEMVNKERNEL = sgemv_n_8_lasx.S
|
||||
SGEMVTKERNEL = sgemv_t_8_lasx.S
|
||||
|
||||
DTRSMKERNEL_LN = dtrsm_kernel_LN_16x4_lasx.S
|
||||
DTRSMKERNEL_LT = dtrsm_kernel_LT_16x4_lasx.S
|
||||
DTRSMKERNEL_RN = dtrsm_kernel_RN_16x4_lasx.S
|
||||
DTRSMKERNEL_RT = dtrsm_kernel_RT_16x4_lasx.S
|
||||
endif
|
||||
|
||||
STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||
|
|
|
|||
|
|
@ -341,7 +341,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
fmadd.d $f10, $f12, $f2, $f10
|
||||
.endm
|
||||
|
||||
.macro DGEMV_N XW:req, X_8:req, X_4:req, X_2:req, X_1:req, Y_8:req, Y_4:req, Y_1:req
|
||||
.macro DGEMV_N_LASX XW:req, X_8:req, X_4:req, X_2:req, X_1:req, Y_8:req, Y_4:req, Y_1:req
|
||||
PTR_SRLI J, N, 3
|
||||
beqz J, .L_\XW\()_N_7
|
||||
PTR_SLLI K_LDA, LDA, 3
|
||||
|
|
@ -541,13 +541,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
.hword .L_GAP_1_0 - .L_GAP_TABLE
|
||||
.hword .L_GAP_1_1 - .L_GAP_TABLE
|
||||
.L_GAP_0_0: /* if (inc_x == 1) && (incy == 1) */
|
||||
DGEMV_N GAP_0_0, X_8, X_4, X_2, X_1, Y_8, Y_4, Y_1
|
||||
DGEMV_N_LASX GAP_0_0, X_8, X_4, X_2, X_1, Y_8, Y_4, Y_1
|
||||
.L_GAP_0_1: /* if (inc_x == 1) && (incy != 1) */
|
||||
DGEMV_N GAP_0_1, X_8, X_4, X_2, X_1, Y_8_GAP, Y_4_GAP, Y_1
|
||||
DGEMV_N_LASX GAP_0_1, X_8, X_4, X_2, X_1, Y_8_GAP, Y_4_GAP, Y_1
|
||||
.L_GAP_1_0: /* if (inc_x != 1) && (incy == 1) */
|
||||
DGEMV_N GAP_1_0, X_8_GAP, X_4_GAP, X_2_GAP, X_1, Y_8, Y_4, Y_1
|
||||
DGEMV_N_LASX GAP_1_0, X_8_GAP, X_4_GAP, X_2_GAP, X_1, Y_8, Y_4, Y_1
|
||||
.L_GAP_1_1: /* if (inc_x != 1) && (incy != 1) */
|
||||
DGEMV_N GAP_1_1, X_8_GAP, X_4_GAP, X_2_GAP, X_1, Y_8_GAP, Y_4_GAP, Y_1
|
||||
DGEMV_N_LASX GAP_1_1, X_8_GAP, X_4_GAP, X_2_GAP, X_1, Y_8_GAP, Y_4_GAP, Y_1
|
||||
.L_END:
|
||||
pop_if_used 17 + 7, 24 + 4
|
||||
jirl $r0, $r1, 0x0
|
||||
|
|
|
|||
|
|
@ -220,7 +220,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
GMADD xvf, d, TP0, A0, X0, TP0, TP1, A2, X0, TP1
|
||||
.endm
|
||||
|
||||
.macro DGEMV_T XW:req X8:req, X4:req
|
||||
.macro DGEMV_T_LASX XW:req X8:req, X4:req
|
||||
PTR_SRLI J, N, 3
|
||||
beqz J, .L_\XW\()_N_7
|
||||
PTR_SLLI K_LDA, LDA, 3
|
||||
|
|
@ -472,9 +472,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
.hword .L_GAP_0 - .L_GAP_TABLE
|
||||
.hword .L_GAP_1 - .L_GAP_TABLE
|
||||
.L_GAP_0: /* if (incx == 1) */
|
||||
DGEMV_T GAP_0, X8, X4
|
||||
DGEMV_T_LASX GAP_0, X8, X4
|
||||
.L_GAP_1: /* if (incx != 1) */
|
||||
DGEMV_T GAP_1, X8_GAP, X4_GAP
|
||||
DGEMV_T_LASX GAP_1, X8_GAP, X4_GAP
|
||||
.L_END:
|
||||
pop_if_used 17 + 8, 24 + 3
|
||||
jirl $r0, $r1, 0x0
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load Diff
|
|
@ -0,0 +1,959 @@
|
|||
/*******************************************************************************
|
||||
Copyright (c) 2023, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*******************************************************************************/
|
||||
#define ASSEMBLER
|
||||
|
||||
#include "common.h"
|
||||
#include "loongarch64_asm.S"
|
||||
|
||||
/*********************************************************************
|
||||
* 2023/08/26 guxiwei
|
||||
* UTEST : OK
|
||||
* CTEST : OK
|
||||
* TEST : OK
|
||||
*
|
||||
*
|
||||
*********************************************************************/
|
||||
|
||||
/* int CNAME(BLASLONG m, BLASLONG n, BLASLONG k, FLOAT dummy1, FLOAT *a, FLOAT *b,
|
||||
* FLOAT *c, BLASLONG ldc, BLASLONG offset)
|
||||
*/
|
||||
|
||||
#define M $r4 // param 1: bm
|
||||
#define N $r5 // param 2: bn
|
||||
#define K $r6 // param 3: bk
|
||||
#define A $r7 // param 5: ba
|
||||
#define B $r8 // param 6: bb
|
||||
#define C $r9 // param 7: bc
|
||||
#define LDC $r10 // param 8: ldc
|
||||
#define OFFSET $r11 // param 9: offset
|
||||
|
||||
/* Cycle control parameters */
|
||||
#define I $r13
|
||||
#define J $r14
|
||||
#define L $r15
|
||||
#define TL $r16
|
||||
/* Matrix address */
|
||||
#define A0 $r17
|
||||
#define B0 $r18
|
||||
#define C0 $r19
|
||||
#define C1 $r20
|
||||
#define C2 $r23
|
||||
#define C3 $r24
|
||||
#define T0 $r25
|
||||
#define T1 $r26
|
||||
#define T2 $r27
|
||||
#define KK $r28
|
||||
#define AA $r29
|
||||
#define CC $r30
|
||||
#define BB B0
|
||||
#undef ZERO
|
||||
#define ZERO $r0
|
||||
|
||||
#define U0 $xr0
|
||||
#define U1 $xr1
|
||||
#define U2 $xr2
|
||||
#define U3 $xr3
|
||||
#define U4 $xr4
|
||||
#define U5 $xr5
|
||||
#define U6 $xr6
|
||||
#define U7 $xr7
|
||||
#define U8 $xr8
|
||||
#define U9 $xr9
|
||||
#define U10 $xr10
|
||||
#define U11 $xr11
|
||||
#define U12 $xr12
|
||||
#define U13 $xr13
|
||||
#define U14 $xr14
|
||||
#define U15 $xr15
|
||||
#define D0 $xr16
|
||||
#define D1 $xr17
|
||||
#define D2 $xr18
|
||||
#define D3 $xr19
|
||||
#define D4 $xr20
|
||||
#define D5 $xr21
|
||||
#define D6 $xr22
|
||||
#define D7 $xr23
|
||||
#define D8 $xr24
|
||||
#define D9 $xr25
|
||||
#define D10 $xr26
|
||||
#define D11 $xr27
|
||||
#define D12 $xr28
|
||||
#define D13 $xr29
|
||||
#define D14 $xr30
|
||||
#define D15 $xr31
|
||||
#define G0 D0
|
||||
#define G1 D1
|
||||
#define G2 D2
|
||||
#define G3 D3
|
||||
#define G4 D4
|
||||
#define G5 D5
|
||||
#define G6 D6
|
||||
#define G7 D7
|
||||
#define G8 D8
|
||||
#define G9 D9
|
||||
#define G10 D10
|
||||
#define G11 D11
|
||||
#define G12 D12
|
||||
#define G13 D13
|
||||
#define G14 D14
|
||||
#define G15 D15
|
||||
|
||||
/* Prefetch interval */
|
||||
#define A_PRE 0x400
|
||||
#define B_PRE 0x100
|
||||
|
||||
#include "dtrsm_kernel_macro.S"
|
||||
|
||||
.macro ldrepl_macro start, end, stride
|
||||
// Load Ux (x = 0...15)
|
||||
.if \start <= \end
|
||||
GLDREPL xv, d, $xr\start, A0, \stride * 8
|
||||
ldrepl_macro %start + 1, \end, %stride + 1
|
||||
.endif
|
||||
.endm
|
||||
.macro nmsub_macro start0, end0, start1, reg
|
||||
// Gx -= reg * Ux
|
||||
.if \start0 <= \end0
|
||||
xvfnmsub.d $xr\start0, \reg, $xr\start1, $xr\start0
|
||||
nmsub_macro %start0 + 1, \end0, %start1 + 1, \reg
|
||||
.endif
|
||||
.endm
|
||||
.macro B_st_macro start, end, stride, N
|
||||
// Store Gx(x = 16...31)
|
||||
.if \start <= \end
|
||||
.if \N == 4
|
||||
xvst $xr\start, B0, \stride * 0x20
|
||||
.elseif \N == 2
|
||||
vst $vr\start, B0, \stride * 0x10
|
||||
.elseif \N == 1
|
||||
fst.d $f\start, B0, \stride * 0x08
|
||||
.endif
|
||||
B_st_macro %start + 1, \end, %stride + 1, \N
|
||||
.endif
|
||||
.endm
|
||||
|
||||
.macro dsolve_16 N
|
||||
// The data layout of C (4x16) is as follows (store 4 data in each register):
|
||||
// U0 U1 U2 U3
|
||||
// U4 U5 U6 U7
|
||||
// U8 U9 U10 U11
|
||||
// U12 U13 U14 U15
|
||||
// The first step is to transpose the result of C
|
||||
GTRANSPOSE4x4_D U3, U7, U11, U15, G12, G13, G14, G15, D0, D1
|
||||
GTRANSPOSE4x4_D U2, U6, U10, U14, G8, G9, G10, G11, D0, D1
|
||||
GTRANSPOSE4x4_D U1, U5, U9, U13, G4, G5, G6, G7, U3, U7
|
||||
GTRANSPOSE4x4_D U0, U4, U8, U12, G0, G1, G2, G3, U3, U7
|
||||
// Now we have the following memory layout of C:
|
||||
// 0 1 2 3 ... 15
|
||||
// 0 | | | | | | |
|
||||
// 1 | G0 | G1 | G2 | G3 | ... | G15 |
|
||||
// 2 | | | | | | |
|
||||
// 3 | | | | | | |
|
||||
// Next we are going to process matrix A with a size of 16x16,
|
||||
// using only the upper triangular portion. The memory layout of
|
||||
// matrix A is as follows, quite large.
|
||||
//0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
|
||||
// 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
|
||||
// 34 35 36 37 38 39 40 41 42 43 44 45 46 47
|
||||
// 51 52 53 54 55 56 57 58 59 60 61 62 63
|
||||
// 68 69 70 71 72 73 74 75 76 77 78 79
|
||||
// 85 86 87 88 89 90 91 92 93 94 95
|
||||
// 102 103 104 105 106 107 108 109 110 111
|
||||
// 119 120 121 122 123 124 125 126 127
|
||||
// 136 137 138 139 140 141 142 143
|
||||
// 153 154 155 156 157 158 159
|
||||
// 170 171 172 173 174 175
|
||||
// 187 188 189 190 191
|
||||
// 204 205 206 207
|
||||
// 221 222 223
|
||||
// 238 239
|
||||
// 255
|
||||
// Sequentially extract data from A in row order
|
||||
// Load 0
|
||||
ldrepl_macro 0, 15, 0
|
||||
GMUL xvf, d, G0, G0, U0
|
||||
nmsub_macro 17, 31, 1, G0
|
||||
PTR_ADDI A0, A0, 17 * 8
|
||||
// Load 1
|
||||
ldrepl_macro 1, 15, 0
|
||||
GMUL xvf, d, G1, G1, U1
|
||||
nmsub_macro 18, 31, 2, G1
|
||||
PTR_ADDI A0, A0, 17 * 8
|
||||
// Load 2
|
||||
ldrepl_macro 2, 15, 0
|
||||
GMUL xvf, d, G2, G2, U2
|
||||
nmsub_macro 19, 31, 3, G2
|
||||
PTR_ADDI A0, A0, 17 * 8
|
||||
// Load 3
|
||||
ldrepl_macro 3, 15, 0
|
||||
GMUL xvf, d, G3, G3, U3
|
||||
nmsub_macro 20, 31, 4, G3
|
||||
PTR_ADDI A0, A0, 17 * 8
|
||||
// Load 4
|
||||
ldrepl_macro 4, 15, 0
|
||||
GMUL xvf, d, G4, G4, U4
|
||||
nmsub_macro 21, 31, 5, G4
|
||||
PTR_ADDI A0, A0, 17 * 8
|
||||
// Load 5
|
||||
ldrepl_macro 5, 15, 0
|
||||
GMUL xvf, d, G5, G5, U5
|
||||
nmsub_macro 22, 31, 6, G5
|
||||
PTR_ADDI A0, A0, 17 * 8
|
||||
// Load 6
|
||||
ldrepl_macro 6, 15, 0
|
||||
GMUL xvf, d, G6, G6, U6
|
||||
nmsub_macro 23, 31, 7, G6
|
||||
PTR_ADDI A0, A0, 17 * 8
|
||||
// Load 7
|
||||
ldrepl_macro 7, 15, 0
|
||||
GMUL xvf, d, G7, G7, U7
|
||||
nmsub_macro 24, 31, 8, G7
|
||||
PTR_ADDI A0, A0, 17 * 8
|
||||
// Load 8
|
||||
ldrepl_macro 8, 15, 0
|
||||
GMUL xvf, d, G8, G8, U8
|
||||
nmsub_macro 25, 31, 9, G8
|
||||
PTR_ADDI A0, A0, 17 * 8
|
||||
// Load 9
|
||||
ldrepl_macro 9, 15, 0
|
||||
GMUL xvf, d, G9, G9, U9
|
||||
nmsub_macro 26, 31, 10, G9
|
||||
PTR_ADDI A0, A0, 17 * 8
|
||||
// Load 10
|
||||
ldrepl_macro 10, 15, 0
|
||||
GMUL xvf, d, G10, G10, U10
|
||||
nmsub_macro 27, 31, 11, G10
|
||||
PTR_ADDI A0, A0, 17 * 8
|
||||
// Load 11
|
||||
ldrepl_macro 11, 15, 0
|
||||
GMUL xvf, d, G11, G11, U11
|
||||
nmsub_macro 28, 31, 12, G11
|
||||
PTR_ADDI A0, A0, 17 * 8
|
||||
// Load 12
|
||||
ldrepl_macro 12, 15, 0
|
||||
GMUL xvf, d, G12, G12, U12
|
||||
nmsub_macro 29, 31, 13, G12
|
||||
PTR_ADDI A0, A0, 17 * 8
|
||||
// Load 13
|
||||
ldrepl_macro 13, 15, 0
|
||||
GMUL xvf, d, G13, G13, U13
|
||||
nmsub_macro 30, 31, 14, G13
|
||||
PTR_ADDI A0, A0, 17 * 8
|
||||
// Load 14
|
||||
ldrepl_macro 14, 15, 0
|
||||
GMUL xvf, d, G14, G14, U14
|
||||
nmsub_macro 31, 31, 15, G14
|
||||
PTR_ADDI A0, A0, 17 * 8
|
||||
// Load 15
|
||||
ldrepl_macro 15, 15, 0
|
||||
GMUL xvf, d, G15, G15, U15
|
||||
// Finally, We can store the result.
|
||||
// For B, stored sequentially, and C, first transpose and then store
|
||||
B_st_macro 16, 31, 0, \N
|
||||
GTRANSPOSE4x4_D G0, G1, G2, G3, G0, G1, G2, G3, U0, U1
|
||||
GTRANSPOSE4x4_D G4, G5, G6, G7, G4, G5, G6, G7, U0, U1
|
||||
GTRANSPOSE4x4_D G8, G9, G10, G11, G8, G9, G10, G11, U0, U1
|
||||
GTRANSPOSE4x4_D G12, G13, G14, G15, G12, G13, G14, G15, U0, U1
|
||||
.if \N == 4
|
||||
GST xv, , G0, C0, 0x00, G4, C0, 0x20, G8, C0, 0x40, G12, C0, 0x60, \
|
||||
G1, C1, 0x00, G5, C1, 0x20, G9, C1, 0x40, G13, C1, 0x60, \
|
||||
G2, C2, 0x00, G6, C2, 0x20, G10, C2, 0x40, G14, C2, 0x60, \
|
||||
G3, C3, 0x00, G7, C3, 0x20, G11, C3, 0x40, G15, C3, 0x60
|
||||
.elseif \N == 2
|
||||
GST xv, , G0, C0, 0x00, G4, C0, 0x20, G8, C0, 0x40, G12, C0, 0x60, \
|
||||
G1, C1, 0x00, G5, C1, 0x20, G9, C1, 0x40, G13, C1, 0x60
|
||||
.elseif \N == 1
|
||||
GST xv, , G0, C0, 0x00, G4, C0, 0x20, G8, C0, 0x40, G12, C0, 0x60
|
||||
.endif
|
||||
.endm
|
||||
|
||||
.macro dgemm_dsolve_16x4
|
||||
bge ZERO, KK, .L_dsolve_16x4_load
|
||||
dgemm_16x4
|
||||
b .L_dsolve_16x4
|
||||
.L_dsolve_16x4_load:
|
||||
// Load C
|
||||
GLD xv, , U0, C0, 0x00, U1, C0, 0x20, U2, C0, 0x40, U3, C0, 0x60
|
||||
GLD xv, , U4, C1, 0x00, U5, C1, 0x20, U6, C1, 0x40, U7, C1, 0x60
|
||||
GLD xv, , U8, C2, 0x00, U9, C2, 0x20, U10, C2, 0x40, U11, C2, 0x60
|
||||
GLD xv, , U12, C3, 0x00, U13, C3, 0x20, U14, C3, 0x40, U15, C3, 0x60
|
||||
/********************** solver ******************/
|
||||
.L_dsolve_16x4:
|
||||
dsolve_16 4
|
||||
.endm
|
||||
|
||||
.macro dsolve_8 N
|
||||
// The data layout of C (4x8) is as follows (store 4 data in each register):
|
||||
// U0 U1
|
||||
// U2 U3
|
||||
// U4 U5
|
||||
// U6 U7
|
||||
// The first step is to transpose the result of C
|
||||
GTRANSPOSE4x4_D U1, U3, U5, U7, G4, G5, G6, G7, G8, G9
|
||||
GTRANSPOSE4x4_D U0, U2, U4, U6, G0, G1, G2, G3, G8, G9
|
||||
// Now we have the following memory layout of C:
|
||||
// 0 1 2 3 ... 7
|
||||
// 0 | | | | | | |
|
||||
// 1 | G0 | G1 | G2 | G3 | ... | G7 |
|
||||
// 2 | | | | | | |
|
||||
// 3 | | | | | | |
|
||||
// Next we are going to process matrix A with a size of 8x8,
|
||||
// using only the upper triangular portion. The memory layout of
|
||||
// matrix A is as follows:
|
||||
//0 1 2 3 4 5 6 7
|
||||
// 9 10 11 12 13 14 15
|
||||
// 18 19 20 21 22 23
|
||||
// 27 28 29 30 31
|
||||
// 36 37 38 39
|
||||
// 45 46 47
|
||||
// 54 55
|
||||
// 63
|
||||
// Sequentially extract data from A in row order
|
||||
// Load 0
|
||||
ldrepl_macro 0, 7, 0
|
||||
GMUL xvf, d, G0, G0, U0
|
||||
nmsub_macro 17, 23, 1, G0
|
||||
PTR_ADDI A0, A0, 9 * 8
|
||||
// Load 1
|
||||
ldrepl_macro 1, 7, 0
|
||||
GMUL xvf, d, G1, G1, U1
|
||||
nmsub_macro 18, 23, 2, G1
|
||||
PTR_ADDI A0, A0, 9 * 8
|
||||
// Load 2
|
||||
ldrepl_macro 2, 7, 0
|
||||
GMUL xvf, d, G2, G2, U2
|
||||
nmsub_macro 19, 23, 3, G2
|
||||
PTR_ADDI A0, A0, 9 * 8
|
||||
// Load 3
|
||||
ldrepl_macro 3, 7, 0
|
||||
GMUL xvf, d, G3, G3, U3
|
||||
nmsub_macro 20, 23, 4, G3
|
||||
PTR_ADDI A0, A0, 9 * 8
|
||||
// Load 4
|
||||
ldrepl_macro 4, 7, 0
|
||||
GMUL xvf, d, G4, G4, U4
|
||||
nmsub_macro 21, 23, 5, G4
|
||||
PTR_ADDI A0, A0, 9 * 8
|
||||
// Load 5
|
||||
ldrepl_macro 5, 7, 0
|
||||
GMUL xvf, d, G5, G5, U5
|
||||
nmsub_macro 22, 23, 6, G5
|
||||
PTR_ADDI A0, A0, 9 * 8
|
||||
// Load 6
|
||||
ldrepl_macro 6, 7, 0
|
||||
GMUL xvf, d, G6, G6, U6
|
||||
nmsub_macro 23, 23, 7, G6
|
||||
PTR_ADDI A0, A0, 9 * 8
|
||||
// Load 7
|
||||
ldrepl_macro 7, 7, 0
|
||||
GMUL xvf, d, G7, G7, U7
|
||||
// Finally, We can store the result.
|
||||
// For B, stored sequentially, and C, first transpose and then store
|
||||
B_st_macro 16, 23, 0, \N
|
||||
GTRANSPOSE4x4_D G0, G1, G2, G3, G0, G1, G2, G3, U0, U1
|
||||
GTRANSPOSE4x4_D G4, G5, G6, G7, G4, G5, G6, G7, U0, U1
|
||||
.if \N == 4
|
||||
GST xv, , G0, C0, 0x00, G4, C0, 0x20, \
|
||||
G1, C1, 0x00, G5, C1, 0x20, \
|
||||
G2, C2, 0x00, G6, C2, 0x20, \
|
||||
G3, C3, 0x00, G7, C3, 0x20
|
||||
.elseif \N == 2
|
||||
GST xv, , G0, C0, 0x00, G4, C0, 0x20, \
|
||||
G1, C1, 0x00, G5, C1, 0x20
|
||||
.elseif \N == 1
|
||||
GST xv, , G0, C0, 0x00, G4, C0, 0x20
|
||||
.endif
|
||||
.endm
|
||||
|
||||
.macro dgemm_dsolve_8x4
|
||||
bge ZERO, L, .L_dsolve_8x4_load
|
||||
dgemm_8x4
|
||||
b .L_dsolve_8x4
|
||||
.L_dsolve_8x4_load:
|
||||
/* Load C0 */
|
||||
xvld U0, C0, 0x00
|
||||
xvld U1, C0, 0x20
|
||||
|
||||
/* Load C1 */
|
||||
xvld U2, C1, 0x00
|
||||
xvld U3, C1, 0x20
|
||||
|
||||
/* Load C2 */
|
||||
xvld U4, C2, 0x00
|
||||
xvld U5, C2, 0x20
|
||||
|
||||
/* Load C3 */
|
||||
xvld U6, C3, 0x00
|
||||
xvld U7, C3, 0x20
|
||||
/********* solver *********/
|
||||
.L_dsolve_8x4:
|
||||
dsolve_8 4
|
||||
.endm
|
||||
|
||||
.macro dsolve_4 N
|
||||
// The data layout of C (4x4) is as follows (store 4 data in each register):
|
||||
// U0
|
||||
// U1
|
||||
// U2
|
||||
// U3
|
||||
// The first step is to transpose the result of C
|
||||
GTRANSPOSE4x4_D U0, U1, U2, U3, G0, G1, G2, G3, G4, G5
|
||||
// Now we have the following memory layout of C:
|
||||
// 0 1 2 3
|
||||
// 0 | | | | |
|
||||
// 1 | G0 | G1 | G2 | G3 |
|
||||
// 2 | | | | |
|
||||
// 3 | | | | |
|
||||
// Next we are going to process matrix A with a size of 4x4,
|
||||
// using only the upper triangular portion. The memory layout of
|
||||
// matrix A is as follows:
|
||||
//0 1 2 3
|
||||
// 5 6 7
|
||||
// 10 11
|
||||
// 15
|
||||
// Sequentially extract data from A in row order
|
||||
// Load 0
|
||||
ldrepl_macro 0, 3, 0
|
||||
GMUL xvf, d, G0, G0, U0
|
||||
nmsub_macro 17, 19, 1, G0
|
||||
PTR_ADDI A0, A0, 5 * 8
|
||||
// Load 1
|
||||
ldrepl_macro 1, 3, 0
|
||||
GMUL xvf, d, G1, G1, U1
|
||||
nmsub_macro 18, 19, 2, G1
|
||||
PTR_ADDI A0, A0, 5 * 8
|
||||
// Load 2
|
||||
ldrepl_macro 2, 3, 0
|
||||
GMUL xvf, d, G2, G2, U2
|
||||
nmsub_macro 19, 19, 3, G2
|
||||
PTR_ADDI A0, A0, 5 * 8
|
||||
// Load 3
|
||||
ldrepl_macro 3, 3, 0
|
||||
GMUL xvf, d, G3, G3, U3
|
||||
// Finally, We can store the result.
|
||||
// For B, stored sequentially, and C, first transpose and then store
|
||||
B_st_macro 16, 19, 0, \N
|
||||
GTRANSPOSE4x4_D G0, G1, G2, G3, G0, G1, G2, G3, U0, U1
|
||||
.if \N == 4
|
||||
GST xv, , G0, C0, 0x00, G1, C1, 0x00, G2, C2, 0x00, G3, C3, 0x00
|
||||
.elseif \N == 2
|
||||
GST xv, , G0, C0, 0x00, G1, C1, 0x00
|
||||
.elseif \N == 1
|
||||
GST xv, , G0, C0, 0x00
|
||||
.endif
|
||||
.endm
|
||||
|
||||
.macro dgemm_dsolve_4x4
|
||||
bge ZERO, L, .L_dsolve_4x4_load
|
||||
dgemm_4x4
|
||||
b .L_dsolve_4x4
|
||||
.L_dsolve_4x4_load:
|
||||
/* Load C0 */
|
||||
xvld U0, C0, 0x00
|
||||
/* Load C1 */
|
||||
xvld U1, C1, 0x00
|
||||
/* Load C2 */
|
||||
xvld U2, C2, 0x00
|
||||
/* Load C3 */
|
||||
xvld U3, C3, 0x00
|
||||
/************** solver *****************/
|
||||
.L_dsolve_4x4:
|
||||
dsolve_4 4
|
||||
.endm
|
||||
|
||||
.macro dsolve_2 N
|
||||
// Transpose
|
||||
GSBUTTERFLY xv, d, G0, G1, U1, U0
|
||||
// Now we have the following memory layout of C:
|
||||
// 0 1
|
||||
// 0 | | |
|
||||
// 1 | G0 | G1 |
|
||||
// 2 | | |
|
||||
// 3 | | |
|
||||
// Next we are going to process matrix A with a size of 2x2,
|
||||
// using only the upper triangular portion. The memory layout of
|
||||
// matrix A is as follows:
|
||||
//0 1
|
||||
// 3
|
||||
// Sequentially extract data from A in row order
|
||||
// Load 0
|
||||
ldrepl_macro 0, 1, 0
|
||||
GMUL xvf, d, G0, G0, U0
|
||||
nmsub_macro 17, 17, 1, G0
|
||||
PTR_ADDI A0, A0, 3 * 8
|
||||
// Load 1
|
||||
ldrepl_macro 1, 1, 0
|
||||
GMUL xvf, d, G1, G1, U1
|
||||
// Finally, We can store the result.
|
||||
// For B, stored sequentially, and C, first transpose and then store
|
||||
B_st_macro 16, 17, 0, \N
|
||||
GSBUTTERFLY xv, d, U0, U1, G1, G0
|
||||
.if \N == 4
|
||||
vst $vr0, C0, 0x00
|
||||
vst $vr1, C1, 0x00
|
||||
xvstelm.d U0, C2, 0x00, 0x02
|
||||
xvstelm.d U1, C3, 0x00, 0x02
|
||||
xvstelm.d U0, C2, 0x08, 0x03
|
||||
xvstelm.d U1, C3, 0x08, 0x03
|
||||
.elseif \N == 2
|
||||
vst $vr0, C0, 0x00
|
||||
vst $vr1, C1, 0x00
|
||||
.elseif \N == 1
|
||||
vst $vr0, C0, 0x00
|
||||
.endif
|
||||
.endm
|
||||
|
||||
.macro dgemm_dsolve_2x4
|
||||
bge ZERO, L, .L_dsolve_2x4_load
|
||||
dgemm_2x4
|
||||
b .L_dsolve_2x4
|
||||
.L_dsolve_2x4_load:
|
||||
/* Load C0 */
|
||||
xvld U0, C0, 0x00
|
||||
/* Load C1 */
|
||||
xvld U1, C1, 0x00
|
||||
/* Load C2 */
|
||||
xvld U2, C2, 0x00
|
||||
/* Load C3 */
|
||||
xvld U3, C3, 0x00
|
||||
|
||||
xvpermi.q U0, U2, 0x02
|
||||
xvpermi.q U1, U3, 0x02
|
||||
/********************** solver ******************/
|
||||
.L_dsolve_2x4:
|
||||
dsolve_2 4
|
||||
.endm
|
||||
|
||||
.macro dgemm_dsolve_1x4
|
||||
bge ZERO, L, .L_dsolve_1x4_load
|
||||
dgemm_1x4
|
||||
b .L_dsolve_1x4
|
||||
.L_dsolve_1x4_load:
|
||||
// Load C
|
||||
fld.d $f0, C0, 0x00
|
||||
fld.d $f1, C1, 0x00
|
||||
fld.d $f2, C2, 0x00
|
||||
fld.d $f3, C3, 0x00
|
||||
xvinsve0.d U0, U1, 0x01
|
||||
xvinsve0.d U0, U2, 0x02
|
||||
xvinsve0.d U0, U3, 0x03
|
||||
.L_dsolve_1x4:
|
||||
GLDREPL xv, d, D0, A0, 0x00
|
||||
GMUL xvf, d, U0, U0, D0
|
||||
// Store C
|
||||
xvstelm.d U0, C0, 0x00, 0x00
|
||||
xvstelm.d U0, C1, 0x00, 0x01
|
||||
xvstelm.d U0, C2, 0x00, 0x02
|
||||
xvstelm.d U0, C3, 0x00, 0x03
|
||||
// Store B
|
||||
xvst U0, B0, 0x00
|
||||
.endm
|
||||
|
||||
.macro dgemm_dsolve_16x2
|
||||
bge ZERO, L, .L_dsolve_16x2_load
|
||||
dgemm_16x2
|
||||
b .L_dsolve_16x2
|
||||
.L_dsolve_16x2_load:
|
||||
/* Load C0 */
|
||||
xvld U0, C0, 0x00
|
||||
xvld U1, C0, 0x20
|
||||
xvld U2, C0, 0x40
|
||||
xvld U3, C0, 0x60
|
||||
/* Load C1 */
|
||||
xvld U4, C1, 0x00
|
||||
xvld U5, C1, 0x20
|
||||
xvld U6, C1, 0x40
|
||||
xvld U7, C1, 0x60
|
||||
.L_dsolve_16x2:
|
||||
dsolve_16 2
|
||||
.endm
|
||||
|
||||
.macro dgemm_dsolve_8x2
|
||||
bge ZERO, L, .L_dsolve_8x2_load
|
||||
dgemm_8x2
|
||||
b .L_dsolve_8x2
|
||||
.L_dsolve_8x2_load:
|
||||
/* Load C0 */
|
||||
xvld U0, C0, 0x00
|
||||
xvld U1, C0, 0x20
|
||||
/* Load C1 */
|
||||
xvld U2, C1, 0x00
|
||||
xvld U3, C1, 0x20
|
||||
.L_dsolve_8x2:
|
||||
dsolve_8 2
|
||||
.endm
|
||||
|
||||
.macro dgemm_dsolve_4x2
|
||||
bge ZERO, L, .L_dsolve_4x2_load
|
||||
dgemm_4x2
|
||||
b .L_dsolve_4x2
|
||||
.L_dsolve_4x2_load:
|
||||
/* Load C0 */
|
||||
xvld U0, C0, 0x00
|
||||
/* Load C1 */
|
||||
xvld U1, C1, 0x00
|
||||
.L_dsolve_4x2:
|
||||
dsolve_4 2
|
||||
.endm
|
||||
|
||||
.macro dgemm_dsolve_1x2
|
||||
bge ZERO, L, .L_dsolve_1x2_load
|
||||
dgemm_1x2
|
||||
b .L_dsolve_1x2
|
||||
.L_dsolve_1x2_load:
|
||||
// Load C
|
||||
fld.d $f0, C0, 0x00
|
||||
fld.d $f1, C1, 0x00
|
||||
xvinsve0.d U0, U1, 0x01
|
||||
.L_dsolve_1x2:
|
||||
GLDREPL xv, d, D0, A0, 0x00
|
||||
GMUL xvf, d, U0, U0, D0
|
||||
// Store C
|
||||
xvstelm.d U0, C0, 0x00, 0x00
|
||||
xvstelm.d U0, C1, 0x00, 0x01
|
||||
// Store B
|
||||
vst $vr0, B0, 0x00
|
||||
.endm
|
||||
|
||||
.macro dgemm_dsolve_2x2
|
||||
bge ZERO, L, .L_dsolve_2x2_load
|
||||
dgemm_2x2
|
||||
b .L_dsolve_2x2
|
||||
.L_dsolve_2x2_load:
|
||||
/* Load C0 */
|
||||
xvld U0, C0, 0x00
|
||||
/* Load C1 */
|
||||
xvld U1, C1, 0x00
|
||||
.L_dsolve_2x2:
|
||||
dsolve_2 2
|
||||
.endm
|
||||
|
||||
.macro dgemm_dsolve_16x1
|
||||
bge ZERO, L, .L_dsolve_16x1_load
|
||||
dgemm_16x1
|
||||
b .L_dsolve_16x1
|
||||
.L_dsolve_16x1_load:
|
||||
/* Load C0 */
|
||||
xvld U0, C0, 0x00
|
||||
xvld U1, C0, 0x20
|
||||
xvld U2, C0, 0x40
|
||||
xvld U3, C0, 0x60
|
||||
.L_dsolve_16x1:
|
||||
dsolve_16 1
|
||||
.endm
|
||||
|
||||
.macro dgemm_dsolve_8x1
|
||||
bge ZERO, L, .L_dsolve_8x1_load
|
||||
dgemm_8x1
|
||||
b .L_dsolve_8x1
|
||||
.L_dsolve_8x1_load:
|
||||
/* Load C0 */
|
||||
xvld U0, C0, 0x00
|
||||
xvld U1, C0, 0x20
|
||||
.L_dsolve_8x1:
|
||||
dsolve_8 1
|
||||
.endm
|
||||
|
||||
.macro dgemm_dsolve_4x1
|
||||
bge ZERO, L, .L_dsolve_4x1_load
|
||||
dgemm_4x1
|
||||
b .L_dsolve_4x1
|
||||
.L_dsolve_4x1_load:
|
||||
/* Load C0 */
|
||||
xvld U0, C0, 0x00
|
||||
.L_dsolve_4x1:
|
||||
dsolve_4 1
|
||||
.endm
|
||||
|
||||
.macro dgemm_dsolve_2x1
|
||||
bge ZERO, L, .L_dsolve_2x1_load
|
||||
dgemm_2x1
|
||||
b .L_dsolve_2x1
|
||||
.L_dsolve_2x1_load:
|
||||
/* Load C0 */
|
||||
xvld U0, C0, 0x00
|
||||
.L_dsolve_2x1:
|
||||
dsolve_2 1
|
||||
.endm
|
||||
|
||||
.macro dgemm_dsolve_1x1
|
||||
bge ZERO, L, .L_dsolve_1x1_load
|
||||
dgemm_1x1
|
||||
b .L_dsolve_1x1
|
||||
.L_dsolve_1x1_load:
|
||||
// Load C
|
||||
fld.d $f0, C0, 0x00
|
||||
.L_dsolve_1x1:
|
||||
GLDREPL xv, d, D0, A0, 0x00
|
||||
GMUL xvf, d, U0, U0, D0
|
||||
// Store C
|
||||
xvstelm.d U0, C0, 0x00, 0x00
|
||||
// Store B
|
||||
xvstelm.d U0, B0, 0x00, 0x00
|
||||
.endm
|
||||
|
||||
PROLOGUE
|
||||
push_if_used 26, 32
|
||||
PTR_SLLI LDC, LDC, 3
|
||||
/* if (!(N >> 2)) goto L_N3 */
|
||||
PTR_SRAI J, N, 2 /* J = bn >> 2 */
|
||||
andi N, N, 0x03
|
||||
beq ZERO, J, .L_N3
|
||||
.align 5
|
||||
.L_J1:
|
||||
PTR_ADDI J, J, -1
|
||||
move KK, OFFSET
|
||||
move AA, A
|
||||
move CC, C
|
||||
PTR_SRAI I, M, 4 // M >> 4
|
||||
beqz I, .L_M15
|
||||
.align 4
|
||||
.L_I1:
|
||||
GADD , d, C0, CC, ZERO, C1, C0, LDC, C2, C1, LDC, C3, C2, LDC
|
||||
move A0, AA
|
||||
move B0, B
|
||||
move L, KK
|
||||
dgemm_dsolve_16x4
|
||||
PTR_ADDI I, I, -1
|
||||
PTR_SLLI T0, K, 7
|
||||
PTR_ADDI CC, CC, 0x80 // cc += 16
|
||||
PTR_ADDI KK, KK, 0x10 // kk += 16
|
||||
PTR_ADD AA, AA, T0 // aa += 16 * k
|
||||
bnez I, .L_I1
|
||||
.L_M15:
|
||||
andi I, M, 8
|
||||
beqz I, .L_M7
|
||||
.L_M8:
|
||||
GADD , d, C0, CC, ZERO, C1, C0, LDC, C2, C1, LDC, C3, C2, LDC
|
||||
move A0, AA
|
||||
move B0, B
|
||||
move L, KK
|
||||
dgemm_dsolve_8x4
|
||||
PTR_SLLI T0, K, 6
|
||||
PTR_ADDI CC, CC, 0x40 // cc += 8
|
||||
PTR_ADDI KK, KK, 0x08 // kk += 8
|
||||
PTR_ADD AA, AA, T0 // aa += 8 * k
|
||||
.L_M7:
|
||||
andi I, M, 4
|
||||
beqz I, .L_M3
|
||||
.L_M4:
|
||||
GADD , d, C0, CC, ZERO, C1, C0, LDC, C2, C1, LDC, C3, C2, LDC
|
||||
move A0, AA
|
||||
move B0, B
|
||||
move L, KK
|
||||
dgemm_dsolve_4x4
|
||||
PTR_SLLI T0, K, 5
|
||||
PTR_ADDI CC, CC, 0x20 // cc += 4
|
||||
PTR_ADDI KK, KK, 0x04 // kk += 4
|
||||
PTR_ADD AA, AA, T0 // aa += 4 * k
|
||||
.L_M3:
|
||||
andi I, M, 2
|
||||
beqz I, .L_M1
|
||||
.L_M2:
|
||||
GADD , d, C0, CC, ZERO, C1, C0, LDC, C2, C1, LDC, C3, C2, LDC
|
||||
move A0, AA
|
||||
move B0, B
|
||||
move L, KK
|
||||
dgemm_dsolve_2x4
|
||||
PTR_SLLI T0, K, 4
|
||||
PTR_ADDI CC, CC, 0x10 // cc += 2
|
||||
PTR_ADDI KK, KK, 0x02 // kk += 2
|
||||
PTR_ADD AA, AA, T0 // aa += 2 * k
|
||||
.L_M1:
|
||||
andi I, M, 1
|
||||
beqz I, .L_M0
|
||||
GADD , d, C0, CC, ZERO, C1, C0, LDC, C2, C1, LDC, C3, C2, LDC
|
||||
move A0, AA
|
||||
move B0, B
|
||||
move L, KK
|
||||
dgemm_dsolve_1x4
|
||||
PTR_SLLI T0, K, 3
|
||||
PTR_ADDI CC, CC, 0x08 // cc += 1
|
||||
PTR_ADDI KK, KK, 0x01 // kk += 1
|
||||
PTR_ADD AA, AA, T0 // aa += 1 * k
|
||||
.L_M0:
|
||||
PTR_SLLI T0, K, 5
|
||||
PTR_SLLI T1, LDC, 2
|
||||
PTR_ADD B, B, T0 // b += 4 * k
|
||||
PTR_ADD C, C, T1 // c += 4 * ldc
|
||||
bnez J, .L_J1
|
||||
.L_N3:
|
||||
andi J, N, 2
|
||||
beq ZERO, J, .L_N1
|
||||
.L_N2:
|
||||
move KK, OFFSET
|
||||
move AA, A
|
||||
move CC, C
|
||||
PTR_SRAI I, M, 4 // M >> 4
|
||||
beqz I, .L_N2_M15
|
||||
.align 4
|
||||
.L_N2_I1:
|
||||
GADD , d, C0, CC, ZERO, C1, C0, LDC
|
||||
move A0, AA
|
||||
move B0, B
|
||||
move L, KK
|
||||
dgemm_dsolve_16x2
|
||||
PTR_ADDI I, I, -1
|
||||
PTR_SLLI T0, K, 7
|
||||
PTR_ADDI CC, CC, 0x80 // cc += 16
|
||||
PTR_ADDI KK, KK, 0x10 // kk += 16
|
||||
PTR_ADD AA, AA, T0 // aa += 16 * k
|
||||
bnez I, .L_N2_I1
|
||||
.L_N2_M15:
|
||||
andi I, M, 8
|
||||
beqz I, .L_N2_M7
|
||||
.L_N2_M8:
|
||||
GADD , d, C0, CC, ZERO, C1, C0, LDC
|
||||
move A0, AA
|
||||
move B0, B
|
||||
move L, KK
|
||||
dgemm_dsolve_8x2
|
||||
PTR_SLLI T0, K, 6
|
||||
PTR_ADDI CC, CC, 0x40 // cc += 8
|
||||
PTR_ADDI KK, KK, 0x08 // kk += 8
|
||||
PTR_ADD AA, AA, T0 // aa += 8 * k
|
||||
.L_N2_M7:
|
||||
andi I, M, 4
|
||||
beqz I, .L_N2_M3
|
||||
.L_N2_M4:
|
||||
GADD , d, C0, CC, ZERO, C1, C0, LDC
|
||||
move A0, AA
|
||||
move B0, B
|
||||
move L, KK
|
||||
dgemm_dsolve_4x2
|
||||
PTR_SLLI T0, K, 5
|
||||
PTR_ADDI CC, CC, 0x20 // cc += 4
|
||||
PTR_ADDI KK, KK, 0x04 // kk += 4
|
||||
PTR_ADD AA, AA, T0 // aa += 4 * k
|
||||
.L_N2_M3:
|
||||
andi I, M, 2
|
||||
beqz I, .L_N2_M1
|
||||
.L_N2_M2:
|
||||
GADD , d, C0, CC, ZERO, C1, C0, LDC
|
||||
move A0, AA
|
||||
move B0, B
|
||||
move L, KK
|
||||
dgemm_dsolve_2x2
|
||||
PTR_SLLI T0, K, 4
|
||||
PTR_ADDI CC, CC, 0x10 // cc += 2
|
||||
PTR_ADDI KK, KK, 0x02 // kk += 2
|
||||
PTR_ADD AA, AA, T0 // aa += 2 * k
|
||||
.L_N2_M1:
|
||||
andi I, M, 1
|
||||
beqz I, .L_N2_M0
|
||||
GADD , d, C0, CC, ZERO, C1, C0, LDC
|
||||
move A0, AA
|
||||
move B0, B
|
||||
move L, KK
|
||||
dgemm_dsolve_1x2
|
||||
PTR_SLLI T0, K, 3
|
||||
PTR_ADDI CC, CC, 0x08 // cc += 1
|
||||
PTR_ADDI KK, KK, 0x01 // kk += 1
|
||||
PTR_ADD AA, AA, T0 // aa += 1 * k
|
||||
.L_N2_M0:
|
||||
PTR_SLLI T0, K, 4
|
||||
PTR_SLLI T1, LDC, 1
|
||||
PTR_ADD B, B, T0 // b += 2 * k
|
||||
PTR_ADD C, C, T1 // c += 2 * ldc
|
||||
.L_N1:
|
||||
andi J, N, 1
|
||||
beq ZERO, J, .L_N0
|
||||
|
||||
move KK, OFFSET
|
||||
move AA, A
|
||||
move CC, C
|
||||
PTR_SRAI I, M, 4 // M >> 4
|
||||
beqz I, .L_N1_M15
|
||||
.align 4
|
||||
.L_N1_I1:
|
||||
GADD , d, C0, CC, ZERO
|
||||
move A0, AA
|
||||
move B0, B
|
||||
move L, KK
|
||||
dgemm_dsolve_16x1
|
||||
PTR_ADDI I, I, -1
|
||||
PTR_SLLI T0, K, 7
|
||||
PTR_ADDI CC, CC, 0x80 // cc += 16
|
||||
PTR_ADDI KK, KK, 0x10 // kk += 16
|
||||
PTR_ADD AA, AA, T0 // aa += 16 * k
|
||||
bnez I, .L_N1_I1
|
||||
.L_N1_M15:
|
||||
andi I, M, 8
|
||||
beqz I, .L_N1_M7
|
||||
.L_N1_M8:
|
||||
GADD , d, C0, CC, ZERO
|
||||
move A0, AA
|
||||
move B0, B
|
||||
move L, KK
|
||||
dgemm_dsolve_8x1
|
||||
PTR_SLLI T0, K, 6
|
||||
PTR_ADDI CC, CC, 0x40 // cc += 8
|
||||
PTR_ADDI KK, KK, 0x08 // kk += 8
|
||||
PTR_ADD AA, AA, T0 // aa += 8 * k
|
||||
.L_N1_M7:
|
||||
andi I, M, 4
|
||||
beqz I, .L_N1_M3
|
||||
.L_N1_M4:
|
||||
GADD , d, C0, CC, ZERO
|
||||
move A0, AA
|
||||
move B0, B
|
||||
move L, KK
|
||||
dgemm_dsolve_4x1
|
||||
PTR_SLLI T0, K, 5
|
||||
PTR_ADDI CC, CC, 0x20 // cc += 4
|
||||
PTR_ADDI KK, KK, 0x04 // kk += 4
|
||||
PTR_ADD AA, AA, T0 // aa += 4 * k
|
||||
.L_N1_M3:
|
||||
andi I, M, 2
|
||||
beqz I, .L_N1_M1
|
||||
.L_N1_M2:
|
||||
GADD , d, C0, CC, ZERO
|
||||
move A0, AA
|
||||
move B0, B
|
||||
move L, KK
|
||||
dgemm_dsolve_2x1
|
||||
PTR_SLLI T0, K, 4
|
||||
PTR_ADDI CC, CC, 0x10 // cc += 2
|
||||
PTR_ADDI KK, KK, 0x02 // kk += 2
|
||||
PTR_ADD AA, AA, T0 // aa += 2 * k
|
||||
.L_N1_M1:
|
||||
andi I, M, 1
|
||||
beqz I, .L_N1_M0
|
||||
GADD , d, C0, CC, ZERO
|
||||
move A0, AA
|
||||
move B0, B
|
||||
move L, KK
|
||||
dgemm_dsolve_1x1
|
||||
PTR_SLLI T0, K, 3
|
||||
PTR_ADDI CC, CC, 0x08 // cc += 1
|
||||
PTR_ADDI KK, KK, 0x01 // kk += 1
|
||||
PTR_ADD AA, AA, T0 // aa += 1 * k
|
||||
.L_N1_M0:
|
||||
.L_N0:
|
||||
pop_if_used 26, 32
|
||||
jirl $r0, $r1, 0x0
|
||||
EPILOGUE
|
||||
|
|
@ -0,0 +1,882 @@
|
|||
/*******************************************************************************
|
||||
Copyright (c) 2023, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*******************************************************************************/
|
||||
#define ASSEMBLER
|
||||
|
||||
#include "common.h"
|
||||
#include "loongarch64_asm.S"
|
||||
|
||||
/*********************************************************************
|
||||
* 2023/09/26 guxiwei
|
||||
* UTEST : OK
|
||||
* CTEST : OK
|
||||
* TEST : OK
|
||||
*
|
||||
*
|
||||
*********************************************************************/
|
||||
|
||||
/* int CNAME(BLASLONG m, BLASLONG n, BLASLONG k, FLOAT dummy1, FLOAT *a, FLOAT *b,
|
||||
* FLOAT *c, BLASLONG ldc, BLASLONG offset)
|
||||
*/
|
||||
|
||||
#define M $r4 // param 1: bm
|
||||
#define N $r5 // param 2: bn
|
||||
#define K $r6 // param 3: bk
|
||||
#define A $r7 // param 5: ba
|
||||
#define B $r8 // param 6: bb
|
||||
#define C $r9 // param 7: bc
|
||||
#define LDC $r10 // param 8: ldc
|
||||
#define OFFSET $r11 // param 9: offset
|
||||
|
||||
/* Cycle control parameters */
|
||||
#define I $r13
|
||||
#define J $r14
|
||||
#define L $r15
|
||||
#define TL $r16
|
||||
/* Matrix address */
|
||||
#define A0 $r17
|
||||
#define B0 $r18
|
||||
#define C0 $r19
|
||||
#define C1 $r20
|
||||
#define C2 $r23
|
||||
#define C3 $r24
|
||||
#define T0 $r25
|
||||
#define T1 $r26
|
||||
#define T2 $r27
|
||||
#define KK $r28
|
||||
#define AA $r29
|
||||
#define CC $r30
|
||||
#define BB B0
|
||||
#undef ZERO
|
||||
#define ZERO $r0
|
||||
|
||||
#define U0 $xr0
|
||||
#define U1 $xr1
|
||||
#define U2 $xr2
|
||||
#define U3 $xr3
|
||||
#define U4 $xr4
|
||||
#define U5 $xr5
|
||||
#define U6 $xr6
|
||||
#define U7 $xr7
|
||||
#define U8 $xr8
|
||||
#define U9 $xr9
|
||||
#define U10 $xr10
|
||||
#define U11 $xr11
|
||||
#define U12 $xr12
|
||||
#define U13 $xr13
|
||||
#define U14 $xr14
|
||||
#define U15 $xr15
|
||||
#define D0 $xr16
|
||||
#define D1 $xr17
|
||||
#define D2 $xr18
|
||||
#define D3 $xr19
|
||||
#define D4 $xr20
|
||||
#define D5 $xr21
|
||||
#define D6 $xr22
|
||||
#define D7 $xr23
|
||||
#define D8 $xr24
|
||||
#define D9 $xr25
|
||||
#define D10 $xr26
|
||||
#define D11 $xr27
|
||||
#define D12 $xr28
|
||||
#define D13 $xr29
|
||||
#define D14 $xr30
|
||||
#define D15 $xr31
|
||||
#define G0 D0
|
||||
#define G1 D1
|
||||
#define G2 D2
|
||||
#define G3 D3
|
||||
#define G4 D4
|
||||
#define G5 D5
|
||||
#define G6 D6
|
||||
#define G7 D7
|
||||
#define G8 D8
|
||||
#define G9 D9
|
||||
#define G10 D10
|
||||
#define G11 D11
|
||||
#define G12 D12
|
||||
#define G13 D13
|
||||
#define G14 D14
|
||||
#define G15 D15
|
||||
|
||||
/* Prefetch interval */
|
||||
#define A_PRE 0x400
|
||||
#define B_PRE 0x100
|
||||
|
||||
#include "dtrsm_kernel_macro.S"
|
||||
|
||||
.macro ldrepl_macro start, end, stride
|
||||
// Load Ux (x = 0...15)
|
||||
.if \start <= \end
|
||||
GLDREPL xv, d, $xr\start, B0, \stride * 8
|
||||
ldrepl_macro %start + 1, \end, %stride + 1
|
||||
.endif
|
||||
.endm
|
||||
|
||||
.macro nmsub_macro start0, end0, start1, reg
|
||||
// Ux -= reg * Dx
|
||||
.if \start0 <= \end0
|
||||
xvfnmsub.d $xr\start0, \reg, $xr\start1, $xr\start0
|
||||
nmsub_macro %start0 + 1, \end0, %start1 + 1, \reg
|
||||
.endif
|
||||
.endm
|
||||
|
||||
.macro A_st_macro start, end, stride, N
|
||||
// Store Ux(x = 0...15)
|
||||
.if \start <= \end
|
||||
.if \N == 4
|
||||
xvst $xr\start, A0, \stride * 0x20
|
||||
.elseif \N == 2
|
||||
vst $vr\start, A0, \stride * 0x10
|
||||
.elseif \N == 1
|
||||
fst.d $f\start, A0, \stride * 0x08
|
||||
.endif
|
||||
A_st_macro %start + 1, \end, %stride + 1, \N
|
||||
.endif
|
||||
.endm
|
||||
|
||||
.macro dsolve_16x4
|
||||
// We are going to process matrix B with a size of 4x4,
|
||||
// using only the upper triangular portion. The memory layout of
|
||||
// matrix B is as follows:
|
||||
//0 1 2 3
|
||||
// 5 6 7
|
||||
// 10 11
|
||||
// 15
|
||||
// Sequentially extract data from B in row order
|
||||
ldrepl_macro 16, 19, 0
|
||||
GMUL xvf, d, U0, D0, U0, U1, D0, U1, U2, D0, U2, U3, D0, U3
|
||||
ldrepl_macro 20, 22, 5
|
||||
nmsub_macro 4, 7, 0, D1
|
||||
ldrepl_macro 23, 24, 10
|
||||
GMUL xvf, d, U4, D4, U4, U5, D4, U5, U6, D4, U6, U7, D4, U7
|
||||
ldrepl_macro 25, 25, 15
|
||||
nmsub_macro 8, 11, 0, D2
|
||||
nmsub_macro 8, 11, 4, D5
|
||||
GMUL xvf, d, U8, D7, U8, U9, D7, U9, U10, D7, U10, U11, D7, U11
|
||||
nmsub_macro 12, 15, 0, D3
|
||||
nmsub_macro 12, 15, 4, D6
|
||||
nmsub_macro 12, 15, 8, D8
|
||||
GMUL xvf, d, U12, D9, U12, U13, D9, U13, U14, D9, U14, U15, D9, U15
|
||||
// Store A
|
||||
A_st_macro 0, 15, 0, 4
|
||||
// Store C
|
||||
GST xv, , U0, C0, 0x00, U1, C0, 0x20, U2, C0, 0x40, U3, C0, 0x60, \
|
||||
U4, C1, 0x00, U5, C1, 0x20, U6, C1, 0x40, U7, C1, 0x60, \
|
||||
U8, C2, 0x00, U9, C2, 0x20, U10, C2, 0x40, U11, C2, 0x60, \
|
||||
U12, C3, 0x00, U13, C3, 0x20, U14, C3, 0x40, U15, C3, 0x60
|
||||
.endm
|
||||
|
||||
.macro dsolve_16x2
|
||||
// We are going to process matrix B with a size of 2x2,
|
||||
// using only the upper triangular portion. The memory layout of
|
||||
// matrix B is as follows:
|
||||
//0 1
|
||||
// 3
|
||||
// Sequentially extract data from B in row order
|
||||
ldrepl_macro 16, 17, 0
|
||||
GMUL xvf, d, U0, D0, U0, U1, D0, U1, U2, D0, U2, U3, D0, U3
|
||||
ldrepl_macro 18, 18, 3
|
||||
nmsub_macro 4, 7, 0, D1
|
||||
GMUL xvf, d, U4, D2, U4, U5, D2, U5, U6, D2, U6, U7, D2, U7
|
||||
// Store A
|
||||
A_st_macro 0, 7, 0, 4
|
||||
// Store C
|
||||
GST xv, , U0, C0, 0x00, U1, C0, 0x20, U2, C0, 0x40, U3, C0, 0x60, \
|
||||
U4, C1, 0x00, U5, C1, 0x20, U6, C1, 0x40, U7, C1, 0x60
|
||||
.endm
|
||||
|
||||
.macro dsolve_8x4
|
||||
// We are going to process matrix B with a size of 4x4,
|
||||
// using only the upper triangular portion. The memory layout of
|
||||
// matrix B is as follows:
|
||||
//0 1 2 3
|
||||
// 5 6 7
|
||||
// 10 11
|
||||
// 15
|
||||
// Sequentially extract data from B in row order
|
||||
ldrepl_macro 16, 19, 0
|
||||
GMUL xvf, d, U0, D0, U0, U1, D0, U1
|
||||
ldrepl_macro 20, 22, 5
|
||||
nmsub_macro 2, 3, 0, D1
|
||||
ldrepl_macro 23, 24, 10
|
||||
GMUL xvf, d, U2, D4, U2, U3, D4, U3
|
||||
ldrepl_macro 25, 25, 15
|
||||
nmsub_macro 4, 5, 0, D2
|
||||
nmsub_macro 4, 5, 2, D5
|
||||
GMUL xvf, d, U4, D7, U4, U5, D7, U5
|
||||
nmsub_macro 6, 7, 0, D3
|
||||
nmsub_macro 6, 7, 2, D6
|
||||
nmsub_macro 6, 7, 4, D8
|
||||
GMUL xvf, d, U6, D9, U6, U7, D9, U7
|
||||
// Store A
|
||||
A_st_macro 0, 7, 0, 4
|
||||
// Store C
|
||||
GST xv, , U0, C0, 0x00, U1, C0, 0x20, \
|
||||
U2, C1, 0x00, U3, C1, 0x20, \
|
||||
U4, C2, 0x00, U5, C2, 0x20, \
|
||||
U6, C3, 0x00, U7, C3, 0x20
|
||||
.endm
|
||||
|
||||
.macro dsolve_8x2
|
||||
// We are going to process matrix B with a size of 2x2,
|
||||
// using only the upper triangular portion. The memory layout of
|
||||
// matrix B is as follows:
|
||||
//0 1
|
||||
// 3
|
||||
// Sequentially extract data from B in row order
|
||||
ldrepl_macro 16, 17, 0
|
||||
GMUL xvf, d, U0, D0, U0, U1, D0, U1
|
||||
ldrepl_macro 18, 18, 3
|
||||
nmsub_macro 2, 3, 0, D1
|
||||
GMUL xvf, d, U2, D2, U2, U3, D2, U3
|
||||
// Store A
|
||||
A_st_macro 0, 3, 0, 4
|
||||
// Store C
|
||||
GST xv, , U0, C0, 0x00, U1, C0, 0x20, \
|
||||
U2, C1, 0x00, U3, C1, 0x20
|
||||
.endm
|
||||
|
||||
.macro dsolve_4x4
|
||||
// We are going to process matrix B with a size of 4x4,
|
||||
// using only the upper triangular portion. The memory layout of
|
||||
// matrix B is as follows:
|
||||
//0 1 2 3
|
||||
// 5 6 7
|
||||
// 10 11
|
||||
// 15
|
||||
// Sequentially extract data from B in row order
|
||||
ldrepl_macro 16, 19, 0
|
||||
GMUL xvf, d, U0, D0, U0
|
||||
ldrepl_macro 20, 22, 5
|
||||
nmsub_macro 1, 1, 0, D1
|
||||
ldrepl_macro 23, 24, 10
|
||||
GMUL xvf, d, U1, D4, U1
|
||||
ldrepl_macro 25, 25, 15
|
||||
nmsub_macro 2, 2, 0, D2
|
||||
nmsub_macro 2, 2, 1, D5
|
||||
GMUL xvf, d, U2, D7, U2
|
||||
nmsub_macro 3, 3, 0, D3
|
||||
nmsub_macro 3, 3, 1, D6
|
||||
nmsub_macro 3, 3, 2, D8
|
||||
GMUL xvf, d, U3, D9, U3
|
||||
// Store A
|
||||
A_st_macro 0, 3, 0, 4
|
||||
// Store C
|
||||
GST xv, , U0, C0, 0x00, U1, C1, 0x00, U2, C2, 0x00, U3, C3, 0x00
|
||||
.endm
|
||||
|
||||
.macro dsolve_4x2
|
||||
// We are going to process matrix B with a size of 2x2,
|
||||
// using only the upper triangular portion. The memory layout of
|
||||
// matrix B is as follows:
|
||||
//0 1
|
||||
// 3
|
||||
// Sequentially extract data from B in row order
|
||||
ldrepl_macro 16, 17, 0
|
||||
GMUL xvf, d, U0, D0, U0
|
||||
ldrepl_macro 18, 18, 3
|
||||
nmsub_macro 1, 1, 0, D1
|
||||
GMUL xvf, d, U1, D2, U1
|
||||
// Store A
|
||||
A_st_macro 0, 1, 0, 4
|
||||
// Store C
|
||||
GST xv, , U0, C0, 0x00, U1, C1, 0x00
|
||||
.endm
|
||||
|
||||
.macro dsolve_2x4
|
||||
// We are going to process matrix B with a size of 4x4,
|
||||
// using only the upper triangular portion. The memory layout of
|
||||
// matrix B is as follows:
|
||||
//0 1 2 3
|
||||
// 5 6 7
|
||||
// 10 11
|
||||
// 15
|
||||
// Sequentially extract data from B in row order
|
||||
ldrepl_macro 16, 19, 0
|
||||
GMUL xvf, d, U0, D0, U0
|
||||
ldrepl_macro 20, 22, 5
|
||||
nmsub_macro 1, 1, 0, D1
|
||||
ldrepl_macro 23, 24, 10
|
||||
GMUL xvf, d, U1, D4, U1
|
||||
|
||||
ldrepl_macro 25, 25, 15
|
||||
nmsub_macro 2, 2, 0, D2
|
||||
nmsub_macro 2, 2, 1, D5
|
||||
GMUL xvf, d, U2, D7, U2
|
||||
nmsub_macro 3, 3, 0, D3
|
||||
nmsub_macro 3, 3, 1, D6
|
||||
nmsub_macro 3, 3, 2, D8
|
||||
GMUL xvf, d, U3, D9, U3
|
||||
// Store A
|
||||
A_st_macro 0, 3, 0, 2
|
||||
// Store C
|
||||
GST v, , $vr0, C0, 0x00, $vr1, C1, 0x00, $vr2, C2, 0x00, $vr3, C3, 0x00,
|
||||
.endm
|
||||
|
||||
.macro dsolve_2x2
|
||||
// We are going to process matrix B with a size of 2x2,
|
||||
// using only the upper triangular portion. The memory layout of
|
||||
// matrix B is as follows:
|
||||
//0 1
|
||||
// 3
|
||||
// Sequentially extract data from B in row order
|
||||
ldrepl_macro 16, 17, 0
|
||||
GMUL xvf, d, U0, D0, U0
|
||||
ldrepl_macro 18, 18, 3
|
||||
nmsub_macro 1, 1, 0, D1
|
||||
GMUL xvf, d, U1, D2, U1
|
||||
// Store A
|
||||
A_st_macro 0, 1, 0, 2
|
||||
// Store C
|
||||
GST v, , $vr0, C0, 0x00, $vr1, C1, 0x00
|
||||
.endm
|
||||
|
||||
.macro dsolve_1x4
|
||||
// We are going to process matrix B with a size of 4x4,
|
||||
// using only the upper triangular portion. The memory layout of
|
||||
// matrix B is as follows:
|
||||
//0 1 2 3
|
||||
// 5 6 7
|
||||
// 10 11
|
||||
// 15
|
||||
// Sequentially extract data from B in row order
|
||||
ldrepl_macro 16, 19, 0
|
||||
GMUL xvf, d, U0, D0, U0
|
||||
ldrepl_macro 20, 22, 5
|
||||
nmsub_macro 1, 1, 0, D1
|
||||
ldrepl_macro 23, 24, 10
|
||||
GMUL xvf, d, U1, D4, U1
|
||||
|
||||
ldrepl_macro 25, 25, 15
|
||||
nmsub_macro 2, 2, 0, D2
|
||||
nmsub_macro 2, 2, 1, D5
|
||||
GMUL xvf, d, U2, D7, U2
|
||||
nmsub_macro 3, 3, 0, D3
|
||||
nmsub_macro 3, 3, 1, D6
|
||||
nmsub_macro 3, 3, 2, D8
|
||||
GMUL xvf, d, U3, D9, U3
|
||||
// Store A
|
||||
A_st_macro 0, 3, 0, 1
|
||||
// Store C
|
||||
GST f, d, $f0, C0, 0x00, $f1, C1, 0x00, $f2, C2, 0x00, $f3, C3, 0x00,
|
||||
.endm
|
||||
|
||||
.macro dsolve_1x2
|
||||
// We are going to process matrix B with a size of 2x2,
|
||||
// using only the upper triangular portion. The memory layout of
|
||||
// matrix B is as follows:
|
||||
//0 1
|
||||
// 3
|
||||
// Sequentially extract data from B in row order
|
||||
ldrepl_macro 16, 17, 0
|
||||
GMUL xvf, d, U0, D0, U0
|
||||
ldrepl_macro 18, 18, 3
|
||||
nmsub_macro 1, 1, 0, D1
|
||||
GMUL xvf, d, U1, D2, U1
|
||||
// Store A
|
||||
A_st_macro 0, 1, 0, 1
|
||||
// Store C
|
||||
GST f, d, $f0, C0, 0x00, $f1, C1, 0x00
|
||||
.endm
|
||||
|
||||
.macro dgemm_dsolve_16x4
|
||||
bge ZERO, L, .L_dsolve_16x4_load
|
||||
dgemm_16x4
|
||||
b .L_dsolve_16x4
|
||||
.L_dsolve_16x4_load:
|
||||
// Load C
|
||||
GLD xv, , U0, C0, 0x00, U1, C0, 0x20, U2, C0, 0x40, U3, C0, 0x60
|
||||
GLD xv, , U4, C1, 0x00, U5, C1, 0x20, U6, C1, 0x40, U7, C1, 0x60
|
||||
GLD xv, , U8, C2, 0x00, U9, C2, 0x20, U10, C2, 0x40, U11, C2, 0x60
|
||||
GLD xv, , U12, C3, 0x00, U13, C3, 0x20, U14, C3, 0x40, U15, C3, 0x60
|
||||
/********************** solver ******************/
|
||||
.L_dsolve_16x4:
|
||||
dsolve_16x4
|
||||
.endm
|
||||
|
||||
.macro dgemm_dsolve_8x4
|
||||
bge ZERO, L, .L_dsolve_8x4_load
|
||||
dgemm_8x4
|
||||
b .L_dsolve_8x4
|
||||
.L_dsolve_8x4_load:
|
||||
/* Load C0 */
|
||||
xvld U0, C0, 0x00
|
||||
xvld U1, C0, 0x20
|
||||
|
||||
/* Load C1 */
|
||||
xvld U2, C1, 0x00
|
||||
xvld U3, C1, 0x20
|
||||
|
||||
/* Load C2 */
|
||||
xvld U4, C2, 0x00
|
||||
xvld U5, C2, 0x20
|
||||
|
||||
/* Load C3 */
|
||||
xvld U6, C3, 0x00
|
||||
xvld U7, C3, 0x20
|
||||
/********* solver *********/
|
||||
.L_dsolve_8x4:
|
||||
dsolve_8x4
|
||||
.endm
|
||||
|
||||
.macro dgemm_dsolve_4x4
|
||||
bge ZERO, L, .L_dsolve_4x4_load
|
||||
dgemm_4x4
|
||||
b .L_dsolve_4x4
|
||||
.L_dsolve_4x4_load:
|
||||
/* Load C0 */
|
||||
xvld U0, C0, 0x00
|
||||
/* Load C1 */
|
||||
xvld U1, C1, 0x00
|
||||
/* Load C2 */
|
||||
xvld U2, C2, 0x00
|
||||
/* Load C3 */
|
||||
xvld U3, C3, 0x00
|
||||
/************** solver *****************/
|
||||
.L_dsolve_4x4:
|
||||
dsolve_4x4
|
||||
.endm
|
||||
|
||||
.macro dgemm_dsolve_2x4
|
||||
bge ZERO, L, .L_dsolve_2x4_load
|
||||
dgemm_2x4
|
||||
xvpermi.q U2, U0, 0x01
|
||||
xvpermi.q U3, U1, 0x01
|
||||
b .L_dsolve_2x4
|
||||
.L_dsolve_2x4_load:
|
||||
/* Load C0 */
|
||||
xvld U0, C0, 0x00
|
||||
/* Load C1 */
|
||||
xvld U1, C1, 0x00
|
||||
/* Load C2 */
|
||||
xvld U2, C2, 0x00
|
||||
/* Load C3 */
|
||||
xvld U3, C3, 0x00
|
||||
/********************** solver ******************/
|
||||
.L_dsolve_2x4:
|
||||
dsolve_2x4
|
||||
.endm
|
||||
|
||||
.macro dgemm_dsolve_1x4
|
||||
bge ZERO, L, .L_dsolve_1x4_load
|
||||
dgemm_1x4
|
||||
xvpackod.d U1, U0, U0
|
||||
xvpermi.q U2, U0, 0x01
|
||||
xvpermi.q U3, U1, 0x01
|
||||
b .L_dsolve_1x4
|
||||
.L_dsolve_1x4_load:
|
||||
// Load C
|
||||
fld.d $f0, C0, 0x00
|
||||
fld.d $f1, C1, 0x00
|
||||
fld.d $f2, C2, 0x00
|
||||
fld.d $f3, C3, 0x00
|
||||
.L_dsolve_1x4:
|
||||
dsolve_1x4
|
||||
.endm
|
||||
|
||||
.macro dgemm_dsolve_16x2
|
||||
bge ZERO, L, .L_dsolve_16x2_load
|
||||
dgemm_16x2
|
||||
b .L_dsolve_16x2
|
||||
.L_dsolve_16x2_load:
|
||||
/* Load C0 */
|
||||
xvld U0, C0, 0x00
|
||||
xvld U1, C0, 0x20
|
||||
xvld U2, C0, 0x40
|
||||
xvld U3, C0, 0x60
|
||||
/* Load C1 */
|
||||
xvld U4, C1, 0x00
|
||||
xvld U5, C1, 0x20
|
||||
xvld U6, C1, 0x40
|
||||
xvld U7, C1, 0x60
|
||||
.L_dsolve_16x2:
|
||||
dsolve_16x2
|
||||
.endm
|
||||
|
||||
.macro dgemm_dsolve_8x2
|
||||
bge ZERO, L, .L_dsolve_8x2_load
|
||||
dgemm_8x2
|
||||
b .L_dsolve_8x2
|
||||
.L_dsolve_8x2_load:
|
||||
/* Load C0 */
|
||||
xvld U0, C0, 0x00
|
||||
xvld U1, C0, 0x20
|
||||
/* Load C1 */
|
||||
xvld U2, C1, 0x00
|
||||
xvld U3, C1, 0x20
|
||||
.L_dsolve_8x2:
|
||||
dsolve_8x2
|
||||
.endm
|
||||
|
||||
.macro dgemm_dsolve_4x2
|
||||
bge ZERO, L, .L_dsolve_4x2_load
|
||||
dgemm_4x2
|
||||
b .L_dsolve_4x2
|
||||
.L_dsolve_4x2_load:
|
||||
/* Load C0 */
|
||||
xvld U0, C0, 0x00
|
||||
/* Load C1 */
|
||||
xvld U1, C1, 0x00
|
||||
.L_dsolve_4x2:
|
||||
dsolve_4x2
|
||||
.endm
|
||||
|
||||
.macro dgemm_dsolve_2x2
|
||||
bge ZERO, L, .L_dsolve_2x2_load
|
||||
dgemm_2x2
|
||||
b .L_dsolve_2x2
|
||||
.L_dsolve_2x2_load:
|
||||
/* Load C0 */
|
||||
xvld U0, C0, 0x00
|
||||
/* Load C1 */
|
||||
xvld U1, C1, 0x00
|
||||
.L_dsolve_2x2:
|
||||
dsolve_2x2
|
||||
.endm
|
||||
|
||||
.macro dgemm_dsolve_1x2
|
||||
bge ZERO, L, .L_dsolve_1x2_load
|
||||
dgemm_1x2
|
||||
xvpackod.d U1, U0, U0
|
||||
b .L_dsolve_1x2
|
||||
.L_dsolve_1x2_load:
|
||||
// Load C
|
||||
fld.d $f0, C0, 0x00
|
||||
fld.d $f1, C1, 0x00
|
||||
.L_dsolve_1x2:
|
||||
dsolve_1x2
|
||||
.endm
|
||||
|
||||
.macro dgemm_dsolve_16x1
|
||||
bge ZERO, L, .L_dsolve_16x1_load
|
||||
dgemm_16x1
|
||||
b .L_dsolve_16x1
|
||||
.L_dsolve_16x1_load:
|
||||
/* Load C0 */
|
||||
xvld U0, C0, 0x00
|
||||
xvld U1, C0, 0x20
|
||||
xvld U2, C0, 0x40
|
||||
xvld U3, C0, 0x60
|
||||
.L_dsolve_16x1:
|
||||
ldrepl_macro 16, 16, 0
|
||||
GMUL xvf, d, U0, D0, U0, U1, D0, U1, U2, D0, U2, U3, D0, U3
|
||||
// Store A
|
||||
A_st_macro 0, 3, 0, 4
|
||||
// Strore C
|
||||
GST xv, , U0, C0, 0x00, U1, C0, 0x20, U2, C0, 0x40, U3, C0, 0x60
|
||||
.endm
|
||||
|
||||
.macro dgemm_dsolve_8x1
|
||||
bge ZERO, L, .L_dsolve_8x1_load
|
||||
dgemm_8x1
|
||||
b .L_dsolve_8x1
|
||||
.L_dsolve_8x1_load:
|
||||
/* Load C0 */
|
||||
xvld U0, C0, 0x00
|
||||
xvld U1, C0, 0x20
|
||||
.L_dsolve_8x1:
|
||||
ldrepl_macro 16, 16, 0
|
||||
GMUL xvf, d, U0, D0, U0, U1, D0, U1
|
||||
// Store A
|
||||
A_st_macro 0, 1, 0, 4
|
||||
// Strore C
|
||||
GST xv, , U0, C0, 0x00, U1, C0, 0x20
|
||||
.endm
|
||||
|
||||
.macro dgemm_dsolve_4x1
|
||||
bge ZERO, L, .L_dsolve_4x1_load
|
||||
dgemm_4x1
|
||||
b .L_dsolve_4x1
|
||||
.L_dsolve_4x1_load:
|
||||
/* Load C0 */
|
||||
xvld U0, C0, 0x00
|
||||
.L_dsolve_4x1:
|
||||
ldrepl_macro 16, 16, 0
|
||||
GMUL xvf, d, U0, D0, U0
|
||||
// Store A
|
||||
A_st_macro 0, 0, 0, 4
|
||||
// Strore C
|
||||
GST xv, , U0, C0, 0x00
|
||||
.endm
|
||||
|
||||
.macro dgemm_dsolve_2x1
|
||||
bge ZERO, L, .L_dsolve_2x1_load
|
||||
dgemm_2x1
|
||||
b .L_dsolve_2x1
|
||||
.L_dsolve_2x1_load:
|
||||
/* Load C0 */
|
||||
xvld U0, C0, 0x00
|
||||
.L_dsolve_2x1:
|
||||
ldrepl_macro 16, 16, 0
|
||||
GMUL xvf, d, U0, D0, U0
|
||||
// Store A
|
||||
A_st_macro 0, 0, 0, 2
|
||||
// Strore C
|
||||
GST v, , $vr0, C0, 0x00
|
||||
.endm
|
||||
|
||||
.macro dgemm_dsolve_1x1
|
||||
bge ZERO, L, .L_dsolve_1x1_load
|
||||
dgemm_1x1
|
||||
b .L_dsolve_1x1
|
||||
.L_dsolve_1x1_load:
|
||||
// Load C
|
||||
fld.d $f0, C0, 0x00
|
||||
.L_dsolve_1x1:
|
||||
ldrepl_macro 16, 16, 0
|
||||
GMUL xvf, d, U0, D0, U0
|
||||
// Store A
|
||||
A_st_macro 0, 0, 0, 1
|
||||
// Strore C
|
||||
GST f, d, $f0, C0, 0x00
|
||||
.endm
|
||||
|
||||
PROLOGUE
|
||||
push_if_used 26, 32
|
||||
PTR_SLLI LDC, LDC, 3
|
||||
PTR_SUB KK, ZERO, OFFSET
|
||||
/* if (!(N >> 2)) goto L_N3 */
|
||||
PTR_SRAI J, N, 2 /* J = bn >> 2 */
|
||||
andi N, N, 0x03
|
||||
beq ZERO, J, .L_N3
|
||||
.align 5
|
||||
.L_J1:
|
||||
PTR_ADDI J, J, -1
|
||||
move AA, A
|
||||
move CC, C
|
||||
PTR_SRAI I, M, 4 // M >> 4
|
||||
beqz I, .L_M15
|
||||
.align 4
|
||||
.L_I1:
|
||||
GADD , d, C0, CC, ZERO, C1, C0, LDC, C2, C1, LDC, C3, C2, LDC
|
||||
move A0, AA
|
||||
move B0, B
|
||||
move L, KK
|
||||
dgemm_dsolve_16x4
|
||||
PTR_ADDI I, I, -1
|
||||
PTR_SLLI T0, K, 7
|
||||
PTR_ADDI CC, CC, 0x80 // cc += 16
|
||||
PTR_ADD AA, AA, T0 // aa += 16 * k
|
||||
bnez I, .L_I1
|
||||
.L_M15:
|
||||
andi I, M, 8
|
||||
beqz I, .L_M7
|
||||
.L_M8:
|
||||
GADD , d, C0, CC, ZERO, C1, C0, LDC, C2, C1, LDC, C3, C2, LDC
|
||||
move A0, AA
|
||||
move B0, B
|
||||
move L, KK
|
||||
dgemm_dsolve_8x4
|
||||
PTR_SLLI T0, K, 6
|
||||
PTR_ADDI CC, CC, 0x40 // cc += 8
|
||||
PTR_ADD AA, AA, T0 // aa += 8 * k
|
||||
.L_M7:
|
||||
andi I, M, 4
|
||||
beqz I, .L_M3
|
||||
.L_M4:
|
||||
GADD , d, C0, CC, ZERO, C1, C0, LDC, C2, C1, LDC, C3, C2, LDC
|
||||
move A0, AA
|
||||
move B0, B
|
||||
move L, KK
|
||||
dgemm_dsolve_4x4
|
||||
PTR_SLLI T0, K, 5
|
||||
PTR_ADDI CC, CC, 0x20 // cc += 4
|
||||
PTR_ADD AA, AA, T0 // aa += 4 * k
|
||||
.L_M3:
|
||||
andi I, M, 2
|
||||
beqz I, .L_M1
|
||||
.L_M2:
|
||||
GADD , d, C0, CC, ZERO, C1, C0, LDC, C2, C1, LDC, C3, C2, LDC
|
||||
move A0, AA
|
||||
move B0, B
|
||||
move L, KK
|
||||
dgemm_dsolve_2x4
|
||||
PTR_SLLI T0, K, 4
|
||||
PTR_ADDI CC, CC, 0x10 // cc += 2
|
||||
PTR_ADD AA, AA, T0 // aa += 2 * k
|
||||
.L_M1:
|
||||
andi I, M, 1
|
||||
beqz I, .L_M0
|
||||
GADD , d, C0, CC, ZERO, C1, C0, LDC, C2, C1, LDC, C3, C2, LDC
|
||||
move A0, AA
|
||||
move B0, B
|
||||
move L, KK
|
||||
dgemm_dsolve_1x4
|
||||
PTR_SLLI T0, K, 3
|
||||
PTR_ADDI CC, CC, 0x08 // cc += 1
|
||||
PTR_ADD AA, AA, T0 // aa += 1 * k
|
||||
.L_M0:
|
||||
PTR_SLLI T0, K, 5
|
||||
PTR_SLLI T1, LDC, 2
|
||||
PTR_ADD B, B, T0 // b += 4 * k
|
||||
PTR_ADD C, C, T1 // c += 4 * ldc
|
||||
PTR_ADDI KK, KK, 4 // kk += 4
|
||||
bnez J, .L_J1
|
||||
.L_N3:
|
||||
andi J, N, 2
|
||||
beq ZERO, J, .L_N1
|
||||
.L_N2:
|
||||
move AA, A
|
||||
move CC, C
|
||||
PTR_SRAI I, M, 4 // M >> 4
|
||||
beqz I, .L_N2_M15
|
||||
.align 4
|
||||
.L_N2_I1:
|
||||
GADD , d, C0, CC, ZERO, C1, C0, LDC
|
||||
move A0, AA
|
||||
move B0, B
|
||||
move L, KK
|
||||
dgemm_dsolve_16x2
|
||||
PTR_ADDI I, I, -1
|
||||
PTR_SLLI T0, K, 7
|
||||
PTR_ADDI CC, CC, 0x80 // cc += 16
|
||||
PTR_ADD AA, AA, T0 // aa += 16 * k
|
||||
bnez I, .L_N2_I1
|
||||
.L_N2_M15:
|
||||
andi I, M, 8
|
||||
beqz I, .L_N2_M7
|
||||
.L_N2_M8:
|
||||
GADD , d, C0, CC, ZERO, C1, C0, LDC
|
||||
move A0, AA
|
||||
move B0, B
|
||||
move L, KK
|
||||
dgemm_dsolve_8x2
|
||||
PTR_SLLI T0, K, 6
|
||||
PTR_ADDI CC, CC, 0x40 // cc += 8
|
||||
PTR_ADD AA, AA, T0 // aa += 8 * k
|
||||
.L_N2_M7:
|
||||
andi I, M, 4
|
||||
beqz I, .L_N2_M3
|
||||
.L_N2_M4:
|
||||
GADD , d, C0, CC, ZERO, C1, C0, LDC
|
||||
move A0, AA
|
||||
move B0, B
|
||||
move L, KK
|
||||
dgemm_dsolve_4x2
|
||||
PTR_SLLI T0, K, 5
|
||||
PTR_ADDI CC, CC, 0x20 // cc += 4
|
||||
PTR_ADD AA, AA, T0 // aa += 4 * k
|
||||
.L_N2_M3:
|
||||
andi I, M, 2
|
||||
beqz I, .L_N2_M1
|
||||
.L_N2_M2:
|
||||
GADD , d, C0, CC, ZERO, C1, C0, LDC
|
||||
move A0, AA
|
||||
move B0, B
|
||||
move L, KK
|
||||
dgemm_dsolve_2x2
|
||||
PTR_SLLI T0, K, 4
|
||||
PTR_ADDI CC, CC, 0x10 // cc += 2
|
||||
PTR_ADD AA, AA, T0 // aa += 2 * k
|
||||
.L_N2_M1:
|
||||
andi I, M, 1
|
||||
beqz I, .L_N2_M0
|
||||
GADD , d, C0, CC, ZERO, C1, C0, LDC
|
||||
move A0, AA
|
||||
move B0, B
|
||||
move L, KK
|
||||
dgemm_dsolve_1x2
|
||||
PTR_SLLI T0, K, 3
|
||||
PTR_ADDI CC, CC, 0x08 // cc += 1
|
||||
PTR_ADD AA, AA, T0 // aa += 1 * k
|
||||
.L_N2_M0:
|
||||
PTR_SLLI T0, K, 4
|
||||
PTR_SLLI T1, LDC, 1
|
||||
PTR_ADD B, B, T0 // b += 2 * k
|
||||
PTR_ADD C, C, T1 // c += 2 * ldc
|
||||
PTR_ADDI KK, KK, 2 // kk += 2
|
||||
.L_N1:
|
||||
andi J, N, 1
|
||||
beq ZERO, J, .L_N0
|
||||
move AA, A
|
||||
move CC, C
|
||||
PTR_SRAI I, M, 4 // M >> 4
|
||||
beqz I, .L_N1_M15
|
||||
.align 4
|
||||
.L_N1_I1:
|
||||
GADD , d, C0, CC, ZERO
|
||||
move A0, AA
|
||||
move B0, B
|
||||
move L, KK
|
||||
dgemm_dsolve_16x1
|
||||
PTR_ADDI I, I, -1
|
||||
PTR_SLLI T0, K, 7
|
||||
PTR_ADDI CC, CC, 0x80 // cc += 16
|
||||
PTR_ADD AA, AA, T0 // aa += 16 * k
|
||||
bnez I, .L_N1_I1
|
||||
.L_N1_M15:
|
||||
andi I, M, 8
|
||||
beqz I, .L_N1_M7
|
||||
.L_N1_M8:
|
||||
GADD , d, C0, CC, ZERO
|
||||
move A0, AA
|
||||
move B0, B
|
||||
move L, KK
|
||||
dgemm_dsolve_8x1
|
||||
PTR_SLLI T0, K, 6
|
||||
PTR_ADDI CC, CC, 0x40 // cc += 8
|
||||
PTR_ADD AA, AA, T0 // aa += 8 * k
|
||||
.L_N1_M7:
|
||||
andi I, M, 4
|
||||
beqz I, .L_N1_M3
|
||||
.L_N1_M4:
|
||||
GADD , d, C0, CC, ZERO
|
||||
move A0, AA
|
||||
move B0, B
|
||||
move L, KK
|
||||
dgemm_dsolve_4x1
|
||||
PTR_SLLI T0, K, 5
|
||||
PTR_ADDI CC, CC, 0x20 // cc += 4
|
||||
PTR_ADD AA, AA, T0 // aa += 4 * k
|
||||
.L_N1_M3:
|
||||
andi I, M, 2
|
||||
beqz I, .L_N1_M1
|
||||
.L_N1_M2:
|
||||
GADD , d, C0, CC, ZERO
|
||||
move A0, AA
|
||||
move B0, B
|
||||
move L, KK
|
||||
dgemm_dsolve_2x1
|
||||
PTR_SLLI T0, K, 4
|
||||
PTR_ADDI CC, CC, 0x10 // cc += 2
|
||||
PTR_ADD AA, AA, T0 // aa += 2 * k
|
||||
.L_N1_M1:
|
||||
andi I, M, 1
|
||||
beqz I, .L_N1_M0
|
||||
GADD , d, C0, CC, ZERO
|
||||
move A0, AA
|
||||
move B0, B
|
||||
move L, KK
|
||||
dgemm_dsolve_1x1
|
||||
PTR_SLLI T0, K, 3
|
||||
PTR_ADDI CC, CC, 0x08 // cc += 1
|
||||
PTR_ADD AA, AA, T0 // aa += 1 * k
|
||||
.L_N1_M0:
|
||||
.L_N0:
|
||||
pop_if_used 26, 32
|
||||
jirl $r0, $r1, 0x0
|
||||
EPILOGUE
|
||||
|
|
@ -0,0 +1,953 @@
|
|||
/*******************************************************************************
|
||||
Copyright (c) 2023, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*******************************************************************************/
|
||||
#define ASSEMBLER
|
||||
|
||||
#include "common.h"
|
||||
#include "loongarch64_asm.S"
|
||||
|
||||
/*********************************************************************
|
||||
* 2023/09/26 guxiwei
|
||||
* UTEST : OK
|
||||
* CTEST : OK
|
||||
* TEST : OK
|
||||
*
|
||||
*
|
||||
*********************************************************************/
|
||||
|
||||
/* int CNAME(BLASLONG m, BLASLONG n, BLASLONG k, FLOAT dummy1, FLOAT *a, FLOAT *b,
|
||||
* FLOAT *c, BLASLONG ldc, BLASLONG offset)
|
||||
*/
|
||||
#define M $r4 // param 1: bm
|
||||
#define N $r5 // param 2: bn
|
||||
#define K $r6 // param 3: bk
|
||||
#define A $r7 // param 5: ba
|
||||
#define B $r8 // param 6: bb
|
||||
#define C $r9 // param 7: bc
|
||||
#define LDC $r10 // param 8: ldc
|
||||
#define OFFSET $r11 // param 9: offset
|
||||
|
||||
/* Cycle control parameters */
|
||||
#define I $r13
|
||||
#define J $r14
|
||||
#define L $r15
|
||||
#define TL $r16
|
||||
/* Matrix address */
|
||||
#define A0 $r17
|
||||
#define B0 $r18
|
||||
#define C0 $r19
|
||||
#define C1 $r20
|
||||
#define C2 $r23
|
||||
#define C3 $r24
|
||||
#define T0 $r25
|
||||
#define T1 $r26
|
||||
#define T2 $r27
|
||||
#define KK $r28
|
||||
#define AA $r29
|
||||
#define CC $r30
|
||||
#define BB $r31
|
||||
#undef ZERO
|
||||
#define ZERO $r0
|
||||
|
||||
#define U0 $xr0
|
||||
#define U1 $xr1
|
||||
#define U2 $xr2
|
||||
#define U3 $xr3
|
||||
#define U4 $xr4
|
||||
#define U5 $xr5
|
||||
#define U6 $xr6
|
||||
#define U7 $xr7
|
||||
#define U8 $xr8
|
||||
#define U9 $xr9
|
||||
#define U10 $xr10
|
||||
#define U11 $xr11
|
||||
#define U12 $xr12
|
||||
#define U13 $xr13
|
||||
#define U14 $xr14
|
||||
#define U15 $xr15
|
||||
#define D0 $xr16
|
||||
#define D1 $xr17
|
||||
#define D2 $xr18
|
||||
#define D3 $xr19
|
||||
#define D4 $xr20
|
||||
#define D5 $xr21
|
||||
#define D6 $xr22
|
||||
#define D7 $xr23
|
||||
#define D8 $xr24
|
||||
#define D9 $xr25
|
||||
#define D10 $xr26
|
||||
#define D11 $xr27
|
||||
#define D12 $xr28
|
||||
#define D13 $xr29
|
||||
#define D14 $xr30
|
||||
#define D15 $xr31
|
||||
|
||||
/* Prefetch interval */
|
||||
#define A_PRE 0x400
|
||||
#define B_PRE 0x100
|
||||
|
||||
#include "dtrsm_kernel_macro.S"
|
||||
|
||||
.macro ldrepl_macro start, end, stride
|
||||
// Load Ux (x = 0...15)
|
||||
.if \start <= \end
|
||||
GLDREPL xv, d, $xr\start, B0, \stride * 8
|
||||
ldrepl_macro %start + 1, \end, %stride + 1
|
||||
.endif
|
||||
.endm
|
||||
|
||||
.macro nmsub_macro start0, end0, start1, reg
|
||||
// Ux -= reg * Dx
|
||||
.if \start0 <= \end0
|
||||
xvfnmsub.d $xr\start0, \reg, $xr\start1, $xr\start0
|
||||
nmsub_macro %start0 + 1, \end0, %start1 + 1, \reg
|
||||
.endif
|
||||
.endm
|
||||
|
||||
.macro A_st_macro start, end, stride, N
|
||||
// Store Ux(x = 0...15)
|
||||
.if \start <= \end
|
||||
.if \N == 4
|
||||
xvst $xr\start, A0, \stride * 0x20
|
||||
.elseif \N == 2
|
||||
vst $vr\start, A0, \stride * 0x10
|
||||
.elseif \N == 1
|
||||
fst.d $f\start, A0, \stride * 0x08
|
||||
.endif
|
||||
A_st_macro %start + 1, \end, %stride + 1, \N
|
||||
.endif
|
||||
.endm
|
||||
|
||||
.macro dsolve_16x2
|
||||
// We are going to process matrix B with a size of 2x2,
|
||||
// using only the upper triangular portion. The memory layout of
|
||||
// matrix B is as follows:
|
||||
//0
|
||||
//2 3
|
||||
// Sequentially extract data from B in row order
|
||||
ldrepl_macro 16, 16, 0
|
||||
ldrepl_macro 17, 18, 2
|
||||
GMUL xvf, d, U4, D2, U4, U5, D2, U5, U6, D2, U6, U7, D2, U7
|
||||
nmsub_macro 0, 3, 4, D1
|
||||
GMUL xvf, d, U0, D0, U0, U1, D0, U1, U2, D0, U2, U3, D0, U3
|
||||
// Store A
|
||||
A_st_macro 0, 7, 0, 4
|
||||
// Store C
|
||||
GST xv, , U0, C0, 0x00, U1, C0, 0x20, U2, C0, 0x40, U3, C0, 0x60, \
|
||||
U4, C1, 0x00, U5, C1, 0x20, U6, C1, 0x40, U7, C1, 0x60
|
||||
.endm
|
||||
|
||||
.macro dsolve_8x2
|
||||
// We are going to process matrix B with a size of 2x2,
|
||||
// using only the upper triangular portion. The memory layout of
|
||||
// matrix B is as follows:
|
||||
//0
|
||||
//2 3
|
||||
// Sequentially extract data from B in row order
|
||||
ldrepl_macro 16, 16, 0
|
||||
ldrepl_macro 17, 18, 2
|
||||
GMUL xvf, d, U2, D2, U2, U3, D2, U3
|
||||
nmsub_macro 0, 1, 2, D1
|
||||
GMUL xvf, d, U0, D0, U0, U1, D0, U1
|
||||
// Store A
|
||||
A_st_macro 0, 3, 0, 4
|
||||
// Store C
|
||||
GST xv, , U0, C0, 0x00, U1, C0, 0x20, \
|
||||
U2, C1, 0x00, U3, C1, 0x20
|
||||
.endm
|
||||
|
||||
.macro dsolve_4x2
|
||||
// We are going to process matrix B with a size of 2x2,
|
||||
// using only the upper triangular portion. The memory layout of
|
||||
// matrix B is as follows:
|
||||
//0
|
||||
//2 3
|
||||
// Sequentially extract data from B in row order
|
||||
ldrepl_macro 16, 16, 0
|
||||
ldrepl_macro 17, 18, 2
|
||||
GMUL xvf, d, U1, D2, U1
|
||||
nmsub_macro 0, 0, 1, D1
|
||||
GMUL xvf, d, U0, D0, U0
|
||||
// Store A
|
||||
A_st_macro 0, 1, 0, 4
|
||||
// Store C
|
||||
GST xv, , U0, C0, 0x00, U1, C1, 0x00
|
||||
.endm
|
||||
|
||||
.macro dsolve_2x2
|
||||
// We are going to process matrix B with a size of 2x2,
|
||||
// using only the upper triangular portion. The memory layout of
|
||||
// matrix B is as follows:
|
||||
//0
|
||||
//2 3
|
||||
// Sequentially extract data from B in row order
|
||||
ldrepl_macro 16, 16, 0
|
||||
ldrepl_macro 17, 18, 2
|
||||
GMUL xvf, d, U1, D2, U1
|
||||
nmsub_macro 0, 0, 1, D1
|
||||
GMUL xvf, d, U0, D0, U0
|
||||
// Store A
|
||||
A_st_macro 0, 1, 0, 2
|
||||
// Store C
|
||||
GST v, , $vr0, C0, 0x00, $vr1, C1, 0x00
|
||||
.endm
|
||||
|
||||
.macro dsolve_1x2
|
||||
// We are going to process matrix B with a size of 2x2,
|
||||
// using only the upper triangular portion. The memory layout of
|
||||
// matrix B is as follows:
|
||||
//0
|
||||
//2 3
|
||||
// Sequentially extract data from B in row order
|
||||
ldrepl_macro 16, 16, 0
|
||||
ldrepl_macro 17, 18, 2
|
||||
GMUL xvf, d, U1, D2, U1
|
||||
nmsub_macro 0, 0, 1, D1
|
||||
GMUL xvf, d, U0, D0, U0
|
||||
// Store A
|
||||
A_st_macro 0, 1, 0, 1
|
||||
// Store C
|
||||
GST f, d, $f0, C0, 0x00, $f1, C1, 0x00
|
||||
.endm
|
||||
|
||||
.macro dsolve_16x4
|
||||
// We are going to process matrix B with a size of 4x4,
|
||||
// using only the upper triangular portion. The memory layout of
|
||||
// matrix B is as follows:
|
||||
//0
|
||||
//4 5
|
||||
//8 9 10
|
||||
//12 13 14 15
|
||||
// Sequentially extract data from B in row order
|
||||
ldrepl_macro 22, 25, 12
|
||||
GMUL xvf, d, U12, D9, U12, U13, D9, U13, U14, D9, U14, U15, D9, U15
|
||||
ldrepl_macro 19, 21, 8
|
||||
nmsub_macro 8, 11, 12, D8
|
||||
ldrepl_macro 17, 18, 4
|
||||
GMUL xvf, d, U8, D5, U8, U9, D5, U9, U10, D5, U10, U11, D5, U11
|
||||
ldrepl_macro 16, 16, 0
|
||||
nmsub_macro 4, 7, 12, D7
|
||||
nmsub_macro 4, 7, 8, D4
|
||||
GMUL xvf, d, U4, D2, U4, U5, D2, U5, U6, D2, U6, U7, D2, U7
|
||||
nmsub_macro 0, 3, 12, D6
|
||||
nmsub_macro 0, 3, 8, D3
|
||||
nmsub_macro 0, 3, 4, D1
|
||||
GMUL xvf, d, U0, D0, U0, U1, D0, U1, U2, D0, U2, U3, D0, U3
|
||||
// Store A
|
||||
A_st_macro 0, 15, 0, 4
|
||||
// Store C
|
||||
GST xv, , U0, C0, 0x00, U1, C0, 0x20, U2, C0, 0x40, U3, C0, 0x60, \
|
||||
U4, C1, 0x00, U5, C1, 0x20, U6, C1, 0x40, U7, C1, 0x60, \
|
||||
U8, C2, 0x00, U9, C2, 0x20, U10, C2, 0x40, U11, C2, 0x60, \
|
||||
U12, C3, 0x00, U13, C3, 0x20, U14, C3, 0x40, U15, C3, 0x60
|
||||
.endm
|
||||
|
||||
.macro dsolve_8x4
|
||||
// We are going to process matrix B with a size of 4x4,
|
||||
// using only the upper triangular portion. The memory layout of
|
||||
// matrix B is as follows:
|
||||
//0
|
||||
//4 5
|
||||
//8 9 10
|
||||
//12 13 14 15
|
||||
// Sequentially extract data from B in row order
|
||||
ldrepl_macro 22, 25, 12
|
||||
GMUL xvf, d, U6, D9, U6, U7, D9, U7
|
||||
ldrepl_macro 19, 21, 8
|
||||
nmsub_macro 4, 5, 6, D8
|
||||
ldrepl_macro 17, 18, 4
|
||||
GMUL xvf, d, U4, D5, U4, U5, D5, U5
|
||||
ldrepl_macro 16, 16, 0
|
||||
nmsub_macro 2, 3, 6, D7
|
||||
nmsub_macro 2, 3, 4, D4
|
||||
GMUL xvf, d, U2, D2, U2, U3, D2, U3
|
||||
nmsub_macro 0, 1, 6, D6
|
||||
nmsub_macro 0, 1, 4, D3
|
||||
nmsub_macro 0, 1, 2, D1
|
||||
GMUL xvf, d, U0, D0, U0, U1, D0, U1
|
||||
// Store A
|
||||
A_st_macro 0, 7, 0, 4
|
||||
// Store C
|
||||
GST xv, , U0, C0, 0x00, U1, C0, 0x20, \
|
||||
U2, C1, 0x00, U3, C1, 0x20, \
|
||||
U4, C2, 0x00, U5, C2, 0x20, \
|
||||
U6, C3, 0x00, U7, C3, 0x20
|
||||
.endm
|
||||
|
||||
.macro dsolve_4x4
|
||||
// We are going to process matrix B with a size of 4x4,
|
||||
// using only the upper triangular portion. The memory layout of
|
||||
// matrix B is as follows:
|
||||
//0
|
||||
//4 5
|
||||
//8 9 10
|
||||
//12 13 14 15
|
||||
// Sequentially extract data from B in row order
|
||||
ldrepl_macro 22, 25, 12
|
||||
GMUL xvf, d, U3, D9, U3
|
||||
ldrepl_macro 19, 21, 8
|
||||
nmsub_macro 2, 2, 3, D8
|
||||
ldrepl_macro 17, 18, 4
|
||||
GMUL xvf, d, U2, D5, U2
|
||||
ldrepl_macro 16, 16, 0
|
||||
nmsub_macro 1, 1, 3, D7
|
||||
nmsub_macro 1, 1, 2, D4
|
||||
GMUL xvf, d, U1, D2, U1
|
||||
nmsub_macro 0, 0, 3, D6
|
||||
nmsub_macro 0, 0, 2, D3
|
||||
nmsub_macro 0, 0, 1, D1
|
||||
GMUL xvf, d, U0, D0, U0
|
||||
// Store A
|
||||
A_st_macro 0, 3, 0, 4
|
||||
// Store C
|
||||
GST xv, , U0, C0, 0x00, U1, C1, 0x00, U2, C2, 0x00, U3, C3, 0x00
|
||||
.endm
|
||||
|
||||
.macro dsolve_2x4
|
||||
// We are going to process matrix B with a size of 4x4,
|
||||
// using only the upper triangular portion. The memory layout of
|
||||
// matrix B is as follows:
|
||||
//0
|
||||
//4 5
|
||||
//8 9 10
|
||||
//12 13 14 15
|
||||
// Sequentially extract data from B in row order
|
||||
ldrepl_macro 22, 25, 12
|
||||
GMUL xvf, d, U3, D9, U3
|
||||
ldrepl_macro 19, 21, 8
|
||||
nmsub_macro 2, 2, 3, D8
|
||||
ldrepl_macro 17, 18, 4
|
||||
GMUL xvf, d, U2, D5, U2
|
||||
ldrepl_macro 16, 16, 0
|
||||
nmsub_macro 1, 1, 3, D7
|
||||
nmsub_macro 1, 1, 2, D4
|
||||
GMUL xvf, d, U1, D2, U1
|
||||
nmsub_macro 0, 0, 3, D6
|
||||
nmsub_macro 0, 0, 2, D3
|
||||
nmsub_macro 0, 0, 1, D1
|
||||
GMUL xvf, d, U0, D0, U0
|
||||
// Store A
|
||||
A_st_macro 0, 3, 0, 2
|
||||
// Store C
|
||||
GST v, , $vr0, C0, 0x00, $vr1, C1, 0x00, $vr2, C2, 0x00, $vr3, C3, 0x00
|
||||
.endm
|
||||
|
||||
.macro dsolve_1x4
|
||||
// We are going to process matrix B with a size of 4x4,
|
||||
// using only the upper triangular portion. The memory layout of
|
||||
// matrix B is as follows:
|
||||
//0
|
||||
//4 5
|
||||
//8 9 10
|
||||
//12 13 14 15
|
||||
// Sequentially extract data from B in row order
|
||||
ldrepl_macro 22, 25, 12
|
||||
GMUL xvf, d, U3, D9, U3
|
||||
ldrepl_macro 19, 21, 8
|
||||
nmsub_macro 2, 2, 3, D8
|
||||
ldrepl_macro 17, 18, 4
|
||||
GMUL xvf, d, U2, D5, U2
|
||||
ldrepl_macro 16, 16, 0
|
||||
nmsub_macro 1, 1, 3, D7
|
||||
nmsub_macro 1, 1, 2, D4
|
||||
GMUL xvf, d, U1, D2, U1
|
||||
nmsub_macro 0, 0, 3, D6
|
||||
nmsub_macro 0, 0, 2, D3
|
||||
nmsub_macro 0, 0, 1, D1
|
||||
GMUL xvf, d, U0, D0, U0
|
||||
// Store A
|
||||
A_st_macro 0, 3, 0, 1
|
||||
// Store C
|
||||
GST f, d, $f0, C0, 0x00, $f1, C1, 0x00, $f2, C2, 0x00, $f3, C3, 0x00,
|
||||
.endm
|
||||
|
||||
.macro dgemm_dsolve_16x1
|
||||
or T1, A0, A0
|
||||
or T2, B0, B0
|
||||
bge ZERO, L, .L_dsolve_16x1_load
|
||||
dgemm_16x1
|
||||
b .L_dsolve_16x1
|
||||
.L_dsolve_16x1_load:
|
||||
/* Load C0 */
|
||||
xvld U0, C0, 0x00
|
||||
xvld U1, C0, 0x20
|
||||
xvld U2, C0, 0x40
|
||||
xvld U3, C0, 0x60
|
||||
.L_dsolve_16x1:
|
||||
PTR_ADDI A0, T1, -16 * 8
|
||||
PTR_ADDI B0, T2, -1 * 8
|
||||
ldrepl_macro 16, 16, 0
|
||||
GMUL xvf, d, U0, D0, U0, U1, D0, U1, U2, D0, U2, U3, D0, U3
|
||||
// Store A
|
||||
A_st_macro 0, 3, 0, 4
|
||||
// Strore C
|
||||
GST xv, , U0, C0, 0x00, U1, C0, 0x20, U2, C0, 0x40, U3, C0, 0x60
|
||||
.endm
|
||||
|
||||
.macro dgemm_dsolve_8x1
|
||||
or T1, A0, A0
|
||||
or T2, B0, B0
|
||||
bge ZERO, L, .L_dsolve_8x1_load
|
||||
dgemm_8x1
|
||||
b .L_dsolve_8x1
|
||||
.L_dsolve_8x1_load:
|
||||
/* Load C0 */
|
||||
xvld U0, C0, 0x00
|
||||
xvld U1, C0, 0x20
|
||||
.L_dsolve_8x1:
|
||||
PTR_ADDI A0, T1, -8 * 8
|
||||
PTR_ADDI B0, T2, -1 * 8
|
||||
ldrepl_macro 16, 16, 0
|
||||
GMUL xvf, d, U0, D0, U0, U1, D0, U1
|
||||
// Store A
|
||||
A_st_macro 0, 1, 0, 4
|
||||
// Strore C
|
||||
GST xv, , U0, C0, 0x00, U1, C0, 0x20
|
||||
.endm
|
||||
|
||||
.macro dgemm_dsolve_4x1
|
||||
or T1, A0, A0
|
||||
or T2, B0, B0
|
||||
bge ZERO, L, .L_dsolve_4x1_load
|
||||
dgemm_4x1
|
||||
b .L_dsolve_4x1
|
||||
.L_dsolve_4x1_load:
|
||||
/* Load C0 */
|
||||
xvld U0, C0, 0x00
|
||||
.L_dsolve_4x1:
|
||||
PTR_ADDI A0, T1, -4 * 8
|
||||
PTR_ADDI B0, T2, -1 * 8
|
||||
ldrepl_macro 16, 16, 0
|
||||
GMUL xvf, d, U0, D0, U0
|
||||
// Store A
|
||||
A_st_macro 0, 0, 0, 4
|
||||
// Strore C
|
||||
GST xv, , U0, C0, 0x00
|
||||
.endm
|
||||
|
||||
.macro dgemm_dsolve_2x1
|
||||
or T1, A0, A0
|
||||
or T2, B0, B0
|
||||
bge ZERO, L, .L_dsolve_2x1_load
|
||||
dgemm_2x1
|
||||
b .L_dsolve_2x1
|
||||
.L_dsolve_2x1_load:
|
||||
/* Load C0 */
|
||||
xvld U0, C0, 0x00
|
||||
.L_dsolve_2x1:
|
||||
PTR_ADDI A0, T1, -2 * 8
|
||||
PTR_ADDI B0, T2, -1 * 8
|
||||
ldrepl_macro 16, 16, 0
|
||||
GMUL xvf, d, U0, D0, U0
|
||||
// Store A
|
||||
A_st_macro 0, 0, 0, 2
|
||||
// Strore C
|
||||
GST v, , $vr0, C0, 0x00
|
||||
.endm
|
||||
|
||||
.macro dgemm_dsolve_1x1
|
||||
or T1, A0, A0
|
||||
or T2, B0, B0
|
||||
bge ZERO, L, .L_dsolve_1x1_load
|
||||
dgemm_1x1
|
||||
b .L_dsolve_1x1
|
||||
.L_dsolve_1x1_load:
|
||||
// Load C
|
||||
fld.d $f0, C0, 0x00
|
||||
.L_dsolve_1x1:
|
||||
PTR_ADDI A0, T1, -1 * 8
|
||||
PTR_ADDI B0, T2, -1 * 8
|
||||
ldrepl_macro 16, 16, 0
|
||||
GMUL xvf, d, U0, D0, U0
|
||||
// Store A
|
||||
A_st_macro 0, 0, 0, 1
|
||||
// Strore C
|
||||
GST f, d, $f0, C0, 0x00
|
||||
.endm
|
||||
|
||||
.macro dgemm_dsolve_16x2
|
||||
or T1, A0, A0
|
||||
or T2, B0, B0
|
||||
bge ZERO, L, .L_dsolve_16x2_load
|
||||
dgemm_16x2
|
||||
b .L_dsolve_16x2
|
||||
.L_dsolve_16x2_load:
|
||||
/* Load C0 */
|
||||
xvld U0, C0, 0x00
|
||||
xvld U1, C0, 0x20
|
||||
xvld U2, C0, 0x40
|
||||
xvld U3, C0, 0x60
|
||||
/* Load C1 */
|
||||
xvld U4, C1, 0x00
|
||||
xvld U5, C1, 0x20
|
||||
xvld U6, C1, 0x40
|
||||
xvld U7, C1, 0x60
|
||||
.L_dsolve_16x2:
|
||||
PTR_ADDI A0, T1, -(16 * 2) * 8
|
||||
PTR_ADDI B0, T2, -(2 * 2) * 8
|
||||
dsolve_16x2
|
||||
.endm
|
||||
|
||||
.macro dgemm_dsolve_8x2
|
||||
or T1, A0, A0
|
||||
or T2, B0, B0
|
||||
bge ZERO, L, .L_dsolve_8x2_load
|
||||
dgemm_8x2
|
||||
b .L_dsolve_8x2
|
||||
.L_dsolve_8x2_load:
|
||||
/* Load C0 */
|
||||
xvld U0, C0, 0x00
|
||||
xvld U1, C0, 0x20
|
||||
/* Load C1 */
|
||||
xvld U2, C1, 0x00
|
||||
xvld U3, C1, 0x20
|
||||
.L_dsolve_8x2:
|
||||
PTR_ADDI A0, T1, -(8 * 2) * 8
|
||||
PTR_ADDI B0, T2, -(2 * 2) * 8
|
||||
dsolve_8x2
|
||||
.endm
|
||||
|
||||
.macro dgemm_dsolve_4x2
|
||||
or T1, A0, A0
|
||||
or T2, B0, B0
|
||||
bge ZERO, L, .L_dsolve_4x2_load
|
||||
dgemm_4x2
|
||||
b .L_dsolve_4x2
|
||||
.L_dsolve_4x2_load:
|
||||
/* Load C0 */
|
||||
xvld U0, C0, 0x00
|
||||
/* Load C1 */
|
||||
xvld U1, C1, 0x00
|
||||
.L_dsolve_4x2:
|
||||
PTR_ADDI A0, T1, -(4 * 2) * 8
|
||||
PTR_ADDI B0, T2, -(2 * 2) * 8
|
||||
dsolve_4x2
|
||||
.endm
|
||||
|
||||
.macro dgemm_dsolve_2x2
|
||||
or T1, A0, A0
|
||||
or T2, B0, B0
|
||||
bge ZERO, L, .L_dsolve_2x2_load
|
||||
dgemm_2x2
|
||||
b .L_dsolve_2x2
|
||||
.L_dsolve_2x2_load:
|
||||
/* Load C0 */
|
||||
xvld U0, C0, 0x00
|
||||
/* Load C1 */
|
||||
xvld U1, C1, 0x00
|
||||
.L_dsolve_2x2:
|
||||
PTR_ADDI A0, T1, -(2 * 2) * 8
|
||||
PTR_ADDI B0, T2, -(2 * 2) * 8
|
||||
dsolve_2x2
|
||||
.endm
|
||||
|
||||
.macro dgemm_dsolve_1x2
|
||||
or T1, A0, A0
|
||||
or T2, B0, B0
|
||||
bge ZERO, L, .L_dsolve_1x2_load
|
||||
dgemm_1x2
|
||||
xvpackod.d U1, U0, U0
|
||||
b .L_dsolve_1x2
|
||||
.L_dsolve_1x2_load:
|
||||
// Load C
|
||||
fld.d $f0, C0, 0x00
|
||||
fld.d $f1, C1, 0x00
|
||||
.L_dsolve_1x2:
|
||||
PTR_ADDI A0, T1, -(1 * 2) * 8
|
||||
PTR_ADDI B0, T2, -(2 * 2) * 8
|
||||
dsolve_1x2
|
||||
.endm
|
||||
|
||||
.macro dgemm_dsolve_16x4
|
||||
or T1, A0, A0
|
||||
or T2, B0, B0
|
||||
bge ZERO, L, .L_dsolve_16x4_load
|
||||
dgemm_16x4
|
||||
b .L_dsolve_16x4
|
||||
.L_dsolve_16x4_load:
|
||||
// Load C
|
||||
GLD xv, , U0, C0, 0x00, U1, C0, 0x20, U2, C0, 0x40, U3, C0, 0x60
|
||||
GLD xv, , U4, C1, 0x00, U5, C1, 0x20, U6, C1, 0x40, U7, C1, 0x60
|
||||
GLD xv, , U8, C2, 0x00, U9, C2, 0x20, U10, C2, 0x40, U11, C2, 0x60
|
||||
GLD xv, , U12, C3, 0x00, U13, C3, 0x20, U14, C3, 0x40, U15, C3, 0x60
|
||||
/********************** solver ******************/
|
||||
.L_dsolve_16x4:
|
||||
PTR_ADDI A0, T1, -(16 * 4) * 8
|
||||
PTR_ADDI B0, T2, -(4 * 4) * 8
|
||||
dsolve_16x4
|
||||
.endm
|
||||
|
||||
.macro dgemm_dsolve_8x4
|
||||
or T1, A0, A0
|
||||
or T2, B0, B0
|
||||
bge ZERO, L, .L_dsolve_8x4_load
|
||||
dgemm_8x4
|
||||
b .L_dsolve_8x4
|
||||
.L_dsolve_8x4_load:
|
||||
/* Load C0 */
|
||||
xvld U0, C0, 0x00
|
||||
xvld U1, C0, 0x20
|
||||
|
||||
/* Load C1 */
|
||||
xvld U2, C1, 0x00
|
||||
xvld U3, C1, 0x20
|
||||
|
||||
/* Load C2 */
|
||||
xvld U4, C2, 0x00
|
||||
xvld U5, C2, 0x20
|
||||
|
||||
/* Load C3 */
|
||||
xvld U6, C3, 0x00
|
||||
xvld U7, C3, 0x20
|
||||
/********* solver *********/
|
||||
.L_dsolve_8x4:
|
||||
PTR_ADDI A0, T1, -(8 * 4) * 8
|
||||
PTR_ADDI B0, T2, -(4 * 4) * 8
|
||||
dsolve_8x4
|
||||
.endm
|
||||
|
||||
.macro dgemm_dsolve_4x4
|
||||
or T1, A0, A0
|
||||
or T2, B0, B0
|
||||
bge ZERO, L, .L_dsolve_4x4_load
|
||||
dgemm_4x4
|
||||
b .L_dsolve_4x4
|
||||
.L_dsolve_4x4_load:
|
||||
/* Load C0 */
|
||||
xvld U0, C0, 0x00
|
||||
/* Load C1 */
|
||||
xvld U1, C1, 0x00
|
||||
/* Load C2 */
|
||||
xvld U2, C2, 0x00
|
||||
/* Load C3 */
|
||||
xvld U3, C3, 0x00
|
||||
/************** solver *****************/
|
||||
.L_dsolve_4x4:
|
||||
PTR_ADDI A0, T1, -(4 * 4) * 8
|
||||
PTR_ADDI B0, T2, -(4 * 4) * 8
|
||||
dsolve_4x4
|
||||
.endm
|
||||
|
||||
.macro dgemm_dsolve_2x4
|
||||
or T1, A0, A0
|
||||
or T2, B0, B0
|
||||
bge ZERO, L, .L_dsolve_2x4_load
|
||||
dgemm_2x4
|
||||
xvpermi.q U2, U0, 0x01
|
||||
xvpermi.q U3, U1, 0x01
|
||||
b .L_dsolve_2x4
|
||||
.L_dsolve_2x4_load:
|
||||
/* Load C0 */
|
||||
xvld U0, C0, 0x00
|
||||
/* Load C1 */
|
||||
xvld U1, C1, 0x00
|
||||
/* Load C2 */
|
||||
xvld U2, C2, 0x00
|
||||
/* Load C3 */
|
||||
xvld U3, C3, 0x00
|
||||
/********************** solver ******************/
|
||||
.L_dsolve_2x4:
|
||||
PTR_ADDI A0, T1, -(2 * 4) * 8
|
||||
PTR_ADDI B0, T2, -(4 * 4) * 8
|
||||
dsolve_2x4
|
||||
.endm
|
||||
|
||||
.macro dgemm_dsolve_1x4
|
||||
or T1, A0, A0
|
||||
or T2, B0, B0
|
||||
bge ZERO, L, .L_dsolve_1x4_load
|
||||
dgemm_1x4
|
||||
xvpackod.d U1, U0, U0
|
||||
xvpermi.q U2, U0, 0x01
|
||||
xvpermi.q U3, U1, 0x01
|
||||
b .L_dsolve_1x4
|
||||
.L_dsolve_1x4_load:
|
||||
// Load C
|
||||
fld.d $f0, C0, 0x00
|
||||
fld.d $f1, C1, 0x00
|
||||
fld.d $f2, C2, 0x00
|
||||
fld.d $f3, C3, 0x00
|
||||
.L_dsolve_1x4:
|
||||
PTR_ADDI A0, T1, -(1 * 4) * 8
|
||||
PTR_ADDI B0, T2, -(4 * 4) * 8
|
||||
dsolve_1x4
|
||||
.endm
|
||||
|
||||
PROLOGUE
|
||||
push_if_used 26, 32
|
||||
PTR_SLLI LDC, LDC, 3
|
||||
PTR_SUB KK, N, OFFSET
|
||||
PTR_MUL T0, N, LDC
|
||||
PTR_MUL T1, N, K
|
||||
PTR_ADD C, C, T0 // c += n * ldc
|
||||
PTR_SLLI T1, T1, 3
|
||||
PTR_ADD B, B, T1
|
||||
|
||||
andi J, N, 1
|
||||
beqz J, .L_N2
|
||||
.L_N1:
|
||||
move AA, A
|
||||
PTR_SUB C, C, LDC // c -= ldc
|
||||
PTR_SLLI T0, K, 3
|
||||
PTR_SLLI T1, KK, 3
|
||||
PTR_SUB B, B, T0 // b -= k
|
||||
PTR_ADD BB, B, T1 // bb = b + kk
|
||||
move CC, C
|
||||
|
||||
PTR_SRAI I, M, 4 // M >> 4
|
||||
beqz I, .L_N1_M15
|
||||
.align 4
|
||||
.L_N1_I1:
|
||||
PTR_SLLI T1, KK, 7
|
||||
GADD , d, C0, CC, ZERO
|
||||
PTR_ADD A0, AA, T1 // a0 = aa + 16 * kk
|
||||
move B0, BB
|
||||
PTR_SUB L, K, KK // L = K - KK
|
||||
dgemm_dsolve_16x1
|
||||
PTR_ADDI I, I, -1
|
||||
PTR_SLLI T0, K, 7
|
||||
PTR_ADDI CC, CC, 0x80 // cc += 16
|
||||
PTR_ADD AA, AA, T0 // aa += 16 * k
|
||||
bnez I, .L_N1_I1
|
||||
.L_N1_M15:
|
||||
andi I, M, 8
|
||||
beqz I, .L_N1_M7
|
||||
.L_N1_M8:
|
||||
PTR_SLLI T1, KK, 6
|
||||
GADD , d, C0, CC, ZERO
|
||||
PTR_ADD A0, AA, T1 // a0 = aa + 8 * kk
|
||||
move B0, BB
|
||||
PTR_SUB L, K, KK // L = K - KK
|
||||
dgemm_dsolve_8x1
|
||||
PTR_SLLI T0, K, 6
|
||||
PTR_ADDI CC, CC, 0x40 // cc += 8
|
||||
PTR_ADD AA, AA, T0 // aa += 8 * k
|
||||
.L_N1_M7:
|
||||
andi I, M, 4
|
||||
beqz I, .L_N1_M3
|
||||
.L_N1_M4:
|
||||
PTR_SLLI T1, KK, 5
|
||||
GADD , d, C0, CC, ZERO
|
||||
PTR_ADD A0, AA, T1 // a0 = aa + 4 * kk
|
||||
move B0, BB
|
||||
PTR_SUB L, K, KK // L = K - KK
|
||||
dgemm_dsolve_4x1
|
||||
PTR_SLLI T0, K, 5
|
||||
PTR_ADDI CC, CC, 0x20 // cc += 4
|
||||
PTR_ADD AA, AA, T0 // aa += 4 * k
|
||||
.L_N1_M3:
|
||||
andi I, M, 2
|
||||
beqz I, .L_N1_M1
|
||||
.L_N1_M2:
|
||||
PTR_SLLI T1, KK, 4
|
||||
GADD , d, C0, CC, ZERO
|
||||
PTR_ADD A0, AA, T1 // a0 = aa + 2 * kk
|
||||
move B0, BB
|
||||
PTR_SUB L, K, KK // L = K - KK
|
||||
dgemm_dsolve_2x1
|
||||
PTR_SLLI T0, K, 4
|
||||
PTR_ADDI CC, CC, 0x10 // cc += 2
|
||||
PTR_ADD AA, AA, T0 // aa += 2 * k
|
||||
.L_N1_M1:
|
||||
andi I, M, 1
|
||||
beqz I, .L_N1_M0
|
||||
PTR_SLLI T1, KK, 3
|
||||
GADD , d, C0, CC, ZERO
|
||||
PTR_ADD A0, AA, T1 // a0 = aa + kk
|
||||
move B0, BB
|
||||
PTR_SUB L, K, KK // L = K - KK
|
||||
dgemm_dsolve_1x1
|
||||
PTR_SLLI T0, K, 3
|
||||
PTR_ADDI CC, CC, 0x08 // cc += 1
|
||||
PTR_ADD AA, AA, T0 // aa += 1 * k
|
||||
.L_N1_M0:
|
||||
PTR_ADDI KK, KK, -1
|
||||
.L_N2:
|
||||
andi J, N, 2
|
||||
beq ZERO, J, .L_N4
|
||||
move AA, A
|
||||
PTR_SLLI T0, LDC, 1
|
||||
PTR_SLLI T1, K, 4
|
||||
PTR_SLLI T2, KK, 4
|
||||
PTR_SUB B, B, T1
|
||||
PTR_SUB C, C, T0
|
||||
PTR_ADD BB, B, T2
|
||||
move CC, C
|
||||
PTR_SRAI I, M, 4 // M >> 4
|
||||
beqz I, .L_N2_M15
|
||||
.align 4
|
||||
.L_N2_I1:
|
||||
PTR_SLLI T1, KK, 7
|
||||
GADD , d, C0, CC, ZERO, C1, C0, LDC
|
||||
PTR_ADD A0, AA, T1 // a0 = aa + 16 * kk
|
||||
move B0, BB
|
||||
PTR_SUB L, K, KK // L = K - KK
|
||||
dgemm_dsolve_16x2
|
||||
PTR_ADDI I, I, -1
|
||||
PTR_SLLI T0, K, 7
|
||||
PTR_ADDI CC, CC, 0x80 // cc += 16
|
||||
PTR_ADD AA, AA, T0 // aa += 16 * k
|
||||
bnez I, .L_N2_I1
|
||||
.L_N2_M15:
|
||||
andi I, M, 8
|
||||
beqz I, .L_N2_M7
|
||||
.L_N2_M8:
|
||||
PTR_SLLI T1, KK, 6
|
||||
GADD , d, C0, CC, ZERO, C1, C0, LDC
|
||||
PTR_ADD A0, AA, T1 // a0 = aa + 8 * kk
|
||||
move B0, BB
|
||||
PTR_SUB L, K, KK // L = K - KK
|
||||
dgemm_dsolve_8x2
|
||||
PTR_SLLI T0, K, 6
|
||||
PTR_ADDI CC, CC, 0x40 // cc += 8
|
||||
PTR_ADD AA, AA, T0 // aa += 8 * k
|
||||
.L_N2_M7:
|
||||
andi I, M, 4
|
||||
beqz I, .L_N2_M3
|
||||
.L_N2_M4:
|
||||
PTR_SLLI T1, KK, 5
|
||||
GADD , d, C0, CC, ZERO, C1, C0, LDC
|
||||
PTR_ADD A0, AA, T1 // a0 = aa + 4 * kk
|
||||
move B0, BB
|
||||
PTR_SUB L, K, KK // L = K - KK
|
||||
dgemm_dsolve_4x2
|
||||
PTR_SLLI T0, K, 5
|
||||
PTR_ADDI CC, CC, 0x20 // cc += 4
|
||||
PTR_ADD AA, AA, T0 // aa += 4 * k
|
||||
.L_N2_M3:
|
||||
andi I, M, 2
|
||||
beqz I, .L_N2_M1
|
||||
.L_N2_M2:
|
||||
PTR_SLLI T1, KK, 4
|
||||
GADD , d, C0, CC, ZERO, C1, C0, LDC
|
||||
PTR_ADD A0, AA, T1 // a0 = aa + 2 * kk
|
||||
move B0, BB
|
||||
PTR_SUB L, K, KK // L = K - KK
|
||||
dgemm_dsolve_2x2
|
||||
PTR_SLLI T0, K, 4
|
||||
PTR_ADDI CC, CC, 0x10 // cc += 2
|
||||
PTR_ADD AA, AA, T0 // aa += 2 * k
|
||||
.L_N2_M1:
|
||||
andi I, M, 1
|
||||
beqz I, .L_N2_M0
|
||||
PTR_SLLI T1, KK, 3
|
||||
GADD , d, C0, CC, ZERO, C1, C0, LDC
|
||||
PTR_ADD A0, AA, T1 // a0 = aa + kk
|
||||
move B0, BB
|
||||
PTR_SUB L, K, KK // L = K - KK
|
||||
dgemm_dsolve_1x2
|
||||
PTR_SLLI T0, K, 3
|
||||
PTR_ADDI CC, CC, 0x08 // cc += 1
|
||||
PTR_ADD AA, AA, T0 // aa += 1 * k
|
||||
.L_N2_M0:
|
||||
PTR_ADDI KK, KK, -2
|
||||
.L_N4:
|
||||
PTR_SRAI J, N, 2 /* J = bn >> 2 */
|
||||
beq ZERO, J, .L_N0
|
||||
.align 5
|
||||
.L_J1:
|
||||
PTR_ADDI J, J, -1
|
||||
move AA, A
|
||||
PTR_SLLI T0, LDC, 2
|
||||
PTR_SLLI T1, K, 5
|
||||
PTR_SLLI T2, KK, 5
|
||||
PTR_SUB B, B, T1
|
||||
PTR_SUB C, C, T0
|
||||
PTR_ADD BB, B, T2
|
||||
move CC, C
|
||||
PTR_SRAI I, M, 4 // M >> 4
|
||||
beqz I, .L_M15
|
||||
.align 4
|
||||
.L_I1:
|
||||
PTR_SLLI T1, KK, 7
|
||||
GADD , d, C0, CC, ZERO, C1, C0, LDC, C2, C1, LDC, C3, C2, LDC
|
||||
PTR_ADD A0, AA, T1 // a0 = aa + 16 * kk
|
||||
move B0, BB
|
||||
PTR_SUB L, K, KK // L = K - KK
|
||||
dgemm_dsolve_16x4
|
||||
PTR_ADDI I, I, -1
|
||||
PTR_SLLI T0, K, 7
|
||||
PTR_ADDI CC, CC, 0x80 // cc += 16
|
||||
PTR_ADD AA, AA, T0 // aa += 16 * k
|
||||
bnez I, .L_I1
|
||||
.L_M15:
|
||||
andi I, M, 8
|
||||
beqz I, .L_M7
|
||||
.L_M8:
|
||||
PTR_SLLI T1, KK, 6
|
||||
GADD , d, C0, CC, ZERO, C1, C0, LDC, C2, C1, LDC, C3, C2, LDC
|
||||
PTR_ADD A0, AA, T1 // a0 = aa + 8 * kk
|
||||
move B0, BB
|
||||
PTR_SUB L, K, KK // L = K - KK
|
||||
dgemm_dsolve_8x4
|
||||
PTR_SLLI T0, K, 6
|
||||
PTR_ADDI CC, CC, 0x40 // cc += 8
|
||||
PTR_ADD AA, AA, T0 // aa += 8 * k
|
||||
.L_M7:
|
||||
andi I, M, 4
|
||||
beqz I, .L_M3
|
||||
.L_M4:
|
||||
PTR_SLLI T1, KK, 5
|
||||
GADD , d, C0, CC, ZERO, C1, C0, LDC, C2, C1, LDC, C3, C2, LDC
|
||||
PTR_ADD A0, AA, T1 // a0 = aa + 4 * kk
|
||||
move B0, BB
|
||||
PTR_SUB L, K, KK // L = K - KK
|
||||
dgemm_dsolve_4x4
|
||||
PTR_SLLI T0, K, 5
|
||||
PTR_ADDI CC, CC, 0x20 // cc += 4
|
||||
PTR_ADD AA, AA, T0 // aa += 4 * k
|
||||
.L_M3:
|
||||
andi I, M, 2
|
||||
beqz I, .L_M1
|
||||
.L_M2:
|
||||
PTR_SLLI T1, KK, 4
|
||||
GADD , d, C0, CC, ZERO, C1, C0, LDC, C2, C1, LDC, C3, C2, LDC
|
||||
PTR_ADD A0, AA, T1 // a0 = aa + 2 * kk
|
||||
move B0, BB
|
||||
PTR_SUB L, K, KK // L = K - KK
|
||||
dgemm_dsolve_2x4
|
||||
PTR_SLLI T0, K, 4
|
||||
PTR_ADDI CC, CC, 0x10 // cc += 2
|
||||
PTR_ADD AA, AA, T0 // aa += 2 * k
|
||||
.L_M1:
|
||||
andi I, M, 1
|
||||
beqz I, .L_M0
|
||||
PTR_SLLI T1, KK, 3
|
||||
GADD , d, C0, CC, ZERO, C1, C0, LDC, C2, C1, LDC, C3, C2, LDC
|
||||
PTR_ADD A0, AA, T1 // a0 = aa + kk
|
||||
move B0, BB
|
||||
PTR_SUB L, K, KK // L = K - KK
|
||||
dgemm_dsolve_1x4
|
||||
PTR_SLLI T0, K, 3
|
||||
PTR_ADDI CC, CC, 0x08 // cc += 1
|
||||
PTR_ADD AA, AA, T0 // aa += 1 * k
|
||||
.L_M0:
|
||||
PTR_ADDI KK, KK, -4
|
||||
bnez J, .L_J1
|
||||
.L_N0:
|
||||
pop_if_used 26, 32
|
||||
jirl $r0, $r1, 0x0
|
||||
EPILOGUE
|
||||
File diff suppressed because it is too large
Load Diff
|
|
@ -0,0 +1,463 @@
|
|||
/*******************************************************************************
|
||||
Copyright (c) 2023, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*******************************************************************************/
|
||||
#define ASSEMBLER
|
||||
|
||||
#include "common.h"
|
||||
#include "loongarch64_asm.S"
|
||||
|
||||
/*********************************************************************
|
||||
* 2023/08/30 guxiwei
|
||||
* UTEST : OK
|
||||
* CTEST : OK
|
||||
* TEST : OK
|
||||
*
|
||||
*
|
||||
*********************************************************************/
|
||||
|
||||
/* int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha,
|
||||
* FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *buffer)
|
||||
*/
|
||||
#define M $r4
|
||||
#define N $r5
|
||||
#define ALPHA $f0
|
||||
#define A $r7
|
||||
#define LDA $r8
|
||||
#define X $r9
|
||||
#define INC_X $r10
|
||||
#define Y $r11
|
||||
#define INC_Y $r6
|
||||
|
||||
#define J $r12
|
||||
#define I $r13
|
||||
#define K $r14
|
||||
#define Y_ORG $r15
|
||||
#define OFFSET $r16
|
||||
#define K_LDA $r17
|
||||
#define M4 $r18
|
||||
#define T0 $r19
|
||||
#define PA0 $r20
|
||||
#define PA1 $r23
|
||||
#define PA2 $r24
|
||||
#define PA3 $r25
|
||||
#define PA4 $r26
|
||||
#define PA5 $r27
|
||||
#define PA6 $r28
|
||||
#define PA7 $r29
|
||||
|
||||
#define VALPHA $xr1
|
||||
#define X0 $xr2
|
||||
#define X1 $xr3
|
||||
#define X2 $xr4
|
||||
#define X3 $xr5
|
||||
#define X4 $xr6
|
||||
#define X5 $xr7
|
||||
#define X6 $xr8
|
||||
#define X7 $xr9
|
||||
#define Y0 $xr10
|
||||
#define A0 $xr11
|
||||
#define A1 $xr12
|
||||
#define A2 $xr13
|
||||
#define A3 $xr14
|
||||
#define A4 $xr15
|
||||
#define A5 $xr16
|
||||
#define A6 $xr17
|
||||
#define A7 $xr18
|
||||
|
||||
#define X0_F $f2
|
||||
#define X1_F $f3
|
||||
#define X2_F $f4
|
||||
#define X3_F $f5
|
||||
#define X4_F $f6
|
||||
#define X5_F $f7
|
||||
#define X6_F $f8
|
||||
#define X7_F $f9
|
||||
#define Y0_F $f10
|
||||
#define A0_F $f11
|
||||
#define A1_F $f12
|
||||
#define A2_F $f13
|
||||
#define A3_F $f14
|
||||
#define A4_F $f15
|
||||
#define A5_F $f16
|
||||
#define A6_F $f17
|
||||
#define A7_F $f18
|
||||
|
||||
.macro SLOAD_X_8
|
||||
GLDREPL xv, w, X0, X, 0x00, X1, X, 0x04, X2, X, 0x08, X3, X, 0x0C, \
|
||||
X4, X, 0x10, X5, X, 0x14, X6, X, 0x18, X7, X, 0x1C
|
||||
GMUL xvf, s, X0, X0, VALPHA, X1, X1, VALPHA, X2, X2, VALPHA, X3, X3, VALPHA, \
|
||||
X4, X4, VALPHA, X5, X5, VALPHA, X6, X6, VALPHA, X7, X7, VALPHA
|
||||
.endm
|
||||
|
||||
.macro SLOAD_X_8_GAP
|
||||
xvldrepl.w X0, X, 0x00
|
||||
PTR_ADD T0, X, INC_X
|
||||
xvldrepl.w X1, T0, 0x00
|
||||
PTR_ADD T0, T0, INC_X
|
||||
xvldrepl.w X2, T0, 0x00
|
||||
PTR_ADD T0, T0, INC_X
|
||||
xvldrepl.w X3, T0, 0x00
|
||||
PTR_ADD T0, T0, INC_X
|
||||
xvldrepl.w X4, T0, 0x00
|
||||
PTR_ADD T0, T0, INC_X
|
||||
xvldrepl.w X5, T0, 0x00
|
||||
PTR_ADD T0, T0, INC_X
|
||||
xvldrepl.w X6, T0, 0x00
|
||||
PTR_ADD T0, T0, INC_X
|
||||
xvldrepl.w X7, T0, 0x00
|
||||
GMUL xvf, s, X0, X0, VALPHA, X1, X1, VALPHA, X2, X2, VALPHA, X3, X3, VALPHA, \
|
||||
X4, X4, VALPHA, X5, X5, VALPHA, X6, X6, VALPHA, X7, X7, VALPHA
|
||||
.endm
|
||||
|
||||
.macro SLOAD_X_4
|
||||
GLDREPL xv, w, X0, X, 0x00, X1, X, 0x04, X2, X, 0x08, X3, X, 0x0C
|
||||
GMUL xvf, s, X0, X0, VALPHA, X1, X1, VALPHA, X2, X2, VALPHA, X3, X3, VALPHA
|
||||
.endm
|
||||
|
||||
.macro SLOAD_X_4_GAP
|
||||
xvldrepl.w X0, X, 0x00
|
||||
PTR_ADD T0, X, INC_X
|
||||
xvldrepl.w X1, T0, 0x00
|
||||
PTR_ADD T0, T0, INC_X
|
||||
xvldrepl.w X2, T0, 0x00
|
||||
PTR_ADD T0, T0, INC_X
|
||||
xvldrepl.w X3, T0, 0x00
|
||||
GMUL xvf, s, X0, X0, VALPHA, X1, X1, VALPHA, X2, X2, VALPHA, X3, X3, VALPHA
|
||||
.endm
|
||||
|
||||
.macro SLOAD_X_2
|
||||
GLDREPL xv, w, X0, X, 0x00, X1, X, 0x04
|
||||
GMUL xvf, s, X0, X0, VALPHA, X1, X1, VALPHA
|
||||
.endm
|
||||
|
||||
.macro SLOAD_X_2_GAP
|
||||
xvldrepl.w X0, X, 0x00
|
||||
PTR_ADD T0, X, INC_X
|
||||
xvldrepl.w X1, T0, 0x00
|
||||
GMUL xvf, s, X0, X0, VALPHA, X1, X1, VALPHA
|
||||
.endm
|
||||
|
||||
.macro SLOAD_X_1
|
||||
GLDREPL xv, w, X0, X, 0x00
|
||||
GMUL xvf, s, X0, X0, VALPHA
|
||||
.endm
|
||||
|
||||
.macro SLOAD_Y_8
|
||||
GLD xv, , Y0, Y, 0
|
||||
.endm
|
||||
|
||||
.macro SLOAD_Y_8_GAP
|
||||
fld.s Y0_F, Y, 0
|
||||
fldx.s A0_F, Y, INC_Y
|
||||
PTR_ALSL T0, INC_Y, Y, 1
|
||||
fld.s A1_F, T0, 0
|
||||
fldx.s A2_F, T0, INC_Y
|
||||
PTR_ALSL T0, INC_Y, Y, 2
|
||||
fld.s A3_F, T0, 0
|
||||
fldx.s A4_F, T0, INC_Y
|
||||
PTR_ADD T0, T0, INC_Y
|
||||
PTR_ADD T0, T0, INC_Y
|
||||
fld.s A5_F, T0, 0
|
||||
fldx.s A6_F, T0, INC_Y
|
||||
GINSVE0 xv, w, Y0, A0, 1, Y0, A1, 2, Y0, A2, 3, Y0, A3, 4, \
|
||||
Y0, A4, 5, Y0, A5, 6, Y0, A6, 7
|
||||
.endm
|
||||
|
||||
.macro SLOAD_Y_1
|
||||
GLD f, s, Y0_F, Y, 0
|
||||
.endm
|
||||
|
||||
.macro SGEMV_N_8x8
|
||||
GLD_INC xv, , 0x20, \
|
||||
A0, PA0, 0, A1, PA1, 0, \
|
||||
A2, PA2, 0, A3, PA3, 0, \
|
||||
A4, PA4, 0, A5, PA5, 0, \
|
||||
A6, PA6, 0, A7, PA7, 0
|
||||
GMADD xvf, s, Y0, A0, X0, Y0, Y0, A1, X1, Y0, \
|
||||
Y0, A2, X2, Y0, Y0, A3, X3, Y0, \
|
||||
Y0, A4, X4, Y0, Y0, A5, X5, Y0, \
|
||||
Y0, A6, X6, Y0, Y0, A7, X7, Y0
|
||||
.endm
|
||||
|
||||
.macro SGEMV_N_1x8
|
||||
GLD_INC f, s, 0x04, \
|
||||
A0_F, PA0, 0, A1_F, PA1, 0, \
|
||||
A2_F, PA2, 0, A3_F, PA3, 0, \
|
||||
A4_F, PA4, 0, A5_F, PA5, 0, \
|
||||
A6_F, PA6, 0, A7_F, PA7, 0
|
||||
GMADD f, s, Y0_F, A0_F, X0_F, Y0_F, Y0_F, A1_F, X1_F, Y0_F, \
|
||||
Y0_F, A2_F, X2_F, Y0_F, Y0_F, A3_F, X3_F, Y0_F, \
|
||||
Y0_F, A4_F, X4_F, Y0_F, Y0_F, A5_F, X5_F, Y0_F, \
|
||||
Y0_F, A6_F, X6_F, Y0_F, Y0_F, A7_F, X7_F, Y0_F
|
||||
.endm
|
||||
|
||||
.macro SGEMV_N_8x4
|
||||
GLD_INC xv, , 0x20, \
|
||||
A0, PA0, 0, A1, PA1, 0, \
|
||||
A2, PA2, 0, A3, PA3, 0
|
||||
GMADD xvf, s, Y0, A0, X0, Y0, Y0, A1, X1, Y0, \
|
||||
Y0, A2, X2, Y0, Y0, A3, X3, Y0
|
||||
.endm
|
||||
|
||||
.macro SGEMV_N_1x4
|
||||
GLD_INC f, s, 0x04, \
|
||||
A0_F, PA0, 0, A1_F, PA1, 0, \
|
||||
A2_F, PA2, 0, A3_F, PA3, 0
|
||||
GMADD f, s, Y0_F, A0_F, X0_F, Y0_F, Y0_F, A1_F, X1_F, Y0_F, \
|
||||
Y0_F, A2_F, X2_F, Y0_F, Y0_F, A3_F, X3_F, Y0_F
|
||||
.endm
|
||||
|
||||
.macro SGEMV_N_8x2
|
||||
GLD_INC xv, , 0x20, \
|
||||
A0, PA0, 0, A1, PA1, 0
|
||||
GMADD xvf, s, Y0, A0, X0, Y0, Y0, A1, X1, Y0
|
||||
.endm
|
||||
|
||||
.macro SGEMV_N_1x2
|
||||
GLD_INC f, s, 0x04, \
|
||||
A0_F, PA0, 0, A1_F, PA1, 0
|
||||
GMADD f, s, Y0_F, A0_F, X0_F, Y0_F, Y0_F, A1_F, X1_F, Y0_F
|
||||
.endm
|
||||
|
||||
.macro SGEMV_N_1x1
|
||||
GLD_INC f, s, 0x04, A0_F, PA0, 0
|
||||
GMADD f, s, Y0_F, A0_F, X0_F, Y0_F
|
||||
.endm
|
||||
|
||||
.macro SSTORE_Y_8
|
||||
GST xv, , Y0, Y, 0
|
||||
.endm
|
||||
|
||||
.macro SSTORE_Y_8_GAP
|
||||
xvstelm.w Y0, Y, 0, 0
|
||||
PTR_ADD T0, Y, INC_Y
|
||||
xvstelm.w Y0, T0, 0, 1
|
||||
PTR_ADD T0, T0, INC_Y
|
||||
xvstelm.w Y0, T0, 0, 2
|
||||
PTR_ADD T0, T0, INC_Y
|
||||
xvstelm.w Y0, T0, 0, 3
|
||||
|
||||
PTR_ADD T0, T0, INC_Y
|
||||
xvstelm.w Y0, T0, 0, 4
|
||||
PTR_ADD T0, T0, INC_Y
|
||||
xvstelm.w Y0, T0, 0, 5
|
||||
PTR_ADD T0, T0, INC_Y
|
||||
xvstelm.w Y0, T0, 0, 6
|
||||
PTR_ADD T0, T0, INC_Y
|
||||
xvstelm.w Y0, T0, 0, 7
|
||||
.endm
|
||||
|
||||
.macro SSTORE_Y_1
|
||||
GST f, s, Y0_F, Y, 0
|
||||
.endm
|
||||
|
||||
.macro SGEMV_N_LASX XW:req, X_8:req, X_4:req, X_2:req, X_1:req, Y_8:req, Y_4:req, Y_1:req
|
||||
PTR_SRLI J, N, 3
|
||||
beqz J, .L_\XW\()_N_7
|
||||
PTR_SLLI K_LDA, LDA, 3
|
||||
PTR_SUB K_LDA, K_LDA, M4
|
||||
.L_\XW\()_N_L8:
|
||||
SLOAD_\X_8
|
||||
xor K, K, K
|
||||
move Y, Y_ORG
|
||||
PTR_SRLI I, M, 3
|
||||
beqz I, .L_\XW\()_M_7
|
||||
.align 5
|
||||
.L_\XW\()_M_L8:
|
||||
SLOAD_\Y_8
|
||||
SGEMV_N_8x8
|
||||
SSTORE_\Y_8
|
||||
PTR_ADDI I, I, -1
|
||||
PTR_ALSL Y, INC_Y, Y, 3
|
||||
PTR_ADDI K, K, 8
|
||||
bnez I, .L_\XW\()_M_L8
|
||||
.L_\XW\()_M_7:
|
||||
andi I, M, 7
|
||||
beqz I, .L_\XW\()_M_END
|
||||
.align 5
|
||||
.L_\XW\()_M_L1:
|
||||
SLOAD_\Y_1
|
||||
SGEMV_N_1x8
|
||||
SSTORE_\Y_1
|
||||
PTR_ADDI I, I, -1
|
||||
PTR_ADD Y, Y, INC_Y
|
||||
PTR_ADDI K, K, 1
|
||||
bnez I, .L_\XW\()_M_L1
|
||||
.L_\XW\()_M_END:
|
||||
PTR_ADDI J, J, -1
|
||||
#if __loongarch_grlen == 64
|
||||
GADD , d, PA0, PA0, K_LDA, PA1, PA1, K_LDA, PA2, PA2, K_LDA, PA3, PA3, K_LDA, \
|
||||
PA4, PA4, K_LDA, PA5, PA5, K_LDA, PA6, PA6, K_LDA, PA7, PA7, K_LDA
|
||||
#elif __loongarch_grlen == 32
|
||||
GADD , w, PA0, PA0, K_LDA, PA1, PA1, K_LDA, PA2, PA2, K_LDA, PA3, PA3, K_LDA, \
|
||||
PA4, PA4, K_LDA, PA5, PA5, K_LDA, PA6, PA6, K_LDA, PA7, PA7, K_LDA
|
||||
#else
|
||||
GADD , d, PA0, PA0, K_LDA, PA1, PA1, K_LDA, PA2, PA2, K_LDA, PA3, PA3, K_LDA, \
|
||||
PA4, PA4, K_LDA, PA5, PA5, K_LDA, PA6, PA6, K_LDA, PA7, PA7, K_LDA
|
||||
#endif
|
||||
PTR_ALSL X, INC_X, X, 3
|
||||
bnez J, .L_\XW\()_N_L8
|
||||
.L_\XW\()_N_7:
|
||||
andi J, N, 4
|
||||
beqz J, .L_\XW\()_N_3
|
||||
SLOAD_\X_4
|
||||
xor K, K, K
|
||||
move Y, Y_ORG
|
||||
|
||||
PTR_SRLI I, M, 3
|
||||
beqz I, .L_\XW\()_N_4_M_7
|
||||
.align 5
|
||||
.L_\XW\()_N_4_M_L8:
|
||||
SLOAD_\Y_8
|
||||
SGEMV_N_8x4
|
||||
SSTORE_\Y_8
|
||||
PTR_ADDI I, I, -1
|
||||
PTR_ADDI K, K, 8
|
||||
PTR_ALSL Y, INC_Y, Y, 3
|
||||
bnez I, .L_\XW\()_N_4_M_L8
|
||||
.L_\XW\()_N_4_M_7:
|
||||
andi I, M, 7
|
||||
beqz I, .L_\XW\()_N_4_M_END
|
||||
.align 5
|
||||
.L_\XW\()_N_4_M_L1:
|
||||
SLOAD_\Y_1
|
||||
SGEMV_N_1x4
|
||||
SSTORE_\Y_1
|
||||
PTR_ADDI I, I, -1
|
||||
PTR_ADD Y, Y, INC_Y
|
||||
PTR_ADDI K, K, 1
|
||||
bnez I, .L_\XW\()_N_4_M_L1
|
||||
.L_\XW\()_N_4_M_END:
|
||||
PTR_SLLI K_LDA, LDA, 2
|
||||
PTR_SUB K_LDA, K_LDA, M4
|
||||
#if __loongarch_grlen == 64
|
||||
GADD , d, PA0, PA0, K_LDA, PA1, PA1, K_LDA, PA2, PA2, K_LDA, PA3, PA3, K_LDA
|
||||
#elif __loongarch_grlen == 32
|
||||
GADD , w PA0, PA0, K_LDA, PA1, PA1, K_LDA, PA2, PA2, K_LDA, PA3, PA3, K_LDA
|
||||
#else
|
||||
GADD , d, PA0, PA0, K_LDA, PA1, PA1, K_LDA, PA2, PA2, K_LDA, PA3, PA3, K_LDA
|
||||
#endif
|
||||
PTR_ALSL X, INC_X, X, 2
|
||||
.L_\XW\()_N_3:
|
||||
andi J, N, 2
|
||||
beqz J, .L_\XW\()_N_1
|
||||
SLOAD_\X_2
|
||||
xor K, K, K
|
||||
move Y, Y_ORG
|
||||
PTR_SRLI I, M, 3
|
||||
beqz I, .L_\XW\()_N_2_M_7
|
||||
.align 5
|
||||
.L_\XW\()_N_2_M_L8:
|
||||
SLOAD_\Y_8
|
||||
SGEMV_N_8x2
|
||||
SSTORE_\Y_8
|
||||
PTR_ADDI I, I, -1
|
||||
PTR_ADDI K, K, 8
|
||||
PTR_ALSL Y, INC_Y, Y, 3
|
||||
bnez I, .L_\XW\()_N_2_M_L8
|
||||
.L_\XW\()_N_2_M_7:
|
||||
andi I, M, 7
|
||||
beqz I, .L_\XW\()_N_2_M_END
|
||||
.align 5
|
||||
.L_\XW\()_N_2_M_L1:
|
||||
SLOAD_\Y_1
|
||||
SGEMV_N_1x2
|
||||
SSTORE_\Y_1
|
||||
PTR_ADDI I, I, -1
|
||||
PTR_ADD Y, Y, INC_Y
|
||||
PTR_ADDI K, K, 1
|
||||
bnez I, .L_\XW\()_N_2_M_L1
|
||||
.L_\XW\()_N_2_M_END:
|
||||
PTR_SLLI K_LDA, LDA, 1
|
||||
PTR_SUB K_LDA, K_LDA, M4
|
||||
PTR_ADD PA0, PA0, K_LDA
|
||||
PTR_ADD PA1, PA1, K_LDA
|
||||
PTR_ALSL X, INC_X, X, 1
|
||||
.L_\XW\()_N_1:
|
||||
andi J, N, 1
|
||||
beqz J, .L_END
|
||||
SLOAD_\X_1
|
||||
xor K, K, K
|
||||
move Y, Y_ORG
|
||||
move I, M
|
||||
beqz I, .L_END
|
||||
.align 5
|
||||
.L_\XW\()_N_1_M_L1:
|
||||
SLOAD_\Y_1
|
||||
SGEMV_N_1x1
|
||||
SSTORE_\Y_1
|
||||
PTR_ADDI I, I, -1
|
||||
PTR_ADD Y, Y, INC_Y
|
||||
PTR_ADDI K, K, 1
|
||||
bnez I, .L_\XW\()_N_1_M_L1
|
||||
b .L_END
|
||||
.endm
|
||||
|
||||
PROLOGUE
|
||||
PTR_LD INC_Y, $sp, 0
|
||||
push_if_used 17 + 7, 19
|
||||
PTR_ADDI K, $r0, 0x01
|
||||
PTR_SUB I, INC_X, K
|
||||
PTR_SUB J, INC_Y, K
|
||||
maskeqz I, K, I /* if(inc_x == 1) I = 0; else I = 1; */
|
||||
maskeqz J, K, J /* if(inc_y == 1) j = 0; else j = 1; */
|
||||
PTR_ALSL I, I, J, 1
|
||||
GSLLI , d, LDA, LDA, 2, INC_X, INC_X, 2, INC_Y, INC_Y, 2, M4, M, 2
|
||||
xvreplve0.w VALPHA, $xr0
|
||||
move Y_ORG, Y
|
||||
move PA0, A
|
||||
#if __loongarch_grlen == 64
|
||||
GADD , d, PA1, PA0, LDA, PA2, PA1, LDA, PA3, PA2, LDA, PA4, PA3, LDA, \
|
||||
PA5, PA4, LDA, PA6, PA5, LDA, PA7, PA6, LDA
|
||||
#elif __loongarch_grlen == 32
|
||||
GADD , w, PA1, PA0, LDA, PA2, PA1, LDA, PA3, PA2, LDA, PA4, PA3, LDA, \
|
||||
PA5, PA4, LDA, PA6, PA5, LDA, PA7, PA6, LDA
|
||||
#else
|
||||
GADD , d, PA1, PA0, LDA, PA2, PA1, LDA, PA3, PA2, LDA, PA4, PA3, LDA, \
|
||||
PA5, PA4, LDA, PA6, PA5, LDA, PA7, PA6, LDA
|
||||
#endif
|
||||
la.local T0, .L_GAP_TABLE
|
||||
PTR_ALSL I, I, T0, 1
|
||||
ld.h K, I, 0
|
||||
PTR_ADD T0, T0, K
|
||||
jirl $r0, T0, 0
|
||||
.L_GAP_TABLE:
|
||||
.hword .L_GAP_0_0 - .L_GAP_TABLE
|
||||
.hword .L_GAP_0_1 - .L_GAP_TABLE
|
||||
.hword .L_GAP_1_0 - .L_GAP_TABLE
|
||||
.hword .L_GAP_1_1 - .L_GAP_TABLE
|
||||
.L_GAP_0_0: /* if (inc_x == 1) && (incy == 1) */
|
||||
SGEMV_N_LASX GAP_0_0, X_8, X_4, X_2, X_1, Y_8, Y_4, Y_1
|
||||
.L_GAP_0_1: /* if (inc_x == 1) && (incy != 1) */
|
||||
SGEMV_N_LASX GAP_0_1, X_8, X_4, X_2, X_1, Y_8_GAP, Y_4_GAP, Y_1
|
||||
.L_GAP_1_0: /* if (inc_x != 1) && (incy == 1) */
|
||||
SGEMV_N_LASX GAP_1_0, X_8_GAP, X_4_GAP, X_2_GAP, X_1, Y_8, Y_4, Y_1
|
||||
.L_GAP_1_1: /* if (inc_x != 1) && (incy != 1) */
|
||||
SGEMV_N_LASX GAP_1_1, X_8_GAP, X_4_GAP, X_2_GAP, X_1, Y_8_GAP, Y_4_GAP, Y_1
|
||||
.L_END:
|
||||
pop_if_used 17 + 7, 19
|
||||
jirl $r0, $r1, 0x0
|
||||
EPILOGUE
|
||||
|
|
@ -0,0 +1,405 @@
|
|||
/*******************************************************************************
|
||||
Copyright (c) 2023, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*******************************************************************************/
|
||||
#define ASSEMBLER
|
||||
|
||||
#include "common.h"
|
||||
#include "loongarch64_asm.S"
|
||||
|
||||
/*********************************************************************
|
||||
* 2023/08/30 guxiwei
|
||||
* UTEST : OK
|
||||
* CTEST : OK
|
||||
* TEST : OK
|
||||
*
|
||||
*
|
||||
*********************************************************************/
|
||||
|
||||
/* int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha,
|
||||
* FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *buffer)
|
||||
*/
|
||||
#define M $r4
|
||||
#define N $r5
|
||||
#define ALPHA $f0
|
||||
#define A $r7
|
||||
#define LDA $r8
|
||||
#define X $r9
|
||||
#define INC_X $r10
|
||||
#define Y $r11
|
||||
#define INC_Y $r6
|
||||
|
||||
#define J $r12
|
||||
#define I $r13
|
||||
#define K $r14
|
||||
#define PY0 $r14
|
||||
#define X_ORG $r15
|
||||
#define PY1 $r16
|
||||
#define K_LDA $r17
|
||||
#define PY2 $r18
|
||||
#define T0 $r19
|
||||
#define PA0 $r20
|
||||
#define PA1 $r23
|
||||
#define PA2 $r24
|
||||
#define PA3 $r25
|
||||
#define PA4 $r26
|
||||
#define PA5 $r27
|
||||
#define PA6 $r28
|
||||
#define PA7 $r29
|
||||
#define M4 $r30
|
||||
|
||||
#define VALPHA $xr0
|
||||
#define X0 $xr1
|
||||
#define A0 $xr2
|
||||
#define A1 $xr3
|
||||
#define A2 $xr4
|
||||
#define A3 $xr5
|
||||
#define A4 $xr6
|
||||
#define A5 $xr7
|
||||
#define A6 $xr8
|
||||
#define A7 $xr9
|
||||
#define TP0 $xr10
|
||||
#define TP1 $xr11
|
||||
#define TP2 $xr12
|
||||
#define TP3 $xr13
|
||||
#define TP4 $xr14
|
||||
#define TP5 $xr15
|
||||
#define TP6 $xr16
|
||||
#define TP7 $xr17
|
||||
#define Y0 $xr2
|
||||
#define Y1 $xr3
|
||||
#define Y2 $xr4
|
||||
#define Y3 $xr5
|
||||
#define Y4 $xr6
|
||||
#define Y5 $xr7
|
||||
#define Y6 $xr8
|
||||
#define Y7 $xr9
|
||||
|
||||
.macro ZERO_Y8
|
||||
GXOR xv, v, TP0, TP0, TP0, TP1, TP1, TP1, TP2, TP2, TP2, TP3, TP3, TP3, \
|
||||
TP4, TP4, TP4, TP5, TP5, TP5, TP6, TP6, TP6, TP7, TP7, TP7
|
||||
.endm
|
||||
|
||||
.macro ZERO_Y4
|
||||
GXOR xv, v, TP0, TP0, TP0, TP1, TP1, TP1, TP2, TP2, TP2, TP3, TP3, TP3
|
||||
.endm
|
||||
|
||||
.macro ZERO_Y2
|
||||
GXOR xv, v, TP0, TP0, TP0, TP1, TP1, TP1
|
||||
.endm
|
||||
|
||||
.macro ZERO_Y1
|
||||
GXOR xv, v, TP0, TP0, TP0
|
||||
.endm
|
||||
|
||||
.macro SLOAD_X8
|
||||
GLD xv, , X0, X, 0x00
|
||||
.endm
|
||||
|
||||
.macro SLOAD_X8_GAP
|
||||
fld.s $f1, X, 0x00
|
||||
fldx.s $f2, X, INC_X
|
||||
PTR_ALSL T0, INC_X, X, 1
|
||||
fld.s $f3, T0, 0x00
|
||||
fldx.s $f4, T0, INC_X
|
||||
GINSVE0 xv, w, X0, A0, 1, X0, A1, 2, X0, A2, 3
|
||||
PTR_ALSL T0, INC_X, X, 2
|
||||
fld.s $f2, T0, 0x00
|
||||
fldx.s $f3, T0, INC_X
|
||||
PTR_ALSL T0, INC_X, T0, 1
|
||||
fld.s $f4, T0, 0x00
|
||||
fldx.s $f5, T0, INC_X
|
||||
GINSVE0 xv, w, X0, A0, 4, X0, A1, 5, X0, A2, 6, X0, A3, 7
|
||||
.endm
|
||||
|
||||
.macro SGEMV_T_8x8
|
||||
GLD_INC xv, , 0x20, \
|
||||
A0, PA0, 0, A1, PA1, 0, \
|
||||
A2, PA2, 0, A3, PA3, 0, \
|
||||
A4, PA4, 0, A5, PA5, 0, \
|
||||
A6, PA6, 0, A7, PA7, 0
|
||||
GMADD xvf, s, TP0, A0, X0, TP0, TP1, A1, X0, TP1, \
|
||||
TP2, A2, X0, TP2, TP3, A3, X0, TP3, \
|
||||
TP4, A4, X0, TP4, TP5, A5, X0, TP5, \
|
||||
TP6, A6, X0, TP6, TP7, A7, X0, TP7
|
||||
.endm
|
||||
|
||||
.macro SGEMV_T_4x8
|
||||
GLD_INC xv, , 0x20, \
|
||||
A0, PA0, 0, A1, PA1, 0, \
|
||||
A2, PA2, 0, A3, PA3, 0
|
||||
GMADD xvf, s, TP0, A0, X0, TP0, TP1, A1, X0, TP1, \
|
||||
TP2, A2, X0, TP2, TP3, A3, X0, TP3
|
||||
.endm
|
||||
|
||||
.macro SGEMV_T_2x8
|
||||
GLD_INC xv, , 0x20, \
|
||||
A0, PA0, 0, A1, PA1, 0
|
||||
GMADD xvf, s, TP0, A0, X0, TP0, TP1, A1, X0, TP1
|
||||
.endm
|
||||
|
||||
.macro SGEMV_T_LASX XW:req X8:req, X4:req
|
||||
PTR_SRLI J, N, 3
|
||||
beqz J, .L_\XW\()_N_7
|
||||
PTR_SLLI K_LDA, LDA, 3
|
||||
PTR_SUB K_LDA, K_LDA, M4
|
||||
.L_\XW\()_N_L8:
|
||||
ZERO_Y8
|
||||
move X, X_ORG
|
||||
PTR_SRLI I, M, 3
|
||||
beqz I, .L_\XW\()_M_7
|
||||
.align 5
|
||||
.L_\XW\()_M_L8:
|
||||
SLOAD_\X8
|
||||
SGEMV_T_8x8
|
||||
PTR_ADDI I, I, -1
|
||||
PTR_ALSL X, INC_X, X, 3
|
||||
bnez I, .L_\XW\()_M_L8
|
||||
.L_\XW\()_M_7:
|
||||
// Accumulated
|
||||
GACC xvf, s, Y0, TP0, Y1, TP1, Y2, TP2, Y3, TP3, Y4, TP4, \
|
||||
Y5, TP5, Y6, TP6, Y7, TP7
|
||||
andi I, M, 7
|
||||
beqz I, .L_\XW\()_M_END
|
||||
.align 5
|
||||
.L_\XW\()_M_L1:
|
||||
fld.s $f1, X, 0x00
|
||||
fld.s $f10, PA0, 0x00
|
||||
fld.s $f11, PA1, 0x00
|
||||
fld.s $f12, PA2, 0x00
|
||||
fld.s $f13, PA3, 0x00
|
||||
fld.s $f14, PA4, 0x00
|
||||
fld.s $f15, PA5, 0x00
|
||||
fld.s $f16, PA6, 0x00
|
||||
fld.s $f17, PA7, 0x00
|
||||
#if __loongarch_grlen == 64
|
||||
GADDI , d, PA0, PA0, 0x04, PA1, PA1, 0x04, PA2, PA2, 0x04, PA3, PA3, 0x04, \
|
||||
PA4, PA4, 0x04, PA5, PA5, 0x04, PA6, PA6, 0x04, PA7, PA7, 0x04
|
||||
#elif __loongarch_grlen == 32
|
||||
GADDI , w, PA0, PA0, 0x04, PA1, PA1, 0x04, PA2, PA2, 0x04, PA3, PA3, 0x04, \
|
||||
PA4, PA4, 0x04, PA5, PA5, 0x04, PA6, PA6, 0x04, PA7, PA7, 0x04
|
||||
#else
|
||||
GADDI , d, PA0, PA0, 0x04, PA1, PA1, 0x04, PA2, PA2, 0x04, PA3, PA3, 0x04, \
|
||||
PA4, PA4, 0x04, PA5, PA5, 0x04, PA6, PA6, 0x04, PA7, PA7, 0x04
|
||||
#endif
|
||||
GMADD f, s, $f2, $f10, $f1, $f2, $f3, $f11, $f1, $f3, $f4, $f12, $f1, $f4, $f5, $f13, $f1, $f5, \
|
||||
$f6, $f14, $f1, $f6, $f7, $f15, $f1, $f7, $f8, $f16, $f1, $f8, $f9, $f17, $f1, $f9,
|
||||
PTR_ADDI I, I, -1
|
||||
PTR_ADD X, X, INC_X
|
||||
bnez I, .L_\XW\()_M_L1
|
||||
.L_\XW\()_M_END:
|
||||
fld.s $f10, Y, 0x00
|
||||
fldx.s $f11, Y, INC_Y
|
||||
PTR_ALSL PY0, INC_Y, Y, 1
|
||||
fld.s $f12, PY0, 0x00
|
||||
fldx.s $f13, PY0, INC_Y
|
||||
PTR_ALSL PY1, INC_Y, Y, 2
|
||||
fld.s $f14, PY1, 0x00
|
||||
fldx.s $f15, PY1, INC_Y
|
||||
PTR_ALSL PY2, INC_Y, PY1, 1
|
||||
fld.s $f16, PY2, 0x00
|
||||
fldx.s $f17, PY2, INC_Y
|
||||
|
||||
GMADD f, s, $f10, ALPHA, $f2, $f10, $f11, ALPHA, $f3, $f11, $f12, ALPHA, $f4, $f12, $f13, ALPHA, $f5, $f13, \
|
||||
$f14, ALPHA, $f6, $f14, $f15, ALPHA, $f7, $f15, $f16, ALPHA, $f8, $f16, $f17, ALPHA, $f9, $f17
|
||||
|
||||
PTR_ADDI J, J, -1
|
||||
#if __loongarch_grlen == 64
|
||||
GADD , d, PA0, PA0, K_LDA, PA1, PA1, K_LDA, PA2, PA2, K_LDA, PA3, PA3, K_LDA, \
|
||||
PA4, PA4, K_LDA, PA5, PA5, K_LDA, PA6, PA6, K_LDA, PA7, PA7, K_LDA
|
||||
#elif __loongarch_grlen == 32
|
||||
GADD , w, PA0, PA0, K_LDA, PA1, PA1, K_LDA, PA2, PA2, K_LDA, PA3, PA3, K_LDA, \
|
||||
PA4, PA4, K_LDA, PA5, PA5, K_LDA, PA6, PA6, K_LDA, PA7, PA7, K_LDA
|
||||
#else
|
||||
GADD , d, PA0, PA0, K_LDA, PA1, PA1, K_LDA, PA2, PA2, K_LDA, PA3, PA3, K_LDA, \
|
||||
PA4, PA4, K_LDA, PA5, PA5, K_LDA, PA6, PA6, K_LDA, PA7, PA7, K_LDA
|
||||
#endif
|
||||
fst.s $f10, Y, 0x00
|
||||
fstx.s $f11, Y, INC_Y
|
||||
fst.s $f12, PY0, 0x00
|
||||
fstx.s $f13, PY0, INC_Y
|
||||
fst.s $f14, PY1, 0x00
|
||||
fstx.s $f15, PY1, INC_Y
|
||||
fst.s $f16, PY2, 0x00
|
||||
fstx.s $f17, PY2, INC_Y
|
||||
|
||||
PTR_ALSL Y, INC_Y, Y, 3
|
||||
bnez J, .L_\XW\()_N_L8
|
||||
.L_\XW\()_N_7:
|
||||
andi J, N, 4
|
||||
beqz J, .L_\XW\()_N_3
|
||||
ZERO_Y4
|
||||
move X, X_ORG
|
||||
PTR_SRLI I, M, 3
|
||||
beqz I, .L_\XW\()_N_4_M_7
|
||||
.align 5
|
||||
.L_\XW\()_N_4_M_L8:
|
||||
SLOAD_\X8
|
||||
SGEMV_T_4x8
|
||||
PTR_ADDI I, I, -1
|
||||
PTR_ALSL X, INC_X, X, 3
|
||||
bnez I, .L_\XW\()_N_4_M_L8
|
||||
.L_\XW\()_N_4_M_7:
|
||||
// Accumulated
|
||||
GACC xvf, s, Y0, TP0, Y1, TP1, Y2, TP2, Y3, TP3
|
||||
andi I, M, 7
|
||||
beqz I, .L_\XW\()_N_4_M_END
|
||||
.align 5
|
||||
.L_\XW\()_N_4_M_L1:
|
||||
fld.s $f1, X, 0x00
|
||||
GLD_INC f, s, 0x04, $f10, PA0, 0x00, $f11, PA1, 0x00, $f12, PA2, 0x00, $f13, PA3, 0x00
|
||||
GMADD f, s, $f2, $f10, $f1, $f2, $f3, $f11, $f1, $f3, $f4, $f12, $f1, $f4, $f5, $f13, $f1, $f5
|
||||
PTR_ADDI I, I, -1
|
||||
PTR_ADD X, X, INC_X
|
||||
bnez I, .L_\XW\()_N_4_M_L1
|
||||
.L_\XW\()_N_4_M_END:
|
||||
fld.s $f10, Y, 0x00
|
||||
fldx.s $f11, Y, INC_Y
|
||||
PTR_ALSL PY0, INC_Y, Y, 1
|
||||
fld.s $f12, PY0, 0x00
|
||||
fldx.s $f13, PY0, INC_Y
|
||||
|
||||
GMADD f, s, $f10, ALPHA, $f2, $f10, $f11, ALPHA, $f3, $f11, $f12, ALPHA, $f4, $f12, $f13, ALPHA, $f5, $f13
|
||||
|
||||
PTR_SLLI K_LDA, LDA, 2
|
||||
PTR_SUB K_LDA, K_LDA, M4
|
||||
|
||||
#if __loongarch_grlen == 64
|
||||
GADD , d, PA0, PA0, K_LDA, PA1, PA1, K_LDA, PA2, PA2, K_LDA, PA3, PA3, K_LDA
|
||||
#elif __loongarch_grlen == 32
|
||||
GADD , w, PA0, PA0, K_LDA, PA1, PA1, K_LDA, PA2, PA2, K_LDA, PA3, PA3, K_LDA
|
||||
#else
|
||||
GADD , d, PA0, PA0, K_LDA, PA1, PA1, K_LDA, PA2, PA2, K_LDA, PA3, PA3, K_LDA
|
||||
#endif
|
||||
fst.s $f10, Y, 0x00
|
||||
fstx.s $f11, Y, INC_Y
|
||||
fst.s $f12, PY0, 0x00
|
||||
fstx.s $f13, PY0, INC_Y
|
||||
PTR_ALSL Y, INC_Y, Y, 2
|
||||
.L_\XW\()_N_3:
|
||||
andi J, N, 2
|
||||
beqz J, .L_\XW\()_N_1
|
||||
ZERO_Y2
|
||||
move X, X_ORG
|
||||
PTR_SRLI I, M, 3
|
||||
beqz I, .L_\XW\()_N_2_M_7
|
||||
.align 5
|
||||
.L_\XW\()_N_2_M_L8:
|
||||
SLOAD_\X8
|
||||
SGEMV_T_2x8
|
||||
PTR_ADDI I, I, -1
|
||||
PTR_ALSL X, INC_X, X, 3
|
||||
bnez I, .L_\XW\()_N_2_M_L8
|
||||
.L_\XW\()_N_2_M_7:
|
||||
// Accumulated
|
||||
GACC xvf, s, Y0, TP0, Y1, TP1
|
||||
andi I, M, 7
|
||||
beqz I, .L_\XW\()_N_2_M_END
|
||||
.align 5
|
||||
.L_\XW\()_N_2_M_L1:
|
||||
fld.s $f1, X, 0x00
|
||||
GLD_INC f, s, 0x04, $f10, PA0, 0x00, $f11, PA1, 0x00
|
||||
GMADD f, s, $f2, $f10, $f1, $f2, $f3, $f11, $f1, $f3
|
||||
PTR_ADDI I, I, -1
|
||||
PTR_ADD X, X, INC_X
|
||||
bnez I, .L_\XW\()_N_2_M_L1
|
||||
.L_\XW\()_N_2_M_END:
|
||||
fld.s $f10, Y, 0x00
|
||||
fldx.s $f11, Y, INC_Y
|
||||
|
||||
GMADD f, s, $f10, ALPHA, $f2, $f10, $f11, ALPHA, $f3, $f11
|
||||
|
||||
PTR_SLLI K_LDA, LDA, 1
|
||||
PTR_SUB K_LDA, K_LDA, M4
|
||||
|
||||
#if __loongarch_grlen == 64
|
||||
GADD , d, PA0, PA0, K_LDA, PA1, PA1, K_LDA
|
||||
#elif __loongarch_grlen == 32
|
||||
GADD , w, PA0, PA0, K_LDA, PA1, PA1, K_LDA
|
||||
#else
|
||||
GADD , d, PA0, PA0, K_LDA, PA1, PA1, K_LDA
|
||||
#endif
|
||||
fst.s $f10, Y, 0x00
|
||||
fstx.s $f11, Y, INC_Y
|
||||
PTR_ALSL Y, INC_Y, Y, 1
|
||||
.L_\XW\()_N_1:
|
||||
andi J, N, 1
|
||||
beqz J, .L_END
|
||||
ZERO_Y1
|
||||
move X, X_ORG
|
||||
move I, M
|
||||
beqz I, .L_END
|
||||
.align 5
|
||||
.L_\XW\()_N_1_M_L1:
|
||||
fld.s $f2, PA0, 0x00
|
||||
fld.s $f1, X, 0x00
|
||||
fmadd.s $f10, $f2, $f1, $f10
|
||||
PTR_ADDI I, I, -1
|
||||
PTR_ADD X, X, INC_X
|
||||
PTR_ADDI PA0, PA0, 0x04
|
||||
bnez I, .L_\XW\()_N_1_M_L1
|
||||
|
||||
fld.s $f2, Y, 0x00
|
||||
fmadd.s $f2, ALPHA, $f10, $f2
|
||||
fst.s $f2, Y, 0x00
|
||||
b .L_END
|
||||
.endm
|
||||
|
||||
PROLOGUE
|
||||
PTR_LD INC_Y, $sp, 0
|
||||
push_if_used 17 + 8, 18
|
||||
PTR_ADDI K, $r0, 0x01
|
||||
PTR_SUB I, INC_X, K
|
||||
maskeqz I, K, I /* if(inc_x == 1) I = 0; else I = 1; */
|
||||
GSLLI , d, LDA, LDA, 2, INC_X, INC_X, 2, INC_Y, INC_Y, 2, M4, M, 2
|
||||
xvreplve0.w VALPHA, $xr0
|
||||
move X_ORG, X
|
||||
move PA0, A
|
||||
#if __loongarch_grlen == 64
|
||||
GADD , d, PA1, PA0, LDA, PA2, PA1, LDA, PA3, PA2, LDA, PA4, PA3, LDA, \
|
||||
PA5, PA4, LDA, PA6, PA5, LDA, PA7, PA6, LDA
|
||||
#elif __loongarch_grlen == 32
|
||||
GADD , w, PA1, PA0, LDA, PA2, PA1, LDA, PA3, PA2, LDA, PA4, PA3, LDA, \
|
||||
PA5, PA4, LDA, PA6, PA5, LDA, PA7, PA6, LDA
|
||||
#else
|
||||
GADD , d, PA1, PA0, LDA, PA2, PA1, LDA, PA3, PA2, LDA, PA4, PA3, LDA, \
|
||||
PA5, PA4, LDA, PA6, PA5, LDA, PA7, PA6, LDA
|
||||
#endif
|
||||
la.local T0, .L_GAP_TABLE
|
||||
PTR_ALSL I, I, T0, 1
|
||||
ld.h K, I, 0
|
||||
PTR_ADD T0, T0, K
|
||||
jirl $r0, T0, 0
|
||||
.L_GAP_TABLE:
|
||||
.hword .L_GAP_0 - .L_GAP_TABLE
|
||||
.hword .L_GAP_1 - .L_GAP_TABLE
|
||||
.L_GAP_0: /* if (incx == 1) */
|
||||
SGEMV_T_LASX GAP_0, X8, X4
|
||||
.L_GAP_1: /* if (incx != 1) */
|
||||
SGEMV_T_LASX GAP_1, X8_GAP, X4_GAP
|
||||
.L_END:
|
||||
pop_if_used 17 + 8, 18
|
||||
jirl $r0, $r1, 0x0
|
||||
EPILOGUE
|
||||
|
|
@ -1,6 +1,3 @@
|
|||
ifeq ($(HAVE_GAS), 1)
|
||||
include $(KERNELDIR)/KERNEL.POWER8
|
||||
else
|
||||
#SGEMM_BETA = ../generic/gemm_beta.c
|
||||
#DGEMM_BETA = ../generic/gemm_beta.c
|
||||
#CGEMM_BETA = ../generic/zgemm_beta.c
|
||||
|
|
@ -19,8 +16,13 @@ SBGEMMOTCOPYOBJ = sbgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
|||
|
||||
STRMMKERNEL = sgemm_kernel_power10.c
|
||||
DTRMMKERNEL = dgemm_kernel_power10.c
|
||||
ifeq ($(OSNAME), AIX)
|
||||
CTRMMKERNEL = ctrmm_kernel_8x4_power8.S
|
||||
ZTRMMKERNEL = ztrmm_kernel_8x2_power8.S
|
||||
else
|
||||
CTRMMKERNEL = cgemm_kernel_power10.S
|
||||
ZTRMMKERNEL = zgemm_kernel_power10.S
|
||||
endif
|
||||
|
||||
SGEMMKERNEL = sgemm_kernel_power10.c
|
||||
SGEMMINCOPY = ../generic/gemm_ncopy_16.c
|
||||
|
|
@ -62,10 +64,18 @@ DGEMM_SMALL_K_B0_TT = dgemm_small_kernel_tt_power10.c
|
|||
DGEMM_SMALL_K_TN = dgemm_small_kernel_tn_power10.c
|
||||
DGEMM_SMALL_K_B0_TN = dgemm_small_kernel_tn_power10.c
|
||||
|
||||
ifeq ($(OSNAME), AIX)
|
||||
CGEMMKERNEL = cgemm_kernel_8x4_power8.S
|
||||
else
|
||||
CGEMMKERNEL = cgemm_kernel_power10.S
|
||||
endif
|
||||
#CGEMMKERNEL = cgemm_kernel_8x4_power8.S
|
||||
CGEMMINCOPY = ../generic/zgemm_ncopy_8.c
|
||||
ifeq ($(OSNAME), AIX)
|
||||
CGEMMITCOPY = cgemm_tcopy_8_power8.S
|
||||
else
|
||||
CGEMMITCOPY = ../generic/zgemm_tcopy_8.c
|
||||
endif
|
||||
CGEMMONCOPY = ../generic/zgemm_ncopy_4.c
|
||||
CGEMMOTCOPY = ../generic/zgemm_tcopy_4.c
|
||||
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
|
|
@ -73,7 +83,11 @@ CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
|||
CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||
CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
ifeq ($(OSNAME), AIX)
|
||||
ZGEMMKERNEL = zgemm_kernel_8x2_power8.S
|
||||
else
|
||||
ZGEMMKERNEL = zgemm_kernel_power10.S
|
||||
endif
|
||||
ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c
|
||||
ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
|
||||
ZGEMMINCOPY = ../generic/zgemm_ncopy_8.c
|
||||
|
|
@ -124,6 +138,7 @@ ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
|||
#SMINKERNEL = ../arm/min.c
|
||||
#DMINKERNEL = ../arm/min.c
|
||||
#
|
||||
ifeq ($(C_COMPILER), GCC)
|
||||
ifneq ($(GCCVERSIONGTEQ9),1)
|
||||
ISAMAXKERNEL = isamax_power9.S
|
||||
else
|
||||
|
|
@ -148,6 +163,15 @@ ICAMINKERNEL = icamin_power9.S
|
|||
else
|
||||
ICAMINKERNEL = icamin.c
|
||||
endif
|
||||
else
|
||||
ISAMAXKERNEL = isamax.c
|
||||
IDAMAXKERNEL = idamax.c
|
||||
ICAMAXKERNEL = icamax.c
|
||||
IZAMAXKERNEL = izamax.c
|
||||
ISAMINKERNEL = isamin.c
|
||||
IDAMINKERNEL = idamin.c
|
||||
ICAMINKERNEL = icamin.c
|
||||
endif
|
||||
IZAMINKERNEL = izamin.c
|
||||
#
|
||||
#ISMAXKERNEL = ../arm/imax.c
|
||||
|
|
@ -238,4 +262,3 @@ QCABS_KERNEL = ../generic/cabs.c
|
|||
#Dump kernel
|
||||
CGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c
|
||||
ZGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c
|
||||
endif
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@
|
|||
#define ABS_K(a) ((a) > 0 ? (a) : (-(a)))
|
||||
#endif
|
||||
|
||||
#if defined(SKYLAKEX)
|
||||
#if defined(SKYLAKEX) || defined(COOPERLAKE) || defined(SAPPHIRERAPIDS)
|
||||
#include "casum_microk_skylakex-2.c"
|
||||
#endif
|
||||
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@
|
|||
#define ABS_K(a) ((a) > 0 ? (a) : (-(a)))
|
||||
#endif
|
||||
|
||||
#if defined(SKYLAKEX)
|
||||
#if defined(SKYLAKEX) || defined(COOPERLAKE) || defined(SAPPHIRERAPIDS)
|
||||
#include "dasum_microk_skylakex-2.c"
|
||||
#elif defined(HASWELL) || defined(ZEN)
|
||||
#include "dasum_microk_haswell-2.c"
|
||||
|
|
|
|||
|
|
@ -159,7 +159,7 @@ static int dot_thread_function(BLASLONG n, BLASLONG dummy0,
|
|||
|
||||
extern int blas_level1_thread_with_return_value(int mode, BLASLONG m, BLASLONG n,
|
||||
BLASLONG k, void *alpha, void *a, BLASLONG lda, void *b, BLASLONG ldb,
|
||||
void *c, BLASLONG ldc, int (*function)(), int nthreads);
|
||||
void *c, BLASLONG ldc, int (*function)(void), int nthreads);
|
||||
#endif
|
||||
|
||||
FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
|
||||
|
|
|
|||
|
|
@ -169,7 +169,7 @@ static int rot_thread_function(blas_arg_t *args)
|
|||
return 0;
|
||||
}
|
||||
|
||||
extern int blas_level1_thread(int mode, BLASLONG m, BLASLONG n, BLASLONG k, void *alpha, void *a, BLASLONG lda, void *b, BLASLONG ldb, void *c, BLASLONG ldc, int (*function)(), int nthreads);
|
||||
extern int blas_level1_thread(int mode, BLASLONG m, BLASLONG n, BLASLONG k, void *alpha, void *a, BLASLONG lda, void *b, BLASLONG ldb, void *c, BLASLONG ldc, int (*function)(void), int nthreads);
|
||||
#endif
|
||||
int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT c, FLOAT s)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@
|
|||
|
||||
#endif
|
||||
|
||||
#if defined(SKYLAKEX)
|
||||
#if defined(SKYLAKEX) || defined(COOPERLAKE) || defined(SAPPHIRERAPIDS)
|
||||
#include "sasum_microk_skylakex-2.c"
|
||||
#elif defined(HASWELL) || defined(ZEN)
|
||||
#include "sasum_microk_haswell-2.c"
|
||||
|
|
|
|||
|
|
@ -171,7 +171,7 @@ static int rot_thread_function(blas_arg_t *args)
|
|||
return 0;
|
||||
}
|
||||
|
||||
extern int blas_level1_thread(int mode, BLASLONG m, BLASLONG n, BLASLONG k, void *alpha, void *a, BLASLONG lda, void *b, BLASLONG ldb, void *c, BLASLONG ldc, int (*function)(), int nthreads);
|
||||
extern int blas_level1_thread(int mode, BLASLONG m, BLASLONG n, BLASLONG k, void *alpha, void *a, BLASLONG lda, void *b, BLASLONG ldb, void *c, BLASLONG ldc, int (*function)(void), int nthreads);
|
||||
#endif
|
||||
int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT c, FLOAT s)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@
|
|||
#define ABS_K(a) ((a) > 0 ? (a) : (-(a)))
|
||||
#endif
|
||||
|
||||
#if defined(SKYLAKEX)
|
||||
#if defined(SKYLAKEX) || defined(COOPERLAKE) || defined(SAPPHIRERAPIDS)
|
||||
#include "zasum_microk_skylakex-2.c"
|
||||
#endif
|
||||
|
||||
|
|
|
|||
|
|
@ -92,7 +92,7 @@ static void zdot_kernel_8(BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *d)
|
|||
#if defined(SMP)
|
||||
extern int blas_level1_thread_with_return_value(int mode, BLASLONG m, BLASLONG n,
|
||||
BLASLONG k, void *alpha, void *a, BLASLONG lda, void *b, BLASLONG ldb,
|
||||
void *c, BLASLONG ldc, int (*function)(), int nthreads);
|
||||
void *c, BLASLONG ldc, int (*function)(void), int nthreads);
|
||||
#endif
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -247,7 +247,6 @@ typedef struct Namelist Namelist;
|
|||
#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
|
||||
#define sig_die(s, kill) { exit(1); }
|
||||
#define s_stop(s, n) {exit(0);}
|
||||
static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
|
||||
#define z_abs(z) (cabs(Cd(z)))
|
||||
#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
|
||||
#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
|
||||
|
|
@ -261,24 +260,7 @@ static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
|
|||
/* procedure parameter types for -A and -C++ */
|
||||
|
||||
#define F2C_proc_par_types 1
|
||||
#ifdef __cplusplus
|
||||
typedef logical (*L_fp)(...);
|
||||
#else
|
||||
typedef logical (*L_fp)();
|
||||
#endif
|
||||
|
||||
static float spow_ui(float x, integer n) {
|
||||
float pow=1.0; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x = 1/x;
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow *= x;
|
||||
if(u >>= 1) x *= x;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
return pow;
|
||||
}
|
||||
static double dpow_ui(double x, integer n) {
|
||||
double pow=1.0; unsigned long int u;
|
||||
if(n != 0) {
|
||||
|
|
@ -291,217 +273,7 @@ static double dpow_ui(double x, integer n) {
|
|||
}
|
||||
return pow;
|
||||
}
|
||||
#ifdef _MSC_VER
|
||||
static _Fcomplex cpow_ui(complex x, integer n) {
|
||||
complex pow={1.0,0.0}; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i;
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow.r *= x.r, pow.i *= x.i;
|
||||
if(u >>= 1) x.r *= x.r, x.i *= x.i;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
_Fcomplex p={pow.r, pow.i};
|
||||
return p;
|
||||
}
|
||||
#else
|
||||
static _Complex float cpow_ui(_Complex float x, integer n) {
|
||||
_Complex float pow=1.0; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x = 1/x;
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow *= x;
|
||||
if(u >>= 1) x *= x;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
return pow;
|
||||
}
|
||||
#endif
|
||||
#ifdef _MSC_VER
|
||||
static _Dcomplex zpow_ui(_Dcomplex x, integer n) {
|
||||
_Dcomplex pow={1.0,0.0}; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1];
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1];
|
||||
if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1];
|
||||
else break;
|
||||
}
|
||||
}
|
||||
_Dcomplex p = {pow._Val[0], pow._Val[1]};
|
||||
return p;
|
||||
}
|
||||
#else
|
||||
static _Complex double zpow_ui(_Complex double x, integer n) {
|
||||
_Complex double pow=1.0; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x = 1/x;
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow *= x;
|
||||
if(u >>= 1) x *= x;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
return pow;
|
||||
}
|
||||
#endif
|
||||
static integer pow_ii(integer x, integer n) {
|
||||
integer pow; unsigned long int u;
|
||||
if (n <= 0) {
|
||||
if (n == 0 || x == 1) pow = 1;
|
||||
else if (x != -1) pow = x == 0 ? 1/x : 0;
|
||||
else n = -n;
|
||||
}
|
||||
if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
|
||||
u = n;
|
||||
for(pow = 1; ; ) {
|
||||
if(u & 01) pow *= x;
|
||||
if(u >>= 1) x *= x;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
return pow;
|
||||
}
|
||||
static integer dmaxloc_(double *w, integer s, integer e, integer *n)
|
||||
{
|
||||
double m; integer i, mi;
|
||||
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
|
||||
if (w[i-1]>m) mi=i ,m=w[i-1];
|
||||
return mi-s+1;
|
||||
}
|
||||
static integer smaxloc_(float *w, integer s, integer e, integer *n)
|
||||
{
|
||||
float m; integer i, mi;
|
||||
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
|
||||
if (w[i-1]>m) mi=i ,m=w[i-1];
|
||||
return mi-s+1;
|
||||
}
|
||||
static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
|
||||
integer n = *n_, incx = *incx_, incy = *incy_, i;
|
||||
#ifdef _MSC_VER
|
||||
_Fcomplex zdotc = {0.0, 0.0};
|
||||
if (incx == 1 && incy == 1) {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc._Val[0] += conjf(Cf(&x[i]))._Val[0] * Cf(&y[i])._Val[0];
|
||||
zdotc._Val[1] += conjf(Cf(&x[i]))._Val[1] * Cf(&y[i])._Val[1];
|
||||
}
|
||||
} else {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc._Val[0] += conjf(Cf(&x[i*incx]))._Val[0] * Cf(&y[i*incy])._Val[0];
|
||||
zdotc._Val[1] += conjf(Cf(&x[i*incx]))._Val[1] * Cf(&y[i*incy])._Val[1];
|
||||
}
|
||||
}
|
||||
pCf(z) = zdotc;
|
||||
}
|
||||
#else
|
||||
_Complex float zdotc = 0.0;
|
||||
if (incx == 1 && incy == 1) {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
|
||||
}
|
||||
} else {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
|
||||
}
|
||||
}
|
||||
pCf(z) = zdotc;
|
||||
}
|
||||
#endif
|
||||
static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
|
||||
integer n = *n_, incx = *incx_, incy = *incy_, i;
|
||||
#ifdef _MSC_VER
|
||||
_Dcomplex zdotc = {0.0, 0.0};
|
||||
if (incx == 1 && incy == 1) {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc._Val[0] += conj(Cd(&x[i]))._Val[0] * Cd(&y[i])._Val[0];
|
||||
zdotc._Val[1] += conj(Cd(&x[i]))._Val[1] * Cd(&y[i])._Val[1];
|
||||
}
|
||||
} else {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc._Val[0] += conj(Cd(&x[i*incx]))._Val[0] * Cd(&y[i*incy])._Val[0];
|
||||
zdotc._Val[1] += conj(Cd(&x[i*incx]))._Val[1] * Cd(&y[i*incy])._Val[1];
|
||||
}
|
||||
}
|
||||
pCd(z) = zdotc;
|
||||
}
|
||||
#else
|
||||
_Complex double zdotc = 0.0;
|
||||
if (incx == 1 && incy == 1) {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
|
||||
}
|
||||
} else {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
|
||||
}
|
||||
}
|
||||
pCd(z) = zdotc;
|
||||
}
|
||||
#endif
|
||||
static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
|
||||
integer n = *n_, incx = *incx_, incy = *incy_, i;
|
||||
#ifdef _MSC_VER
|
||||
_Fcomplex zdotc = {0.0, 0.0};
|
||||
if (incx == 1 && incy == 1) {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc._Val[0] += Cf(&x[i])._Val[0] * Cf(&y[i])._Val[0];
|
||||
zdotc._Val[1] += Cf(&x[i])._Val[1] * Cf(&y[i])._Val[1];
|
||||
}
|
||||
} else {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc._Val[0] += Cf(&x[i*incx])._Val[0] * Cf(&y[i*incy])._Val[0];
|
||||
zdotc._Val[1] += Cf(&x[i*incx])._Val[1] * Cf(&y[i*incy])._Val[1];
|
||||
}
|
||||
}
|
||||
pCf(z) = zdotc;
|
||||
}
|
||||
#else
|
||||
_Complex float zdotc = 0.0;
|
||||
if (incx == 1 && incy == 1) {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc += Cf(&x[i]) * Cf(&y[i]);
|
||||
}
|
||||
} else {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
|
||||
}
|
||||
}
|
||||
pCf(z) = zdotc;
|
||||
}
|
||||
#endif
|
||||
static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
|
||||
integer n = *n_, incx = *incx_, incy = *incy_, i;
|
||||
#ifdef _MSC_VER
|
||||
_Dcomplex zdotc = {0.0, 0.0};
|
||||
if (incx == 1 && incy == 1) {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc._Val[0] += Cd(&x[i])._Val[0] * Cd(&y[i])._Val[0];
|
||||
zdotc._Val[1] += Cd(&x[i])._Val[1] * Cd(&y[i])._Val[1];
|
||||
}
|
||||
} else {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc._Val[0] += Cd(&x[i*incx])._Val[0] * Cd(&y[i*incy])._Val[0];
|
||||
zdotc._Val[1] += Cd(&x[i*incx])._Val[1] * Cd(&y[i*incy])._Val[1];
|
||||
}
|
||||
}
|
||||
pCd(z) = zdotc;
|
||||
}
|
||||
#else
|
||||
_Complex double zdotc = 0.0;
|
||||
if (incx == 1 && incy == 1) {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc += Cd(&x[i]) * Cd(&y[i]);
|
||||
}
|
||||
} else {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
|
||||
}
|
||||
}
|
||||
pCd(z) = zdotc;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* -- translated by f2c (version 20000121).
|
||||
You must link the resulting object file with the libraries:
|
||||
-lf2c -lm (in that order)
|
||||
|
|
|
|||
|
|
@ -223,7 +223,6 @@ typedef struct Namelist Namelist;
|
|||
#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
|
||||
#define sig_die(s, kill) { exit(1); }
|
||||
#define s_stop(s, n) {exit(0);}
|
||||
static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
|
||||
#define z_abs(z) (cabs(Cd(z)))
|
||||
#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
|
||||
#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
|
||||
|
|
@ -237,145 +236,5 @@ static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
|
|||
/* procedure parameter types for -A and -C++ */
|
||||
|
||||
#define F2C_proc_par_types 1
|
||||
#ifdef __cplusplus
|
||||
typedef logical (*L_fp)(...);
|
||||
#else
|
||||
typedef logical (*L_fp)();
|
||||
#endif
|
||||
|
||||
static float spow_ui(float x, integer n) {
|
||||
float pow=1.0; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x = 1/x;
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow *= x;
|
||||
if(u >>= 1) x *= x;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
return pow;
|
||||
}
|
||||
static double dpow_ui(double x, integer n) {
|
||||
double pow=1.0; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x = 1/x;
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow *= x;
|
||||
if(u >>= 1) x *= x;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
return pow;
|
||||
}
|
||||
static _Complex float cpow_ui(_Complex float x, integer n) {
|
||||
_Complex float pow=1.0; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x = 1/x;
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow *= x;
|
||||
if(u >>= 1) x *= x;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
return pow;
|
||||
}
|
||||
static _Complex double zpow_ui(_Complex double x, integer n) {
|
||||
_Complex double pow=1.0; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x = 1/x;
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow *= x;
|
||||
if(u >>= 1) x *= x;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
return pow;
|
||||
}
|
||||
static integer pow_ii(integer x, integer n) {
|
||||
integer pow; unsigned long int u;
|
||||
if (n <= 0) {
|
||||
if (n == 0 || x == 1) pow = 1;
|
||||
else if (x != -1) pow = x == 0 ? 1/x : 0;
|
||||
else n = -n;
|
||||
}
|
||||
if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
|
||||
u = n;
|
||||
for(pow = 1; ; ) {
|
||||
if(u & 01) pow *= x;
|
||||
if(u >>= 1) x *= x;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
return pow;
|
||||
}
|
||||
static integer dmaxloc_(double *w, integer s, integer e, integer *n)
|
||||
{
|
||||
double m; integer i, mi;
|
||||
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
|
||||
if (w[i-1]>m) mi=i ,m=w[i-1];
|
||||
return mi-s+1;
|
||||
}
|
||||
static integer smaxloc_(float *w, integer s, integer e, integer *n)
|
||||
{
|
||||
float m; integer i, mi;
|
||||
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
|
||||
if (w[i-1]>m) mi=i ,m=w[i-1];
|
||||
return mi-s+1;
|
||||
}
|
||||
static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
|
||||
integer n = *n_, incx = *incx_, incy = *incy_, i;
|
||||
_Complex float zdotc = 0.0;
|
||||
if (incx == 1 && incy == 1) {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
|
||||
}
|
||||
} else {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
|
||||
}
|
||||
}
|
||||
pCf(z) = zdotc;
|
||||
}
|
||||
static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
|
||||
integer n = *n_, incx = *incx_, incy = *incy_, i;
|
||||
_Complex double zdotc = 0.0;
|
||||
if (incx == 1 && incy == 1) {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
|
||||
}
|
||||
} else {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
|
||||
}
|
||||
}
|
||||
pCd(z) = zdotc;
|
||||
}
|
||||
static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
|
||||
integer n = *n_, incx = *incx_, incy = *incy_, i;
|
||||
_Complex float zdotc = 0.0;
|
||||
if (incx == 1 && incy == 1) {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc += Cf(&x[i]) * Cf(&y[i]);
|
||||
}
|
||||
} else {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
|
||||
}
|
||||
}
|
||||
pCf(z) = zdotc;
|
||||
}
|
||||
static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
|
||||
integer n = *n_, incx = *incx_, incy = *incy_, i;
|
||||
_Complex double zdotc = 0.0;
|
||||
if (incx == 1 && incy == 1) {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc += Cd(&x[i]) * Cd(&y[i]);
|
||||
}
|
||||
} else {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
|
||||
}
|
||||
}
|
||||
pCd(z) = zdotc;
|
||||
}
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -223,7 +223,6 @@ typedef struct Namelist Namelist;
|
|||
#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
|
||||
#define sig_die(s, kill) { exit(1); }
|
||||
#define s_stop(s, n) {exit(0);}
|
||||
static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
|
||||
#define z_abs(z) (cabs(Cd(z)))
|
||||
#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
|
||||
#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
|
||||
|
|
@ -237,149 +236,10 @@ static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
|
|||
/* procedure parameter types for -A and -C++ */
|
||||
|
||||
#define F2C_proc_par_types 1
|
||||
#ifdef __cplusplus
|
||||
typedef logical (*L_fp)(...);
|
||||
#else
|
||||
typedef logical (*L_fp)();
|
||||
|
||||
#endif
|
||||
|
||||
static float spow_ui(float x, integer n) {
|
||||
float pow=1.0; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x = 1/x;
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow *= x;
|
||||
if(u >>= 1) x *= x;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
return pow;
|
||||
}
|
||||
static double dpow_ui(double x, integer n) {
|
||||
double pow=1.0; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x = 1/x;
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow *= x;
|
||||
if(u >>= 1) x *= x;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
return pow;
|
||||
}
|
||||
static _Complex float cpow_ui(_Complex float x, integer n) {
|
||||
_Complex float pow=1.0; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x = 1/x;
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow *= x;
|
||||
if(u >>= 1) x *= x;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
return pow;
|
||||
}
|
||||
static _Complex double zpow_ui(_Complex double x, integer n) {
|
||||
_Complex double pow=1.0; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x = 1/x;
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow *= x;
|
||||
if(u >>= 1) x *= x;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
return pow;
|
||||
}
|
||||
static integer pow_ii(integer x, integer n) {
|
||||
integer pow; unsigned long int u;
|
||||
if (n <= 0) {
|
||||
if (n == 0 || x == 1) pow = 1;
|
||||
else if (x != -1) pow = x == 0 ? 1/x : 0;
|
||||
else n = -n;
|
||||
}
|
||||
if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
|
||||
u = n;
|
||||
for(pow = 1; ; ) {
|
||||
if(u & 01) pow *= x;
|
||||
if(u >>= 1) x *= x;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
return pow;
|
||||
}
|
||||
static integer dmaxloc_(double *w, integer s, integer e, integer *n)
|
||||
{
|
||||
double m; integer i, mi;
|
||||
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
|
||||
if (w[i-1]>m) mi=i ,m=w[i-1];
|
||||
return mi-s+1;
|
||||
}
|
||||
static integer smaxloc_(float *w, integer s, integer e, integer *n)
|
||||
{
|
||||
float m; integer i, mi;
|
||||
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
|
||||
if (w[i-1]>m) mi=i ,m=w[i-1];
|
||||
return mi-s+1;
|
||||
}
|
||||
static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
|
||||
integer n = *n_, incx = *incx_, incy = *incy_, i;
|
||||
_Complex float zdotc = 0.0;
|
||||
if (incx == 1 && incy == 1) {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
|
||||
}
|
||||
} else {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
|
||||
}
|
||||
}
|
||||
pCf(z) = zdotc;
|
||||
}
|
||||
static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
|
||||
integer n = *n_, incx = *incx_, incy = *incy_, i;
|
||||
_Complex double zdotc = 0.0;
|
||||
if (incx == 1 && incy == 1) {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
|
||||
}
|
||||
} else {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
|
||||
}
|
||||
}
|
||||
pCd(z) = zdotc;
|
||||
}
|
||||
static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
|
||||
integer n = *n_, incx = *incx_, incy = *incy_, i;
|
||||
_Complex float zdotc = 0.0;
|
||||
if (incx == 1 && incy == 1) {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc += Cf(&x[i]) * Cf(&y[i]);
|
||||
}
|
||||
} else {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
|
||||
}
|
||||
}
|
||||
pCf(z) = zdotc;
|
||||
}
|
||||
static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
|
||||
integer n = *n_, incx = *incx_, incy = *incy_, i;
|
||||
_Complex double zdotc = 0.0;
|
||||
if (incx == 1 && incy == 1) {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc += Cd(&x[i]) * Cd(&y[i]);
|
||||
}
|
||||
} else {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
|
||||
}
|
||||
}
|
||||
pCd(z) = zdotc;
|
||||
}
|
||||
#endif
|
||||
/* -- translated by f2c (version 20000121).
|
||||
/*
|
||||
You must link the resulting object file with the libraries:
|
||||
-lf2c -lm (in that order)
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -247,7 +247,6 @@ typedef struct Namelist Namelist;
|
|||
#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
|
||||
#define sig_die(s, kill) { exit(1); }
|
||||
#define s_stop(s, n) {exit(0);}
|
||||
static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
|
||||
#define z_abs(z) (cabs(Cd(z)))
|
||||
#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
|
||||
#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
|
||||
|
|
@ -261,248 +260,8 @@ static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
|
|||
/* procedure parameter types for -A and -C++ */
|
||||
|
||||
#define F2C_proc_par_types 1
|
||||
#ifdef __cplusplus
|
||||
typedef logical (*L_fp)(...);
|
||||
#else
|
||||
typedef logical (*L_fp)();
|
||||
#endif
|
||||
|
||||
static float spow_ui(float x, integer n) {
|
||||
float pow=1.0; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x = 1/x;
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow *= x;
|
||||
if(u >>= 1) x *= x;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
return pow;
|
||||
}
|
||||
static double dpow_ui(double x, integer n) {
|
||||
double pow=1.0; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x = 1/x;
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow *= x;
|
||||
if(u >>= 1) x *= x;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
return pow;
|
||||
}
|
||||
#ifdef _MSC_VER
|
||||
static _Fcomplex cpow_ui(complex x, integer n) {
|
||||
complex pow={1.0,0.0}; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i;
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow.r *= x.r, pow.i *= x.i;
|
||||
if(u >>= 1) x.r *= x.r, x.i *= x.i;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
_Fcomplex p={pow.r, pow.i};
|
||||
return p;
|
||||
}
|
||||
#else
|
||||
static _Complex float cpow_ui(_Complex float x, integer n) {
|
||||
_Complex float pow=1.0; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x = 1/x;
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow *= x;
|
||||
if(u >>= 1) x *= x;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
return pow;
|
||||
}
|
||||
#endif
|
||||
#ifdef _MSC_VER
|
||||
static _Dcomplex zpow_ui(_Dcomplex x, integer n) {
|
||||
_Dcomplex pow={1.0,0.0}; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1];
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1];
|
||||
if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1];
|
||||
else break;
|
||||
}
|
||||
}
|
||||
_Dcomplex p = {pow._Val[0], pow._Val[1]};
|
||||
return p;
|
||||
}
|
||||
#else
|
||||
static _Complex double zpow_ui(_Complex double x, integer n) {
|
||||
_Complex double pow=1.0; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x = 1/x;
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow *= x;
|
||||
if(u >>= 1) x *= x;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
return pow;
|
||||
}
|
||||
#endif
|
||||
static integer pow_ii(integer x, integer n) {
|
||||
integer pow; unsigned long int u;
|
||||
if (n <= 0) {
|
||||
if (n == 0 || x == 1) pow = 1;
|
||||
else if (x != -1) pow = x == 0 ? 1/x : 0;
|
||||
else n = -n;
|
||||
}
|
||||
if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
|
||||
u = n;
|
||||
for(pow = 1; ; ) {
|
||||
if(u & 01) pow *= x;
|
||||
if(u >>= 1) x *= x;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
return pow;
|
||||
}
|
||||
static integer dmaxloc_(double *w, integer s, integer e, integer *n)
|
||||
{
|
||||
double m; integer i, mi;
|
||||
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
|
||||
if (w[i-1]>m) mi=i ,m=w[i-1];
|
||||
return mi-s+1;
|
||||
}
|
||||
static integer smaxloc_(float *w, integer s, integer e, integer *n)
|
||||
{
|
||||
float m; integer i, mi;
|
||||
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
|
||||
if (w[i-1]>m) mi=i ,m=w[i-1];
|
||||
return mi-s+1;
|
||||
}
|
||||
static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
|
||||
integer n = *n_, incx = *incx_, incy = *incy_, i;
|
||||
#ifdef _MSC_VER
|
||||
_Fcomplex zdotc = {0.0, 0.0};
|
||||
if (incx == 1 && incy == 1) {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc._Val[0] += conjf(Cf(&x[i]))._Val[0] * Cf(&y[i])._Val[0];
|
||||
zdotc._Val[1] += conjf(Cf(&x[i]))._Val[1] * Cf(&y[i])._Val[1];
|
||||
}
|
||||
} else {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc._Val[0] += conjf(Cf(&x[i*incx]))._Val[0] * Cf(&y[i*incy])._Val[0];
|
||||
zdotc._Val[1] += conjf(Cf(&x[i*incx]))._Val[1] * Cf(&y[i*incy])._Val[1];
|
||||
}
|
||||
}
|
||||
pCf(z) = zdotc;
|
||||
}
|
||||
#else
|
||||
_Complex float zdotc = 0.0;
|
||||
if (incx == 1 && incy == 1) {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
|
||||
}
|
||||
} else {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
|
||||
}
|
||||
}
|
||||
pCf(z) = zdotc;
|
||||
}
|
||||
#endif
|
||||
static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
|
||||
integer n = *n_, incx = *incx_, incy = *incy_, i;
|
||||
#ifdef _MSC_VER
|
||||
_Dcomplex zdotc = {0.0, 0.0};
|
||||
if (incx == 1 && incy == 1) {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc._Val[0] += conj(Cd(&x[i]))._Val[0] * Cd(&y[i])._Val[0];
|
||||
zdotc._Val[1] += conj(Cd(&x[i]))._Val[1] * Cd(&y[i])._Val[1];
|
||||
}
|
||||
} else {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc._Val[0] += conj(Cd(&x[i*incx]))._Val[0] * Cd(&y[i*incy])._Val[0];
|
||||
zdotc._Val[1] += conj(Cd(&x[i*incx]))._Val[1] * Cd(&y[i*incy])._Val[1];
|
||||
}
|
||||
}
|
||||
pCd(z) = zdotc;
|
||||
}
|
||||
#else
|
||||
_Complex double zdotc = 0.0;
|
||||
if (incx == 1 && incy == 1) {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
|
||||
}
|
||||
} else {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
|
||||
}
|
||||
}
|
||||
pCd(z) = zdotc;
|
||||
}
|
||||
#endif
|
||||
static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
|
||||
integer n = *n_, incx = *incx_, incy = *incy_, i;
|
||||
#ifdef _MSC_VER
|
||||
_Fcomplex zdotc = {0.0, 0.0};
|
||||
if (incx == 1 && incy == 1) {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc._Val[0] += Cf(&x[i])._Val[0] * Cf(&y[i])._Val[0];
|
||||
zdotc._Val[1] += Cf(&x[i])._Val[1] * Cf(&y[i])._Val[1];
|
||||
}
|
||||
} else {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc._Val[0] += Cf(&x[i*incx])._Val[0] * Cf(&y[i*incy])._Val[0];
|
||||
zdotc._Val[1] += Cf(&x[i*incx])._Val[1] * Cf(&y[i*incy])._Val[1];
|
||||
}
|
||||
}
|
||||
pCf(z) = zdotc;
|
||||
}
|
||||
#else
|
||||
_Complex float zdotc = 0.0;
|
||||
if (incx == 1 && incy == 1) {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc += Cf(&x[i]) * Cf(&y[i]);
|
||||
}
|
||||
} else {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
|
||||
}
|
||||
}
|
||||
pCf(z) = zdotc;
|
||||
}
|
||||
#endif
|
||||
static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
|
||||
integer n = *n_, incx = *incx_, incy = *incy_, i;
|
||||
#ifdef _MSC_VER
|
||||
_Dcomplex zdotc = {0.0, 0.0};
|
||||
if (incx == 1 && incy == 1) {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc._Val[0] += Cd(&x[i])._Val[0] * Cd(&y[i])._Val[0];
|
||||
zdotc._Val[1] += Cd(&x[i])._Val[1] * Cd(&y[i])._Val[1];
|
||||
}
|
||||
} else {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc._Val[0] += Cd(&x[i*incx])._Val[0] * Cd(&y[i*incy])._Val[0];
|
||||
zdotc._Val[1] += Cd(&x[i*incx])._Val[1] * Cd(&y[i*incy])._Val[1];
|
||||
}
|
||||
}
|
||||
pCd(z) = zdotc;
|
||||
}
|
||||
#else
|
||||
_Complex double zdotc = 0.0;
|
||||
if (incx == 1 && incy == 1) {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc += Cd(&x[i]) * Cd(&y[i]);
|
||||
}
|
||||
} else {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
|
||||
}
|
||||
}
|
||||
pCd(z) = zdotc;
|
||||
}
|
||||
#endif
|
||||
/* -- translated by f2c (version 20000121).
|
||||
/*
|
||||
You must link the resulting object file with the libraries:
|
||||
-lf2c -lm (in that order)
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -223,7 +223,6 @@ typedef struct Namelist Namelist;
|
|||
#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
|
||||
#define sig_die(s, kill) { exit(1); }
|
||||
#define s_stop(s, n) {exit(0);}
|
||||
static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
|
||||
#define z_abs(z) (cabs(Cd(z)))
|
||||
#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
|
||||
#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
|
||||
|
|
@ -237,149 +236,10 @@ static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
|
|||
/* procedure parameter types for -A and -C++ */
|
||||
|
||||
#define F2C_proc_par_types 1
|
||||
#ifdef __cplusplus
|
||||
typedef logical (*L_fp)(...);
|
||||
#else
|
||||
typedef logical (*L_fp)();
|
||||
|
||||
#endif
|
||||
|
||||
static float spow_ui(float x, integer n) {
|
||||
float pow=1.0; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x = 1/x;
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow *= x;
|
||||
if(u >>= 1) x *= x;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
return pow;
|
||||
}
|
||||
static double dpow_ui(double x, integer n) {
|
||||
double pow=1.0; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x = 1/x;
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow *= x;
|
||||
if(u >>= 1) x *= x;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
return pow;
|
||||
}
|
||||
static _Complex float cpow_ui(_Complex float x, integer n) {
|
||||
_Complex float pow=1.0; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x = 1/x;
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow *= x;
|
||||
if(u >>= 1) x *= x;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
return pow;
|
||||
}
|
||||
static _Complex double zpow_ui(_Complex double x, integer n) {
|
||||
_Complex double pow=1.0; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x = 1/x;
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow *= x;
|
||||
if(u >>= 1) x *= x;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
return pow;
|
||||
}
|
||||
static integer pow_ii(integer x, integer n) {
|
||||
integer pow; unsigned long int u;
|
||||
if (n <= 0) {
|
||||
if (n == 0 || x == 1) pow = 1;
|
||||
else if (x != -1) pow = x == 0 ? 1/x : 0;
|
||||
else n = -n;
|
||||
}
|
||||
if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
|
||||
u = n;
|
||||
for(pow = 1; ; ) {
|
||||
if(u & 01) pow *= x;
|
||||
if(u >>= 1) x *= x;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
return pow;
|
||||
}
|
||||
static integer dmaxloc_(double *w, integer s, integer e, integer *n)
|
||||
{
|
||||
double m; integer i, mi;
|
||||
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
|
||||
if (w[i-1]>m) mi=i ,m=w[i-1];
|
||||
return mi-s+1;
|
||||
}
|
||||
static integer smaxloc_(float *w, integer s, integer e, integer *n)
|
||||
{
|
||||
float m; integer i, mi;
|
||||
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
|
||||
if (w[i-1]>m) mi=i ,m=w[i-1];
|
||||
return mi-s+1;
|
||||
}
|
||||
static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
|
||||
integer n = *n_, incx = *incx_, incy = *incy_, i;
|
||||
_Complex float zdotc = 0.0;
|
||||
if (incx == 1 && incy == 1) {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
|
||||
}
|
||||
} else {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
|
||||
}
|
||||
}
|
||||
pCf(z) = zdotc;
|
||||
}
|
||||
static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
|
||||
integer n = *n_, incx = *incx_, incy = *incy_, i;
|
||||
_Complex double zdotc = 0.0;
|
||||
if (incx == 1 && incy == 1) {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
|
||||
}
|
||||
} else {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
|
||||
}
|
||||
}
|
||||
pCd(z) = zdotc;
|
||||
}
|
||||
static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
|
||||
integer n = *n_, incx = *incx_, incy = *incy_, i;
|
||||
_Complex float zdotc = 0.0;
|
||||
if (incx == 1 && incy == 1) {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc += Cf(&x[i]) * Cf(&y[i]);
|
||||
}
|
||||
} else {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
|
||||
}
|
||||
}
|
||||
pCf(z) = zdotc;
|
||||
}
|
||||
static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
|
||||
integer n = *n_, incx = *incx_, incy = *incy_, i;
|
||||
_Complex double zdotc = 0.0;
|
||||
if (incx == 1 && incy == 1) {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc += Cd(&x[i]) * Cd(&y[i]);
|
||||
}
|
||||
} else {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
|
||||
}
|
||||
}
|
||||
pCd(z) = zdotc;
|
||||
}
|
||||
#endif
|
||||
/* -- translated by f2c (version 20000121).
|
||||
/*
|
||||
You must link the resulting object file with the libraries:
|
||||
-lf2c -lm (in that order)
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -247,7 +247,6 @@ typedef struct Namelist Namelist;
|
|||
#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
|
||||
#define sig_die(s, kill) { exit(1); }
|
||||
#define s_stop(s, n) {exit(0);}
|
||||
static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
|
||||
#define z_abs(z) (cabs(Cd(z)))
|
||||
#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
|
||||
#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
|
||||
|
|
@ -261,11 +260,6 @@ static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
|
|||
/* procedure parameter types for -A and -C++ */
|
||||
|
||||
#define F2C_proc_par_types 1
|
||||
#ifdef __cplusplus
|
||||
typedef logical (*L_fp)(...);
|
||||
#else
|
||||
typedef logical (*L_fp)();
|
||||
#endif
|
||||
|
||||
static float spow_ui(float x, integer n) {
|
||||
float pow=1.0; unsigned long int u;
|
||||
|
|
@ -279,229 +273,6 @@ static float spow_ui(float x, integer n) {
|
|||
}
|
||||
return pow;
|
||||
}
|
||||
static double dpow_ui(double x, integer n) {
|
||||
double pow=1.0; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x = 1/x;
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow *= x;
|
||||
if(u >>= 1) x *= x;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
return pow;
|
||||
}
|
||||
#ifdef _MSC_VER
|
||||
static _Fcomplex cpow_ui(complex x, integer n) {
|
||||
complex pow={1.0,0.0}; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i;
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow.r *= x.r, pow.i *= x.i;
|
||||
if(u >>= 1) x.r *= x.r, x.i *= x.i;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
_Fcomplex p={pow.r, pow.i};
|
||||
return p;
|
||||
}
|
||||
#else
|
||||
static _Complex float cpow_ui(_Complex float x, integer n) {
|
||||
_Complex float pow=1.0; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x = 1/x;
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow *= x;
|
||||
if(u >>= 1) x *= x;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
return pow;
|
||||
}
|
||||
#endif
|
||||
#ifdef _MSC_VER
|
||||
static _Dcomplex zpow_ui(_Dcomplex x, integer n) {
|
||||
_Dcomplex pow={1.0,0.0}; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1];
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1];
|
||||
if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1];
|
||||
else break;
|
||||
}
|
||||
}
|
||||
_Dcomplex p = {pow._Val[0], pow._Val[1]};
|
||||
return p;
|
||||
}
|
||||
#else
|
||||
static _Complex double zpow_ui(_Complex double x, integer n) {
|
||||
_Complex double pow=1.0; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x = 1/x;
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow *= x;
|
||||
if(u >>= 1) x *= x;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
return pow;
|
||||
}
|
||||
#endif
|
||||
static integer pow_ii(integer x, integer n) {
|
||||
integer pow; unsigned long int u;
|
||||
if (n <= 0) {
|
||||
if (n == 0 || x == 1) pow = 1;
|
||||
else if (x != -1) pow = x == 0 ? 1/x : 0;
|
||||
else n = -n;
|
||||
}
|
||||
if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
|
||||
u = n;
|
||||
for(pow = 1; ; ) {
|
||||
if(u & 01) pow *= x;
|
||||
if(u >>= 1) x *= x;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
return pow;
|
||||
}
|
||||
static integer dmaxloc_(double *w, integer s, integer e, integer *n)
|
||||
{
|
||||
double m; integer i, mi;
|
||||
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
|
||||
if (w[i-1]>m) mi=i ,m=w[i-1];
|
||||
return mi-s+1;
|
||||
}
|
||||
static integer smaxloc_(float *w, integer s, integer e, integer *n)
|
||||
{
|
||||
float m; integer i, mi;
|
||||
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
|
||||
if (w[i-1]>m) mi=i ,m=w[i-1];
|
||||
return mi-s+1;
|
||||
}
|
||||
static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
|
||||
integer n = *n_, incx = *incx_, incy = *incy_, i;
|
||||
#ifdef _MSC_VER
|
||||
_Fcomplex zdotc = {0.0, 0.0};
|
||||
if (incx == 1 && incy == 1) {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc._Val[0] += conjf(Cf(&x[i]))._Val[0] * Cf(&y[i])._Val[0];
|
||||
zdotc._Val[1] += conjf(Cf(&x[i]))._Val[1] * Cf(&y[i])._Val[1];
|
||||
}
|
||||
} else {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc._Val[0] += conjf(Cf(&x[i*incx]))._Val[0] * Cf(&y[i*incy])._Val[0];
|
||||
zdotc._Val[1] += conjf(Cf(&x[i*incx]))._Val[1] * Cf(&y[i*incy])._Val[1];
|
||||
}
|
||||
}
|
||||
pCf(z) = zdotc;
|
||||
}
|
||||
#else
|
||||
_Complex float zdotc = 0.0;
|
||||
if (incx == 1 && incy == 1) {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
|
||||
}
|
||||
} else {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
|
||||
}
|
||||
}
|
||||
pCf(z) = zdotc;
|
||||
}
|
||||
#endif
|
||||
static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
|
||||
integer n = *n_, incx = *incx_, incy = *incy_, i;
|
||||
#ifdef _MSC_VER
|
||||
_Dcomplex zdotc = {0.0, 0.0};
|
||||
if (incx == 1 && incy == 1) {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc._Val[0] += conj(Cd(&x[i]))._Val[0] * Cd(&y[i])._Val[0];
|
||||
zdotc._Val[1] += conj(Cd(&x[i]))._Val[1] * Cd(&y[i])._Val[1];
|
||||
}
|
||||
} else {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc._Val[0] += conj(Cd(&x[i*incx]))._Val[0] * Cd(&y[i*incy])._Val[0];
|
||||
zdotc._Val[1] += conj(Cd(&x[i*incx]))._Val[1] * Cd(&y[i*incy])._Val[1];
|
||||
}
|
||||
}
|
||||
pCd(z) = zdotc;
|
||||
}
|
||||
#else
|
||||
_Complex double zdotc = 0.0;
|
||||
if (incx == 1 && incy == 1) {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
|
||||
}
|
||||
} else {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
|
||||
}
|
||||
}
|
||||
pCd(z) = zdotc;
|
||||
}
|
||||
#endif
|
||||
static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
|
||||
integer n = *n_, incx = *incx_, incy = *incy_, i;
|
||||
#ifdef _MSC_VER
|
||||
_Fcomplex zdotc = {0.0, 0.0};
|
||||
if (incx == 1 && incy == 1) {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc._Val[0] += Cf(&x[i])._Val[0] * Cf(&y[i])._Val[0];
|
||||
zdotc._Val[1] += Cf(&x[i])._Val[1] * Cf(&y[i])._Val[1];
|
||||
}
|
||||
} else {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc._Val[0] += Cf(&x[i*incx])._Val[0] * Cf(&y[i*incy])._Val[0];
|
||||
zdotc._Val[1] += Cf(&x[i*incx])._Val[1] * Cf(&y[i*incy])._Val[1];
|
||||
}
|
||||
}
|
||||
pCf(z) = zdotc;
|
||||
}
|
||||
#else
|
||||
_Complex float zdotc = 0.0;
|
||||
if (incx == 1 && incy == 1) {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc += Cf(&x[i]) * Cf(&y[i]);
|
||||
}
|
||||
} else {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
|
||||
}
|
||||
}
|
||||
pCf(z) = zdotc;
|
||||
}
|
||||
#endif
|
||||
static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
|
||||
integer n = *n_, incx = *incx_, incy = *incy_, i;
|
||||
#ifdef _MSC_VER
|
||||
_Dcomplex zdotc = {0.0, 0.0};
|
||||
if (incx == 1 && incy == 1) {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc._Val[0] += Cd(&x[i])._Val[0] * Cd(&y[i])._Val[0];
|
||||
zdotc._Val[1] += Cd(&x[i])._Val[1] * Cd(&y[i])._Val[1];
|
||||
}
|
||||
} else {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc._Val[0] += Cd(&x[i*incx])._Val[0] * Cd(&y[i*incy])._Val[0];
|
||||
zdotc._Val[1] += Cd(&x[i*incx])._Val[1] * Cd(&y[i*incy])._Val[1];
|
||||
}
|
||||
}
|
||||
pCd(z) = zdotc;
|
||||
}
|
||||
#else
|
||||
_Complex double zdotc = 0.0;
|
||||
if (incx == 1 && incy == 1) {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc += Cd(&x[i]) * Cd(&y[i]);
|
||||
}
|
||||
} else {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
|
||||
}
|
||||
}
|
||||
pCd(z) = zdotc;
|
||||
}
|
||||
#endif
|
||||
/* -- translated by f2c (version 20000121).
|
||||
You must link the resulting object file with the libraries:
|
||||
-lf2c -lm (in that order)
|
||||
|
|
|
|||
|
|
@ -223,7 +223,6 @@ typedef struct Namelist Namelist;
|
|||
#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
|
||||
#define sig_die(s, kill) { exit(1); }
|
||||
#define s_stop(s, n) {exit(0);}
|
||||
static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
|
||||
#define z_abs(z) (cabs(Cd(z)))
|
||||
#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
|
||||
#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
|
||||
|
|
@ -237,145 +236,5 @@ static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
|
|||
/* procedure parameter types for -A and -C++ */
|
||||
|
||||
#define F2C_proc_par_types 1
|
||||
#ifdef __cplusplus
|
||||
typedef logical (*L_fp)(...);
|
||||
#else
|
||||
typedef logical (*L_fp)();
|
||||
#endif
|
||||
|
||||
static float spow_ui(float x, integer n) {
|
||||
float pow=1.0; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x = 1/x;
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow *= x;
|
||||
if(u >>= 1) x *= x;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
return pow;
|
||||
}
|
||||
static double dpow_ui(double x, integer n) {
|
||||
double pow=1.0; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x = 1/x;
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow *= x;
|
||||
if(u >>= 1) x *= x;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
return pow;
|
||||
}
|
||||
static _Complex float cpow_ui(_Complex float x, integer n) {
|
||||
_Complex float pow=1.0; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x = 1/x;
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow *= x;
|
||||
if(u >>= 1) x *= x;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
return pow;
|
||||
}
|
||||
static _Complex double zpow_ui(_Complex double x, integer n) {
|
||||
_Complex double pow=1.0; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x = 1/x;
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow *= x;
|
||||
if(u >>= 1) x *= x;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
return pow;
|
||||
}
|
||||
static integer pow_ii(integer x, integer n) {
|
||||
integer pow; unsigned long int u;
|
||||
if (n <= 0) {
|
||||
if (n == 0 || x == 1) pow = 1;
|
||||
else if (x != -1) pow = x == 0 ? 1/x : 0;
|
||||
else n = -n;
|
||||
}
|
||||
if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
|
||||
u = n;
|
||||
for(pow = 1; ; ) {
|
||||
if(u & 01) pow *= x;
|
||||
if(u >>= 1) x *= x;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
return pow;
|
||||
}
|
||||
static integer dmaxloc_(double *w, integer s, integer e, integer *n)
|
||||
{
|
||||
double m; integer i, mi;
|
||||
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
|
||||
if (w[i-1]>m) mi=i ,m=w[i-1];
|
||||
return mi-s+1;
|
||||
}
|
||||
static integer smaxloc_(float *w, integer s, integer e, integer *n)
|
||||
{
|
||||
float m; integer i, mi;
|
||||
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
|
||||
if (w[i-1]>m) mi=i ,m=w[i-1];
|
||||
return mi-s+1;
|
||||
}
|
||||
static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
|
||||
integer n = *n_, incx = *incx_, incy = *incy_, i;
|
||||
_Complex float zdotc = 0.0;
|
||||
if (incx == 1 && incy == 1) {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
|
||||
}
|
||||
} else {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
|
||||
}
|
||||
}
|
||||
pCf(z) = zdotc;
|
||||
}
|
||||
static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
|
||||
integer n = *n_, incx = *incx_, incy = *incy_, i;
|
||||
_Complex double zdotc = 0.0;
|
||||
if (incx == 1 && incy == 1) {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
|
||||
}
|
||||
} else {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
|
||||
}
|
||||
}
|
||||
pCd(z) = zdotc;
|
||||
}
|
||||
static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
|
||||
integer n = *n_, incx = *incx_, incy = *incy_, i;
|
||||
_Complex float zdotc = 0.0;
|
||||
if (incx == 1 && incy == 1) {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc += Cf(&x[i]) * Cf(&y[i]);
|
||||
}
|
||||
} else {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
|
||||
}
|
||||
}
|
||||
pCf(z) = zdotc;
|
||||
}
|
||||
static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
|
||||
integer n = *n_, incx = *incx_, incy = *incy_, i;
|
||||
_Complex double zdotc = 0.0;
|
||||
if (incx == 1 && incy == 1) {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc += Cd(&x[i]) * Cd(&y[i]);
|
||||
}
|
||||
} else {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
|
||||
}
|
||||
}
|
||||
pCd(z) = zdotc;
|
||||
}
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -39,7 +39,7 @@ void LAPACKE_set_nancheck( int flag )
|
|||
nancheck_flag = ( flag ) ? 1 : 0;
|
||||
}
|
||||
|
||||
int LAPACKE_get_nancheck( )
|
||||
int LAPACKE_get_nancheck( void )
|
||||
{
|
||||
char* env;
|
||||
if ( nancheck_flag != -1 ) {
|
||||
|
|
|
|||
|
|
@ -247,7 +247,6 @@ typedef struct Namelist Namelist;
|
|||
#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
|
||||
#define sig_die(s, kill) { exit(1); }
|
||||
#define s_stop(s, n) {exit(0);}
|
||||
static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
|
||||
#define z_abs(z) (cabs(Cd(z)))
|
||||
#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
|
||||
#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
|
||||
|
|
@ -261,247 +260,7 @@ static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
|
|||
/* procedure parameter types for -A and -C++ */
|
||||
|
||||
#define F2C_proc_par_types 1
|
||||
#ifdef __cplusplus
|
||||
typedef logical (*L_fp)(...);
|
||||
#else
|
||||
typedef logical (*L_fp)();
|
||||
#endif
|
||||
|
||||
static float spow_ui(float x, integer n) {
|
||||
float pow=1.0; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x = 1/x;
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow *= x;
|
||||
if(u >>= 1) x *= x;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
return pow;
|
||||
}
|
||||
static double dpow_ui(double x, integer n) {
|
||||
double pow=1.0; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x = 1/x;
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow *= x;
|
||||
if(u >>= 1) x *= x;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
return pow;
|
||||
}
|
||||
#ifdef _MSC_VER
|
||||
static _Fcomplex cpow_ui(complex x, integer n) {
|
||||
complex pow={1.0,0.0}; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i;
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow.r *= x.r, pow.i *= x.i;
|
||||
if(u >>= 1) x.r *= x.r, x.i *= x.i;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
_Fcomplex p={pow.r, pow.i};
|
||||
return p;
|
||||
}
|
||||
#else
|
||||
static _Complex float cpow_ui(_Complex float x, integer n) {
|
||||
_Complex float pow=1.0; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x = 1/x;
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow *= x;
|
||||
if(u >>= 1) x *= x;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
return pow;
|
||||
}
|
||||
#endif
|
||||
#ifdef _MSC_VER
|
||||
static _Dcomplex zpow_ui(_Dcomplex x, integer n) {
|
||||
_Dcomplex pow={1.0,0.0}; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1];
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1];
|
||||
if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1];
|
||||
else break;
|
||||
}
|
||||
}
|
||||
_Dcomplex p = {pow._Val[0], pow._Val[1]};
|
||||
return p;
|
||||
}
|
||||
#else
|
||||
static _Complex double zpow_ui(_Complex double x, integer n) {
|
||||
_Complex double pow=1.0; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x = 1/x;
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow *= x;
|
||||
if(u >>= 1) x *= x;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
return pow;
|
||||
}
|
||||
#endif
|
||||
static integer pow_ii(integer x, integer n) {
|
||||
integer pow; unsigned long int u;
|
||||
if (n <= 0) {
|
||||
if (n == 0 || x == 1) pow = 1;
|
||||
else if (x != -1) pow = x == 0 ? 1/x : 0;
|
||||
else n = -n;
|
||||
}
|
||||
if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
|
||||
u = n;
|
||||
for(pow = 1; ; ) {
|
||||
if(u & 01) pow *= x;
|
||||
if(u >>= 1) x *= x;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
return pow;
|
||||
}
|
||||
static integer dmaxloc_(double *w, integer s, integer e, integer *n)
|
||||
{
|
||||
double m; integer i, mi;
|
||||
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
|
||||
if (w[i-1]>m) mi=i ,m=w[i-1];
|
||||
return mi-s+1;
|
||||
}
|
||||
static integer smaxloc_(float *w, integer s, integer e, integer *n)
|
||||
{
|
||||
float m; integer i, mi;
|
||||
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
|
||||
if (w[i-1]>m) mi=i ,m=w[i-1];
|
||||
return mi-s+1;
|
||||
}
|
||||
static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
|
||||
integer n = *n_, incx = *incx_, incy = *incy_, i;
|
||||
#ifdef _MSC_VER
|
||||
_Fcomplex zdotc = {0.0, 0.0};
|
||||
if (incx == 1 && incy == 1) {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc._Val[0] += conjf(Cf(&x[i]))._Val[0] * Cf(&y[i])._Val[0];
|
||||
zdotc._Val[1] += conjf(Cf(&x[i]))._Val[1] * Cf(&y[i])._Val[1];
|
||||
}
|
||||
} else {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc._Val[0] += conjf(Cf(&x[i*incx]))._Val[0] * Cf(&y[i*incy])._Val[0];
|
||||
zdotc._Val[1] += conjf(Cf(&x[i*incx]))._Val[1] * Cf(&y[i*incy])._Val[1];
|
||||
}
|
||||
}
|
||||
pCf(z) = zdotc;
|
||||
}
|
||||
#else
|
||||
_Complex float zdotc = 0.0;
|
||||
if (incx == 1 && incy == 1) {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
|
||||
}
|
||||
} else {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
|
||||
}
|
||||
}
|
||||
pCf(z) = zdotc;
|
||||
}
|
||||
#endif
|
||||
static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
|
||||
integer n = *n_, incx = *incx_, incy = *incy_, i;
|
||||
#ifdef _MSC_VER
|
||||
_Dcomplex zdotc = {0.0, 0.0};
|
||||
if (incx == 1 && incy == 1) {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc._Val[0] += conj(Cd(&x[i]))._Val[0] * Cd(&y[i])._Val[0];
|
||||
zdotc._Val[1] += conj(Cd(&x[i]))._Val[1] * Cd(&y[i])._Val[1];
|
||||
}
|
||||
} else {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc._Val[0] += conj(Cd(&x[i*incx]))._Val[0] * Cd(&y[i*incy])._Val[0];
|
||||
zdotc._Val[1] += conj(Cd(&x[i*incx]))._Val[1] * Cd(&y[i*incy])._Val[1];
|
||||
}
|
||||
}
|
||||
pCd(z) = zdotc;
|
||||
}
|
||||
#else
|
||||
_Complex double zdotc = 0.0;
|
||||
if (incx == 1 && incy == 1) {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
|
||||
}
|
||||
} else {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
|
||||
}
|
||||
}
|
||||
pCd(z) = zdotc;
|
||||
}
|
||||
#endif
|
||||
static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
|
||||
integer n = *n_, incx = *incx_, incy = *incy_, i;
|
||||
#ifdef _MSC_VER
|
||||
_Fcomplex zdotc = {0.0, 0.0};
|
||||
if (incx == 1 && incy == 1) {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc._Val[0] += Cf(&x[i])._Val[0] * Cf(&y[i])._Val[0];
|
||||
zdotc._Val[1] += Cf(&x[i])._Val[1] * Cf(&y[i])._Val[1];
|
||||
}
|
||||
} else {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc._Val[0] += Cf(&x[i*incx])._Val[0] * Cf(&y[i*incy])._Val[0];
|
||||
zdotc._Val[1] += Cf(&x[i*incx])._Val[1] * Cf(&y[i*incy])._Val[1];
|
||||
}
|
||||
}
|
||||
pCf(z) = zdotc;
|
||||
}
|
||||
#else
|
||||
_Complex float zdotc = 0.0;
|
||||
if (incx == 1 && incy == 1) {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc += Cf(&x[i]) * Cf(&y[i]);
|
||||
}
|
||||
} else {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
|
||||
}
|
||||
}
|
||||
pCf(z) = zdotc;
|
||||
}
|
||||
#endif
|
||||
static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
|
||||
integer n = *n_, incx = *incx_, incy = *incy_, i;
|
||||
#ifdef _MSC_VER
|
||||
_Dcomplex zdotc = {0.0, 0.0};
|
||||
if (incx == 1 && incy == 1) {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc._Val[0] += Cd(&x[i])._Val[0] * Cd(&y[i])._Val[0];
|
||||
zdotc._Val[1] += Cd(&x[i])._Val[1] * Cd(&y[i])._Val[1];
|
||||
}
|
||||
} else {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc._Val[0] += Cd(&x[i*incx])._Val[0] * Cd(&y[i*incy])._Val[0];
|
||||
zdotc._Val[1] += Cd(&x[i*incx])._Val[1] * Cd(&y[i*incy])._Val[1];
|
||||
}
|
||||
}
|
||||
pCd(z) = zdotc;
|
||||
}
|
||||
#else
|
||||
_Complex double zdotc = 0.0;
|
||||
if (incx == 1 && incy == 1) {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc += Cd(&x[i]) * Cd(&y[i]);
|
||||
}
|
||||
} else {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
|
||||
}
|
||||
}
|
||||
pCd(z) = zdotc;
|
||||
}
|
||||
#endif
|
||||
/* -- translated by f2c (version 20000121).
|
||||
You must link the resulting object file with the libraries:
|
||||
-lf2c -lm (in that order)
|
||||
|
|
|
|||
|
|
@ -247,7 +247,6 @@ typedef struct Namelist Namelist;
|
|||
#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
|
||||
#define sig_die(s, kill) { exit(1); }
|
||||
#define s_stop(s, n) {exit(0);}
|
||||
static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
|
||||
#define z_abs(z) (cabs(Cd(z)))
|
||||
#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
|
||||
#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
|
||||
|
|
@ -261,247 +260,6 @@ static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
|
|||
/* procedure parameter types for -A and -C++ */
|
||||
|
||||
#define F2C_proc_par_types 1
|
||||
#ifdef __cplusplus
|
||||
typedef logical (*L_fp)(...);
|
||||
#else
|
||||
typedef logical (*L_fp)();
|
||||
#endif
|
||||
|
||||
static float spow_ui(float x, integer n) {
|
||||
float pow=1.0; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x = 1/x;
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow *= x;
|
||||
if(u >>= 1) x *= x;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
return pow;
|
||||
}
|
||||
static double dpow_ui(double x, integer n) {
|
||||
double pow=1.0; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x = 1/x;
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow *= x;
|
||||
if(u >>= 1) x *= x;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
return pow;
|
||||
}
|
||||
#ifdef _MSC_VER
|
||||
static _Fcomplex cpow_ui(complex x, integer n) {
|
||||
complex pow={1.0,0.0}; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i;
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow.r *= x.r, pow.i *= x.i;
|
||||
if(u >>= 1) x.r *= x.r, x.i *= x.i;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
_Fcomplex p={pow.r, pow.i};
|
||||
return p;
|
||||
}
|
||||
#else
|
||||
static _Complex float cpow_ui(_Complex float x, integer n) {
|
||||
_Complex float pow=1.0; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x = 1/x;
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow *= x;
|
||||
if(u >>= 1) x *= x;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
return pow;
|
||||
}
|
||||
#endif
|
||||
#ifdef _MSC_VER
|
||||
static _Dcomplex zpow_ui(_Dcomplex x, integer n) {
|
||||
_Dcomplex pow={1.0,0.0}; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1];
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1];
|
||||
if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1];
|
||||
else break;
|
||||
}
|
||||
}
|
||||
_Dcomplex p = {pow._Val[0], pow._Val[1]};
|
||||
return p;
|
||||
}
|
||||
#else
|
||||
static _Complex double zpow_ui(_Complex double x, integer n) {
|
||||
_Complex double pow=1.0; unsigned long int u;
|
||||
if(n != 0) {
|
||||
if(n < 0) n = -n, x = 1/x;
|
||||
for(u = n; ; ) {
|
||||
if(u & 01) pow *= x;
|
||||
if(u >>= 1) x *= x;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
return pow;
|
||||
}
|
||||
#endif
|
||||
static integer pow_ii(integer x, integer n) {
|
||||
integer pow; unsigned long int u;
|
||||
if (n <= 0) {
|
||||
if (n == 0 || x == 1) pow = 1;
|
||||
else if (x != -1) pow = x == 0 ? 1/x : 0;
|
||||
else n = -n;
|
||||
}
|
||||
if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
|
||||
u = n;
|
||||
for(pow = 1; ; ) {
|
||||
if(u & 01) pow *= x;
|
||||
if(u >>= 1) x *= x;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
return pow;
|
||||
}
|
||||
static integer dmaxloc_(double *w, integer s, integer e, integer *n)
|
||||
{
|
||||
double m; integer i, mi;
|
||||
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
|
||||
if (w[i-1]>m) mi=i ,m=w[i-1];
|
||||
return mi-s+1;
|
||||
}
|
||||
static integer smaxloc_(float *w, integer s, integer e, integer *n)
|
||||
{
|
||||
float m; integer i, mi;
|
||||
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
|
||||
if (w[i-1]>m) mi=i ,m=w[i-1];
|
||||
return mi-s+1;
|
||||
}
|
||||
static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
|
||||
integer n = *n_, incx = *incx_, incy = *incy_, i;
|
||||
#ifdef _MSC_VER
|
||||
_Fcomplex zdotc = {0.0, 0.0};
|
||||
if (incx == 1 && incy == 1) {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc._Val[0] += conjf(Cf(&x[i]))._Val[0] * Cf(&y[i])._Val[0];
|
||||
zdotc._Val[1] += conjf(Cf(&x[i]))._Val[1] * Cf(&y[i])._Val[1];
|
||||
}
|
||||
} else {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc._Val[0] += conjf(Cf(&x[i*incx]))._Val[0] * Cf(&y[i*incy])._Val[0];
|
||||
zdotc._Val[1] += conjf(Cf(&x[i*incx]))._Val[1] * Cf(&y[i*incy])._Val[1];
|
||||
}
|
||||
}
|
||||
pCf(z) = zdotc;
|
||||
}
|
||||
#else
|
||||
_Complex float zdotc = 0.0;
|
||||
if (incx == 1 && incy == 1) {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
|
||||
}
|
||||
} else {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
|
||||
}
|
||||
}
|
||||
pCf(z) = zdotc;
|
||||
}
|
||||
#endif
|
||||
static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
|
||||
integer n = *n_, incx = *incx_, incy = *incy_, i;
|
||||
#ifdef _MSC_VER
|
||||
_Dcomplex zdotc = {0.0, 0.0};
|
||||
if (incx == 1 && incy == 1) {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc._Val[0] += conj(Cd(&x[i]))._Val[0] * Cd(&y[i])._Val[0];
|
||||
zdotc._Val[1] += conj(Cd(&x[i]))._Val[1] * Cd(&y[i])._Val[1];
|
||||
}
|
||||
} else {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc._Val[0] += conj(Cd(&x[i*incx]))._Val[0] * Cd(&y[i*incy])._Val[0];
|
||||
zdotc._Val[1] += conj(Cd(&x[i*incx]))._Val[1] * Cd(&y[i*incy])._Val[1];
|
||||
}
|
||||
}
|
||||
pCd(z) = zdotc;
|
||||
}
|
||||
#else
|
||||
_Complex double zdotc = 0.0;
|
||||
if (incx == 1 && incy == 1) {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
|
||||
}
|
||||
} else {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
|
||||
}
|
||||
}
|
||||
pCd(z) = zdotc;
|
||||
}
|
||||
#endif
|
||||
static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
|
||||
integer n = *n_, incx = *incx_, incy = *incy_, i;
|
||||
#ifdef _MSC_VER
|
||||
_Fcomplex zdotc = {0.0, 0.0};
|
||||
if (incx == 1 && incy == 1) {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc._Val[0] += Cf(&x[i])._Val[0] * Cf(&y[i])._Val[0];
|
||||
zdotc._Val[1] += Cf(&x[i])._Val[1] * Cf(&y[i])._Val[1];
|
||||
}
|
||||
} else {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc._Val[0] += Cf(&x[i*incx])._Val[0] * Cf(&y[i*incy])._Val[0];
|
||||
zdotc._Val[1] += Cf(&x[i*incx])._Val[1] * Cf(&y[i*incy])._Val[1];
|
||||
}
|
||||
}
|
||||
pCf(z) = zdotc;
|
||||
}
|
||||
#else
|
||||
_Complex float zdotc = 0.0;
|
||||
if (incx == 1 && incy == 1) {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc += Cf(&x[i]) * Cf(&y[i]);
|
||||
}
|
||||
} else {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
|
||||
}
|
||||
}
|
||||
pCf(z) = zdotc;
|
||||
}
|
||||
#endif
|
||||
static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
|
||||
integer n = *n_, incx = *incx_, incy = *incy_, i;
|
||||
#ifdef _MSC_VER
|
||||
_Dcomplex zdotc = {0.0, 0.0};
|
||||
if (incx == 1 && incy == 1) {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc._Val[0] += Cd(&x[i])._Val[0] * Cd(&y[i])._Val[0];
|
||||
zdotc._Val[1] += Cd(&x[i])._Val[1] * Cd(&y[i])._Val[1];
|
||||
}
|
||||
} else {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc._Val[0] += Cd(&x[i*incx])._Val[0] * Cd(&y[i*incy])._Val[0];
|
||||
zdotc._Val[1] += Cd(&x[i*incx])._Val[1] * Cd(&y[i*incy])._Val[1];
|
||||
}
|
||||
}
|
||||
pCd(z) = zdotc;
|
||||
}
|
||||
#else
|
||||
_Complex double zdotc = 0.0;
|
||||
if (incx == 1 && incy == 1) {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc += Cd(&x[i]) * Cd(&y[i]);
|
||||
}
|
||||
} else {
|
||||
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
|
||||
zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
|
||||
}
|
||||
}
|
||||
pCd(z) = zdotc;
|
||||
}
|
||||
#endif
|
||||
/* -- translated by f2c (version 20000121).
|
||||
You must link the resulting object file with the libraries:
|
||||
-lf2c -lm (in that order)
|
||||
|
|
|
|||
|
|
@ -0,0 +1,479 @@
|
|||
#include <math.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#include <complex.h>
|
||||
#ifdef complex
|
||||
#undef complex
|
||||
#endif
|
||||
#ifdef I
|
||||
#undef I
|
||||
#endif
|
||||
|
||||
#if defined(_WIN64)
|
||||
typedef long long BLASLONG;
|
||||
typedef unsigned long long BLASULONG;
|
||||
#else
|
||||
typedef long BLASLONG;
|
||||
typedef unsigned long BLASULONG;
|
||||
#endif
|
||||
|
||||
#ifdef LAPACK_ILP64
|
||||
typedef BLASLONG blasint;
|
||||
#if defined(_WIN64)
|
||||
#define blasabs(x) llabs(x)
|
||||
#else
|
||||
#define blasabs(x) labs(x)
|
||||
#endif
|
||||
#else
|
||||
typedef int blasint;
|
||||
#define blasabs(x) abs(x)
|
||||
#endif
|
||||
|
||||
typedef blasint integer;
|
||||
|
||||
typedef unsigned int uinteger;
|
||||
typedef char *address;
|
||||
typedef short int shortint;
|
||||
typedef float real;
|
||||
typedef double doublereal;
|
||||
typedef struct { real r, i; } complex;
|
||||
typedef struct { doublereal r, i; } doublecomplex;
|
||||
#ifdef _MSC_VER
|
||||
static inline _Fcomplex Cf(complex *z) {_Fcomplex zz={z->r , z->i}; return zz;}
|
||||
static inline _Dcomplex Cd(doublecomplex *z) {_Dcomplex zz={z->r , z->i};return zz;}
|
||||
static inline _Fcomplex * _pCf(complex *z) {return (_Fcomplex*)z;}
|
||||
static inline _Dcomplex * _pCd(doublecomplex *z) {return (_Dcomplex*)z;}
|
||||
#else
|
||||
static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
|
||||
static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
|
||||
static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
|
||||
static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
|
||||
#endif
|
||||
#define pCf(z) (*_pCf(z))
|
||||
#define pCd(z) (*_pCd(z))
|
||||
typedef int logical;
|
||||
typedef short int shortlogical;
|
||||
typedef char logical1;
|
||||
typedef char integer1;
|
||||
|
||||
#define TRUE_ (1)
|
||||
#define FALSE_ (0)
|
||||
|
||||
/* Extern is for use with -E */
|
||||
#ifndef Extern
|
||||
#define Extern extern
|
||||
#endif
|
||||
|
||||
/* I/O stuff */
|
||||
|
||||
typedef int flag;
|
||||
typedef int ftnlen;
|
||||
typedef int ftnint;
|
||||
|
||||
/*external read, write*/
|
||||
typedef struct
|
||||
{ flag cierr;
|
||||
ftnint ciunit;
|
||||
flag ciend;
|
||||
char *cifmt;
|
||||
ftnint cirec;
|
||||
} cilist;
|
||||
|
||||
/*internal read, write*/
|
||||
typedef struct
|
||||
{ flag icierr;
|
||||
char *iciunit;
|
||||
flag iciend;
|
||||
char *icifmt;
|
||||
ftnint icirlen;
|
||||
ftnint icirnum;
|
||||
} icilist;
|
||||
|
||||
/*open*/
|
||||
typedef struct
|
||||
{ flag oerr;
|
||||
ftnint ounit;
|
||||
char *ofnm;
|
||||
ftnlen ofnmlen;
|
||||
char *osta;
|
||||
char *oacc;
|
||||
char *ofm;
|
||||
ftnint orl;
|
||||
char *oblnk;
|
||||
} olist;
|
||||
|
||||
/*close*/
|
||||
typedef struct
|
||||
{ flag cerr;
|
||||
ftnint cunit;
|
||||
char *csta;
|
||||
} cllist;
|
||||
|
||||
/*rewind, backspace, endfile*/
|
||||
typedef struct
|
||||
{ flag aerr;
|
||||
ftnint aunit;
|
||||
} alist;
|
||||
|
||||
/* inquire */
|
||||
typedef struct
|
||||
{ flag inerr;
|
||||
ftnint inunit;
|
||||
char *infile;
|
||||
ftnlen infilen;
|
||||
ftnint *inex; /*parameters in standard's order*/
|
||||
ftnint *inopen;
|
||||
ftnint *innum;
|
||||
ftnint *innamed;
|
||||
char *inname;
|
||||
ftnlen innamlen;
|
||||
char *inacc;
|
||||
ftnlen inacclen;
|
||||
char *inseq;
|
||||
ftnlen inseqlen;
|
||||
char *indir;
|
||||
ftnlen indirlen;
|
||||
char *infmt;
|
||||
ftnlen infmtlen;
|
||||
char *inform;
|
||||
ftnint informlen;
|
||||
char *inunf;
|
||||
ftnlen inunflen;
|
||||
ftnint *inrecl;
|
||||
ftnint *innrec;
|
||||
char *inblank;
|
||||
ftnlen inblanklen;
|
||||
} inlist;
|
||||
|
||||
#define VOID void
|
||||
|
||||
union Multitype { /* for multiple entry points */
|
||||
integer1 g;
|
||||
shortint h;
|
||||
integer i;
|
||||
/* longint j; */
|
||||
real r;
|
||||
doublereal d;
|
||||
complex c;
|
||||
doublecomplex z;
|
||||
};
|
||||
|
||||
typedef union Multitype Multitype;
|
||||
|
||||
struct Vardesc { /* for Namelist */
|
||||
char *name;
|
||||
char *addr;
|
||||
ftnlen *dims;
|
||||
int type;
|
||||
};
|
||||
typedef struct Vardesc Vardesc;
|
||||
|
||||
struct Namelist {
|
||||
char *name;
|
||||
Vardesc **vars;
|
||||
int nvars;
|
||||
};
|
||||
typedef struct Namelist Namelist;
|
||||
|
||||
#define abs(x) ((x) >= 0 ? (x) : -(x))
|
||||
#define dabs(x) (fabs(x))
|
||||
#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
|
||||
#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
|
||||
#define dmin(a,b) (f2cmin(a,b))
|
||||
#define dmax(a,b) (f2cmax(a,b))
|
||||
#define bit_test(a,b) ((a) >> (b) & 1)
|
||||
#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b)))
|
||||
#define bit_set(a,b) ((a) | ((uinteger)1 << (b)))
|
||||
|
||||
#define abort_() { sig_die("Fortran abort routine called", 1); }
|
||||
#define c_abs(z) (cabsf(Cf(z)))
|
||||
#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
|
||||
#ifdef _MSC_VER
|
||||
#define c_div(c, a, b) {Cf(c)._Val[0] = (Cf(a)._Val[0]/Cf(b)._Val[0]); Cf(c)._Val[1]=(Cf(a)._Val[1]/Cf(b)._Val[1]);}
|
||||
#define z_div(c, a, b) {Cd(c)._Val[0] = (Cd(a)._Val[0]/Cd(b)._Val[0]); Cd(c)._Val[1]=(Cd(a)._Val[1]/Cd(b)._Val[1]);}
|
||||
#else
|
||||
#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
|
||||
#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
|
||||
#endif
|
||||
#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
|
||||
#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
|
||||
#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
|
||||
//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
|
||||
#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
|
||||
#define d_abs(x) (fabs(*(x)))
|
||||
#define d_acos(x) (acos(*(x)))
|
||||
#define d_asin(x) (asin(*(x)))
|
||||
#define d_atan(x) (atan(*(x)))
|
||||
#define d_atn2(x, y) (atan2(*(x),*(y)))
|
||||
#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
|
||||
#define r_cnjg(R, Z) { pCf(R) = conjf(Cf(Z)); }
|
||||
#define d_cos(x) (cos(*(x)))
|
||||
#define d_cosh(x) (cosh(*(x)))
|
||||
#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
|
||||
#define d_exp(x) (exp(*(x)))
|
||||
#define d_imag(z) (cimag(Cd(z)))
|
||||
#define r_imag(z) (cimagf(Cf(z)))
|
||||
#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
|
||||
#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
|
||||
#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
|
||||
#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
|
||||
#define d_log(x) (log(*(x)))
|
||||
#define d_mod(x, y) (fmod(*(x), *(y)))
|
||||
#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
|
||||
#define d_nint(x) u_nint(*(x))
|
||||
#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
|
||||
#define d_sign(a,b) u_sign(*(a),*(b))
|
||||
#define r_sign(a,b) u_sign(*(a),*(b))
|
||||
#define d_sin(x) (sin(*(x)))
|
||||
#define d_sinh(x) (sinh(*(x)))
|
||||
#define d_sqrt(x) (sqrt(*(x)))
|
||||
#define d_tan(x) (tan(*(x)))
|
||||
#define d_tanh(x) (tanh(*(x)))
|
||||
#define i_abs(x) abs(*(x))
|
||||
#define i_dnnt(x) ((integer)u_nint(*(x)))
|
||||
#define i_len(s, n) (n)
|
||||
#define i_nint(x) ((integer)u_nint(*(x)))
|
||||
#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
|
||||
#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
|
||||
#define pow_si(B,E) spow_ui(*(B),*(E))
|
||||
#define pow_ri(B,E) spow_ui(*(B),*(E))
|
||||
#define pow_di(B,E) dpow_ui(*(B),*(E))
|
||||
#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
|
||||
#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
|
||||
#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
|
||||
#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; }
|
||||
#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
|
||||
#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
|
||||
#define sig_die(s, kill) { exit(1); }
|
||||
#define s_stop(s, n) {exit(0);}
|
||||
#define z_abs(z) (cabs(Cd(z)))
|
||||
#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
|
||||
#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
|
||||
#define myexit_() break;
|
||||
#define mycycle_() continue;
|
||||
#define myceiling_(w) {ceil(w)}
|
||||
#define myhuge_(w) {HUGE_VAL}
|
||||
#define mymaxloc_(w,s,e,n) dmaxloc_(w,*(s),*(e),n)
|
||||
|
||||
/* procedure parameter types for -A and -C++ */
|
||||
|
||||
#define F2C_proc_par_types 1
|
||||
#ifdef __cplusplus
|
||||
typedef logical (*L_fp)(...);
|
||||
#else
|
||||
typedef logical (*L_fp)();
|
||||
#endif
|
||||
|
||||
/* -- translated by f2c (version 20000121).
|
||||
You must link the resulting object file with the libraries:
|
||||
-lf2c -lm (in that order)
|
||||
*/
|
||||
|
||||
|
||||
|
||||
/* Table of constant values */
|
||||
|
||||
static complex c_b1 = {0.f,0.f};
|
||||
static complex c_b2 = {1.f,0.f};
|
||||
|
||||
/* > \brief \b CGELQS */
|
||||
|
||||
/* =========== DOCUMENTATION =========== */
|
||||
|
||||
/* Online html documentation available at */
|
||||
/* http://www.netlib.org/lapack/explore-html/ */
|
||||
|
||||
/* Definition: */
|
||||
/* =========== */
|
||||
|
||||
/* SUBROUTINE CGELQS( M, N, NRHS, A, LDA, TAU, B, LDB, WORK, LWORK, */
|
||||
/* INFO ) */
|
||||
|
||||
/* INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS */
|
||||
/* COMPLEX A( LDA, * ), B( LDB, * ), TAU( * ), */
|
||||
/* $ WORK( LWORK ) */
|
||||
|
||||
|
||||
/* > \par Purpose: */
|
||||
/* ============= */
|
||||
/* > */
|
||||
/* > \verbatim */
|
||||
/* > */
|
||||
/* > Compute a minimum-norm solution */
|
||||
/* > f2cmin || A*X - B || */
|
||||
/* > using the LQ factorization */
|
||||
/* > A = L*Q */
|
||||
/* > computed by CGELQF. */
|
||||
/* > \endverbatim */
|
||||
|
||||
/* Arguments: */
|
||||
/* ========== */
|
||||
|
||||
/* > \param[in] M */
|
||||
/* > \verbatim */
|
||||
/* > M is INTEGER */
|
||||
/* > The number of rows of the matrix A. M >= 0. */
|
||||
/* > \endverbatim */
|
||||
/* > */
|
||||
/* > \param[in] N */
|
||||
/* > \verbatim */
|
||||
/* > N is INTEGER */
|
||||
/* > The number of columns of the matrix A. N >= M >= 0. */
|
||||
/* > \endverbatim */
|
||||
/* > */
|
||||
/* > \param[in] NRHS */
|
||||
/* > \verbatim */
|
||||
/* > NRHS is INTEGER */
|
||||
/* > The number of columns of B. NRHS >= 0. */
|
||||
/* > \endverbatim */
|
||||
/* > */
|
||||
/* > \param[in] A */
|
||||
/* > \verbatim */
|
||||
/* > A is COMPLEX array, dimension (LDA,N) */
|
||||
/* > Details of the LQ factorization of the original matrix A as */
|
||||
/* > returned by CGELQF. */
|
||||
/* > \endverbatim */
|
||||
/* > */
|
||||
/* > \param[in] LDA */
|
||||
/* > \verbatim */
|
||||
/* > LDA is INTEGER */
|
||||
/* > The leading dimension of the array A. LDA >= M. */
|
||||
/* > \endverbatim */
|
||||
/* > */
|
||||
/* > \param[in] TAU */
|
||||
/* > \verbatim */
|
||||
/* > TAU is COMPLEX array, dimension (M) */
|
||||
/* > Details of the orthogonal matrix Q. */
|
||||
/* > \endverbatim */
|
||||
/* > */
|
||||
/* > \param[in,out] B */
|
||||
/* > \verbatim */
|
||||
/* > B is COMPLEX array, dimension (LDB,NRHS) */
|
||||
/* > On entry, the m-by-nrhs right hand side matrix B. */
|
||||
/* > On exit, the n-by-nrhs solution matrix X. */
|
||||
/* > \endverbatim */
|
||||
/* > */
|
||||
/* > \param[in] LDB */
|
||||
/* > \verbatim */
|
||||
/* > LDB is INTEGER */
|
||||
/* > The leading dimension of the array B. LDB >= N. */
|
||||
/* > \endverbatim */
|
||||
/* > */
|
||||
/* > \param[out] WORK */
|
||||
/* > \verbatim */
|
||||
/* > WORK is COMPLEX array, dimension (LWORK) */
|
||||
/* > \endverbatim */
|
||||
/* > */
|
||||
/* > \param[in] LWORK */
|
||||
/* > \verbatim */
|
||||
/* > LWORK is INTEGER */
|
||||
/* > The length of the array WORK. LWORK must be at least NRHS, */
|
||||
/* > and should be at least NRHS*NB, where NB is the block size */
|
||||
/* > for this environment. */
|
||||
/* > \endverbatim */
|
||||
/* > */
|
||||
/* > \param[out] INFO */
|
||||
/* > \verbatim */
|
||||
/* > INFO is INTEGER */
|
||||
/* > = 0: successful exit */
|
||||
/* > < 0: if INFO = -i, the i-th argument had an illegal value */
|
||||
/* > \endverbatim */
|
||||
|
||||
/* Authors: */
|
||||
/* ======== */
|
||||
|
||||
/* > \author Univ. of Tennessee */
|
||||
/* > \author Univ. of California Berkeley */
|
||||
/* > \author Univ. of Colorado Denver */
|
||||
/* > \author NAG Ltd. */
|
||||
|
||||
/* > \ingroup complex_lin */
|
||||
|
||||
/* ===================================================================== */
|
||||
/* Subroutine */ int cgelqs_(integer *m, integer *n, integer *nrhs, complex *
|
||||
a, integer *lda, complex *tau, complex *b, integer *ldb, complex *
|
||||
work, integer *lwork, integer *info)
|
||||
{
|
||||
/* System generated locals */
|
||||
integer a_dim1, a_offset, b_dim1, b_offset, i__1;
|
||||
|
||||
/* Local variables */
|
||||
extern /* Subroutine */ int ctrsm_(char *, char *, char *, char *,
|
||||
integer *, integer *, complex *, complex *, integer *, complex *,
|
||||
integer *), claset_(char *,
|
||||
integer *, integer *, complex *, complex *, complex *, integer *), xerbla_(char *, integer *), cunmlq_(char *, char
|
||||
*, integer *, integer *, integer *, complex *, integer *, complex
|
||||
*, complex *, integer *, complex *, integer *, integer *);
|
||||
|
||||
|
||||
/* -- LAPACK test routine -- */
|
||||
/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */
|
||||
/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
|
||||
|
||||
|
||||
/* ===================================================================== */
|
||||
|
||||
|
||||
/* Test the input parameters. */
|
||||
|
||||
/* Parameter adjustments */
|
||||
a_dim1 = *lda;
|
||||
a_offset = 1 + a_dim1 * 1;
|
||||
a -= a_offset;
|
||||
--tau;
|
||||
b_dim1 = *ldb;
|
||||
b_offset = 1 + b_dim1 * 1;
|
||||
b -= b_offset;
|
||||
--work;
|
||||
|
||||
/* Function Body */
|
||||
*info = 0;
|
||||
if (*m < 0) {
|
||||
*info = -1;
|
||||
} else if (*n < 0 || *m > *n) {
|
||||
*info = -2;
|
||||
} else if (*nrhs < 0) {
|
||||
*info = -3;
|
||||
} else if (*lda < f2cmax(1,*m)) {
|
||||
*info = -5;
|
||||
} else if (*ldb < f2cmax(1,*n)) {
|
||||
*info = -8;
|
||||
} else if (*lwork < 1 || *lwork < *nrhs && *m > 0 && *n > 0) {
|
||||
*info = -10;
|
||||
}
|
||||
if (*info != 0) {
|
||||
i__1 = -(*info);
|
||||
xerbla_("CGELQS", &i__1);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Quick return if possible */
|
||||
|
||||
if (*n == 0 || *nrhs == 0 || *m == 0) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Solve L*X = B(1:m,:) */
|
||||
|
||||
ctrsm_("Left", "Lower", "No transpose", "Non-unit", m, nrhs, &c_b2, &a[
|
||||
a_offset], lda, &b[b_offset], ldb);
|
||||
|
||||
/* Set B(m+1:n,:) to zero */
|
||||
|
||||
if (*m < *n) {
|
||||
i__1 = *n - *m;
|
||||
claset_("Full", &i__1, nrhs, &c_b1, &c_b1, &b[*m + 1 + b_dim1], ldb);
|
||||
}
|
||||
|
||||
/* B := Q' * B */
|
||||
|
||||
cunmlq_("Left", "Conjugate transpose", n, nrhs, m, &a[a_offset], lda, &
|
||||
tau[1], &b[b_offset], ldb, &work[1], lwork, info);
|
||||
|
||||
return 0;
|
||||
|
||||
/* End of CGELQS */
|
||||
|
||||
} /* cgelqs_ */
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue