Merge pull request #4316 from OpenMathLib/develop

Merge develop into release-0.3.0 for 0.3.25
This commit is contained in:
Martin Kroeker 2023-11-12 22:55:00 +01:00 committed by GitHub
commit 5e1a429eab
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
491 changed files with 14375 additions and 39847 deletions

View File

@ -148,6 +148,16 @@ FreeBSD_task:
- ls -l /usr/local/lib
- gmake CC=gcc INTERFACE64=1
FreeBSD_task:
name: FreeBSD-clang-openmp
freebsd_instance:
image_family: freebsd-13-2
install_script:
- pkg update -f && pkg upgrade -y && pkg install -y gmake gcc
- ln -s /usr/local/lib/gcc12/libgfortran.so.5.0.0 /usr/lib/libgfortran.so
compile_script:
- gmake CC=clang FC=gfortran USE_OPENMP=1 CPP_THREAD_SAFETY_TEST=1
#task:
# name: Windows/LLVM16 --- too slow ---
# windows_container:

16
.cirun.yml Normal file
View File

@ -0,0 +1,16 @@
# Self-Hosted Github Action Runners on AWS via Cirun.io
# Reference: https://docs.cirun.io/reference/yaml
runners:
- name: "aws-runner-graviton"
# Cloud Provider: AWS
cloud: "aws"
region: "us-east-1"
# Cheapest VM on AWS
instance_type: "c7g.large"
# Ubuntu-22.04, ami image
machine_image: "ami-0a0c8eebcdd6dcbd0"
preemptible: false
# Add this label in the "runs-on" param in .github/workflows/<workflow-name>.yml
# So that this runner is created for running the workflow
labels:
- "cirun-aws-runner-graviton"

139
.github/workflows/arm64_graviton.yml vendored Normal file
View File

@ -0,0 +1,139 @@
name: arm64 graviton cirun
on:
push:
branches:
- develop
- release-**
pull_request:
branches:
- develop
- release-**
concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
cancel-in-progress: true
permissions:
contents: read # to fetch code (actions/checkout)
jobs:
build:
if: "github.repository == 'OpenMathLib/OpenBLAS'"
runs-on: "cirun-aws-runner-graviton--${{ github.run_id }}"
strategy:
fail-fast: false
matrix:
fortran: [gfortran]
build: [cmake, make]
steps:
- name: Checkout repository
uses: actions/checkout@v3
- name: Print system information
run: |
if [ "$RUNNER_OS" == "Linux" ]; then
cat /proc/cpuinfo
else
echo "::error::$RUNNER_OS not supported"
exit 1
fi
- name: Install Dependencies
run: |
if [ "$RUNNER_OS" == "Linux" ]; then
sudo apt update
sudo apt-get install -y gfortran cmake ccache libtinfo5
else
echo "::error::$RUNNER_OS not supported"
exit 1
fi
- name: Compilation cache
uses: actions/cache@v3
with:
path: ~/.ccache
# We include the commit sha in the cache key, as new cache entries are
# only created if there is no existing entry for the key yet.
# GNU make and cmake call the compilers differently. It looks like
# that causes the cache to mismatch. Keep the ccache for both build
# tools separate to avoid polluting each other.
key: ccache-${{ runner.os }}-${{ matrix.build }}-${{ matrix.fortran }}-${{ github.ref }}-${{ github.sha }}
# Restore a matching ccache cache entry. Prefer same branch and same Fortran compiler.
restore-keys: |
ccache-${{ runner.os }}-${{ matrix.build }}-${{ matrix.fortran }}-${{ github.ref }}
ccache-${{ runner.os }}-${{ matrix.build }}-${{ matrix.fortran }}
ccache-${{ runner.os }}-${{ matrix.build }}
- name: Configure ccache
run: |
if [ "${{ matrix.build }}" = "make" ]; then
# Add ccache to path
if [ "$RUNNER_OS" = "Linux" ]; then
echo "/usr/lib/ccache" >> $GITHUB_PATH
else
echo "::error::$RUNNER_OS not supported"
exit 1
fi
fi
# Limit the maximum size and switch on compression to avoid exceeding the total disk or cache quota (5 GB).
test -d ~/.ccache || mkdir -p ~/.ccache
echo "max_size = 300M" > ~/.ccache/ccache.conf
echo "compression = true" >> ~/.ccache/ccache.conf
ccache -s
- name: Build OpenBLAS
run: |
case "${{ matrix.build }}" in
"make")
make -j$(nproc) DYNAMIC_ARCH=1 USE_OPENMP=0 FC="ccache ${{ matrix.fortran }}"
;;
"cmake")
mkdir build && cd build
cmake -DDYNAMIC_ARCH=1 \
-DNOFORTRAN=0 \
-DBUILD_WITHOUT_LAPACK=0 \
-DCMAKE_VERBOSE_MAKEFILE=ON \
-DCMAKE_BUILD_TYPE=Release \
-DCMAKE_Fortran_COMPILER=${{ matrix.fortran }} \
-DCMAKE_C_COMPILER_LAUNCHER=ccache \
-DCMAKE_Fortran_COMPILER_LAUNCHER=ccache \
..
cmake --build .
;;
*)
echo "::error::Configuration not supported"
exit 1
;;
esac
- name: Show ccache status
continue-on-error: true
run: ccache -s
- name: Run tests
timeout-minutes: 60
run: |
case "${{ matrix.build }}" in
"make")
MAKE_FLAGS='DYNAMIC_ARCH=1 USE_OPENMP=0'
echo "::group::Tests in 'test' directory"
make -C test $MAKE_FLAGS FC="ccache ${{ matrix.fortran }}"
echo "::endgroup::"
echo "::group::Tests in 'ctest' directory"
make -C ctest $MAKE_FLAGS FC="ccache ${{ matrix.fortran }}"
echo "::endgroup::"
echo "::group::Tests in 'utest' directory"
make -C utest $MAKE_FLAGS FC="ccache ${{ matrix.fortran }}"
echo "::endgroup::"
;;
"cmake")
cd build && ctest
;;
*)
echo "::error::Configuration not supported"
exit 1
;;
esac

View File

@ -2,11 +2,16 @@ name: c910v qemu test
on: [push, pull_request]
concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
cancel-in-progress: true
permissions:
contents: read # to fetch code (actions/checkout)
jobs:
TEST:
if: "github.repository == 'OpenMathLib/OpenBLAS'"
runs-on: ubuntu-latest
env:
xuetie_toolchain: https://occ-oss-prod.oss-cn-hangzhou.aliyuncs.com/resource//1663142514282

View File

@ -2,11 +2,16 @@ name: continuous build
on: [push, pull_request]
concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
cancel-in-progress: true
permissions:
contents: read # to fetch code (actions/checkout)
jobs:
build:
if: "github.repository == 'OpenMathLib/OpenBLAS'"
runs-on: ${{ matrix.os }}
strategy:
@ -146,18 +151,19 @@ jobs:
msys2:
if: "github.repository == 'OpenMathLib/OpenBLAS'"
runs-on: windows-latest
strategy:
fail-fast: false
matrix:
msystem: [MINGW64, MINGW32, CLANG64, CLANG32]
msystem: [UCRT64, MINGW32, CLANG64, CLANG32]
idx: [int32, int64]
build-type: [Release]
include:
- msystem: MINGW64
- msystem: UCRT64
idx: int32
target-prefix: mingw-w64-x86_64
target-prefix: mingw-w64-ucrt-x86_64
fc-pkg: fc
- msystem: MINGW32
idx: int32
@ -175,10 +181,10 @@ jobs:
target-prefix: mingw-w64-clang-i686
fc-pkg: cc
c-lapack-flags: -DC_LAPACK=ON
- msystem: MINGW64
- msystem: UCRT64
idx: int64
idx64-flags: -DBINARY=64 -DINTERFACE64=1
target-prefix: mingw-w64-x86_64
target-prefix: mingw-w64-ucrt-x86_64
fc-pkg: fc
- msystem: CLANG64
idx: int64
@ -188,9 +194,9 @@ jobs:
# Compiling with Flang 16 seems to cause test errors on machines
# with AVX512 instructions. Revisit after MSYS2 distributes Flang 17.
no-avx512-flags: -DNO_AVX512=1
- msystem: MINGW64
- msystem: UCRT64
idx: int32
target-prefix: mingw-w64-x86_64
target-prefix: mingw-w64-ucrt-x86_64
fc-pkg: fc
build-type: None
exclude:
@ -312,6 +318,7 @@ jobs:
cross_build:
if: "github.repository == 'OpenMathLib/OpenBLAS'"
runs-on: ubuntu-22.04
strategy:

View File

@ -2,8 +2,13 @@ name: loongarch64 qemu test
on: [push, pull_request]
concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
cancel-in-progress: true
jobs:
TEST:
if: "github.repository == 'OpenMathLib/OpenBLAS'"
runs-on: ubuntu-latest
strategy:
fail-fast: false
@ -18,6 +23,9 @@ jobs:
- target: LOONGSON2K1000
triple: loongarch64-unknown-linux-gnu
opts: NO_SHARED=1 TARGET=LOONGSON2K1000
- target: DYNAMIC_ARCH
triple: loongarch64-unknown-linux-gnu
opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=GENERIC
steps:
- name: Checkout repository

View File

@ -2,11 +2,16 @@ name: mips64 qemu test
on: [push, pull_request]
concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
cancel-in-progress: true
permissions:
contents: read # to fetch code (actions/checkout)
jobs:
TEST:
if: "github.repository == 'OpenMathLib/OpenBLAS'"
runs-on: ubuntu-latest
strategy:
fail-fast: false

View File

@ -18,11 +18,16 @@ on:
name: Nightly-Homebrew-Build
concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
cancel-in-progress: true
permissions:
contents: read # to fetch code (actions/checkout)
jobs:
build-OpenBLAS-with-Homebrew:
if: "github.repository == 'OpenMathLib/OpenBLAS'"
runs-on: macos-latest
env:
DEVELOPER_DIR: /Applications/Xcode_11.4.1.app/Contents/Developer

View File

@ -1,4 +1,50 @@
OpenBLAS ChangeLog
====================================================================
Version 0.3.25
12-Nov-2023
general:
- improved the error message shown on exceeding the maximum thread count
- improved the code to add supplementary thread buffers in case of overflow
- fixed a potential division by zero in ?ROTG
- improved the ?MATCOPY functions to accept zero-sized rows or columns
- corrected empty prototypes in function declarations
- cleaned up unused declarations in the f2c-converted versions of the LAPACK sources
- fixed compilation with the Cray CCE Compiler suite
- improved link line rewriting to avoid mixed libgomp/libomp builds with clang&gfortran
- worked around OPENMP builds with LLVM14's libomp hanging on FreeBSD
- improved the Makefiles to require less option duplication on "make install"
- imported the following changes from the upcoming release 3.12 of Reference-LAPACK
- deprecate utility functions ?GELQS and ?GEQRS (LAPACK PR 900)
- apply rounding up to workspace calculations done in floating point (LAPACK PR 904)
- avoid overflow in STGEX2/DTGEX2 (LAPACK PR 907)
- fix accumulation in ?LASSQ (LAPACK PR 909)
- fix handling of NaN values in ?GECON (LAPACK PR 926)
- avoid overflow in CBDSQR/ZBDSQR (LAPACK PR 927)
- fix poor vector orthogonalizations in ?ORBDB5/?UNBDB5 (LAPACK PR 928 & 930)
x86-64:
- fixed compile-time autodetection of AMD Ryzen3 and Ryzen4 cpus
- fixed capability-based fallback selection for unknown cpus in DYNAMIC_ARCH
- added AVX512 optimizations for ?ASUM on Sapphire Rapids and Cooper Lake
ARM64:
- fixed building on Apple with homebrew gcc
- fixed building with XCODE 15
- fixed building on A64FX and Cortex A710/X1/X2
- increased the default buffer size for recent ARM server cpus
POWER:
- fixed building with the IBM xlf 16.1.1 compiler
- fixed building with IBM XL C
- added support for DYNAMIC_ARCH builds with clang
- fixed union declaration in the BFLOAT16 test case
- enable optimizations for the AIX assembler on POWER10
LOONGARCH64:
- added an optimized SGEMV kernel
- added an optimized DTRSM kernel
====================================================================
Version 0.3.24
03-Sep-2023

View File

@ -35,7 +35,11 @@ export NO_LAPACK
export C_LAPACK
endif
ifeq ($(F_COMPILER),CRAY)
LAPACK_NOOPT := $(filter-out -O0 -O1 -O2 -O3 -Ofast -Og -Os,$(LAPACK_FFLAGS))
else
LAPACK_NOOPT := $(filter-out -O0 -O1 -O2 -O3 -Ofast -O -Og -Os,$(LAPACK_FFLAGS))
endif
SUBDIRS_ALL = $(SUBDIRS) test ctest utest exports benchmark ../laswp ../bench cpp_thread_test
@ -206,9 +210,25 @@ ifeq ($(DYNAMIC_OLDER), 1)
@echo DYNAMIC_OLDER=1 >> Makefile.conf_last
endif
endif
@echo TARGET=$(CORE) >> Makefile.conf_last
ifdef USE_THREAD
@echo USE_THREAD=$(USE_THREAD) >> Makefile.conf_last
endif
ifdef SMP
ifdef NUM_THREADS
@echo NUM_THREADS=$(NUM_THREADS) >> Makefile.conf_last
else
@echo NUM_THREADS=$(NUM_CORES) >> Makefile.conf_last
endif
endif
ifeq ($(USE_OPENMP),1)
@echo USE_OPENMP=1 >> Makefile.conf_last
endif
ifeq ($(INTERFACE64),1)
@echo INTERFACE64=1 >> Makefile.conf_last
endif
@echo THELIBNAME=$(LIBNAME) >> Makefile.conf_last
@echo THELIBSONAME=$(LIBSONAME) >> Makefile.conf_last
@-ln -fs $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX)
@touch lib.grd

File diff suppressed because it is too large Load Diff

View File

@ -3,6 +3,14 @@ export GOTOBLAS_MAKEFILE = 1
-include $(TOPDIR)/Makefile.conf_last
include ./Makefile.system
ifdef THELIBNAME
LIBNAME=$(THELIBNAME)
LIBSONAME=$(THELIBSONAME)
endif
ifeq ($(INTERFACE64),1)
USE_64BITINT=1
endif
PREFIX ?= /opt/OpenBLAS
OPENBLAS_INCLUDE_DIR := $(PREFIX)/include

View File

@ -13,9 +13,9 @@ ifeq ($(CORE), POWER10)
ifneq ($(C_COMPILER), PGI)
CCOMMON_OPT += -Ofast -mcpu=power10 -mtune=power10 -mvsx -fno-fast-math
ifeq ($(F_COMPILER), IBM)
FCOMMON_OPT += -O2 -qrecur -qnosave
FCOMMON_OPT += -O2 -qrecur -qnosave -qarch=pwr10 -qtune=pwr10 -qfloat=nomaf -qzerosize
else
FCOMMON_OPT += -O2 -frecursive -mcpu=power10 -mtune=power10 -fno-fast-math
FCOMMON_OPT += -O2 -frecursive -mcpu=power10 -mtune=power10 -fno-fast-math
endif
endif
endif
@ -38,19 +38,18 @@ CCOMMON_OPT += -fast -Mvect=simd -Mcache_align
endif
ifneq ($(F_COMPILER), PGI)
ifeq ($(F_COMPILER), IBM)
FCOMMON_OPT += -O2 -qrecur -qnosave
FCOMMON_OPT += -O2 -qrecur -qnosave -qarch=pwr9 -qtune=pwr9 -qfloat=nomaf -qzerosize
else
FCOMMON_OPT += -O2 -frecursive -fno-fast-math
FCOMMON_OPT += -O2 -frecursive -fno-fast-math -mcpu=power9 -mtune=power9
endif
ifeq ($(C_COMPILER), GCC)
ifeq ($(F_COMPILER), GFORTRAN)
ifneq ($(GCCVERSIONGT4), 1)
$(warning your compiler is too old to fully support POWER9, getting a newer version of gcc is recommended)
FCOMMON_OPT += -mcpu=power8 -mtune=power8
else
FCOMMON_OPT += -mcpu=power9 -mtune=power9
endif
else
FCOMMON_OPT += -mcpu=power9 -mtune=power9
endif
else
FCOMMON_OPT += -O2 -Mrecursive
@ -66,12 +65,16 @@ endif
ifneq ($(F_COMPILER), PGI)
ifeq ($(OSNAME), AIX)
ifeq ($(F_COMPILER), IBM)
FCOMMON_OPT += -O2 -qrecur -qnosave
FCOMMON_OPT += -O2 -qrecur -qnosave -qarch=pwr8 -qtune=pwr8 -qfloat=nomaf -qzerosize
else
FCOMMON_OPT += -O1 -frecursive -mcpu=power8 -mtune=power8 -fno-fast-math
FCOMMON_OPT += -O1 -frecursive -mcpu=power8 -mtune=power8 -fno-fast-math
endif
else
FCOMMON_OPT += -O2 -frecursive -mcpu=power8 -mtune=power8 -fno-fast-math
ifeq ($(F_COMPILER), IBM)
FCOMMON_OPT += -O2 -qrecur -qnosave -qarch=pwr8 -qtune=pwr8 -qfloat=nomaf -qzerosize
else
FCOMMON_OPT += -O2 -frecursive -mcpu=power8 -mtune=power8 -fno-fast-math
endif
endif
else
FCOMMON_OPT += -O2 -Mrecursive
@ -84,13 +87,20 @@ CCOMMON_OPT += -DUSE_OPENMP -fopenmp
else
CCOMMON_OPT += -DUSE_OPENMP -mp
endif
ifeq ($(F_COMPILER), IBM)
FCOMMON_OPT += -DUSE_OPENMP
else
ifneq ($(F_COMPILER), PGI)
FCOMMON_OPT += -DUSE_OPENMP -fopenmp
else
FCOMMON_OPT += -DUSE_OPENMP -mp
endif
endif
endif
ifeq ($(C_COMPILER), CLANG)
CCOMMON_OPT += -fno-integrated-as
endif
# workaround for C->FORTRAN ABI violation in LAPACKE
ifeq ($(F_COMPILER), GFORTRAN)
FCOMMON_OPT += -fno-optimize-sibling-calls
@ -125,8 +135,19 @@ endif
ifdef BINARY64
ifeq ($(C_COMPILER)$(F_COMPILER)$(OSNAME), GCCIBMAIX)
$(error Using GCC and XLF on AIX is not a supported combination.)
endif
ifeq ($(C_COMPILER)$(F_COMPILER)$(OSNAME), CLANGGFORTRANAIX)
$(error Using Clang and gFortran on AIX is not a supported combination.)
endif
ifeq ($(OSNAME), AIX)
ifeq ($(C_COMPILER), GCC)
CCOMMON_OPT += -mpowerpc64 -maix64
else
CCOMMON_OPT += -m64
endif
ifeq ($(COMPILER_F77), g77)
FCOMMON_OPT += -mpowerpc64 -maix64
endif

View File

@ -277,10 +277,6 @@ endif
ifndef GOTOBLAS_MAKEFILE
export GOTOBLAS_MAKEFILE = 1
# Determine if the assembler is GNU Assembler
HAVE_GAS := $(shell $(AS) -v < /dev/null 2>&1 | grep GNU 2>&1 >/dev/null ; echo $$?)
GETARCH_FLAGS += -DHAVE_GAS=$(HAVE_GAS)
# Generating Makefile.conf and config.h
DUMMY := $(shell $(MAKE) -C $(TOPDIR) -f Makefile.prebuild CC="$(CC)" FC="$(FC)" HOSTCC="$(HOSTCC)" HOST_CFLAGS="$(GETARCH_FLAGS)" CFLAGS="$(CFLAGS)" BINARY=$(BINARY) USE_OPENMP=$(USE_OPENMP) DYNAMIC_ARCH=$(DYNAMIC_ARCH) TARGET_CORE=$(TARGET_CORE) ONLY_CBLAS=$(ONLY_CBLAS) TARGET=$(TARGET) all)
@ -397,11 +393,21 @@ ifeq ($(OSNAME), Darwin)
ifndef MACOSX_DEPLOYMENT_TARGET
ifeq ($(ARCH), arm64)
export MACOSX_DEPLOYMENT_TARGET=11.0
ifeq ($(C_COMPILER), GCC)
export NO_SVE = 1
endif
else
export MACOSX_DEPLOYMENT_TARGET=10.8
endif
endif
MD5SUM = md5 -r
XCVER = $(shell pkgutil --pkg-info=com.apple.pkg.Xcode |awk '/version:/ {print $2}'|cut -d: -f2|cut -f1 -d.)
ifeq (x$(XCVER)x,xx)
XCVER = $(shell pkgutil --pkg-info=com.apple.pkg.CLTools_Executables |awk '/version:/ {print $2}'|cut -d: -f2|cut -f1 -d.)
endif
ifeq (x$(XCVER), x 15)
CCOMMON_OPT += -Wl,-ld_classic
endif
endif
ifneq (,$(findstring $(OSNAME), FreeBSD OpenBSD DragonFly))
@ -602,6 +608,9 @@ endif
ifeq ($(C_COMPILER), CLANG)
CCOMMON_OPT += -fopenmp
ifeq ($(F_COMPILER), GFORTRAN)
FEXTRALIB := $(subst -lgomp,-lomp,$(FEXTRALIB))
endif
endif
ifeq ($(C_COMPILER), INTEL)
@ -750,7 +759,11 @@ DYNAMIC_CORE += POWER9
else
$(info, OpenBLAS: Your gcc version is too old to build the POWER9 kernels.)
endif
ifeq ($(OSNAME), AIX)
LDVERSIONGTEQ35 := 1
else
LDVERSIONGTEQ35 := $(shell expr `$(CC) -Wl,--version 2> /dev/null | head -1 | cut -f2 -d "." | cut -f1 -d "-"` \>= 35)
endif
ifeq ($(GCCVERSIONGTEQ11)$(LDVERSIONGTEQ35), 11)
DYNAMIC_CORE += POWER10
CCOMMON_OPT += -DHAVE_P10_SUPPORT
@ -1164,6 +1177,10 @@ endif
ifeq ($(F_COMPILER), IBM)
CCOMMON_OPT += -DF_INTERFACE_IBM
FEXTRALIB += -lxlf90
ifeq ($(C_COMPILER), $(filter $(C_COMPILER),GCC CLANG))
FCOMMON_OPT += -qextname
endif
# FCOMMON_OPT += -qarch=440
ifdef BINARY64
FCOMMON_OPT += -q64
@ -1360,6 +1377,8 @@ ifeq ($(F_COMPILER), SUN)
FCOMMON_OPT += -pic
else ifeq ($(F_COMPILER), NAG)
FCOMMON_OPT += -PIC
else ifeq ($(F_COMPILER), IBM)
FCOMMON_OPT += -qpic=large
else
FCOMMON_OPT += -fPIC
endif
@ -1612,9 +1631,11 @@ override FPFLAGS += $(FCOMMON_OPT) $(COMMON_PROF)
ifeq ($(NEED_PIC), 1)
ifeq (,$(findstring PIC,$(FFLAGS)))
ifneq ($(F_COMPILER),IBM)
override FFLAGS += -fPIC
endif
endif
endif
#For LAPACK Fortran codes.
#Disable -fopenmp for LAPACK Fortran codes on Windows.
@ -1628,11 +1649,11 @@ endif
ifeq ($(F_COMPILER),NAG)
LAPACK_FFLAGS := $(filter-out -msse3 -mssse3 -msse4.1 -mavx -mavx2 -mskylake-avx512 ,$(FFLAGS))
FFLAGS := $(filter-out -msse3 -mssse3 -msse4.1 -mavx -mavx2 -mskylake-avx512 ,$(FFLAGS))
override FFLAGS := $(filter-out -msse3 -mssse3 -msse4.1 -mavx -mavx2 -mskylake-avx512 ,$(FFLAGS))
endif
ifeq ($(F_COMPILER),CRAY)
LAPACK_FFLAGS := $(filter-out -msse3 -mssse3 -msse4.1 -mavx -mavx2 -mskylake-avx512 ,$(FFLAGS))
FFLAGS := $(filter-out -msse3 -mssse3 -msse4.1 -mavx -mavx2 -mskylake-avx512 ,$(FFLAGS))
override FFLAGS := $(filter-out -msse3 -mssse3 -msse4.1 -mavx -mavx2 -mskylake-avx512 ,$(FFLAGS))
endif
LAPACK_CFLAGS = $(CFLAGS)

View File

@ -54,10 +54,15 @@ Building OpenBLAS requires the following to be installed:
Simply invoking `make` (or `gmake` on BSD) will detect the CPU automatically.
To set a specific target CPU, use `make TARGET=xxx`, e.g. `make TARGET=NEHALEM`.
The full target list is in the file `TargetList.txt`. For building with `cmake`, the
usual conventions apply, i.e. create a build directory either underneath the toplevel
OpenBLAS source directory or separate from it, and invoke `cmake` there with the path
to the source tree and any build options you plan to set.
The full target list is in the file `TargetList.txt`, other build optionss are documented in Makefile.rule and
can either be set there (typically by removing the comment character from the respective line), or used on the
`make` command line.
Note that when you run `make install` after building, you need to repeat all command line options you provided to `make`
in the build step, as some settings like the supported maximum number of threads are automatically derived from the
build host by default, which might not be what you want.
For building with `cmake`, the usual conventions apply, i.e. create a build directory either underneath the toplevel
OpenBLAS source directory or separate from it, and invoke `cmake` there with the path to the source tree and any
build options you plan to set.
### Cross compile
@ -117,7 +122,7 @@ Use `PREFIX=` when invoking `make`, for example
```sh
make install PREFIX=your_installation_directory
```
(along with all options you added on the `make` command line in the preceding build step)
The default installation directory is `/opt/OpenBLAS`.
## Supported CPUs and Operating Systems
@ -137,7 +142,7 @@ Please read `GotoBLAS_01Readme.txt` for older CPU models already supported by th
- **AMD Bulldozer**: x86-64 ?GEMM FMA4 kernels. (Thanks to Werner Saar)
- **AMD PILEDRIVER**: Uses Bulldozer codes with some optimizations.
- **AMD STEAMROLLER**: Uses Bulldozer codes with some optimizations.
- **AMD ZEN**: Uses Haswell codes with some optimizations.
- **AMD ZEN**: Uses Haswell codes with some optimizations for Zen 2/3 (use SkylakeX for Zen4)
#### MIPS32
@ -169,13 +174,16 @@ Please read `GotoBLAS_01Readme.txt` for older CPU models already supported by th
- **TSV110**: Optimized some Level-3 helper functions
- **EMAG 8180**: preliminary support based on A57
- **Neoverse N1**: (AWS Graviton2) preliminary support
- **Apple Vortex**: preliminary support based on ARMV8
- **Neoverse V1**: (AWS Graviton3) optimized Level-3 BLAS
- **Apple Vortex**: preliminary support based on ThunderX2/3
- **A64FX**: preliminary support, optimized Level-3 BLAS
- **ARMV8SVE**: any ARMV8 cpu with SVE extensions
#### PPC/PPC64
- **POWER8**: Optimized BLAS, only for PPC64LE (Little Endian), only with `USE_OPENMP=1`
- **POWER9**: Optimized Level-3 BLAS (real) and some Level-1,2. PPC64LE with OpenMP only.
- **POWER10**:
- **POWER10**: Optimized Level-3 BLAS including SBGEMM and some Level-1,2.
#### IBM zEnterprise System

View File

@ -167,11 +167,10 @@ jobs:
- job: OSX_OpenMP_Clang
pool:
vmImage: 'macOS-11'
vmImage: 'macOS-latest'
variables:
LD_LIBRARY_PATH: /usr/local/opt/llvm/lib
LIBRARY_PATH: /usr/local/opt/llvm/lib
MACOSX_DEPLOYMENT_TARGET: 11.0
steps:
- script: |
brew update
@ -180,7 +179,7 @@ jobs:
- job: OSX_OpenMP_Clang_cmake
pool:
vmImage: 'macOS-11'
vmImage: 'macOS-latest'
variables:
LD_LIBRARY_PATH: /usr/local/opt/llvm/lib
LIBRARY_PATH: /usr/local/opt/llvm/lib
@ -210,7 +209,7 @@ jobs:
- job: OSX_Ifort_Clang
pool:
vmImage: 'macOS-11'
vmImage: 'macOS-latest'
variables:
LD_LIBRARY_PATH: /usr/local/opt/llvm/lib
MACOS_HPCKIT_URL: https://registrationcenter-download.intel.com/akdlm/irc_nas/17643/m_HPCKit_p_2021.2.0.2903_offline.dmg

18
c_check
View File

@ -96,11 +96,19 @@ esac
defined=0
if [ "$os" = "AIX" ]; then
case "$BINARY" in
32) compiler_name="$compiler_name -maix32" ;;
64) compiler_name="$compiler_name -maix64" ;;
esac
defined=1
if [ "$compiler" = "GCC" ]; then
case "$BINARY" in
32) compiler_name="$compiler_name -maix32" ;;
64) compiler_name="$compiler_name -maix64" ;;
esac
defined=1
else
case "$BINARY" in
32) compiler_name="$compiler_name -m32" ;;
64) compiler_name="$compiler_name -m64" ;;
esac
defined=1
fi
fi
case "$architecture" in

View File

@ -438,15 +438,19 @@ endif()
if(BUILD_LAPACK_DEPRECATED)
list(APPEND SLASRC DEPRECATED/sgegs.f DEPRECATED/sgegv.f
DEPRECATED/sgelqs.f DEPRECATED/sgeqrs.f
DEPRECATED/sgeqpf.f DEPRECATED/sgelsx.f DEPRECATED/sggsvd.f
DEPRECATED/sggsvp.f DEPRECATED/slahrd.f DEPRECATED/slatzm.f DEPRECATED/stzrqf.f)
list(APPEND DLASRC DEPRECATED/dgegs.f DEPRECATED/dgegv.f
DEPRECATED/dgelqs.f DEPRECATED/dgeqrs.f
DEPRECATED/dgeqpf.f DEPRECATED/dgelsx.f DEPRECATED/dggsvd.f
DEPRECATED/dggsvp.f DEPRECATED/dlahrd.f DEPRECATED/dlatzm.f DEPRECATED/dtzrqf.f)
list(APPEND CLASRC DEPRECATED/cgegs.f DEPRECATED/cgegv.f
DEPRECATED/cgelqs.f DEPRECATED/cgeqrs.f
DEPRECATED/cgeqpf.f DEPRECATED/cgelsx.f DEPRECATED/cggsvd.f
DEPRECATED/cggsvp.f DEPRECATED/clahrd.f DEPRECATED/clatzm.f DEPRECATED/ctzrqf.f)
list(APPEND ZLASRC DEPRECATED/zgegs.f DEPRECATED/zgegv.f
DEPRECATED/zgelqs.f DEPRECATED/zgeqrs.f
DEPRECATED/zgeqpf.f DEPRECATED/zgelsx.f DEPRECATED/zggsvd.f
DEPRECATED/zggsvp.f DEPRECATED/zlahrd.f DEPRECATED/zlatzm.f DEPRECATED/ztzrqf.f)
message(STATUS "Building deprecated routines")
@ -935,15 +939,19 @@ endif()
if(BUILD_LAPACK_DEPRECATED)
list(APPEND SLASRC DEPRECATED/sgegs.c DEPRECATED/sgegv.c
DEPRECATED/sgelqs.c DEPRECATED/sgeqrs.c
DEPRECATED/sgeqpf.c DEPRECATED/sgelsx.c DEPRECATED/sggsvd.c
DEPRECATED/sggsvp.c DEPRECATED/slahrd.c DEPRECATED/slatzm.c DEPRECATED/stzrqf.c)
list(APPEND DLASRC DEPRECATED/dgegs.c DEPRECATED/dgegv.c
DEPRECATED/dgelqs.c DEPRECATED/dgeqrs.c
DEPRECATED/dgeqpf.c DEPRECATED/dgelsx.c DEPRECATED/dggsvd.c
DEPRECATED/dggsvp.c DEPRECATED/dlahrd.c DEPRECATED/dlatzm.c DEPRECATED/dtzrqf.c)
list(APPEND CLASRC DEPRECATED/cgegs.c DEPRECATED/cgegv.c
DEPRECATED/cgelqs.c DEPRECATED/cgeqrs.c
DEPRECATED/cgeqpf.c DEPRECATED/cgelsx.c DEPRECATED/cggsvd.c
DEPRECATED/cggsvp.c DEPRECATED/clahrd.c DEPRECATED/clatzm.c DEPRECATED/ctzrqf.c)
list(APPEND ZLASRC DEPRECATED/zgegs.c DEPRECATED/zgegv.c
DEPRECATED/zgelqs.c DEPRECATED/zgeqrs.c
DEPRECATED/zgeqpf.c DEPRECATED/zgelsx.c DEPRECATED/zggsvd.c
DEPRECATED/zggsvp.c DEPRECATED/zlahrd.c DEPRECATED/zlatzm.c DEPRECATED/ztzrqf.c)
message(STATUS "Building deprecated routines")

View File

@ -162,7 +162,11 @@ REALNAME:
#define HUGE_PAGESIZE ( 4 << 20)
#ifndef BUFFERSIZE
#if defined(NEOVERSEN1) || defined(NEOVERSEN2) || defined(NEOVERSEV1) || defined(A64FX) || defined(ARMV8SVE)
#define BUFFER_SIZE (32 << 22)
#else
#define BUFFER_SIZE (32 << 20)
#endif
#else
#define BUFFER_SIZE (32 << BUFFERSIZE)
#endif

View File

@ -192,27 +192,27 @@ int exec_blas(BLASLONG num_cpu, blas_param_t *param, void *buffer);
int blas_level1_thread(int mode, BLASLONG m, BLASLONG n, BLASLONG k, void *alpha,
void *a, BLASLONG lda,
void *b, BLASLONG ldb,
void *c, BLASLONG ldc, int (*function)(), int threads);
void *c, BLASLONG ldc, int (*function)(void), int threads);
int gemm_thread_m (int mode, blas_arg_t *, BLASLONG *, BLASLONG *, int (*function)(), void *, void *, BLASLONG);
int gemm_thread_m (int mode, blas_arg_t *, BLASLONG *, BLASLONG *, int (*function)(blas_arg_t*, BLASLONG*, BLASLONG*,FLOAT *, FLOAT *, BLASLONG ), void *, void *, BLASLONG);
int gemm_thread_n (int mode, blas_arg_t *, BLASLONG *, BLASLONG *, int (*function)(), void *, void *, BLASLONG);
int gemm_thread_n (int mode, blas_arg_t *, BLASLONG *, BLASLONG *, int (*function)(blas_arg_t*, BLASLONG*, BLASLONG*,FLOAT*, FLOAT*, BLASLONG), void *, void *, BLASLONG);
int gemm_thread_mn(int mode, blas_arg_t *, BLASLONG *, BLASLONG *, int (*function)(), void *, void *, BLASLONG);
int gemm_thread_mn(int mode, blas_arg_t *, BLASLONG *, BLASLONG *, int (*function)(blas_arg_t*, BLASLONG*, BLASLONG*,FLOAT *, FLOAT *, BLASLONG), void *, void *, BLASLONG);
int gemm_thread_variable(int mode, blas_arg_t *, BLASLONG *, BLASLONG *, int (*function)(), void *, void *, BLASLONG, BLASLONG);
int gemm_thread_variable(int mode, blas_arg_t *, BLASLONG *, BLASLONG *, int (*function)(blas_arg_t*, BLASLONG*, BLASLONG*,FLOAT *, FLOAT *, BLASLONG), void *, void *, BLASLONG, BLASLONG);
int trsm_thread(int mode, BLASLONG m, BLASLONG n,
double alpha_r, double alpha_i,
void *a, BLASLONG lda,
void *c, BLASLONG ldc, int (*function)(), void *buffer);
void *c, BLASLONG ldc, int (*function)(void), void *buffer);
int syrk_thread(int mode, blas_arg_t *, BLASLONG *, BLASLONG *, int (*function)(), void *, void *, BLASLONG);
int syrk_thread(int mode, blas_arg_t *, BLASLONG *, BLASLONG *, int (*function)(blas_arg_t*, BLASLONG*, BLASLONG*, FLOAT *, FLOAT *, BLASLONG), void*, void*, BLASLONG);
int getrf_thread(int mode, BLASLONG m, BLASLONG n, BLASLONG k,
void *offsetA, BLASLONG lda,
void *offsetB, BLASLONG jb,
void *ipiv, BLASLONG offset, int (*function)(), void *buffer);
void *ipiv, BLASLONG offset, int (*function)(void), void *buffer);
#endif /* ENDIF ASSEMBLER */

View File

@ -270,6 +270,7 @@ int detect(void)
sysctlbyname("hw.cpufamily",&value64,&length64,NULL,0);
if (value64 ==131287967|| value64 == 458787763 ) return CPU_VORTEX; //A12/M1
if (value64 == 3660830781) return CPU_VORTEX; //A15/M2
if (value64 == 2271604202) return CPU_VORTEX; //A16/M3
#endif
return CPU_ARMV8;
#endif

View File

@ -194,7 +194,7 @@ static C_INLINE void xgetbv(int op, int * eax, int * edx){
}
#endif
int support_avx(){
int support_avx(void){
#ifndef NO_AVX
int eax, ebx, ecx, edx;
int ret=0;
@ -212,7 +212,7 @@ int support_avx(){
#endif
}
int support_avx2(){
int support_avx2(void){
#ifndef NO_AVX2
int eax, ebx, ecx=0, edx;
int ret=0;
@ -228,7 +228,7 @@ int support_avx2(){
#endif
}
int support_avx512(){
int support_avx512(void){
#if !defined(NO_AVX) && !defined(NO_AVX512)
int eax, ebx, ecx, edx;
int ret=0;
@ -250,7 +250,7 @@ int support_avx512(){
#endif
}
int support_avx512_bf16(){
int support_avx512_bf16(void){
#if !defined(NO_AVX) && !defined(NO_AVX512)
int eax, ebx, ecx, edx;
int ret=0;
@ -271,7 +271,7 @@ int support_avx512_bf16(){
#define BIT_AMX_BF16 0x00400000
#define BIT_AMX_ENBD 0x00060000
int support_amx_bf16() {
int support_amx_bf16(void) {
#if !defined(NO_AVX) && !defined(NO_AVX512)
int eax, ebx, ecx, edx;
int ret=0;
@ -1660,7 +1660,13 @@ int get_cpuname(void){
else
return CPUTYPE_BARCELONA;
}
case 10: // Zen3
case 10: // Zen3/4
#ifndef NO_AVX512
if(support_avx512_bf16())
return CPUTYPE_COOPERLAKE;
if(support_avx512())
return CPUTYPE_SKYLAKEX;
#endif
if(support_avx())
#ifndef NO_AVX2
return CPUTYPE_ZEN;
@ -2438,6 +2444,12 @@ int get_coretype(void){
// Ryzen 2
default:
// Matisse,Renoir Ryzen2 models
#ifndef NO_AVX512
if(support_avx512_bf16())
return CORE_COOPERLAKE;
if(support_avx512())
return CORE_SKYLAKEX;
#endif
if(support_avx())
#ifndef NO_AVX2
return CORE_ZEN;

View File

@ -214,6 +214,11 @@ endif
ifeq ($(F_COMPILER), NAG)
CEXTRALIB = -lgomp
endif
ifeq ($(F_COMPILER), IBM)
ifeq ($(C_COMPILER), GCC)
CEXTRALIB += -lgomp
endif
endif
endif
ifeq ($(BUILD_SINGLE),1)

View File

@ -242,251 +242,6 @@ typedef struct Namelist Namelist;
/* procedure parameter types for -A and -C++ */
#define F2C_proc_par_types 1
#ifdef __cplusplus
typedef logical (*L_fp)(...);
#else
typedef logical (*L_fp)();
#endif
#if 0
static float spow_ui(float x, integer n) {
float pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
static double dpow_ui(double x, integer n) {
double pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
#ifdef _MSC_VER
static _Fcomplex cpow_ui(complex x, integer n) {
complex pow={1.0,0.0}; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i;
for(u = n; ; ) {
if(u & 01) pow.r *= x.r, pow.i *= x.i;
if(u >>= 1) x.r *= x.r, x.i *= x.i;
else break;
}
}
_Fcomplex p={pow.r, pow.i};
return p;
}
#else
static _Complex float cpow_ui(_Complex float x, integer n) {
_Complex float pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
#endif
#ifdef _MSC_VER
static _Dcomplex zpow_ui(_Dcomplex x, integer n) {
_Dcomplex pow={1.0,0.0}; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1];
for(u = n; ; ) {
if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1];
if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1];
else break;
}
}
_Dcomplex p = {pow._Val[0], pow._Val[1]};
return p;
}
#else
static _Complex double zpow_ui(_Complex double x, integer n) {
_Complex double pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
#endif
static integer pow_ii(integer x, integer n) {
integer pow; unsigned long int u;
if (n <= 0) {
if (n == 0 || x == 1) pow = 1;
else if (x != -1) pow = x == 0 ? 1/x : 0;
else n = -n;
}
if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
u = n;
for(pow = 1; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
static integer dmaxloc_(double *w, integer s, integer e, integer *n)
{
double m; integer i, mi;
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
if (w[i-1]>m) mi=i ,m=w[i-1];
return mi-s+1;
}
static integer smaxloc_(float *w, integer s, integer e, integer *n)
{
float m; integer i, mi;
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
if (w[i-1]>m) mi=i ,m=w[i-1];
return mi-s+1;
}
#endif
#if 0
static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
integer n = *n_, incx = *incx_, incy = *incy_, i;
#ifdef _MSC_VER
_Fcomplex zdotc = {0.0, 0.0};
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += conjf(Cf(&x[i]))._Val[0] * Cf(&y[i])._Val[0];
zdotc._Val[1] += conjf(Cf(&x[i]))._Val[1] * Cf(&y[i])._Val[1];
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += conjf(Cf(&x[i*incx]))._Val[0] * Cf(&y[i*incy])._Val[0];
zdotc._Val[1] += conjf(Cf(&x[i*incx]))._Val[1] * Cf(&y[i*incy])._Val[1];
}
}
pCf(z) = zdotc;
}
#else
_Complex float zdotc = 0.0;
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
}
}
pCf(z) = zdotc;
}
#endif
static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
integer n = *n_, incx = *incx_, incy = *incy_, i;
#ifdef _MSC_VER
_Dcomplex zdotc = {0.0, 0.0};
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += conj(Cd(&x[i]))._Val[0] * Cd(&y[i])._Val[0];
zdotc._Val[1] += conj(Cd(&x[i]))._Val[1] * Cd(&y[i])._Val[1];
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += conj(Cd(&x[i*incx]))._Val[0] * Cd(&y[i*incy])._Val[0];
zdotc._Val[1] += conj(Cd(&x[i*incx]))._Val[1] * Cd(&y[i*incy])._Val[1];
}
}
pCd(z) = zdotc;
}
#else
_Complex double zdotc = 0.0;
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
}
}
pCd(z) = zdotc;
}
#endif
static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
integer n = *n_, incx = *incx_, incy = *incy_, i;
#ifdef _MSC_VER
_Fcomplex zdotc = {0.0, 0.0};
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += Cf(&x[i])._Val[0] * Cf(&y[i])._Val[0];
zdotc._Val[1] += Cf(&x[i])._Val[1] * Cf(&y[i])._Val[1];
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += Cf(&x[i*incx])._Val[0] * Cf(&y[i*incy])._Val[0];
zdotc._Val[1] += Cf(&x[i*incx])._Val[1] * Cf(&y[i*incy])._Val[1];
}
}
pCf(z) = zdotc;
}
#else
_Complex float zdotc = 0.0;
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += Cf(&x[i]) * Cf(&y[i]);
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
}
}
pCf(z) = zdotc;
}
#endif
static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
integer n = *n_, incx = *incx_, incy = *incy_, i;
#ifdef _MSC_VER
_Dcomplex zdotc = {0.0, 0.0};
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += Cd(&x[i])._Val[0] * Cd(&y[i])._Val[0];
zdotc._Val[1] += Cd(&x[i])._Val[1] * Cd(&y[i])._Val[1];
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += Cd(&x[i*incx])._Val[0] * Cd(&y[i*incy])._Val[0];
zdotc._Val[1] += Cd(&x[i*incx])._Val[1] * Cd(&y[i*incy])._Val[1];
}
}
pCd(z) = zdotc;
}
#else
_Complex double zdotc = 0.0;
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += Cd(&x[i]) * Cd(&y[i]);
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
}
}
pCd(z) = zdotc;
}
#endif
#endif
/* Common Block Declarations */
@ -503,16 +258,16 @@ static integer c__1 = 1;
static integer c__5 = 5;
static real c_b43 = (float)1.;
/* Main program */ int main()
/* Main program */ int main(void)
{
/* Initialized data */
static real sfac = (float)9.765625e-4;
/* Local variables */
extern /* Subroutine */ int check1_(), check2_();
extern /* Subroutine */ int check1_(real*), check2_(real*);
static integer ic;
extern /* Subroutine */ int header_();
extern /* Subroutine */ int header_(void);
/* Test program for the COMPLEX Level 1 CBLAS. */
/* Based upon the original CBLAS test routine together with: */
@ -553,7 +308,7 @@ static real c_b43 = (float)1.;
} /* MAIN__ */
/* Subroutine */ int header_()
/* Subroutine */ int header_(void)
{
/* Initialized data */
@ -564,7 +319,7 @@ static real c_b43 = (float)1.;
/* Format strings */
/* Builtin functions */
integer s_wsfe(), do_fio(), e_wsfe();
integer s_wsfe(void), do_fio(void), e_wsfe(void);
/* .. Parameters .. */
/* .. Scalars in Common .. */
@ -577,8 +332,7 @@ static real c_b43 = (float)1.;
} /* header_ */
/* Subroutine */ int check1_(sfac)
real *sfac;
/* Subroutine */ int check1_(real* sfac)
{
/* Initialized data */
@ -683,15 +437,15 @@ real *sfac;
/* Local variables */
static integer i__;
extern /* Subroutine */ int ctest_();
extern /* Subroutine */ int ctest_(integer*, complex*, complex*, complex*, real*);
static complex mwpcs[5], mwpct[5];
extern /* Subroutine */ int itest1_(), stest1_();
extern /* Subroutine */ int itest1_(integer*, integer*), stest1_(real*,real*,real*,real*);
static complex cx[8];
extern real scnrm2test_();
extern real scnrm2test_(integer*, complex*, integer*);
static integer np1;
extern integer icamaxtest_();
extern /* Subroutine */ int csscaltest_();
extern real scasumtest_();
extern integer icamaxtest_(integer*, complex*, integer*);
extern /* Subroutine */ int csscaltest_(integer*, real*, complex*, integer*);
extern real scasumtest_(integer*, complex*, integer*);
static integer len;
/* .. Parameters .. */
@ -808,8 +562,7 @@ real *sfac;
return 0;
} /* check1_ */
/* Subroutine */ int check2_(sfac)
real *sfac;
/* Subroutine */ int check2_(real* sfac)
{
/* Initialized data */
@ -981,10 +734,10 @@ real *sfac;
static complex cdot[1];
static integer lenx, leny, i__;
static complex ctemp;
extern /* Subroutine */ int ctest_();
extern /* Subroutine */ int ctest_(integer*, complex*, complex*, complex*, real*);
static integer ksize;
extern /* Subroutine */ int cdotctest_(), ccopytest_(), cdotutest_(),
cswaptest_(), caxpytest_();
extern /* Subroutine */ int cdotctest_(integer*, complex*, integer*, complex*, integer*,complex*), ccopytest_(integer*, complex*, integer*, complex*, integer*), cdotutest_(integer*, complex*, integer*, complex*, integer*, complex*),
cswaptest_(integer*, complex*, integer*, complex*, integer*), caxpytest_(integer*, complex*, complex*, integer*, complex*, integer*);
static integer ki, kn;
static complex cx[7], cy[7];
static integer mx, my;
@ -1067,9 +820,7 @@ real *sfac;
return 0;
} /* check2_ */
/* Subroutine */ int stest_(len, scomp, strue, ssize, sfac)
integer *len;
real *scomp, *strue, *ssize, *sfac;
/* Subroutine */ int stest_(integer* len, real* scomp, real* strue, real* ssize,real* sfac)
{
/* System generated locals */
integer i__1;
@ -1077,7 +828,7 @@ real *scomp, *strue, *ssize, *sfac;
/* Local variables */
static integer i__;
extern doublereal sdiff_();
extern doublereal sdiff_(real*, real*);
static real sd;
/* ********************************* STEST ************************** */
@ -1133,11 +884,10 @@ L40:
} /* stest_ */
/* Subroutine */ int stest1_(scomp1, strue1, ssize, sfac)
real *scomp1, *strue1, *ssize, *sfac;
/* Subroutine */ int stest1_(real* scomp1, real* strue1, real* ssize, real* sfac)
{
static real scomp[1], strue[1];
extern /* Subroutine */ int stest_();
extern /* Subroutine */ int stest_(integer*, real*, real*, real*, real*);
/* ************************* STEST1 ***************************** */
@ -1164,8 +914,7 @@ real *scomp1, *strue1, *ssize, *sfac;
return 0;
} /* stest1_ */
doublereal sdiff_(sa, sb)
real *sa, *sb;
doublereal sdiff_(real* sa, real* sb)
{
/* System generated locals */
real ret_val;
@ -1179,10 +928,7 @@ real *sa, *sb;
return ret_val;
} /* sdiff_ */
/* Subroutine */ int ctest_(len, ccomp, ctrue, csize, sfac)
integer *len;
complex *ccomp, *ctrue, *csize;
real *sfac;
/* Subroutine */ int ctest_(integer* len, complex* ccomp, complex* ctrue, complex* csize, real* sfac)
{
/* System generated locals */
integer i__1, i__2;
@ -1193,7 +939,7 @@ real *sfac;
/* Local variables */
static integer i__;
static real scomp[20], ssize[20], strue[20];
extern /* Subroutine */ int stest_();
extern /* Subroutine */ int stest_(integer*, real*,real*,real*,real*);
/* **************************** CTEST ***************************** */
@ -1231,8 +977,7 @@ real *sfac;
return 0;
} /* ctest_ */
/* Subroutine */ int itest1_(icomp, itrue)
integer *icomp, *itrue;
/* Subroutine */ int itest1_(integer* icomp, integer* itrue)
{
/* Local variables */
static integer id;

View File

@ -242,129 +242,6 @@ typedef struct Namelist Namelist;
/* procedure parameter types for -A and -C++ */
#define F2C_proc_par_types 1
#ifdef __cplusplus
typedef logical (*L_fp)(...);
#else
typedef logical (*L_fp)();
#endif
#if 0
static float spow_ui(float x, integer n) {
float pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
static double dpow_ui(double x, integer n) {
double pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
#ifdef _MSC_VER
static _Fcomplex cpow_ui(complex x, integer n) {
complex pow={1.0,0.0}; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i;
for(u = n; ; ) {
if(u & 01) pow.r *= x.r, pow.i *= x.i;
if(u >>= 1) x.r *= x.r, x.i *= x.i;
else break;
}
}
_Fcomplex p={pow.r, pow.i};
return p;
}
#else
static _Complex float cpow_ui(_Complex float x, integer n) {
_Complex float pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
#endif
#ifdef _MSC_VER
static _Dcomplex zpow_ui(_Dcomplex x, integer n) {
_Dcomplex pow={1.0,0.0}; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1];
for(u = n; ; ) {
if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1];
if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1];
else break;
}
}
_Dcomplex p = {pow._Val[0], pow._Val[1]};
return p;
}
#else
static _Complex double zpow_ui(_Complex double x, integer n) {
_Complex double pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
#endif
static integer pow_ii(integer x, integer n) {
integer pow; unsigned long int u;
if (n <= 0) {
if (n == 0 || x == 1) pow = 1;
else if (x != -1) pow = x == 0 ? 1/x : 0;
else n = -n;
}
if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
u = n;
for(pow = 1; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
static integer dmaxloc_(double *w, integer s, integer e, integer *n)
{
double m; integer i, mi;
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
if (w[i-1]>m) mi=i ,m=w[i-1];
return mi-s+1;
}
static integer smaxloc_(float *w, integer s, integer e, integer *n)
{
float m; integer i, mi;
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
if (w[i-1]>m) mi=i ,m=w[i-1];
return mi-s+1;
}
#endif
/* -- translated by f2c (version 20000121).
You must link the resulting object file with the libraries:
-lf2c -lm (in that order)
*/
/* Common Block Declarations */
@ -396,7 +273,7 @@ static integer c_n1 = -1;
static integer c__0 = 0;
static logical c_false = FALSE_;
/* Main program */ int main()
/* Main program */ int main(void)
{
/* Initialized data */
@ -414,17 +291,21 @@ static logical c_false = FALSE_;
static logical same;
static integer ninc, nbet, ntra;
static logical rewi;
extern /* Subroutine */ int cchk1_(), cchk2_(), cchk3_(), cchk4_(),
cchk5_(), cchk6_();
extern /* Subroutine */ int cchk1_(char*, real*, real*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, integer*, integer*, complex*, integer*, complex*, integer*, integer*, integer*, integer*, complex*, complex*, complex*, complex*, complex*, complex*, complex*, complex*, complex*, complex*, real*, integer*, ftnlen);
extern /* Subroutine */ int cchk2_(char*, real*, real*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, integer*, integer*, complex*, integer*, complex*, integer*, integer*, integer*, integer*, complex*, complex*, complex*, complex*, complex*, complex*, complex*, complex*, complex*, complex*, real*, integer*, ftnlen);
extern /* Subroutine */ int cchk3_(char*, real*, real*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, integer*, integer*, integer*, integer*, integer*, complex*, complex*, complex*, complex*, complex*, complex*, complex*, real*, complex*, integer*, ftnlen);
extern /* Subroutine */ int cchk4_(char*, real*, real*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, complex*, integer*, integer*, integer*, integer*, complex*, complex*, complex*, complex*, complex*, complex*, complex*, complex*, complex*, complex*, real*, complex*, integer*, ftnlen);
extern /* Subroutine */ int cchk5_(char*, real*, real*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, complex*, integer*, integer*, integer*, integer*, complex*, complex*, complex*, complex*, complex*, complex*, complex*, complex*, complex*, complex*, real*, complex*, integer*, ftnlen);
extern /* Subroutine */ int cchk6_(char*, real*, real*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, complex*, integer*, integer*, integer*, integer*, complex*, complex*, complex*, complex*, complex*, complex*, complex*, complex*, complex*, complex*, real*, complex*, integer*, ftnlen);
static complex a[4225] /* was [65][65] */;
static real g[65];
static integer i__, j, n;
static logical fatal;
static complex x[65], y[65], z__[130];
extern doublereal sdiff_();
extern doublereal sdiff_(real*, real*);
static logical trace;
static integer nidim;
extern /* Subroutine */ int cmvch_();
extern /* Subroutine */ int cmvch_(char*, integer*, integer*, complex*, complex*, integer*, complex*, integer*, complex*, complex*, integer*, complex*, real*, complex*, real*, real*, logical*, integer*, logical*, ftnlen);
static char snaps[32], trans[1];
static integer isnum;
static logical ltest[17];
@ -438,11 +319,11 @@ static logical c_false = FALSE_;
static char snamet[12];
static real thresh;
static logical rorder;
extern /* Subroutine */ int cc2chke_();
extern /* Subroutine */ void cc2chke_(char*, ftnlen);
static integer layout;
static logical ltestt, tsterr;
static complex alf[7];
extern logical lce_();
extern logical lce_(complex*, complex*, integer*);
static integer inc[7], nkb;
static complex bet[7];
static real eps, err;
@ -983,22 +864,7 @@ L240:
} /* MAIN__ */
/* Subroutine */ int cchk1_(sname, eps, thresh, nout, ntra, trace, rewi,
fatal, nidim, idim, nkb, kb, nalf, alf, nbet, bet, ninc, inc, nmax,
incmax, a, aa, as, x, xx, xs, y, yy, ys, yt, g, iorder, sname_len)
char *sname;
real *eps, *thresh;
integer *nout, *ntra;
logical *trace, *rewi, *fatal;
integer *nidim, *idim, *nkb, *kb, *nalf;
complex *alf;
integer *nbet;
complex *bet;
integer *ninc, *inc, *nmax, *incmax;
complex *a, *aa, *as, *x, *xx, *xs, *y, *yy, *ys, *yt;
real *g;
integer *iorder;
ftnlen sname_len;
/* Subroutine */ int cchk1_(char* sname, real* eps, real* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nkb, integer* kb, integer* nalf, complex* alf, integer* nbet, complex* bet, integer* ninc, integer* inc, integer* nmax, integer* incmax, complex* a, complex* aa, complex* as, complex* x, complex* xx, complex* xs, complex* y, complex* yy, complex* ys, complex* yt, real* g, integer* iorder, ftnlen sname_len)
{
/* Initialized data */
@ -1015,10 +881,10 @@ ftnlen sname_len;
static integer incx, incy;
static logical full, tran, null;
static integer i__, m, n;
extern /* Subroutine */ int cmake_();
extern /* Subroutine */ int cmake_(char*, char*, char*, integer*, integer*, complex*, integer*, complex*, integer*, integer*, integer*, logical*, complex*, ftnlen, ftnlen, ftnlen);
static complex alpha;
static logical isame[13];
extern /* Subroutine */ int cmvch_();
extern /* Subroutine */ int cmvch_(char*, integer*, integer*, complex*, complex*, integer*, complex*, integer*, complex*, complex*, integer*, complex*, real*, complex*, real*, real*, logical*, integer*, logical*, ftnlen);
static integer nargs;
static logical reset;
static integer incxs, incys;
@ -1026,14 +892,15 @@ ftnlen sname_len;
static integer ia, ib, ic;
static logical banded;
static integer nc, nd, im, in, kl, ml, nk, nl, ku, ix, iy, ms, lx, ly, ns;
extern /* Subroutine */ int ccgbmv_(), ccgemv_();
extern logical lceres_();
extern /* Subroutine */ int ccgbmv_(integer*, char*, integer*, integer*, integer*, integer*, complex*, complex*, integer*, complex*, integer*, complex*, complex*, integer*, ftnlen);
extern /* Subroutine */ void ccgemv_(integer*, char*, integer*, integer*, complex*, complex*, integer*, complex*, integer*, complex*, complex*, integer*, ftnlen);
extern logical lceres_(char*, char*, integer*, integer*, complex*, complex*, integer*, ftnlen, ftnlen);
static char ctrans[14];
static real errmax;
static complex transl;
static char transs[1];
static integer laa, lda;
extern logical lce_();
extern logical lce_(complex*, complex*, integer*);
static complex als, bls;
static real err;
static integer iku, kls, kus;
@ -1448,22 +1315,7 @@ L140:
} /* cchk1_ */
/* Subroutine */ int cchk2_(sname, eps, thresh, nout, ntra, trace, rewi,
fatal, nidim, idim, nkb, kb, nalf, alf, nbet, bet, ninc, inc, nmax,
incmax, a, aa, as, x, xx, xs, y, yy, ys, yt, g, iorder, sname_len)
char *sname;
real *eps, *thresh;
integer *nout, *ntra;
logical *trace, *rewi, *fatal;
integer *nidim, *idim, *nkb, *kb, *nalf;
complex *alf;
integer *nbet;
complex *bet;
integer *ninc, *inc, *nmax, *incmax;
complex *a, *aa, *as, *x, *xx, *xs, *y, *yy, *ys, *yt;
real *g;
integer *iorder;
ftnlen sname_len;
/* Subroutine */ int cchk2_(char* sname, real* eps, real* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nkb, integer* kb, integer* nalf, complex* alf, integer* nbet, complex* bet, integer* ninc, integer* inc, integer* nmax, integer* incmax, complex* a, complex* aa, complex* as, complex* x, complex* xx, complex* xs, complex* y, complex* yy, complex* ys, complex* yt, real* g, integer* iorder, ftnlen sname_len)
{
/* Initialized data */
@ -1481,10 +1333,10 @@ ftnlen sname_len;
static logical full, null;
static char uplo[1];
static integer i__, k, n;
extern /* Subroutine */ int cmake_();
extern /* Subroutine */ int cmake_(char*, char*, char*, integer*, integer*, complex*, integer*, complex*, integer*, integer*, integer*, logical*, complex*, ftnlen, ftnlen, ftnlen);
static complex alpha;
static logical isame[13];
extern /* Subroutine */ int cmvch_();
extern /* Subroutine */ int cmvch_(char*, integer*, integer*, complex*, complex*, integer*, complex*, integer*, complex*, complex*, integer*, complex*, real*, complex*, real*, real*, logical*, integer*, logical*, ftnlen);
static integer nargs;
static logical reset;
static char cuplo[14];
@ -1495,13 +1347,14 @@ ftnlen sname_len;
static integer nc, ik, in;
static logical packed;
static integer nk, ks, ix, iy, ns, lx, ly;
extern /* Subroutine */ int cchbmv_(), cchemv_();
extern logical lceres_();
extern /* Subroutine */ int cchpmv_();
extern /* Subroutine */ void cchbmv_(integer*, char*, integer*, integer*, complex*, complex*, integer*, complex*, integer*, complex*, complex*, integer*, ftnlen);
extern /* Subroutine */ void cchemv_(integer*, char*, integer*, complex*, complex*, integer*, complex*, integer*, complex*, complex*, integer*, ftnlen);
extern logical lceres_(char*, char*, integer*, integer*, complex*, complex*, integer*, ftnlen, ftnlen);
extern /* Subroutine */ void cchpmv_(integer*, char*, integer*, complex*, complex*, complex*, integer*, complex*, complex*, integer*, ftnlen);
static real errmax;
static complex transl;
static integer laa, lda;
extern logical lce_();
extern logical lce_(complex*, complex*, integer*);
static complex als, bls;
static real err;
@ -1906,19 +1759,7 @@ L130:
} /* cchk2_ */
/* Subroutine */ int cchk3_(sname, eps, thresh, nout, ntra, trace, rewi,
fatal, nidim, idim, nkb, kb, ninc, inc, nmax, incmax, a, aa, as, x,
xx, xs, xt, g, z__, iorder, sname_len)
char *sname;
real *eps, *thresh;
integer *nout, *ntra;
logical *trace, *rewi, *fatal;
integer *nidim, *idim, *nkb, *kb, *ninc, *inc, *nmax, *incmax;
complex *a, *aa, *as, *x, *xx, *xs, *xt;
real *g;
complex *z__;
integer *iorder;
ftnlen sname_len;
/* Subroutine */ int cchk3_(char* sname, real* eps, real* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nkb, integer* kb, integer* ninc, integer* inc, integer* nmax, integer* incmax, complex* a, complex* aa, complex* as, complex* x, complex* xx, complex* xs, complex* xt, real* g, complex* z__, integer* iorder, ftnlen sname_len)
{
/* Initialized data */
@ -1937,10 +1778,10 @@ ftnlen sname_len;
static logical full, null;
static char uplo[1], cdiag[14];
static integer i__, k, n;
extern /* Subroutine */ int cmake_();
extern /* Subroutine */ int cmake_(char*, char*, char*, integer*, integer*, complex*, integer*, complex*, integer*, integer*, integer*, logical*, complex*, ftnlen, ftnlen, ftnlen);
static char diags[1];
static logical isame[13];
extern /* Subroutine */ int cmvch_();
extern /* Subroutine */ int cmvch_(char*, integer*, integer*, complex*, complex*, integer*, complex*, integer*, complex*, complex*, integer*, complex*, real*, complex*, real*, real*, logical*, integer*, logical*, ftnlen);
static integer nargs;
static logical reset;
static char cuplo[14];
@ -1950,17 +1791,19 @@ ftnlen sname_len;
static integer nc, ik, in;
static logical packed;
static integer nk, ks, ix, ns, lx;
extern logical lceres_();
extern /* Subroutine */ int cctbmv_(), cctbsv_();
extern logical lceres_(char*, char*, integer*, integer*, complex*, complex*, integer*, ftnlen, ftnlen);
extern /* Subroutine */ void cctbmv_(integer*, char*, char*, char*, integer*, integer*, complex*, integer*, complex*, integer*, ftnlen, ftnlen, ftnlen);
extern /* Subroutine */ void cctbsv_(integer*, char*, char*, char*, integer*, integer*, complex*, integer*, complex*, integer*, ftnlen, ftnlen, ftnlen);
static char ctrans[14];
extern /* Subroutine */ int cctpmv_();
extern /* Subroutine */ void cctpmv_(integer*, char*, char*, char*, integer*, complex*, complex*, integer*, ftnlen, ftnlen, ftnlen);
static real errmax;
extern /* Subroutine */ int cctrmv_(), cctpsv_();
extern /* Subroutine */ void cctrmv_(integer*, char*, char*, char*, integer*, complex*, integer*, complex*, integer*, ftnlen, ftnlen, ftnlen);
extern /* Subroutine */ void cctpsv_(integer*, char*, char*, char*, integer*, complex*, complex*, integer*, ftnlen, ftnlen, ftnlen);
static complex transl;
extern /* Subroutine */ int cctrsv_();
extern /* Subroutine */ void cctrsv_(integer*, char*, char*, char*, integer*, complex*, integer*, complex*, integer*, ftnlen, ftnlen, ftnlen);
static char transs[1];
static integer laa, icd, lda;
extern logical lce_();
extern logical lce_(complex*, complex*, integer*);
static integer ict, icu;
static real err;
@ -2418,21 +2261,7 @@ L130:
} /* cchk3_ */
/* Subroutine */ int cchk4_(sname, eps, thresh, nout, ntra, trace, rewi,
fatal, nidim, idim, nalf, alf, ninc, inc, nmax, incmax, a, aa, as, x,
xx, xs, y, yy, ys, yt, g, z__, iorder, sname_len)
char *sname;
real *eps, *thresh;
integer *nout, *ntra;
logical *trace, *rewi, *fatal;
integer *nidim, *idim, *nalf;
complex *alf;
integer *ninc, *inc, *nmax, *incmax;
complex *a, *aa, *as, *x, *xx, *xs, *y, *yy, *ys, *yt;
real *g;
complex *z__;
integer *iorder;
ftnlen sname_len;
/* Subroutine */ int cchk4_(char* sname, real* eps, real* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, complex* alf, integer* ninc, integer* inc, integer* nmax, integer* incmax, complex* a, complex* aa, complex* as, complex* x, complex* xx, complex* xs, complex* y, complex* yy, complex* ys, complex* yt, real* g, complex* z__, integer* iorder, ftnlen sname_len)
{
/* System generated locals */
integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7;
@ -2444,21 +2273,21 @@ ftnlen sname_len;
static integer incx, incy;
static logical null;
static integer i__, j, m, n;
extern /* Subroutine */ int cmake_();
extern /* Subroutine */ int cmake_(char*, char*, char*, integer*, integer*, complex*, integer*, complex*, integer*, integer*, integer*, logical*, complex*, ftnlen, ftnlen, ftnlen);
static complex alpha, w[1];
static logical isame[13];
extern /* Subroutine */ int cmvch_();
extern /* Subroutine */ int cmvch_(char*, integer*, integer*, complex*, complex*, integer*, complex*, integer*, complex*, complex*, integer*, complex*, real*, complex*, real*, real*, logical*, integer*, logical*, ftnlen);
static integer nargs;
static logical reset;
static integer incxs, incys, ia, nc, nd, im, in;
extern /* Subroutine */ int ccgerc_();
extern /* Subroutine */ void ccgerc_(integer*, integer*, integer*, complex*, complex*, integer*, complex*, integer*, complex*, integer*);
static integer ms, ix, iy, ns, lx, ly;
extern /* Subroutine */ int ccgeru_();
extern logical lceres_();
extern /* Subroutine */ void ccgeru_(integer*, integer*, integer*, complex*, complex*, integer*, complex*, integer*, complex*, integer*);
extern logical lceres_(char*, char*, integer*, integer*, complex*, complex*, integer*, ftnlen, ftnlen);
static real errmax;
static complex transl;
static integer laa, lda;
extern logical lce_();
extern logical lce_(complex*, complex*, integer*);
static complex als;
static real err;
@ -2786,21 +2615,7 @@ L150:
} /* cchk4_ */
/* Subroutine */ int cchk5_(sname, eps, thresh, nout, ntra, trace, rewi,
fatal, nidim, idim, nalf, alf, ninc, inc, nmax, incmax, a, aa, as, x,
xx, xs, y, yy, ys, yt, g, z__, iorder, sname_len)
char *sname;
real *eps, *thresh;
integer *nout, *ntra;
logical *trace, *rewi, *fatal;
integer *nidim, *idim, *nalf;
complex *alf;
integer *ninc, *inc, *nmax, *incmax;
complex *a, *aa, *as, *x, *xx, *xs, *y, *yy, *ys, *yt;
real *g;
complex *z__;
integer *iorder;
ftnlen sname_len;
/* Subroutine */ int cchk5_(char* sname, real* eps, real* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, complex* alf, integer* ninc, integer* inc, integer* nmax, integer* incmax, complex* a, complex* aa, complex* as, complex* x, complex* xx, complex* xs, complex* y, complex* yy, complex* ys, complex* yt, real* g, complex* z__, integer* iorder, ftnlen sname_len)
{
/* Initialized data */
@ -2818,10 +2633,12 @@ ftnlen sname_len;
static logical full, null;
static char uplo[1];
static integer i__, j, n;
extern /* Subroutine */ int cmake_(), ccher_();
extern /* Subroutine */ int cmake_(char*, char*, char*, integer*, integer*, complex*, integer*, complex*, integer*, integer*, integer*, logical*, complex*, ftnlen, ftnlen, ftnlen);
extern /* Subroutine */ void ccher_(integer*, char*, integer*, real*, complex*, integer*, complex*, integer*, ftnlen);
static complex alpha, w[1];
static logical isame[13];
extern /* Subroutine */ int cchpr_(), cmvch_();
extern /* Subroutine */ void cchpr_(integer*, char*, integer*, real*, complex*, integer*, complex*, ftnlen);
extern /* Subroutine */ int cmvch_(char*, integer*, integer*, complex*, complex*, integer*, complex*, integer*, complex*, complex*, integer*, complex*, real*, complex*, real*, real*, logical*, integer*, logical*, ftnlen);
static integer nargs;
static logical reset;
static char cuplo[14];
@ -2832,11 +2649,11 @@ ftnlen sname_len;
static logical packed;
static integer ix, ns, lx;
static real ralpha;
extern logical lceres_();
extern logical lceres_(char*, char*, integer*, integer*, complex*, complex*, integer*, ftnlen, ftnlen);
static real errmax;
static complex transl;
static integer laa, lda;
extern logical lce_();
extern logical lce_(complex*, complex*, integer*);
static real err;
/* Tests CHER and CHPR. */
@ -3160,21 +2977,7 @@ L130:
} /* cchk5_ */
/* Subroutine */ int cchk6_(sname, eps, thresh, nout, ntra, trace, rewi,
fatal, nidim, idim, nalf, alf, ninc, inc, nmax, incmax, a, aa, as, x,
xx, xs, y, yy, ys, yt, g, z__, iorder, sname_len)
char *sname;
real *eps, *thresh;
integer *nout, *ntra;
logical *trace, *rewi, *fatal;
integer *nidim, *idim, *nalf;
complex *alf;
integer *ninc, *inc, *nmax, *incmax;
complex *a, *aa, *as, *x, *xx, *xs, *y, *yy, *ys, *yt;
real *g;
complex *z__;
integer *iorder;
ftnlen sname_len;
/* Subroutine */ int cchk6_(char* sname, real* eps, real* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, complex* alf, integer* ninc, integer* inc, integer* nmax, integer* incmax, complex* a, complex* aa, complex* as, complex* x, complex* xx, complex* xs, complex* y, complex* yy, complex* ys, complex* yt, real* g, complex* z__, integer* iorder, ftnlen sname_len)
{
/* Initialized data */
@ -3192,25 +2995,26 @@ ftnlen sname_len;
static logical full, null;
static char uplo[1];
static integer i__, j, n;
extern /* Subroutine */ int cmake_();
extern /* Subroutine */ int cmake_(char*, char*, char*, integer*, integer*, complex*, integer*, complex*, integer*, integer*, integer*, logical*, complex*, ftnlen, ftnlen, ftnlen);
static complex alpha, w[2];
static logical isame[13];
extern /* Subroutine */ int cmvch_();
extern /* Subroutine */ int cmvch_(char*, integer*, integer*, complex*, complex*, integer*, complex*, integer*, complex*, complex*, integer*, complex*, real*, complex*, real*, real*, logical*, integer*, logical*, ftnlen);
static integer nargs;
static logical reset;
static char cuplo[14];
static integer incxs, incys;
static logical upper;
static char uplos[1];
extern /* Subroutine */ int ccher2_(), cchpr2_();
extern /* Subroutine */ void ccher2_(integer*, char*, integer*, complex*, complex*, integer*, complex*, integer*, complex*, integer*, ftnlen);
extern /* Subroutine */ void cchpr2_(integer*, char*, integer*, complex*, complex*, integer*, complex*, integer*, complex*, ftnlen);
static integer ia, ja, ic, nc, jj, lj, in;
static logical packed;
static integer ix, iy, ns, lx, ly;
extern logical lceres_();
extern logical lceres_(char*, char*, integer*, integer*, complex*, complex*, integer*, ftnlen, ftnlen);
static real errmax;
static complex transl;
static integer laa, lda;
extern logical lce_();
extern logical lce_(complex*, complex*, integer*);
static complex als;
static real err;
@ -3597,24 +3401,7 @@ L170:
} /* cchk6_ */
/* Subroutine */ int cmvch_(trans, m, n, alpha, a, nmax, x, incx, beta, y,
incy, yt, g, yy, eps, err, fatal, nout, mv, trans_len)
char *trans;
integer *m, *n;
complex *alpha, *a;
integer *nmax;
complex *x;
integer *incx;
complex *beta, *y;
integer *incy;
complex *yt;
real *g;
complex *yy;
real *eps, *err;
logical *fatal;
integer *nout;
logical *mv;
ftnlen trans_len;
/* Subroutine */ int cmvch_(char* trans, integer* m, integer* n, complex* alpha, complex* a, integer* nmax, complex* x, integer* incx, complex* beta, complex* y, integer* incy, complex* yt, real* g, complex* yy, real* eps, real* err, logical* fatal, integer* nout, logical* mv, ftnlen trans_len)
{
/* System generated locals */
@ -3812,9 +3599,7 @@ L80:
} /* cmvch_ */
logical lce_(ri, rj, lr)
complex *ri, *rj;
integer *lr;
logical lce_(complex* ri, complex* rj, integer* lr)
{
/* System generated locals */
integer i__1, i__2, i__3;
@ -3861,13 +3646,7 @@ L30:
} /* lce_ */
logical lceres_(type__, uplo, m, n, aa, as, lda, type_len, uplo_len)
char *type__, *uplo;
integer *m, *n;
complex *aa, *as;
integer *lda;
ftnlen type_len;
ftnlen uplo_len;
logical lceres_(char* type__, char* uplo, integer* m, integer* n, complex* aa, complex* as, integer* lda, ftnlen type_len, ftnlen uplo_len)
{
/* System generated locals */
integer aa_dim1, aa_offset, as_dim1, as_offset, i__1, i__2, i__3, i__4;
@ -3960,9 +3739,7 @@ L80:
} /* lceres_ */
/* Complex */ VOID cbeg_( ret_val, reset)
complex * ret_val;
logical *reset;
/* Complex */ VOID cbeg_(complex* ret_val, logical* reset)
{
/* System generated locals */
real r__1, r__2;
@ -4023,8 +3800,7 @@ L10:
} /* cbeg_ */
doublereal sdiff_(x, y)
real *x, *y;
doublereal sdiff_(real* x, real* y)
{
/* System generated locals */
real ret_val;
@ -4044,19 +3820,7 @@ real *x, *y;
} /* sdiff_ */
/* Subroutine */ int cmake_(type__, uplo, diag, m, n, a, nmax, aa, lda, kl,
ku, reset, transl, type_len, uplo_len, diag_len)
char *type__, *uplo, *diag;
integer *m, *n;
complex *a;
integer *nmax;
complex *aa;
integer *lda, *kl, *ku;
logical *reset;
complex *transl;
ftnlen type_len;
ftnlen uplo_len;
ftnlen diag_len;
/* Subroutine */ int cmake_(char* type__, char* uplo, char* diag, integer* m, integer* n, complex* a, integer* nmax, complex* aa, integer* lda, integer* kl, integer* ku, logical* reset, complex* transl, ftnlen type_len, ftnlen uplo_len, ftnlen diag_len)
{
/* System generated locals */
integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
@ -4064,7 +3828,7 @@ ftnlen diag_len;
complex q__1, q__2;
/* Local variables */
extern /* Complex */ VOID cbeg_();
extern /* Complex */ VOID cbeg_(complex*, logical*);
static integer ibeg, iend, ioff;
static logical unit;
static integer i__, j;

View File

@ -242,130 +242,6 @@ typedef struct Namelist Namelist;
/* procedure parameter types for -A and -C++ */
#define F2C_proc_par_types 1
#ifdef __cplusplus
typedef logical (*L_fp)(...);
#else
typedef logical (*L_fp)();
#endif
#if 0
static float spow_ui(float x, integer n) {
float pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
static double dpow_ui(double x, integer n) {
double pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
#ifdef _MSC_VER
static _Fcomplex cpow_ui(complex x, integer n) {
complex pow={1.0,0.0}; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i;
for(u = n; ; ) {
if(u & 01) pow.r *= x.r, pow.i *= x.i;
if(u >>= 1) x.r *= x.r, x.i *= x.i;
else break;
}
}
_Fcomplex p={pow.r, pow.i};
return p;
}
#else
static _Complex float cpow_ui(_Complex float x, integer n) {
_Complex float pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
#endif
#ifdef _MSC_VER
static _Dcomplex zpow_ui(_Dcomplex x, integer n) {
_Dcomplex pow={1.0,0.0}; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1];
for(u = n; ; ) {
if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1];
if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1];
else break;
}
}
_Dcomplex p = {pow._Val[0], pow._Val[1]};
return p;
}
#else
static _Complex double zpow_ui(_Complex double x, integer n) {
_Complex double pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
#endif
static integer pow_ii(integer x, integer n) {
integer pow; unsigned long int u;
if (n <= 0) {
if (n == 0 || x == 1) pow = 1;
else if (x != -1) pow = x == 0 ? 1/x : 0;
else n = -n;
}
if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
u = n;
for(pow = 1; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
static integer dmaxloc_(double *w, integer s, integer e, integer *n)
{
double m; integer i, mi;
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
if (w[i-1]>m) mi=i ,m=w[i-1];
return mi-s+1;
}
static integer smaxloc_(float *w, integer s, integer e, integer *n)
{
float m; integer i, mi;
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
if (w[i-1]>m) mi=i ,m=w[i-1];
return mi-s+1;
}
#endif
/* -- translated by f2c (version 20000121).
You must link the resulting object file with the libraries:
-lf2c -lm (in that order)
*/
/* Common Block Declarations */

View File

@ -21,19 +21,6 @@ typedef float real;
typedef double doublereal;
typedef struct { real r, i; } complex;
typedef struct { doublereal r, i; } doublecomplex;
#ifdef _MSC_VER
static inline _Fcomplex Cf(complex *z) {_Fcomplex zz={z->r , z->i}; return zz;}
static inline _Dcomplex Cd(doublecomplex *z) {_Dcomplex zz={z->r , z->i};return zz;}
static inline _Fcomplex * _pCf(complex *z) {return (_Fcomplex*)z;}
static inline _Dcomplex * _pCd(doublecomplex *z) {return (_Dcomplex*)z;}
#else
static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
#endif
#define pCf(z) (*_pCf(z))
#define pCd(z) (*_pCd(z))
typedef int logical;
typedef short int shortlogical;
typedef char logical1;
@ -242,124 +229,6 @@ typedef struct Namelist Namelist;
/* procedure parameter types for -A and -C++ */
#define F2C_proc_par_types 1
#ifdef __cplusplus
typedef logical (*L_fp)(...);
#else
typedef logical (*L_fp)();
#endif
#if 0
static float spow_ui(float x, integer n) {
float pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
static double dpow_ui(double x, integer n) {
double pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
#ifdef _MSC_VER
static _Fcomplex cpow_ui(complex x, integer n) {
complex pow={1.0,0.0}; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i;
for(u = n; ; ) {
if(u & 01) pow.r *= x.r, pow.i *= x.i;
if(u >>= 1) x.r *= x.r, x.i *= x.i;
else break;
}
}
_Fcomplex p={pow.r, pow.i};
return p;
}
#else
static _Complex float cpow_ui(_Complex float x, integer n) {
_Complex float pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
#endif
#ifdef _MSC_VER
static _Dcomplex zpow_ui(_Dcomplex x, integer n) {
_Dcomplex pow={1.0,0.0}; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1];
for(u = n; ; ) {
if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1];
if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1];
else break;
}
}
_Dcomplex p = {pow._Val[0], pow._Val[1]};
return p;
}
#else
static _Complex double zpow_ui(_Complex double x, integer n) {
_Complex double pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
#endif
static integer pow_ii(integer x, integer n) {
integer pow; unsigned long int u;
if (n <= 0) {
if (n == 0 || x == 1) pow = 1;
else if (x != -1) pow = x == 0 ? 1/x : 0;
else n = -n;
}
if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
u = n;
for(pow = 1; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
static integer dmaxloc_(double *w, integer s, integer e, integer *n)
{
double m; integer i, mi;
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
if (w[i-1]>m) mi=i ,m=w[i-1];
return mi-s+1;
}
static integer smaxloc_(float *w, integer s, integer e, integer *n)
{
float m; integer i, mi;
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
if (w[i-1]>m) mi=i ,m=w[i-1];
return mi-s+1;
}
#endif
/* Common Block Declarations */
@ -375,16 +244,16 @@ struct {
static integer c__1 = 1;
static doublereal c_b34 = 1.;
/* Main program */ int main()
/* Main program */ int main(void)
{
/* Initialized data */
static doublereal sfac = 9.765625e-4;
/* Local variables */
extern /* Subroutine */ int check0_(), check1_(), check2_(), check3_();
extern /* Subroutine */ int check0_(doublereal*), check1_(doublereal*), check2_(doublereal*), check3_(doublereal*);
static integer ic;
extern /* Subroutine */ int header_();
extern /* Subroutine */ int header_(void);
/* Test program for the DOUBLE PRECISION Level 1 CBLAS. */
/* Based upon the original CBLAS test routine together with: */
@ -431,7 +300,7 @@ static doublereal c_b34 = 1.;
} /* MAIN__ */
/* Subroutine */ int header_()
/* Subroutine */ int header_(void)
{
/* Initialized data */
@ -450,8 +319,7 @@ static doublereal c_b34 = 1.;
} /* header_ */
/* Subroutine */ int check0_(sfac)
doublereal *sfac;
/* Subroutine */ int check0_(doublereal* sfac)
{
/* Initialized data */
@ -464,7 +332,7 @@ doublereal *sfac;
/* Local variables */
static integer k;
extern /* Subroutine */ int drotgtest_(), stest1_();
extern /* Subroutine */ int drotgtest_(doublereal*,doublereal*,doublereal*,doublereal*), stest1_(doublereal*,doublereal*,doublereal*,doublereal*);
static doublereal sa, sb, sc, ss;
/* .. Parameters .. */
@ -509,8 +377,7 @@ L40:
return 0;
} /* check0_ */
/* Subroutine */ int check1_(sfac)
doublereal *sfac;
/* Subroutine */ int check1_(doublereal* sfac)
{
/* Initialized data */
@ -535,14 +402,14 @@ doublereal *sfac;
/* Local variables */
static integer i__;
extern doublereal dnrm2test_();
extern doublereal dnrm2test_(integer*, doublereal*, integer*);
static doublereal stemp[1], strue[8];
extern /* Subroutine */ int stest_(), dscaltest_();
extern doublereal dasumtest_();
extern /* Subroutine */ int itest1_(), stest1_();
extern /* Subroutine */ int stest_(integer*,doublereal*,doublereal*,doublereal*,doublereal*), dscaltest_(integer*,doublereal*,doublereal*,integer*);
extern doublereal dasumtest_(integer*,doublereal*,integer*);
extern /* Subroutine */ int itest1_(integer*,integer*), stest1_(doublereal*,doublereal*,doublereal*,doublereal*);
static doublereal sx[8];
static integer np1;
extern integer idamaxtest_();
extern integer idamaxtest_(integer*,doublereal*,integer*);
static integer len;
/* .. Parameters .. */
@ -603,8 +470,7 @@ doublereal *sfac;
return 0;
} /* check1_ */
/* Subroutine */ int check2_(sfac)
doublereal *sfac;
/* Subroutine */ int check2_(doublereal* sfac)
{
/* Initialized data */
@ -649,10 +515,10 @@ doublereal *sfac;
/* Local variables */
static integer lenx, leny;
extern doublereal ddottest_();
extern doublereal ddottest_(integer*,doublereal*,integer*,doublereal*,integer*);
static integer i__, j, ksize;
extern /* Subroutine */ int stest_(), dcopytest_(), dswaptest_(),
daxpytest_(), stest1_();
extern /* Subroutine */ int stest_(integer*,doublereal*,doublereal*,doublereal*,doublereal*), dcopytest_(integer*,doublereal*,integer*,doublereal*,integer*), dswaptest_(integer*,doublereal*,integer*,doublereal*,integer*),
daxpytest_(integer*,doublereal*,doublereal*,integer*,doublereal*,integer*), stest1_(doublereal*,doublereal*,doublereal*,doublereal*);
static integer ki, kn, mx, my;
static doublereal sx[7], sy[7], stx[7], sty[7];
@ -733,8 +599,7 @@ doublereal *sfac;
return 0;
} /* check2_ */
/* Subroutine */ int check3_(sfac)
doublereal *sfac;
/* Subroutine */ int check3_(doublereal* sfac)
{
/* Initialized data */
@ -753,9 +618,9 @@ doublereal *sfac;
;
/* Local variables */
extern /* Subroutine */ int drottest_();
extern /* Subroutine */ int drottest_(integer*,doublereal*,integer*,doublereal*,integer*,doublereal*,doublereal*);
static integer i__, k, ksize;
extern /* Subroutine */int stest_(), drotmtest_();
extern /* Subroutine */int stest_(integer*,doublereal*,doublereal*,doublereal*,doublereal*), drotmtest_(integer*,doublereal*,integer*,doublereal*,integer*,doublereal*);
static integer ki, kn;
static doublereal dparam[5], sx[10], sy[10], stx[10], sty[10];
@ -826,9 +691,7 @@ doublereal *sfac;
return 0;
} /* check3_ */
/* Subroutine */ int stest_(len, scomp, strue, ssize, sfac)
integer *len;
doublereal *scomp, *strue, *ssize, *sfac;
/* Subroutine */ int stest_(integer* len, doublereal* scomp, doublereal* strue, doublereal* ssize, doublereal* sfac)
{
/* System generated locals */
integer i__1;
@ -836,7 +699,7 @@ doublereal *scomp, *strue, *ssize, *sfac;
/* Local variables */
static integer i__;
extern doublereal sdiff_();
extern doublereal sdiff_(doublereal*,doublereal*);
static doublereal sd;
/* ********************************* STEST ************************** */
@ -892,11 +755,10 @@ L40:
} /* stest_ */
/* Subroutine */ int stest1_(scomp1, strue1, ssize, sfac)
doublereal *scomp1, *strue1, *ssize, *sfac;
/* Subroutine */ int stest1_(doublereal* scomp1, doublereal* strue1, doublereal* ssize, doublereal* sfac)
{
static doublereal scomp[1], strue[1];
extern /* Subroutine */ int stest_();
extern /* Subroutine */ int stest_(integer*, doublereal*, doublereal*, doublereal*, doublereal*);
/* ************************* STEST1 ***************************** */
@ -923,8 +785,7 @@ doublereal *scomp1, *strue1, *ssize, *sfac;
return 0;
} /* stest1_ */
doublereal sdiff_(sa, sb)
doublereal *sa, *sb;
doublereal sdiff_(doublereal* sa, doublereal* sb)
{
/* System generated locals */
doublereal ret_val;
@ -938,8 +799,7 @@ doublereal *sa, *sb;
return ret_val;
} /* sdiff_ */
/* Subroutine */ int itest1_(icomp, itrue)
integer *icomp, *itrue;
/* Subroutine */ int itest1_(integer* icomp, integer* itrue)
{
/* Local variables */
static integer id;
@ -1188,4 +1048,4 @@ doublereal *dparam;
return 0;
} /* drotm_ */
#endif
#endif

View File

@ -242,129 +242,6 @@ typedef struct Namelist Namelist;
/* procedure parameter types for -A and -C++ */
#define F2C_proc_par_types 1
#ifdef __cplusplus
typedef logical (*L_fp)(...);
#else
typedef logical (*L_fp)();
#endif
#if 0
static float spow_ui(float x, integer n) {
float pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
static double dpow_ui(double x, integer n) {
double pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
#ifdef _MSC_VER
static _Fcomplex cpow_ui(complex x, integer n) {
complex pow={1.0,0.0}; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i;
for(u = n; ; ) {
if(u & 01) pow.r *= x.r, pow.i *= x.i;
if(u >>= 1) x.r *= x.r, x.i *= x.i;
else break;
}
}
_Fcomplex p={pow.r, pow.i};
return p;
}
#else
static _Complex float cpow_ui(_Complex float x, integer n) {
_Complex float pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
#endif
#ifdef _MSC_VER
static _Dcomplex zpow_ui(_Dcomplex x, integer n) {
_Dcomplex pow={1.0,0.0}; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1];
for(u = n; ; ) {
if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1];
if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1];
else break;
}
}
_Dcomplex p = {pow._Val[0], pow._Val[1]};
return p;
}
#else
static _Complex double zpow_ui(_Complex double x, integer n) {
_Complex double pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
#endif
static integer pow_ii(integer x, integer n) {
integer pow; unsigned long int u;
if (n <= 0) {
if (n == 0 || x == 1) pow = 1;
else if (x != -1) pow = x == 0 ? 1/x : 0;
else n = -n;
}
if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
u = n;
for(pow = 1; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
static integer dmaxloc_(double *w, integer s, integer e, integer *n)
{
double m; integer i, mi;
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
if (w[i-1]>m) mi=i ,m=w[i-1];
return mi-s+1;
}
static integer smaxloc_(float *w, integer s, integer e, integer *n)
{
float m; integer i, mi;
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
if (w[i-1]>m) mi=i ,m=w[i-1];
return mi-s+1;
}
#endif
/* -- translated by f2c (version 20000121).
You must link the resulting object file with the libraries:
-lf2c -lm (in that order)
*/
/* Common Block Declarations */
@ -395,7 +272,7 @@ static integer c_n1 = -1;
static integer c__0 = 0;
static logical c_false = FALSE_;
/* Main program */ int main()
/* Main program */ int main(void)
{
/* Initialized data */
@ -413,17 +290,21 @@ static logical c_false = FALSE_;
static logical same;
static integer ninc, nbet, ntra;
static logical rewi;
extern /* Subroutine */ int dchk1_(), dchk2_(), dchk3_(), dchk4_(),
dchk5_(), dchk6_();
extern /* Subroutine */ int dchk1_(char*, doublereal*, doublereal*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, integer*, integer*, doublereal*, integer*, doublereal*, integer*, integer*, integer*, integer*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, integer*, ftnlen);
extern /* Subroutine */ int dchk2_(char*, doublereal*, doublereal*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, integer*, integer*, doublereal*, integer*, doublereal*, integer*, integer*, integer*, integer*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, integer*, ftnlen);
extern /* Subroutine */ int dchk3_(char*, doublereal*, doublereal*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, integer*, integer*, integer*, integer*, integer*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, integer*, ftnlen);
extern /* Subroutine */ int dchk4_(char*, doublereal*, doublereal*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, doublereal*, integer*, integer*, integer*, integer*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, integer*, ftnlen);
extern /* Subroutine */ int dchk5_(char*, doublereal*, doublereal*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, doublereal*, integer*, integer*, integer*, integer*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, integer*, ftnlen);
extern /* Subroutine */ int dchk6_(char*, doublereal*, doublereal*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, doublereal*, integer*, integer*, integer*, integer*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, integer*, ftnlen);
static doublereal a[4225] /* was [65][65] */, g[65];
static integer i__, j;
extern doublereal ddiff_();
extern doublereal ddiff_(doublereal*, doublereal*);
static integer n;
static logical fatal;
static doublereal x[65], y[65], z__[130];
static logical trace;
static integer nidim;
extern /* Subroutine */ int dmvch_();
extern /* Subroutine */ int dmvch_(char*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, integer*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen);
static char snaps[32], trans[1];
static integer isnum;
static logical ltest[16];
@ -437,11 +318,11 @@ static logical c_false = FALSE_;
static char snamet[12];
static doublereal thresh;
static logical rorder;
extern /* Subroutine */ int cd2chke_();
extern /* Subroutine */ void cd2chke_(char*, ftnlen);
static integer layout;
static logical ltestt, tsterr;
static doublereal alf[7];
extern logical lde_();
extern logical lde_(doublereal*, doublereal*, integer*);
static integer inc[7], nkb;
static doublereal bet[7],eps,err;
char tmpchar;
@ -977,21 +858,7 @@ L240:
} /* MAIN__ */
/* Subroutine */ int dchk1_(sname, eps, thresh, nout, ntra, trace, rewi,
fatal, nidim, idim, nkb, kb, nalf, alf, nbet, bet, ninc, inc, nmax,
incmax, a, aa, as, x, xx, xs, y, yy, ys, yt, g, iorder, sname_len)
char *sname;
doublereal *eps, *thresh;
integer *nout, *ntra;
logical *trace, *rewi, *fatal;
integer *nidim, *idim, *nkb, *kb, *nalf;
doublereal *alf;
integer *nbet;
doublereal *bet;
integer *ninc, *inc, *nmax, *incmax;
doublereal *a, *aa, *as, *x, *xx, *xs, *y, *yy, *ys, *yt, *g;
integer *iorder;
ftnlen sname_len;
/* Subroutine */ int dchk1_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nkb, integer* kb, integer* nalf, doublereal* alf, integer* nbet, doublereal* bet, integer* ninc, integer* inc, integer* nmax, integer* incmax, doublereal* a, doublereal* aa, doublereal* as, doublereal* x, doublereal* xx, doublereal* xs, doublereal* y, doublereal* yy, doublereal* ys, doublereal* yt, doublereal* g, integer* iorder, ftnlen sname_len)
{
/* Initialized data */
@ -1007,10 +874,10 @@ ftnlen sname_len;
static integer incx, incy;
static logical full, tran, null;
static integer i__, m, n;
extern /* Subroutine */ int dmake_();
extern /* Subroutine */ int dmake_(char* , char*, char*, integer*, integer*, doublereal*, integer*, doublereal*, integer*, integer*, integer*, logical*, doublereal*, ftnlen, ftnlen, ftnlen);
static doublereal alpha;
static logical isame[13];
extern /* Subroutine */ int dmvch_();
extern /* Subroutine */ int dmvch_(char*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, integer*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen);
static integer nargs;
static logical reset;
static integer incxs, incys;
@ -1018,13 +885,14 @@ ftnlen sname_len;
static integer ia, ib, ic;
static logical banded;
static integer nc, nd, im, in, kl, ml, nk, nl, ku, ix, iy, ms, lx, ly, ns;
extern /* Subroutine */ int cdgbmv_(), cdgemv_();
extern logical lderes_();
extern /* Subroutine */ void cdgbmv_(integer*, char*, integer*, integer*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, integer*, ftnlen);
extern /* Subroutine */ void cdgemv_(integer*, char*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, integer*, ftnlen);
extern logical lderes_(char*, char*, integer*, integer*, doublereal*, doublereal*, integer*, ftnlen, ftnlen);
static char ctrans[14];
static doublereal errmax, transl;
static char transs[1];
static integer laa, lda;
extern logical lde_();
extern logical lde_(doublereal*, doublereal*, integer*);
static doublereal als, bls, err;
static integer iku, kls, kus;
@ -1429,21 +1297,7 @@ L140:
} /* dchk1_ */
/* Subroutine */ int dchk2_(sname, eps, thresh, nout, ntra, trace, rewi,
fatal, nidim, idim, nkb, kb, nalf, alf, nbet, bet, ninc, inc, nmax,
incmax, a, aa, as, x, xx, xs, y, yy, ys, yt, g, iorder, sname_len)
char *sname;
doublereal *eps, *thresh;
integer *nout, *ntra;
logical *trace, *rewi, *fatal;
integer *nidim, *idim, *nkb, *kb, *nalf;
doublereal *alf;
integer *nbet;
doublereal *bet;
integer *ninc, *inc, *nmax, *incmax;
doublereal *a, *aa, *as, *x, *xx, *xs, *y, *yy, *ys, *yt, *g;
integer *iorder;
ftnlen sname_len;
/* Subroutine */ int dchk2_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nkb, integer* kb, integer* nalf, doublereal* alf, integer* nbet, doublereal* bet, integer* ninc, integer* inc, integer* nmax, integer* incmax, doublereal* a, doublereal* aa, doublereal* as, doublereal* x, doublereal* xx, doublereal* xs, doublereal* y, doublereal* yy, doublereal* ys, doublereal* yt, doublereal* g, integer* iorder, ftnlen sname_len)
{
/* Initialized data */
@ -1460,10 +1314,10 @@ ftnlen sname_len;
static logical full, null;
static char uplo[1];
static integer i__, k, n;
extern /* Subroutine */ int dmake_();
extern /* Subroutine */ int dmake_(char* , char*, char*, integer*, integer*, doublereal*, integer*, doublereal*, integer*, integer*, integer*, logical*, doublereal*, ftnlen, ftnlen, ftnlen);
static doublereal alpha;
static logical isame[13];
extern /* Subroutine */ int dmvch_();
extern /* Subroutine */ int dmvch_(char*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, integer*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen);
static integer nargs;
static logical reset;
static char cuplo[14];
@ -1474,12 +1328,13 @@ ftnlen sname_len;
static integer nc, ik, in;
static logical packed;
static integer nk, ks, ix, iy, ns, lx, ly;
extern logical lderes_();
extern /* Subroutine */ int cdsbmv_(), cdspmv_();
extern logical lderes_(char*, char*, integer*, integer*, doublereal*, doublereal*, integer*, ftnlen, ftnlen);
extern /* Subroutine */ void cdsbmv_(integer*, char*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, integer*, ftnlen);
extern /* Subroutine */ void cdspmv_(integer*, char*, integer*, doublereal*, doublereal*, doublereal*, integer*, doublereal*, doublereal*, integer*, ftnlen);
static doublereal errmax, transl;
extern /* Subroutine */ int cdsymv_();
extern /* Subroutine */ void cdsymv_(integer*, char*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, integer*, ftnlen);
static integer laa, lda;
extern logical lde_();
extern logical lde_(doublereal*, doublereal*, integer*);
static doublereal als, bls, err;
@ -1882,17 +1737,7 @@ L130:
} /* dchk2_ */
/* Subroutine */ int dchk3_(sname, eps, thresh, nout, ntra, trace, rewi,
fatal, nidim, idim, nkb, kb, ninc, inc, nmax, incmax, a, aa, as, x,
xx, xs, xt, g, z__, iorder, sname_len)
char *sname;
doublereal *eps, *thresh;
integer *nout, *ntra;
logical *trace, *rewi, *fatal;
integer *nidim, *idim, *nkb, *kb, *ninc, *inc, *nmax, *incmax;
doublereal *a, *aa, *as, *x, *xx, *xs, *xt, *g, *z__;
integer *iorder;
ftnlen sname_len;
/* Subroutine */ int dchk3_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nkb, integer* kb, integer* ninc, integer* inc, integer* nmax, integer* incmax, doublereal* a, doublereal* aa, doublereal* as, doublereal* x, doublereal* xx, doublereal* xs, doublereal* xt, doublereal* g, doublereal* z__, integer* iorder, ftnlen sname_len)
{
/* Initialized data */
@ -1911,10 +1756,10 @@ ftnlen sname_len;
static logical full, null;
static char uplo[1], cdiag[14];
static integer i__, k, n;
extern /* Subroutine */ int dmake_();
extern /* Subroutine */ int dmake_(char* , char*, char*, integer*, integer*, doublereal*, integer*, doublereal*, integer*, integer*, integer*, logical*, doublereal*, ftnlen, ftnlen, ftnlen);
static char diags[1];
static logical isame[13];
extern /* Subroutine */ int dmvch_();
extern /* Subroutine */ int dmvch_(char*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, integer*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen);
static integer nargs;
static logical reset;
static char cuplo[14];
@ -1924,16 +1769,19 @@ ftnlen sname_len;
static integer nc, ik, in;
static logical packed;
static integer nk, ks, ix, ns, lx;
extern logical lderes_();
extern /* Subroutine */ int cdtbmv_(), cdtbsv_();
extern logical lderes_(char*, char*, integer*, integer*, doublereal*, doublereal*, integer*, ftnlen, ftnlen);
extern /* Subroutine */ void cdtbmv_(integer*, char*, char*, char*, integer*, integer*, doublereal*, integer*, doublereal*, integer*, ftnlen, ftnlen, ftnlen);
extern /* Subroutine */ void cdtbsv_(integer*, char*, char*, char*, integer*, integer*, doublereal*, integer*, doublereal*, integer*, ftnlen, ftnlen, ftnlen);
static char ctrans[14];
static doublereal errmax;
extern /* Subroutine */ int cdtpmv_(), cdtrmv_();
extern /* Subroutine */ void cdtpmv_(integer*, char*, char*, char*, integer*, doublereal*, doublereal*, integer*, ftnlen, ftnlen, ftnlen);
extern /* Subroutine */ void cdtrmv_(integer*, char*, char*, char*, integer*, doublereal*, integer*, doublereal*, integer*, ftnlen, ftnlen, ftnlen);
static doublereal transl;
extern /* Subroutine */ int cdtpsv_(), cdtrsv_();
extern /* Subroutine */ void cdtpsv_(integer*, char*, char*, char*, integer*, doublereal*, doublereal*, integer*, ftnlen, ftnlen, ftnlen);
extern /* Subroutine */ void cdtrsv_(integer*, char*, char*, char*, integer*, doublereal*, integer*, doublereal*, integer*, ftnlen, ftnlen, ftnlen);
static char transs[1];
static integer laa, icd, lda;
extern logical lde_();
extern logical lde_(doublereal*, doublereal*, integer*);
static integer ict, icu;
static doublereal err;
@ -2388,19 +2236,7 @@ L130:
} /* dchk3_ */
/* Subroutine */ int dchk4_(sname, eps, thresh, nout, ntra, trace, rewi,
fatal, nidim, idim, nalf, alf, ninc, inc, nmax, incmax, a, aa, as, x,
xx, xs, y, yy, ys, yt, g, z__, iorder, sname_len)
char *sname;
doublereal *eps, *thresh;
integer *nout, *ntra;
logical *trace, *rewi, *fatal;
integer *nidim, *idim, *nalf;
doublereal *alf;
integer *ninc, *inc, *nmax, *incmax;
doublereal *a, *aa, *as, *x, *xx, *xs, *y, *yy, *ys, *yt, *g, *z__;
integer *iorder;
ftnlen sname_len;
/* Subroutine */ int dchk4_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, doublereal* alf, integer* ninc, integer* inc, integer* nmax, integer* incmax, doublereal* a, doublereal* aa, doublereal* as, doublereal* x, doublereal* xx, doublereal* xs, doublereal* y, doublereal* yy, doublereal* ys, doublereal* yt, doublereal* g, doublereal* z__, integer* iorder, ftnlen sname_len)
{
/* System generated locals */
integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6;
@ -2411,17 +2247,18 @@ ftnlen sname_len;
static integer incx, incy;
static logical null;
static integer i__, j, m, n;
extern /* Subroutine */ int dmake_(), cdger_();
extern /* Subroutine */ void cdger_(integer*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, integer*);
extern /* Subroutine */ int dmake_(char* , char*, char*, integer*, integer*, doublereal*, integer*, doublereal*, integer*, integer*, integer*, logical*, doublereal*, ftnlen, ftnlen, ftnlen);
static doublereal alpha, w[1];
static logical isame[13];
extern /* Subroutine */ int dmvch_();
extern /* Subroutine */ int dmvch_(char*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, integer*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen);
static integer nargs;
static logical reset;
static integer incxs, incys, ia, nc, nd, im, in, ms, ix, iy, ns, lx, ly;
extern logical lderes_();
extern logical lderes_(char*, char*, integer*, integer*, doublereal*, doublereal*, integer*, ftnlen, ftnlen);
static doublereal errmax, transl;
static integer laa, lda;
extern logical lde_();
extern logical lde_(doublereal*, doublereal*, integer*);
static doublereal als, err;
@ -2727,19 +2564,7 @@ L150:
} /* dchk4_ */
/* Subroutine */ int dchk5_(sname, eps, thresh, nout, ntra, trace, rewi,
fatal, nidim, idim, nalf, alf, ninc, inc, nmax, incmax, a, aa, as, x,
xx, xs, y, yy, ys, yt, g, z__, iorder, sname_len)
char *sname;
doublereal *eps, *thresh;
integer *nout, *ntra;
logical *trace, *rewi, *fatal;
integer *nidim, *idim, *nalf;
doublereal *alf;
integer *ninc, *inc, *nmax, *incmax;
doublereal *a, *aa, *as, *x, *xx, *xs, *y, *yy, *ys, *yt, *g, *z__;
integer *iorder;
ftnlen sname_len;
/* Subroutine */ int dchk5_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, doublereal* alf, integer* ninc, integer* inc, integer* nmax, integer* incmax, doublereal* a, doublereal* aa, doublereal* as, doublereal* x, doublereal* xx, doublereal* xs, doublereal* y, doublereal* yy, doublereal* ys, doublereal* yt, doublereal* g, doublereal* z__, integer* iorder, ftnlen sname_len)
{
/* Initialized data */
@ -2757,25 +2582,25 @@ ftnlen sname_len;
static logical full, null;
static char uplo[1];
static integer i__, j, n;
extern /* Subroutine */ int dmake_();
extern /* Subroutine */ int dmake_(char* , char*, char*, integer*, integer*, doublereal*, integer*, doublereal*, integer*, integer*, integer*, logical*, doublereal*, ftnlen, ftnlen, ftnlen);
static doublereal alpha, w[1];
static logical isame[13];
extern /* Subroutine */ int dmvch_();
extern /* Subroutine */ int dmvch_(char*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, integer*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen);
static integer nargs;
extern /* Subroutine */ int cdspr_();
extern /* Subroutine */ void cdspr_(integer*, char*, integer*, doublereal*, doublereal*, integer*, doublereal*, ftnlen);
static logical reset;
static char cuplo[14];
static integer incxs;
extern /* Subroutine */ int cdsyr_();
extern /* Subroutine */ void cdsyr_(integer*, char*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, ftnlen);
static logical upper;
static char uplos[1];
static integer ia, ja, ic, nc, jj, lj, in;
static logical packed;
static integer ix, ns, lx;
extern logical lderes_();
extern logical lderes_(char*, char*, integer*, integer*, doublereal*, doublereal*, integer*, ftnlen, ftnlen);
static doublereal errmax, transl;
static integer laa, lda;
extern logical lde_();
extern logical lde_(doublereal*, doublereal*, integer*);
static doublereal als, err;
@ -3096,19 +2921,7 @@ L130:
} /* dchk5_ */
/* Subroutine */ int dchk6_(sname, eps, thresh, nout, ntra, trace, rewi,
fatal, nidim, idim, nalf, alf, ninc, inc, nmax, incmax, a, aa, as, x,
xx, xs, y, yy, ys, yt, g, z__, iorder, sname_len)
char *sname;
doublereal *eps, *thresh;
integer *nout, *ntra;
logical *trace, *rewi, *fatal;
integer *nidim, *idim, *nalf;
doublereal *alf;
integer *ninc, *inc, *nmax, *incmax;
doublereal *a, *aa, *as, *x, *xx, *xs, *y, *yy, *ys, *yt, *g, *z__;
integer *iorder;
ftnlen sname_len;
/* Subroutine */ int dchk6_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, doublereal* alf, integer* ninc, integer* inc, integer* nmax, integer* incmax, doublereal* a, doublereal* aa, doublereal* as, doublereal* x, doublereal* xx, doublereal* xs, doublereal* y, doublereal* yy, doublereal* ys, doublereal* yt, doublereal* g, doublereal* z__, integer* iorder, ftnlen sname_len)
{
/* Initialized data */
@ -3125,24 +2938,25 @@ ftnlen sname_len;
static logical full, null;
static char uplo[1];
static integer i__, j, n;
extern /* Subroutine */ int dmake_();
extern /* Subroutine */ int dmake_(char* , char*, char*, integer*, integer*, doublereal*, integer*, doublereal*, integer*, integer*, integer*, logical*, doublereal*, ftnlen, ftnlen, ftnlen);
static doublereal alpha, w[2];
static logical isame[13];
extern /* Subroutine */ int dmvch_();
extern /* Subroutine */ int dmvch_(char*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, integer*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen);
static integer nargs;
static logical reset;
static char cuplo[14];
static integer incxs, incys;
static logical upper;
static char uplos[1];
extern /* Subroutine */ int cdspr2_(), cdsyr2_();
extern /* Subroutine */ void cdspr2_(integer*, char*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, ftnlen);
extern /* Subroutine */ void cdsyr2_(integer*, char*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, integer*, ftnlen);
static integer ia, ja, ic, nc, jj, lj, in;
static logical packed;
static integer ix, iy, ns, lx, ly;
extern logical lderes_();
extern logical lderes_(char*, char*, integer*, integer*, doublereal*, doublereal*, integer*, ftnlen, ftnlen);
static doublereal errmax, transl;
static integer laa, lda;
extern logical lde_();
extern logical lde_(doublereal*, doublereal*, integer*);
static doublereal als, err;
/* Tests DSYR2 and DSPR2. */
@ -3508,25 +3322,13 @@ L170:
} /* dchk6_ */
/* Subroutine */ int dmake_(type__, uplo, diag, m, n, a, nmax, aa, lda, kl,
ku, reset, transl, type_len, uplo_len, diag_len)
char *type__, *uplo, *diag;
integer *m, *n;
doublereal *a;
integer *nmax;
doublereal *aa;
integer *lda, *kl, *ku;
logical *reset;
doublereal *transl;
ftnlen type_len;
ftnlen uplo_len;
ftnlen diag_len;
/* Subroutine */ int dmake_(char* type__, char* uplo, char* diag, integer* m, integer* n, doublereal* a, integer* nmax, doublereal* aa, integer* lda, integer* kl, integer* ku, logical* reset, doublereal* transl, ftnlen type_len, ftnlen uplo_len, ftnlen diag_len)
{
/* System generated locals */
integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
/* Local variables */
extern doublereal dbeg_();
extern doublereal dbeg_(logical* );
static integer ibeg, iend, ioff;
static logical unit;
static integer i__, j;
@ -3752,28 +3554,14 @@ ftnlen diag_len;
} /* dmake_ */
/* Subroutine */ int dmvch_(trans, m, n, alpha, a, nmax, x, incx, beta, y,
incy, yt, g, yy, eps, err, fatal, nout, mv, trans_len)
char *trans;
integer *m, *n;
doublereal *alpha, *a;
integer *nmax;
doublereal *x;
integer *incx;
doublereal *beta, *y;
integer *incy;
doublereal *yt, *g, *yy, *eps, *err;
logical *fatal;
integer *nout;
logical *mv;
ftnlen trans_len;
/* Subroutine */ int dmvch_(char* trans, integer* m, integer* n, doublereal* alpha, doublereal* a, integer* nmax, doublereal* x, integer* incx, doublereal* beta, doublereal* y, integer* incy, doublereal* yt, doublereal* g, doublereal* yy, doublereal* eps, doublereal* err, logical* fatal, integer* nout, logical* mv, ftnlen trans_len)
{
/* System generated locals */
integer a_dim1, a_offset, i__1, i__2;
doublereal d__1;
/* Builtin functions */
double sqrt();
double sqrt(double);
/* Local variables */
static doublereal erri;
@ -3902,9 +3690,7 @@ L70:
} /* dmvch_ */
logical lde_(ri, rj, lr)
doublereal *ri, *rj;
integer *lr;
logical lde_(doublereal* ri, doublereal* rj, integer* lr)
{
/* System generated locals */
integer i__1;
@ -3949,13 +3735,7 @@ L30:
} /* lde_ */
logical lderes_(type__, uplo, m, n, aa, as, lda, type_len, uplo_len)
char *type__, *uplo;
integer *m, *n;
doublereal *aa, *as;
integer *lda;
ftnlen type_len;
ftnlen uplo_len;
logical lderes_(char* type__, char* uplo, integer* m, integer* n, doublereal* aa, doublereal* as, integer* lda, ftnlen type_len, ftnlen uplo_len)
{
/* System generated locals */
integer aa_dim1, aa_offset, as_dim1, as_offset, i__1, i__2;
@ -4042,8 +3822,7 @@ L80:
} /* lderes_ */
doublereal dbeg_(reset)
logical *reset;
doublereal dbeg_(logical* reset)
{
/* System generated locals */
doublereal ret_val;
@ -4094,8 +3873,7 @@ L10:
} /* dbeg_ */
doublereal ddiff_(x, y)
doublereal *x, *y;
doublereal ddiff_(doublereal* x, doublereal* y)
{
/* System generated locals */
doublereal ret_val;

View File

@ -242,129 +242,6 @@ typedef struct Namelist Namelist;
/* procedure parameter types for -A and -C++ */
#define F2C_proc_par_types 1
#ifdef __cplusplus
typedef logical (*L_fp)(...);
#else
typedef logical (*L_fp)();
#endif
#if 0
static float spow_ui(float x, integer n) {
float pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
static double dpow_ui(double x, integer n) {
double pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
#ifdef _MSC_VER
static _Fcomplex cpow_ui(complex x, integer n) {
complex pow={1.0,0.0}; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i;
for(u = n; ; ) {
if(u & 01) pow.r *= x.r, pow.i *= x.i;
if(u >>= 1) x.r *= x.r, x.i *= x.i;
else break;
}
}
_Fcomplex p={pow.r, pow.i};
return p;
}
#else
static _Complex float cpow_ui(_Complex float x, integer n) {
_Complex float pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
#endif
#ifdef _MSC_VER
static _Dcomplex zpow_ui(_Dcomplex x, integer n) {
_Dcomplex pow={1.0,0.0}; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1];
for(u = n; ; ) {
if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1];
if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1];
else break;
}
}
_Dcomplex p = {pow._Val[0], pow._Val[1]};
return p;
}
#else
static _Complex double zpow_ui(_Complex double x, integer n) {
_Complex double pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
#endif
static integer pow_ii(integer x, integer n) {
integer pow; unsigned long int u;
if (n <= 0) {
if (n == 0 || x == 1) pow = 1;
else if (x != -1) pow = x == 0 ? 1/x : 0;
else n = -n;
}
if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
u = n;
for(pow = 1; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
static integer dmaxloc_(double *w, integer s, integer e, integer *n)
{
double m; integer i, mi;
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
if (w[i-1]>m) mi=i ,m=w[i-1];
return mi-s+1;
}
static integer smaxloc_(float *w, integer s, integer e, integer *n)
{
float m; integer i, mi;
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
if (w[i-1]>m) mi=i ,m=w[i-1];
return mi-s+1;
}
#endif
/* -- translated by f2c (version 20000121).
You must link the resulting object file with the libraries:
-lf2c -lm (in that order)
*/
/* Common Block Declarations */
@ -393,7 +270,7 @@ static logical c_true = TRUE_;
static integer c__0 = 0;
static logical c_false = FALSE_;
/* Main program MAIN__() */ int main()
/* Main program MAIN__() */ int main(void)
{
/* Initialized data */
@ -403,25 +280,24 @@ static logical c_false = FALSE_;
integer i__1, i__2, i__3;
doublereal d__1;
/* Builtin functions */
integer s_rsle(), do_lio(), e_rsle(), f_open(), s_wsfe(), do_fio(),
e_wsfe(), s_wsle(), e_wsle(), s_rsfe(), e_rsfe();
integer f_clos();
/* Local variables */
static integer nalf, idim[9];
static logical same;
static integer nbet, ntra;
static logical rewi;
extern /* Subroutine */ int dchk1_(), dchk2_(), dchk3_(), dchk4_(),
dchk5_();
extern /* Subroutine */ int dchk1_(char*, doublereal*, doublereal*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, integer*, ftnlen);
extern /* Subroutine */ int dchk2_(char*, doublereal*, doublereal*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, integer*, ftnlen);
extern /* Subroutine */ int dchk3_(char*, doublereal*, doublereal*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, doublereal*, integer*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, integer*, ftnlen);
extern /* Subroutine */ int dchk4_(char*, doublereal*, doublereal*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, integer*, ftnlen);
/* Subroutine */ int dchk5_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, doublereal* alf, integer* nbet, doublereal* bet, integer* nmax, doublereal* ab, doublereal* aa, doublereal* as, doublereal* bb, doublereal* bs, doublereal* c__, doublereal* cc, doublereal* cs, doublereal* ct, doublereal* g, doublereal* w, integer* iorder, ftnlen sname_len);
static doublereal c__[4225] /* was [65][65] */, g[65];
static integer i__, j;
extern doublereal ddiff_();
extern doublereal ddiff_(doublereal*, doublereal*);
static integer n;
static logical fatal;
static doublereal w[130];
extern /* Subroutine */ int dmmch_();
extern /* Subroutine */ int dmmch_(char*, char*, integer*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, integer*, doublereal*, doublereal*, doublereal*, integer*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen, ftnlen);
static logical trace;
static integer nidim;
static char snaps[32];
@ -433,11 +309,11 @@ static logical c_false = FALSE_;
static char snamet[12], transa[1], transb[1];
static doublereal thresh;
static logical rorder;
extern /* Subroutine */ int cd3chke_();
extern /* Subroutine */ void cd3chke_(char*, ftnlen);
static integer layout;
static logical ltestt, tsterr;
static doublereal alf[7];
extern logical lde_();
extern logical lde_(doublereal*, doublereal*, integer*);
static doublereal bet[7], eps, err;
char tmpchar;
@ -907,21 +783,7 @@ L230:
} /* MAIN__ */
/* Subroutine */ int dchk1_(sname, eps, thresh, nout, ntra, trace, rewi,
fatal, nidim, idim, nalf, alf, nbet, bet, nmax, a, aa, as, b, bb, bs,
c__, cc, cs, ct, g, iorder, sname_len)
char *sname;
doublereal *eps, *thresh;
integer *nout, *ntra;
logical *trace, *rewi, *fatal;
integer *nidim, *idim, *nalf;
doublereal *alf;
integer *nbet;
doublereal *bet;
integer *nmax;
doublereal *a, *aa, *as, *b, *bb, *bs, *c__, *cc, *cs, *ct, *g;
integer *iorder;
ftnlen sname_len;
/* Subroutine */ int dchk1_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, doublereal* alf, integer* nbet, doublereal* bet, integer* nmax, doublereal* a, doublereal* aa, doublereal* as, doublereal* b, doublereal* bb, doublereal* bs, doublereal* c__, doublereal* cc, doublereal* cs, doublereal* ct, doublereal* g, integer* iorder, ftnlen sname_len)
{
/* Initialized data */
@ -931,29 +793,27 @@ ftnlen sname_len;
integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2,
i__3, i__4, i__5, i__6;
/* Builtin functions */
integer f_rew(), s_wsfe(), e_wsfe(), do_fio();
/* Local variables */
static doublereal beta;
static integer ldas, ldbs, ldcs;
static logical same, null;
static integer i__, k, m, n;
extern /* Subroutine */ int dmake_();
extern /* Subroutine */ int dmake_(char*, char*, char*, integer*, integer*, doublereal*, integer*, doublereal*, integer*, logical*, doublereal*, ftnlen, ftnlen, ftnlen);
static doublereal alpha;
extern /* Subroutine */ int dmmch_();
extern /* Subroutine */ int dmmch_(char*, char*, integer*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, integer*, doublereal*, doublereal*, doublereal*, integer*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen, ftnlen);
static logical isame[13], trana, tranb;
static integer nargs;
static logical reset;
extern /* Subroutine */ void dprcn1_();
extern /* Subroutine */ void dprcn1_(integer*, integer*, char*, integer*, char*, char*, integer*, integer*, integer*, doublereal*, integer*, integer*, doublereal*, integer*, ftnlen, ftnlen, ftnlen);
static integer ia, ib, ma, mb, na, nb, nc, ik, im, in;
extern /* Subroutine */ int cdgemm_();
extern /* Subroutine */ void cdgemm_(integer*, char*, char*, integer*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, integer*, ftnlen, ftnlen);
static integer ks, ms, ns;
extern logical lderes_();
extern logical lderes_(char*, char*, integer*, integer*, doublereal*, doublereal*, integer*, ftnlen, ftnlen);
static char tranas[1], tranbs[1], transa[1], transb[1];
static doublereal errmax;
static integer ica, icb, laa, lbb, lda, lcc, ldb, ldc;
extern logical lde_();
extern logical lde_(doublereal*, doublereal*, integer*);
static doublereal als, bls, err;
/* Tests DGEMM. */
@ -1283,23 +1143,8 @@ L130:
} /* dchk1_ */
/* Subroutine */ void dprcn1_(nout, nc, sname, iorder, transa, transb, m, n, k,
alpha, lda, ldb, beta, ldc, sname_len, transa_len, transb_len)
integer *nout, *nc;
char *sname;
integer *iorder;
char *transa, *transb;
integer *m, *n, *k;
doublereal *alpha;
integer *lda, *ldb;
doublereal *beta;
integer *ldc;
ftnlen sname_len;
ftnlen transa_len;
ftnlen transb_len;
/* Subroutine */ void dprcn1_(integer* nout, integer* nc, char* sname, integer* iorder, char* transa, char* transb, integer* m, integer* n, integer* k, doublereal* alpha, integer* lda, integer* ldb, doublereal* beta, integer* ldc, ftnlen sname_len, ftnlen transa_len, ftnlen transb_len)
{
/* Builtin functions */
integer s_wsfe(), do_fio(), e_wsfe();
/* Local variables */
static char crc[14], cta[14], ctb[14];
@ -1328,21 +1173,7 @@ ftnlen transb_len;
} /* dprcn1_ */
/* Subroutine */ int dchk2_(sname, eps, thresh, nout, ntra, trace, rewi,
fatal, nidim, idim, nalf, alf, nbet, bet, nmax, a, aa, as, b, bb, bs,
c__, cc, cs, ct, g, iorder, sname_len)
char *sname;
doublereal *eps, *thresh;
integer *nout, *ntra;
logical *trace, *rewi, *fatal;
integer *nidim, *idim, *nalf;
doublereal *alf;
integer *nbet;
doublereal *bet;
integer *nmax;
doublereal *a, *aa, *as, *b, *bb, *bs, *c__, *cc, *cs, *ct, *g;
integer *iorder;
ftnlen sname_len;
/* Subroutine */ int dchk2_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, doublereal* alf, integer* nbet, doublereal* bet, integer* nmax, doublereal* a, doublereal* aa, doublereal* as, doublereal* b, doublereal* bb, doublereal* bs, doublereal* c__, doublereal* cc, doublereal* cs, doublereal* ct, doublereal* g, integer* iorder, ftnlen sname_len)
{
/* Initialized data */
@ -1353,8 +1184,6 @@ ftnlen sname_len;
integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2,
i__3, i__4, i__5;
/* Builtin functions */
integer f_rew(), s_wsfe(), e_wsfe(), do_fio();
/* Local variables */
static doublereal beta;
@ -1364,21 +1193,21 @@ ftnlen sname_len;
static logical left, null;
static char uplo[1];
static integer i__, m, n;
extern /* Subroutine */ int dmake_();
extern /* Subroutine */ int dmake_(char*, char*, char*, integer*, integer*, doublereal*, integer*, doublereal*, integer*, logical*, doublereal*, ftnlen, ftnlen, ftnlen);
static doublereal alpha;
extern /* Subroutine */ int dmmch_();
extern /* Subroutine */ int dmmch_(char*, char*, integer*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, integer*, doublereal*, doublereal*, doublereal*, integer*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen, ftnlen);
static logical isame[13];
static char sides[1];
static integer nargs;
static logical reset;
static char uplos[1];
extern /* Subroutine */ void dprcn2_();
extern /* Subroutine */ void dprcn2_(integer*, integer*, char*, integer*, char*, char*, integer*, integer*, doublereal*, integer*, integer*, doublereal*, integer*, ftnlen, ftnlen, ftnlen);
static integer ia, ib, na, nc, im, in, ms, ns;
extern logical lderes_();
extern /* Subroutine */ int cdsymm_();
extern logical lderes_(char*, char*, integer*, integer*, doublereal*, doublereal*, integer*, ftnlen, ftnlen);
extern /* Subroutine */ void cdsymm_(integer*, char*, char*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, integer*, ftnlen, ftnlen);
static doublereal errmax;
static integer laa, lbb, lda, lcc, ldb, ldc;
extern logical lde_();
extern logical lde_(doublereal*, doublereal*, integer*);
static integer ics;
static doublereal als, bls;
static integer icu;
@ -1692,23 +1521,8 @@ L120:
} /* dchk2_ */
/* Subroutine */ void dprcn2_(nout, nc, sname, iorder, side, uplo, m, n, alpha,
lda, ldb, beta, ldc, sname_len, side_len, uplo_len)
integer *nout, *nc;
char *sname;
integer *iorder;
char *side, *uplo;
integer *m, *n;
doublereal *alpha;
integer *lda, *ldb;
doublereal *beta;
integer *ldc;
ftnlen sname_len;
ftnlen side_len;
ftnlen uplo_len;
/* Subroutine */ void dprcn2_(integer* nout, integer* nc, char* sname, integer* iorder, char* side, char* uplo, integer* m, integer* n, doublereal* alpha, integer* lda, integer* ldb, doublereal* beta, integer* ldc, ftnlen sname_len, ftnlen side_len, ftnlen uplo_len)
{
/* Builtin functions */
integer s_wsfe(), do_fio(), e_wsfe();
/* Local variables */
static char cs[14], cu[14], crc[14];
@ -1733,19 +1547,7 @@ ftnlen uplo_len;
} /* dprcn2_ */
/* Subroutine */ int dchk3_(sname, eps, thresh, nout, ntra, trace, rewi,
fatal, nidim, idim, nalf, alf, nmax, a, aa, as, b, bb, bs, ct, g, c__,
iorder, sname_len)
char *sname;
doublereal *eps, *thresh;
integer *nout, *ntra;
logical *trace, *rewi, *fatal;
integer *nidim, *idim, *nalf;
doublereal *alf;
integer *nmax;
doublereal *a, *aa, *as, *b, *bb, *bs, *ct, *g, *c__;
integer *iorder;
ftnlen sname_len;
/* Subroutine */ int dchk3_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, doublereal* alf, integer* nmax, doublereal* a, doublereal* aa, doublereal* as, doublereal* b, doublereal* bb, doublereal* bs, doublereal* ct, doublereal* g, doublereal* c__, integer* iorder, ftnlen sname_len)
{
/* Initialized data */
@ -1766,24 +1568,24 @@ ftnlen sname_len;
static logical left, null;
static char uplo[1];
static integer i__, j, m, n;
extern /* Subroutine */ int dmake_();
extern /* Subroutine */ int dmake_(char*, char*, char*, integer*, integer*, doublereal*, integer*, doublereal*, integer*, logical*, doublereal*, ftnlen, ftnlen, ftnlen);
static doublereal alpha;
static char diags[1];
extern /* Subroutine */ int dmmch_();
extern /* Subroutine */ int dmmch_(char*, char*, integer*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, integer*, doublereal*, doublereal*, doublereal*, integer*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen, ftnlen);
static logical isame[13];
static char sides[1];
static integer nargs;
static logical reset;
static char uplos[1];
extern /* Subroutine */ void dprcn3_();
extern /* Subroutine */ void dprcn3_(integer*, integer*, char*, integer*, char*, char*, char*, char*, integer*, integer*, doublereal*, integer*, integer*, ftnlen, ftnlen, ftnlen, ftnlen, ftnlen);
static integer ia, na, nc, im, in, ms, ns;
extern logical lderes_();
extern /* Subroutine */ int cdtrmm_();
extern logical lderes_(char*, char*, integer*, integer*, doublereal*, doublereal*, integer*, ftnlen, ftnlen);
extern /* Subroutine */ void cdtrmm_(integer*, char*, char*, char*, char*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, ftnlen, ftnlen, ftnlen, ftnlen);
static char tranas[1], transa[1];
extern /* Subroutine */ int cdtrsm_();
extern /* Subroutine */ void cdtrsm_(integer*, char*, char*, char*, char*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, ftnlen, ftnlen, ftnlen, ftnlen);
static doublereal errmax;
static integer laa, icd, lbb, lda, ldb;
extern logical lde_();
extern logical lde_(doublereal*, doublereal*, integer*);
static integer ics;
static doublereal als;
static integer ict, icu;
@ -2165,24 +1967,8 @@ L160:
} /* dchk3_ */
/* Subroutine */ void dprcn3_(nout, nc, sname, iorder, side, uplo, transa,
diag, m, n, alpha, lda, ldb, sname_len, side_len, uplo_len,
transa_len, diag_len)
integer *nout, *nc;
char *sname;
integer *iorder;
char *side, *uplo, *transa, *diag;
integer *m, *n;
doublereal *alpha;
integer *lda, *ldb;
ftnlen sname_len;
ftnlen side_len;
ftnlen uplo_len;
ftnlen transa_len;
ftnlen diag_len;
/* Subroutine */ void dprcn3_(integer* nout, integer* nc, char* sname, integer* iorder, char* side, char* uplo, char* transa, char* diag, integer* m, integer* n, doublereal* alpha, integer* lda, integer* ldb, ftnlen sname_len, ftnlen side_len, ftnlen uplo_len, ftnlen transa_len, ftnlen diag_len)
{
/* Builtin functions */
integer s_wsfe(), do_fio(), e_wsfe();
/* Local variables */
static char ca[14], cd[14], cs[14], cu[14], crc[14];
@ -2219,21 +2005,7 @@ ftnlen diag_len;
} /* dprcn3_ */
/* Subroutine */ int dchk4_(sname, eps, thresh, nout, ntra, trace, rewi,
fatal, nidim, idim, nalf, alf, nbet, bet, nmax, a, aa, as, b, bb, bs,
c__, cc, cs, ct, g, iorder, sname_len)
char *sname;
doublereal *eps, *thresh;
integer *nout, *ntra;
logical *trace, *rewi, *fatal;
integer *nidim, *idim, *nalf;
doublereal *alf;
integer *nbet;
doublereal *bet;
integer *nmax;
doublereal *a, *aa, *as, *b, *bb, *bs, *c__, *cc, *cs, *ct, *g;
integer *iorder;
ftnlen sname_len;
/* Subroutine */ int dchk4_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, doublereal* alf, integer* nbet, doublereal* bet, integer* nmax, doublereal* a, doublereal* aa, doublereal* as, doublereal* b, doublereal* bb, doublereal* bs, doublereal* c__, doublereal* cc, doublereal* cs, doublereal* ct, doublereal* g, integer* iorder, ftnlen sname_len)
{
/* Initialized data */
@ -2244,8 +2016,6 @@ ftnlen sname_len;
integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2,
i__3, i__4, i__5;
/* Builtin functions */
integer f_rew(), s_wsfe(), e_wsfe(), do_fio();
/* Local variables */
static doublereal beta;
@ -2255,23 +2025,23 @@ ftnlen sname_len;
static logical tran, null;
static char uplo[1];
static integer i__, j, k, n;
extern /* Subroutine */ int dmake_();
extern /* Subroutine */ int dmake_(char*, char*, char*, integer*, integer*, doublereal*, integer*, doublereal*, integer*, logical*, doublereal*, ftnlen, ftnlen, ftnlen);
static doublereal alpha;
extern /* Subroutine */ int dmmch_();
extern /* Subroutine */ int dmmch_(char*, char*, integer*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, integer*, doublereal*, doublereal*, doublereal*, integer*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen, ftnlen);
static logical isame[13];
static integer nargs;
static logical reset;
static char trans[1];
static logical upper;
static char uplos[1];
extern /* Subroutine */ void dprcn4_();
extern /* Subroutine */ void dprcn4_(integer*, integer*, char*, integer*, char*, char*, integer*, integer*, doublereal*, integer*, doublereal*, integer*, ftnlen, ftnlen, ftnlen);
static integer ia, ib, jc, ma, na, nc, ik, in, jj, lj, ks, ns;
extern logical lderes_();
extern logical lderes_(char*, char*, integer*, integer*, doublereal*, doublereal*, integer*, ftnlen, ftnlen);
static doublereal errmax;
extern /* Subroutine */ int cdsyrk_();
extern /* Subroutine */ void cdsyrk_(integer*, char*, char*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, doublereal*, integer*, ftnlen, ftnlen);
static char transs[1];
static integer laa, lda, lcc, ldc;
extern logical lde_();
extern logical lde_(doublereal*, doublereal*, integer*);
static doublereal als;
static integer ict, icu;
static doublereal err;
@ -2586,23 +2356,8 @@ L130:
} /* dchk4_ */
/* Subroutine */ void dprcn4_(nout, nc, sname, iorder, uplo, transa, n, k,
alpha, lda, beta, ldc, sname_len, uplo_len, transa_len)
integer *nout, *nc;
char *sname;
integer *iorder;
char *uplo, *transa;
integer *n, *k;
doublereal *alpha;
integer *lda;
doublereal *beta;
integer *ldc;
ftnlen sname_len;
ftnlen uplo_len;
ftnlen transa_len;
/* Subroutine */ void dprcn4_(integer* nout, integer* nc, char* sname, integer* iorder, char* uplo, char* transa, integer* n, integer* k, doublereal* alpha, integer* lda, doublereal* beta, integer* ldc, ftnlen sname_len, ftnlen uplo_len, ftnlen transa_len)
{
/* Builtin functions */
integer s_wsfe(), do_fio(), e_wsfe();
/* Local variables */
static char ca[14], cu[14], crc[14];
@ -2629,21 +2384,7 @@ ftnlen transa_len;
} /* dprcn4_ */
/* Subroutine */ int dchk5_(sname, eps, thresh, nout, ntra, trace, rewi,
fatal, nidim, idim, nalf, alf, nbet, bet, nmax, ab, aa, as, bb, bs,
c__, cc, cs, ct, g, w, iorder, sname_len)
char *sname;
doublereal *eps, *thresh;
integer *nout, *ntra;
logical *trace, *rewi, *fatal;
integer *nidim, *idim, *nalf;
doublereal *alf;
integer *nbet;
doublereal *bet;
integer *nmax;
doublereal *ab, *aa, *as, *bb, *bs, *c__, *cc, *cs, *ct, *g, *w;
integer *iorder;
ftnlen sname_len;
/* Subroutine */ int dchk5_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, doublereal* alf, integer* nbet, doublereal* bet, integer* nmax, doublereal* ab, doublereal* aa, doublereal* as, doublereal* bb, doublereal* bs, doublereal* c__, doublereal* cc, doublereal* cs, doublereal* ct, doublereal* g, doublereal* w, integer* iorder, ftnlen sname_len)
{
/* Initialized data */
@ -2653,8 +2394,6 @@ ftnlen sname_len;
/* System generated locals */
integer c_dim1, c_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7, i__8;
/* Builtin functions */
integer f_rew(), s_wsfe(), e_wsfe(), do_fio();
/* Local variables */
static integer jjab;
@ -2665,23 +2404,23 @@ ftnlen sname_len;
static logical tran, null;
static char uplo[1];
static integer i__, j, k, n;
extern /* Subroutine */ int dmake_();
extern /* Subroutine */ int dmake_(char*, char*, char*, integer*, integer*, doublereal*, integer*, doublereal*, integer*, logical*, doublereal*, ftnlen, ftnlen, ftnlen);
static doublereal alpha;
extern /* Subroutine */ int dmmch_();
extern /* Subroutine */ int dmmch_(char*, char*, integer*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, integer*, doublereal*, doublereal*, doublereal*, integer*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen, ftnlen);
static logical isame[13];
static integer nargs;
static logical reset;
static char trans[1];
static logical upper;
static char uplos[1];
extern /* Subroutine */ void dprcn5_();
extern /* Subroutine */ void dprcn5_(integer*, integer*, char*, integer*, char*, char*, integer*, integer*, doublereal*, integer*, integer*, doublereal*, integer*, ftnlen, ftnlen, ftnlen);
static integer ia, ib, jc, ma, na, nc, ik, in, jj, lj, ks, ns;
extern logical lderes_();
extern logical lderes_(char*, char*, integer*, integer*, doublereal*, doublereal*, integer*, ftnlen, ftnlen);
static doublereal errmax;
static char transs[1];
static integer laa, lbb, lda, lcc, ldb, ldc;
extern logical lde_();
extern /* Subroutine */ int cdsyr2k_();
extern logical lde_(doublereal*, doublereal*, integer*);
extern /* Subroutine */ void cdsyr2k_(integer*, char*, char*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, integer*, ftnlen, ftnlen);
static doublereal als;
static integer ict, icu;
static doublereal err;
@ -3048,23 +2787,8 @@ L160:
} /* dchk5_ */
/* Subroutine */ void dprcn5_(nout, nc, sname, iorder, uplo, transa, n, k,
alpha, lda, ldb, beta, ldc, sname_len, uplo_len, transa_len)
integer *nout, *nc;
char *sname;
integer *iorder;
char *uplo, *transa;
integer *n, *k;
doublereal *alpha;
integer *lda, *ldb;
doublereal *beta;
integer *ldc;
ftnlen sname_len;
ftnlen uplo_len;
ftnlen transa_len;
/* Subroutine */ void dprcn5_(integer* nout, integer* nc, char* sname, integer* iorder, char* uplo, char* transa, integer* n, integer* k, doublereal* alpha, integer* lda, integer* ldb, doublereal* beta, integer* ldc, ftnlen sname_len, ftnlen uplo_len, ftnlen transa_len)
{
/* Builtin functions */
integer s_wsfe(), do_fio(), e_wsfe();
/* Local variables */
static char ca[14], cu[14], crc[14];
@ -3091,25 +2815,13 @@ ftnlen transa_len;
} /* dprcn5_ */
/* Subroutine */ int dmake_(type__, uplo, diag, m, n, a, nmax, aa, lda, reset,
transl, type_len, uplo_len, diag_len)
char *type__, *uplo, *diag;
integer *m, *n;
doublereal *a;
integer *nmax;
doublereal *aa;
integer *lda;
logical *reset;
doublereal *transl;
ftnlen type_len;
ftnlen uplo_len;
ftnlen diag_len;
/* Subroutine */ int dmake_(char* type__, char* uplo, char* diag, integer* m, integer* n, doublereal* a, integer* nmax, doublereal* aa, integer* lda, logical* reset, doublereal* transl, ftnlen type_len, ftnlen uplo_len, ftnlen diag_len)
{
/* System generated locals */
integer a_dim1, a_offset, i__1, i__2;
/* Local variables */
extern doublereal dbeg_();
extern doublereal dbeg_(logical*);
static integer ibeg, iend;
static logical unit;
static integer i__, j;
@ -3241,25 +2953,7 @@ ftnlen diag_len;
} /* dmake_ */
/* Subroutine */ int dmmch_(transa, transb, m, n, kk, alpha, a, lda, b, ldb,
beta, c__, ldc, ct, g, cc, ldcc, eps, err, fatal, nout, mv,
transa_len, transb_len)
char *transa, *transb;
integer *m, *n, *kk;
doublereal *alpha, *a;
integer *lda;
doublereal *b;
integer *ldb;
doublereal *beta, *c__;
integer *ldc;
doublereal *ct, *g, *cc;
integer *ldcc;
doublereal *eps, *err;
logical *fatal;
integer *nout;
logical *mv;
ftnlen transa_len;
ftnlen transb_len;
/* Subroutine */ int dmmch_(char* transa, char* transb, integer* m, integer* n, integer* kk, doublereal* alpha, doublereal* a, integer* lda, doublereal* b, integer* ldb, doublereal* beta, doublereal* c__, integer* ldc, doublereal* ct, doublereal* g, doublereal* cc, integer* ldcc, doublereal* eps, doublereal* err, logical* fatal, integer* nout, logical* mv, ftnlen transa_len, ftnlen transb_len)
{
/* System generated locals */
integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, cc_dim1,
@ -3267,8 +2961,7 @@ ftnlen transb_len;
doublereal d__1, d__2;
/* Builtin functions */
double sqrt();
integer s_wsfe(), e_wsfe(), do_fio();
double sqrt(double);
/* Local variables */
static doublereal erri;
@ -3432,9 +3125,7 @@ L150:
} /* dmmch_ */
logical lde_(ri, rj, lr)
doublereal *ri, *rj;
integer *lr;
logical lde_(doublereal* ri, doublereal* rj, integer* lr)
{
/* System generated locals */
integer i__1;
@ -3481,13 +3172,7 @@ L30:
} /* lde_ */
logical lderes_(type__, uplo, m, n, aa, as, lda, type_len, uplo_len)
char *type__, *uplo;
integer *m, *n;
doublereal *aa, *as;
integer *lda;
ftnlen type_len;
ftnlen uplo_len;
logical lderes_(char* type__, char* uplo, integer* m, integer* n, doublereal* aa, doublereal* as, integer* lda, ftnlen type_len, ftnlen uplo_len)
{
/* System generated locals */
integer aa_dim1, aa_offset, as_dim1, as_offset, i__1, i__2;
@ -3576,8 +3261,7 @@ L80:
} /* lderes_ */
doublereal dbeg_(reset)
logical *reset;
doublereal dbeg_(logical* reset)
{
/* System generated locals */
doublereal ret_val;
@ -3629,8 +3313,7 @@ L10:
} /* dbeg_ */
doublereal ddiff_(x, y)
doublereal *x, *y;
doublereal ddiff_(doublereal* x, doublereal* y)
{
/* System generated locals */
doublereal ret_val;

View File

@ -21,19 +21,6 @@ typedef float real;
typedef double doublereal;
typedef struct { real r, i; } complex;
typedef struct { doublereal r, i; } doublecomplex;
#ifdef _MSC_VER
static inline _Fcomplex Cf(complex *z) {_Fcomplex zz={z->r , z->i}; return zz;}
static inline _Dcomplex Cd(doublecomplex *z) {_Dcomplex zz={z->r , z->i};return zz;}
static inline _Fcomplex * _pCf(complex *z) {return (_Fcomplex*)z;}
static inline _Dcomplex * _pCd(doublecomplex *z) {return (_Dcomplex*)z;}
#else
static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
#endif
#define pCf(z) (*_pCf(z))
#define pCd(z) (*_pCd(z))
typedef int logical;
typedef short int shortlogical;
typedef char logical1;
@ -242,250 +229,6 @@ typedef struct Namelist Namelist;
/* procedure parameter types for -A and -C++ */
#define F2C_proc_par_types 1
#ifdef __cplusplus
typedef logical (*L_fp)(...);
#else
typedef logical (*L_fp)();
#endif
#if 0
static float spow_ui(float x, integer n) {
float pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
static double dpow_ui(double x, integer n) {
double pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
#ifdef _MSC_VER
static _Fcomplex cpow_ui(complex x, integer n) {
complex pow={1.0,0.0}; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i;
for(u = n; ; ) {
if(u & 01) pow.r *= x.r, pow.i *= x.i;
if(u >>= 1) x.r *= x.r, x.i *= x.i;
else break;
}
}
_Fcomplex p={pow.r, pow.i};
return p;
}
#else
static _Complex float cpow_ui(_Complex float x, integer n) {
_Complex float pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
#endif
#ifdef _MSC_VER
static _Dcomplex zpow_ui(_Dcomplex x, integer n) {
_Dcomplex pow={1.0,0.0}; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1];
for(u = n; ; ) {
if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1];
if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1];
else break;
}
}
_Dcomplex p = {pow._Val[0], pow._Val[1]};
return p;
}
#else
static _Complex double zpow_ui(_Complex double x, integer n) {
_Complex double pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
#endif
static integer pow_ii(integer x, integer n) {
integer pow; unsigned long int u;
if (n <= 0) {
if (n == 0 || x == 1) pow = 1;
else if (x != -1) pow = x == 0 ? 1/x : 0;
else n = -n;
}
if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
u = n;
for(pow = 1; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
static integer dmaxloc_(double *w, integer s, integer e, integer *n)
{
double m; integer i, mi;
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
if (w[i-1]>m) mi=i ,m=w[i-1];
return mi-s+1;
}
static integer smaxloc_(float *w, integer s, integer e, integer *n)
{
float m; integer i, mi;
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
if (w[i-1]>m) mi=i ,m=w[i-1];
return mi-s+1;
}
#endif
#if 0
static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
integer n = *n_, incx = *incx_, incy = *incy_, i;
#ifdef _MSC_VER
_Fcomplex zdotc = {0.0, 0.0};
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += conjf(Cf(&x[i]))._Val[0] * Cf(&y[i])._Val[0];
zdotc._Val[1] += conjf(Cf(&x[i]))._Val[1] * Cf(&y[i])._Val[1];
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += conjf(Cf(&x[i*incx]))._Val[0] * Cf(&y[i*incy])._Val[0];
zdotc._Val[1] += conjf(Cf(&x[i*incx]))._Val[1] * Cf(&y[i*incy])._Val[1];
}
}
pCf(z) = zdotc;
}
#else
_Complex float zdotc = 0.0;
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
}
}
pCf(z) = zdotc;
}
#endif
static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
integer n = *n_, incx = *incx_, incy = *incy_, i;
#ifdef _MSC_VER
_Dcomplex zdotc = {0.0, 0.0};
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += conj(Cd(&x[i]))._Val[0] * Cd(&y[i])._Val[0];
zdotc._Val[1] += conj(Cd(&x[i]))._Val[1] * Cd(&y[i])._Val[1];
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += conj(Cd(&x[i*incx]))._Val[0] * Cd(&y[i*incy])._Val[0];
zdotc._Val[1] += conj(Cd(&x[i*incx]))._Val[1] * Cd(&y[i*incy])._Val[1];
}
}
pCd(z) = zdotc;
}
#else
_Complex double zdotc = 0.0;
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
}
}
pCd(z) = zdotc;
}
#endif
static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
integer n = *n_, incx = *incx_, incy = *incy_, i;
#ifdef _MSC_VER
_Fcomplex zdotc = {0.0, 0.0};
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += Cf(&x[i])._Val[0] * Cf(&y[i])._Val[0];
zdotc._Val[1] += Cf(&x[i])._Val[1] * Cf(&y[i])._Val[1];
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += Cf(&x[i*incx])._Val[0] * Cf(&y[i*incy])._Val[0];
zdotc._Val[1] += Cf(&x[i*incx])._Val[1] * Cf(&y[i*incy])._Val[1];
}
}
pCf(z) = zdotc;
}
#else
_Complex float zdotc = 0.0;
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += Cf(&x[i]) * Cf(&y[i]);
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
}
}
pCf(z) = zdotc;
}
#endif
static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
integer n = *n_, incx = *incx_, incy = *incy_, i;
#ifdef _MSC_VER
_Dcomplex zdotc = {0.0, 0.0};
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += Cd(&x[i])._Val[0] * Cd(&y[i])._Val[0];
zdotc._Val[1] += Cd(&x[i])._Val[1] * Cd(&y[i])._Val[1];
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += Cd(&x[i*incx])._Val[0] * Cd(&y[i*incy])._Val[0];
zdotc._Val[1] += Cd(&x[i*incx])._Val[1] * Cd(&y[i*incy])._Val[1];
}
}
pCd(z) = zdotc;
}
#else
_Complex double zdotc = 0.0;
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += Cd(&x[i]) * Cd(&y[i]);
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
}
}
pCd(z) = zdotc;
}
#endif
#endif
/* Common Block Declarations */
@ -502,16 +245,16 @@ struct {
static integer c__1 = 1;
static real c_b34 = (float)1.;
/* Main program */ int main ()
/* Main program */ int main (void)
{
/* Initialized data */
static real sfac = (float)9.765625e-4;
/* Local variables */
extern /* Subroutine */ int check0_(), check1_(), check2_(), check3_();
extern /* Subroutine */ int check0_(real*), check1_(real*), check2_(real*), check3_(real*);
static integer ic;
extern /* Subroutine */ int header_();
extern /* Subroutine */ int header_(void);
/* Test program for the REAL Level 1 CBLAS. */
/* Based upon the original CBLAS test routine together with: */
@ -557,7 +300,7 @@ static real c_b34 = (float)1.;
exit(0);
} /* MAIN__ */
/* Subroutine */ int header_()
/* Subroutine */ int header_(void)
{
/* Initialized data */
@ -580,8 +323,7 @@ static real c_b34 = (float)1.;
} /* header_ */
/* Subroutine */ int check0_(sfac)
real *sfac;
/* Subroutine */ int check0_(real *sfac)
{
/* Initialized data */
@ -600,7 +342,7 @@ real *sfac;
/* Local variables */
static integer k;
extern /* Subroutine */ int srotgtest_(), stest1_();
extern /* Subroutine */ int srotgtest_(real*,real*,real*,real*), stest1_(real*,real*,real*,real*);
static real sa, sb, sc, ss;
/* .. Parameters .. */
@ -645,8 +387,7 @@ L40:
return 0;
} /* check0_ */
/* Subroutine */ int check1_(sfac)
real *sfac;
/* Subroutine */ int check1_(real* sfac)
{
/* Initialized data */
@ -692,14 +433,14 @@ real *sfac;
/* Local variables */
static integer i__;
extern real snrm2test_();
extern real snrm2test_(integer*,real*,integer*);
static real stemp[1], strue[8];
extern /* Subroutine */ int stest_(), sscaltest_();
extern real sasumtest_();
extern /* Subroutine */ int itest1_(), stest1_();
extern /* Subroutine */ int stest_(integer*, real*,real*,real*,real*), sscaltest_(integer*,real*,real*,integer*);
extern real sasumtest_(integer*,real*,integer*);
extern /* Subroutine */ int itest1_(integer*,integer*), stest1_(real*,real*,real*,real*);
static real sx[8];
static integer np1;
extern integer isamaxtest_();
extern integer isamaxtest_(integer*,real*,integer*);
static integer len;
@ -761,8 +502,7 @@ real *sfac;
return 0;
} /* check1_ */
/* Subroutine */ int check2_(sfac)
real *sfac;
/* Subroutine */ int check2_(real* sfac)
{
/* Initialized data */
@ -850,12 +590,12 @@ real *sfac;
/* Local variables */
static integer lenx, leny;
extern real sdottest_();
extern real sdottest_(integer*,real*,integer*,real*,integer*);
static integer i__, j, ksize;
extern /* Subroutine */ int stest_(), scopytest_(), sswaptest_(),
saxpytest_();
extern /* Subroutine */ int stest_(integer*,real*,real*,real*,real*), scopytest_(integer*,real*,integer*,real*,integer*), sswaptest_(integer*,real*,integer*,real*,integer*),
saxpytest_(integer*,real*,real*,integer*,real*,integer*);
static integer ki;
extern /* Subroutine */ int stest1_();
extern /* Subroutine */ int stest1_(real*,real*,real*,real*);
static integer kn, mx, my;
static real sx[7], sy[7], stx[7], sty[7];
@ -936,8 +676,7 @@ real *sfac;
return 0;
} /* check2_ */
/* Subroutine */ int check3_(sfac)
real *sfac;
/* Subroutine */ int check3_(real* sfac)
{
/* Initialized data */
@ -969,9 +708,9 @@ real *sfac;
1.17 };
/* Local variables */
extern /* Subroutine */ void srottest_();
extern /* Subroutine */ void srottest_(integer*,real*,integer*,real*,integer*,real*,real*);
static integer i__, k, ksize;
extern /* Subroutine */ int stest_(), srotmtest_();
extern /* Subroutine */ int stest_(integer*,real*,real*,real*,real*), srotmtest_(integer*,real*,integer*,real*,integer*,real*);
static integer ki, kn;
static real sx[19], sy[19], sparam[5], stx[19], sty[19];
@ -1042,16 +781,14 @@ real *sfac;
return 0;
} /* check3_ */
/* Subroutine */ int stest_(len, scomp, strue, ssize, sfac)
integer *len;
real *scomp, *strue, *ssize, *sfac;
/* Subroutine */ int stest_(integer* len, real* scomp, real* strue, real* ssize, real* sfac)
{
integer i__1;
real r__1, r__2, r__3, r__4, r__5;
/* Local variables */
static integer i__;
extern doublereal sdiff_();
extern doublereal sdiff_(real*,real*);
static real sd;
/* ********************************* STEST ************************** */
@ -1107,11 +844,10 @@ L40:
} /* stest_ */
/* Subroutine */ int stest1_(scomp1, strue1, ssize, sfac)
real *scomp1, *strue1, *ssize, *sfac;
/* Subroutine */ int stest1_(real* scomp1, real* strue1, real* ssize, real* sfac)
{
static real scomp[1], strue[1];
extern /* Subroutine */ int stest_();
extern /* Subroutine */ int stest_(integer*,real*,real*,real*,real*);
/* ************************* STEST1 ***************************** */
@ -1138,8 +874,7 @@ real *scomp1, *strue1, *ssize, *sfac;
return 0;
} /* stest1_ */
doublereal sdiff_(sa, sb)
real *sa, *sb;
doublereal sdiff_(real* sa, real* sb)
{
/* System generated locals */
real ret_val;
@ -1153,8 +888,7 @@ real *sa, *sb;
return ret_val;
} /* sdiff_ */
/* Subroutine */ int itest1_(icomp, itrue)
integer *icomp, *itrue;
/* Subroutine */ int itest1_(integer* icomp, integer* itrue)
{
/* Local variables */
static integer id;

View File

@ -242,255 +242,6 @@ typedef struct Namelist Namelist;
/* procedure parameter types for -A and -C++ */
#define F2C_proc_par_types 1
#ifdef __cplusplus
typedef logical (*L_fp)(...);
#else
typedef logical (*L_fp)();
#endif
#if 0
static float spow_ui(float x, integer n) {
float pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
static double dpow_ui(double x, integer n) {
double pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
#ifdef _MSC_VER
static _Fcomplex cpow_ui(complex x, integer n) {
complex pow={1.0,0.0}; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i;
for(u = n; ; ) {
if(u & 01) pow.r *= x.r, pow.i *= x.i;
if(u >>= 1) x.r *= x.r, x.i *= x.i;
else break;
}
}
_Fcomplex p={pow.r, pow.i};
return p;
}
#else
static _Complex float cpow_ui(_Complex float x, integer n) {
_Complex float pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
#endif
#ifdef _MSC_VER
static _Dcomplex zpow_ui(_Dcomplex x, integer n) {
_Dcomplex pow={1.0,0.0}; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1];
for(u = n; ; ) {
if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1];
if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1];
else break;
}
}
_Dcomplex p = {pow._Val[0], pow._Val[1]};
return p;
}
#else
static _Complex double zpow_ui(_Complex double x, integer n) {
_Complex double pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
#endif
static integer pow_ii(integer x, integer n) {
integer pow; unsigned long int u;
if (n <= 0) {
if (n == 0 || x == 1) pow = 1;
else if (x != -1) pow = x == 0 ? 1/x : 0;
else n = -n;
}
if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
u = n;
for(pow = 1; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
static integer dmaxloc_(double *w, integer s, integer e, integer *n)
{
double m; integer i, mi;
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
if (w[i-1]>m) mi=i ,m=w[i-1];
return mi-s+1;
}
static integer smaxloc_(float *w, integer s, integer e, integer *n)
{
float m; integer i, mi;
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
if (w[i-1]>m) mi=i ,m=w[i-1];
return mi-s+1;
}
#endif
#if 0
static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
integer n = *n_, incx = *incx_, incy = *incy_, i;
#ifdef _MSC_VER
_Fcomplex zdotc = {0.0, 0.0};
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += conjf(Cf(&x[i]))._Val[0] * Cf(&y[i])._Val[0];
zdotc._Val[1] += conjf(Cf(&x[i]))._Val[1] * Cf(&y[i])._Val[1];
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += conjf(Cf(&x[i*incx]))._Val[0] * Cf(&y[i*incy])._Val[0];
zdotc._Val[1] += conjf(Cf(&x[i*incx]))._Val[1] * Cf(&y[i*incy])._Val[1];
}
}
pCf(z) = zdotc;
}
#else
_Complex float zdotc = 0.0;
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
}
}
pCf(z) = zdotc;
}
#endif
static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
integer n = *n_, incx = *incx_, incy = *incy_, i;
#ifdef _MSC_VER
_Dcomplex zdotc = {0.0, 0.0};
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += conj(Cd(&x[i]))._Val[0] * Cd(&y[i])._Val[0];
zdotc._Val[1] += conj(Cd(&x[i]))._Val[1] * Cd(&y[i])._Val[1];
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += conj(Cd(&x[i*incx]))._Val[0] * Cd(&y[i*incy])._Val[0];
zdotc._Val[1] += conj(Cd(&x[i*incx]))._Val[1] * Cd(&y[i*incy])._Val[1];
}
}
pCd(z) = zdotc;
}
#else
_Complex double zdotc = 0.0;
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
}
}
pCd(z) = zdotc;
}
#endif
static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
integer n = *n_, incx = *incx_, incy = *incy_, i;
#ifdef _MSC_VER
_Fcomplex zdotc = {0.0, 0.0};
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += Cf(&x[i])._Val[0] * Cf(&y[i])._Val[0];
zdotc._Val[1] += Cf(&x[i])._Val[1] * Cf(&y[i])._Val[1];
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += Cf(&x[i*incx])._Val[0] * Cf(&y[i*incy])._Val[0];
zdotc._Val[1] += Cf(&x[i*incx])._Val[1] * Cf(&y[i*incy])._Val[1];
}
}
pCf(z) = zdotc;
}
#else
_Complex float zdotc = 0.0;
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += Cf(&x[i]) * Cf(&y[i]);
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
}
}
pCf(z) = zdotc;
}
#endif
static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
integer n = *n_, incx = *incx_, incy = *incy_, i;
#ifdef _MSC_VER
_Dcomplex zdotc = {0.0, 0.0};
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += Cd(&x[i])._Val[0] * Cd(&y[i])._Val[0];
zdotc._Val[1] += Cd(&x[i])._Val[1] * Cd(&y[i])._Val[1];
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += Cd(&x[i*incx])._Val[0] * Cd(&y[i*incy])._Val[0];
zdotc._Val[1] += Cd(&x[i*incx])._Val[1] * Cd(&y[i*incy])._Val[1];
}
}
pCd(z) = zdotc;
}
#else
_Complex double zdotc = 0.0;
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += Cd(&x[i]) * Cd(&y[i]);
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
}
}
pCd(z) = zdotc;
}
#endif
#endif
/* -- translated by f2c (version 20000121).
You must link the resulting object file with the libraries:
-lf2c -lm (in that order)
*/
/* Common Block Declarations */
@ -521,7 +272,7 @@ static integer c_n1 = -1;
static integer c__0 = 0;
static logical c_false = FALSE_;
/* Main program */ int main()
/* Main program */ int main(void)
{
/* Initialized data */
@ -539,16 +290,20 @@ static logical c_false = FALSE_;
static logical same;
static integer ninc, nbet, ntra;
static logical rewi;
extern /* Subroutine */ int schk1_(), schk2_(), schk3_(), schk4_(),
schk5_(), schk6_();
extern /* Subroutine */ int schk1_(char*, real*, real*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, integer*, integer*, real*, integer*, real*, integer*, integer*, integer*, integer*, real*, real*, real*, real*, real*, real*, real*, real*, real*, real*, real*, integer*, ftnlen);
extern /* Subroutine */ int schk2_(char*, real*, real*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, integer*, integer*, real*, integer*, real*, integer*, integer*, integer*, integer*, real*, real*, real*, real*, real*, real*, real*, real*, real*, real*, real*, integer*, ftnlen);
extern /* Subroutine */ int schk3_(char*, real*, real*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, integer*, integer*, integer*, integer*, integer*, real*, real*, real*, real*, real*, real*, real*, real*, real*, integer*, ftnlen);
extern /* Subroutine */ int schk4_(char*, real*, real*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, real*, integer*, integer*, integer*, integer*, real*, real*, real*, real*, real*, real*, real*, real*, real*, real*, real*, real*, integer*, ftnlen);
extern /* Subroutine */ int schk5_(char*, real*, real*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, real*, integer*, integer*, integer*, integer*, real*, real*, real*, real*, real*, real*, real*, real*, real*, real*, real*, real*, integer*, ftnlen);
extern /* Subroutine */ int schk6_(char* sname, real* eps, real* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, real* alf, integer* ninc, integer* inc, integer* nmax, integer* incmax, real* a, real* aa, real* as, real* x, real* xx, real* xs, real* y, real* yy, real* ys, real* yt, real* g, real* z__, integer* iorder, ftnlen sname_len);
static real a[4225] /* was [65][65] */, g[65];
static integer i__, j, n;
static logical fatal;
static real x[65], y[65], z__[130];
extern doublereal sdiff_();
extern doublereal sdiff_(real*, real*);
static logical trace;
static integer nidim;
extern /* Subroutine */ int smvch_();
extern /* Subroutine */ int smvch_(char*, integer*, integer*, real*, real*, integer*, real*, integer*, real*, real*, integer*, real*, real*, real*, real*, real*, logical*, integer*, logical*, ftnlen);
static char snaps[32], trans[1];
static integer isnum;
static logical ltest[16];
@ -564,12 +319,12 @@ static logical c_false = FALSE_;
static logical rorder;
static integer layout;
static logical ltestt;
extern /* Subroutine */ int cs2chke_();
extern /* Subroutine */ int cs2chke_(char*, ftnlen);
static logical tsterr;
static real alf[7];
static integer inc[7], nkb;
static real bet[7];
extern logical lse_();
extern logical lse_(real*, real*, integer*);
static real eps, err;
char tmpchar;
@ -1098,21 +853,7 @@ L240:
} /* MAIN__ */
/* Subroutine */ int schk1_(sname, eps, thresh, nout, ntra, trace, rewi,
fatal, nidim, idim, nkb, kb, nalf, alf, nbet, bet, ninc, inc, nmax,
incmax, a, aa, as, x, xx, xs, y, yy, ys, yt, g, iorder, sname_len)
char *sname;
real *eps, *thresh;
integer *nout, *ntra;
logical *trace, *rewi, *fatal;
integer *nidim, *idim, *nkb, *kb, *nalf;
real *alf;
integer *nbet;
real *bet;
integer *ninc, *inc, *nmax, *incmax;
real *a, *aa, *as, *x, *xx, *xs, *y, *yy, *ys, *yt, *g;
integer *iorder;
ftnlen sname_len;
/* Subroutine */ int schk1_(char* sname, real* eps, real* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nkb, integer* kb, integer* nalf, real* alf, integer* nbet, real* bet, integer* ninc, integer* inc, integer* nmax, integer* incmax, real* a, real* aa, real* as, real* x, real* xx, real* xs, real* y, real* yy, real* ys, real* yt, real* g, integer* iorder, ftnlen sname_len)
{
/* Initialized data */
@ -1130,24 +871,25 @@ ftnlen sname_len;
static integer i__, m, n;
static real alpha;
static logical isame[13];
extern /* Subroutine */ int smake_();
extern /* Subroutine */ int smake_(char*, char*, char*, integer*, integer*, real*, integer*, real*, integer*, integer*, integer*, logical*, real*, ftnlen, ftnlen, ftnlen);
static integer nargs;
extern /* Subroutine */ int smvch_();
extern /* Subroutine */ int smvch_(char*, integer*, integer*, real*, real*, integer*, real*, integer*, real*, real*, integer*, real*, real*, real*, real*, real*, logical*, integer*, logical*, ftnlen);
static logical reset;
static integer incxs, incys;
static char trans[1];
static integer ia, ib, ic;
static logical banded;
static integer nc, nd, im, in, kl, ml, nk, nl, ku, ix, iy, ms, lx, ly, ns;
extern /* Subroutine */ int csgbmv_(), csgemv_();
extern /* Subroutine */ void csgbmv_(integer*, char*, integer*, integer*, integer*, integer*, real*, real*, integer*, real*, integer*, real*, real*, integer*, ftnlen);
extern /* Subroutine */ void csgemv_(integer*, char*, integer*, integer*, real*, real*, integer*, real*, integer*, real*, real*, integer*, ftnlen);
static char ctrans[14];
static real errmax;
extern logical lseres_();
extern logical lseres_(char* type__, char* uplo, integer* m, integer* n, real* aa, real* as, integer* lda, ftnlen ltype_len, ftnlen uplo_len);
static real transl;
static char transs[1];
static integer laa, lda;
static real als, bls;
extern logical lse_();
extern logical lse_(real*, real*, integer*);
static real err;
static integer iku, kls, kus;
@ -1552,21 +1294,7 @@ L140:
} /* schk1_ */
/* Subroutine */ int schk2_(sname, eps, thresh, nout, ntra, trace, rewi,
fatal, nidim, idim, nkb, kb, nalf, alf, nbet, bet, ninc, inc, nmax,
incmax, a, aa, as, x, xx, xs, y, yy, ys, yt, g, iorder, sname_len)
char *sname;
real *eps, *thresh;
integer *nout, *ntra;
logical *trace, *rewi, *fatal;
integer *nidim, *idim, *nkb, *kb, *nalf;
real *alf;
integer *nbet;
real *bet;
integer *ninc, *inc, *nmax, *incmax;
real *a, *aa, *as, *x, *xx, *xs, *y, *yy, *ys, *yt, *g;
integer *iorder;
ftnlen sname_len;
/* Subroutine */ int schk2_(char* sname, real* eps, real* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nkb, integer* kb, integer* nalf, real* alf, integer* nbet, real* bet, integer* ninc, integer* inc, integer* nmax, integer* incmax, real* a, real* aa, real* as, real* x, real* xx, real* xs, real* y, real* yy, real* ys, real* yt, real* g, integer* iorder, ftnlen sname_len)
{
/* Initialized data */
@ -1585,9 +1313,9 @@ ftnlen sname_len;
static integer i__, k, n;
static real alpha;
static logical isame[13];
extern /* Subroutine */ int smake_();
extern /* Subroutine */ int smake_(char*, char*, char*, integer*, integer*, real*, integer*, real*, integer*, integer*, integer*, logical*, real*, ftnlen, ftnlen, ftnlen);
static integer nargs;
extern /* Subroutine */ int smvch_();
extern /* Subroutine */ int smvch_(char*, integer*, integer*, real*, real*, integer*, real*, integer*, real*, real*, integer*, real*, real*, real*, real*, real*, logical*, integer*, logical*, ftnlen);
static logical reset;
static char cuplo[14];
static integer incxs, incys;
@ -1598,13 +1326,14 @@ ftnlen sname_len;
static logical packed;
static integer nk, ks, ix, iy, ns, lx, ly;
static real errmax;
extern logical lseres_();
extern /* Subroutine */ int cssbmv_();
extern logical lseres_(char* , char*, integer*, integer*, real*, real*, integer*, ftnlen, ftnlen);
extern /* Subroutine */ void cssbmv_(integer*, char*, integer*, integer*, real*, real*, integer*, real*, integer*, real*, real*, integer*, ftnlen);
static real transl;
extern /* Subroutine */ int csspmv_(), cssymv_();
extern /* Subroutine */ void csspmv_(integer*, char*, integer*, real*, real*, real*, integer*, real*, real*, integer*, ftnlen);
extern /* Subroutine */ void cssymv_(integer*, char*, integer*, real*, real*, integer*, real*, integer*, real*, real*, integer*, ftnlen);
static integer laa, lda;
static real als, bls;
extern logical lse_();
extern logical lse_(real*, real*, integer*);
static real err;
/* Tests SSYMV, SSBMV and SSPMV. */
@ -2003,17 +1732,7 @@ L130:
} /* schk2_ */
/* Subroutine */ int schk3_(sname, eps, thresh, nout, ntra, trace, rewi,
fatal, nidim, idim, nkb, kb, ninc, inc, nmax, incmax, a, aa, as, x,
xx, xs, xt, g, z__, iorder, sname_len)
char *sname;
real *eps, *thresh;
integer *nout, *ntra;
logical *trace, *rewi, *fatal;
integer *nidim, *idim, *nkb, *kb, *ninc, *inc, *nmax, *incmax;
real *a, *aa, *as, *x, *xx, *xs, *xt, *g, *z__;
integer *iorder;
ftnlen sname_len;
/* Subroutine */ int schk3_(char* sname, real* eps, real* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nkb, integer* kb, integer* ninc, integer* inc, integer* nmax, integer* incmax, real* a, real* aa, real* as, real* x, real* xx, real* xs, real* xt, real* g, real* z__, integer* iorder, ftnlen sname_len)
{
/* Initialized data */
@ -2034,9 +1753,9 @@ ftnlen sname_len;
static integer i__, k, n;
static char diags[1];
static logical isame[13];
extern /* Subroutine */ int smake_();
extern /* Subroutine */ int smake_(char*, char*, char*, integer*, integer*, real*, integer*, real*, integer*, integer*, integer*, logical*, real*, ftnlen, ftnlen, ftnlen);
static integer nargs;
extern /* Subroutine */ int smvch_();
extern /* Subroutine */ int smvch_(char*, integer*, integer*, real*, real*, integer*, real*, integer*, real*, real*, integer*, real*, real*, real*, real*, real*, logical*, integer*, logical*, ftnlen);
static logical reset;
static char cuplo[14];
static integer incxs;
@ -2047,14 +1766,17 @@ ftnlen sname_len;
static integer nk, ks, ix, ns, lx;
static char ctrans[14];
static real errmax;
extern logical lseres_();
extern /* Subroutine */ int cstbmv_();
extern logical lseres_(char*, char*, integer*, integer*, real*, real*, integer*, ftnlen, ftnlen);
extern /* Subroutine */ void cstbmv_(integer*, char*, char*, char*, integer*, integer*, real*, integer*, real*, integer*, ftnlen, ftnlen, ftnlen);
static real transl;
extern /* Subroutine */ int cstbsv_();
extern /* Subroutine */ void cstbsv_(integer*, char*, char*, char*, integer*, integer*, real*, integer*, real*, integer*, ftnlen, ftnlen, ftnlen);
static char transs[1];
extern /* Subroutine */ int cstpmv_(), cstrmv_(), cstpsv_(), cstrsv_();
extern /* Subroutine */ void cstpmv_(integer*, char*, char*, char*, integer*, real*, real*, integer*, ftnlen, ftnlen, ftnlen);
extern /* Subroutine */ void cstrmv_(integer*, char*, char*, char*, integer*, real*, integer*, real*, integer*, ftnlen, ftnlen, ftnlen);
extern /* Subroutine */ void cstpsv_(integer*, char*, char*, char*, integer*, real*, real*, integer*, ftnlen, ftnlen, ftnlen);
extern /* Subroutine */ void cstrsv_(integer*, char*, char*, char*, integer*, real*, integer*, real*, integer*, ftnlen, ftnlen, ftnlen);
static integer laa, icd, lda, ict, icu;
extern logical lse_();
extern logical lse_(real*, real*, integer*);
static real err;
/* Tests STRMV, STBMV, STPMV, STRSV, STBSV and STPSV. */
@ -2508,19 +2230,7 @@ L130:
} /* schk3_ */
/* Subroutine */ int schk4_(sname, eps, thresh, nout, ntra, trace, rewi,
fatal, nidim, idim, nalf, alf, ninc, inc, nmax, incmax, a, aa, as, x,
xx, xs, y, yy, ys, yt, g, z__, iorder, sname_len)
char *sname;
real *eps, *thresh;
integer *nout, *ntra;
logical *trace, *rewi, *fatal;
integer *nidim, *idim, *nalf;
real *alf;
integer *ninc, *inc, *nmax, *incmax;
real *a, *aa, *as, *x, *xx, *xs, *y, *yy, *ys, *yt, *g, *z__;
integer *iorder;
ftnlen sname_len;
/* Subroutine */ int schk4_(char* sname, real* eps, real* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, real* alf, integer* ninc, integer* inc, integer* nmax, integer* incmax, real* a, real* aa, real* as, real* x, real* xx, real* xs, real* y, real* yy, real* ys, real* yt, real* g, real* z__, integer* iorder, ftnlen sname_len)
{
/* System generated locals */
integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6;
@ -2533,17 +2243,18 @@ ftnlen sname_len;
static integer i__, j, m, n;
static real alpha, w[1];
static logical isame[13];
extern /* Subroutine */ int smake_(), csger_();
/* Subroutine */ int smake_(char*, char*, char*, integer*, integer*, real*, integer*, real*, integer*, integer*, integer*, logical*, real*, ftnlen, ftnlen, ftnlen);
extern /* Subroutine */ void csger_(integer*, integer*, integer*, real*, real*, integer*, real*, integer*, real*, integer*);
static integer nargs;
extern /* Subroutine */ int smvch_();
extern /* Subroutine */ int smvch_(char*, integer*, integer*, real*, real*, integer*, real*, integer*, real*, real*, integer*, real*, real*, real*, real*, real*, logical*, integer*, logical*, ftnlen);
static logical reset;
static integer incxs, incys, ia, nc, nd, im, in, ms, ix, iy, ns, lx, ly;
static real errmax;
extern logical lseres_();
extern logical lseres_(char* , char*, integer*, integer*, real*, real*, integer*, ftnlen, ftnlen);
static real transl;
static integer laa, lda;
static real als;
extern logical lse_();
extern logical lse_(real*, real*, integer*);
static real err;
/* Tests SGER. */
@ -2848,19 +2559,7 @@ L150:
} /* schk4_ */
/* Subroutine */ int schk5_(sname, eps, thresh, nout, ntra, trace, rewi,
fatal, nidim, idim, nalf, alf, ninc, inc, nmax, incmax, a, aa, as, x,
xx, xs, y, yy, ys, yt, g, z__, iorder, sname_len)
char *sname;
real *eps, *thresh;
integer *nout, *ntra;
logical *trace, *rewi, *fatal;
integer *nidim, *idim, *nalf;
real *alf;
integer *ninc, *inc, *nmax, *incmax;
real *a, *aa, *as, *x, *xx, *xs, *y, *yy, *ys, *yt, *g, *z__;
integer *iorder;
ftnlen sname_len;
/* Subroutine */ int schk5_(char* sname, real* eps, real* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, real* alf, integer* ninc, integer* inc, integer* nmax, integer* incmax, real* a, real* aa, real* as, real* x, real* xx, real* xs, real* y, real* yy, real* ys, real* yt, real* g, real* z__, integer* iorder, ftnlen sname_len)
{
/* Initialized data */
@ -2880,25 +2579,25 @@ ftnlen sname_len;
static integer i__, j, n;
static real alpha, w[1];
static logical isame[13];
extern /* Subroutine */ int smake_();
extern /* Subroutine */ int smake_(char*, char*, char*, integer*, integer*, real*, integer*, real*, integer*, integer*, integer*, logical*, real*, ftnlen, ftnlen, ftnlen);
static integer nargs;
extern /* Subroutine */ int smvch_();
extern /* Subroutine */ int smvch_(char*, integer*, integer*, real*, real*, integer*, real*, integer*, real*, real*, integer*, real*, real*, real*, real*, real*, logical*, integer*, logical*, ftnlen);
static logical reset;
static char cuplo[14];
static integer incxs;
extern /* Subroutine */ int csspr_();
extern /* Subroutine */ void csspr_(integer*, char*, integer*, real*, real*, integer*, real*, ftnlen);
static logical upper;
static char uplos[1];
extern /* Subroutine */ int cssyr_();
extern /* Subroutine */ void cssyr_(integer*, char*, integer*, real*, real*, integer*, real*, integer*, ftnlen);
static integer ia, ja, ic, nc, jj, lj, in;
static logical packed;
static integer ix, ns, lx;
static real errmax;
extern logical lseres_();
extern logical lseres_(char*, char*, integer*, integer*, real*, real*, integer*, ftnlen, ftnlen);
static real transl;
static integer laa, lda;
static real als;
extern logical lse_();
extern logical lse_(real*, real*, integer*);
static real err;
/* Tests SSYR and SSPR. */
@ -3218,19 +2917,7 @@ L130:
} /* schk5_ */
/* Subroutine */ int schk6_(sname, eps, thresh, nout, ntra, trace, rewi,
fatal, nidim, idim, nalf, alf, ninc, inc, nmax, incmax, a, aa, as, x,
xx, xs, y, yy, ys, yt, g, z__, iorder, sname_len)
char *sname;
real *eps, *thresh;
integer *nout, *ntra;
logical *trace, *rewi, *fatal;
integer *nidim, *idim, *nalf;
real *alf;
integer *ninc, *inc, *nmax, *incmax;
real *a, *aa, *as, *x, *xx, *xs, *y, *yy, *ys, *yt, *g, *z__;
integer *iorder;
ftnlen sname_len;
/* Subroutine */ int schk6_(char* sname, real* eps, real* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, real* alf, integer* ninc, integer* inc, integer* nmax, integer* incmax, real* a, real* aa, real* as, real* x, real* xx, real* xs, real* y, real* yy, real* ys, real* yt, real* g, real* z__, integer* iorder, ftnlen sname_len)
{
/* Initialized data */
@ -3249,26 +2936,26 @@ ftnlen sname_len;
static integer i__, j, n;
static real alpha, w[2];
static logical isame[13];
extern /* Subroutine */ int smake_();
extern /* Subroutine */ int smake_(char*, char*, char*, integer*, integer*, real*, integer*, real*, integer*, integer*, integer*, logical*, real*, ftnlen, ftnlen, ftnlen);
static integer nargs;
extern /* Subroutine */ int smvch_();
extern /* Subroutine */ int smvch_(char*, integer*, integer*, real*, real*, integer*, real*, integer*, real*, real*, integer*, real*, real*, real*, real*, real*, logical*, integer*, logical*, ftnlen);
static logical reset;
static char cuplo[14];
static integer incxs, incys;
static logical upper;
static char uplos[1];
static integer ia, ja, ic;
extern /* Subroutine */ int csspr2_();
extern /* Subroutine */ void csspr2_(integer*, char*, integer*, real*, real*, integer*, real*, integer*, real*, ftnlen);
static integer nc, jj, lj, in;
static logical packed;
extern /* Subroutine */ int cssyr2_();
extern /* Subroutine */ void cssyr2_(integer*, char*, integer*, real*, real*, integer*, real*, integer*, real*, integer*, ftnlen);
static integer ix, iy, ns, lx, ly;
static real errmax;
extern logical lseres_();
extern logical lseres_(char* type__, char* uplo, integer* m, integer* n, real* aa, real* as, integer* lda, ftnlen ltype_len, ftnlen uplo_len);
static real transl;
static integer laa, lda;
static real als;
extern logical lse_();
extern logical lse_(real*, real*, integer*);
static real err;
/* Tests SSYR2 and SSPR2. */
@ -3634,26 +3321,14 @@ L170:
} /* schk6_ */
/* Subroutine */ int smake_(type__, uplo, diag, m, n, a, nmax, aa, lda, kl,
ku, reset, transl, type_len, uplo_len, diag_len)
char *type__, *uplo, *diag;
integer *m, *n;
real *a;
integer *nmax;
real *aa;
integer *lda, *kl, *ku;
logical *reset;
real *transl;
ftnlen type_len;
ftnlen uplo_len;
ftnlen diag_len;
{
/* Subroutine */ int smake_(char* type__, char* uplo, char* diag, integer* m, integer* n, real* a, integer* nmax, real* aa, integer* lda, integer* kl, integer* ku, logical* reset, real* transl, ftnlen type_len, ftnlen uplo_len, ftnlen diag_len)
{
/* System generated locals */
integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
/* Local variables */
static integer ibeg, iend;
extern doublereal sbeg_();
extern doublereal sbeg_(logical*);
static integer ioff;
static logical unit;
static integer i__, j;
@ -3879,28 +3554,14 @@ ftnlen diag_len;
} /* smake_ */
/* Subroutine */ int smvch_(trans, m, n, alpha, a, nmax, x, incx, beta, y,
incy, yt, g, yy, eps, err, fatal, nout, mv, trans_len)
char *trans;
integer *m, *n;
real *alpha, *a;
integer *nmax;
real *x;
integer *incx;
real *beta, *y;
integer *incy;
real *yt, *g, *yy, *eps, *err;
logical *fatal;
integer *nout;
logical *mv;
ftnlen trans_len;
/* Subroutine */ int smvch_(char* trans, integer* m, integer* n, real* alpha, real* a, integer* nmax, real* x, integer* incx, real* beta, real* y, integer* incy, real* yt, real* g, real* yy, real* eps, real* err, logical* fatal, integer* nout, logical* mv, ftnlen trans_len)
{
/* System generated locals */
integer a_dim1, a_offset, i__1, i__2;
real r__1;
/* Builtin functions */
double sqrt();
double sqrt(double);
/* Local variables */
static real erri;
@ -4029,9 +3690,7 @@ L70:
} /* smvch_ */
logical lse_(ri, rj, lr)
real *ri, *rj;
integer *lr;
logical lse_(real* ri, real* rj, integer* lr)
{
/* System generated locals */
integer i__1;
@ -4076,13 +3735,7 @@ L30:
} /* lse_ */
logical lseres_(type__, uplo, m, n, aa, as, lda, type_len, uplo_len)
char *type__, *uplo;
integer *m, *n;
real *aa, *as;
integer *lda;
ftnlen type_len;
ftnlen uplo_len;
logical lseres_(char* type__, char* uplo, integer* m, integer* n, real* aa, real* as, integer* lda, ftnlen ltype_len, ftnlen uplo_len)
{
/* System generated locals */
integer aa_dim1, aa_offset, as_dim1, as_offset, i__1, i__2;
@ -4169,8 +3822,7 @@ L80:
} /* lseres_ */
doublereal sbeg_(reset)
logical *reset;
doublereal sbeg_(logical* reset)
{
/* System generated locals */
real ret_val;
@ -4221,8 +3873,7 @@ L10:
} /* sbeg_ */
doublereal sdiff_(x, y)
real *x, *y;
doublereal sdiff_(real* x, real* y)
{
/* System generated locals */
real ret_val;

View File

@ -242,129 +242,6 @@ typedef struct Namelist Namelist;
/* procedure parameter types for -A and -C++ */
#define F2C_proc_par_types 1
#ifdef __cplusplus
typedef logical (*L_fp)(...);
#else
typedef logical (*L_fp)();
#endif
#if 0
static float spow_ui(float x, integer n) {
float pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
static double dpow_ui(double x, integer n) {
double pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
#ifdef _MSC_VER
static _Fcomplex cpow_ui(complex x, integer n) {
complex pow={1.0,0.0}; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i;
for(u = n; ; ) {
if(u & 01) pow.r *= x.r, pow.i *= x.i;
if(u >>= 1) x.r *= x.r, x.i *= x.i;
else break;
}
}
_Fcomplex p={pow.r, pow.i};
return p;
}
#else
static _Complex float cpow_ui(_Complex float x, integer n) {
_Complex float pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
#endif
#ifdef _MSC_VER
static _Dcomplex zpow_ui(_Dcomplex x, integer n) {
_Dcomplex pow={1.0,0.0}; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1];
for(u = n; ; ) {
if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1];
if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1];
else break;
}
}
_Dcomplex p = {pow._Val[0], pow._Val[1]};
return p;
}
#else
static _Complex double zpow_ui(_Complex double x, integer n) {
_Complex double pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
#endif
static integer pow_ii(integer x, integer n) {
integer pow; unsigned long int u;
if (n <= 0) {
if (n == 0 || x == 1) pow = 1;
else if (x != -1) pow = x == 0 ? 1/x : 0;
else n = -n;
}
if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
u = n;
for(pow = 1; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
static integer dmaxloc_(double *w, integer s, integer e, integer *n)
{
double m; integer i, mi;
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
if (w[i-1]>m) mi=i ,m=w[i-1];
return mi-s+1;
}
static integer smaxloc_(float *w, integer s, integer e, integer *n)
{
float m; integer i, mi;
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
if (w[i-1]>m) mi=i ,m=w[i-1];
return mi-s+1;
}
#endif
/* -- translated by f2c (version 20000121).
You must link the resulting object file with the libraries:
-lf2c -lm (in that order)
*/
/* Common Block Declarations */
@ -393,7 +270,7 @@ static logical c_true = TRUE_;
static integer c__0 = 0;
static logical c_false = FALSE_;
/* Main program MAIN__() */ int main()
/* Main program MAIN__() */ int main(void)
{
/* Initialized data */
@ -402,26 +279,25 @@ static logical c_false = FALSE_;
/* System generated locals */
integer i__1, i__2, i__3;
real r__1;
/* Builtin functions */
integer s_rsle(), do_lio(), e_rsle(), f_open(), s_wsfe(), do_fio(),
e_wsfe(), s_wsle(), e_wsle(), s_rsfe(), e_rsfe();
integer f_clos();
/* Local variables */
static integer nalf, idim[9];
static logical same;
static integer nbet, ntra;
static logical rewi;
extern /* Subroutine */ int schk1_(), schk2_(), schk3_(), schk4_(),
schk5_();
extern /* Subroutine */ int schk1_(char*, real*, real*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, real*, integer*, real*, integer*, real*, real*, real*, real*, real*, real*, real*, real*, real*, real*, real*, integer*, ftnlen);
extern /* Subroutine */ int schk2_(char*, real*, real*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, real*, integer*, real*, integer*, real*, real*, real*, real*, real*, real*, real*, real*, real*, real*, real*, integer*, ftnlen);
extern /* Subroutine */ int schk3_(char*, real*, real*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, real*, integer*, real*, real*, real*, real*, real*, real*, real*, real*, real*, integer*, ftnlen);
extern /* Subroutine */ int schk4_(char*, real*, real*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, real*, integer*, real*, integer*, real*, real*, real*, real*, real*, real*, real*, real*, real*, real*, real*, integer*, ftnlen);
extern /* Subroutine */ int schk5_(char*, real*, real*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, real*, integer*, real*, integer*, real*, real*, real*, real*, real*, real*, real*, real*, real*, real*, real*, integer*, ftnlen);
static real c__[4225] /* was [65][65] */, g[65];
static integer i__, j, n;
static logical fatal;
static real w[130];
extern doublereal sdiff_();
extern doublereal sdiff_(real*, real*);
static logical trace;
static integer nidim;
extern /* Subroutine */ int smmch_();
extern /* Subroutine */ int smmch_(char*, char*, integer*, integer*, integer*, real*, real*, integer*, real*, integer*, real*, real*, integer*, real*, real*, real*, integer*, real*, real*, logical*, integer*, logical*, ftnlen, ftnlen);
static char snaps[32];
static integer isnum;
static logical ltest[6];
@ -433,9 +309,9 @@ static logical c_false = FALSE_;
static logical rorder;
static integer layout;
static logical ltestt, tsterr;
extern /* Subroutine */ int cs3chke_();
extern /* Subroutine */ void cs3chke_(char*, ftnlen);
static real alf[7], bet[7];
extern logical lse_();
extern logical lse_(real*, real*, integer*);
static real eps, err;
char tmpchar;
@ -899,21 +775,7 @@ L230:
} /* MAIN__ */
/* Subroutine */ int schk1_(sname, eps, thresh, nout, ntra, trace, rewi,
fatal, nidim, idim, nalf, alf, nbet, bet, nmax, a, aa, as, b, bb, bs,
c__, cc, cs, ct, g, iorder, sname_len)
char *sname;
real *eps, *thresh;
integer *nout, *ntra;
logical *trace, *rewi, *fatal;
integer *nidim, *idim, *nalf;
real *alf;
integer *nbet;
real *bet;
integer *nmax;
real *a, *aa, *as, *b, *bb, *bs, *c__, *cc, *cs, *ct, *g;
integer *iorder;
ftnlen sname_len;
/* Subroutine */ int schk1_(char* sname, real* eps, real* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, real* alf, integer* nbet, real* bet, integer* nmax, real* a, real* aa, real* as, real* b, real* bb, real* bs, real* c__, real* cc, real* cs, real* ct, real* g, integer* iorder, ftnlen sname_len)
{
/* Initialized data */
@ -923,8 +785,6 @@ ftnlen sname_len;
integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2,
i__3, i__4, i__5, i__6;
/* Builtin functions */
integer f_rew(), s_wsfe(), e_wsfe(), do_fio();
/* Local variables */
static real beta;
@ -936,18 +796,17 @@ ftnlen sname_len;
static logical trana, tranb;
static integer nargs;
static logical reset;
extern /* Subroutine */ void sprcn1_();
extern /* Subroutine */ int smake_();
extern /* Subroutine */ int smmch_();
extern /* Subroutine */ void sprcn1_(integer*, integer*, char*, integer*, char*, char*, integer*, integer*, integer*, real*, integer*, integer*, real*, integer*, ftnlen, ftnlen, ftnlen);
extern /* Subroutine */ int smake_(char*, char*, char*, integer*, integer*, real*, integer*, real*, integer*, logical*, real*, ftnlen, ftnlen, ftnlen);
extern /* Subroutine */ int smmch_(char*, char*, integer*, integer*, integer*, real*, real*, integer*, real*, integer*, real*, real*, integer*, real*, real*, real*, integer*, real*, real*, logical*, integer*, logical*, ftnlen, ftnlen);
static integer ia, ib, ma, mb, na, nb, nc, ik, im, in, ks, ms, ns;
extern /* Subroutine */ int csgemm_();
extern /* Subroutine */ void csgemm_(integer*, char*, char*, integer*, integer*, integer*, real*, real*, integer*, real*, integer*, real*, real*, integer*, ftnlen, ftnlen);
static char tranas[1], tranbs[1], transa[1], transb[1];
static real errmax;
extern logical lseres_();
extern logical lse_();
extern logical lseres_(char*, char*, integer*, integer*, real*, real*, integer*, ftnlen, ftnlen);
extern logical lse_(real*, real*, integer*);
static integer ica, icb, laa, lbb, lda, lcc, ldb, ldc;
static real als, bls;
extern logical lse_();
static real err;
/* Tests SGEMM. */
@ -1278,23 +1137,8 @@ L130:
/* Subroutine */ void sprcn1_(nout, nc, sname, iorder, transa, transb, m, n, k,
alpha, lda, ldb, beta, ldc, sname_len, transa_len, transb_len)
integer *nout, *nc;
char *sname;
integer *iorder;
char *transa, *transb;
integer *m, *n, *k;
real *alpha;
integer *lda, *ldb;
real *beta;
integer *ldc;
ftnlen sname_len;
ftnlen transa_len;
ftnlen transb_len;
/* Subroutine */ void sprcn1_(integer* nout, integer* nc, char* sname, integer* iorder, char* transa, char* transb, integer* m, integer* n, integer* k, real* alpha, integer* lda, integer* ldb, real* beta, integer* ldc, ftnlen sname_len, ftnlen transa_len, ftnlen transb_len)
{
/* Builtin functions */
integer s_wsfe(), do_fio(), e_wsfe();
/* Local variables */
static char crc[14], cta[14], ctb[14];
@ -1324,21 +1168,7 @@ ftnlen transb_len;
} /* sprcn1_ */
/* Subroutine */ int schk2_(sname, eps, thresh, nout, ntra, trace, rewi,
fatal, nidim, idim, nalf, alf, nbet, bet, nmax, a, aa, as, b, bb, bs,
c__, cc, cs, ct, g, iorder, sname_len)
char *sname;
real *eps, *thresh;
integer *nout, *ntra;
logical *trace, *rewi, *fatal;
integer *nidim, *idim, *nalf;
real *alf;
integer *nbet;
real *bet;
integer *nmax;
real *a, *aa, *as, *b, *bb, *bs, *c__, *cc, *cs, *ct, *g;
integer *iorder;
ftnlen sname_len;
/* Subroutine */ int schk2_(char* sname, real* eps, real* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, real* alf, integer* nbet, real* bet, integer* nmax, real* a, real* aa, real* as, real* b, real* bb, real* bs, real* c__, real* cc, real* cs, real* ct, real* g, integer* iorder, ftnlen sname_len)
{
/* Initialized data */
@ -1349,8 +1179,6 @@ ftnlen sname_len;
integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2,
i__3, i__4, i__5;
/* Builtin functions */
integer f_rew(), s_wsfe(), e_wsfe(), do_fio();
/* Local variables */
static real beta;
@ -1368,15 +1196,15 @@ ftnlen sname_len;
static char uplos[1];
static integer ia, ib, na, nc, im, in, ms, ns;
static real errmax;
extern logical lseres_();
extern /* Subroutine */ int cssymm_();
extern void sprcn2_();
extern int smake_();
extern int smmch_();
extern logical lseres_(char*, char*, integer*, integer*, real*, real*, integer*, ftnlen, ftnlen);
extern /* Subroutine */ void cssymm_(integer*, char*, char*, integer*, integer*, real*, real*, integer*, real*, integer*, real*, real*, integer*, ftnlen, ftnlen);
extern void sprcn2_(integer*, integer*, char*, integer*, char*, char*, integer*, integer*, real*, integer*, integer*, real*, integer*, ftnlen, ftnlen, ftnlen);
extern /* Subroutine */ int smake_(char*, char*, char*, integer*, integer*, real*, integer*, real*, integer*, logical*, real*, ftnlen, ftnlen, ftnlen);
extern /* Subroutine */ int smmch_(char*, char*, integer*, integer*, integer*, real*, real*, integer*, real*, integer*, real*, real*, integer*, real*, real*, real*, integer*, real*, real*, logical*, integer*, logical*, ftnlen, ftnlen);
static integer laa, lbb, lda, lcc, ldb, ldc, ics;
static real als, bls;
static integer icu;
extern logical lse_();
extern logical lse_(real*, real*, integer*);
static real err;
/* Tests SSYMM. */
@ -1685,23 +1513,8 @@ L120:
} /* schk2_ */
/* Subroutine */ void sprcn2_(nout, nc, sname, iorder, side, uplo, m, n, alpha,
lda, ldb, beta, ldc, sname_len, side_len, uplo_len)
integer *nout, *nc;
char *sname;
integer *iorder;
char *side, *uplo;
integer *m, *n;
real *alpha;
integer *lda, *ldb;
real *beta;
integer *ldc;
ftnlen sname_len;
ftnlen side_len;
ftnlen uplo_len;
/* Subroutine */ void sprcn2_(integer* nout, integer* nc, char* sname, integer* iorder, char* side, char* uplo, integer* m, integer* n, real* alpha, integer* lda, integer* ldb, real* beta, integer* ldc, ftnlen sname_len, ftnlen side_len, ftnlen uplo_len)
{
/* Builtin functions */
integer s_wsfe(), do_fio(), e_wsfe();
/* Local variables */
static char cs[14], cu[14], crc[14];
@ -1726,19 +1539,7 @@ ftnlen uplo_len;
} /* sprcn2_ */
/* Subroutine */ int schk3_(sname, eps, thresh, nout, ntra, trace, rewi,
fatal, nidim, idim, nalf, alf, nmax, a, aa, as, b, bb, bs, ct, g, c__,
iorder, sname_len)
char *sname;
real *eps, *thresh;
integer *nout, *ntra;
logical *trace, *rewi, *fatal;
integer *nidim, *idim, *nalf;
real *alf;
integer *nmax;
real *a, *aa, *as, *b, *bb, *bs, *ct, *g, *c__;
integer *iorder;
ftnlen sname_len;
/* Subroutine */ int schk3_(char* sname, real* eps, real* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, real* alf, integer* nmax, real* a, real* aa, real* as, real* b, real* bb, real* bs, real* ct, real* g, real* c__, integer* iorder, ftnlen sname_len)
{
/* Initialized data */
@ -1751,8 +1552,6 @@ ftnlen sname_len;
integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2,
i__3, i__4, i__5;
/* Builtin functions */
integer f_rew(), s_wsfe(), e_wsfe(), do_fio();
/* Local variables */
static char diag[1];
@ -1769,18 +1568,19 @@ ftnlen sname_len;
static integer nargs;
static logical reset;
static char uplos[1];
extern /* Subroutine */ void sprcn3_();
extern /* Subroutine */ void sprcn3_(integer*, integer*, char*, integer*, char*, char*, char*, char*, integer*, integer*, real*, integer*, integer*, ftnlen , ftnlen, ftnlen, ftnlen, ftnlen);
static integer ia, na, nc, im, in, ms, ns;
static char tranas[1], transa[1];
static real errmax;
extern int smake_();
extern int smmch_();
extern logical lseres_();
extern /* Subroutine */ int cstrmm_(), cstrsm_();
extern /* Subroutine */ int smake_(char*, char*, char*, integer*, integer*, real*, integer*, real*, integer*, logical*, real*, ftnlen, ftnlen, ftnlen);
extern /* Subroutine */ int smmch_(char*, char*, integer*, integer*, integer*, real*, real*, integer*, real*, integer*, real*, real*, integer*, real*, real*, real*, integer*, real*, real*, logical*, integer*, logical*, ftnlen, ftnlen);
extern logical lseres_(char*, char*, integer*, integer*, real*, real*, integer*, ftnlen, ftnlen);
extern /* Subroutine */ void cstrmm_(integer*, char*, char*, char*, char*, integer*, integer*, real*, real*, integer*, real*, integer*, ftnlen, ftnlen, ftnlen, ftnlen);
extern /* Subroutine */ void cstrsm_(integer*, char*, char*, char*, char*, integer*, integer*, real*, real*, integer*, real*, integer*, ftnlen, ftnlen, ftnlen, ftnlen);
static integer laa, icd, lbb, lda, ldb, ics;
static real als;
static integer ict, icu;
extern logical lse_();
extern logical lse_(real*, real*, integer*);
static real err;
/* Tests STRMM and STRSM. */
@ -2155,24 +1955,8 @@ L160:
} /* schk3_ */
/* Subroutine */ void sprcn3_(nout, nc, sname, iorder, side, uplo, transa,
diag, m, n, alpha, lda, ldb, sname_len, side_len, uplo_len,
transa_len, diag_len)
integer *nout, *nc;
char *sname;
integer *iorder;
char *side, *uplo, *transa, *diag;
integer *m, *n;
real *alpha;
integer *lda, *ldb;
ftnlen sname_len;
ftnlen side_len;
ftnlen uplo_len;
ftnlen transa_len;
ftnlen diag_len;
/* Subroutine */ void sprcn3_(integer* nout, integer* nc, char* sname, integer* iorder, char* side, char* uplo, char* transa, char* diag, integer* m, integer* n, real* alpha, integer* lda, integer* ldb, ftnlen sname_len, ftnlen side_len, ftnlen uplo_len, ftnlen transa_len, ftnlen diag_len)
{
/* Builtin functions */
integer s_wsfe(), do_fio(), e_wsfe();
/* Local variables */
static char ca[14], cd[14], cs[14], cu[14], crc[14];
@ -2210,21 +1994,7 @@ ftnlen diag_len;
} /* sprcn3_ */
/* Subroutine */ int schk4_(sname, eps, thresh, nout, ntra, trace, rewi,
fatal, nidim, idim, nalf, alf, nbet, bet, nmax, a, aa, as, b, bb, bs,
c__, cc, cs, ct, g, iorder, sname_len)
char *sname;
real *eps, *thresh;
integer *nout, *ntra;
logical *trace, *rewi, *fatal;
integer *nidim, *idim, *nalf;
real *alf;
integer *nbet;
real *bet;
integer *nmax;
real *a, *aa, *as, *b, *bb, *bs, *c__, *cc, *cs, *ct, *g;
integer *iorder;
ftnlen sname_len;
/* Subroutine */ int schk4_(char* sname, real* eps, real* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, real* alf, integer* nbet, real* bet, integer* nmax, real* a, real* aa, real* as, real* b, real* bb, real* bs, real* c__, real* cc, real* cs, real* ct, real* g, integer* iorder, ftnlen sname_len)
{
/* Initialized data */
@ -2235,8 +2005,6 @@ ftnlen sname_len;
integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2,
i__3, i__4, i__5;
/* Builtin functions */
integer f_rew(), s_wsfe(), e_wsfe(), do_fio();
/* Local variables */
static real beta;
@ -2253,18 +2021,18 @@ ftnlen sname_len;
static char trans[1];
static logical upper;
static char uplos[1];
extern /* Subroutine */ void sprcn4_();
extern /* Subroutine */ int smake_();
extern /* Subroutine */ int smmch_();
extern /* Subroutine */ void sprcn4_(integer*, integer*, char*, integer*, char*, char*, integer*, integer*, real*, integer*, real*, integer*, ftnlen, ftnlen, ftnlen);
extern /* Subroutine */ int smake_(char*, char*, char*, integer*, integer*, real*, integer*, real*, integer*, logical*, real*, ftnlen, ftnlen, ftnlen);
extern /* Subroutine */ int smmch_(char*, char*, integer*, integer*, integer*, real*, real*, integer*, real*, integer*, real*, real*, integer*, real*, real*, real*, integer*, real*, real*, logical*, integer*, logical*, ftnlen, ftnlen);
static integer ia, ib, jc, ma, na, nc, ik, in, jj, lj, ks, ns;
static real errmax;
extern logical lseres_();
extern logical lseres_(char*, char*, integer*, integer*, real*, real*, integer*, ftnlen, ftnlen);
static char transs[1];
extern /* Subroutine */ int cssyrk_();
extern /* Subroutine */ void cssyrk_(integer*, char*, char*, integer*, integer*, real*, real*, integer*, real*, real*, integer*, ftnlen, ftnlen);
static integer laa, lda, lcc, ldc;
static real als;
static integer ict, icu;
extern logical lse_();
extern logical lse_(real*, real*, integer*);
static real err;
/* Tests SSYRK. */
@ -2575,23 +2343,8 @@ L130:
} /* schk4_ */
/* Subroutine */ void sprcn4_(nout, nc, sname, iorder, uplo, transa, n, k,
alpha, lda, beta, ldc, sname_len, uplo_len, transa_len)
integer *nout, *nc;
char *sname;
integer *iorder;
char *uplo, *transa;
integer *n, *k;
real *alpha;
integer *lda;
real *beta;
integer *ldc;
ftnlen sname_len;
ftnlen uplo_len;
ftnlen transa_len;
/* Subroutine */ void sprcn4_(integer* nout, integer* nc, char* sname, integer* iorder, char* uplo, char* transa, integer* n, integer* k, real* alpha, integer* lda, real* beta, integer* ldc, ftnlen sname_len, ftnlen uplo_len, ftnlen transa_len)
{
/* Builtin functions */
integer s_wsfe(), do_fio(), e_wsfe();
/* Local variables */
static char ca[14], cu[14], crc[14];
@ -2619,21 +2372,7 @@ ftnlen transa_len;
} /* sprcn4_ */
/* Subroutine */ int schk5_(sname, eps, thresh, nout, ntra, trace, rewi,
fatal, nidim, idim, nalf, alf, nbet, bet, nmax, ab, aa, as, bb, bs,
c__, cc, cs, ct, g, w, iorder, sname_len)
char *sname;
real *eps, *thresh;
integer *nout, *ntra;
logical *trace, *rewi, *fatal;
integer *nidim, *idim, *nalf;
real *alf;
integer *nbet;
real *bet;
integer *nmax;
real *ab, *aa, *as, *bb, *bs, *c__, *cc, *cs, *ct, *g, *w;
integer *iorder;
ftnlen sname_len;
/* Subroutine */ int schk5_(char* sname, real* eps, real* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, real* alf, integer* nbet, real* bet, integer* nmax, real* ab, real* aa, real* as, real* bb, real* bs, real* c__, real* cc, real* cs, real* ct, real* g, real* w, integer* iorder, ftnlen sname_len)
{
/* Initialized data */
@ -2643,8 +2382,6 @@ ftnlen sname_len;
/* System generated locals */
integer c_dim1, c_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7, i__8;
/* Builtin functions */
integer f_rew(), s_wsfe(), e_wsfe(), do_fio();
/* Local variables */
static integer jjab;
@ -2663,18 +2400,18 @@ ftnlen sname_len;
static logical upper;
static char uplos[1];
static integer ia, ib;
extern /* Subroutine */ void sprcn5_();
extern /* Subroutine */ void sprcn5_(integer*, integer*, char*, integer*, char*, char*, integer*, integer*, real*, integer*, integer*, real*, integer*, ftnlen, ftnlen, ftnlen);
static integer jc, ma, na, nc, ik, in, jj, lj, ks, ns;
static real errmax;
extern logical lseres_();
extern int smake_();
extern logical lseres_(char*, char*, integer*, integer*, real*, real*, integer*, ftnlen, ftnlen);
extern /* Subroutine */ int smake_(char*, char*, char*, integer*, integer*, real*, integer*, real*, integer*, logical*, real*, ftnlen, ftnlen, ftnlen);
static char transs[1];
static integer laa, lbb, lda, lcc, ldb, ldc;
static real als;
static integer ict, icu;
extern /* Subroutine */ int cssyr2k_();
extern logical lse_();
extern int smmch_();
extern /* Subroutine */ void cssyr2k_(integer*, char*, char*, integer*, integer*, real*, real*, integer*, real*, integer*, real*, real*, integer*, ftnlen, ftnlen);
extern logical lse_(real*, real*, integer*);
extern /* Subroutine */ int smmch_(char*, char*, integer*, integer*, integer*, real*, real*, integer*, real*, integer*, real*, real*, integer*, real*, real*, real*, integer*, real*, real*, logical*, integer*, logical*, ftnlen, ftnlen);
static real err;
/* Tests SSYR2K. */
@ -3037,23 +2774,8 @@ L160:
} /* schk5_ */
/* Subroutine */ void sprcn5_(nout, nc, sname, iorder, uplo, transa, n, k,
alpha, lda, ldb, beta, ldc, sname_len, uplo_len, transa_len)
integer *nout, *nc;
char *sname;
integer *iorder;
char *uplo, *transa;
integer *n, *k;
real *alpha;
integer *lda, *ldb;
real *beta;
integer *ldc;
ftnlen sname_len;
ftnlen uplo_len;
ftnlen transa_len;
/* Subroutine */ void sprcn5_(integer* nout, integer* nc, char* sname, integer* iorder, char* uplo, char* transa, integer* n, integer* k, real* alpha, integer* lda, integer* ldb, real* beta, integer* ldc, ftnlen sname_len, ftnlen uplo_len, ftnlen transa_len)
{
/* Builtin functions */
integer s_wsfe(), do_fio(), e_wsfe();
/* Local variables */
static char ca[14], cu[14], crc[14];
@ -3081,19 +2803,7 @@ ftnlen transa_len;
} /* sprcn5_ */
/* Subroutine */ int smake_(type__, uplo, diag, m, n, a, nmax, aa, lda, reset,
transl, type_len, uplo_len, diag_len)
char *type__, *uplo, *diag;
integer *m, *n;
real *a;
integer *nmax;
real *aa;
integer *lda;
logical *reset;
real *transl;
ftnlen type_len;
ftnlen uplo_len;
ftnlen diag_len;
/* Subroutine */ int smake_(char* type__, char* uplo, char* diag, integer* m, integer* n, real* a, integer* nmax, real* aa, integer* lda, logical* reset, real* transl, ftnlen type_len, ftnlen uplo_len, ftnlen diag_len)
{
/* System generated locals */
integer a_dim1, a_offset, i__1, i__2;
@ -3102,7 +2812,7 @@ ftnlen diag_len;
/* Local variables */
static integer ibeg, iend;
extern doublereal sbeg_();
extern doublereal sbeg_(logical*);
static logical unit;
static integer i__, j;
static logical lower, upper, gen, tri, sym;
@ -3233,25 +2943,7 @@ ftnlen diag_len;
} /* smake_ */
/* Subroutine */ int smmch_(transa, transb, m, n, kk, alpha, a, lda, b, ldb,
beta, c__, ldc, ct, g, cc, ldcc, eps, err, fatal, nout, mv,
transa_len, transb_len)
char *transa, *transb;
integer *m, *n, *kk;
real *alpha, *a;
integer *lda;
real *b;
integer *ldb;
real *beta, *c__;
integer *ldc;
real *ct, *g, *cc;
integer *ldcc;
real *eps, *err;
logical *fatal;
integer *nout;
logical *mv;
ftnlen transa_len;
ftnlen transb_len;
/* Subroutine */ int smmch_(char* transa, char* transb, integer* m, integer* n, integer* kk, real* alpha, real* a, integer* lda, real* b, integer* ldb, real* beta, real* c__, integer* ldc, real* ct, real* g, real* cc, integer* ldcc, real* eps, real* err, logical* fatal, integer* nout, logical* mv, ftnlen transa_len, ftnlen transb_len)
{
/* System generated locals */
@ -3260,8 +2952,7 @@ ftnlen transb_len;
real r__1, r__2;
/* Builtin functions */
double sqrt();
integer s_wsfe(), e_wsfe(), do_fio();
double sqrt(double);
/* Local variables */
static real erri;
@ -3426,9 +3117,7 @@ L150:
} /* smmch_ */
logical lse_(ri, rj, lr)
real *ri, *rj;
integer *lr;
logical lse_(real* ri, real* rj, integer* lr)
{
/* System generated locals */
integer i__1;
@ -3475,13 +3164,7 @@ L30:
} /* lse_ */
logical lseres_(type__, uplo, m, n, aa, as, lda, type_len, uplo_len)
char *type__, *uplo;
integer *m, *n;
real *aa, *as;
integer *lda;
ftnlen type_len;
ftnlen uplo_len;
logical lseres_(char* type__, char* uplo, integer* m, integer* n, real* aa, real* as, integer* lda, ftnlen type_len, ftnlen uplo_len)
{
/* System generated locals */
integer aa_dim1, aa_offset, as_dim1, as_offset, i__1, i__2;
@ -3572,8 +3255,7 @@ L80:
} /* lseres_ */
doublereal sbeg_(reset)
logical *reset;
doublereal sbeg_(logical* reset)
{
/* System generated locals */
real ret_val;
@ -3625,8 +3307,7 @@ L10:
} /* sbeg_ */
doublereal sdiff_(x, y)
real *x, *y;
doublereal sdiff_(real* x, real* y)
{
/* System generated locals */
real ret_val;

View File

@ -242,250 +242,6 @@ typedef struct Namelist Namelist;
/* procedure parameter types for -A and -C++ */
#define F2C_proc_par_types 1
#ifdef __cplusplus
typedef logical (*L_fp)(...);
#else
typedef logical (*L_fp)();
#endif
#if 0
static float spow_ui(float x, integer n) {
float pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
static double dpow_ui(double x, integer n) {
double pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
#ifdef _MSC_VER
static _Fcomplex cpow_ui(complex x, integer n) {
complex pow={1.0,0.0}; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i;
for(u = n; ; ) {
if(u & 01) pow.r *= x.r, pow.i *= x.i;
if(u >>= 1) x.r *= x.r, x.i *= x.i;
else break;
}
}
_Fcomplex p={pow.r, pow.i};
return p;
}
#else
static _Complex float cpow_ui(_Complex float x, integer n) {
_Complex float pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
#endif
#ifdef _MSC_VER
static _Dcomplex zpow_ui(_Dcomplex x, integer n) {
_Dcomplex pow={1.0,0.0}; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1];
for(u = n; ; ) {
if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1];
if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1];
else break;
}
}
_Dcomplex p = {pow._Val[0], pow._Val[1]};
return p;
}
#else
static _Complex double zpow_ui(_Complex double x, integer n) {
_Complex double pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
#endif
static integer pow_ii(integer x, integer n) {
integer pow; unsigned long int u;
if (n <= 0) {
if (n == 0 || x == 1) pow = 1;
else if (x != -1) pow = x == 0 ? 1/x : 0;
else n = -n;
}
if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
u = n;
for(pow = 1; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
static integer dmaxloc_(double *w, integer s, integer e, integer *n)
{
double m; integer i, mi;
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
if (w[i-1]>m) mi=i ,m=w[i-1];
return mi-s+1;
}
static integer smaxloc_(float *w, integer s, integer e, integer *n)
{
float m; integer i, mi;
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
if (w[i-1]>m) mi=i ,m=w[i-1];
return mi-s+1;
}
#endif
#if 0
static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
integer n = *n_, incx = *incx_, incy = *incy_, i;
#ifdef _MSC_VER
_Fcomplex zdotc = {0.0, 0.0};
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += conjf(Cf(&x[i]))._Val[0] * Cf(&y[i])._Val[0];
zdotc._Val[1] += conjf(Cf(&x[i]))._Val[1] * Cf(&y[i])._Val[1];
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += conjf(Cf(&x[i*incx]))._Val[0] * Cf(&y[i*incy])._Val[0];
zdotc._Val[1] += conjf(Cf(&x[i*incx]))._Val[1] * Cf(&y[i*incy])._Val[1];
}
}
pCf(z) = zdotc;
}
#else
_Complex float zdotc = 0.0;
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
}
}
pCf(z) = zdotc;
}
#endif
static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
integer n = *n_, incx = *incx_, incy = *incy_, i;
#ifdef _MSC_VER
_Dcomplex zdotc = {0.0, 0.0};
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += conj(Cd(&x[i]))._Val[0] * Cd(&y[i])._Val[0];
zdotc._Val[1] += conj(Cd(&x[i]))._Val[1] * Cd(&y[i])._Val[1];
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += conj(Cd(&x[i*incx]))._Val[0] * Cd(&y[i*incy])._Val[0];
zdotc._Val[1] += conj(Cd(&x[i*incx]))._Val[1] * Cd(&y[i*incy])._Val[1];
}
}
pCd(z) = zdotc;
}
#else
_Complex double zdotc = 0.0;
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
}
}
pCd(z) = zdotc;
}
#endif
static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
integer n = *n_, incx = *incx_, incy = *incy_, i;
#ifdef _MSC_VER
_Fcomplex zdotc = {0.0, 0.0};
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += Cf(&x[i])._Val[0] * Cf(&y[i])._Val[0];
zdotc._Val[1] += Cf(&x[i])._Val[1] * Cf(&y[i])._Val[1];
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += Cf(&x[i*incx])._Val[0] * Cf(&y[i*incy])._Val[0];
zdotc._Val[1] += Cf(&x[i*incx])._Val[1] * Cf(&y[i*incy])._Val[1];
}
}
pCf(z) = zdotc;
}
#else
_Complex float zdotc = 0.0;
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += Cf(&x[i]) * Cf(&y[i]);
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
}
}
pCf(z) = zdotc;
}
#endif
static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
integer n = *n_, incx = *incx_, incy = *incy_, i;
#ifdef _MSC_VER
_Dcomplex zdotc = {0.0, 0.0};
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += Cd(&x[i])._Val[0] * Cd(&y[i])._Val[0];
zdotc._Val[1] += Cd(&x[i])._Val[1] * Cd(&y[i])._Val[1];
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += Cd(&x[i*incx])._Val[0] * Cd(&y[i*incy])._Val[0];
zdotc._Val[1] += Cd(&x[i*incx])._Val[1] * Cd(&y[i*incy])._Val[1];
}
}
pCd(z) = zdotc;
}
#else
_Complex double zdotc = 0.0;
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += Cd(&x[i]) * Cd(&y[i]);
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
}
}
pCd(z) = zdotc;
}
#endif
#endif
/* Common Block Declarations */
@ -502,16 +258,16 @@ static integer c__1 = 1;
static integer c__5 = 5;
static doublereal c_b43 = 1.;
/* Main program */ int main()
/* Main program */ int main(void)
{
/* Initialized data */
static doublereal sfac = 9.765625e-4;
/* Local variables */
extern /* Subroutine */ int check1_(), check2_();
extern /* Subroutine */ int check1_(doublereal*), check2_(doublereal*);
static integer ic;
extern /* Subroutine */ int header_();
extern /* Subroutine */ int header_(void);
/* Test program for the COMPLEX*16 Level 1 CBLAS. */
/* Based upon the original CBLAS test routine together with: */
@ -551,7 +307,7 @@ static doublereal c_b43 = 1.;
exit(0);
} /* MAIN__ */
/* Subroutine */ int header_()
/* Subroutine */ int header_(void)
{
/* Initialized data */
@ -570,8 +326,7 @@ static doublereal c_b43 = 1.;
} /* header_ */
/* Subroutine */ int check1_(sfac)
doublereal *sfac;
/* Subroutine */ int check1_(doublereal* sfac)
{
/* Initialized data */
@ -623,15 +378,15 @@ doublereal *sfac;
/* Local variables */
static integer i__;
extern /* Subroutine */ int ctest_();
extern /* Subroutine */ int ctest_(integer*, doublecomplex*, doublecomplex*, doublecomplex*, doublereal*);
static doublecomplex mwpcs[5], mwpct[5];
extern /* Subroutine */ int zscaltest_(), itest1_(), stest1_();
extern /* Subroutine */ int zscaltest_(integer*, doublereal*, doublecomplex*, integer*), itest1_(integer*, integer*), stest1_(doublereal*, doublereal*, doublereal*, doublereal*);
static doublecomplex cx[8];
extern doublereal dznrm2test_();
extern doublereal dznrm2test_(integer*, doublecomplex*, integer*);
static integer np1;
extern /* Subroutine */ int zdscaltest_();
extern integer izamaxtest_();
extern doublereal dzasumtest_();
extern /* Subroutine */ int zdscaltest_(integer*, doublereal*, doublecomplex*, integer*);
extern integer izamaxtest_(integer*, doublecomplex*, integer*);
extern doublereal dzasumtest_(integer*, doublecomplex*, integer*);
static integer len;
/* .. Parameters .. */
@ -748,8 +503,7 @@ doublereal *sfac;
return 0;
} /* check1_ */
/* Subroutine */ int check2_(sfac)
doublereal *sfac;
/* Subroutine */ int check2_(doublereal* sfac)
{
/* Initialized data */
@ -834,14 +588,14 @@ doublereal *sfac;
/* Local variables */
static doublecomplex cdot[1];
static integer lenx, leny, i__;
extern /* Subroutine */ int ctest_();
extern /* Subroutine */ int ctest_(integer*, doublecomplex*, doublecomplex*, doublecomplex*, doublereal*);
static integer ksize;
static doublecomplex ztemp;
extern /* Subroutine */ int zdotctest_(), zcopytest_();
extern /* Subroutine */ int zdotctest_(integer*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*), zcopytest_(integer*, doublecomplex*, integer*, doublecomplex*, integer*);
static integer ki;
extern /* Subroutine */ int zdotutest_(), zswaptest_();
extern /* Subroutine */ int zdotutest_(integer*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*), zswaptest_(integer*, doublecomplex*, integer*, doublecomplex*, integer*);
static integer kn;
extern /* Subroutine */ int zaxpytest_();
extern /* Subroutine */ int zaxpytest_(integer*, doublereal*, doublecomplex*, integer*, doublecomplex*, integer*);
static doublecomplex cx[7], cy[7];
static integer mx, my;
@ -923,20 +677,18 @@ doublereal *sfac;
return 0;
} /* check2_ */
/* Subroutine */ int stest_(len, scomp, strue, ssize, sfac)
integer *len;
doublereal *scomp, *strue, *ssize, *sfac;
/* Subroutine */ int stest_(integer* len, doublereal* scomp, doublereal* strue, doublereal* ssize, doublereal* sfac)
{
/* System generated locals */
integer i__1;
doublereal d__1, d__2, d__3, d__4, d__5;
/* Builtin functions */
integer s_wsfe(), e_wsfe(), do_fio();
integer s_wsfe(void), e_wsfe(void), do_fio(void);
/* Local variables */
static integer i__;
extern doublereal sdiff_();
extern doublereal sdiff_(doublereal*, doublereal*);
static doublereal sd;
/* ********************************* STEST ************************** */
@ -992,11 +744,10 @@ L40:
} /* stest_ */
/* Subroutine */ int stest1_(scomp1, strue1, ssize, sfac)
doublereal *scomp1, *strue1, *ssize, *sfac;
/* Subroutine */ int stest1_(doublereal* scomp1, doublereal* strue1, doublereal* ssize, doublereal* sfac)
{
static doublereal scomp[1], strue[1];
extern /* Subroutine */ int stest_();
extern /* Subroutine */ int stest_(integer*,doublereal*, doublereal*, doublereal*, doublereal*);
/* ************************* STEST1 ***************************** */
@ -1023,8 +774,7 @@ doublereal *scomp1, *strue1, *ssize, *sfac;
return 0;
} /* stest1_ */
doublereal sdiff_(sa, sb)
doublereal *sa, *sb;
doublereal sdiff_(doublereal* sa, doublereal* sb)
{
/* System generated locals */
doublereal ret_val;
@ -1038,10 +788,7 @@ doublereal *sa, *sb;
return ret_val;
} /* sdiff_ */
/* Subroutine */ int ctest_(len, ccomp, ctrue, csize, sfac)
integer *len;
doublecomplex *ccomp, *ctrue, *csize;
doublereal *sfac;
/* Subroutine */ int ctest_(integer* len, doublecomplex* ccomp, doublecomplex* ctrue, doublecomplex* csize, doublereal* sfac)
{
/* System generated locals */
integer i__1, i__2;
@ -1049,7 +796,7 @@ doublereal *sfac;
/* Local variables */
static integer i__;
static doublereal scomp[20], ssize[20], strue[20];
extern /* Subroutine */ int stest_();
extern /* Subroutine */ int stest_(integer*, doublereal*, doublereal*, doublereal*, doublereal*);
/* **************************** CTEST ***************************** */
@ -1087,8 +834,7 @@ doublereal *sfac;
return 0;
} /* ctest_ */
/* Subroutine */ int itest1_(icomp, itrue)
integer *icomp, *itrue;
/* Subroutine */ int itest1_(integer* icomp, integer* itrue)
{
static integer id;

View File

@ -242,129 +242,6 @@ typedef struct Namelist Namelist;
/* procedure parameter types for -A and -C++ */
#define F2C_proc_par_types 1
#ifdef __cplusplus
typedef logical (*L_fp)(...);
#else
typedef logical (*L_fp)();
#endif
#if 0
static float spow_ui(float x, integer n) {
float pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
static double dpow_ui(double x, integer n) {
double pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
#ifdef _MSC_VER
static _Fcomplex cpow_ui(complex x, integer n) {
complex pow={1.0,0.0}; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i;
for(u = n; ; ) {
if(u & 01) pow.r *= x.r, pow.i *= x.i;
if(u >>= 1) x.r *= x.r, x.i *= x.i;
else break;
}
}
_Fcomplex p={pow.r, pow.i};
return p;
}
#else
static _Complex float cpow_ui(_Complex float x, integer n) {
_Complex float pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
#endif
#ifdef _MSC_VER
static _Dcomplex zpow_ui(_Dcomplex x, integer n) {
_Dcomplex pow={1.0,0.0}; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1];
for(u = n; ; ) {
if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1];
if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1];
else break;
}
}
_Dcomplex p = {pow._Val[0], pow._Val[1]};
return p;
}
#else
static _Complex double zpow_ui(_Complex double x, integer n) {
_Complex double pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
#endif
static integer pow_ii(integer x, integer n) {
integer pow; unsigned long int u;
if (n <= 0) {
if (n == 0 || x == 1) pow = 1;
else if (x != -1) pow = x == 0 ? 1/x : 0;
else n = -n;
}
if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
u = n;
for(pow = 1; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
static integer dmaxloc_(double *w, integer s, integer e, integer *n)
{
double m; integer i, mi;
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
if (w[i-1]>m) mi=i ,m=w[i-1];
return mi-s+1;
}
static integer smaxloc_(float *w, integer s, integer e, integer *n)
{
float m; integer i, mi;
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
if (w[i-1]>m) mi=i ,m=w[i-1];
return mi-s+1;
}
#endif
/* -- translated by f2c (version 20000121).
You must link the resulting object file with the libraries:
-lf2c -lm (in that order)
*/
/* Common Block Declarations */
@ -396,7 +273,7 @@ static integer c_n1 = -1;
static integer c__0 = 0;
static logical c_false = FALSE_;
/* Main program */ int main()
/* Main program */ int main(void)
{
/* Initialized data */
@ -414,19 +291,23 @@ static logical c_false = FALSE_;
static logical same;
static integer ninc, nbet, ntra;
static logical rewi;
extern /* Subroutine */ int zchk1_(), zchk2_(), zchk3_(), zchk4_(),
zchk5_(), zchk6_();
extern /* Subroutine */ int zchk1_(char*, doublereal*, doublereal*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, integer*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, integer*, integer*, integer*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublereal*, integer*, ftnlen);
extern /* Subroutine */ int zchk2_(char*, doublereal*, doublereal*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, integer*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, integer*, integer*, integer*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublereal*, integer*, ftnlen);
extern /* Subroutine */ int zchk3_(char*, doublereal*, doublereal*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, integer*, integer*, integer*, integer*, integer*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublereal*, doublecomplex*, integer*, ftnlen);
extern /* Subroutine */ int zchk4_(char*, doublereal*, doublereal*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, doublecomplex*, integer*, integer*, integer*, integer*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublereal*, doublecomplex*, integer*, ftnlen);
extern /* Subroutine */ int zchk5_(char*, doublereal*, doublereal*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, doublecomplex*, integer*, integer*, integer*, integer*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublereal*, doublecomplex*, integer*, ftnlen);
extern /* Subroutine */ int zchk6_(char*, doublereal*, doublereal*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, doublecomplex*, integer*, integer*, integer*, integer*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublereal*, doublecomplex*, integer*, ftnlen);
static doublecomplex a[4225] /* was [65][65] */;
static doublereal g[65];
static integer i__, j;
extern doublereal ddiff_();
extern doublereal ddiff_(doublereal*, doublereal*);
static integer n;
static logical fatal;
static doublecomplex x[65], y[65], z__[130];
static logical trace;
static integer nidim;
static char snaps[32], trans[1];
extern /* Subroutine */ int zmvch_();
extern /* Subroutine */ int zmvch_(char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, doublereal*, doublecomplex*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen);
static integer isnum;
static logical ltest[17];
static doublecomplex aa[4225];
@ -441,12 +322,12 @@ static logical c_false = FALSE_;
static logical rorder;
static integer layout;
static logical ltestt, tsterr;
extern /* Subroutine */ int cz2chke_();
extern /* Subroutine */ void cz2chke_(char*, ftnlen);
static doublecomplex alf[7];
static integer inc[7], nkb;
static doublecomplex bet[7];
static doublereal eps, err;
extern logical lze_();
extern logical lze_(doublecomplex*, doublecomplex*, integer*);
char tmpchar;
/* Test program for the DOUBLE PRECISION COMPLEX Level 2 Blas. */
@ -984,22 +865,7 @@ L240:
} /* MAIN__ */
/* Subroutine */ int zchk1_(sname, eps, thresh, nout, ntra, trace, rewi,
fatal, nidim, idim, nkb, kb, nalf, alf, nbet, bet, ninc, inc, nmax,
incmax, a, aa, as, x, xx, xs, y, yy, ys, yt, g, iorder, sname_len)
char *sname;
doublereal *eps, *thresh;
integer *nout, *ntra;
logical *trace, *rewi, *fatal;
integer *nidim, *idim, *nkb, *kb, *nalf;
doublecomplex *alf;
integer *nbet;
doublecomplex *bet;
integer *ninc, *inc, *nmax, *incmax;
doublecomplex *a, *aa, *as, *x, *xx, *xs, *y, *yy, *ys, *yt;
doublereal *g;
integer *iorder;
ftnlen sname_len;
/* Subroutine */ int zchk1_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nkb, integer* kb, integer* nalf, doublecomplex* alf, integer* nbet, doublecomplex* bet, integer* ninc, integer* inc, integer* nmax, integer* incmax, doublecomplex* a, doublecomplex* aa, doublecomplex* as, doublecomplex* x, doublecomplex* xx, doublecomplex* xs, doublecomplex* y, doublecomplex* yy, doublecomplex* ys, doublecomplex* yt, doublereal* g, integer* iorder, ftnlen sname_len)
{
/* Initialized data */
@ -1018,27 +884,27 @@ ftnlen sname_len;
static integer i__, m, n;
static doublecomplex alpha;
static logical isame[13];
extern /* Subroutine */ int zmake_();
extern /* Subroutine */ int zmake_(char*, char*, char*, integer*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, integer*, integer*, logical*, doublecomplex*, ftnlen, ftnlen, ftnlen);
static integer nargs;
static logical reset;
static integer incxs, incys;
static char trans[1];
extern /* Subroutine */ int zmvch_();
extern /* Subroutine */ int zmvch_(char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, doublereal*, doublecomplex*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen);
static integer ia, ib, ic;
static logical banded;
static integer nc, nd, im, in, kl, ml, nk, nl, ku, ix, iy, ms, lx, ly, ns;
extern /* Subroutine */ int czgbmv_();
extern /* Subroutine */ void czgbmv_(integer*, char*, integer*, integer*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen);
static char ctrans[14];
extern /* Subroutine */ int czgemv_();
extern /* Subroutine */ void czgemv_(integer*, char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen);
static doublereal errmax;
static doublecomplex transl;
extern logical lzeres_();
extern logical lzeres_(char*, char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen, ftnlen);
static char transs[1];
static integer laa, lda;
static doublecomplex als, bls;
static doublereal err;
static integer iku, kls;
extern logical lze_();
extern logical lze_(doublecomplex*, doublecomplex*, integer*);
static integer kus;
@ -1451,22 +1317,7 @@ L140:
} /* zchk1_ */
/* Subroutine */ int zchk2_(sname, eps, thresh, nout, ntra, trace, rewi,
fatal, nidim, idim, nkb, kb, nalf, alf, nbet, bet, ninc, inc, nmax,
incmax, a, aa, as, x, xx, xs, y, yy, ys, yt, g, iorder, sname_len)
char *sname;
doublereal *eps, *thresh;
integer *nout, *ntra;
logical *trace, *rewi, *fatal;
integer *nidim, *idim, *nkb, *kb, *nalf;
doublecomplex *alf;
integer *nbet;
doublecomplex *bet;
integer *ninc, *inc, *nmax, *incmax;
doublecomplex *a, *aa, *as, *x, *xx, *xs, *y, *yy, *ys, *yt;
doublereal *g;
integer *iorder;
ftnlen sname_len;
/* Subroutine */ int zchk2_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nkb, integer* kb, integer* nalf, doublecomplex* alf, integer* nbet, doublecomplex* bet, integer* ninc, integer* inc, integer* nmax, integer* incmax, doublecomplex* a, doublecomplex* aa, doublecomplex* as, doublecomplex* x, doublecomplex* xx, doublecomplex* xs, doublecomplex* y, doublecomplex* yy, doublecomplex* ys, doublecomplex* yt, doublereal* g, integer* iorder, ftnlen sname_len)
{
/* Initialized data */
@ -1486,27 +1337,28 @@ ftnlen sname_len;
static integer i__, k, n;
static doublecomplex alpha;
static logical isame[13];
extern /* Subroutine */ int zmake_();
extern /* Subroutine */ int zmake_(char*, char*, char*, integer*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, integer*, integer*, logical*, doublecomplex*, ftnlen, ftnlen, ftnlen);
static integer nargs;
static logical reset;
static char cuplo[14];
static integer incxs, incys;
extern /* Subroutine */ int zmvch_();
extern /* Subroutine */ int zmvch_(char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, doublereal*, doublecomplex*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen);
static char uplos[1];
static integer ia, ib, ic;
static logical banded;
static integer nc, ik, in;
static logical packed;
static integer nk, ks, ix, iy, ns, lx, ly;
extern /* Subroutine */ int czhbmv_(), czhemv_();
extern /* Subroutine */ void czhbmv_(integer*, char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen);
extern /* Subroutine */ void czhemv_(integer*, char*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen);
static doublereal errmax;
static doublecomplex transl;
extern logical lzeres_();
extern /* Subroutine */ int czhpmv_();
extern logical lzeres_(char*, char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen, ftnlen);
extern /* Subroutine */ void czhpmv_(integer*, char*, integer*, doublecomplex*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen);
static integer laa, lda;
static doublecomplex als, bls;
static doublereal err;
extern logical lze_();
extern logical lze_(doublecomplex*, doublecomplex*, integer*);
/* Tests CHEMV, CHBMV and CHPMV. */
@ -1909,19 +1761,7 @@ L130:
} /* zchk2_ */
/* Subroutine */ int zchk3_(sname, eps, thresh, nout, ntra, trace, rewi,
fatal, nidim, idim, nkb, kb, ninc, inc, nmax, incmax, a, aa, as, x,
xx, xs, xt, g, z__, iorder, sname_len)
char *sname;
doublereal *eps, *thresh;
integer *nout, *ntra;
logical *trace, *rewi, *fatal;
integer *nidim, *idim, *nkb, *kb, *ninc, *inc, *nmax, *incmax;
doublecomplex *a, *aa, *as, *x, *xx, *xs, *xt;
doublereal *g;
doublecomplex *z__;
integer *iorder;
ftnlen sname_len;
/* Subroutine */ int zchk3_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nkb, integer* kb, integer* ninc, integer* inc, integer* nmax, integer* incmax, doublecomplex* a, doublecomplex* aa, doublecomplex* as, doublecomplex* x, doublecomplex* xx, doublecomplex* xs, doublecomplex* xt, doublereal* g, doublecomplex* z__, integer* iorder, ftnlen sname_len)
{
/* Initialized data */
@ -1942,13 +1782,13 @@ ftnlen sname_len;
static integer i__, k, n;
static char diags[1];
static logical isame[13];
extern /* Subroutine */ int zmake_();
extern /* Subroutine */ int zmake_(char*, char*, char*, integer*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, integer*, integer*, logical*, doublecomplex*, ftnlen, ftnlen, ftnlen);
static integer nargs;
static logical reset;
static char cuplo[14];
static integer incxs;
static char trans[1];
extern /* Subroutine */ int zmvch_();
extern /* Subroutine */ int zmvch_(char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, doublereal*, doublecomplex*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen);
static char uplos[1];
static logical banded;
static integer nc, ik, in;
@ -1957,14 +1797,17 @@ ftnlen sname_len;
static char ctrans[14];
static doublereal errmax;
static doublecomplex transl;
extern logical lzeres_();
extern /* Subroutine */ int cztbmv_();
extern logical lzeres_(char*, char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen, ftnlen);
extern /* Subroutine */ void cztbmv_(integer*, char*, char*, char*, integer*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, ftnlen, ftnlen, ftnlen);
static char transs[1];
extern /* Subroutine */ int cztbsv_(), cztpmv_(), cztrmv_(), cztpsv_(),
cztrsv_();
extern /* Subroutine */ void cztbsv_(integer*, char*, char*, char*, integer*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, ftnlen, ftnlen, ftnlen);
extern /* Subroutine */ void cztpmv_(integer*, char*, char*, char*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen, ftnlen, ftnlen);
extern /* Subroutine */ void cztpsv_(integer*, char*, char*, char*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen, ftnlen, ftnlen);
extern /* Subroutine */ void cztrmv_(integer*, char*, char*, char*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, ftnlen, ftnlen, ftnlen);
extern /* Subroutine */ void cztrsv_(integer*, char*, char*, char*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, ftnlen, ftnlen, ftnlen);
static integer laa, icd, lda, ict, icu;
static doublereal err;
extern logical lze_();
extern logical lze_(doublecomplex*, doublecomplex*, integer*);
@ -2422,21 +2265,7 @@ L130:
} /* zchk3_ */
/* Subroutine */ int zchk4_(sname, eps, thresh, nout, ntra, trace, rewi,
fatal, nidim, idim, nalf, alf, ninc, inc, nmax, incmax, a, aa, as, x,
xx, xs, y, yy, ys, yt, g, z__, iorder, sname_len)
char *sname;
doublereal *eps, *thresh;
integer *nout, *ntra;
logical *trace, *rewi, *fatal;
integer *nidim, *idim, *nalf;
doublecomplex *alf;
integer *ninc, *inc, *nmax, *incmax;
doublecomplex *a, *aa, *as, *x, *xx, *xs, *y, *yy, *ys, *yt;
doublereal *g;
doublecomplex *z__;
integer *iorder;
ftnlen sname_len;
/* Subroutine */ int zchk4_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, doublecomplex* alf, integer* ninc, integer* inc, integer* nmax, integer* incmax, doublecomplex* a, doublecomplex* aa, doublecomplex* as, doublecomplex* x, doublecomplex* xx, doublecomplex* xs, doublecomplex* y, doublecomplex* yy, doublecomplex* ys, doublecomplex* yt, doublereal* g, doublecomplex* z__, integer* iorder, ftnlen sname_len)
{
/* System generated locals */
integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7;
@ -2450,21 +2279,21 @@ ftnlen sname_len;
static integer i__, j, m, n;
static doublecomplex alpha, w[1];
static logical isame[13];
extern /* Subroutine */ int zmake_();
extern /* Subroutine */ int zmake_(char*, char*, char*, integer*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, integer*, integer*, logical*, doublecomplex*, ftnlen, ftnlen, ftnlen);
static integer nargs;
static logical reset;
static integer incxs, incys;
extern /* Subroutine */ int zmvch_();
extern /* Subroutine */ int zmvch_(char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, doublereal*, doublecomplex*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen);
static integer ia, nc, nd, im, in, ms, ix, iy, ns, lx, ly;
extern /* Subroutine */ int czgerc_();
extern /* Subroutine */ void czgerc_(integer*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, integer*);
static doublereal errmax;
extern /* Subroutine */ int czgeru_();
extern /* Subroutine */ void czgeru_(integer*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, integer*);
static doublecomplex transl;
extern logical lzeres_();
extern logical lzeres_(char*, char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen, ftnlen);
static integer laa, lda;
static doublecomplex als;
static doublereal err;
extern logical lze_();
extern logical lze_(doublecomplex*, doublecomplex*, integer*);
@ -2793,21 +2622,7 @@ L150:
} /* zchk4_ */
/* Subroutine */ int zchk5_(sname, eps, thresh, nout, ntra, trace, rewi,
fatal, nidim, idim, nalf, alf, ninc, inc, nmax, incmax, a, aa, as, x,
xx, xs, y, yy, ys, yt, g, z__, iorder, sname_len)
char *sname;
doublereal *eps, *thresh;
integer *nout, *ntra;
logical *trace, *rewi, *fatal;
integer *nidim, *idim, *nalf;
doublecomplex *alf;
integer *ninc, *inc, *nmax, *incmax;
doublecomplex *a, *aa, *as, *x, *xx, *xs, *y, *yy, *ys, *yt;
doublereal *g;
doublecomplex *z__;
integer *iorder;
ftnlen sname_len;
/* Subroutine */ int zchk5_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, doublecomplex* alf, integer* ninc, integer* inc, integer* nmax, integer* incmax, doublecomplex* a, doublecomplex* aa, doublecomplex* as, doublecomplex* x, doublecomplex* xx, doublecomplex* xs, doublecomplex* y, doublecomplex* yy, doublecomplex* ys, doublecomplex* yt, doublereal* g, doublecomplex* z__, integer* iorder, ftnlen sname_len)
{
/* Initialized data */
@ -2827,13 +2642,14 @@ ftnlen sname_len;
static integer i__, j, n;
static doublecomplex alpha, w[1];
static logical isame[13];
extern /* Subroutine */ int zmake_();
extern /* Subroutine */ int zmake_(char*, char*, char*, integer*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, integer*, integer*, logical*, doublecomplex*, ftnlen, ftnlen, ftnlen);
static integer nargs;
extern /* Subroutine */ int czher_();
extern /* Subroutine */ void czher_(integer*, char*, integer*, doublereal*, doublecomplex*, integer*, doublecomplex*, integer*, ftnlen);
static logical reset;
static char cuplo[14];
static integer incxs;
extern /* Subroutine */ int czhpr_(), zmvch_();
extern /* Subroutine */ void czhpr_(integer*, char*, integer*, doublereal*, doublecomplex*, integer*, doublecomplex*, ftnlen);
extern /* Subroutine */ int zmvch_(char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, doublereal*, doublecomplex*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen);
static logical upper;
static char uplos[1];
static integer ia, ja, ic, nc, jj, lj, in;
@ -2841,10 +2657,10 @@ ftnlen sname_len;
static integer ix, ns, lx;
static doublereal ralpha, errmax;
static doublecomplex transl;
extern logical lzeres_();
extern logical lzeres_(char*, char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen, ftnlen);
static integer laa, lda;
static doublereal err;
extern logical lze_();
extern logical lze_(doublecomplex*, doublecomplex*, integer*);
/* Tests ZHER and ZHPR. */
@ -3167,21 +2983,7 @@ L130:
} /* zchk5_ */
/* Subroutine */ int zchk6_(sname, eps, thresh, nout, ntra, trace, rewi,
fatal, nidim, idim, nalf, alf, ninc, inc, nmax, incmax, a, aa, as, x,
xx, xs, y, yy, ys, yt, g, z__, iorder, sname_len)
char *sname;
doublereal *eps, *thresh;
integer *nout, *ntra;
logical *trace, *rewi, *fatal;
integer *nidim, *idim, *nalf;
doublecomplex *alf;
integer *ninc, *inc, *nmax, *incmax;
doublecomplex *a, *aa, *as, *x, *xx, *xs, *y, *yy, *ys, *yt;
doublereal *g;
doublecomplex *z__;
integer *iorder;
ftnlen sname_len;
/* Subroutine */ int zchk6_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, doublecomplex* alf, integer* ninc, integer* inc, integer* nmax, integer* incmax, doublecomplex* a, doublecomplex* aa, doublecomplex* as, doublecomplex* x, doublecomplex* xx, doublecomplex* xs, doublecomplex* y, doublecomplex* yy, doublecomplex* ys, doublecomplex* yt, doublereal* g, doublecomplex* z__, integer* iorder, ftnlen sname_len)
{
/* Initialized data */
@ -3201,25 +3003,26 @@ ftnlen sname_len;
static integer i__, j, n;
static doublecomplex alpha, w[2];
static logical isame[13];
extern /* Subroutine */ int zmake_();
extern /* Subroutine */ int zmake_(char*, char*, char*, integer*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, integer*, integer*, logical*, doublecomplex*, ftnlen, ftnlen, ftnlen);
static integer nargs;
static logical reset;
static char cuplo[14];
static integer incxs, incys;
extern /* Subroutine */ int zmvch_();
extern /* Subroutine */ int zmvch_(char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, doublereal*, doublecomplex*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen);
static logical upper;
static char uplos[1];
extern /* Subroutine */ int czher2_(), czhpr2_();
extern /* Subroutine */ void czher2_(integer*, char*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, ftnlen);
extern /* Subroutine */ void czhpr2_(integer*, char*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, ftnlen);
static integer ia, ja, ic, nc, jj, lj, in;
static logical packed;
static integer ix, iy, ns, lx, ly;
static doublereal errmax;
static doublecomplex transl;
extern logical lzeres_();
extern logical lzeres_(char*, char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen, ftnlen);
static integer laa, lda;
static doublecomplex als;
static doublereal err;
extern logical lze_();
extern logical lze_(doublecomplex*, doublecomplex*, integer*);
/* Tests ZHER2 and ZHPR2. */
@ -3604,24 +3407,7 @@ L170:
} /* zchk6_ */
/* Subroutine */ int zmvch_(trans, m, n, alpha, a, nmax, x, incx, beta, y,
incy, yt, g, yy, eps, err, fatal, nout, mv, trans_len)
char *trans;
integer *m, *n;
doublecomplex *alpha, *a;
integer *nmax;
doublecomplex *x;
integer *incx;
doublecomplex *beta, *y;
integer *incy;
doublecomplex *yt;
doublereal *g;
doublecomplex *yy;
doublereal *eps, *err;
logical *fatal;
integer *nout;
logical *mv;
ftnlen trans_len;
/* Subroutine */ int zmvch_(char* trans, integer* m, integer* n, doublecomplex* alpha, doublecomplex* a, integer* nmax, doublecomplex* x, integer* incx, doublecomplex* beta, doublecomplex* y, integer* incy, doublecomplex* yt, doublereal* g, doublecomplex* yy, doublereal* eps, doublereal* err, logical* fatal, integer* nout, logical* mv, ftnlen trans_len)
{
/* System generated locals */
@ -3819,9 +3605,7 @@ L80:
} /* zmvch_ */
logical lze_(ri, rj, lr)
doublecomplex *ri, *rj;
integer *lr;
logical lze_(doublecomplex* ri, doublecomplex* rj, integer* lr)
{
/* System generated locals */
integer i__1, i__2, i__3;
@ -3868,13 +3652,7 @@ L30:
} /* lze_ */
logical lzeres_(type__, uplo, m, n, aa, as, lda, type_len, uplo_len)
char *type__, *uplo;
integer *m, *n;
doublecomplex *aa, *as;
integer *lda;
ftnlen type_len;
ftnlen uplo_len;
logical lzeres_(char* type__, char* uplo, integer* m, integer* n, doublecomplex* aa, doublecomplex* as, integer* lda, ftnlen type_len, ftnlen uplo_len)
{
/* System generated locals */
integer aa_dim1, aa_offset, as_dim1, as_offset, i__1, i__2, i__3, i__4;
@ -3967,9 +3745,7 @@ L80:
} /* lzeres_ */
/* Double Complex */ VOID zbeg_( ret_val, reset)
doublecomplex * ret_val;
logical *reset;
/* Double Complex */ VOID zbeg_( doublecomplex* ret_val, logical* reset)
{
/* System generated locals */
doublereal d__1, d__2;
@ -4030,8 +3806,7 @@ L10:
} /* zbeg_ */
doublereal ddiff_(x, y)
doublereal *x, *y;
doublereal ddiff_(doublereal* x, doublereal* y)
{
/* System generated locals */
doublereal ret_val;
@ -4051,19 +3826,7 @@ doublereal *x, *y;
} /* ddiff_ */
/* Subroutine */ int zmake_(type__, uplo, diag, m, n, a, nmax, aa, lda, kl,
ku, reset, transl, type_len, uplo_len, diag_len)
char *type__, *uplo, *diag;
integer *m, *n;
doublecomplex *a;
integer *nmax;
doublecomplex *aa;
integer *lda, *kl, *ku;
logical *reset;
doublecomplex *transl;
ftnlen type_len;
ftnlen uplo_len;
ftnlen diag_len;
/* Subroutine */ int zmake_(char* type__, char* uplo, char* diag, integer* m, integer* n, doublecomplex* a, integer* nmax, doublecomplex* aa, integer* lda, integer* kl, integer* ku, logical* reset, doublecomplex* transl, ftnlen type_len, ftnlen uplo_len, ftnlen diag_len)
{
/* System generated locals */
integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
@ -4072,7 +3835,7 @@ ftnlen diag_len;
/* Local variables */
static integer ibeg, iend, ioff;
extern /* Double Complex */ VOID zbeg_();
extern /* Double Complex */ VOID zbeg_(doublecomplex*, logical*);
static logical unit;
static integer i__, j;
static logical lower;

View File

@ -22,14 +22,11 @@ typedef double doublereal;
typedef struct { real r, i; } complex;
typedef struct { doublereal r, i; } doublecomplex;
#ifdef _MSC_VER
static inline _Fcomplex Cf(complex *z) {_Fcomplex zz={z->r , z->i}; return zz;}
static inline _Dcomplex Cd(doublecomplex *z) {_Dcomplex zz={z->r , z->i};return zz;}
static inline _Fcomplex * _pCf(complex *z) {return (_Fcomplex*)z;}
static inline _Dcomplex * _pCd(doublecomplex *z) {return (_Dcomplex*)z;}
#else
static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
#endif
#define pCf(z) (*_pCf(z))
@ -242,124 +239,7 @@ typedef struct Namelist Namelist;
/* procedure parameter types for -A and -C++ */
#define F2C_proc_par_types 1
#ifdef __cplusplus
typedef logical (*L_fp)(...);
#else
typedef logical (*L_fp)();
#endif
#if 0
static float spow_ui(float x, integer n) {
float pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
static double dpow_ui(double x, integer n) {
double pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
#ifdef _MSC_VER
static _Fcomplex cpow_ui(complex x, integer n) {
complex pow={1.0,0.0}; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i;
for(u = n; ; ) {
if(u & 01) pow.r *= x.r, pow.i *= x.i;
if(u >>= 1) x.r *= x.r, x.i *= x.i;
else break;
}
}
_Fcomplex p={pow.r, pow.i};
return p;
}
#else
static _Complex float cpow_ui(_Complex float x, integer n) {
_Complex float pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
#endif
#ifdef _MSC_VER
static _Dcomplex zpow_ui(_Dcomplex x, integer n) {
_Dcomplex pow={1.0,0.0}; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1];
for(u = n; ; ) {
if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1];
if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1];
else break;
}
}
_Dcomplex p = {pow._Val[0], pow._Val[1]};
return p;
}
#else
static _Complex double zpow_ui(_Complex double x, integer n) {
_Complex double pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
#endif
static integer pow_ii(integer x, integer n) {
integer pow; unsigned long int u;
if (n <= 0) {
if (n == 0 || x == 1) pow = 1;
else if (x != -1) pow = x == 0 ? 1/x : 0;
else n = -n;
}
if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
u = n;
for(pow = 1; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
static integer dmaxloc_(double *w, integer s, integer e, integer *n)
{
double m; integer i, mi;
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
if (w[i-1]>m) mi=i ,m=w[i-1];
return mi-s+1;
}
static integer smaxloc_(float *w, integer s, integer e, integer *n)
{
float m; integer i, mi;
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
if (w[i-1]>m) mi=i ,m=w[i-1];
return mi-s+1;
}
#endif
/* Common Block Declarations */
@ -388,7 +268,7 @@ static logical c_true = TRUE_;
static integer c__0 = 0;
static logical c_false = FALSE_;
/* Main program MAIN__() */ int main()
/* Main program MAIN__() */ int main(void)
{
/* Initialized data */
@ -400,26 +280,29 @@ static logical c_false = FALSE_;
doublereal d__1;
/* Builtin functions */
integer s_rsle(), do_lio(), e_rsle(), f_open(), s_wsfe(), do_fio(),
e_wsfe(), s_wsle(), e_wsle(), s_rsfe(), e_rsfe();
integer s_rsle(void), do_lio(void), e_rsle(void), f_open(void), s_wsfe(void), do_fio(void),
e_wsfe(void), s_wsle(void), e_wsle(void), s_rsfe(void), e_rsfe(void);
/* Local variables */
static integer nalf, idim[9];
static logical same;
static integer nbet, ntra;
static logical rewi;
extern /* Subroutine */ int zchk1_(), zchk2_(), zchk3_(), zchk4_(),
zchk5_();
extern /* Subroutine */ int zchk1_(char*, doublereal*, doublereal*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublereal*, integer*, ftnlen);
extern /* Subroutine */ int zchk2_(char*, doublereal*, doublereal*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublereal*, integer*, ftnlen);
extern /* Subroutine */ int zchk3_(char*, doublereal*, doublereal*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublereal*, doublecomplex*, integer*, ftnlen);
extern /* Subroutine */ int zchk4_(char*, doublereal*, doublereal*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublereal*, integer*, ftnlen);
extern /* Subroutine */ int zchk5_(char*, doublereal*, doublereal*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublereal*, doublecomplex*, integer*, ftnlen);
static doublecomplex c__[4225] /* was [65][65] */;
static doublereal g[65];
static integer i__, j;
extern doublereal ddiff_();
extern doublereal ddiff_(doublereal*, doublereal*);
static integer n;
static logical fatal;
static doublecomplex w[130];
static logical trace;
static integer nidim;
extern /* Subroutine */ int zmmch_();
extern /* Subroutine */ int zmmch_(char*, char*, integer*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, doublereal*, doublecomplex*, integer*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen, ftnlen);
static char snaps[32];
static integer isnum;
static logical ltest[9];
@ -431,10 +314,10 @@ static logical c_false = FALSE_;
static logical rorder;
static integer layout;
static logical ltestt, tsterr;
extern /* Subroutine */ int cz3chke_();
extern /* Subroutine */ int cz3chke_(char*, ftnlen);
static doublecomplex alf[7], bet[7];
static doublereal eps, err;
extern logical lze_();
extern logical lze_(doublecomplex*, doublecomplex*, integer*);
char tmpchar;
/* Test program for the COMPLEX*16 Level 3 Blas. */
@ -924,22 +807,7 @@ L230:
} /* MAIN__ */
/* Subroutine */ int zchk1_(sname, eps, thresh, nout, ntra, trace, rewi,
fatal, nidim, idim, nalf, alf, nbet, bet, nmax, a, aa, as, b, bb, bs,
c__, cc, cs, ct, g, iorder, sname_len)
char *sname;
doublereal *eps, *thresh;
integer *nout, *ntra;
logical *trace, *rewi, *fatal;
integer *nidim, *idim, *nalf;
doublecomplex *alf;
integer *nbet;
doublecomplex *bet;
integer *nmax;
doublecomplex *a, *aa, *as, *b, *bb, *bs, *c__, *cc, *cs, *ct;
doublereal *g;
integer *iorder;
ftnlen sname_len;
/* Subroutine */ int zchk1_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, doublecomplex* alf, integer* nbet, doublecomplex* bet, integer* nmax, doublecomplex* a, doublecomplex* aa, doublecomplex* as, doublecomplex* b, doublecomplex* bb, doublecomplex* bs, doublecomplex* c__, doublecomplex* cc, doublecomplex* cs, doublecomplex* ct, doublereal* g, integer* iorder, ftnlen sname_len)
{
/* Initialized data */
@ -956,21 +824,21 @@ ftnlen sname_len;
static integer i__, k, m, n;
static doublecomplex alpha;
static logical isame[13], trana, tranb;
extern /* Subroutine */ int zmake_();
extern /* Subroutine */ int zmake_(char*, char*, char*, integer*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, logical*, doublecomplex*, ftnlen, ftnlen, ftnlen);
static integer nargs;
extern /* Subroutine */ int zmmch_();
extern /* Subroutine */ int zmmch_(char*, char*, integer*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, doublereal*, doublecomplex*, integer*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen, ftnlen);
static logical reset;
static integer ia, ib;
extern /* Subroutine */ int zprcn1_();
extern /* Subroutine */ int zprcn1_(integer*, integer*, char*, integer*, char*, char*, integer*, integer*, integer*, doublecomplex*, integer*, integer*, doublecomplex*, integer*, ftnlen, ftnlen, ftnlen);
static integer ma, mb, na, nb, nc, ik, im, in, ks, ms, ns;
extern /* Subroutine */ int czgemm_();
extern /* Subroutine */ void czgemm_(integer*, char*, char*, integer*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen, ftnlen);
static char tranas[1], tranbs[1], transa[1], transb[1];
static doublereal errmax;
extern logical lzeres_();
extern logical lzeres_(char*, char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen, ftnlen);
static integer ica, icb, laa, lbb, lda, lcc, ldb, ldc;
static doublecomplex als, bls;
static doublereal err;
extern logical lze_();
extern logical lze_(doublecomplex*, doublecomplex*, integer*);
/* Tests ZGEMM. */
@ -1313,20 +1181,7 @@ L130:
} /* zchk1_ */
/* Subroutine */ int zprcn1_(nout, nc, sname, iorder, transa, transb, m, n, k,
alpha, lda, ldb, beta, ldc, sname_len, transa_len, transb_len)
integer *nout, *nc;
char *sname;
integer *iorder;
char *transa, *transb;
integer *m, *n, *k;
doublecomplex *alpha;
integer *lda, *ldb;
doublecomplex *beta;
integer *ldc;
ftnlen sname_len;
ftnlen transa_len;
ftnlen transb_len;
/* Subroutine */ int zprcn1_(integer* nout, integer* nc, char* sname, integer* iorder, char* transa, char* transb, integer* m, integer* n, integer* k, doublecomplex* alpha, integer* lda, integer* ldb, doublecomplex* beta, integer* ldc, ftnlen sname_len, ftnlen transa_len, ftnlen transb_len)
{
/* Local variables */
static char crc[14], cta[14], ctb[14];
@ -1357,22 +1212,7 @@ return 0;
} /* zprcn1_ */
/* Subroutine */ int zchk2_(sname, eps, thresh, nout, ntra, trace, rewi,
fatal, nidim, idim, nalf, alf, nbet, bet, nmax, a, aa, as, b, bb, bs,
c__, cc, cs, ct, g, iorder, sname_len)
char *sname;
doublereal *eps, *thresh;
integer *nout, *ntra;
logical *trace, *rewi, *fatal;
integer *nidim, *idim, *nalf;
doublecomplex *alf;
integer *nbet;
doublecomplex *bet;
integer *nmax;
doublecomplex *a, *aa, *as, *b, *bb, *bs, *c__, *cc, *cs, *ct;
doublereal *g;
integer *iorder;
ftnlen sname_len;
/* Subroutine */ int zchk2_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, doublecomplex* alf, integer* nbet, doublecomplex* bet, integer* nmax, doublecomplex* a, doublecomplex* aa, doublecomplex* as, doublecomplex* b, doublecomplex* bb, doublecomplex* bs, doublecomplex* c__, doublecomplex* cc, doublecomplex* cs, doublecomplex* ct, doublereal* g, integer* iorder, ftnlen sname_len)
{
/* Initialized data */
@ -1394,23 +1234,23 @@ ftnlen sname_len;
static doublecomplex alpha;
static logical isame[13];
static char sides[1];
extern /* Subroutine */ int zmake_();
extern /* Subroutine */ int zmake_(char*, char*, char*, integer*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, logical*, doublecomplex*, ftnlen, ftnlen, ftnlen);
static integer nargs;
extern /* Subroutine */ int zmmch_();
extern /* Subroutine */ int zmmch_(char*, char*, integer*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, doublereal*, doublecomplex*, integer*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen, ftnlen);
static logical reset;
static char uplos[1];
static integer ia, ib;
extern /* Subroutine */ int zprcn2_();
extern /* Subroutine */ int zprcn2_(integer*, integer*, char*, integer*, char*, char*, integer*, integer*, doublecomplex*, integer*, integer*, doublecomplex*, integer*, ftnlen, ftnlen, ftnlen);
static integer na, nc, im, in, ms, ns;
extern /* Subroutine */ int czhemm_();
extern /* Subroutine */ void czhemm_(integer*, char*, char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen, ftnlen);
static doublereal errmax;
extern logical lzeres_();
extern /* Subroutine */ int czsymm_();
extern logical lzeres_(char*, char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen, ftnlen);
extern /* Subroutine */ void czsymm_(integer*, char*, char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen, ftnlen);
static integer laa, lbb, lda, lcc, ldb, ldc, ics;
static doublecomplex als, bls;
static integer icu;
static doublereal err;
extern logical lze_();
extern logical lze_(doublecomplex*, doublecomplex*, integer*);
/* Tests ZHEMM and ZSYMM. */
@ -1737,20 +1577,7 @@ L120:
} /* zchk2_ */
/* Subroutine */ int zprcn2_(nout, nc, sname, iorder, side, uplo, m, n, alpha,
lda, ldb, beta, ldc, sname_len, side_len, uplo_len)
integer *nout, *nc;
char *sname;
integer *iorder;
char *side, *uplo;
integer *m, *n;
doublecomplex *alpha;
integer *lda, *ldb;
doublecomplex *beta;
integer *ldc;
ftnlen sname_len;
ftnlen side_len;
ftnlen uplo_len;
/* Subroutine */ int zprcn2_(integer* nout, integer* nc, char* sname, integer* iorder, char* side, char* uplo, integer* m, integer* n, doublecomplex* alpha, integer* lda, integer* ldb, doublecomplex* beta, integer* ldc, ftnlen sname_len, ftnlen side_len, ftnlen uplo_len)
{
/* Local variables */
static char cs[14], cu[14], crc[14];
@ -1777,21 +1604,7 @@ return 0;
} /* zprcn2_ */
/* Subroutine */ int zchk3_(sname, eps, thresh, nout, ntra, trace, rewi,
fatal, nidim, idim, nalf, alf, nmax, a, aa, as, b, bb, bs, ct, g, c__,
iorder, sname_len)
char *sname;
doublereal *eps, *thresh;
integer *nout, *ntra;
logical *trace, *rewi, *fatal;
integer *nidim, *idim, *nalf;
doublecomplex *alf;
integer *nmax;
doublecomplex *a, *aa, *as, *b, *bb, *bs, *ct;
doublereal *g;
doublecomplex *c__;
integer *iorder;
ftnlen sname_len;
/* Subroutine */ int zchk3_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, doublecomplex* alf, integer* nmax, doublecomplex* a, doublecomplex* aa, doublecomplex* as, doublecomplex* b, doublecomplex* bb, doublecomplex* bs, doublecomplex* ct, doublereal* g, doublecomplex* c__, integer* iorder, ftnlen sname_len)
{
/* Initialized data */
@ -1817,23 +1630,24 @@ ftnlen sname_len;
static char diags[1];
static logical isame[13];
static char sides[1];
extern /* Subroutine */ int zmake_();
extern /* Subroutine */ int zmake_(char*, char*, char*, integer*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, logical*, doublecomplex*, ftnlen, ftnlen, ftnlen);
static integer nargs;
extern /* Subroutine */ int zmmch_();
extern /* Subroutine */ int zmmch_(char*, char*, integer*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, doublereal*, doublecomplex*, integer*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen, ftnlen);
static logical reset;
static char uplos[1];
static integer ia, na;
extern /* Subroutine */ int zprcn3_();
extern /* Subroutine */ int zprcn3_(integer*, integer*, char*, integer*, char*, char*, char*, char*, integer*, integer*, doublecomplex*, integer*, integer*, ftnlen, ftnlen, ftnlen, ftnlen, ftnlen);
static integer nc, im, in, ms, ns;
static char tranas[1], transa[1];
static doublereal errmax;
extern logical lzeres_();
extern /* Subroutine */ int cztrmm_(), cztrsm_();
extern logical lzeres_(char*, char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen, ftnlen);
extern /* Subroutine */ void cztrmm_(integer*, char*, char*, char*, char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, ftnlen, ftnlen, ftnlen, ftnlen);
extern /* Subroutine */ void cztrsm_(integer*, char*, char*, char*, char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, ftnlen, ftnlen, ftnlen, ftnlen);
static integer laa, icd, lbb, lda, ldb, ics;
static doublecomplex als;
static integer ict, icu;
static doublereal err;
extern logical lze_();
extern logical lze_(doublecomplex*, doublecomplex*, integer*);
/* Tests ZTRMM and ZTRSM. */
@ -2227,21 +2041,7 @@ L160:
} /* zchk3_ */
/* Subroutine */ int zprcn3_(nout, nc, sname, iorder, side, uplo, transa,
diag, m, n, alpha, lda, ldb, sname_len, side_len, uplo_len,
transa_len, diag_len)
integer *nout, *nc;
char *sname;
integer *iorder;
char *side, *uplo, *transa, *diag;
integer *m, *n;
doublecomplex *alpha;
integer *lda, *ldb;
ftnlen sname_len;
ftnlen side_len;
ftnlen uplo_len;
ftnlen transa_len;
ftnlen diag_len;
/* Subroutine */ int zprcn3_(integer* nout, integer* nc, char* sname, integer* iorder, char* side, char* uplo, char* transa, char* diag, integer* m, integer* n, doublecomplex* alpha, integer* lda, integer* ldb, ftnlen sname_len, ftnlen side_len, ftnlen uplo_len, ftnlen transa_len, ftnlen diag_len)
{
/* Local variables */
@ -2281,22 +2081,7 @@ return 0;
} /* zprcn3_ */
/* Subroutine */ int zchk4_(sname, eps, thresh, nout, ntra, trace, rewi,
fatal, nidim, idim, nalf, alf, nbet, bet, nmax, a, aa, as, b, bb, bs,
c__, cc, cs, ct, g, iorder, sname_len)
char *sname;
doublereal *eps, *thresh;
integer *nout, *ntra;
logical *trace, *rewi, *fatal;
integer *nidim, *idim, *nalf;
doublecomplex *alf;
integer *nbet;
doublecomplex *bet;
integer *nmax;
doublecomplex *a, *aa, *as, *b, *bb, *bs, *c__, *cc, *cs, *ct;
doublereal *g;
integer *iorder;
ftnlen sname_len;
/* Subroutine */ int zchk4_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, doublecomplex* alf, integer* nbet, doublecomplex* bet, integer* nmax, doublecomplex* a, doublecomplex* aa, doublecomplex* as, doublecomplex* b, doublecomplex* bb, doublecomplex* bs, doublecomplex* c__, doublecomplex* cc, doublecomplex* cs, doublecomplex* ct, doublereal* g, integer* iorder, ftnlen sname_len)
{
/* Initialized data */
@ -2320,30 +2105,30 @@ ftnlen sname_len;
static doublecomplex alpha;
static doublereal rbeta;
static logical isame[13];
extern /* Subroutine */ int zmake_();
extern /* Subroutine */ int zmake_(char*, char*, char*, integer*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, logical*, doublecomplex*, ftnlen, ftnlen, ftnlen);
static integer nargs;
extern /* Subroutine */ int zmmch_();
extern /* Subroutine */ int zmmch_(char*, char*, integer*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, doublereal*, doublecomplex*, integer*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen, ftnlen);
static doublereal rbets;
static logical reset;
static char trans[1];
static logical upper;
static char uplos[1];
static integer ia, ib, jc, ma, na;
extern /* Subroutine */ int zprcn4_();
extern /* Subroutine */ int zprcn4_(integer*, integer*, char*, integer*, char*, char*, integer*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, ftnlen, ftnlen, ftnlen);
static integer nc;
extern /* Subroutine */ int zprcn6_();
extern /* Subroutine */ int zprcn6_(integer*, integer*, char*, integer*, char*, char*, integer*, integer*, doublereal*, integer*, doublereal*, integer*, ftnlen, ftnlen, ftnlen);
static integer ik, in, jj, lj, ks, ns;
static doublereal ralpha;
extern /* Subroutine */ int czherk_();
extern /* Subroutine */ int czherk_(integer*, char*, char*, integer*, integer*, doublereal*, doublecomplex*, integer*, doublereal*, doublecomplex*, integer*, ftnlen, ftnlen);
static doublereal errmax;
extern logical lzeres_();
extern logical lzeres_(char*, char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen, ftnlen);
static char transs[1], transt[1];
extern /* Subroutine */ int czsyrk_();
extern /* Subroutine */ int czsyrk_(integer*, char*, char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen, ftnlen);
static integer laa, lda, lcc, ldc;
static doublecomplex als;
static integer ict, icu;
static doublereal err;
extern logical lze_();
extern logical lze_(doublecomplex*, doublecomplex*, integer*);
/* Tests ZHERK and ZSYRK. */
@ -2732,20 +2517,7 @@ L130:
} /* zchk4_ */
/* Subroutine */ int zprcn4_(nout, nc, sname, iorder, uplo, transa, n, k,
alpha, lda, beta, ldc, sname_len, uplo_len, transa_len)
integer *nout, *nc;
char *sname;
integer *iorder;
char *uplo, *transa;
integer *n, *k;
doublecomplex *alpha;
integer *lda;
doublecomplex *beta;
integer *ldc;
ftnlen sname_len;
ftnlen uplo_len;
ftnlen transa_len;
/* Subroutine */ int zprcn4_(integer* nout, integer* nc, char* sname, integer* iorder, char* uplo, char* transa, integer* n, integer* k, doublecomplex* alpha, integer* lda, doublecomplex* beta, integer* ldc, ftnlen sname_len, ftnlen uplo_len, ftnlen transa_len)
{
/* Local variables */
static char ca[14], cu[14], crc[14];
@ -2775,20 +2547,7 @@ return 0;
/* Subroutine */ int zprcn6_(nout, nc, sname, iorder, uplo, transa, n, k,
alpha, lda, beta, ldc, sname_len, uplo_len, transa_len)
integer *nout, *nc;
char *sname;
integer *iorder;
char *uplo, *transa;
integer *n, *k;
doublereal *alpha;
integer *lda;
doublereal *beta;
integer *ldc;
ftnlen sname_len;
ftnlen uplo_len;
ftnlen transa_len;
/* Subroutine */ int zprcn6_(integer* nout, integer* nc, char* sname, integer* iorder, char* uplo, char* transa, integer* n, integer* k, doublereal* alpha, integer* lda, doublereal* beta, integer* ldc, ftnlen sname_len, ftnlen uplo_len, ftnlen transa_len)
{
/* Local variables */
@ -2818,23 +2577,7 @@ return 0;
} /* zprcn6_ */
/* Subroutine */ int zchk5_(sname, eps, thresh, nout, ntra, trace, rewi,
fatal, nidim, idim, nalf, alf, nbet, bet, nmax, ab, aa, as, bb, bs,
c__, cc, cs, ct, g, w, iorder, sname_len)
char *sname;
doublereal *eps, *thresh;
integer *nout, *ntra;
logical *trace, *rewi, *fatal;
integer *nidim, *idim, *nalf;
doublecomplex *alf;
integer *nbet;
doublecomplex *bet;
integer *nmax;
doublecomplex *ab, *aa, *as, *bb, *bs, *c__, *cc, *cs, *ct;
doublereal *g;
doublecomplex *w;
integer *iorder;
ftnlen sname_len;
/* Subroutine */ int zchk5_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, doublecomplex* alf, integer* nbet, doublecomplex* bet, integer* nmax, doublecomplex* ab, doublecomplex* aa, doublecomplex* as, doublecomplex* bb, doublecomplex* bs, doublecomplex* c__, doublecomplex* cc, doublecomplex* cs, doublecomplex* ct, doublereal* g, doublecomplex* w, integer* iorder, ftnlen sname_len)
{
/* Initialized data */
@ -2857,27 +2600,28 @@ ftnlen sname_len;
static doublecomplex alpha;
static doublereal rbeta;
static logical isame[13];
extern /* Subroutine */ int zmake_();
extern /* Subroutine */ int zmake_(char*, char*, char*, integer*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, logical*, doublecomplex*, ftnlen, ftnlen, ftnlen);
static integer nargs;
extern /* Subroutine */ int zmmch_();
extern /* Subroutine */ int zmmch_(char*, char*, integer*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, doublereal*, doublecomplex*, integer*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen, ftnlen);
static doublereal rbets;
static logical reset;
static char trans[1];
static logical upper;
static char uplos[1];
static integer ia, ib, jc, ma, na, nc;
extern /* Subroutine */ int zprcn5_(), zprcn7_();
extern /* Subroutine */ int zprcn5_(integer*, integer*, char*, integer*, char*, char*, integer*, integer*, doublecomplex*, integer*, integer*, doublecomplex*, integer*, ftnlen, ftnlen, ftnlen);
extern /* Subroutine */ int zprcn7_(integer*, integer*, char*, integer*, char*, char*, integer*, integer*, doublecomplex*, integer*, integer*, doublereal*, integer*, ftnlen, ftnlen, ftnlen);
static integer ik, in, jj, lj, ks, ns;
static doublereal errmax;
extern logical lzeres_();
extern logical lzeres_(char*, char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen, ftnlen);
static char transs[1], transt[1];
extern /* Subroutine */ int czher2k_();
extern /* Subroutine */ int czher2k_(integer*, char*, char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublereal*, doublecomplex*, integer*, ftnlen, ftnlen);
static integer laa, lbb, lda, lcc, ldb, ldc;
static doublecomplex als;
static integer ict, icu;
extern /* Subroutine */ int czsyr2k_();
extern /* Subroutine */ int czsyr2k_(integer*, char*, char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen, ftnlen);
static doublereal err;
extern logical lze_();
extern logical lze_(doublecomplex*, doublecomplex*, integer*);
/* Tests ZHER2K and ZSYR2K. */
@ -3349,20 +3093,7 @@ L160:
} /* zchk5_ */
/* Subroutine */ int zprcn5_(nout, nc, sname, iorder, uplo, transa, n, k,
alpha, lda, ldb, beta, ldc, sname_len, uplo_len, transa_len)
integer *nout, *nc;
char *sname;
integer *iorder;
char *uplo, *transa;
integer *n, *k;
doublecomplex *alpha;
integer *lda, *ldb;
doublecomplex *beta;
integer *ldc;
ftnlen sname_len;
ftnlen uplo_len;
ftnlen transa_len;
/* Subroutine */ int zprcn5_(integer* nout, integer* nc, char* sname, integer* iorder, char* uplo, char* transa, integer* n, integer* k, doublecomplex* alpha, integer* lda, integer* ldb, doublecomplex* beta, integer* ldc, ftnlen sname_len, ftnlen uplo_len, ftnlen transa_len)
{
/* Local variables */
static char ca[14], cu[14], crc[14];
@ -3392,20 +3123,7 @@ return 0;
/* Subroutine */ int zprcn7_(nout, nc, sname, iorder, uplo, transa, n, k,
alpha, lda, ldb, beta, ldc, sname_len, uplo_len, transa_len)
integer *nout, *nc;
char *sname;
integer *iorder;
char *uplo, *transa;
integer *n, *k;
doublecomplex *alpha;
integer *lda, *ldb;
doublereal *beta;
integer *ldc;
ftnlen sname_len;
ftnlen uplo_len;
ftnlen transa_len;
/* Subroutine */ int zprcn7_(integer* nout, integer* nc, char* sname, integer* iorder, char* uplo, char* transa, integer* n, integer* k, doublecomplex* alpha, integer* lda, integer* ldb, doublereal* beta, integer* ldc, ftnlen sname_len, ftnlen uplo_len, ftnlen transa_len)
{
/* Local variables */
@ -3435,19 +3153,7 @@ return 0;
} /* zprcn7_ */
/* Subroutine */ int zmake_(type__, uplo, diag, m, n, a, nmax, aa, lda, reset,
transl, type_len, uplo_len, diag_len)
char *type__, *uplo, *diag;
integer *m, *n;
doublecomplex *a;
integer *nmax;
doublecomplex *aa;
integer *lda;
logical *reset;
doublecomplex *transl;
ftnlen type_len;
ftnlen uplo_len;
ftnlen diag_len;
/* Subroutine */ int zmake_(char* type__, char* uplo, char* diag, integer* m, integer* n, doublecomplex* a, integer* nmax, doublecomplex* aa, integer* lda, logical* reset, doublecomplex* transl, ftnlen type_len, ftnlen uplo_len, ftnlen diag_len)
{
/* System generated locals */
integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
@ -3456,7 +3162,7 @@ ftnlen diag_len;
/* Local variables */
static integer ibeg, iend;
extern /* Double Complex */ VOID zbeg_();
extern /* Double Complex */ VOID zbeg_(doublecomplex*, logical*);
static logical unit;
static integer i__, j;
static logical lower, upper;
@ -3629,27 +3335,7 @@ ftnlen diag_len;
} /* zmake_ */
/* Subroutine */ int zmmch_(transa, transb, m, n, kk, alpha, a, lda, b, ldb,
beta, c__, ldc, ct, g, cc, ldcc, eps, err, fatal, nout, mv,
transa_len, transb_len)
char *transa, *transb;
integer *m, *n, *kk;
doublecomplex *alpha, *a;
integer *lda;
doublecomplex *b;
integer *ldb;
doublecomplex *beta, *c__;
integer *ldc;
doublecomplex *ct;
doublereal *g;
doublecomplex *cc;
integer *ldcc;
doublereal *eps, *err;
logical *fatal;
integer *nout;
logical *mv;
ftnlen transa_len;
ftnlen transb_len;
/* Subroutine */ int zmmch_(char* transa, char* transb, integer* m, integer* n, integer* kk, doublecomplex* alpha, doublecomplex* a, integer* lda, doublecomplex* b, integer* ldb, doublecomplex* beta, doublecomplex* c__, integer* ldc, doublecomplex* ct, doublereal* g, doublecomplex* cc, integer* ldcc, doublereal* eps, doublereal* err, logical* fatal, integer* nout, logical* mv, ftnlen transa_len, ftnlen transb_len)
{
/* System generated locals */
@ -3658,7 +3344,7 @@ ftnlen transb_len;
doublereal d__1, d__2, d__3, d__4, d__5, d__6;
doublecomplex z__1, z__2, z__3, z__4;
double sqrt();
double sqrt(double);
/* Local variables */
static doublereal erri;
static integer i__, j, k;
@ -4031,9 +3717,7 @@ L250:
} /* zmmch_ */
logical lze_(ri, rj, lr)
doublecomplex *ri, *rj;
integer *lr;
logical lze_(doublecomplex* ri, doublecomplex* rj, integer* lr)
{
/* System generated locals */
integer i__1, i__2, i__3;
@ -4082,13 +3766,7 @@ L30:
} /* lze_ */
logical lzeres_(type__, uplo, m, n, aa, as, lda, type_len, uplo_len)
char *type__, *uplo;
integer *m, *n;
doublecomplex *aa, *as;
integer *lda;
ftnlen type_len;
ftnlen uplo_len;
logical lzeres_(char* type__, char* uplo, integer* m, integer* n, doublecomplex *aa, doublecomplex* as, integer* lda, ftnlen type_len, ftnlen uplo_len)
{
/* System generated locals */
integer aa_dim1, aa_offset, as_dim1, as_offset, i__1, i__2, i__3, i__4;
@ -4184,9 +3862,7 @@ L80:
} /* lzeres_ */
/* Double Complex */ VOID zbeg_( ret_val, reset)
doublecomplex * ret_val;
logical *reset;
/* Double Complex */ VOID zbeg_(doublecomplex* ret_val, logical* reset)
{
/* System generated locals */
doublereal d__1, d__2;
@ -4249,8 +3925,7 @@ L10:
} /* zbeg_ */
doublereal ddiff_(x, y)
doublereal *x, *y;
doublereal ddiff_(doublereal* x, doublereal* y)
{
/* System generated locals */
doublereal ret_val;

View File

@ -40,7 +40,7 @@
#include <stdlib.h>
#include "common.h"
int CNAME(int mode, blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, int (*function)(), void *sa, void *sb, BLASLONG nthreads) {
int CNAME(int mode, blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, int (*function)(blas_arg_t*, BLASLONG*, BLASLONG*,FLOAT *, FLOAT *, BLASLONG ), void *sa, void *sb, BLASLONG nthreads) {
blas_queue_t queue[MAX_CPU_NUMBER];
BLASLONG range[MAX_CPU_NUMBER + 1];

View File

@ -60,7 +60,7 @@ static const int divide_rule[][2] =
{ 1, 61}, { 2, 31}, { 7, 9}, { 8, 8},
};
int CNAME(int mode, blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, int (*function)(), void *sa, void *sb, BLASLONG nthreads) {
int CNAME(int mode, blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, int (*function)(blas_arg_t*, BLASLONG*, BLASLONG*,FLOAT *, FLOAT *, BLASLONG ), void *sa, void *sb, BLASLONG nthreads) {
blas_queue_t queue[MAX_CPU_NUMBER];

View File

@ -40,7 +40,7 @@
#include <stdlib.h>
#include "common.h"
int CNAME(int mode, blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, int (*function)(), void *sa, void *sb, BLASLONG nthreads) {
int CNAME(int mode, blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, int (*function)(blas_arg_t*, BLASLONG*, BLASLONG*,FLOAT *, FLOAT *, BLASLONG), void *sa, void *sb, BLASLONG nthreads) {
blas_queue_t queue[MAX_CPU_NUMBER];
BLASLONG range[MAX_CPU_NUMBER + 1];

View File

@ -42,7 +42,7 @@
int CNAME(int mode,
blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n,
int (*function)(), void *sa, void *sb, BLASLONG divM, BLASLONG divN) {
int (*function)(blas_arg_t*, BLASLONG*, BLASLONG*,FLOAT *, FLOAT *, BLASLONG ), void *sa, void *sb, BLASLONG divM, BLASLONG divN) {
blas_queue_t queue[MAX_CPU_NUMBER];

View File

@ -41,7 +41,7 @@
#include <math.h>
#include "common.h"
int CNAME(int mode, blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, int (*function)(), void *sa, void *sb, BLASLONG nthreads) {
int CNAME(int mode, blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, int (*function)(blas_arg_t*, BLASLONG*, BLASLONG*, FLOAT *, FLOAT *, BLASLONG), void *sa, void *sb, BLASLONG nthreads) {
blas_queue_t queue[MAX_CPU_NUMBER];
BLASLONG range[MAX_CPU_NUMBER + 1];

View File

@ -43,7 +43,7 @@
int blas_level1_thread(int mode, BLASLONG m, BLASLONG n, BLASLONG k, void *alpha,
void *a, BLASLONG lda,
void *b, BLASLONG ldb,
void *c, BLASLONG ldc, int (*function)(), int nthreads){
void *c, BLASLONG ldc, int (*function)(void), int nthreads){
blas_queue_t queue[MAX_CPU_NUMBER];
blas_arg_t args [MAX_CPU_NUMBER];
@ -141,7 +141,7 @@ int blas_level1_thread(int mode, BLASLONG m, BLASLONG n, BLASLONG k, void *alpha
int blas_level1_thread_with_return_value(int mode, BLASLONG m, BLASLONG n, BLASLONG k, void *alpha,
void *a, BLASLONG lda,
void *b, BLASLONG ldb,
void *c, BLASLONG ldc, int (*function)(), int nthreads){
void *c, BLASLONG ldc, int (*function)(void), int nthreads){
blas_queue_t queue[MAX_CPU_NUMBER];
blas_arg_t args [MAX_CPU_NUMBER];

View File

@ -93,7 +93,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif
#endif
extern unsigned int openblas_thread_timeout();
extern unsigned int openblas_thread_timeout(void);
#ifdef SMP_SERVER

View File

@ -70,7 +70,7 @@
int blas_server_avail = 0;
int blas_omp_number_max = 0;
extern int openblas_omp_adaptive_env();
extern int openblas_omp_adaptive_env(void);
static void * blas_thread_buffer[MAX_PARALLEL_NUMBER][MAX_CPU_NUMBER];
#ifdef HAVE_C11
@ -79,7 +79,7 @@ static atomic_bool blas_buffer_inuse[MAX_PARALLEL_NUMBER];
static _Bool blas_buffer_inuse[MAX_PARALLEL_NUMBER];
#endif
static void adjust_thread_buffers() {
static void adjust_thread_buffers(void) {
int i=0, j=0;
@ -124,8 +124,17 @@ void openblas_set_num_threads(int num_threads) {
}
int blas_thread_init(void){
if(blas_omp_number_max <= 0)
blas_omp_number_max = omp_get_max_threads();
#if defined(__FreeBSD__) && defined(__clang__)
extern int openblas_omp_num_threads_env(void);
if(blas_omp_number_max <= 0)
blas_omp_number_max= openblas_omp_num_threads_env();
if (blas_omp_number_max <= 0)
blas_omp_number_max=MAX_CPU_NUMBER;
#else
blas_omp_number_max = omp_get_max_threads();
#endif
blas_get_cpu_number();

View File

@ -805,7 +805,8 @@ static gotoblas_t *get_coretype(void){
}
return NULL;
}
case 0xf:
break;
case 0xf:
if (model <= 0x2) return &gotoblas_NORTHWOOD;
return &gotoblas_PRESCOTT;
}

View File

@ -3,7 +3,7 @@
extern gotoblas_t gotoblas_POWER6;
extern gotoblas_t gotoblas_POWER8;
#if (!defined __GNUC__) || ( __GNUC__ >= 6)
#if ((!defined __GNUC__) || ( __GNUC__ >= 6)) || defined(__clang__)
extern gotoblas_t gotoblas_POWER9;
#endif
#ifdef HAVE_P10_SUPPORT
@ -20,14 +20,14 @@ static char *corename[] = {
"POWER10"
};
#define NUM_CORETYPES 4
#define NUM_CORETYPES 5
char *gotoblas_corename(void) {
#ifndef C_PGI
if (gotoblas == &gotoblas_POWER6) return corename[1];
#endif
if (gotoblas == &gotoblas_POWER8) return corename[2];
#if (!defined __GNUC__) || ( __GNUC__ >= 6)
#if ((!defined __GNUC__) || ( __GNUC__ >= 6)) || defined(__clang__)
if (gotoblas == &gotoblas_POWER9) return corename[3];
#endif
#ifdef HAVE_P10_SUPPORT
@ -36,13 +36,37 @@ char *gotoblas_corename(void) {
return corename[0];
}
#if defined(__clang__)
static int __builtin_cpu_supports(char* arg)
{
return 0;
}
#endif
#define CPU_UNKNOWN 0
#define CPU_POWER5 5
#define CPU_POWER6 6
#define CPU_POWER8 8
#define CPU_POWER9 9
#define CPU_POWER10 10
#ifdef _AIX
#include <sys/systemcfg.h>
static int cpuid(void)
{
int arch = _system_configuration.implementation;
#ifdef POWER_6
if (arch == POWER_6) return CPU_POWER6;
#endif
#ifdef POWER_7
else if (arch == POWER_7) return CPU_POWER6;
#endif
#ifdef POWER_8
else if (arch == POWER_8) return CPU_POWER8;
#endif
#ifdef POWER_9
else if (arch == POWER_9) return CPU_POWER9;
#endif
#ifdef POWER_10
else if (arch == POWER_10) return CPU_POWER10;
#endif
return CPU_UNKNOWN;
}
#else
#if defined(C_PGI) || defined(__clang__)
/*
* NV HPC compilers do not yet implement __builtin_cpu_is().
@ -53,21 +77,12 @@ static int __builtin_cpu_supports(char* arg)
* what was requested.
*/
#include <string.h>
/*
* Define POWER processor version table.
*
* NOTE NV HPC SDK compilers only support POWER8 and POWER9 at this time
*/
#define CPU_UNKNOWN 0
#define CPU_POWER5 5
#define CPU_POWER6 6
#define CPU_POWER8 8
#define CPU_POWER9 9
#define CPU_POWER10 10
static struct {
uint32_t pvr_mask;
uint32_t pvr_value;
@ -160,7 +175,8 @@ static struct {
},
};
static int __builtin_cpu_is(const char *cpu) {
static int cpuid(void)
{
int i;
uint32_t pvr;
uint32_t cpu_type;
@ -178,15 +194,46 @@ static int __builtin_cpu_is(const char *cpu) {
pvrPOWER[i].cpu_name, pvrPOWER[i].cpu_type);
#endif
cpu_type = pvrPOWER[i].cpu_type;
if (!strcmp(cpu, "power8"))
return cpu_type == CPU_POWER8;
if (!strcmp(cpu, "power9"))
return cpu_type == CPU_POWER9;
return 0;
return (int)(cpu_type);
}
#endif /* C_PGI */
#endif /* _AIX */
#ifndef __BUILTIN_CPU_SUPPORTS__
#include <string.h>
#if defined(_AIX) || (defined(__has_builtin) && !__has_builtin(__builtin_cpu_is))
static int __builtin_cpu_is(const char *arg)
{
static int ipinfo = -1;
if (ipinfo < 0) {
ipinfo = cpuid();
}
#ifdef HAVE_P10_SUPPORT
if (ipinfo == CPU_POWER10) {
if (!strcmp(arg, "power10")) return 1;
}
#endif
if (ipinfo == CPU_POWER9) {
if (!strcmp(arg, "power9")) return 1;
} else if (ipinfo == CPU_POWER8) {
if (!strcmp(arg, "power8")) return 1;
#ifndef C_PGI
} else if (ipinfo == CPU_POWER6) {
if (!strcmp(arg, "power6")) return 1;
#endif
}
return 0;
}
#endif
#if defined(_AIX) || (defined(__has_builtin) && !__has_builtin(__builtin_cpu_supports))
static int __builtin_cpu_supports(const char *arg)
{
return 0;
}
#endif
#endif
static gotoblas_t *get_coretype(void) {
@ -196,19 +243,23 @@ static gotoblas_t *get_coretype(void) {
#endif
if (__builtin_cpu_is("power8"))
return &gotoblas_POWER8;
#if (!defined __GNUC__) || ( __GNUC__ >= 6)
#if ((!defined __GNUC__) || ( __GNUC__ >= 6)) || defined(__clang__)
if (__builtin_cpu_is("power9"))
return &gotoblas_POWER9;
#endif
#ifdef HAVE_P10_SUPPORT
#if defined(_AIX) || defined(__clang__)
if (__builtin_cpu_is("power10"))
#else
if (__builtin_cpu_supports ("arch_3_1") && __builtin_cpu_supports ("mma"))
#endif
return &gotoblas_POWER10;
#endif
/* Fall back to the POWER9 implementation if the toolchain is too old or the MMA feature is not set */
#if (!defined __GNUC__) || ( __GNUC__ >= 11) || (__GNUC__ == 10 && __GNUC_MINOR__ >= 2)
if (__builtin_cpu_is("power10"))
return &gotoblas_POWER9;
#endif
#endif
return NULL;
}
@ -233,7 +284,7 @@ static gotoblas_t *force_coretype(char * coretype) {
case 1: return (&gotoblas_POWER6);
#endif
case 2: return (&gotoblas_POWER8);
#if (!defined __GNUC__) || ( __GNUC__ >= 6)
#if ((!defined __GNUC__) || ( __GNUC__ >= 6)) || defined(__clang__)
case 3: return (&gotoblas_POWER9);
#endif
#ifdef HAVE_P10_SUPPORT

View File

@ -13,7 +13,7 @@ extern gotoblas_t gotoblas_Z14;
#define NUM_CORETYPES 4
extern int openblas_verbose();
extern int openblas_verbose(void);
extern void openblas_warning(int verbose, const char* msg);
char* gotoblas_corename(void) {

View File

@ -73,6 +73,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "common.h"
#define NEW_BUFFERS 512
#ifndef likely
#ifdef __GNUC__
#define likely(x) __builtin_expect(!!(x), 1)
@ -426,9 +427,9 @@ int goto_get_num_procs (void) {
return blas_cpu_number;
}
static void blas_memory_init();
static void blas_memory_init(void);
void openblas_fork_handler()
void openblas_fork_handler(void)
{
// This handler shuts down the OpenBLAS-managed PTHREAD pool when OpenBLAS is
// built with "make USE_OPENMP=0".
@ -445,9 +446,9 @@ void openblas_fork_handler()
#endif
}
extern int openblas_num_threads_env();
extern int openblas_goto_num_threads_env();
extern int openblas_omp_num_threads_env();
extern int openblas_num_threads_env(void);
extern int openblas_goto_num_threads_env(void);
extern int openblas_omp_num_threads_env(void);
int blas_get_cpu_number(void){
#if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) || defined(OS_ANDROID) || defined(OS_HAIKU)
@ -591,7 +592,7 @@ static BLASULONG key_lock = 0UL;
#endif
/* Returns a pointer to the start of the per-thread memory allocation data */
static __inline struct alloc_t ** get_memory_table() {
static __inline struct alloc_t ** get_memory_table(void) {
#if defined(SMP)
LOCK_COMMAND(&key_lock);
lsk=local_storage_key;
@ -1144,7 +1145,7 @@ static void blas_memory_cleanup(void* ptr){
}
}
static void blas_memory_init(){
static void blas_memory_init(void){
#if defined(SMP)
# if defined(OS_WINDOWS)
local_storage_key = TlsAlloc();
@ -1501,7 +1502,7 @@ static void gotoblas_memory_init(void) {
/* Initialization for all function; this function should be called before main */
static int gotoblas_initialized = 0;
extern void openblas_read_env();
extern void openblas_read_env(void);
void CONSTRUCTOR gotoblas_init(void) {
@ -1998,7 +1999,7 @@ int goto_get_num_procs (void) {
return blas_cpu_number;
}
void openblas_fork_handler()
void openblas_fork_handler(void)
{
// This handler shuts down the OpenBLAS-managed PTHREAD pool when OpenBLAS is
// built with "make USE_OPENMP=0".
@ -2015,9 +2016,9 @@ void openblas_fork_handler()
#endif
}
extern int openblas_num_threads_env();
extern int openblas_goto_num_threads_env();
extern int openblas_omp_num_threads_env();
extern int openblas_num_threads_env(void);
extern int openblas_goto_num_threads_env(void);
extern int openblas_omp_num_threads_env(void);
int blas_get_cpu_number(void){
#if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) || defined(OS_ANDROID) || defined(OS_HAIKU)
@ -2897,7 +2898,7 @@ void *blas_memory_alloc(int procpos){
#endif
position ++;
} while (position < 512+NUM_BUFFERS);
} while (position < NEW_BUFFERS + NUM_BUFFERS);
}
#if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP)
UNLOCK_COMMAND(&alloc_lock);
@ -3012,11 +3013,12 @@ void *blas_memory_alloc(int procpos){
if (memory_overflowed) goto terminate;
fprintf(stderr,"OpenBLAS warning: precompiled NUM_THREADS exceeded, adding auxiliary array for thread metadata.\n");
fprintf(stderr,"To avoid this warning, please rebuild your copy of OpenBLAS with a larger NUM_THREADS setting\n");
fprintf(stderr,"or set the environment variable OPENBLAS_NUM_THREADS to %d or lower\n", NUM_BUFFERS);
fprintf(stderr,"or set the environment variable OPENBLAS_NUM_THREADS to %d or lower\n", MAX_CPU_NUMBER);
memory_overflowed=1;
new_release_info = (struct release_t*) malloc(512*sizeof(struct release_t));
newmemory = (struct newmemstruct*) malloc(512*sizeof(struct newmemstruct));
for (i = 0; i < 512; i++) {
MB;
new_release_info = (struct release_t*) malloc(NEW_BUFFERS * sizeof(struct release_t));
newmemory = (struct newmemstruct*) malloc(NEW_BUFFERS * sizeof(struct newmemstruct));
for (i = 0; i < NEW_BUFFERS; i++) {
newmemory[i].addr = (void *)0;
#if defined(WHEREAMI) && !defined(USE_OPENMP)
newmemory[i].pos = -1;
@ -3129,12 +3131,12 @@ void blas_memory_free(void *free_area){
printf(" Position : %d\n", position);
#endif
if (unlikely(memory_overflowed && position >= NUM_BUFFERS)) {
while ((position < NUM_BUFFERS+512) && (newmemory[position-NUM_BUFFERS].addr != free_area))
while ((position < NUM_BUFFERS+NEW_BUFFERS) && (newmemory[position-NUM_BUFFERS].addr != free_area))
position++;
// arm: ensure all writes are finished before other thread takes this memory
WMB;
newmemory[position].used = 0;
if (position - NUM_BUFFERS >= NEW_BUFFERS) goto error;
newmemory[position-NUM_BUFFERS].used = 0;
#if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP)
UNLOCK_COMMAND(&alloc_lock);
#endif
@ -3213,7 +3215,7 @@ void blas_shutdown(void){
memory[pos].lock = 0;
}
if (memory_overflowed)
for (pos = 0; pos < 512; pos ++){
for (pos = 0; pos < NEW_BUFFERS; pos ++){
newmemory[pos].addr = (void *)0;
newmemory[pos].used = 0;
#if defined(WHEREAMI) && !defined(USE_OPENMP)
@ -3337,7 +3339,7 @@ static void gotoblas_memory_init(void) {
/* Initialization for all function; this function should be called before main */
static int gotoblas_initialized = 0;
extern void openblas_read_env();
extern void openblas_read_env(void);
void CONSTRUCTOR gotoblas_init(void) {

View File

@ -288,7 +288,7 @@ int goto_get_num_procs (void) {
return blas_cpu_number;
}
void openblas_fork_handler()
void openblas_fork_handler(void)
{
// This handler shuts down the OpenBLAS-managed PTHREAD pool when OpenBLAS is
// built with "make USE_OPENMP=0".
@ -305,9 +305,9 @@ void openblas_fork_handler()
#endif
}
extern int openblas_num_threads_env();
extern int openblas_goto_num_threads_env();
extern int openblas_omp_num_threads_env();
extern int openblas_num_threads_env(void);
extern int openblas_goto_num_threads_env(void);
extern int openblas_omp_num_threads_env(void);
int blas_get_cpu_number(void){
#if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) || defined(OS_ANDROID)

View File

@ -41,15 +41,15 @@ static int openblas_env_goto_num_threads=0;
static int openblas_env_omp_num_threads=0;
static int openblas_env_omp_adaptive=0;
int openblas_verbose() { return openblas_env_verbose;}
unsigned int openblas_thread_timeout() { return openblas_env_thread_timeout;}
int openblas_block_factor() { return openblas_env_block_factor;}
int openblas_num_threads_env() { return openblas_env_openblas_num_threads;}
int openblas_goto_num_threads_env() { return openblas_env_goto_num_threads;}
int openblas_omp_num_threads_env() { return openblas_env_omp_num_threads;}
int openblas_omp_adaptive_env() { return openblas_env_omp_adaptive;}
int openblas_verbose(void) { return openblas_env_verbose;}
unsigned int openblas_thread_timeout(void) { return openblas_env_thread_timeout;}
int openblas_block_factor(void) { return openblas_env_block_factor;}
int openblas_num_threads_env(void) { return openblas_env_openblas_num_threads;}
int openblas_goto_num_threads_env(void) { return openblas_env_goto_num_threads;}
int openblas_omp_num_threads_env(void) { return openblas_env_omp_num_threads;}
int openblas_omp_adaptive_env(void) { return openblas_env_omp_adaptive;}
void openblas_read_env() {
void openblas_read_env(void) {
int ret=0;
env_var_t p;
if (readenv(p,"OPENBLAS_VERBOSE")) ret = atoi(p);

View File

@ -33,7 +33,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "common.h"
extern int openblas_verbose();
extern int openblas_verbose(void);
void openblas_warning(int verbose, const char * msg) {
int current_verbose;

View File

@ -69,13 +69,13 @@ static char* openblas_config_str=""
;
#ifdef DYNAMIC_ARCH
char *gotoblas_corename();
char *gotoblas_corename(void);
#endif
static char tmp_config_str[256];
int openblas_get_parallel();
int openblas_get_parallel(void);
char* CNAME() {
char* CNAME(void) {
char tmpstr[20];
strcpy(tmp_config_str, openblas_config_str);
#ifdef DYNAMIC_ARCH
@ -90,7 +90,7 @@ char tmpstr[20];
}
char* openblas_get_corename() {
char* openblas_get_corename(void) {
#ifndef DYNAMIC_ARCH
return CHAR_CORENAME;
#else

View File

@ -42,17 +42,17 @@ static int parallel = 0;
#ifdef NEEDBUNDERSCORE
int CNAME() {
int CNAME(void) {
return parallel;
}
int NAME() {
int NAME(void) {
return parallel;
}
#else
//The CNAME and NAME are the same.
int NAME() {
int NAME(void) {
return parallel;
}
#endif

View File

@ -40,7 +40,7 @@
#include <string.h>
#include "common.h"
extern int openblas_block_factor();
extern int openblas_block_factor(void);
int get_L2_size(void);
#define DEFAULT_GEMM_P 128

View File

@ -272,23 +272,23 @@ static : ../$(LIBNAME)
rm -f goto.$(SUFFIX)
osx.def : $(GENSYM) ../Makefile.system ../getarch.c
./$(GENSYM) osx $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > $(@F)
./$(GENSYM) osx $(ARCH) "$(BU)" $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > $(@F)
aix.def : $(GENSYM) ../Makefile.system ../getarch.c
./$(GENSYM) aix $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > $(@F)
./$(GENSYM) aix $(ARCH) "$(BU)" $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > $(@F)
objcopy.def : $(GENSYM) ../Makefile.system ../getarch.c
./$(GENSYM) objcopy $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > $(@F)
./$(GENSYM) objcopy $(ARCH) "$(BU)" $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > $(@F)
objconv.def : $(GENSYM) ../Makefile.system ../getarch.c
./$(GENSYM) objconv $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > $(@F)
./$(GENSYM) objconv $(ARCH) "$(BU)" $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > $(@F)
test : linktest.c
$(CC) $(CFLAGS) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) -lm && echo OK.
rm -f linktest
linktest.c : $(GENSYM) ../Makefile.system ../getarch.c
./$(GENSYM) linktest $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > linktest.c
./$(GENSYM) linktest $(ARCH) "$(BU)" $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > linktest.c
clean ::
@rm -f *.def *.dylib __.SYMDEF* *.renamed

18
f_check
View File

@ -117,6 +117,9 @@ else
vendor=PGI
openmp='-mp'
;;
*xlf*)
vendor=IBM
;;
*)
vendor=G77
openmp=''
@ -155,6 +158,10 @@ else
*'IBM XL'*)
vendor=IBM
openmp='-openmp'
case "$CC" in *gcc*)
bu=_
;;
esac
;;
*NAG*)
vendor=NAG
@ -223,6 +230,10 @@ else
*ppuf*|*xlf*)
vendor=IBM
openmp='-openmp'
case "$CC" in *gcc*)
bu=_
;;
esac
;;
*open64*)
vendor=OPEN64
@ -362,13 +373,6 @@ if [ -n "$link" ]; then
;;
esac
case "$flag" in *-lgomp*)
case "$CC" in *clang*)
flag="-lomp"
;;
esac
esac
case "$flag" in -l*)
case "$flag" in
*ibrary*|*gfortranbegin*|*flangmain*|*frtbegin*|*pathfstart*|\

View File

@ -1301,7 +1301,7 @@ xhpr2.$(SUFFIX) xhpr2.$(PSUFFIX) : zhpr2.c
ifeq ($(BUILD_BFLOAT16),1)
sbgemm.$(SUFFIX) sbgemm.$(PSUFFIX) : gemm.c ../param.h
$(CC) -c $(CFLAGS) $< -o $(@F)
sbgemmt.$(SUFFIX) sbgemm.$(PSUFFIX) : gemmt.c ../param.h
sbgemmt.$(SUFFIX) sbgemmt.$(PSUFFIX) : gemmt.c ../param.h
$(CC) -c $(CFLAGS) $< -o $(@F)
endif

View File

@ -100,27 +100,29 @@ void CNAME( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows,
if ( order == BlasColMajor)
{
if ( trans == BlasNoTrans && *ldb < *rows ) info = 8;
if ( trans == BlasTrans && *ldb < *cols ) info = 8;
if ( trans == BlasNoTrans && *ldb < MAX(1,*rows) ) info = 8;
if ( trans == BlasTrans && *ldb < MAX(1,*cols) ) info = 8;
}
if ( order == BlasRowMajor)
{
if ( trans == BlasNoTrans && *ldb < *cols ) info = 8;
if ( trans == BlasTrans && *ldb < *rows ) info = 8;
if ( trans == BlasNoTrans && *ldb < MAX(1,*cols) ) info = 8;
if ( trans == BlasTrans && *ldb < MAX(1,*rows) ) info = 8;
}
if ( order == BlasColMajor && *lda < *rows ) info = 7;
if ( order == BlasRowMajor && *lda < *cols ) info = 7;
if ( *cols <= 0 ) info = 4;
if ( *rows <= 0 ) info = 3;
if ( trans < 0 ) info = 2;
if ( order < 0 ) info = 1;
if ( order == BlasColMajor && *lda < MAX(1,*rows) ) info = 7;
if ( order == BlasRowMajor && *lda < MAX(1,*cols) ) info = 7;
if ( *cols < 0 ) info = 4;
if ( *rows < 0 ) info = 3;
if ( trans < 0 ) info = 2;
if ( order < 0 ) info = 1;
if (info >= 0) {
BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
return;
}
if ((*rows == 0) || (*cols == 0)) return;
#ifdef NEW_IMATCOPY
if ( *lda == *ldb ) {
if ( order == BlasColMajor )

View File

@ -97,7 +97,7 @@ int NAME(blasint *N, FLOAT *a, blasint *LDA, blasint *K1, blasint *K2, blasint *
blas_level1_thread(mode, n, k1, k2, dummyalpha,
a, lda, NULL, 0, ipiv, incx,
(int(*)())laswp[flag], nthreads);
(int(*)(void))laswp[flag], nthreads);
}
#endif

View File

@ -96,7 +96,7 @@ int NAME(blasint *N, FLOAT *a, blasint *LDA, blasint *K1, blasint *K2, blasint *
mode = BLAS_SINGLE | BLAS_COMPLEX;
#endif
blas_level1_thread(mode, n, k1, k2, dummyalpha, a, lda, NULL, 0, ipiv, incx, (int(*)())laswp[flag], nthreads);
blas_level1_thread(mode, n, k1, k2, dummyalpha, a, lda, NULL, 0, ipiv, incx, (int(*)(void))laswp[flag], nthreads);
}
#endif

View File

@ -90,27 +90,29 @@ void CNAME(enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows,
#endif
if ( order == BlasColMajor)
{
if ( trans == BlasNoTrans && *ldb < *rows ) info = 9;
if ( trans == BlasTrans && *ldb < *cols ) info = 9;
if ( trans == BlasNoTrans && *ldb < MAX(1,*rows) ) info = 9;
if ( trans == BlasTrans && *ldb < MAX(1,*cols) ) info = 9;
}
if ( order == BlasRowMajor)
{
if ( trans == BlasNoTrans && *ldb < *cols ) info = 9;
if ( trans == BlasTrans && *ldb < *rows ) info = 9;
if ( trans == BlasNoTrans && *ldb < MAX(1,*cols) ) info = 9;
if ( trans == BlasTrans && *ldb < MAX(1,*rows) ) info = 9;
}
if ( order == BlasColMajor && *lda < *rows ) info = 7;
if ( order == BlasRowMajor && *lda < *cols ) info = 7;
if ( *cols <= 0 ) info = 4;
if ( *rows <= 0 ) info = 3;
if ( trans < 0 ) info = 2;
if ( order < 0 ) info = 1;
if ( order == BlasColMajor && *lda < MAX(1,*rows) ) info = 7;
if ( order == BlasRowMajor && *lda < MAX(1,*cols) ) info = 7;
if ( *cols < 0 ) info = 4;
if ( *rows < 0 ) info = 3;
if ( trans < 0 ) info = 2;
if ( order < 0 ) info = 1;
if (info >= 0) {
BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
return;
}
if ((*rows == 0) || (*cols == 0)) return;
if ( order == BlasColMajor )
{
if ( trans == BlasNoTrans )

View File

@ -66,13 +66,8 @@ void CNAME(FLOAT *DA, FLOAT *DB, FLOAT *C, FLOAT *S){
c = da / r;
s = db / r;
z = ONE;
if (da != ZERO) {
if (ada > adb){
z = s;
} else {
z = ONE / c;
}
}
if (ada > adb) z = s;
if ((ada <= adb) && (c != ZERO)) z = ONE / c;
*C = c;
*S = s;

View File

@ -101,31 +101,33 @@ void CNAME( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows,
if ( order == BlasColMajor)
{
if ( trans == BlasNoTrans && *ldb < *rows ) info = 9;
if ( trans == BlasConj && *ldb < *rows ) info = 9;
if ( trans == BlasTrans && *ldb < *cols ) info = 9;
if ( trans == BlasTransConj && *ldb < *cols ) info = 9;
if ( trans == BlasNoTrans && *ldb < MAX(1,*rows) ) info = 9;
if ( trans == BlasConj && *ldb < MAX(1,*rows) ) info = 9;
if ( trans == BlasTrans && *ldb < MAX(1,*cols) ) info = 9;
if ( trans == BlasTransConj && *ldb < MAX(1,*cols) ) info = 9;
}
if ( order == BlasRowMajor)
{
if ( trans == BlasNoTrans && *ldb < *cols ) info = 9;
if ( trans == BlasConj && *ldb < *cols ) info = 9;
if ( trans == BlasTrans && *ldb < *rows ) info = 9;
if ( trans == BlasTransConj && *ldb < *rows ) info = 9;
if ( trans == BlasNoTrans && *ldb < MAX(1,*cols) ) info = 9;
if ( trans == BlasConj && *ldb < MAX(1,*cols) ) info = 9;
if ( trans == BlasTrans && *ldb < MAX(1,*rows) ) info = 9;
if ( trans == BlasTransConj && *ldb < MAX(1,*rows) ) info = 9;
}
if ( order == BlasColMajor && *lda < *rows ) info = 7;
if ( order == BlasRowMajor && *lda < *cols ) info = 7;
if ( *cols <= 0 ) info = 4;
if ( *rows <= 0 ) info = 3;
if ( trans < 0 ) info = 2;
if ( order < 0 ) info = 1;
if ( order == BlasColMajor && *lda < MAX(1,*rows) ) info = 7;
if ( order == BlasRowMajor && *lda < MAX(1,*cols) ) info = 7;
if ( *cols < 0 ) info = 4;
if ( *rows < 0 ) info = 3;
if ( trans < 0 ) info = 2;
if ( order < 0 ) info = 1;
if (info >= 0) {
BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
return;
}
if ((*rows == 0) || (*cols == 0)) return;
#ifdef NEW_IMATCOPY
if (*lda == *ldb ) {
if ( order == BlasColMajor )

View File

@ -92,31 +92,33 @@ void CNAME(enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows,
#endif
if ( order == BlasColMajor)
{
if ( trans == BlasNoTrans && *ldb < *rows ) info = 9;
if ( trans == BlasConj && *ldb < *rows ) info = 9;
if ( trans == BlasTrans && *ldb < *cols ) info = 9;
if ( trans == BlasTransConj && *ldb < *cols ) info = 9;
if ( trans == BlasNoTrans && *ldb < MAX(1,*rows) ) info = 9;
if ( trans == BlasConj && *ldb < MAX(1,*rows) ) info = 9;
if ( trans == BlasTrans && *ldb < MAX(1,*cols) ) info = 9;
if ( trans == BlasTransConj && *ldb < MAX(1,*cols) ) info = 9;
}
if ( order == BlasRowMajor)
{
if ( trans == BlasNoTrans && *ldb < *cols ) info = 9;
if ( trans == BlasConj && *ldb < *cols ) info = 9;
if ( trans == BlasTrans && *ldb < *rows ) info = 9;
if ( trans == BlasTransConj && *ldb < *rows ) info = 9;
if ( trans == BlasNoTrans && *ldb < MAX(1,*cols) ) info = 9;
if ( trans == BlasConj && *ldb < MAX(1,*cols) ) info = 9;
if ( trans == BlasTrans && *ldb < MAX(1,*rows) ) info = 9;
if ( trans == BlasTransConj && *ldb < MAX(1,*rows) ) info = 9;
}
if ( order == BlasColMajor && *lda < *rows ) info = 7;
if ( order == BlasRowMajor && *lda < *cols ) info = 7;
if ( *cols <= 0 ) info = 4;
if ( *rows <= 0 ) info = 3;
if ( trans < 0 ) info = 2;
if ( order < 0 ) info = 1;
if ( order == BlasColMajor && *lda < MAX(1,*rows) ) info = 7;
if ( order == BlasRowMajor && *lda < MAX(1,*cols) ) info = 7;
if ( *cols < 0 ) info = 4;
if ( *rows < 0 ) info = 3;
if ( trans < 0 ) info = 2;
if ( order < 0 ) info = 1;
if (info >= 0) {
BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
return;
}
if ((*rows == 0) || (*cols == 0)) return;
if ( order == BlasColMajor )
{

View File

@ -30,14 +30,12 @@ void CNAME(void *VDA, void *VDB, FLOAT *C, void *VS) {
FLOAT db_r = *(DB+0);
FLOAT db_i = *(DB+1);
//long double r;
FLOAT *r, *S1=(FLOAT *)malloc(2*sizeof(FLOAT));
FLOAT *R=(FLOAT *)malloc(2*sizeof(FLOAT));
FLOAT S1[2];
FLOAT R[2];
long double d;
FLOAT ada = da_r * da_r + da_i * da_i;
FLOAT adb = db_r * db_r + db_i * db_i;
FLOAT adart = sqrt( da_r * da_r + da_i * da_i);
FLOAT adbrt = sqrt( db_r * db_r + db_i * db_i);
PRINT_DEBUG_NAME;
@ -61,16 +59,16 @@ void CNAME(void *VDA, void *VDB, FLOAT *C, void *VS) {
*(S1 + 0) = *(DB + 0);
*(S1 + 1) = *(DB + 1) *-1;
if (da_r == ZERO && da_i == ZERO) {
*C = ZERO;
*C = ZERO;
if (db_r == ZERO) {
(*DA) = fabsl(db_i);
*S = *S1 /da_r;
*(S+1) = *(S1+1) /da_r;
*S = *S1 /(*DA);
*(S+1) = *(S1+1) /(*DA);
return;
} else if ( db_i == ZERO) {
*DA = fabsl(db_r);
*S = *S1 /da_r;
*(S+1) = *(S1+1) /da_r;
*S = *S1 /(*DA);
*(S+1) = *(S1+1) /(*DA);
return;
} else {
long double g1 = MAX( fabsl(db_r), fabsl(db_i));
@ -115,10 +113,13 @@ void CNAME(void *VDA, void *VDB, FLOAT *C, void *VS) {
}
} else {
*C = ada / adahsq;
if (*C >= safmin)
if (*C >= safmin) {
*R = *DA / *C;
else
*(R+1) = *(DA+1) / *(C+1);
} else {
*R = *DA * (h / adahsq);
*(R+1) = *(DA+1) * (h / adahsq);
}
*S = *S1 * ada / adahsq;
*(S+1) = *(S1+1) * ada / adahsq;
}
@ -178,4 +179,4 @@ void CNAME(void *VDA, void *VDB, FLOAT *C, void *VS) {
}
}
}

View File

@ -5,12 +5,6 @@ endif
TOPDIR = ..
include $(TOPDIR)/Makefile.system
ifeq ($(ARCH), power)
ifeq ($(C_COMPILER), CLANG)
override CFLAGS += -fno-integrated-as
endif
endif
AVX2OPT =
ifeq ($(C_COMPILER), GCC)
# AVX2 support was added in 4.7.0

View File

@ -61,6 +61,15 @@ ifeq ($(CORE), ZEN)
USE_TRMM = 1
endif
ifeq ($(OS), AIX)
M4VERSION := $(shell m4 --version < /dev/null 2>&1 | grep GNU 2>&1 >/dev/null ; echo $$?)
ifeq ($(M4VERSION), 0)
M4_AIX := m4 -l16384
else
M4_AIX := m4 -B16384
endif
$(info $$var is [${$(M4_AIX)}])
endif
ifeq ($(CORE), POWER8)
ifeq ($(BINARY64),1)
USE_TRMM = 1
@ -173,7 +182,7 @@ ifeq ($(BUILD_BFLOAT16),1)
SBBLASOBJS += sbgemm_beta$(TSUFFIX).$(SUFFIX)
endif
ifneq "$(or $(BUILD_SINGLE),$(BUILD_DOUBLE))" ""
ifneq "$(or $(BUILD_SINGLE),$(BUILD_DOUBLE),$(BUILD_COMPLEX))" ""
SBLASOBJS += \
sgemm_beta$(TSUFFIX).$(SUFFIX) \
strmm_kernel_LN$(TSUFFIX).$(SUFFIX) strmm_kernel_LT$(TSUFFIX).$(SUFFIX) \
@ -182,7 +191,7 @@ SBLASOBJS += \
strsm_kernel_RN$(TSUFFIX).$(SUFFIX) strsm_kernel_RT$(TSUFFIX).$(SUFFIX)
endif
ifeq ($(BUILD_DOUBLE),1)
ifneq "$(or $(BUILD_DOUBLE),$(BUILD_COMPLEX16))" ""
DBLASOBJS += \
dgemm_beta$(TSUFFIX).$(SUFFIX) \
dtrmm_kernel_LN$(TSUFFIX).$(SUFFIX) dtrmm_kernel_LT$(TSUFFIX).$(SUFFIX) \
@ -198,7 +207,7 @@ QBLASOBJS += \
qtrsm_kernel_LN$(TSUFFIX).$(SUFFIX) qtrsm_kernel_LT$(TSUFFIX).$(SUFFIX) \
qtrsm_kernel_RN$(TSUFFIX).$(SUFFIX) qtrsm_kernel_RT$(TSUFFIX).$(SUFFIX)
ifeq ($(BUILD_COMPLEX),1)
ifneq "$(or $(BUILD_COMPLEX),$(BUILD_COMPLEX16))" ""
CBLASOBJS += \
ctrmm_kernel_LN$(TSUFFIX).$(SUFFIX) ctrmm_kernel_LT$(TSUFFIX).$(SUFFIX) \
ctrmm_kernel_LR$(TSUFFIX).$(SUFFIX) ctrmm_kernel_LC$(TSUFFIX).$(SUFFIX) \
@ -246,7 +255,7 @@ XBLASOBJS += xgemm3m_kernel$(TSUFFIX).$(SUFFIX)
endif
ifeq ($(BUILD_SINGLE),1)
ifneq "$(or $(BUILD_SINGLE),$(BUILD_DOUBLE),$(BUILD_COMPLEX))" ""
SBLASOBJS += \
strmm_iunucopy$(TSUFFIX).$(SUFFIX) strmm_iunncopy$(TSUFFIX).$(SUFFIX) \
strmm_ilnucopy$(TSUFFIX).$(SUFFIX) strmm_ilnncopy$(TSUFFIX).$(SUFFIX) \
@ -255,10 +264,7 @@ SBLASOBJS += \
strmm_ounucopy$(TSUFFIX).$(SUFFIX) strmm_ounncopy$(TSUFFIX).$(SUFFIX) \
strmm_olnucopy$(TSUFFIX).$(SUFFIX) strmm_olnncopy$(TSUFFIX).$(SUFFIX) \
strmm_outucopy$(TSUFFIX).$(SUFFIX) strmm_outncopy$(TSUFFIX).$(SUFFIX) \
strmm_oltucopy$(TSUFFIX).$(SUFFIX) strmm_oltncopy$(TSUFFIX).$(SUFFIX)
endif
ifneq "$(or $(BUILD_SINGLE),$(BUILD_DOUBLE))" ""
SBLASOBJS += \
strmm_oltucopy$(TSUFFIX).$(SUFFIX) strmm_oltncopy$(TSUFFIX).$(SUFFIX) \
strsm_iunucopy$(TSUFFIX).$(SUFFIX) strsm_iunncopy$(TSUFFIX).$(SUFFIX) \
strsm_ilnucopy$(TSUFFIX).$(SUFFIX) strsm_ilnncopy$(TSUFFIX).$(SUFFIX) \
strsm_iutucopy$(TSUFFIX).$(SUFFIX) strsm_iutncopy$(TSUFFIX).$(SUFFIX) \
@ -266,10 +272,7 @@ SBLASOBJS += \
strsm_ounucopy$(TSUFFIX).$(SUFFIX) strsm_ounncopy$(TSUFFIX).$(SUFFIX) \
strsm_olnucopy$(TSUFFIX).$(SUFFIX) strsm_olnncopy$(TSUFFIX).$(SUFFIX) \
strsm_outucopy$(TSUFFIX).$(SUFFIX) strsm_outncopy$(TSUFFIX).$(SUFFIX) \
strsm_oltucopy$(TSUFFIX).$(SUFFIX) strsm_oltncopy$(TSUFFIX).$(SUFFIX)
endif
ifeq ($(BUILD_SINGLE),1)
SBLASOBJS += \
strsm_oltucopy$(TSUFFIX).$(SUFFIX) strsm_oltncopy$(TSUFFIX).$(SUFFIX) \
ssymm_iutcopy$(TSUFFIX).$(SUFFIX) ssymm_iltcopy$(TSUFFIX).$(SUFFIX) \
ssymm_outcopy$(TSUFFIX).$(SUFFIX) ssymm_oltcopy$(TSUFFIX).$(SUFFIX)
endif
@ -391,7 +394,7 @@ XBLASOBJS += \
ifeq ($(USE_GEMM3M), 1)
ifeq ($(BUILD_COMPLEX),1)
ifneq "$(or $(BUILD_COMPLEX),$(BUILD_COMPLEX16))" ""
CBLASOBJS += \
cgemm3m_incopyb$(TSUFFIX).$(SUFFIX) cgemm3m_itcopyb$(TSUFFIX).$(SUFFIX) \
cgemm3m_incopyr$(TSUFFIX).$(SUFFIX) cgemm3m_itcopyr$(TSUFFIX).$(SUFFIX) \
@ -634,15 +637,7 @@ $(KDIR)$(SBGEMMONCOPYOBJ) : $(KERNELDIR)/$(SBGEMMONCOPY)
$(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@
$(KDIR)$(SBGEMMOTCOPYOBJ) : $(KERNELDIR)/$(SBGEMMOTCOPY)
ifeq ($(OS), AIX)
$(CC) $(CFLAGS) -S -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o - > sbgemmotcopy.s
m4 sbgemmotcopy.s > sbgemmotcopy_nomacros.s
$(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX sbgemmotcopy_nomacros.s -o $@
rm sbgemmotcopy.s sbgemmotcopy_nomacros.s
else
$(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@
endif
ifneq ($(SBGEMM_UNROLL_M), $(SBGEMM_UNROLL_N))
@ -650,14 +645,7 @@ $(KDIR)$(SBGEMMINCOPYOBJ) : $(KERNELDIR)/$(SBGEMMINCOPY)
$(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@
$(KDIR)$(SBGEMMITCOPYOBJ) : $(KERNELDIR)/$(SBGEMMITCOPY)
ifeq ($(OS), AIX)
$(CC) $(CFLAGS) -S -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o - > sbgemmitcopy.s
m4 sbgemmitcopy.s > sbgemmitcopy_nomacros.s
$(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX sbgemmitcopy_nomacros.s -o $@
rm sbgemmitcopy.s sbgemmitcopy_nomacros.s
else
$(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@
endif
endif
endif
@ -668,7 +656,7 @@ $(KDIR)$(SGEMMONCOPYOBJ) : $(KERNELDIR)/$(SGEMMONCOPY)
$(KDIR)$(SGEMMOTCOPYOBJ) : $(KERNELDIR)/$(SGEMMOTCOPY)
ifeq ($(OS), AIX)
$(CC) $(CFLAGS) -S -UDOUBLE -UCOMPLEX $< -o - > sgemmotcopy.s
m4 sgemmotcopy.s > sgemmotcopy_nomacros.s
$(M4_AIX) sgemmotcopy.s > sgemmotcopy_nomacros.s
$(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX sgemmotcopy_nomacros.s -o $@
rm sgemmotcopy.s sgemmotcopy_nomacros.s
else
@ -684,7 +672,7 @@ $(KDIR)$(SGEMMINCOPYOBJ) : $(KERNELDIR)/$(SGEMMINCOPY)
$(KDIR)$(SGEMMITCOPYOBJ) : $(KERNELDIR)/$(SGEMMITCOPY)
ifeq ($(OS), AIX)
$(CC) $(CFLAGS) -S -UDOUBLE -UCOMPLEX $< -o - > sgemmitcopy.s
m4 sgemmitcopy.s > sgemmitcopy_nomacros.s
$(M4_AIX) sgemmitcopy.s > sgemmitcopy_nomacros.s
$(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX sgemmitcopy_nomacros.s -o $@
rm sgemmitcopy.s sgemmitcopy_nomacros.s
else
@ -696,7 +684,7 @@ endif
$(KDIR)$(DGEMMONCOPYOBJ) : $(KERNELDIR)/$(DGEMMONCOPY)
ifeq ($(OS), AIX)
$(CC) $(CFLAGS) -S -DDOUBLE -UCOMPLEX $< -o - > dgemm_ncopy.s
m4 dgemm_ncopy.s > dgemm_ncopy_nomacros.s
$(M4_AIX) dgemm_ncopy.s > dgemm_ncopy_nomacros.s
$(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX dgemm_ncopy_nomacros.s -o $@
rm dgemm_ncopy.s dgemm_ncopy_nomacros.s
else
@ -714,7 +702,7 @@ $(KDIR)$(DGEMMINCOPYOBJ) : $(KERNELDIR)/$(DGEMMINCOPY)
$(KDIR)$(DGEMMITCOPYOBJ) : $(KERNELDIR)/$(DGEMMITCOPY)
ifeq ($(OS), AIX)
$(CC) $(CFLAGS) -S -DDOUBLE -UCOMPLEX $< -o - > dgemm_itcopy.s
m4 dgemm_itcopy.s > dgemm_itcopy_nomacros.s
$(M4_AIX) dgemm_itcopy.s > dgemm_itcopy_nomacros.s
$(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX dgemm_itcopy_nomacros.s -o $@
rm dgemm_itcopy.s dgemm_itcopy_nomacros.s
else
@ -757,7 +745,7 @@ $(KDIR)$(CGEMMINCOPYOBJ) : $(KERNELDIR)/$(CGEMMINCOPY)
$(KDIR)$(CGEMMITCOPYOBJ) : $(KERNELDIR)/$(CGEMMITCOPY)
ifeq ($(OS), AIX)
$(CC) $(CFLAGS) -UDOUBLE -UCOMPLEX -S $< -o - > cgemm_itcopy.s
m4 cgemm_itcopy.s > cgemm_itcopy_nomacros.s
$(M4_AIX) cgemm_itcopy.s > cgemm_itcopy_nomacros.s
$(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX cgemm_itcopy_nomacros.s -o $@
rm cgemm_itcopy.s cgemm_itcopy_nomacros.s
else
@ -780,7 +768,7 @@ $(KDIR)$(ZGEMMINCOPYOBJ) : $(KERNELDIR)/$(ZGEMMINCOPY)
$(KDIR)$(ZGEMMITCOPYOBJ) : $(KERNELDIR)/$(ZGEMMITCOPY)
ifeq ($(OS), AIX)
$(CC) $(CFLAGS) -S -DDOUBLE -UCOMPLEX $< -o - > zgemm_itcopy.s
m4 zgemm_itcopy.s > zgemm_itcopy_nomacros.s
$(M4_AIX) zgemm_itcopy.s > zgemm_itcopy_nomacros.s
$(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX zgemm_itcopy_nomacros.s -o $@
rm zgemm_itcopy.s zgemm_itcopy_nomacros.s
else
@ -812,7 +800,7 @@ endif
$(KDIR)sgemm_kernel$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEMMKERNEL) $(SGEMMDEPEND)
ifeq ($(OS), AIX)
$(CC) $(CFLAGS) -S -UDOUBLE -UCOMPLEX $< -o - > sgemm_kernel$(TSUFFIX).s
m4 sgemm_kernel$(TSUFFIX).s > sgemm_kernel$(TSUFFIX)_nomacros.s
$(M4_AIX) sgemm_kernel$(TSUFFIX).s > sgemm_kernel$(TSUFFIX)_nomacros.s
$(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX sgemm_kernel$(TSUFFIX)_nomacros.s -o $@
rm sgemm_kernel$(TSUFFIX).s sgemm_kernel$(TSUFFIX)_nomacros.s
else
@ -829,20 +817,13 @@ endif
ifeq ($(BUILD_BFLOAT16), 1)
$(KDIR)sbgemm_kernel$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SBGEMMKERNEL) $(SBGEMMDEPEND)
ifeq ($(OS), AIX)
$(CC) $(CFLAGS) -S -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o - > sbgemm_kernel$(TSUFFIX).s
m4 sbgemm_kernel$(TSUFFIX).s > sbgemm_kernel$(TSUFFIX)_nomacros.s
$(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX sbgemm_kernel$(TSUFFIX)_nomacros.s -o $@
rm sbgemm_kernel$(TSUFFIX).s sbgemm_kernel$(TSUFFIX)_nomacros.s
else
$(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@
endif
endif
$(KDIR)dgemm_kernel$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DGEMMKERNEL) $(DGEMMDEPEND)
ifeq ($(OS), AIX)
$(CC) $(CFLAGS) -S -DDOUBLE -UCOMPLEX $< -o - > dgemm_kernel$(TSUFFIX).s
m4 dgemm_kernel$(TSUFFIX).s > dgemm_kernel$(TSUFFIX)_nomacros.s
$(M4_AIX) dgemm_kernel$(TSUFFIX).s > dgemm_kernel$(TSUFFIX)_nomacros.s
$(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX dgemm_kernel$(TSUFFIX)_nomacros.s -o $@
rm dgemm_kernel$(TSUFFIX).s dgemm_kernel$(TSUFFIX)_nomacros.s
else
@ -855,7 +836,7 @@ $(KDIR)qgemm_kernel$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(QGEMMKERNEL) $(QGEMMDEP
$(KDIR)cgemm_kernel_n$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMMKERNEL) $(CGEMMDEPEND)
ifeq ($(OS), AIX)
$(CC) $(CFLAGS) -S -UDOUBLE -DCOMPLEX -DNN $< -o - > cgemm_kernel_n.s
m4 cgemm_kernel_n.s > cgemm_kernel_n_nomacros.s
$(M4_AIX) cgemm_kernel_n.s > cgemm_kernel_n_nomacros.s
$(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DNN cgemm_kernel_n_nomacros.s -o $@
rm cgemm_kernel_n.s cgemm_kernel_n_nomacros.s
else
@ -865,7 +846,7 @@ endif
$(KDIR)cgemm_kernel_l$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMMKERNEL) $(CGEMMDEPEND)
ifeq ($(OS), AIX)
$(CC) $(CFLAGS) -S -UDOUBLE -DCOMPLEX -DCN $< -o - > cgemm_kernel_l.s
m4 cgemm_kernel_l.s > cgemm_kernel_l_nomacros.s
$(M4_AIX) cgemm_kernel_l.s > cgemm_kernel_l_nomacros.s
$(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DCN cgemm_kernel_l_nomacros.s -o $@
rm cgemm_kernel_l.s cgemm_kernel_l_nomacros.s
else
@ -875,7 +856,7 @@ endif
$(KDIR)cgemm_kernel_r$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMMKERNEL) $(CGEMMDEPEND)
ifeq ($(OS), AIX)
$(CC) $(CFLAGS) -S -UDOUBLE -DCOMPLEX -DNC $< -o - > cgemm_kernel_r.s
m4 cgemm_kernel_r.s > cgemm_kernel_r_nomacros.s
$(M4_AIX) cgemm_kernel_r.s > cgemm_kernel_r_nomacros.s
$(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DNC cgemm_kernel_r_nomacros.s -o $@
rm cgemm_kernel_r.s cgemm_kernel_r_nomacros.s
else
@ -885,7 +866,7 @@ endif
$(KDIR)cgemm_kernel_b$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMMKERNEL) $(CGEMMDEPEND)
ifeq ($(OS), AIX)
$(CC) $(CFLAGS) -S -UDOUBLE -DCOMPLEX -DCC $< -o - > cgemm_kernel_b.s
m4 cgemm_kernel_b.s > cgemm_kernel_b_nomacros.s
$(M4_AIX) cgemm_kernel_b.s > cgemm_kernel_b_nomacros.s
$(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DCC cgemm_kernel_b_nomacros.s -o $@
rm cgemm_kernel_b.s cgemm_kernel_b_nomacros.s
else
@ -895,7 +876,7 @@ endif
$(KDIR)zgemm_kernel_n$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMMKERNEL) $(ZGEMMDEPEND)
ifeq ($(OS), AIX)
$(CC) $(CFLAGS) -S -DDOUBLE -DCOMPLEX -DNN $< -o - > zgemm_kernel_n.s
m4 zgemm_kernel_n.s > zgemm_kernel_n_nomacros.s
$(M4_AIX) zgemm_kernel_n.s > zgemm_kernel_n_nomacros.s
$(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DNN zgemm_kernel_n_nomacros.s -o $@
rm zgemm_kernel_n.s zgemm_kernel_n_nomacros.s
else ifeq ($(CORE),SANDYBRIDGE)
@ -907,7 +888,7 @@ endif
$(KDIR)zgemm_kernel_l$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMMKERNEL) $(ZGEMMDEPEND)
ifeq ($(OS), AIX)
$(CC) $(CFLAGS) -S -DDOUBLE -DCOMPLEX -DCN $< -o - > zgemm_kernel_l.s
m4 zgemm_kernel_l.s > zgemm_kernel_l_nomacros.s
$(M4_AIX) zgemm_kernel_l.s > zgemm_kernel_l_nomacros.s
$(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DCN zgemm_kernel_l_nomacros.s -o $@
rm zgemm_kernel_l.s zgemm_kernel_l_nomacros.s
else ifeq ($(CORE),SANDYBRIDGE)
@ -919,7 +900,7 @@ endif
$(KDIR)zgemm_kernel_r$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMMKERNEL) $(ZGEMMDEPEND)
ifeq ($(OS), AIX)
$(CC) $(CFLAGS) -S -DDOUBLE -DCOMPLEX -DNC $< -o - > zgemm_kernel_r.s
m4 zgemm_kernel_r.s > zgemm_kernel_r_nomacros.s
$(M4_AIX) zgemm_kernel_r.s > zgemm_kernel_r_nomacros.s
$(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DNC zgemm_kernel_r_nomacros.s -o $@
rm zgemm_kernel_r.s zgemm_kernel_r_nomacros.s
else ifeq ($(CORE),SANDYBRIDGE)
@ -931,7 +912,7 @@ endif
$(KDIR)zgemm_kernel_b$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMMKERNEL) $(ZGEMMDEPEND)
ifeq ($(OS), AIX)
$(CC) $(CFLAGS) -S -DDOUBLE -DCOMPLEX -DCC $< -o - > zgemm_kernel_b.s
m4 zgemm_kernel_b.s > zgemm_kernel_b_nomacros.s
$(M4_AIX) zgemm_kernel_b.s > zgemm_kernel_b_nomacros.s
$(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DCC zgemm_kernel_b_nomacros.s -o $@
rm zgemm_kernel_b.s zgemm_kernel_b_nomacros.s
else ifeq ($(CORE),SANDYBRIDGE)
@ -957,7 +938,7 @@ ifdef USE_TRMM
$(KDIR)strmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(STRMMKERNEL)
ifeq ($(OS), AIX)
$(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -UCOMPLEX -DLEFT -UTRANSA $< -o - > strmmkernel_ln.s
m4 strmmkernel_ln.s > strmmkernel_ln_nomacros.s
$(M4_AIX) strmmkernel_ln.s > strmmkernel_ln_nomacros.s
$(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -DLEFT -UTRANSA strmmkernel_ln_nomacros.s -o $@
rm strmmkernel_ln.s strmmkernel_ln_nomacros.s
else
@ -967,7 +948,7 @@ endif
$(KDIR)strmm_kernel_LT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(STRMMKERNEL)
ifeq ($(OS), AIX)
$(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -UCOMPLEX -DLEFT -DTRANSA $< -o - > strmmkernel_lt.s
m4 strmmkernel_lt.s > strmmkernel_lt_nomacros.s
$(M4_AIX) strmmkernel_lt.s > strmmkernel_lt_nomacros.s
$(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -DLEFT -DTRANSA strmmkernel_lt_nomacros.s -o $@
rm strmmkernel_lt.s strmmkernel_lt_nomacros.s
else
@ -977,7 +958,7 @@ endif
$(KDIR)strmm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(STRMMKERNEL)
ifeq ($(OS), AIX)
$(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -UTRANSA $< -o - > strmmkernel_rn.s
m4 strmmkernel_rn.s > strmmkernel_rn_nomacros.s
$(M4_AIX) strmmkernel_rn.s > strmmkernel_rn_nomacros.s
$(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -UTRANSA strmmkernel_rn_nomacros.s -o $@
rm strmmkernel_rn.s strmmkernel_rn_nomacros.s
else
@ -987,7 +968,7 @@ endif
$(KDIR)strmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(STRMMKERNEL)
ifeq ($(OS), AIX)
$(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o - > strmm_kernel_rt.s
m4 strmm_kernel_rt.s > strmm_kernel_rt_nomacros.s
$(M4_AIX) strmm_kernel_rt.s > strmm_kernel_rt_nomacros.s
$(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -DTRANSA strmm_kernel_rt_nomacros.s -o $@
rm strmm_kernel_rt.s strmm_kernel_rt_nomacros.s
else
@ -997,7 +978,7 @@ endif
$(KDIR)dtrmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DTRMMKERNEL)
ifeq ($(OS), AIX)
$(CC) $(CFLAGS) -S -DTRMMKERNEL -DDOUBLE -UCOMPLEX -DLEFT -UTRANSA $< -o - > dtrmm_kernel_ln.s
m4 dtrmm_kernel_ln.s > dtrmm_kernel_ln_nomacros.s
$(M4_AIX) dtrmm_kernel_ln.s > dtrmm_kernel_ln_nomacros.s
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -DLEFT -UTRANSA dtrmm_kernel_ln_nomacros.s -o $@
rm dtrmm_kernel_ln.s dtrmm_kernel_ln_nomacros.s
else
@ -1007,7 +988,7 @@ endif
$(KDIR)dtrmm_kernel_LT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DTRMMKERNEL)
ifeq ($(OS), AIX)
$(CC) $(CFLAGS) -S -DTRMMKERNEL -DDOUBLE -UCOMPLEX -DLEFT -DTRANSA $< -o - > dtrmm_kernel_lt.s
m4 dtrmm_kernel_lt.s > dtrmm_kernel_lt_nomacros.s
$(M4_AIX) dtrmm_kernel_lt.s > dtrmm_kernel_lt_nomacros.s
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -DLEFT -DTRANSA dtrmm_kernel_lt_nomacros.s -o $@
rm dtrmm_kernel_lt.s dtrmm_kernel_lt_nomacros.s
else
@ -1017,7 +998,7 @@ endif
$(KDIR)dtrmm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DTRMMKERNEL)
ifeq ($(OS), AIX)
$(CC) $(CFLAGS) -S -DTRMMKERNEL -DDOUBLE -UCOMPLEX -ULEFT -UTRANSA $< -o - > dtrmm_kernel_rn.s
m4 dtrmm_kernel_rn.s > dtrmm_kernel_rn_nomacros.s
$(M4_AIX) dtrmm_kernel_rn.s > dtrmm_kernel_rn_nomacros.s
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -ULEFT -UTRANSA dtrmm_kernel_rn_nomacros.s -o $@
rm dtrmm_kernel_rn.s dtrmm_kernel_rn_nomacros.s
else
@ -1027,7 +1008,7 @@ endif
$(KDIR)dtrmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DTRMMKERNEL)
ifeq ($(OS), AIX)
$(CC) $(CFLAGS) -S -DTRMMKERNEL -DDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o - > dtrmm_kernel_rt.s
m4 dtrmm_kernel_rt.s > dtrmm_kernel_rt_nomacros.s
$(M4_AIX) dtrmm_kernel_rt.s > dtrmm_kernel_rt_nomacros.s
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -ULEFT -DTRANSA dtrmm_kernel_rt_nomacros.s -o $@
rm dtrmm_kernel_rt.s dtrmm_kernel_rt_nomacros.s
else
@ -1049,7 +1030,7 @@ $(KDIR)qtrmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(QGEMMKERNEL)
$(KDIR)ctrmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL)
ifeq ($(OS), AIX)
$(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN $< -o - > ctrmm_kernel_ln.s
m4 ctrmm_kernel_ln.s > ctrmm_kernel_ln_nomacros.s
$(M4_AIX) ctrmm_kernel_ln.s > ctrmm_kernel_ln_nomacros.s
$(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN ctrmm_kernel_ln_nomacros.s -o $@
rm ctrmm_kernel_ln.s ctrmm_kernel_ln_nomacros.s
else
@ -1059,7 +1040,7 @@ endif
$(KDIR)ctrmm_kernel_LT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL)
ifeq ($(OS), AIX)
$(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -DTRANSA -UCONJ -DNN $< -o - > ctrmm_kernel_lt.s
m4 ctrmm_kernel_lt.s > ctrmm_kernel_lt_nomacros.s
$(M4_AIX) ctrmm_kernel_lt.s > ctrmm_kernel_lt_nomacros.s
$(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -DTRANSA -UCONJ -DNN ctrmm_kernel_lt_nomacros.s -o $@
rm ctrmm_kernel_lt.s ctrmm_kernel_lt_nomacros.s
else
@ -1069,7 +1050,7 @@ endif
$(KDIR)ctrmm_kernel_LR$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL)
ifeq ($(OS), AIX)
$(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -UTRANSA -DCONJ -DCN $< -o - > ctrmm_kernel_lr.s
m4 ctrmm_kernel_lr.s > ctrmm_kernel_lr_nomacros.s
$(M4_AIX) ctrmm_kernel_lr.s > ctrmm_kernel_lr_nomacros.s
$(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -UTRANSA -DCONJ -DCN ctrmm_kernel_lr_nomacros.s -o $@
rm ctrmm_kernel_lr.s ctrmm_kernel_lr_nomacros.s
else
@ -1079,7 +1060,7 @@ endif
$(KDIR)ctrmm_kernel_LC$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL)
ifeq ($(OS), AIX)
$(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -DTRANSA -DCONJ -DCN $< -o - > ctrmm_kernel_lc.s
m4 ctrmm_kernel_lc.s > ctrmm_kernel_lc_nomacros.s
$(M4_AIX) ctrmm_kernel_lc.s > ctrmm_kernel_lc_nomacros.s
$(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -DTRANSA -DCONJ -DCN ctrmm_kernel_lc_nomacros.s -o $@
rm ctrmm_kernel_lc_nomacros.s ctrmm_kernel_lc.s
else
@ -1089,7 +1070,7 @@ endif
$(KDIR)ctrmm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL)
ifeq ($(OS), AIX)
$(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -UTRANSA -UCONJ -DNN $< -o - > ctrmm_kernel_rn.s
m4 ctrmm_kernel_rn.s > ctrmm_kernel_rn_nomacros.s
$(M4_AIX) ctrmm_kernel_rn.s > ctrmm_kernel_rn_nomacros.s
$(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -UTRANSA -UCONJ -DNN ctrmm_kernel_rn_nomacros.s -o $@
rm ctrmm_kernel_rn.s ctrmm_kernel_rn_nomacros.s
else
@ -1099,7 +1080,7 @@ endif
$(KDIR)ctrmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL)
ifeq ($(OS), AIX)
$(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -DTRANSA -UCONJ -DNN $< -o - > ctrmm_kernel_rt.s
m4 ctrmm_kernel_rt.s > ctrmm_kernel_rt_nomacros.s
$(M4_AIX) ctrmm_kernel_rt.s > ctrmm_kernel_rt_nomacros.s
$(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -DTRANSA -UCONJ -DNN ctrmm_kernel_rt_nomacros.s -o $@
rm ctrmm_kernel_rt.s ctrmm_kernel_rt_nomacros.s
else
@ -1109,7 +1090,7 @@ endif
$(KDIR)ctrmm_kernel_RR$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL)
ifeq ($(OS), AIX)
$(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -UTRANSA -DCONJ -DNC $< -o - > ctrmm_kernel_rr.s
m4 ctrmm_kernel_rr.s > ctrmm_kernel_rr_nomacros.s
$(M4_AIX) ctrmm_kernel_rr.s > ctrmm_kernel_rr_nomacros.s
$(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -UTRANSA -DCONJ -DNC ctrmm_kernel_rr_nomacros.s -o $@
rm ctrmm_kernel_rr.s ctrmm_kernel_rr_nomacros.s
else
@ -1119,7 +1100,7 @@ endif
$(KDIR)ctrmm_kernel_RC$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL)
ifeq ($(OS), AIX)
$(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC $< -o - > ctrmm_kernel_RC.s
m4 ctrmm_kernel_RC.s > ctrmm_kernel_RC_nomacros.s
$(M4_AIX) ctrmm_kernel_RC.s > ctrmm_kernel_RC_nomacros.s
$(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC ctrmm_kernel_RC_nomacros.s -o $@
rm ctrmm_kernel_RC.s ctrmm_kernel_RC_nomacros.s
else
@ -1129,7 +1110,7 @@ endif
$(KDIR)ztrmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL)
ifeq ($(OS), AIX)
$(CC) $(CFLAGS) -S -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN $< -o - > ztrmm_kernel_ln.s
m4 ztrmm_kernel_ln.s > ztrmm_kernel_ln_nomacros.s
$(M4_AIX) ztrmm_kernel_ln.s > ztrmm_kernel_ln_nomacros.s
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN ztrmm_kernel_ln_nomacros.s -o $@
rm ztrmm_kernel_ln.s ztrmm_kernel_ln_nomacros.s
else ifeq ($(CORE), SANDYBRIDGE)
@ -1141,7 +1122,7 @@ endif
$(KDIR)ztrmm_kernel_LT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL)
ifeq ($(OS), AIX)
$(CC) $(CFLAGS) -S -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -UCONJ -DNN $< -o - > ztrmm_kernel_lt.s
m4 ztrmm_kernel_lt.s > ztrmm_kernel_lt_nomacros.s
$(M4_AIX) ztrmm_kernel_lt.s > ztrmm_kernel_lt_nomacros.s
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -UCONJ -DNN ztrmm_kernel_lt_nomacros.s -o $@
rm ztrmm_kernel_lt.s ztrmm_kernel_lt_nomacros.s
else ifeq ($(CORE), SANDYBRIDGE)
@ -1153,7 +1134,7 @@ endif
$(KDIR)ztrmm_kernel_LR$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL)
ifeq ($(OS), AIX)
$(CC) $(CFLAGS) -S -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -DCONJ -DCN $< -o - > ztrmm_kernel_lr.s
m4 ztrmm_kernel_lr.s > ztrmm_kernel_lr_nomacros.s
$(M4_AIX) ztrmm_kernel_lr.s > ztrmm_kernel_lr_nomacros.s
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -DCONJ -DCN ztrmm_kernel_lr_nomacros.s -o $@
rm ztrmm_kernel_lr.s ztrmm_kernel_lr_nomacros.s
else ifeq ($(CORE), SANDYBRIDGE)
@ -1165,7 +1146,7 @@ endif
$(KDIR)ztrmm_kernel_LC$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL)
ifeq ($(OS), AIX)
$(CC) $(CFLAGS) -S -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -DCONJ -DCN $< -o - > ztrmm_kernel_lc.s
m4 ztrmm_kernel_lc.s >ztrmm_kernel_lc_nomacros.s
$(M4_AIX) ztrmm_kernel_lc.s >ztrmm_kernel_lc_nomacros.s
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -DCONJ -DCN ztrmm_kernel_lc_nomacros.s -o $@
rm ztrmm_kernel_lc.s ztrmm_kernel_lc_nomacros.s
else ifeq ($(CORE), SANDYBRIDGE)
@ -1177,7 +1158,7 @@ endif
$(KDIR)ztrmm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL)
ifeq ($(OS), AIX)
$(CC) $(CFLAGS) -S -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -UCONJ -DNN $< -o - > ztrmm_kernel_rn.s
m4 ztrmm_kernel_rn.s > ztrmm_kernel_rn_nomacros.s
$(M4_AIX) ztrmm_kernel_rn.s > ztrmm_kernel_rn_nomacros.s
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -UCONJ -DNN ztrmm_kernel_rn_nomacros.s -o $@
rm ztrmm_kernel_rn.s ztrmm_kernel_rn_nomacros.s
else ifeq ($(CORE), SANDYBRIDGE)
@ -1189,7 +1170,7 @@ endif
$(KDIR)ztrmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL)
ifeq ($(OS), AIX)
$(CC) $(CFLAGS) -S -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -UCONJ -DNN $< -o - > ztrmm_kernel_rt.s
m4 ztrmm_kernel_rt.s > ztrmm_kernel_rt_nomacros.s
$(M4_AIX) ztrmm_kernel_rt.s > ztrmm_kernel_rt_nomacros.s
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -UCONJ -DNN ztrmm_kernel_rt_nomacros.s -o $@
rm ztrmm_kernel_rt.s ztrmm_kernel_rt_nomacros.s
else ifeq ($(CORE), SANDYBRIDGE)
@ -1201,7 +1182,7 @@ endif
$(KDIR)ztrmm_kernel_RR$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL)
ifeq ($(OS), AIX)
$(CC) $(CFLAGS) -S -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -DCONJ -DNC $< -o - > ztrmm_kernel_rr.s
m4 ztrmm_kernel_rr.s > ztrmm_kernel_rr_nomacros.s
$(M4_AIX) ztrmm_kernel_rr.s > ztrmm_kernel_rr_nomacros.s
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -DCONJ -DNC ztrmm_kernel_rr_nomacros.s -o $@
rm ztrmm_kernel_rr.s ztrmm_kernel_rr_nomacros.s
else ifeq ($(CORE), SANDYBRIDGE)
@ -1213,7 +1194,7 @@ endif
$(KDIR)ztrmm_kernel_RC$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL)
ifeq ($(OS), AIX)
$(CC) $(CFLAGS) -S -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC $< -o - > ztrmm_kernel_rc.s
m4 ztrmm_kernel_rc.s > ztrmm_kernel_rc_nomacros.s
$(M4_AIX) ztrmm_kernel_rc.s > ztrmm_kernel_rc_nomacros.s
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC ztrmm_kernel_rc_nomacros.s -o $@
rm ztrmm_kernel_rc.s ztrmm_kernel_rc_nomacros.s
else ifeq ($(CORE), SANDYBRIDGE)
@ -1235,7 +1216,7 @@ $(KDIR)strmm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEMMKERNEL)
$(KDIR)strmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEMMKERNEL)
ifeq ($(OS), AIX)
$(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o - > strmm_kernel_rt.s
m4 strmm_kernel_rt.s > strmm_kernel_rt_nomacros.s
$(M4_AIX) strmm_kernel_rt.s > strmm_kernel_rt_nomacros.s
$(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -DTRANSA strmm_kernel_rt_nomacros.s -o $@
rm strmm_kernel_rt.s strmm_kernel_rt_nomacros.s
else
@ -1395,7 +1376,7 @@ $(KDIR)dtrsm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DTRSMKERNEL_LN) $(DT
$(KDIR)dtrsm_kernel_LT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DTRSMKERNEL_LT) $(DTRSMDEPEND)
ifeq ($(OS), AIX)
$(CC) $(CFLAGS) -S -DTRSMKERNEL -UCOMPLEX -DDOUBLE -UUPPER -DLT -UCONJ $< -o - > dtrsm_kernel_lt.s
m4 dtrsm_kernel_lt.s > dtrsm_kernel_lt_nomacros.s
$(M4_AIX) dtrsm_kernel_lt.s > dtrsm_kernel_lt_nomacros.s
$(CC) -c $(CFLAGS) -DTRSMKERNEL -UCOMPLEX -DDOUBLE -UUPPER -DLT -UCONJ dtrsm_kernel_lt_nomacros.s -o $@
rm dtrsm_kernel_lt.s dtrsm_kernel_lt_nomacros.s
else
@ -2987,7 +2968,7 @@ $(KDIR)cgemm_kernel_l$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(CGEMMKERNEL) $(CGEMM
$(KDIR)cgemm_kernel_r$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(CGEMMKERNEL) $(CGEMMDEPEND)
ifeq ($(OS), AIX)
$(CC) $(PFLAGS) -S -UDOUBLE -DCOMPLEX -DNC $< -o - > cgemm_kernel_r.s
m4 cgemm_kernel_r.s > cgemm_kernel_r_nomacros.s
$(M4_AIX) cgemm_kernel_r.s > cgemm_kernel_r_nomacros.s
$(CC) $(PFLAGS) -c -UDOUBLE -DCOMPLEX -DNC cgemm_kernel_r_nomacros.s -o $@
rm cgemm_kernel_r.s cgemm_kernel_r_nomacros.s
else
@ -3033,7 +3014,7 @@ $(KDIR)strmm_kernel_RN$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SGEMMKERNEL)
$(KDIR)strmm_kernel_RT$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SGEMMKERNEL)
ifeq ($(OS), AIX)
$(CC) $(CFLAGS) -S -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o - > strmm_kernel_rt.s
m4 strmmkernel_rn.s > strmm_kernel_rt_nomacros.s
$(M4_AIX) strmmkernel_rn.s > strmm_kernel_rt_nomacros.s
$(CC) $(PFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -DTRANSA strmm_kernel_rt_nomacros.s -o $@
rm strmm_kernel_rt.s strmm_kernel_rt_nomacros.s
else

View File

@ -57,7 +57,7 @@ CAMAXKERNEL = zamax.S
ZAMAXKERNEL = zamax.S
SAXPYKERNEL = axpy.S
DAXPYKERNEL = axpy.S
DAXPYKERNEL = daxpy_thunderx2t99.S
CAXPYKERNEL = zaxpy.S
ZAXPYKERNEL = zaxpy.S
@ -81,45 +81,35 @@ DGEMVTKERNEL = gemv_t.S
CGEMVTKERNEL = zgemv_t.S
ZGEMVTKERNEL = zgemv_t.S
SASUMKERNEL = sasum_thunderx2t99.c
DASUMKERNEL = dasum_thunderx2t99.c
CASUMKERNEL = casum_thunderx2t99.c
ZASUMKERNEL = zasum_thunderx2t99.c
SASUMKERNEL = asum.S
DASUMKERNEL = asum.S
CASUMKERNEL = casum.S
ZASUMKERNEL = zasum.S
SCOPYKERNEL = copy_thunderx2t99.c
DCOPYKERNEL = copy_thunderx2t99.c
CCOPYKERNEL = copy_thunderx2t99.c
ZCOPYKERNEL = copy_thunderx2t99.c
SCOPYKERNEL = copy.S
DCOPYKERNEL = copy.S
CCOPYKERNEL = copy.S
ZCOPYKERNEL = copy.S
SSWAPKERNEL = swap_thunderx2t99.S
DSWAPKERNEL = swap_thunderx2t99.S
CSWAPKERNEL = swap_thunderx2t99.S
ZSWAPKERNEL = swap_thunderx2t99.S
SSWAPKERNEL = swap.S
DSWAPKERNEL = swap.S
CSWAPKERNEL = swap.S
ZSWAPKERNEL = swap.S
ISAMAXKERNEL = iamax_thunderx2t99.c
IDAMAXKERNEL = iamax_thunderx2t99.c
ICAMAXKERNEL = izamax_thunderx2t99.c
IZAMAXKERNEL = izamax_thunderx2t99.c
ISAMAXKERNEL = iamax.S
IDAMAXKERNEL = iamax.S
ICAMAXKERNEL = izamax.S
IZAMAXKERNEL = izamax.S
SNRM2KERNEL = scnrm2_thunderx2t99.c
DNRM2KERNEL = dznrm2_thunderx2t99.c
CNRM2KERNEL = scnrm2_thunderx2t99.c
ZNRM2KERNEL = dznrm2_thunderx2t99.c
SNRM2KERNEL = nrm2.S
DNRM2KERNEL = nrm2.S
CNRM2KERNEL = znrm2.S
ZNRM2KERNEL = znrm2.S
DDOTKERNEL = dot.S
ifneq ($(C_COMPILER), PGI)
SDOTKERNEL = ../generic/dot.c
else
SDOTKERNEL = dot.S
endif
ifneq ($(C_COMPILER), PGI)
CDOTKERNEL = zdot.S
ZDOTKERNEL = zdot.S
else
CDOTKERNEL = ../arm/zdot.c
ZDOTKERNEL = ../arm/zdot.c
endif
DDOTKERNEL = dot.c
SDOTKERNEL = dot.c
CDOTKERNEL = zdot_thunderx2t99.c
ZDOTKERNEL = zdot_thunderx2t99.c
DSDOTKERNEL = dot.S
DGEMM_BETA = dgemm_beta.S
@ -128,10 +118,10 @@ SGEMM_BETA = sgemm_beta.S
SGEMMKERNEL = sgemm_kernel_sve_v2x$(SGEMM_UNROLL_N).S
STRMMKERNEL = strmm_kernel_sve_v1x$(SGEMM_UNROLL_N).S
SGEMMINCOPY = sgemm_ncopy_sve_v1.c
SGEMMITCOPY = sgemm_tcopy_sve_v1.c
SGEMMONCOPY = sgemm_ncopy_$(DGEMM_UNROLL_N).S
SGEMMOTCOPY = sgemm_tcopy_$(DGEMM_UNROLL_N).S
SGEMMINCOPY = gemm_ncopy_sve_v1x$(SGEMM_UNROLL_N).c
SGEMMITCOPY = gemm_tcopy_sve_v1x$(SGEMM_UNROLL_N).c
SGEMMONCOPY = sgemm_ncopy_$(SGEMM_UNROLL_N).S
SGEMMOTCOPY = sgemm_tcopy_$(SGEMM_UNROLL_N).S
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
@ -149,8 +139,8 @@ SSYMMLCOPY_M = symm_lcopy_sve.c
DGEMMKERNEL = dgemm_kernel_sve_v2x$(DGEMM_UNROLL_N).S
DTRMMKERNEL = dtrmm_kernel_sve_v1x$(DGEMM_UNROLL_N).S
DGEMMINCOPY = dgemm_ncopy_sve_v1.c
DGEMMITCOPY = dgemm_tcopy_sve_v1.c
DGEMMINCOPY = gemm_ncopy_sve_v1x$(DGEMM_UNROLL_N).c
DGEMMITCOPY = gemm_tcopy_sve_v1x$(DGEMM_UNROLL_N).c
DGEMMONCOPY = dgemm_ncopy_$(DGEMM_UNROLL_N).S
DGEMMOTCOPY = dgemm_tcopy_$(DGEMM_UNROLL_N).S
@ -170,8 +160,8 @@ DSYMMLCOPY_M = symm_lcopy_sve.c
CGEMMKERNEL = cgemm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S
CTRMMKERNEL = ctrmm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S
CGEMMINCOPY = cgemm_ncopy_sve_v1.c
CGEMMITCOPY = cgemm_tcopy_sve_v1.c
CGEMMINCOPY = gemm_ncopy_complex_sve_v1x$(ZGEMM_UNROLL_N).c
CGEMMITCOPY = gemm_tcopy_complex_sve_v1x$(ZGEMM_UNROLL_N).c
CGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c
CGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c
@ -194,8 +184,8 @@ CSYMMLCOPY_M = zsymm_lcopy_sve.c
ZGEMMKERNEL = zgemm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S
ZTRMMKERNEL = ztrmm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S
ZGEMMINCOPY = zgemm_ncopy_sve_v1.c
ZGEMMITCOPY = zgemm_tcopy_sve_v1.c
ZGEMMINCOPY = gemm_ncopy_complex_sve_v1x$(ZGEMM_UNROLL_N).c
ZGEMMITCOPY = gemm_tcopy_complex_sve_v1x$(ZGEMM_UNROLL_N).c
ZGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c
ZGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c

View File

@ -1,216 +1 @@
SAMINKERNEL = ../arm/amin.c
DAMINKERNEL = ../arm/amin.c
CAMINKERNEL = ../arm/zamin.c
ZAMINKERNEL = ../arm/zamin.c
SMAXKERNEL = ../arm/max.c
DMAXKERNEL = ../arm/max.c
SMINKERNEL = ../arm/min.c
DMINKERNEL = ../arm/min.c
ISAMINKERNEL = ../arm/iamin.c
IDAMINKERNEL = ../arm/iamin.c
ICAMINKERNEL = ../arm/izamin.c
IZAMINKERNEL = ../arm/izamin.c
ISMAXKERNEL = ../arm/imax.c
IDMAXKERNEL = ../arm/imax.c
ISMINKERNEL = ../arm/imin.c
IDMINKERNEL = ../arm/imin.c
STRSMKERNEL_LN = trsm_kernel_LN_sve.c
STRSMKERNEL_LT = trsm_kernel_LT_sve.c
STRSMKERNEL_RN = trsm_kernel_RN_sve.c
STRSMKERNEL_RT = trsm_kernel_RT_sve.c
DTRSMKERNEL_LN = trsm_kernel_LN_sve.c
DTRSMKERNEL_LT = trsm_kernel_LT_sve.c
DTRSMKERNEL_RN = trsm_kernel_RN_sve.c
DTRSMKERNEL_RT = trsm_kernel_RT_sve.c
TRSMCOPYLN_M = trsm_lncopy_sve.c
TRSMCOPYLT_M = trsm_ltcopy_sve.c
TRSMCOPYUN_M = trsm_uncopy_sve.c
TRSMCOPYUT_M = trsm_utcopy_sve.c
CTRSMKERNEL_LN = trsm_kernel_LN_sve.c
CTRSMKERNEL_LT = trsm_kernel_LT_sve.c
CTRSMKERNEL_RN = trsm_kernel_RN_sve.c
CTRSMKERNEL_RT = trsm_kernel_RT_sve.c
ZTRSMKERNEL_LN = trsm_kernel_LN_sve.c
ZTRSMKERNEL_LT = trsm_kernel_LT_sve.c
ZTRSMKERNEL_RN = trsm_kernel_RN_sve.c
ZTRSMKERNEL_RT = trsm_kernel_RT_sve.c
ZTRSMCOPYLN_M = ztrsm_lncopy_sve.c
ZTRSMCOPYLT_M = ztrsm_ltcopy_sve.c
ZTRSMCOPYUN_M = ztrsm_uncopy_sve.c
ZTRSMCOPYUT_M = ztrsm_utcopy_sve.c
SAMAXKERNEL = amax.S
DAMAXKERNEL = amax.S
CAMAXKERNEL = zamax.S
ZAMAXKERNEL = zamax.S
SAXPYKERNEL = axpy.S
DAXPYKERNEL = axpy.S
CAXPYKERNEL = zaxpy.S
ZAXPYKERNEL = zaxpy.S
SROTKERNEL = rot.S
DROTKERNEL = rot.S
CROTKERNEL = zrot.S
ZROTKERNEL = zrot.S
SSCALKERNEL = scal.S
DSCALKERNEL = scal.S
CSCALKERNEL = zscal.S
ZSCALKERNEL = zscal.S
SGEMVNKERNEL = gemv_n.S
DGEMVNKERNEL = gemv_n.S
CGEMVNKERNEL = zgemv_n.S
ZGEMVNKERNEL = zgemv_n.S
SGEMVTKERNEL = gemv_t.S
DGEMVTKERNEL = gemv_t.S
CGEMVTKERNEL = zgemv_t.S
ZGEMVTKERNEL = zgemv_t.S
SASUMKERNEL = asum.S
DASUMKERNEL = asum.S
CASUMKERNEL = casum.S
ZASUMKERNEL = zasum.S
SCOPYKERNEL = copy.S
DCOPYKERNEL = copy.S
CCOPYKERNEL = copy.S
ZCOPYKERNEL = copy.S
SSWAPKERNEL = swap.S
DSWAPKERNEL = swap.S
CSWAPKERNEL = swap.S
ZSWAPKERNEL = swap.S
ISAMAXKERNEL = iamax.S
IDAMAXKERNEL = iamax.S
ICAMAXKERNEL = izamax.S
IZAMAXKERNEL = izamax.S
SNRM2KERNEL = nrm2.S
DNRM2KERNEL = nrm2.S
CNRM2KERNEL = znrm2.S
ZNRM2KERNEL = znrm2.S
DDOTKERNEL = dot.S
ifneq ($(C_COMPILER), PGI)
SDOTKERNEL = ../generic/dot.c
else
SDOTKERNEL = dot.S
endif
ifneq ($(C_COMPILER), PGI)
CDOTKERNEL = zdot.S
ZDOTKERNEL = zdot.S
else
CDOTKERNEL = ../arm/zdot.c
ZDOTKERNEL = ../arm/zdot.c
endif
DSDOTKERNEL = dot.S
DGEMM_BETA = dgemm_beta.S
SGEMM_BETA = sgemm_beta.S
SGEMMKERNEL = sgemm_kernel_sve_v2x$(SGEMM_UNROLL_N).S
STRMMKERNEL = strmm_kernel_sve_v1x$(SGEMM_UNROLL_N).S
SGEMMINCOPY = sgemm_ncopy_sve_v1.c
SGEMMITCOPY = sgemm_tcopy_sve_v1.c
SGEMMONCOPY = sgemm_ncopy_$(DGEMM_UNROLL_N).S
SGEMMOTCOPY = sgemm_tcopy_$(DGEMM_UNROLL_N).S
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
STRMMUNCOPY_M = trmm_uncopy_sve_v1.c
STRMMLNCOPY_M = trmm_lncopy_sve_v1.c
STRMMUTCOPY_M = trmm_utcopy_sve_v1.c
STRMMLTCOPY_M = trmm_ltcopy_sve_v1.c
SSYMMUCOPY_M = symm_ucopy_sve.c
SSYMMLCOPY_M = symm_lcopy_sve.c
DGEMMKERNEL = dgemm_kernel_sve_v2x$(DGEMM_UNROLL_N).S
DTRMMKERNEL = dtrmm_kernel_sve_v1x$(DGEMM_UNROLL_N).S
DGEMMINCOPY = dgemm_ncopy_sve_v1.c
DGEMMITCOPY = dgemm_tcopy_sve_v1.c
DGEMMONCOPY = dgemm_ncopy_$(DGEMM_UNROLL_N).S
DGEMMOTCOPY = dgemm_tcopy_$(DGEMM_UNROLL_N).S
DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
DTRMMUNCOPY_M = trmm_uncopy_sve_v1.c
DTRMMLNCOPY_M = trmm_lncopy_sve_v1.c
DTRMMUTCOPY_M = trmm_utcopy_sve_v1.c
DTRMMLTCOPY_M = trmm_ltcopy_sve_v1.c
DSYMMUCOPY_M = symm_ucopy_sve.c
DSYMMLCOPY_M = symm_lcopy_sve.c
CGEMMKERNEL = cgemm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S
CTRMMKERNEL = ctrmm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S
CGEMMINCOPY = cgemm_ncopy_sve_v1.c
CGEMMITCOPY = cgemm_tcopy_sve_v1.c
CGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c
CGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c
CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX)
CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX)
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
CTRMMUNCOPY_M = ztrmm_uncopy_sve_v1.c
CTRMMLNCOPY_M = ztrmm_lncopy_sve_v1.c
CTRMMUTCOPY_M = ztrmm_utcopy_sve_v1.c
CTRMMLTCOPY_M = ztrmm_ltcopy_sve_v1.c
CHEMMLTCOPY_M = zhemm_ltcopy_sve.c
CHEMMUTCOPY_M = zhemm_utcopy_sve.c
CSYMMUCOPY_M = zsymm_ucopy_sve.c
CSYMMLCOPY_M = zsymm_lcopy_sve.c
ZGEMMKERNEL = zgemm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S
ZTRMMKERNEL = ztrmm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S
ZGEMMINCOPY = zgemm_ncopy_sve_v1.c
ZGEMMITCOPY = zgemm_tcopy_sve_v1.c
ZGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c
ZGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c
ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX)
ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX)
ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
ZTRMMUNCOPY_M = ztrmm_uncopy_sve_v1.c
ZTRMMLNCOPY_M = ztrmm_lncopy_sve_v1.c
ZTRMMUTCOPY_M = ztrmm_utcopy_sve_v1.c
ZTRMMLTCOPY_M = ztrmm_ltcopy_sve_v1.c
ZHEMMLTCOPY_M = zhemm_ltcopy_sve.c
ZHEMMUTCOPY_M = zhemm_utcopy_sve.c
ZSYMMUCOPY_M = zsymm_ucopy_sve.c
ZSYMMLCOPY_M = zsymm_lcopy_sve.c
include $(KERNELDIR)/KERNEL.ARMV8SVE

View File

@ -1,216 +1 @@
SAMINKERNEL = ../arm/amin.c
DAMINKERNEL = ../arm/amin.c
CAMINKERNEL = ../arm/zamin.c
ZAMINKERNEL = ../arm/zamin.c
SMAXKERNEL = ../arm/max.c
DMAXKERNEL = ../arm/max.c
SMINKERNEL = ../arm/min.c
DMINKERNEL = ../arm/min.c
ISAMINKERNEL = ../arm/iamin.c
IDAMINKERNEL = ../arm/iamin.c
ICAMINKERNEL = ../arm/izamin.c
IZAMINKERNEL = ../arm/izamin.c
ISMAXKERNEL = ../arm/imax.c
IDMAXKERNEL = ../arm/imax.c
ISMINKERNEL = ../arm/imin.c
IDMINKERNEL = ../arm/imin.c
STRSMKERNEL_LN = trsm_kernel_LN_sve.c
STRSMKERNEL_LT = trsm_kernel_LT_sve.c
STRSMKERNEL_RN = trsm_kernel_RN_sve.c
STRSMKERNEL_RT = trsm_kernel_RT_sve.c
DTRSMKERNEL_LN = trsm_kernel_LN_sve.c
DTRSMKERNEL_LT = trsm_kernel_LT_sve.c
DTRSMKERNEL_RN = trsm_kernel_RN_sve.c
DTRSMKERNEL_RT = trsm_kernel_RT_sve.c
TRSMCOPYLN_M = trsm_lncopy_sve.c
TRSMCOPYLT_M = trsm_ltcopy_sve.c
TRSMCOPYUN_M = trsm_uncopy_sve.c
TRSMCOPYUT_M = trsm_utcopy_sve.c
CTRSMKERNEL_LN = trsm_kernel_LN_sve.c
CTRSMKERNEL_LT = trsm_kernel_LT_sve.c
CTRSMKERNEL_RN = trsm_kernel_RN_sve.c
CTRSMKERNEL_RT = trsm_kernel_RT_sve.c
ZTRSMKERNEL_LN = trsm_kernel_LN_sve.c
ZTRSMKERNEL_LT = trsm_kernel_LT_sve.c
ZTRSMKERNEL_RN = trsm_kernel_RN_sve.c
ZTRSMKERNEL_RT = trsm_kernel_RT_sve.c
ZTRSMCOPYLN_M = ztrsm_lncopy_sve.c
ZTRSMCOPYLT_M = ztrsm_ltcopy_sve.c
ZTRSMCOPYUN_M = ztrsm_uncopy_sve.c
ZTRSMCOPYUT_M = ztrsm_utcopy_sve.c
SAMAXKERNEL = amax.S
DAMAXKERNEL = amax.S
CAMAXKERNEL = zamax.S
ZAMAXKERNEL = zamax.S
SAXPYKERNEL = axpy.S
DAXPYKERNEL = axpy.S
CAXPYKERNEL = zaxpy.S
ZAXPYKERNEL = zaxpy.S
SROTKERNEL = rot.S
DROTKERNEL = rot.S
CROTKERNEL = zrot.S
ZROTKERNEL = zrot.S
SSCALKERNEL = scal.S
DSCALKERNEL = scal.S
CSCALKERNEL = zscal.S
ZSCALKERNEL = zscal.S
SGEMVNKERNEL = gemv_n.S
DGEMVNKERNEL = gemv_n.S
CGEMVNKERNEL = zgemv_n.S
ZGEMVNKERNEL = zgemv_n.S
SGEMVTKERNEL = gemv_t.S
DGEMVTKERNEL = gemv_t.S
CGEMVTKERNEL = zgemv_t.S
ZGEMVTKERNEL = zgemv_t.S
SASUMKERNEL = asum.S
DASUMKERNEL = asum.S
CASUMKERNEL = casum.S
ZASUMKERNEL = zasum.S
SCOPYKERNEL = copy.S
DCOPYKERNEL = copy.S
CCOPYKERNEL = copy.S
ZCOPYKERNEL = copy.S
SSWAPKERNEL = swap.S
DSWAPKERNEL = swap.S
CSWAPKERNEL = swap.S
ZSWAPKERNEL = swap.S
ISAMAXKERNEL = iamax.S
IDAMAXKERNEL = iamax.S
ICAMAXKERNEL = izamax.S
IZAMAXKERNEL = izamax.S
SNRM2KERNEL = nrm2.S
DNRM2KERNEL = nrm2.S
CNRM2KERNEL = znrm2.S
ZNRM2KERNEL = znrm2.S
DDOTKERNEL = dot.S
ifneq ($(C_COMPILER), PGI)
SDOTKERNEL = ../generic/dot.c
else
SDOTKERNEL = dot.S
endif
ifneq ($(C_COMPILER), PGI)
CDOTKERNEL = zdot.S
ZDOTKERNEL = zdot.S
else
CDOTKERNEL = ../arm/zdot.c
ZDOTKERNEL = ../arm/zdot.c
endif
DSDOTKERNEL = dot.S
DGEMM_BETA = dgemm_beta.S
SGEMM_BETA = sgemm_beta.S
SGEMMKERNEL = sgemm_kernel_sve_v2x$(SGEMM_UNROLL_N).S
STRMMKERNEL = strmm_kernel_sve_v1x$(SGEMM_UNROLL_N).S
SGEMMINCOPY = sgemm_ncopy_sve_v1.c
SGEMMITCOPY = sgemm_tcopy_sve_v1.c
SGEMMONCOPY = sgemm_ncopy_$(DGEMM_UNROLL_N).S
SGEMMOTCOPY = sgemm_tcopy_$(DGEMM_UNROLL_N).S
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
STRMMUNCOPY_M = trmm_uncopy_sve_v1.c
STRMMLNCOPY_M = trmm_lncopy_sve_v1.c
STRMMUTCOPY_M = trmm_utcopy_sve_v1.c
STRMMLTCOPY_M = trmm_ltcopy_sve_v1.c
SSYMMUCOPY_M = symm_ucopy_sve.c
SSYMMLCOPY_M = symm_lcopy_sve.c
DGEMMKERNEL = dgemm_kernel_sve_v2x$(DGEMM_UNROLL_N).S
DTRMMKERNEL = dtrmm_kernel_sve_v1x$(DGEMM_UNROLL_N).S
DGEMMINCOPY = dgemm_ncopy_sve_v1.c
DGEMMITCOPY = dgemm_tcopy_sve_v1.c
DGEMMONCOPY = dgemm_ncopy_$(DGEMM_UNROLL_N).S
DGEMMOTCOPY = dgemm_tcopy_$(DGEMM_UNROLL_N).S
DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
DTRMMUNCOPY_M = trmm_uncopy_sve_v1.c
DTRMMLNCOPY_M = trmm_lncopy_sve_v1.c
DTRMMUTCOPY_M = trmm_utcopy_sve_v1.c
DTRMMLTCOPY_M = trmm_ltcopy_sve_v1.c
DSYMMUCOPY_M = symm_ucopy_sve.c
DSYMMLCOPY_M = symm_lcopy_sve.c
CGEMMKERNEL = cgemm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S
CTRMMKERNEL = ctrmm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S
CGEMMINCOPY = cgemm_ncopy_sve_v1.c
CGEMMITCOPY = cgemm_tcopy_sve_v1.c
CGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c
CGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c
CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX)
CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX)
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
CTRMMUNCOPY_M = ztrmm_uncopy_sve_v1.c
CTRMMLNCOPY_M = ztrmm_lncopy_sve_v1.c
CTRMMUTCOPY_M = ztrmm_utcopy_sve_v1.c
CTRMMLTCOPY_M = ztrmm_ltcopy_sve_v1.c
CHEMMLTCOPY_M = zhemm_ltcopy_sve.c
CHEMMUTCOPY_M = zhemm_utcopy_sve.c
CSYMMUCOPY_M = zsymm_ucopy_sve.c
CSYMMLCOPY_M = zsymm_lcopy_sve.c
ZGEMMKERNEL = zgemm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S
ZTRMMKERNEL = ztrmm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S
ZGEMMINCOPY = zgemm_ncopy_sve_v1.c
ZGEMMITCOPY = zgemm_tcopy_sve_v1.c
ZGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c
ZGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c
ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX)
ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX)
ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
ZTRMMUNCOPY_M = ztrmm_uncopy_sve_v1.c
ZTRMMLNCOPY_M = ztrmm_lncopy_sve_v1.c
ZTRMMUTCOPY_M = ztrmm_utcopy_sve_v1.c
ZTRMMLTCOPY_M = ztrmm_ltcopy_sve_v1.c
ZHEMMLTCOPY_M = zhemm_ltcopy_sve.c
ZHEMMUTCOPY_M = zhemm_utcopy_sve.c
ZSYMMUCOPY_M = zsymm_ucopy_sve.c
ZSYMMLCOPY_M = zsymm_lcopy_sve.c
include $(KERNELDIR)/KERNEL.ARMV8SVE

View File

@ -1,216 +1 @@
SAMINKERNEL = ../arm/amin.c
DAMINKERNEL = ../arm/amin.c
CAMINKERNEL = ../arm/zamin.c
ZAMINKERNEL = ../arm/zamin.c
SMAXKERNEL = ../arm/max.c
DMAXKERNEL = ../arm/max.c
SMINKERNEL = ../arm/min.c
DMINKERNEL = ../arm/min.c
ISAMINKERNEL = ../arm/iamin.c
IDAMINKERNEL = ../arm/iamin.c
ICAMINKERNEL = ../arm/izamin.c
IZAMINKERNEL = ../arm/izamin.c
ISMAXKERNEL = ../arm/imax.c
IDMAXKERNEL = ../arm/imax.c
ISMINKERNEL = ../arm/imin.c
IDMINKERNEL = ../arm/imin.c
STRSMKERNEL_LN = trsm_kernel_LN_sve.c
STRSMKERNEL_LT = trsm_kernel_LT_sve.c
STRSMKERNEL_RN = trsm_kernel_RN_sve.c
STRSMKERNEL_RT = trsm_kernel_RT_sve.c
DTRSMKERNEL_LN = trsm_kernel_LN_sve.c
DTRSMKERNEL_LT = trsm_kernel_LT_sve.c
DTRSMKERNEL_RN = trsm_kernel_RN_sve.c
DTRSMKERNEL_RT = trsm_kernel_RT_sve.c
TRSMCOPYLN_M = trsm_lncopy_sve.c
TRSMCOPYLT_M = trsm_ltcopy_sve.c
TRSMCOPYUN_M = trsm_uncopy_sve.c
TRSMCOPYUT_M = trsm_utcopy_sve.c
CTRSMKERNEL_LN = trsm_kernel_LN_sve.c
CTRSMKERNEL_LT = trsm_kernel_LT_sve.c
CTRSMKERNEL_RN = trsm_kernel_RN_sve.c
CTRSMKERNEL_RT = trsm_kernel_RT_sve.c
ZTRSMKERNEL_LN = trsm_kernel_LN_sve.c
ZTRSMKERNEL_LT = trsm_kernel_LT_sve.c
ZTRSMKERNEL_RN = trsm_kernel_RN_sve.c
ZTRSMKERNEL_RT = trsm_kernel_RT_sve.c
ZTRSMCOPYLN_M = ztrsm_lncopy_sve.c
ZTRSMCOPYLT_M = ztrsm_ltcopy_sve.c
ZTRSMCOPYUN_M = ztrsm_uncopy_sve.c
ZTRSMCOPYUT_M = ztrsm_utcopy_sve.c
SAMAXKERNEL = amax.S
DAMAXKERNEL = amax.S
CAMAXKERNEL = zamax.S
ZAMAXKERNEL = zamax.S
SAXPYKERNEL = axpy.S
DAXPYKERNEL = axpy.S
CAXPYKERNEL = zaxpy.S
ZAXPYKERNEL = zaxpy.S
SROTKERNEL = rot.S
DROTKERNEL = rot.S
CROTKERNEL = zrot.S
ZROTKERNEL = zrot.S
SSCALKERNEL = scal.S
DSCALKERNEL = scal.S
CSCALKERNEL = zscal.S
ZSCALKERNEL = zscal.S
SGEMVNKERNEL = gemv_n.S
DGEMVNKERNEL = gemv_n.S
CGEMVNKERNEL = zgemv_n.S
ZGEMVNKERNEL = zgemv_n.S
SGEMVTKERNEL = gemv_t.S
DGEMVTKERNEL = gemv_t.S
CGEMVTKERNEL = zgemv_t.S
ZGEMVTKERNEL = zgemv_t.S
SASUMKERNEL = asum.S
DASUMKERNEL = asum.S
CASUMKERNEL = casum.S
ZASUMKERNEL = zasum.S
SCOPYKERNEL = copy.S
DCOPYKERNEL = copy.S
CCOPYKERNEL = copy.S
ZCOPYKERNEL = copy.S
SSWAPKERNEL = swap.S
DSWAPKERNEL = swap.S
CSWAPKERNEL = swap.S
ZSWAPKERNEL = swap.S
ISAMAXKERNEL = iamax.S
IDAMAXKERNEL = iamax.S
ICAMAXKERNEL = izamax.S
IZAMAXKERNEL = izamax.S
SNRM2KERNEL = nrm2.S
DNRM2KERNEL = nrm2.S
CNRM2KERNEL = znrm2.S
ZNRM2KERNEL = znrm2.S
DDOTKERNEL = dot.S
ifneq ($(C_COMPILER), PGI)
SDOTKERNEL = ../generic/dot.c
else
SDOTKERNEL = dot.S
endif
ifneq ($(C_COMPILER), PGI)
CDOTKERNEL = zdot.S
ZDOTKERNEL = zdot.S
else
CDOTKERNEL = ../arm/zdot.c
ZDOTKERNEL = ../arm/zdot.c
endif
DSDOTKERNEL = dot.S
DGEMM_BETA = dgemm_beta.S
SGEMM_BETA = sgemm_beta.S
SGEMMKERNEL = sgemm_kernel_sve_v2x$(SGEMM_UNROLL_N).S
STRMMKERNEL = strmm_kernel_sve_v1x$(SGEMM_UNROLL_N).S
SGEMMINCOPY = sgemm_ncopy_sve_v1.c
SGEMMITCOPY = sgemm_tcopy_sve_v1.c
SGEMMONCOPY = sgemm_ncopy_$(DGEMM_UNROLL_N).S
SGEMMOTCOPY = sgemm_tcopy_$(DGEMM_UNROLL_N).S
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
STRMMUNCOPY_M = trmm_uncopy_sve_v1.c
STRMMLNCOPY_M = trmm_lncopy_sve_v1.c
STRMMUTCOPY_M = trmm_utcopy_sve_v1.c
STRMMLTCOPY_M = trmm_ltcopy_sve_v1.c
SSYMMUCOPY_M = symm_ucopy_sve.c
SSYMMLCOPY_M = symm_lcopy_sve.c
DGEMMKERNEL = dgemm_kernel_sve_v2x$(DGEMM_UNROLL_N).S
DTRMMKERNEL = dtrmm_kernel_sve_v1x$(DGEMM_UNROLL_N).S
DGEMMINCOPY = dgemm_ncopy_sve_v1.c
DGEMMITCOPY = dgemm_tcopy_sve_v1.c
DGEMMONCOPY = dgemm_ncopy_$(DGEMM_UNROLL_N).S
DGEMMOTCOPY = dgemm_tcopy_$(DGEMM_UNROLL_N).S
DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
DTRMMUNCOPY_M = trmm_uncopy_sve_v1.c
DTRMMLNCOPY_M = trmm_lncopy_sve_v1.c
DTRMMUTCOPY_M = trmm_utcopy_sve_v1.c
DTRMMLTCOPY_M = trmm_ltcopy_sve_v1.c
DSYMMUCOPY_M = symm_ucopy_sve.c
DSYMMLCOPY_M = symm_lcopy_sve.c
CGEMMKERNEL = cgemm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S
CTRMMKERNEL = ctrmm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S
CGEMMINCOPY = cgemm_ncopy_sve_v1.c
CGEMMITCOPY = cgemm_tcopy_sve_v1.c
CGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c
CGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c
CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX)
CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX)
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
CTRMMUNCOPY_M = ztrmm_uncopy_sve_v1.c
CTRMMLNCOPY_M = ztrmm_lncopy_sve_v1.c
CTRMMUTCOPY_M = ztrmm_utcopy_sve_v1.c
CTRMMLTCOPY_M = ztrmm_ltcopy_sve_v1.c
CHEMMLTCOPY_M = zhemm_ltcopy_sve.c
CHEMMUTCOPY_M = zhemm_utcopy_sve.c
CSYMMUCOPY_M = zsymm_ucopy_sve.c
CSYMMLCOPY_M = zsymm_lcopy_sve.c
ZGEMMKERNEL = zgemm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S
ZTRMMKERNEL = ztrmm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S
ZGEMMINCOPY = zgemm_ncopy_sve_v1.c
ZGEMMITCOPY = zgemm_tcopy_sve_v1.c
ZGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c
ZGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c
ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX)
ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX)
ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
ZTRMMUNCOPY_M = ztrmm_uncopy_sve_v1.c
ZTRMMLNCOPY_M = ztrmm_lncopy_sve_v1.c
ZTRMMUTCOPY_M = ztrmm_utcopy_sve_v1.c
ZTRMMLTCOPY_M = ztrmm_ltcopy_sve_v1.c
ZHEMMLTCOPY_M = zhemm_ltcopy_sve.c
ZHEMMUTCOPY_M = zhemm_utcopy_sve.c
ZSYMMUCOPY_M = zsymm_ucopy_sve.c
ZSYMMLCOPY_M = zsymm_lcopy_sve.c
include $(KERNELDIR)/KERNEL.ARMV8SVE

View File

@ -21,12 +21,15 @@ SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
endif
DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
SGEMVNKERNEL = sgemv_n_8_lasx.S
SGEMVTKERNEL = sgemv_t_8_lasx.S
DTRSMKERNEL_LN = dtrsm_kernel_LN_16x4_lasx.S
DTRSMKERNEL_LT = dtrsm_kernel_LT_16x4_lasx.S
DTRSMKERNEL_RN = dtrsm_kernel_RN_16x4_lasx.S
DTRSMKERNEL_RT = dtrsm_kernel_RT_16x4_lasx.S
endif
STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c

View File

@ -341,7 +341,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
fmadd.d $f10, $f12, $f2, $f10
.endm
.macro DGEMV_N XW:req, X_8:req, X_4:req, X_2:req, X_1:req, Y_8:req, Y_4:req, Y_1:req
.macro DGEMV_N_LASX XW:req, X_8:req, X_4:req, X_2:req, X_1:req, Y_8:req, Y_4:req, Y_1:req
PTR_SRLI J, N, 3
beqz J, .L_\XW\()_N_7
PTR_SLLI K_LDA, LDA, 3
@ -541,13 +541,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.hword .L_GAP_1_0 - .L_GAP_TABLE
.hword .L_GAP_1_1 - .L_GAP_TABLE
.L_GAP_0_0: /* if (inc_x == 1) && (incy == 1) */
DGEMV_N GAP_0_0, X_8, X_4, X_2, X_1, Y_8, Y_4, Y_1
DGEMV_N_LASX GAP_0_0, X_8, X_4, X_2, X_1, Y_8, Y_4, Y_1
.L_GAP_0_1: /* if (inc_x == 1) && (incy != 1) */
DGEMV_N GAP_0_1, X_8, X_4, X_2, X_1, Y_8_GAP, Y_4_GAP, Y_1
DGEMV_N_LASX GAP_0_1, X_8, X_4, X_2, X_1, Y_8_GAP, Y_4_GAP, Y_1
.L_GAP_1_0: /* if (inc_x != 1) && (incy == 1) */
DGEMV_N GAP_1_0, X_8_GAP, X_4_GAP, X_2_GAP, X_1, Y_8, Y_4, Y_1
DGEMV_N_LASX GAP_1_0, X_8_GAP, X_4_GAP, X_2_GAP, X_1, Y_8, Y_4, Y_1
.L_GAP_1_1: /* if (inc_x != 1) && (incy != 1) */
DGEMV_N GAP_1_1, X_8_GAP, X_4_GAP, X_2_GAP, X_1, Y_8_GAP, Y_4_GAP, Y_1
DGEMV_N_LASX GAP_1_1, X_8_GAP, X_4_GAP, X_2_GAP, X_1, Y_8_GAP, Y_4_GAP, Y_1
.L_END:
pop_if_used 17 + 7, 24 + 4
jirl $r0, $r1, 0x0

View File

@ -220,7 +220,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
GMADD xvf, d, TP0, A0, X0, TP0, TP1, A2, X0, TP1
.endm
.macro DGEMV_T XW:req X8:req, X4:req
.macro DGEMV_T_LASX XW:req X8:req, X4:req
PTR_SRLI J, N, 3
beqz J, .L_\XW\()_N_7
PTR_SLLI K_LDA, LDA, 3
@ -472,9 +472,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.hword .L_GAP_0 - .L_GAP_TABLE
.hword .L_GAP_1 - .L_GAP_TABLE
.L_GAP_0: /* if (incx == 1) */
DGEMV_T GAP_0, X8, X4
DGEMV_T_LASX GAP_0, X8, X4
.L_GAP_1: /* if (incx != 1) */
DGEMV_T GAP_1, X8_GAP, X4_GAP
DGEMV_T_LASX GAP_1, X8_GAP, X4_GAP
.L_END:
pop_if_used 17 + 8, 24 + 3
jirl $r0, $r1, 0x0

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,959 @@
/*******************************************************************************
Copyright (c) 2023, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*******************************************************************************/
#define ASSEMBLER
#include "common.h"
#include "loongarch64_asm.S"
/*********************************************************************
* 2023/08/26 guxiwei
* UTEST : OK
* CTEST : OK
* TEST : OK
*
*
*********************************************************************/
/* int CNAME(BLASLONG m, BLASLONG n, BLASLONG k, FLOAT dummy1, FLOAT *a, FLOAT *b,
* FLOAT *c, BLASLONG ldc, BLASLONG offset)
*/
#define M $r4 // param 1: bm
#define N $r5 // param 2: bn
#define K $r6 // param 3: bk
#define A $r7 // param 5: ba
#define B $r8 // param 6: bb
#define C $r9 // param 7: bc
#define LDC $r10 // param 8: ldc
#define OFFSET $r11 // param 9: offset
/* Cycle control parameters */
#define I $r13
#define J $r14
#define L $r15
#define TL $r16
/* Matrix address */
#define A0 $r17
#define B0 $r18
#define C0 $r19
#define C1 $r20
#define C2 $r23
#define C3 $r24
#define T0 $r25
#define T1 $r26
#define T2 $r27
#define KK $r28
#define AA $r29
#define CC $r30
#define BB B0
#undef ZERO
#define ZERO $r0
#define U0 $xr0
#define U1 $xr1
#define U2 $xr2
#define U3 $xr3
#define U4 $xr4
#define U5 $xr5
#define U6 $xr6
#define U7 $xr7
#define U8 $xr8
#define U9 $xr9
#define U10 $xr10
#define U11 $xr11
#define U12 $xr12
#define U13 $xr13
#define U14 $xr14
#define U15 $xr15
#define D0 $xr16
#define D1 $xr17
#define D2 $xr18
#define D3 $xr19
#define D4 $xr20
#define D5 $xr21
#define D6 $xr22
#define D7 $xr23
#define D8 $xr24
#define D9 $xr25
#define D10 $xr26
#define D11 $xr27
#define D12 $xr28
#define D13 $xr29
#define D14 $xr30
#define D15 $xr31
#define G0 D0
#define G1 D1
#define G2 D2
#define G3 D3
#define G4 D4
#define G5 D5
#define G6 D6
#define G7 D7
#define G8 D8
#define G9 D9
#define G10 D10
#define G11 D11
#define G12 D12
#define G13 D13
#define G14 D14
#define G15 D15
/* Prefetch interval */
#define A_PRE 0x400
#define B_PRE 0x100
#include "dtrsm_kernel_macro.S"
.macro ldrepl_macro start, end, stride
// Load Ux (x = 0...15)
.if \start <= \end
GLDREPL xv, d, $xr\start, A0, \stride * 8
ldrepl_macro %start + 1, \end, %stride + 1
.endif
.endm
.macro nmsub_macro start0, end0, start1, reg
// Gx -= reg * Ux
.if \start0 <= \end0
xvfnmsub.d $xr\start0, \reg, $xr\start1, $xr\start0
nmsub_macro %start0 + 1, \end0, %start1 + 1, \reg
.endif
.endm
.macro B_st_macro start, end, stride, N
// Store Gx(x = 16...31)
.if \start <= \end
.if \N == 4
xvst $xr\start, B0, \stride * 0x20
.elseif \N == 2
vst $vr\start, B0, \stride * 0x10
.elseif \N == 1
fst.d $f\start, B0, \stride * 0x08
.endif
B_st_macro %start + 1, \end, %stride + 1, \N
.endif
.endm
.macro dsolve_16 N
// The data layout of C (4x16) is as follows (store 4 data in each register):
// U0 U1 U2 U3
// U4 U5 U6 U7
// U8 U9 U10 U11
// U12 U13 U14 U15
// The first step is to transpose the result of C
GTRANSPOSE4x4_D U3, U7, U11, U15, G12, G13, G14, G15, D0, D1
GTRANSPOSE4x4_D U2, U6, U10, U14, G8, G9, G10, G11, D0, D1
GTRANSPOSE4x4_D U1, U5, U9, U13, G4, G5, G6, G7, U3, U7
GTRANSPOSE4x4_D U0, U4, U8, U12, G0, G1, G2, G3, U3, U7
// Now we have the following memory layout of C:
// 0 1 2 3 ... 15
// 0 | | | | | | |
// 1 | G0 | G1 | G2 | G3 | ... | G15 |
// 2 | | | | | | |
// 3 | | | | | | |
// Next we are going to process matrix A with a size of 16x16,
// using only the upper triangular portion. The memory layout of
// matrix A is as follows, quite large.
//0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
// 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
// 34 35 36 37 38 39 40 41 42 43 44 45 46 47
// 51 52 53 54 55 56 57 58 59 60 61 62 63
// 68 69 70 71 72 73 74 75 76 77 78 79
// 85 86 87 88 89 90 91 92 93 94 95
// 102 103 104 105 106 107 108 109 110 111
// 119 120 121 122 123 124 125 126 127
// 136 137 138 139 140 141 142 143
// 153 154 155 156 157 158 159
// 170 171 172 173 174 175
// 187 188 189 190 191
// 204 205 206 207
// 221 222 223
// 238 239
// 255
// Sequentially extract data from A in row order
// Load 0
ldrepl_macro 0, 15, 0
GMUL xvf, d, G0, G0, U0
nmsub_macro 17, 31, 1, G0
PTR_ADDI A0, A0, 17 * 8
// Load 1
ldrepl_macro 1, 15, 0
GMUL xvf, d, G1, G1, U1
nmsub_macro 18, 31, 2, G1
PTR_ADDI A0, A0, 17 * 8
// Load 2
ldrepl_macro 2, 15, 0
GMUL xvf, d, G2, G2, U2
nmsub_macro 19, 31, 3, G2
PTR_ADDI A0, A0, 17 * 8
// Load 3
ldrepl_macro 3, 15, 0
GMUL xvf, d, G3, G3, U3
nmsub_macro 20, 31, 4, G3
PTR_ADDI A0, A0, 17 * 8
// Load 4
ldrepl_macro 4, 15, 0
GMUL xvf, d, G4, G4, U4
nmsub_macro 21, 31, 5, G4
PTR_ADDI A0, A0, 17 * 8
// Load 5
ldrepl_macro 5, 15, 0
GMUL xvf, d, G5, G5, U5
nmsub_macro 22, 31, 6, G5
PTR_ADDI A0, A0, 17 * 8
// Load 6
ldrepl_macro 6, 15, 0
GMUL xvf, d, G6, G6, U6
nmsub_macro 23, 31, 7, G6
PTR_ADDI A0, A0, 17 * 8
// Load 7
ldrepl_macro 7, 15, 0
GMUL xvf, d, G7, G7, U7
nmsub_macro 24, 31, 8, G7
PTR_ADDI A0, A0, 17 * 8
// Load 8
ldrepl_macro 8, 15, 0
GMUL xvf, d, G8, G8, U8
nmsub_macro 25, 31, 9, G8
PTR_ADDI A0, A0, 17 * 8
// Load 9
ldrepl_macro 9, 15, 0
GMUL xvf, d, G9, G9, U9
nmsub_macro 26, 31, 10, G9
PTR_ADDI A0, A0, 17 * 8
// Load 10
ldrepl_macro 10, 15, 0
GMUL xvf, d, G10, G10, U10
nmsub_macro 27, 31, 11, G10
PTR_ADDI A0, A0, 17 * 8
// Load 11
ldrepl_macro 11, 15, 0
GMUL xvf, d, G11, G11, U11
nmsub_macro 28, 31, 12, G11
PTR_ADDI A0, A0, 17 * 8
// Load 12
ldrepl_macro 12, 15, 0
GMUL xvf, d, G12, G12, U12
nmsub_macro 29, 31, 13, G12
PTR_ADDI A0, A0, 17 * 8
// Load 13
ldrepl_macro 13, 15, 0
GMUL xvf, d, G13, G13, U13
nmsub_macro 30, 31, 14, G13
PTR_ADDI A0, A0, 17 * 8
// Load 14
ldrepl_macro 14, 15, 0
GMUL xvf, d, G14, G14, U14
nmsub_macro 31, 31, 15, G14
PTR_ADDI A0, A0, 17 * 8
// Load 15
ldrepl_macro 15, 15, 0
GMUL xvf, d, G15, G15, U15
// Finally, We can store the result.
// For B, stored sequentially, and C, first transpose and then store
B_st_macro 16, 31, 0, \N
GTRANSPOSE4x4_D G0, G1, G2, G3, G0, G1, G2, G3, U0, U1
GTRANSPOSE4x4_D G4, G5, G6, G7, G4, G5, G6, G7, U0, U1
GTRANSPOSE4x4_D G8, G9, G10, G11, G8, G9, G10, G11, U0, U1
GTRANSPOSE4x4_D G12, G13, G14, G15, G12, G13, G14, G15, U0, U1
.if \N == 4
GST xv, , G0, C0, 0x00, G4, C0, 0x20, G8, C0, 0x40, G12, C0, 0x60, \
G1, C1, 0x00, G5, C1, 0x20, G9, C1, 0x40, G13, C1, 0x60, \
G2, C2, 0x00, G6, C2, 0x20, G10, C2, 0x40, G14, C2, 0x60, \
G3, C3, 0x00, G7, C3, 0x20, G11, C3, 0x40, G15, C3, 0x60
.elseif \N == 2
GST xv, , G0, C0, 0x00, G4, C0, 0x20, G8, C0, 0x40, G12, C0, 0x60, \
G1, C1, 0x00, G5, C1, 0x20, G9, C1, 0x40, G13, C1, 0x60
.elseif \N == 1
GST xv, , G0, C0, 0x00, G4, C0, 0x20, G8, C0, 0x40, G12, C0, 0x60
.endif
.endm
.macro dgemm_dsolve_16x4
bge ZERO, KK, .L_dsolve_16x4_load
dgemm_16x4
b .L_dsolve_16x4
.L_dsolve_16x4_load:
// Load C
GLD xv, , U0, C0, 0x00, U1, C0, 0x20, U2, C0, 0x40, U3, C0, 0x60
GLD xv, , U4, C1, 0x00, U5, C1, 0x20, U6, C1, 0x40, U7, C1, 0x60
GLD xv, , U8, C2, 0x00, U9, C2, 0x20, U10, C2, 0x40, U11, C2, 0x60
GLD xv, , U12, C3, 0x00, U13, C3, 0x20, U14, C3, 0x40, U15, C3, 0x60
/********************** solver ******************/
.L_dsolve_16x4:
dsolve_16 4
.endm
.macro dsolve_8 N
// The data layout of C (4x8) is as follows (store 4 data in each register):
// U0 U1
// U2 U3
// U4 U5
// U6 U7
// The first step is to transpose the result of C
GTRANSPOSE4x4_D U1, U3, U5, U7, G4, G5, G6, G7, G8, G9
GTRANSPOSE4x4_D U0, U2, U4, U6, G0, G1, G2, G3, G8, G9
// Now we have the following memory layout of C:
// 0 1 2 3 ... 7
// 0 | | | | | | |
// 1 | G0 | G1 | G2 | G3 | ... | G7 |
// 2 | | | | | | |
// 3 | | | | | | |
// Next we are going to process matrix A with a size of 8x8,
// using only the upper triangular portion. The memory layout of
// matrix A is as follows:
//0 1 2 3 4 5 6 7
// 9 10 11 12 13 14 15
// 18 19 20 21 22 23
// 27 28 29 30 31
// 36 37 38 39
// 45 46 47
// 54 55
// 63
// Sequentially extract data from A in row order
// Load 0
ldrepl_macro 0, 7, 0
GMUL xvf, d, G0, G0, U0
nmsub_macro 17, 23, 1, G0
PTR_ADDI A0, A0, 9 * 8
// Load 1
ldrepl_macro 1, 7, 0
GMUL xvf, d, G1, G1, U1
nmsub_macro 18, 23, 2, G1
PTR_ADDI A0, A0, 9 * 8
// Load 2
ldrepl_macro 2, 7, 0
GMUL xvf, d, G2, G2, U2
nmsub_macro 19, 23, 3, G2
PTR_ADDI A0, A0, 9 * 8
// Load 3
ldrepl_macro 3, 7, 0
GMUL xvf, d, G3, G3, U3
nmsub_macro 20, 23, 4, G3
PTR_ADDI A0, A0, 9 * 8
// Load 4
ldrepl_macro 4, 7, 0
GMUL xvf, d, G4, G4, U4
nmsub_macro 21, 23, 5, G4
PTR_ADDI A0, A0, 9 * 8
// Load 5
ldrepl_macro 5, 7, 0
GMUL xvf, d, G5, G5, U5
nmsub_macro 22, 23, 6, G5
PTR_ADDI A0, A0, 9 * 8
// Load 6
ldrepl_macro 6, 7, 0
GMUL xvf, d, G6, G6, U6
nmsub_macro 23, 23, 7, G6
PTR_ADDI A0, A0, 9 * 8
// Load 7
ldrepl_macro 7, 7, 0
GMUL xvf, d, G7, G7, U7
// Finally, We can store the result.
// For B, stored sequentially, and C, first transpose and then store
B_st_macro 16, 23, 0, \N
GTRANSPOSE4x4_D G0, G1, G2, G3, G0, G1, G2, G3, U0, U1
GTRANSPOSE4x4_D G4, G5, G6, G7, G4, G5, G6, G7, U0, U1
.if \N == 4
GST xv, , G0, C0, 0x00, G4, C0, 0x20, \
G1, C1, 0x00, G5, C1, 0x20, \
G2, C2, 0x00, G6, C2, 0x20, \
G3, C3, 0x00, G7, C3, 0x20
.elseif \N == 2
GST xv, , G0, C0, 0x00, G4, C0, 0x20, \
G1, C1, 0x00, G5, C1, 0x20
.elseif \N == 1
GST xv, , G0, C0, 0x00, G4, C0, 0x20
.endif
.endm
.macro dgemm_dsolve_8x4
bge ZERO, L, .L_dsolve_8x4_load
dgemm_8x4
b .L_dsolve_8x4
.L_dsolve_8x4_load:
/* Load C0 */
xvld U0, C0, 0x00
xvld U1, C0, 0x20
/* Load C1 */
xvld U2, C1, 0x00
xvld U3, C1, 0x20
/* Load C2 */
xvld U4, C2, 0x00
xvld U5, C2, 0x20
/* Load C3 */
xvld U6, C3, 0x00
xvld U7, C3, 0x20
/********* solver *********/
.L_dsolve_8x4:
dsolve_8 4
.endm
.macro dsolve_4 N
// The data layout of C (4x4) is as follows (store 4 data in each register):
// U0
// U1
// U2
// U3
// The first step is to transpose the result of C
GTRANSPOSE4x4_D U0, U1, U2, U3, G0, G1, G2, G3, G4, G5
// Now we have the following memory layout of C:
// 0 1 2 3
// 0 | | | | |
// 1 | G0 | G1 | G2 | G3 |
// 2 | | | | |
// 3 | | | | |
// Next we are going to process matrix A with a size of 4x4,
// using only the upper triangular portion. The memory layout of
// matrix A is as follows:
//0 1 2 3
// 5 6 7
// 10 11
// 15
// Sequentially extract data from A in row order
// Load 0
ldrepl_macro 0, 3, 0
GMUL xvf, d, G0, G0, U0
nmsub_macro 17, 19, 1, G0
PTR_ADDI A0, A0, 5 * 8
// Load 1
ldrepl_macro 1, 3, 0
GMUL xvf, d, G1, G1, U1
nmsub_macro 18, 19, 2, G1
PTR_ADDI A0, A0, 5 * 8
// Load 2
ldrepl_macro 2, 3, 0
GMUL xvf, d, G2, G2, U2
nmsub_macro 19, 19, 3, G2
PTR_ADDI A0, A0, 5 * 8
// Load 3
ldrepl_macro 3, 3, 0
GMUL xvf, d, G3, G3, U3
// Finally, We can store the result.
// For B, stored sequentially, and C, first transpose and then store
B_st_macro 16, 19, 0, \N
GTRANSPOSE4x4_D G0, G1, G2, G3, G0, G1, G2, G3, U0, U1
.if \N == 4
GST xv, , G0, C0, 0x00, G1, C1, 0x00, G2, C2, 0x00, G3, C3, 0x00
.elseif \N == 2
GST xv, , G0, C0, 0x00, G1, C1, 0x00
.elseif \N == 1
GST xv, , G0, C0, 0x00
.endif
.endm
.macro dgemm_dsolve_4x4
bge ZERO, L, .L_dsolve_4x4_load
dgemm_4x4
b .L_dsolve_4x4
.L_dsolve_4x4_load:
/* Load C0 */
xvld U0, C0, 0x00
/* Load C1 */
xvld U1, C1, 0x00
/* Load C2 */
xvld U2, C2, 0x00
/* Load C3 */
xvld U3, C3, 0x00
/************** solver *****************/
.L_dsolve_4x4:
dsolve_4 4
.endm
.macro dsolve_2 N
// Transpose
GSBUTTERFLY xv, d, G0, G1, U1, U0
// Now we have the following memory layout of C:
// 0 1
// 0 | | |
// 1 | G0 | G1 |
// 2 | | |
// 3 | | |
// Next we are going to process matrix A with a size of 2x2,
// using only the upper triangular portion. The memory layout of
// matrix A is as follows:
//0 1
// 3
// Sequentially extract data from A in row order
// Load 0
ldrepl_macro 0, 1, 0
GMUL xvf, d, G0, G0, U0
nmsub_macro 17, 17, 1, G0
PTR_ADDI A0, A0, 3 * 8
// Load 1
ldrepl_macro 1, 1, 0
GMUL xvf, d, G1, G1, U1
// Finally, We can store the result.
// For B, stored sequentially, and C, first transpose and then store
B_st_macro 16, 17, 0, \N
GSBUTTERFLY xv, d, U0, U1, G1, G0
.if \N == 4
vst $vr0, C0, 0x00
vst $vr1, C1, 0x00
xvstelm.d U0, C2, 0x00, 0x02
xvstelm.d U1, C3, 0x00, 0x02
xvstelm.d U0, C2, 0x08, 0x03
xvstelm.d U1, C3, 0x08, 0x03
.elseif \N == 2
vst $vr0, C0, 0x00
vst $vr1, C1, 0x00
.elseif \N == 1
vst $vr0, C0, 0x00
.endif
.endm
.macro dgemm_dsolve_2x4
bge ZERO, L, .L_dsolve_2x4_load
dgemm_2x4
b .L_dsolve_2x4
.L_dsolve_2x4_load:
/* Load C0 */
xvld U0, C0, 0x00
/* Load C1 */
xvld U1, C1, 0x00
/* Load C2 */
xvld U2, C2, 0x00
/* Load C3 */
xvld U3, C3, 0x00
xvpermi.q U0, U2, 0x02
xvpermi.q U1, U3, 0x02
/********************** solver ******************/
.L_dsolve_2x4:
dsolve_2 4
.endm
.macro dgemm_dsolve_1x4
bge ZERO, L, .L_dsolve_1x4_load
dgemm_1x4
b .L_dsolve_1x4
.L_dsolve_1x4_load:
// Load C
fld.d $f0, C0, 0x00
fld.d $f1, C1, 0x00
fld.d $f2, C2, 0x00
fld.d $f3, C3, 0x00
xvinsve0.d U0, U1, 0x01
xvinsve0.d U0, U2, 0x02
xvinsve0.d U0, U3, 0x03
.L_dsolve_1x4:
GLDREPL xv, d, D0, A0, 0x00
GMUL xvf, d, U0, U0, D0
// Store C
xvstelm.d U0, C0, 0x00, 0x00
xvstelm.d U0, C1, 0x00, 0x01
xvstelm.d U0, C2, 0x00, 0x02
xvstelm.d U0, C3, 0x00, 0x03
// Store B
xvst U0, B0, 0x00
.endm
.macro dgemm_dsolve_16x2
bge ZERO, L, .L_dsolve_16x2_load
dgemm_16x2
b .L_dsolve_16x2
.L_dsolve_16x2_load:
/* Load C0 */
xvld U0, C0, 0x00
xvld U1, C0, 0x20
xvld U2, C0, 0x40
xvld U3, C0, 0x60
/* Load C1 */
xvld U4, C1, 0x00
xvld U5, C1, 0x20
xvld U6, C1, 0x40
xvld U7, C1, 0x60
.L_dsolve_16x2:
dsolve_16 2
.endm
.macro dgemm_dsolve_8x2
bge ZERO, L, .L_dsolve_8x2_load
dgemm_8x2
b .L_dsolve_8x2
.L_dsolve_8x2_load:
/* Load C0 */
xvld U0, C0, 0x00
xvld U1, C0, 0x20
/* Load C1 */
xvld U2, C1, 0x00
xvld U3, C1, 0x20
.L_dsolve_8x2:
dsolve_8 2
.endm
.macro dgemm_dsolve_4x2
bge ZERO, L, .L_dsolve_4x2_load
dgemm_4x2
b .L_dsolve_4x2
.L_dsolve_4x2_load:
/* Load C0 */
xvld U0, C0, 0x00
/* Load C1 */
xvld U1, C1, 0x00
.L_dsolve_4x2:
dsolve_4 2
.endm
.macro dgemm_dsolve_1x2
bge ZERO, L, .L_dsolve_1x2_load
dgemm_1x2
b .L_dsolve_1x2
.L_dsolve_1x2_load:
// Load C
fld.d $f0, C0, 0x00
fld.d $f1, C1, 0x00
xvinsve0.d U0, U1, 0x01
.L_dsolve_1x2:
GLDREPL xv, d, D0, A0, 0x00
GMUL xvf, d, U0, U0, D0
// Store C
xvstelm.d U0, C0, 0x00, 0x00
xvstelm.d U0, C1, 0x00, 0x01
// Store B
vst $vr0, B0, 0x00
.endm
.macro dgemm_dsolve_2x2
bge ZERO, L, .L_dsolve_2x2_load
dgemm_2x2
b .L_dsolve_2x2
.L_dsolve_2x2_load:
/* Load C0 */
xvld U0, C0, 0x00
/* Load C1 */
xvld U1, C1, 0x00
.L_dsolve_2x2:
dsolve_2 2
.endm
.macro dgemm_dsolve_16x1
bge ZERO, L, .L_dsolve_16x1_load
dgemm_16x1
b .L_dsolve_16x1
.L_dsolve_16x1_load:
/* Load C0 */
xvld U0, C0, 0x00
xvld U1, C0, 0x20
xvld U2, C0, 0x40
xvld U3, C0, 0x60
.L_dsolve_16x1:
dsolve_16 1
.endm
.macro dgemm_dsolve_8x1
bge ZERO, L, .L_dsolve_8x1_load
dgemm_8x1
b .L_dsolve_8x1
.L_dsolve_8x1_load:
/* Load C0 */
xvld U0, C0, 0x00
xvld U1, C0, 0x20
.L_dsolve_8x1:
dsolve_8 1
.endm
.macro dgemm_dsolve_4x1
bge ZERO, L, .L_dsolve_4x1_load
dgemm_4x1
b .L_dsolve_4x1
.L_dsolve_4x1_load:
/* Load C0 */
xvld U0, C0, 0x00
.L_dsolve_4x1:
dsolve_4 1
.endm
.macro dgemm_dsolve_2x1
bge ZERO, L, .L_dsolve_2x1_load
dgemm_2x1
b .L_dsolve_2x1
.L_dsolve_2x1_load:
/* Load C0 */
xvld U0, C0, 0x00
.L_dsolve_2x1:
dsolve_2 1
.endm
.macro dgemm_dsolve_1x1
bge ZERO, L, .L_dsolve_1x1_load
dgemm_1x1
b .L_dsolve_1x1
.L_dsolve_1x1_load:
// Load C
fld.d $f0, C0, 0x00
.L_dsolve_1x1:
GLDREPL xv, d, D0, A0, 0x00
GMUL xvf, d, U0, U0, D0
// Store C
xvstelm.d U0, C0, 0x00, 0x00
// Store B
xvstelm.d U0, B0, 0x00, 0x00
.endm
PROLOGUE
push_if_used 26, 32
PTR_SLLI LDC, LDC, 3
/* if (!(N >> 2)) goto L_N3 */
PTR_SRAI J, N, 2 /* J = bn >> 2 */
andi N, N, 0x03
beq ZERO, J, .L_N3
.align 5
.L_J1:
PTR_ADDI J, J, -1
move KK, OFFSET
move AA, A
move CC, C
PTR_SRAI I, M, 4 // M >> 4
beqz I, .L_M15
.align 4
.L_I1:
GADD , d, C0, CC, ZERO, C1, C0, LDC, C2, C1, LDC, C3, C2, LDC
move A0, AA
move B0, B
move L, KK
dgemm_dsolve_16x4
PTR_ADDI I, I, -1
PTR_SLLI T0, K, 7
PTR_ADDI CC, CC, 0x80 // cc += 16
PTR_ADDI KK, KK, 0x10 // kk += 16
PTR_ADD AA, AA, T0 // aa += 16 * k
bnez I, .L_I1
.L_M15:
andi I, M, 8
beqz I, .L_M7
.L_M8:
GADD , d, C0, CC, ZERO, C1, C0, LDC, C2, C1, LDC, C3, C2, LDC
move A0, AA
move B0, B
move L, KK
dgemm_dsolve_8x4
PTR_SLLI T0, K, 6
PTR_ADDI CC, CC, 0x40 // cc += 8
PTR_ADDI KK, KK, 0x08 // kk += 8
PTR_ADD AA, AA, T0 // aa += 8 * k
.L_M7:
andi I, M, 4
beqz I, .L_M3
.L_M4:
GADD , d, C0, CC, ZERO, C1, C0, LDC, C2, C1, LDC, C3, C2, LDC
move A0, AA
move B0, B
move L, KK
dgemm_dsolve_4x4
PTR_SLLI T0, K, 5
PTR_ADDI CC, CC, 0x20 // cc += 4
PTR_ADDI KK, KK, 0x04 // kk += 4
PTR_ADD AA, AA, T0 // aa += 4 * k
.L_M3:
andi I, M, 2
beqz I, .L_M1
.L_M2:
GADD , d, C0, CC, ZERO, C1, C0, LDC, C2, C1, LDC, C3, C2, LDC
move A0, AA
move B0, B
move L, KK
dgemm_dsolve_2x4
PTR_SLLI T0, K, 4
PTR_ADDI CC, CC, 0x10 // cc += 2
PTR_ADDI KK, KK, 0x02 // kk += 2
PTR_ADD AA, AA, T0 // aa += 2 * k
.L_M1:
andi I, M, 1
beqz I, .L_M0
GADD , d, C0, CC, ZERO, C1, C0, LDC, C2, C1, LDC, C3, C2, LDC
move A0, AA
move B0, B
move L, KK
dgemm_dsolve_1x4
PTR_SLLI T0, K, 3
PTR_ADDI CC, CC, 0x08 // cc += 1
PTR_ADDI KK, KK, 0x01 // kk += 1
PTR_ADD AA, AA, T0 // aa += 1 * k
.L_M0:
PTR_SLLI T0, K, 5
PTR_SLLI T1, LDC, 2
PTR_ADD B, B, T0 // b += 4 * k
PTR_ADD C, C, T1 // c += 4 * ldc
bnez J, .L_J1
.L_N3:
andi J, N, 2
beq ZERO, J, .L_N1
.L_N2:
move KK, OFFSET
move AA, A
move CC, C
PTR_SRAI I, M, 4 // M >> 4
beqz I, .L_N2_M15
.align 4
.L_N2_I1:
GADD , d, C0, CC, ZERO, C1, C0, LDC
move A0, AA
move B0, B
move L, KK
dgemm_dsolve_16x2
PTR_ADDI I, I, -1
PTR_SLLI T0, K, 7
PTR_ADDI CC, CC, 0x80 // cc += 16
PTR_ADDI KK, KK, 0x10 // kk += 16
PTR_ADD AA, AA, T0 // aa += 16 * k
bnez I, .L_N2_I1
.L_N2_M15:
andi I, M, 8
beqz I, .L_N2_M7
.L_N2_M8:
GADD , d, C0, CC, ZERO, C1, C0, LDC
move A0, AA
move B0, B
move L, KK
dgemm_dsolve_8x2
PTR_SLLI T0, K, 6
PTR_ADDI CC, CC, 0x40 // cc += 8
PTR_ADDI KK, KK, 0x08 // kk += 8
PTR_ADD AA, AA, T0 // aa += 8 * k
.L_N2_M7:
andi I, M, 4
beqz I, .L_N2_M3
.L_N2_M4:
GADD , d, C0, CC, ZERO, C1, C0, LDC
move A0, AA
move B0, B
move L, KK
dgemm_dsolve_4x2
PTR_SLLI T0, K, 5
PTR_ADDI CC, CC, 0x20 // cc += 4
PTR_ADDI KK, KK, 0x04 // kk += 4
PTR_ADD AA, AA, T0 // aa += 4 * k
.L_N2_M3:
andi I, M, 2
beqz I, .L_N2_M1
.L_N2_M2:
GADD , d, C0, CC, ZERO, C1, C0, LDC
move A0, AA
move B0, B
move L, KK
dgemm_dsolve_2x2
PTR_SLLI T0, K, 4
PTR_ADDI CC, CC, 0x10 // cc += 2
PTR_ADDI KK, KK, 0x02 // kk += 2
PTR_ADD AA, AA, T0 // aa += 2 * k
.L_N2_M1:
andi I, M, 1
beqz I, .L_N2_M0
GADD , d, C0, CC, ZERO, C1, C0, LDC
move A0, AA
move B0, B
move L, KK
dgemm_dsolve_1x2
PTR_SLLI T0, K, 3
PTR_ADDI CC, CC, 0x08 // cc += 1
PTR_ADDI KK, KK, 0x01 // kk += 1
PTR_ADD AA, AA, T0 // aa += 1 * k
.L_N2_M0:
PTR_SLLI T0, K, 4
PTR_SLLI T1, LDC, 1
PTR_ADD B, B, T0 // b += 2 * k
PTR_ADD C, C, T1 // c += 2 * ldc
.L_N1:
andi J, N, 1
beq ZERO, J, .L_N0
move KK, OFFSET
move AA, A
move CC, C
PTR_SRAI I, M, 4 // M >> 4
beqz I, .L_N1_M15
.align 4
.L_N1_I1:
GADD , d, C0, CC, ZERO
move A0, AA
move B0, B
move L, KK
dgemm_dsolve_16x1
PTR_ADDI I, I, -1
PTR_SLLI T0, K, 7
PTR_ADDI CC, CC, 0x80 // cc += 16
PTR_ADDI KK, KK, 0x10 // kk += 16
PTR_ADD AA, AA, T0 // aa += 16 * k
bnez I, .L_N1_I1
.L_N1_M15:
andi I, M, 8
beqz I, .L_N1_M7
.L_N1_M8:
GADD , d, C0, CC, ZERO
move A0, AA
move B0, B
move L, KK
dgemm_dsolve_8x1
PTR_SLLI T0, K, 6
PTR_ADDI CC, CC, 0x40 // cc += 8
PTR_ADDI KK, KK, 0x08 // kk += 8
PTR_ADD AA, AA, T0 // aa += 8 * k
.L_N1_M7:
andi I, M, 4
beqz I, .L_N1_M3
.L_N1_M4:
GADD , d, C0, CC, ZERO
move A0, AA
move B0, B
move L, KK
dgemm_dsolve_4x1
PTR_SLLI T0, K, 5
PTR_ADDI CC, CC, 0x20 // cc += 4
PTR_ADDI KK, KK, 0x04 // kk += 4
PTR_ADD AA, AA, T0 // aa += 4 * k
.L_N1_M3:
andi I, M, 2
beqz I, .L_N1_M1
.L_N1_M2:
GADD , d, C0, CC, ZERO
move A0, AA
move B0, B
move L, KK
dgemm_dsolve_2x1
PTR_SLLI T0, K, 4
PTR_ADDI CC, CC, 0x10 // cc += 2
PTR_ADDI KK, KK, 0x02 // kk += 2
PTR_ADD AA, AA, T0 // aa += 2 * k
.L_N1_M1:
andi I, M, 1
beqz I, .L_N1_M0
GADD , d, C0, CC, ZERO
move A0, AA
move B0, B
move L, KK
dgemm_dsolve_1x1
PTR_SLLI T0, K, 3
PTR_ADDI CC, CC, 0x08 // cc += 1
PTR_ADDI KK, KK, 0x01 // kk += 1
PTR_ADD AA, AA, T0 // aa += 1 * k
.L_N1_M0:
.L_N0:
pop_if_used 26, 32
jirl $r0, $r1, 0x0
EPILOGUE

View File

@ -0,0 +1,882 @@
/*******************************************************************************
Copyright (c) 2023, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*******************************************************************************/
#define ASSEMBLER
#include "common.h"
#include "loongarch64_asm.S"
/*********************************************************************
* 2023/09/26 guxiwei
* UTEST : OK
* CTEST : OK
* TEST : OK
*
*
*********************************************************************/
/* int CNAME(BLASLONG m, BLASLONG n, BLASLONG k, FLOAT dummy1, FLOAT *a, FLOAT *b,
* FLOAT *c, BLASLONG ldc, BLASLONG offset)
*/
#define M $r4 // param 1: bm
#define N $r5 // param 2: bn
#define K $r6 // param 3: bk
#define A $r7 // param 5: ba
#define B $r8 // param 6: bb
#define C $r9 // param 7: bc
#define LDC $r10 // param 8: ldc
#define OFFSET $r11 // param 9: offset
/* Cycle control parameters */
#define I $r13
#define J $r14
#define L $r15
#define TL $r16
/* Matrix address */
#define A0 $r17
#define B0 $r18
#define C0 $r19
#define C1 $r20
#define C2 $r23
#define C3 $r24
#define T0 $r25
#define T1 $r26
#define T2 $r27
#define KK $r28
#define AA $r29
#define CC $r30
#define BB B0
#undef ZERO
#define ZERO $r0
#define U0 $xr0
#define U1 $xr1
#define U2 $xr2
#define U3 $xr3
#define U4 $xr4
#define U5 $xr5
#define U6 $xr6
#define U7 $xr7
#define U8 $xr8
#define U9 $xr9
#define U10 $xr10
#define U11 $xr11
#define U12 $xr12
#define U13 $xr13
#define U14 $xr14
#define U15 $xr15
#define D0 $xr16
#define D1 $xr17
#define D2 $xr18
#define D3 $xr19
#define D4 $xr20
#define D5 $xr21
#define D6 $xr22
#define D7 $xr23
#define D8 $xr24
#define D9 $xr25
#define D10 $xr26
#define D11 $xr27
#define D12 $xr28
#define D13 $xr29
#define D14 $xr30
#define D15 $xr31
#define G0 D0
#define G1 D1
#define G2 D2
#define G3 D3
#define G4 D4
#define G5 D5
#define G6 D6
#define G7 D7
#define G8 D8
#define G9 D9
#define G10 D10
#define G11 D11
#define G12 D12
#define G13 D13
#define G14 D14
#define G15 D15
/* Prefetch interval */
#define A_PRE 0x400
#define B_PRE 0x100
#include "dtrsm_kernel_macro.S"
.macro ldrepl_macro start, end, stride
// Load Ux (x = 0...15)
.if \start <= \end
GLDREPL xv, d, $xr\start, B0, \stride * 8
ldrepl_macro %start + 1, \end, %stride + 1
.endif
.endm
.macro nmsub_macro start0, end0, start1, reg
// Ux -= reg * Dx
.if \start0 <= \end0
xvfnmsub.d $xr\start0, \reg, $xr\start1, $xr\start0
nmsub_macro %start0 + 1, \end0, %start1 + 1, \reg
.endif
.endm
.macro A_st_macro start, end, stride, N
// Store Ux(x = 0...15)
.if \start <= \end
.if \N == 4
xvst $xr\start, A0, \stride * 0x20
.elseif \N == 2
vst $vr\start, A0, \stride * 0x10
.elseif \N == 1
fst.d $f\start, A0, \stride * 0x08
.endif
A_st_macro %start + 1, \end, %stride + 1, \N
.endif
.endm
.macro dsolve_16x4
// We are going to process matrix B with a size of 4x4,
// using only the upper triangular portion. The memory layout of
// matrix B is as follows:
//0 1 2 3
// 5 6 7
// 10 11
// 15
// Sequentially extract data from B in row order
ldrepl_macro 16, 19, 0
GMUL xvf, d, U0, D0, U0, U1, D0, U1, U2, D0, U2, U3, D0, U3
ldrepl_macro 20, 22, 5
nmsub_macro 4, 7, 0, D1
ldrepl_macro 23, 24, 10
GMUL xvf, d, U4, D4, U4, U5, D4, U5, U6, D4, U6, U7, D4, U7
ldrepl_macro 25, 25, 15
nmsub_macro 8, 11, 0, D2
nmsub_macro 8, 11, 4, D5
GMUL xvf, d, U8, D7, U8, U9, D7, U9, U10, D7, U10, U11, D7, U11
nmsub_macro 12, 15, 0, D3
nmsub_macro 12, 15, 4, D6
nmsub_macro 12, 15, 8, D8
GMUL xvf, d, U12, D9, U12, U13, D9, U13, U14, D9, U14, U15, D9, U15
// Store A
A_st_macro 0, 15, 0, 4
// Store C
GST xv, , U0, C0, 0x00, U1, C0, 0x20, U2, C0, 0x40, U3, C0, 0x60, \
U4, C1, 0x00, U5, C1, 0x20, U6, C1, 0x40, U7, C1, 0x60, \
U8, C2, 0x00, U9, C2, 0x20, U10, C2, 0x40, U11, C2, 0x60, \
U12, C3, 0x00, U13, C3, 0x20, U14, C3, 0x40, U15, C3, 0x60
.endm
.macro dsolve_16x2
// We are going to process matrix B with a size of 2x2,
// using only the upper triangular portion. The memory layout of
// matrix B is as follows:
//0 1
// 3
// Sequentially extract data from B in row order
ldrepl_macro 16, 17, 0
GMUL xvf, d, U0, D0, U0, U1, D0, U1, U2, D0, U2, U3, D0, U3
ldrepl_macro 18, 18, 3
nmsub_macro 4, 7, 0, D1
GMUL xvf, d, U4, D2, U4, U5, D2, U5, U6, D2, U6, U7, D2, U7
// Store A
A_st_macro 0, 7, 0, 4
// Store C
GST xv, , U0, C0, 0x00, U1, C0, 0x20, U2, C0, 0x40, U3, C0, 0x60, \
U4, C1, 0x00, U5, C1, 0x20, U6, C1, 0x40, U7, C1, 0x60
.endm
.macro dsolve_8x4
// We are going to process matrix B with a size of 4x4,
// using only the upper triangular portion. The memory layout of
// matrix B is as follows:
//0 1 2 3
// 5 6 7
// 10 11
// 15
// Sequentially extract data from B in row order
ldrepl_macro 16, 19, 0
GMUL xvf, d, U0, D0, U0, U1, D0, U1
ldrepl_macro 20, 22, 5
nmsub_macro 2, 3, 0, D1
ldrepl_macro 23, 24, 10
GMUL xvf, d, U2, D4, U2, U3, D4, U3
ldrepl_macro 25, 25, 15
nmsub_macro 4, 5, 0, D2
nmsub_macro 4, 5, 2, D5
GMUL xvf, d, U4, D7, U4, U5, D7, U5
nmsub_macro 6, 7, 0, D3
nmsub_macro 6, 7, 2, D6
nmsub_macro 6, 7, 4, D8
GMUL xvf, d, U6, D9, U6, U7, D9, U7
// Store A
A_st_macro 0, 7, 0, 4
// Store C
GST xv, , U0, C0, 0x00, U1, C0, 0x20, \
U2, C1, 0x00, U3, C1, 0x20, \
U4, C2, 0x00, U5, C2, 0x20, \
U6, C3, 0x00, U7, C3, 0x20
.endm
.macro dsolve_8x2
// We are going to process matrix B with a size of 2x2,
// using only the upper triangular portion. The memory layout of
// matrix B is as follows:
//0 1
// 3
// Sequentially extract data from B in row order
ldrepl_macro 16, 17, 0
GMUL xvf, d, U0, D0, U0, U1, D0, U1
ldrepl_macro 18, 18, 3
nmsub_macro 2, 3, 0, D1
GMUL xvf, d, U2, D2, U2, U3, D2, U3
// Store A
A_st_macro 0, 3, 0, 4
// Store C
GST xv, , U0, C0, 0x00, U1, C0, 0x20, \
U2, C1, 0x00, U3, C1, 0x20
.endm
.macro dsolve_4x4
// We are going to process matrix B with a size of 4x4,
// using only the upper triangular portion. The memory layout of
// matrix B is as follows:
//0 1 2 3
// 5 6 7
// 10 11
// 15
// Sequentially extract data from B in row order
ldrepl_macro 16, 19, 0
GMUL xvf, d, U0, D0, U0
ldrepl_macro 20, 22, 5
nmsub_macro 1, 1, 0, D1
ldrepl_macro 23, 24, 10
GMUL xvf, d, U1, D4, U1
ldrepl_macro 25, 25, 15
nmsub_macro 2, 2, 0, D2
nmsub_macro 2, 2, 1, D5
GMUL xvf, d, U2, D7, U2
nmsub_macro 3, 3, 0, D3
nmsub_macro 3, 3, 1, D6
nmsub_macro 3, 3, 2, D8
GMUL xvf, d, U3, D9, U3
// Store A
A_st_macro 0, 3, 0, 4
// Store C
GST xv, , U0, C0, 0x00, U1, C1, 0x00, U2, C2, 0x00, U3, C3, 0x00
.endm
.macro dsolve_4x2
// We are going to process matrix B with a size of 2x2,
// using only the upper triangular portion. The memory layout of
// matrix B is as follows:
//0 1
// 3
// Sequentially extract data from B in row order
ldrepl_macro 16, 17, 0
GMUL xvf, d, U0, D0, U0
ldrepl_macro 18, 18, 3
nmsub_macro 1, 1, 0, D1
GMUL xvf, d, U1, D2, U1
// Store A
A_st_macro 0, 1, 0, 4
// Store C
GST xv, , U0, C0, 0x00, U1, C1, 0x00
.endm
.macro dsolve_2x4
// We are going to process matrix B with a size of 4x4,
// using only the upper triangular portion. The memory layout of
// matrix B is as follows:
//0 1 2 3
// 5 6 7
// 10 11
// 15
// Sequentially extract data from B in row order
ldrepl_macro 16, 19, 0
GMUL xvf, d, U0, D0, U0
ldrepl_macro 20, 22, 5
nmsub_macro 1, 1, 0, D1
ldrepl_macro 23, 24, 10
GMUL xvf, d, U1, D4, U1
ldrepl_macro 25, 25, 15
nmsub_macro 2, 2, 0, D2
nmsub_macro 2, 2, 1, D5
GMUL xvf, d, U2, D7, U2
nmsub_macro 3, 3, 0, D3
nmsub_macro 3, 3, 1, D6
nmsub_macro 3, 3, 2, D8
GMUL xvf, d, U3, D9, U3
// Store A
A_st_macro 0, 3, 0, 2
// Store C
GST v, , $vr0, C0, 0x00, $vr1, C1, 0x00, $vr2, C2, 0x00, $vr3, C3, 0x00,
.endm
.macro dsolve_2x2
// We are going to process matrix B with a size of 2x2,
// using only the upper triangular portion. The memory layout of
// matrix B is as follows:
//0 1
// 3
// Sequentially extract data from B in row order
ldrepl_macro 16, 17, 0
GMUL xvf, d, U0, D0, U0
ldrepl_macro 18, 18, 3
nmsub_macro 1, 1, 0, D1
GMUL xvf, d, U1, D2, U1
// Store A
A_st_macro 0, 1, 0, 2
// Store C
GST v, , $vr0, C0, 0x00, $vr1, C1, 0x00
.endm
.macro dsolve_1x4
// We are going to process matrix B with a size of 4x4,
// using only the upper triangular portion. The memory layout of
// matrix B is as follows:
//0 1 2 3
// 5 6 7
// 10 11
// 15
// Sequentially extract data from B in row order
ldrepl_macro 16, 19, 0
GMUL xvf, d, U0, D0, U0
ldrepl_macro 20, 22, 5
nmsub_macro 1, 1, 0, D1
ldrepl_macro 23, 24, 10
GMUL xvf, d, U1, D4, U1
ldrepl_macro 25, 25, 15
nmsub_macro 2, 2, 0, D2
nmsub_macro 2, 2, 1, D5
GMUL xvf, d, U2, D7, U2
nmsub_macro 3, 3, 0, D3
nmsub_macro 3, 3, 1, D6
nmsub_macro 3, 3, 2, D8
GMUL xvf, d, U3, D9, U3
// Store A
A_st_macro 0, 3, 0, 1
// Store C
GST f, d, $f0, C0, 0x00, $f1, C1, 0x00, $f2, C2, 0x00, $f3, C3, 0x00,
.endm
.macro dsolve_1x2
// We are going to process matrix B with a size of 2x2,
// using only the upper triangular portion. The memory layout of
// matrix B is as follows:
//0 1
// 3
// Sequentially extract data from B in row order
ldrepl_macro 16, 17, 0
GMUL xvf, d, U0, D0, U0
ldrepl_macro 18, 18, 3
nmsub_macro 1, 1, 0, D1
GMUL xvf, d, U1, D2, U1
// Store A
A_st_macro 0, 1, 0, 1
// Store C
GST f, d, $f0, C0, 0x00, $f1, C1, 0x00
.endm
.macro dgemm_dsolve_16x4
bge ZERO, L, .L_dsolve_16x4_load
dgemm_16x4
b .L_dsolve_16x4
.L_dsolve_16x4_load:
// Load C
GLD xv, , U0, C0, 0x00, U1, C0, 0x20, U2, C0, 0x40, U3, C0, 0x60
GLD xv, , U4, C1, 0x00, U5, C1, 0x20, U6, C1, 0x40, U7, C1, 0x60
GLD xv, , U8, C2, 0x00, U9, C2, 0x20, U10, C2, 0x40, U11, C2, 0x60
GLD xv, , U12, C3, 0x00, U13, C3, 0x20, U14, C3, 0x40, U15, C3, 0x60
/********************** solver ******************/
.L_dsolve_16x4:
dsolve_16x4
.endm
.macro dgemm_dsolve_8x4
bge ZERO, L, .L_dsolve_8x4_load
dgemm_8x4
b .L_dsolve_8x4
.L_dsolve_8x4_load:
/* Load C0 */
xvld U0, C0, 0x00
xvld U1, C0, 0x20
/* Load C1 */
xvld U2, C1, 0x00
xvld U3, C1, 0x20
/* Load C2 */
xvld U4, C2, 0x00
xvld U5, C2, 0x20
/* Load C3 */
xvld U6, C3, 0x00
xvld U7, C3, 0x20
/********* solver *********/
.L_dsolve_8x4:
dsolve_8x4
.endm
.macro dgemm_dsolve_4x4
bge ZERO, L, .L_dsolve_4x4_load
dgemm_4x4
b .L_dsolve_4x4
.L_dsolve_4x4_load:
/* Load C0 */
xvld U0, C0, 0x00
/* Load C1 */
xvld U1, C1, 0x00
/* Load C2 */
xvld U2, C2, 0x00
/* Load C3 */
xvld U3, C3, 0x00
/************** solver *****************/
.L_dsolve_4x4:
dsolve_4x4
.endm
.macro dgemm_dsolve_2x4
bge ZERO, L, .L_dsolve_2x4_load
dgemm_2x4
xvpermi.q U2, U0, 0x01
xvpermi.q U3, U1, 0x01
b .L_dsolve_2x4
.L_dsolve_2x4_load:
/* Load C0 */
xvld U0, C0, 0x00
/* Load C1 */
xvld U1, C1, 0x00
/* Load C2 */
xvld U2, C2, 0x00
/* Load C3 */
xvld U3, C3, 0x00
/********************** solver ******************/
.L_dsolve_2x4:
dsolve_2x4
.endm
.macro dgemm_dsolve_1x4
bge ZERO, L, .L_dsolve_1x4_load
dgemm_1x4
xvpackod.d U1, U0, U0
xvpermi.q U2, U0, 0x01
xvpermi.q U3, U1, 0x01
b .L_dsolve_1x4
.L_dsolve_1x4_load:
// Load C
fld.d $f0, C0, 0x00
fld.d $f1, C1, 0x00
fld.d $f2, C2, 0x00
fld.d $f3, C3, 0x00
.L_dsolve_1x4:
dsolve_1x4
.endm
.macro dgemm_dsolve_16x2
bge ZERO, L, .L_dsolve_16x2_load
dgemm_16x2
b .L_dsolve_16x2
.L_dsolve_16x2_load:
/* Load C0 */
xvld U0, C0, 0x00
xvld U1, C0, 0x20
xvld U2, C0, 0x40
xvld U3, C0, 0x60
/* Load C1 */
xvld U4, C1, 0x00
xvld U5, C1, 0x20
xvld U6, C1, 0x40
xvld U7, C1, 0x60
.L_dsolve_16x2:
dsolve_16x2
.endm
.macro dgemm_dsolve_8x2
bge ZERO, L, .L_dsolve_8x2_load
dgemm_8x2
b .L_dsolve_8x2
.L_dsolve_8x2_load:
/* Load C0 */
xvld U0, C0, 0x00
xvld U1, C0, 0x20
/* Load C1 */
xvld U2, C1, 0x00
xvld U3, C1, 0x20
.L_dsolve_8x2:
dsolve_8x2
.endm
.macro dgemm_dsolve_4x2
bge ZERO, L, .L_dsolve_4x2_load
dgemm_4x2
b .L_dsolve_4x2
.L_dsolve_4x2_load:
/* Load C0 */
xvld U0, C0, 0x00
/* Load C1 */
xvld U1, C1, 0x00
.L_dsolve_4x2:
dsolve_4x2
.endm
.macro dgemm_dsolve_2x2
bge ZERO, L, .L_dsolve_2x2_load
dgemm_2x2
b .L_dsolve_2x2
.L_dsolve_2x2_load:
/* Load C0 */
xvld U0, C0, 0x00
/* Load C1 */
xvld U1, C1, 0x00
.L_dsolve_2x2:
dsolve_2x2
.endm
.macro dgemm_dsolve_1x2
bge ZERO, L, .L_dsolve_1x2_load
dgemm_1x2
xvpackod.d U1, U0, U0
b .L_dsolve_1x2
.L_dsolve_1x2_load:
// Load C
fld.d $f0, C0, 0x00
fld.d $f1, C1, 0x00
.L_dsolve_1x2:
dsolve_1x2
.endm
.macro dgemm_dsolve_16x1
bge ZERO, L, .L_dsolve_16x1_load
dgemm_16x1
b .L_dsolve_16x1
.L_dsolve_16x1_load:
/* Load C0 */
xvld U0, C0, 0x00
xvld U1, C0, 0x20
xvld U2, C0, 0x40
xvld U3, C0, 0x60
.L_dsolve_16x1:
ldrepl_macro 16, 16, 0
GMUL xvf, d, U0, D0, U0, U1, D0, U1, U2, D0, U2, U3, D0, U3
// Store A
A_st_macro 0, 3, 0, 4
// Strore C
GST xv, , U0, C0, 0x00, U1, C0, 0x20, U2, C0, 0x40, U3, C0, 0x60
.endm
.macro dgemm_dsolve_8x1
bge ZERO, L, .L_dsolve_8x1_load
dgemm_8x1
b .L_dsolve_8x1
.L_dsolve_8x1_load:
/* Load C0 */
xvld U0, C0, 0x00
xvld U1, C0, 0x20
.L_dsolve_8x1:
ldrepl_macro 16, 16, 0
GMUL xvf, d, U0, D0, U0, U1, D0, U1
// Store A
A_st_macro 0, 1, 0, 4
// Strore C
GST xv, , U0, C0, 0x00, U1, C0, 0x20
.endm
.macro dgemm_dsolve_4x1
bge ZERO, L, .L_dsolve_4x1_load
dgemm_4x1
b .L_dsolve_4x1
.L_dsolve_4x1_load:
/* Load C0 */
xvld U0, C0, 0x00
.L_dsolve_4x1:
ldrepl_macro 16, 16, 0
GMUL xvf, d, U0, D0, U0
// Store A
A_st_macro 0, 0, 0, 4
// Strore C
GST xv, , U0, C0, 0x00
.endm
.macro dgemm_dsolve_2x1
bge ZERO, L, .L_dsolve_2x1_load
dgemm_2x1
b .L_dsolve_2x1
.L_dsolve_2x1_load:
/* Load C0 */
xvld U0, C0, 0x00
.L_dsolve_2x1:
ldrepl_macro 16, 16, 0
GMUL xvf, d, U0, D0, U0
// Store A
A_st_macro 0, 0, 0, 2
// Strore C
GST v, , $vr0, C0, 0x00
.endm
.macro dgemm_dsolve_1x1
bge ZERO, L, .L_dsolve_1x1_load
dgemm_1x1
b .L_dsolve_1x1
.L_dsolve_1x1_load:
// Load C
fld.d $f0, C0, 0x00
.L_dsolve_1x1:
ldrepl_macro 16, 16, 0
GMUL xvf, d, U0, D0, U0
// Store A
A_st_macro 0, 0, 0, 1
// Strore C
GST f, d, $f0, C0, 0x00
.endm
PROLOGUE
push_if_used 26, 32
PTR_SLLI LDC, LDC, 3
PTR_SUB KK, ZERO, OFFSET
/* if (!(N >> 2)) goto L_N3 */
PTR_SRAI J, N, 2 /* J = bn >> 2 */
andi N, N, 0x03
beq ZERO, J, .L_N3
.align 5
.L_J1:
PTR_ADDI J, J, -1
move AA, A
move CC, C
PTR_SRAI I, M, 4 // M >> 4
beqz I, .L_M15
.align 4
.L_I1:
GADD , d, C0, CC, ZERO, C1, C0, LDC, C2, C1, LDC, C3, C2, LDC
move A0, AA
move B0, B
move L, KK
dgemm_dsolve_16x4
PTR_ADDI I, I, -1
PTR_SLLI T0, K, 7
PTR_ADDI CC, CC, 0x80 // cc += 16
PTR_ADD AA, AA, T0 // aa += 16 * k
bnez I, .L_I1
.L_M15:
andi I, M, 8
beqz I, .L_M7
.L_M8:
GADD , d, C0, CC, ZERO, C1, C0, LDC, C2, C1, LDC, C3, C2, LDC
move A0, AA
move B0, B
move L, KK
dgemm_dsolve_8x4
PTR_SLLI T0, K, 6
PTR_ADDI CC, CC, 0x40 // cc += 8
PTR_ADD AA, AA, T0 // aa += 8 * k
.L_M7:
andi I, M, 4
beqz I, .L_M3
.L_M4:
GADD , d, C0, CC, ZERO, C1, C0, LDC, C2, C1, LDC, C3, C2, LDC
move A0, AA
move B0, B
move L, KK
dgemm_dsolve_4x4
PTR_SLLI T0, K, 5
PTR_ADDI CC, CC, 0x20 // cc += 4
PTR_ADD AA, AA, T0 // aa += 4 * k
.L_M3:
andi I, M, 2
beqz I, .L_M1
.L_M2:
GADD , d, C0, CC, ZERO, C1, C0, LDC, C2, C1, LDC, C3, C2, LDC
move A0, AA
move B0, B
move L, KK
dgemm_dsolve_2x4
PTR_SLLI T0, K, 4
PTR_ADDI CC, CC, 0x10 // cc += 2
PTR_ADD AA, AA, T0 // aa += 2 * k
.L_M1:
andi I, M, 1
beqz I, .L_M0
GADD , d, C0, CC, ZERO, C1, C0, LDC, C2, C1, LDC, C3, C2, LDC
move A0, AA
move B0, B
move L, KK
dgemm_dsolve_1x4
PTR_SLLI T0, K, 3
PTR_ADDI CC, CC, 0x08 // cc += 1
PTR_ADD AA, AA, T0 // aa += 1 * k
.L_M0:
PTR_SLLI T0, K, 5
PTR_SLLI T1, LDC, 2
PTR_ADD B, B, T0 // b += 4 * k
PTR_ADD C, C, T1 // c += 4 * ldc
PTR_ADDI KK, KK, 4 // kk += 4
bnez J, .L_J1
.L_N3:
andi J, N, 2
beq ZERO, J, .L_N1
.L_N2:
move AA, A
move CC, C
PTR_SRAI I, M, 4 // M >> 4
beqz I, .L_N2_M15
.align 4
.L_N2_I1:
GADD , d, C0, CC, ZERO, C1, C0, LDC
move A0, AA
move B0, B
move L, KK
dgemm_dsolve_16x2
PTR_ADDI I, I, -1
PTR_SLLI T0, K, 7
PTR_ADDI CC, CC, 0x80 // cc += 16
PTR_ADD AA, AA, T0 // aa += 16 * k
bnez I, .L_N2_I1
.L_N2_M15:
andi I, M, 8
beqz I, .L_N2_M7
.L_N2_M8:
GADD , d, C0, CC, ZERO, C1, C0, LDC
move A0, AA
move B0, B
move L, KK
dgemm_dsolve_8x2
PTR_SLLI T0, K, 6
PTR_ADDI CC, CC, 0x40 // cc += 8
PTR_ADD AA, AA, T0 // aa += 8 * k
.L_N2_M7:
andi I, M, 4
beqz I, .L_N2_M3
.L_N2_M4:
GADD , d, C0, CC, ZERO, C1, C0, LDC
move A0, AA
move B0, B
move L, KK
dgemm_dsolve_4x2
PTR_SLLI T0, K, 5
PTR_ADDI CC, CC, 0x20 // cc += 4
PTR_ADD AA, AA, T0 // aa += 4 * k
.L_N2_M3:
andi I, M, 2
beqz I, .L_N2_M1
.L_N2_M2:
GADD , d, C0, CC, ZERO, C1, C0, LDC
move A0, AA
move B0, B
move L, KK
dgemm_dsolve_2x2
PTR_SLLI T0, K, 4
PTR_ADDI CC, CC, 0x10 // cc += 2
PTR_ADD AA, AA, T0 // aa += 2 * k
.L_N2_M1:
andi I, M, 1
beqz I, .L_N2_M0
GADD , d, C0, CC, ZERO, C1, C0, LDC
move A0, AA
move B0, B
move L, KK
dgemm_dsolve_1x2
PTR_SLLI T0, K, 3
PTR_ADDI CC, CC, 0x08 // cc += 1
PTR_ADD AA, AA, T0 // aa += 1 * k
.L_N2_M0:
PTR_SLLI T0, K, 4
PTR_SLLI T1, LDC, 1
PTR_ADD B, B, T0 // b += 2 * k
PTR_ADD C, C, T1 // c += 2 * ldc
PTR_ADDI KK, KK, 2 // kk += 2
.L_N1:
andi J, N, 1
beq ZERO, J, .L_N0
move AA, A
move CC, C
PTR_SRAI I, M, 4 // M >> 4
beqz I, .L_N1_M15
.align 4
.L_N1_I1:
GADD , d, C0, CC, ZERO
move A0, AA
move B0, B
move L, KK
dgemm_dsolve_16x1
PTR_ADDI I, I, -1
PTR_SLLI T0, K, 7
PTR_ADDI CC, CC, 0x80 // cc += 16
PTR_ADD AA, AA, T0 // aa += 16 * k
bnez I, .L_N1_I1
.L_N1_M15:
andi I, M, 8
beqz I, .L_N1_M7
.L_N1_M8:
GADD , d, C0, CC, ZERO
move A0, AA
move B0, B
move L, KK
dgemm_dsolve_8x1
PTR_SLLI T0, K, 6
PTR_ADDI CC, CC, 0x40 // cc += 8
PTR_ADD AA, AA, T0 // aa += 8 * k
.L_N1_M7:
andi I, M, 4
beqz I, .L_N1_M3
.L_N1_M4:
GADD , d, C0, CC, ZERO
move A0, AA
move B0, B
move L, KK
dgemm_dsolve_4x1
PTR_SLLI T0, K, 5
PTR_ADDI CC, CC, 0x20 // cc += 4
PTR_ADD AA, AA, T0 // aa += 4 * k
.L_N1_M3:
andi I, M, 2
beqz I, .L_N1_M1
.L_N1_M2:
GADD , d, C0, CC, ZERO
move A0, AA
move B0, B
move L, KK
dgemm_dsolve_2x1
PTR_SLLI T0, K, 4
PTR_ADDI CC, CC, 0x10 // cc += 2
PTR_ADD AA, AA, T0 // aa += 2 * k
.L_N1_M1:
andi I, M, 1
beqz I, .L_N1_M0
GADD , d, C0, CC, ZERO
move A0, AA
move B0, B
move L, KK
dgemm_dsolve_1x1
PTR_SLLI T0, K, 3
PTR_ADDI CC, CC, 0x08 // cc += 1
PTR_ADD AA, AA, T0 // aa += 1 * k
.L_N1_M0:
.L_N0:
pop_if_used 26, 32
jirl $r0, $r1, 0x0
EPILOGUE

View File

@ -0,0 +1,953 @@
/*******************************************************************************
Copyright (c) 2023, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*******************************************************************************/
#define ASSEMBLER
#include "common.h"
#include "loongarch64_asm.S"
/*********************************************************************
* 2023/09/26 guxiwei
* UTEST : OK
* CTEST : OK
* TEST : OK
*
*
*********************************************************************/
/* int CNAME(BLASLONG m, BLASLONG n, BLASLONG k, FLOAT dummy1, FLOAT *a, FLOAT *b,
* FLOAT *c, BLASLONG ldc, BLASLONG offset)
*/
#define M $r4 // param 1: bm
#define N $r5 // param 2: bn
#define K $r6 // param 3: bk
#define A $r7 // param 5: ba
#define B $r8 // param 6: bb
#define C $r9 // param 7: bc
#define LDC $r10 // param 8: ldc
#define OFFSET $r11 // param 9: offset
/* Cycle control parameters */
#define I $r13
#define J $r14
#define L $r15
#define TL $r16
/* Matrix address */
#define A0 $r17
#define B0 $r18
#define C0 $r19
#define C1 $r20
#define C2 $r23
#define C3 $r24
#define T0 $r25
#define T1 $r26
#define T2 $r27
#define KK $r28
#define AA $r29
#define CC $r30
#define BB $r31
#undef ZERO
#define ZERO $r0
#define U0 $xr0
#define U1 $xr1
#define U2 $xr2
#define U3 $xr3
#define U4 $xr4
#define U5 $xr5
#define U6 $xr6
#define U7 $xr7
#define U8 $xr8
#define U9 $xr9
#define U10 $xr10
#define U11 $xr11
#define U12 $xr12
#define U13 $xr13
#define U14 $xr14
#define U15 $xr15
#define D0 $xr16
#define D1 $xr17
#define D2 $xr18
#define D3 $xr19
#define D4 $xr20
#define D5 $xr21
#define D6 $xr22
#define D7 $xr23
#define D8 $xr24
#define D9 $xr25
#define D10 $xr26
#define D11 $xr27
#define D12 $xr28
#define D13 $xr29
#define D14 $xr30
#define D15 $xr31
/* Prefetch interval */
#define A_PRE 0x400
#define B_PRE 0x100
#include "dtrsm_kernel_macro.S"
.macro ldrepl_macro start, end, stride
// Load Ux (x = 0...15)
.if \start <= \end
GLDREPL xv, d, $xr\start, B0, \stride * 8
ldrepl_macro %start + 1, \end, %stride + 1
.endif
.endm
.macro nmsub_macro start0, end0, start1, reg
// Ux -= reg * Dx
.if \start0 <= \end0
xvfnmsub.d $xr\start0, \reg, $xr\start1, $xr\start0
nmsub_macro %start0 + 1, \end0, %start1 + 1, \reg
.endif
.endm
.macro A_st_macro start, end, stride, N
// Store Ux(x = 0...15)
.if \start <= \end
.if \N == 4
xvst $xr\start, A0, \stride * 0x20
.elseif \N == 2
vst $vr\start, A0, \stride * 0x10
.elseif \N == 1
fst.d $f\start, A0, \stride * 0x08
.endif
A_st_macro %start + 1, \end, %stride + 1, \N
.endif
.endm
.macro dsolve_16x2
// We are going to process matrix B with a size of 2x2,
// using only the upper triangular portion. The memory layout of
// matrix B is as follows:
//0
//2 3
// Sequentially extract data from B in row order
ldrepl_macro 16, 16, 0
ldrepl_macro 17, 18, 2
GMUL xvf, d, U4, D2, U4, U5, D2, U5, U6, D2, U6, U7, D2, U7
nmsub_macro 0, 3, 4, D1
GMUL xvf, d, U0, D0, U0, U1, D0, U1, U2, D0, U2, U3, D0, U3
// Store A
A_st_macro 0, 7, 0, 4
// Store C
GST xv, , U0, C0, 0x00, U1, C0, 0x20, U2, C0, 0x40, U3, C0, 0x60, \
U4, C1, 0x00, U5, C1, 0x20, U6, C1, 0x40, U7, C1, 0x60
.endm
.macro dsolve_8x2
// We are going to process matrix B with a size of 2x2,
// using only the upper triangular portion. The memory layout of
// matrix B is as follows:
//0
//2 3
// Sequentially extract data from B in row order
ldrepl_macro 16, 16, 0
ldrepl_macro 17, 18, 2
GMUL xvf, d, U2, D2, U2, U3, D2, U3
nmsub_macro 0, 1, 2, D1
GMUL xvf, d, U0, D0, U0, U1, D0, U1
// Store A
A_st_macro 0, 3, 0, 4
// Store C
GST xv, , U0, C0, 0x00, U1, C0, 0x20, \
U2, C1, 0x00, U3, C1, 0x20
.endm
.macro dsolve_4x2
// We are going to process matrix B with a size of 2x2,
// using only the upper triangular portion. The memory layout of
// matrix B is as follows:
//0
//2 3
// Sequentially extract data from B in row order
ldrepl_macro 16, 16, 0
ldrepl_macro 17, 18, 2
GMUL xvf, d, U1, D2, U1
nmsub_macro 0, 0, 1, D1
GMUL xvf, d, U0, D0, U0
// Store A
A_st_macro 0, 1, 0, 4
// Store C
GST xv, , U0, C0, 0x00, U1, C1, 0x00
.endm
.macro dsolve_2x2
// We are going to process matrix B with a size of 2x2,
// using only the upper triangular portion. The memory layout of
// matrix B is as follows:
//0
//2 3
// Sequentially extract data from B in row order
ldrepl_macro 16, 16, 0
ldrepl_macro 17, 18, 2
GMUL xvf, d, U1, D2, U1
nmsub_macro 0, 0, 1, D1
GMUL xvf, d, U0, D0, U0
// Store A
A_st_macro 0, 1, 0, 2
// Store C
GST v, , $vr0, C0, 0x00, $vr1, C1, 0x00
.endm
.macro dsolve_1x2
// We are going to process matrix B with a size of 2x2,
// using only the upper triangular portion. The memory layout of
// matrix B is as follows:
//0
//2 3
// Sequentially extract data from B in row order
ldrepl_macro 16, 16, 0
ldrepl_macro 17, 18, 2
GMUL xvf, d, U1, D2, U1
nmsub_macro 0, 0, 1, D1
GMUL xvf, d, U0, D0, U0
// Store A
A_st_macro 0, 1, 0, 1
// Store C
GST f, d, $f0, C0, 0x00, $f1, C1, 0x00
.endm
.macro dsolve_16x4
// We are going to process matrix B with a size of 4x4,
// using only the upper triangular portion. The memory layout of
// matrix B is as follows:
//0
//4 5
//8 9 10
//12 13 14 15
// Sequentially extract data from B in row order
ldrepl_macro 22, 25, 12
GMUL xvf, d, U12, D9, U12, U13, D9, U13, U14, D9, U14, U15, D9, U15
ldrepl_macro 19, 21, 8
nmsub_macro 8, 11, 12, D8
ldrepl_macro 17, 18, 4
GMUL xvf, d, U8, D5, U8, U9, D5, U9, U10, D5, U10, U11, D5, U11
ldrepl_macro 16, 16, 0
nmsub_macro 4, 7, 12, D7
nmsub_macro 4, 7, 8, D4
GMUL xvf, d, U4, D2, U4, U5, D2, U5, U6, D2, U6, U7, D2, U7
nmsub_macro 0, 3, 12, D6
nmsub_macro 0, 3, 8, D3
nmsub_macro 0, 3, 4, D1
GMUL xvf, d, U0, D0, U0, U1, D0, U1, U2, D0, U2, U3, D0, U3
// Store A
A_st_macro 0, 15, 0, 4
// Store C
GST xv, , U0, C0, 0x00, U1, C0, 0x20, U2, C0, 0x40, U3, C0, 0x60, \
U4, C1, 0x00, U5, C1, 0x20, U6, C1, 0x40, U7, C1, 0x60, \
U8, C2, 0x00, U9, C2, 0x20, U10, C2, 0x40, U11, C2, 0x60, \
U12, C3, 0x00, U13, C3, 0x20, U14, C3, 0x40, U15, C3, 0x60
.endm
.macro dsolve_8x4
// We are going to process matrix B with a size of 4x4,
// using only the upper triangular portion. The memory layout of
// matrix B is as follows:
//0
//4 5
//8 9 10
//12 13 14 15
// Sequentially extract data from B in row order
ldrepl_macro 22, 25, 12
GMUL xvf, d, U6, D9, U6, U7, D9, U7
ldrepl_macro 19, 21, 8
nmsub_macro 4, 5, 6, D8
ldrepl_macro 17, 18, 4
GMUL xvf, d, U4, D5, U4, U5, D5, U5
ldrepl_macro 16, 16, 0
nmsub_macro 2, 3, 6, D7
nmsub_macro 2, 3, 4, D4
GMUL xvf, d, U2, D2, U2, U3, D2, U3
nmsub_macro 0, 1, 6, D6
nmsub_macro 0, 1, 4, D3
nmsub_macro 0, 1, 2, D1
GMUL xvf, d, U0, D0, U0, U1, D0, U1
// Store A
A_st_macro 0, 7, 0, 4
// Store C
GST xv, , U0, C0, 0x00, U1, C0, 0x20, \
U2, C1, 0x00, U3, C1, 0x20, \
U4, C2, 0x00, U5, C2, 0x20, \
U6, C3, 0x00, U7, C3, 0x20
.endm
.macro dsolve_4x4
// We are going to process matrix B with a size of 4x4,
// using only the upper triangular portion. The memory layout of
// matrix B is as follows:
//0
//4 5
//8 9 10
//12 13 14 15
// Sequentially extract data from B in row order
ldrepl_macro 22, 25, 12
GMUL xvf, d, U3, D9, U3
ldrepl_macro 19, 21, 8
nmsub_macro 2, 2, 3, D8
ldrepl_macro 17, 18, 4
GMUL xvf, d, U2, D5, U2
ldrepl_macro 16, 16, 0
nmsub_macro 1, 1, 3, D7
nmsub_macro 1, 1, 2, D4
GMUL xvf, d, U1, D2, U1
nmsub_macro 0, 0, 3, D6
nmsub_macro 0, 0, 2, D3
nmsub_macro 0, 0, 1, D1
GMUL xvf, d, U0, D0, U0
// Store A
A_st_macro 0, 3, 0, 4
// Store C
GST xv, , U0, C0, 0x00, U1, C1, 0x00, U2, C2, 0x00, U3, C3, 0x00
.endm
.macro dsolve_2x4
// We are going to process matrix B with a size of 4x4,
// using only the upper triangular portion. The memory layout of
// matrix B is as follows:
//0
//4 5
//8 9 10
//12 13 14 15
// Sequentially extract data from B in row order
ldrepl_macro 22, 25, 12
GMUL xvf, d, U3, D9, U3
ldrepl_macro 19, 21, 8
nmsub_macro 2, 2, 3, D8
ldrepl_macro 17, 18, 4
GMUL xvf, d, U2, D5, U2
ldrepl_macro 16, 16, 0
nmsub_macro 1, 1, 3, D7
nmsub_macro 1, 1, 2, D4
GMUL xvf, d, U1, D2, U1
nmsub_macro 0, 0, 3, D6
nmsub_macro 0, 0, 2, D3
nmsub_macro 0, 0, 1, D1
GMUL xvf, d, U0, D0, U0
// Store A
A_st_macro 0, 3, 0, 2
// Store C
GST v, , $vr0, C0, 0x00, $vr1, C1, 0x00, $vr2, C2, 0x00, $vr3, C3, 0x00
.endm
.macro dsolve_1x4
// We are going to process matrix B with a size of 4x4,
// using only the upper triangular portion. The memory layout of
// matrix B is as follows:
//0
//4 5
//8 9 10
//12 13 14 15
// Sequentially extract data from B in row order
ldrepl_macro 22, 25, 12
GMUL xvf, d, U3, D9, U3
ldrepl_macro 19, 21, 8
nmsub_macro 2, 2, 3, D8
ldrepl_macro 17, 18, 4
GMUL xvf, d, U2, D5, U2
ldrepl_macro 16, 16, 0
nmsub_macro 1, 1, 3, D7
nmsub_macro 1, 1, 2, D4
GMUL xvf, d, U1, D2, U1
nmsub_macro 0, 0, 3, D6
nmsub_macro 0, 0, 2, D3
nmsub_macro 0, 0, 1, D1
GMUL xvf, d, U0, D0, U0
// Store A
A_st_macro 0, 3, 0, 1
// Store C
GST f, d, $f0, C0, 0x00, $f1, C1, 0x00, $f2, C2, 0x00, $f3, C3, 0x00,
.endm
.macro dgemm_dsolve_16x1
or T1, A0, A0
or T2, B0, B0
bge ZERO, L, .L_dsolve_16x1_load
dgemm_16x1
b .L_dsolve_16x1
.L_dsolve_16x1_load:
/* Load C0 */
xvld U0, C0, 0x00
xvld U1, C0, 0x20
xvld U2, C0, 0x40
xvld U3, C0, 0x60
.L_dsolve_16x1:
PTR_ADDI A0, T1, -16 * 8
PTR_ADDI B0, T2, -1 * 8
ldrepl_macro 16, 16, 0
GMUL xvf, d, U0, D0, U0, U1, D0, U1, U2, D0, U2, U3, D0, U3
// Store A
A_st_macro 0, 3, 0, 4
// Strore C
GST xv, , U0, C0, 0x00, U1, C0, 0x20, U2, C0, 0x40, U3, C0, 0x60
.endm
.macro dgemm_dsolve_8x1
or T1, A0, A0
or T2, B0, B0
bge ZERO, L, .L_dsolve_8x1_load
dgemm_8x1
b .L_dsolve_8x1
.L_dsolve_8x1_load:
/* Load C0 */
xvld U0, C0, 0x00
xvld U1, C0, 0x20
.L_dsolve_8x1:
PTR_ADDI A0, T1, -8 * 8
PTR_ADDI B0, T2, -1 * 8
ldrepl_macro 16, 16, 0
GMUL xvf, d, U0, D0, U0, U1, D0, U1
// Store A
A_st_macro 0, 1, 0, 4
// Strore C
GST xv, , U0, C0, 0x00, U1, C0, 0x20
.endm
.macro dgemm_dsolve_4x1
or T1, A0, A0
or T2, B0, B0
bge ZERO, L, .L_dsolve_4x1_load
dgemm_4x1
b .L_dsolve_4x1
.L_dsolve_4x1_load:
/* Load C0 */
xvld U0, C0, 0x00
.L_dsolve_4x1:
PTR_ADDI A0, T1, -4 * 8
PTR_ADDI B0, T2, -1 * 8
ldrepl_macro 16, 16, 0
GMUL xvf, d, U0, D0, U0
// Store A
A_st_macro 0, 0, 0, 4
// Strore C
GST xv, , U0, C0, 0x00
.endm
.macro dgemm_dsolve_2x1
or T1, A0, A0
or T2, B0, B0
bge ZERO, L, .L_dsolve_2x1_load
dgemm_2x1
b .L_dsolve_2x1
.L_dsolve_2x1_load:
/* Load C0 */
xvld U0, C0, 0x00
.L_dsolve_2x1:
PTR_ADDI A0, T1, -2 * 8
PTR_ADDI B0, T2, -1 * 8
ldrepl_macro 16, 16, 0
GMUL xvf, d, U0, D0, U0
// Store A
A_st_macro 0, 0, 0, 2
// Strore C
GST v, , $vr0, C0, 0x00
.endm
.macro dgemm_dsolve_1x1
or T1, A0, A0
or T2, B0, B0
bge ZERO, L, .L_dsolve_1x1_load
dgemm_1x1
b .L_dsolve_1x1
.L_dsolve_1x1_load:
// Load C
fld.d $f0, C0, 0x00
.L_dsolve_1x1:
PTR_ADDI A0, T1, -1 * 8
PTR_ADDI B0, T2, -1 * 8
ldrepl_macro 16, 16, 0
GMUL xvf, d, U0, D0, U0
// Store A
A_st_macro 0, 0, 0, 1
// Strore C
GST f, d, $f0, C0, 0x00
.endm
.macro dgemm_dsolve_16x2
or T1, A0, A0
or T2, B0, B0
bge ZERO, L, .L_dsolve_16x2_load
dgemm_16x2
b .L_dsolve_16x2
.L_dsolve_16x2_load:
/* Load C0 */
xvld U0, C0, 0x00
xvld U1, C0, 0x20
xvld U2, C0, 0x40
xvld U3, C0, 0x60
/* Load C1 */
xvld U4, C1, 0x00
xvld U5, C1, 0x20
xvld U6, C1, 0x40
xvld U7, C1, 0x60
.L_dsolve_16x2:
PTR_ADDI A0, T1, -(16 * 2) * 8
PTR_ADDI B0, T2, -(2 * 2) * 8
dsolve_16x2
.endm
.macro dgemm_dsolve_8x2
or T1, A0, A0
or T2, B0, B0
bge ZERO, L, .L_dsolve_8x2_load
dgemm_8x2
b .L_dsolve_8x2
.L_dsolve_8x2_load:
/* Load C0 */
xvld U0, C0, 0x00
xvld U1, C0, 0x20
/* Load C1 */
xvld U2, C1, 0x00
xvld U3, C1, 0x20
.L_dsolve_8x2:
PTR_ADDI A0, T1, -(8 * 2) * 8
PTR_ADDI B0, T2, -(2 * 2) * 8
dsolve_8x2
.endm
.macro dgemm_dsolve_4x2
or T1, A0, A0
or T2, B0, B0
bge ZERO, L, .L_dsolve_4x2_load
dgemm_4x2
b .L_dsolve_4x2
.L_dsolve_4x2_load:
/* Load C0 */
xvld U0, C0, 0x00
/* Load C1 */
xvld U1, C1, 0x00
.L_dsolve_4x2:
PTR_ADDI A0, T1, -(4 * 2) * 8
PTR_ADDI B0, T2, -(2 * 2) * 8
dsolve_4x2
.endm
.macro dgemm_dsolve_2x2
or T1, A0, A0
or T2, B0, B0
bge ZERO, L, .L_dsolve_2x2_load
dgemm_2x2
b .L_dsolve_2x2
.L_dsolve_2x2_load:
/* Load C0 */
xvld U0, C0, 0x00
/* Load C1 */
xvld U1, C1, 0x00
.L_dsolve_2x2:
PTR_ADDI A0, T1, -(2 * 2) * 8
PTR_ADDI B0, T2, -(2 * 2) * 8
dsolve_2x2
.endm
.macro dgemm_dsolve_1x2
or T1, A0, A0
or T2, B0, B0
bge ZERO, L, .L_dsolve_1x2_load
dgemm_1x2
xvpackod.d U1, U0, U0
b .L_dsolve_1x2
.L_dsolve_1x2_load:
// Load C
fld.d $f0, C0, 0x00
fld.d $f1, C1, 0x00
.L_dsolve_1x2:
PTR_ADDI A0, T1, -(1 * 2) * 8
PTR_ADDI B0, T2, -(2 * 2) * 8
dsolve_1x2
.endm
.macro dgemm_dsolve_16x4
or T1, A0, A0
or T2, B0, B0
bge ZERO, L, .L_dsolve_16x4_load
dgemm_16x4
b .L_dsolve_16x4
.L_dsolve_16x4_load:
// Load C
GLD xv, , U0, C0, 0x00, U1, C0, 0x20, U2, C0, 0x40, U3, C0, 0x60
GLD xv, , U4, C1, 0x00, U5, C1, 0x20, U6, C1, 0x40, U7, C1, 0x60
GLD xv, , U8, C2, 0x00, U9, C2, 0x20, U10, C2, 0x40, U11, C2, 0x60
GLD xv, , U12, C3, 0x00, U13, C3, 0x20, U14, C3, 0x40, U15, C3, 0x60
/********************** solver ******************/
.L_dsolve_16x4:
PTR_ADDI A0, T1, -(16 * 4) * 8
PTR_ADDI B0, T2, -(4 * 4) * 8
dsolve_16x4
.endm
.macro dgemm_dsolve_8x4
or T1, A0, A0
or T2, B0, B0
bge ZERO, L, .L_dsolve_8x4_load
dgemm_8x4
b .L_dsolve_8x4
.L_dsolve_8x4_load:
/* Load C0 */
xvld U0, C0, 0x00
xvld U1, C0, 0x20
/* Load C1 */
xvld U2, C1, 0x00
xvld U3, C1, 0x20
/* Load C2 */
xvld U4, C2, 0x00
xvld U5, C2, 0x20
/* Load C3 */
xvld U6, C3, 0x00
xvld U7, C3, 0x20
/********* solver *********/
.L_dsolve_8x4:
PTR_ADDI A0, T1, -(8 * 4) * 8
PTR_ADDI B0, T2, -(4 * 4) * 8
dsolve_8x4
.endm
.macro dgemm_dsolve_4x4
or T1, A0, A0
or T2, B0, B0
bge ZERO, L, .L_dsolve_4x4_load
dgemm_4x4
b .L_dsolve_4x4
.L_dsolve_4x4_load:
/* Load C0 */
xvld U0, C0, 0x00
/* Load C1 */
xvld U1, C1, 0x00
/* Load C2 */
xvld U2, C2, 0x00
/* Load C3 */
xvld U3, C3, 0x00
/************** solver *****************/
.L_dsolve_4x4:
PTR_ADDI A0, T1, -(4 * 4) * 8
PTR_ADDI B0, T2, -(4 * 4) * 8
dsolve_4x4
.endm
.macro dgemm_dsolve_2x4
or T1, A0, A0
or T2, B0, B0
bge ZERO, L, .L_dsolve_2x4_load
dgemm_2x4
xvpermi.q U2, U0, 0x01
xvpermi.q U3, U1, 0x01
b .L_dsolve_2x4
.L_dsolve_2x4_load:
/* Load C0 */
xvld U0, C0, 0x00
/* Load C1 */
xvld U1, C1, 0x00
/* Load C2 */
xvld U2, C2, 0x00
/* Load C3 */
xvld U3, C3, 0x00
/********************** solver ******************/
.L_dsolve_2x4:
PTR_ADDI A0, T1, -(2 * 4) * 8
PTR_ADDI B0, T2, -(4 * 4) * 8
dsolve_2x4
.endm
.macro dgemm_dsolve_1x4
or T1, A0, A0
or T2, B0, B0
bge ZERO, L, .L_dsolve_1x4_load
dgemm_1x4
xvpackod.d U1, U0, U0
xvpermi.q U2, U0, 0x01
xvpermi.q U3, U1, 0x01
b .L_dsolve_1x4
.L_dsolve_1x4_load:
// Load C
fld.d $f0, C0, 0x00
fld.d $f1, C1, 0x00
fld.d $f2, C2, 0x00
fld.d $f3, C3, 0x00
.L_dsolve_1x4:
PTR_ADDI A0, T1, -(1 * 4) * 8
PTR_ADDI B0, T2, -(4 * 4) * 8
dsolve_1x4
.endm
PROLOGUE
push_if_used 26, 32
PTR_SLLI LDC, LDC, 3
PTR_SUB KK, N, OFFSET
PTR_MUL T0, N, LDC
PTR_MUL T1, N, K
PTR_ADD C, C, T0 // c += n * ldc
PTR_SLLI T1, T1, 3
PTR_ADD B, B, T1
andi J, N, 1
beqz J, .L_N2
.L_N1:
move AA, A
PTR_SUB C, C, LDC // c -= ldc
PTR_SLLI T0, K, 3
PTR_SLLI T1, KK, 3
PTR_SUB B, B, T0 // b -= k
PTR_ADD BB, B, T1 // bb = b + kk
move CC, C
PTR_SRAI I, M, 4 // M >> 4
beqz I, .L_N1_M15
.align 4
.L_N1_I1:
PTR_SLLI T1, KK, 7
GADD , d, C0, CC, ZERO
PTR_ADD A0, AA, T1 // a0 = aa + 16 * kk
move B0, BB
PTR_SUB L, K, KK // L = K - KK
dgemm_dsolve_16x1
PTR_ADDI I, I, -1
PTR_SLLI T0, K, 7
PTR_ADDI CC, CC, 0x80 // cc += 16
PTR_ADD AA, AA, T0 // aa += 16 * k
bnez I, .L_N1_I1
.L_N1_M15:
andi I, M, 8
beqz I, .L_N1_M7
.L_N1_M8:
PTR_SLLI T1, KK, 6
GADD , d, C0, CC, ZERO
PTR_ADD A0, AA, T1 // a0 = aa + 8 * kk
move B0, BB
PTR_SUB L, K, KK // L = K - KK
dgemm_dsolve_8x1
PTR_SLLI T0, K, 6
PTR_ADDI CC, CC, 0x40 // cc += 8
PTR_ADD AA, AA, T0 // aa += 8 * k
.L_N1_M7:
andi I, M, 4
beqz I, .L_N1_M3
.L_N1_M4:
PTR_SLLI T1, KK, 5
GADD , d, C0, CC, ZERO
PTR_ADD A0, AA, T1 // a0 = aa + 4 * kk
move B0, BB
PTR_SUB L, K, KK // L = K - KK
dgemm_dsolve_4x1
PTR_SLLI T0, K, 5
PTR_ADDI CC, CC, 0x20 // cc += 4
PTR_ADD AA, AA, T0 // aa += 4 * k
.L_N1_M3:
andi I, M, 2
beqz I, .L_N1_M1
.L_N1_M2:
PTR_SLLI T1, KK, 4
GADD , d, C0, CC, ZERO
PTR_ADD A0, AA, T1 // a0 = aa + 2 * kk
move B0, BB
PTR_SUB L, K, KK // L = K - KK
dgemm_dsolve_2x1
PTR_SLLI T0, K, 4
PTR_ADDI CC, CC, 0x10 // cc += 2
PTR_ADD AA, AA, T0 // aa += 2 * k
.L_N1_M1:
andi I, M, 1
beqz I, .L_N1_M0
PTR_SLLI T1, KK, 3
GADD , d, C0, CC, ZERO
PTR_ADD A0, AA, T1 // a0 = aa + kk
move B0, BB
PTR_SUB L, K, KK // L = K - KK
dgemm_dsolve_1x1
PTR_SLLI T0, K, 3
PTR_ADDI CC, CC, 0x08 // cc += 1
PTR_ADD AA, AA, T0 // aa += 1 * k
.L_N1_M0:
PTR_ADDI KK, KK, -1
.L_N2:
andi J, N, 2
beq ZERO, J, .L_N4
move AA, A
PTR_SLLI T0, LDC, 1
PTR_SLLI T1, K, 4
PTR_SLLI T2, KK, 4
PTR_SUB B, B, T1
PTR_SUB C, C, T0
PTR_ADD BB, B, T2
move CC, C
PTR_SRAI I, M, 4 // M >> 4
beqz I, .L_N2_M15
.align 4
.L_N2_I1:
PTR_SLLI T1, KK, 7
GADD , d, C0, CC, ZERO, C1, C0, LDC
PTR_ADD A0, AA, T1 // a0 = aa + 16 * kk
move B0, BB
PTR_SUB L, K, KK // L = K - KK
dgemm_dsolve_16x2
PTR_ADDI I, I, -1
PTR_SLLI T0, K, 7
PTR_ADDI CC, CC, 0x80 // cc += 16
PTR_ADD AA, AA, T0 // aa += 16 * k
bnez I, .L_N2_I1
.L_N2_M15:
andi I, M, 8
beqz I, .L_N2_M7
.L_N2_M8:
PTR_SLLI T1, KK, 6
GADD , d, C0, CC, ZERO, C1, C0, LDC
PTR_ADD A0, AA, T1 // a0 = aa + 8 * kk
move B0, BB
PTR_SUB L, K, KK // L = K - KK
dgemm_dsolve_8x2
PTR_SLLI T0, K, 6
PTR_ADDI CC, CC, 0x40 // cc += 8
PTR_ADD AA, AA, T0 // aa += 8 * k
.L_N2_M7:
andi I, M, 4
beqz I, .L_N2_M3
.L_N2_M4:
PTR_SLLI T1, KK, 5
GADD , d, C0, CC, ZERO, C1, C0, LDC
PTR_ADD A0, AA, T1 // a0 = aa + 4 * kk
move B0, BB
PTR_SUB L, K, KK // L = K - KK
dgemm_dsolve_4x2
PTR_SLLI T0, K, 5
PTR_ADDI CC, CC, 0x20 // cc += 4
PTR_ADD AA, AA, T0 // aa += 4 * k
.L_N2_M3:
andi I, M, 2
beqz I, .L_N2_M1
.L_N2_M2:
PTR_SLLI T1, KK, 4
GADD , d, C0, CC, ZERO, C1, C0, LDC
PTR_ADD A0, AA, T1 // a0 = aa + 2 * kk
move B0, BB
PTR_SUB L, K, KK // L = K - KK
dgemm_dsolve_2x2
PTR_SLLI T0, K, 4
PTR_ADDI CC, CC, 0x10 // cc += 2
PTR_ADD AA, AA, T0 // aa += 2 * k
.L_N2_M1:
andi I, M, 1
beqz I, .L_N2_M0
PTR_SLLI T1, KK, 3
GADD , d, C0, CC, ZERO, C1, C0, LDC
PTR_ADD A0, AA, T1 // a0 = aa + kk
move B0, BB
PTR_SUB L, K, KK // L = K - KK
dgemm_dsolve_1x2
PTR_SLLI T0, K, 3
PTR_ADDI CC, CC, 0x08 // cc += 1
PTR_ADD AA, AA, T0 // aa += 1 * k
.L_N2_M0:
PTR_ADDI KK, KK, -2
.L_N4:
PTR_SRAI J, N, 2 /* J = bn >> 2 */
beq ZERO, J, .L_N0
.align 5
.L_J1:
PTR_ADDI J, J, -1
move AA, A
PTR_SLLI T0, LDC, 2
PTR_SLLI T1, K, 5
PTR_SLLI T2, KK, 5
PTR_SUB B, B, T1
PTR_SUB C, C, T0
PTR_ADD BB, B, T2
move CC, C
PTR_SRAI I, M, 4 // M >> 4
beqz I, .L_M15
.align 4
.L_I1:
PTR_SLLI T1, KK, 7
GADD , d, C0, CC, ZERO, C1, C0, LDC, C2, C1, LDC, C3, C2, LDC
PTR_ADD A0, AA, T1 // a0 = aa + 16 * kk
move B0, BB
PTR_SUB L, K, KK // L = K - KK
dgemm_dsolve_16x4
PTR_ADDI I, I, -1
PTR_SLLI T0, K, 7
PTR_ADDI CC, CC, 0x80 // cc += 16
PTR_ADD AA, AA, T0 // aa += 16 * k
bnez I, .L_I1
.L_M15:
andi I, M, 8
beqz I, .L_M7
.L_M8:
PTR_SLLI T1, KK, 6
GADD , d, C0, CC, ZERO, C1, C0, LDC, C2, C1, LDC, C3, C2, LDC
PTR_ADD A0, AA, T1 // a0 = aa + 8 * kk
move B0, BB
PTR_SUB L, K, KK // L = K - KK
dgemm_dsolve_8x4
PTR_SLLI T0, K, 6
PTR_ADDI CC, CC, 0x40 // cc += 8
PTR_ADD AA, AA, T0 // aa += 8 * k
.L_M7:
andi I, M, 4
beqz I, .L_M3
.L_M4:
PTR_SLLI T1, KK, 5
GADD , d, C0, CC, ZERO, C1, C0, LDC, C2, C1, LDC, C3, C2, LDC
PTR_ADD A0, AA, T1 // a0 = aa + 4 * kk
move B0, BB
PTR_SUB L, K, KK // L = K - KK
dgemm_dsolve_4x4
PTR_SLLI T0, K, 5
PTR_ADDI CC, CC, 0x20 // cc += 4
PTR_ADD AA, AA, T0 // aa += 4 * k
.L_M3:
andi I, M, 2
beqz I, .L_M1
.L_M2:
PTR_SLLI T1, KK, 4
GADD , d, C0, CC, ZERO, C1, C0, LDC, C2, C1, LDC, C3, C2, LDC
PTR_ADD A0, AA, T1 // a0 = aa + 2 * kk
move B0, BB
PTR_SUB L, K, KK // L = K - KK
dgemm_dsolve_2x4
PTR_SLLI T0, K, 4
PTR_ADDI CC, CC, 0x10 // cc += 2
PTR_ADD AA, AA, T0 // aa += 2 * k
.L_M1:
andi I, M, 1
beqz I, .L_M0
PTR_SLLI T1, KK, 3
GADD , d, C0, CC, ZERO, C1, C0, LDC, C2, C1, LDC, C3, C2, LDC
PTR_ADD A0, AA, T1 // a0 = aa + kk
move B0, BB
PTR_SUB L, K, KK // L = K - KK
dgemm_dsolve_1x4
PTR_SLLI T0, K, 3
PTR_ADDI CC, CC, 0x08 // cc += 1
PTR_ADD AA, AA, T0 // aa += 1 * k
.L_M0:
PTR_ADDI KK, KK, -4
bnez J, .L_J1
.L_N0:
pop_if_used 26, 32
jirl $r0, $r1, 0x0
EPILOGUE

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,463 @@
/*******************************************************************************
Copyright (c) 2023, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*******************************************************************************/
#define ASSEMBLER
#include "common.h"
#include "loongarch64_asm.S"
/*********************************************************************
* 2023/08/30 guxiwei
* UTEST : OK
* CTEST : OK
* TEST : OK
*
*
*********************************************************************/
/* int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha,
* FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *buffer)
*/
#define M $r4
#define N $r5
#define ALPHA $f0
#define A $r7
#define LDA $r8
#define X $r9
#define INC_X $r10
#define Y $r11
#define INC_Y $r6
#define J $r12
#define I $r13
#define K $r14
#define Y_ORG $r15
#define OFFSET $r16
#define K_LDA $r17
#define M4 $r18
#define T0 $r19
#define PA0 $r20
#define PA1 $r23
#define PA2 $r24
#define PA3 $r25
#define PA4 $r26
#define PA5 $r27
#define PA6 $r28
#define PA7 $r29
#define VALPHA $xr1
#define X0 $xr2
#define X1 $xr3
#define X2 $xr4
#define X3 $xr5
#define X4 $xr6
#define X5 $xr7
#define X6 $xr8
#define X7 $xr9
#define Y0 $xr10
#define A0 $xr11
#define A1 $xr12
#define A2 $xr13
#define A3 $xr14
#define A4 $xr15
#define A5 $xr16
#define A6 $xr17
#define A7 $xr18
#define X0_F $f2
#define X1_F $f3
#define X2_F $f4
#define X3_F $f5
#define X4_F $f6
#define X5_F $f7
#define X6_F $f8
#define X7_F $f9
#define Y0_F $f10
#define A0_F $f11
#define A1_F $f12
#define A2_F $f13
#define A3_F $f14
#define A4_F $f15
#define A5_F $f16
#define A6_F $f17
#define A7_F $f18
.macro SLOAD_X_8
GLDREPL xv, w, X0, X, 0x00, X1, X, 0x04, X2, X, 0x08, X3, X, 0x0C, \
X4, X, 0x10, X5, X, 0x14, X6, X, 0x18, X7, X, 0x1C
GMUL xvf, s, X0, X0, VALPHA, X1, X1, VALPHA, X2, X2, VALPHA, X3, X3, VALPHA, \
X4, X4, VALPHA, X5, X5, VALPHA, X6, X6, VALPHA, X7, X7, VALPHA
.endm
.macro SLOAD_X_8_GAP
xvldrepl.w X0, X, 0x00
PTR_ADD T0, X, INC_X
xvldrepl.w X1, T0, 0x00
PTR_ADD T0, T0, INC_X
xvldrepl.w X2, T0, 0x00
PTR_ADD T0, T0, INC_X
xvldrepl.w X3, T0, 0x00
PTR_ADD T0, T0, INC_X
xvldrepl.w X4, T0, 0x00
PTR_ADD T0, T0, INC_X
xvldrepl.w X5, T0, 0x00
PTR_ADD T0, T0, INC_X
xvldrepl.w X6, T0, 0x00
PTR_ADD T0, T0, INC_X
xvldrepl.w X7, T0, 0x00
GMUL xvf, s, X0, X0, VALPHA, X1, X1, VALPHA, X2, X2, VALPHA, X3, X3, VALPHA, \
X4, X4, VALPHA, X5, X5, VALPHA, X6, X6, VALPHA, X7, X7, VALPHA
.endm
.macro SLOAD_X_4
GLDREPL xv, w, X0, X, 0x00, X1, X, 0x04, X2, X, 0x08, X3, X, 0x0C
GMUL xvf, s, X0, X0, VALPHA, X1, X1, VALPHA, X2, X2, VALPHA, X3, X3, VALPHA
.endm
.macro SLOAD_X_4_GAP
xvldrepl.w X0, X, 0x00
PTR_ADD T0, X, INC_X
xvldrepl.w X1, T0, 0x00
PTR_ADD T0, T0, INC_X
xvldrepl.w X2, T0, 0x00
PTR_ADD T0, T0, INC_X
xvldrepl.w X3, T0, 0x00
GMUL xvf, s, X0, X0, VALPHA, X1, X1, VALPHA, X2, X2, VALPHA, X3, X3, VALPHA
.endm
.macro SLOAD_X_2
GLDREPL xv, w, X0, X, 0x00, X1, X, 0x04
GMUL xvf, s, X0, X0, VALPHA, X1, X1, VALPHA
.endm
.macro SLOAD_X_2_GAP
xvldrepl.w X0, X, 0x00
PTR_ADD T0, X, INC_X
xvldrepl.w X1, T0, 0x00
GMUL xvf, s, X0, X0, VALPHA, X1, X1, VALPHA
.endm
.macro SLOAD_X_1
GLDREPL xv, w, X0, X, 0x00
GMUL xvf, s, X0, X0, VALPHA
.endm
.macro SLOAD_Y_8
GLD xv, , Y0, Y, 0
.endm
.macro SLOAD_Y_8_GAP
fld.s Y0_F, Y, 0
fldx.s A0_F, Y, INC_Y
PTR_ALSL T0, INC_Y, Y, 1
fld.s A1_F, T0, 0
fldx.s A2_F, T0, INC_Y
PTR_ALSL T0, INC_Y, Y, 2
fld.s A3_F, T0, 0
fldx.s A4_F, T0, INC_Y
PTR_ADD T0, T0, INC_Y
PTR_ADD T0, T0, INC_Y
fld.s A5_F, T0, 0
fldx.s A6_F, T0, INC_Y
GINSVE0 xv, w, Y0, A0, 1, Y0, A1, 2, Y0, A2, 3, Y0, A3, 4, \
Y0, A4, 5, Y0, A5, 6, Y0, A6, 7
.endm
.macro SLOAD_Y_1
GLD f, s, Y0_F, Y, 0
.endm
.macro SGEMV_N_8x8
GLD_INC xv, , 0x20, \
A0, PA0, 0, A1, PA1, 0, \
A2, PA2, 0, A3, PA3, 0, \
A4, PA4, 0, A5, PA5, 0, \
A6, PA6, 0, A7, PA7, 0
GMADD xvf, s, Y0, A0, X0, Y0, Y0, A1, X1, Y0, \
Y0, A2, X2, Y0, Y0, A3, X3, Y0, \
Y0, A4, X4, Y0, Y0, A5, X5, Y0, \
Y0, A6, X6, Y0, Y0, A7, X7, Y0
.endm
.macro SGEMV_N_1x8
GLD_INC f, s, 0x04, \
A0_F, PA0, 0, A1_F, PA1, 0, \
A2_F, PA2, 0, A3_F, PA3, 0, \
A4_F, PA4, 0, A5_F, PA5, 0, \
A6_F, PA6, 0, A7_F, PA7, 0
GMADD f, s, Y0_F, A0_F, X0_F, Y0_F, Y0_F, A1_F, X1_F, Y0_F, \
Y0_F, A2_F, X2_F, Y0_F, Y0_F, A3_F, X3_F, Y0_F, \
Y0_F, A4_F, X4_F, Y0_F, Y0_F, A5_F, X5_F, Y0_F, \
Y0_F, A6_F, X6_F, Y0_F, Y0_F, A7_F, X7_F, Y0_F
.endm
.macro SGEMV_N_8x4
GLD_INC xv, , 0x20, \
A0, PA0, 0, A1, PA1, 0, \
A2, PA2, 0, A3, PA3, 0
GMADD xvf, s, Y0, A0, X0, Y0, Y0, A1, X1, Y0, \
Y0, A2, X2, Y0, Y0, A3, X3, Y0
.endm
.macro SGEMV_N_1x4
GLD_INC f, s, 0x04, \
A0_F, PA0, 0, A1_F, PA1, 0, \
A2_F, PA2, 0, A3_F, PA3, 0
GMADD f, s, Y0_F, A0_F, X0_F, Y0_F, Y0_F, A1_F, X1_F, Y0_F, \
Y0_F, A2_F, X2_F, Y0_F, Y0_F, A3_F, X3_F, Y0_F
.endm
.macro SGEMV_N_8x2
GLD_INC xv, , 0x20, \
A0, PA0, 0, A1, PA1, 0
GMADD xvf, s, Y0, A0, X0, Y0, Y0, A1, X1, Y0
.endm
.macro SGEMV_N_1x2
GLD_INC f, s, 0x04, \
A0_F, PA0, 0, A1_F, PA1, 0
GMADD f, s, Y0_F, A0_F, X0_F, Y0_F, Y0_F, A1_F, X1_F, Y0_F
.endm
.macro SGEMV_N_1x1
GLD_INC f, s, 0x04, A0_F, PA0, 0
GMADD f, s, Y0_F, A0_F, X0_F, Y0_F
.endm
.macro SSTORE_Y_8
GST xv, , Y0, Y, 0
.endm
.macro SSTORE_Y_8_GAP
xvstelm.w Y0, Y, 0, 0
PTR_ADD T0, Y, INC_Y
xvstelm.w Y0, T0, 0, 1
PTR_ADD T0, T0, INC_Y
xvstelm.w Y0, T0, 0, 2
PTR_ADD T0, T0, INC_Y
xvstelm.w Y0, T0, 0, 3
PTR_ADD T0, T0, INC_Y
xvstelm.w Y0, T0, 0, 4
PTR_ADD T0, T0, INC_Y
xvstelm.w Y0, T0, 0, 5
PTR_ADD T0, T0, INC_Y
xvstelm.w Y0, T0, 0, 6
PTR_ADD T0, T0, INC_Y
xvstelm.w Y0, T0, 0, 7
.endm
.macro SSTORE_Y_1
GST f, s, Y0_F, Y, 0
.endm
.macro SGEMV_N_LASX XW:req, X_8:req, X_4:req, X_2:req, X_1:req, Y_8:req, Y_4:req, Y_1:req
PTR_SRLI J, N, 3
beqz J, .L_\XW\()_N_7
PTR_SLLI K_LDA, LDA, 3
PTR_SUB K_LDA, K_LDA, M4
.L_\XW\()_N_L8:
SLOAD_\X_8
xor K, K, K
move Y, Y_ORG
PTR_SRLI I, M, 3
beqz I, .L_\XW\()_M_7
.align 5
.L_\XW\()_M_L8:
SLOAD_\Y_8
SGEMV_N_8x8
SSTORE_\Y_8
PTR_ADDI I, I, -1
PTR_ALSL Y, INC_Y, Y, 3
PTR_ADDI K, K, 8
bnez I, .L_\XW\()_M_L8
.L_\XW\()_M_7:
andi I, M, 7
beqz I, .L_\XW\()_M_END
.align 5
.L_\XW\()_M_L1:
SLOAD_\Y_1
SGEMV_N_1x8
SSTORE_\Y_1
PTR_ADDI I, I, -1
PTR_ADD Y, Y, INC_Y
PTR_ADDI K, K, 1
bnez I, .L_\XW\()_M_L1
.L_\XW\()_M_END:
PTR_ADDI J, J, -1
#if __loongarch_grlen == 64
GADD , d, PA0, PA0, K_LDA, PA1, PA1, K_LDA, PA2, PA2, K_LDA, PA3, PA3, K_LDA, \
PA4, PA4, K_LDA, PA5, PA5, K_LDA, PA6, PA6, K_LDA, PA7, PA7, K_LDA
#elif __loongarch_grlen == 32
GADD , w, PA0, PA0, K_LDA, PA1, PA1, K_LDA, PA2, PA2, K_LDA, PA3, PA3, K_LDA, \
PA4, PA4, K_LDA, PA5, PA5, K_LDA, PA6, PA6, K_LDA, PA7, PA7, K_LDA
#else
GADD , d, PA0, PA0, K_LDA, PA1, PA1, K_LDA, PA2, PA2, K_LDA, PA3, PA3, K_LDA, \
PA4, PA4, K_LDA, PA5, PA5, K_LDA, PA6, PA6, K_LDA, PA7, PA7, K_LDA
#endif
PTR_ALSL X, INC_X, X, 3
bnez J, .L_\XW\()_N_L8
.L_\XW\()_N_7:
andi J, N, 4
beqz J, .L_\XW\()_N_3
SLOAD_\X_4
xor K, K, K
move Y, Y_ORG
PTR_SRLI I, M, 3
beqz I, .L_\XW\()_N_4_M_7
.align 5
.L_\XW\()_N_4_M_L8:
SLOAD_\Y_8
SGEMV_N_8x4
SSTORE_\Y_8
PTR_ADDI I, I, -1
PTR_ADDI K, K, 8
PTR_ALSL Y, INC_Y, Y, 3
bnez I, .L_\XW\()_N_4_M_L8
.L_\XW\()_N_4_M_7:
andi I, M, 7
beqz I, .L_\XW\()_N_4_M_END
.align 5
.L_\XW\()_N_4_M_L1:
SLOAD_\Y_1
SGEMV_N_1x4
SSTORE_\Y_1
PTR_ADDI I, I, -1
PTR_ADD Y, Y, INC_Y
PTR_ADDI K, K, 1
bnez I, .L_\XW\()_N_4_M_L1
.L_\XW\()_N_4_M_END:
PTR_SLLI K_LDA, LDA, 2
PTR_SUB K_LDA, K_LDA, M4
#if __loongarch_grlen == 64
GADD , d, PA0, PA0, K_LDA, PA1, PA1, K_LDA, PA2, PA2, K_LDA, PA3, PA3, K_LDA
#elif __loongarch_grlen == 32
GADD , w PA0, PA0, K_LDA, PA1, PA1, K_LDA, PA2, PA2, K_LDA, PA3, PA3, K_LDA
#else
GADD , d, PA0, PA0, K_LDA, PA1, PA1, K_LDA, PA2, PA2, K_LDA, PA3, PA3, K_LDA
#endif
PTR_ALSL X, INC_X, X, 2
.L_\XW\()_N_3:
andi J, N, 2
beqz J, .L_\XW\()_N_1
SLOAD_\X_2
xor K, K, K
move Y, Y_ORG
PTR_SRLI I, M, 3
beqz I, .L_\XW\()_N_2_M_7
.align 5
.L_\XW\()_N_2_M_L8:
SLOAD_\Y_8
SGEMV_N_8x2
SSTORE_\Y_8
PTR_ADDI I, I, -1
PTR_ADDI K, K, 8
PTR_ALSL Y, INC_Y, Y, 3
bnez I, .L_\XW\()_N_2_M_L8
.L_\XW\()_N_2_M_7:
andi I, M, 7
beqz I, .L_\XW\()_N_2_M_END
.align 5
.L_\XW\()_N_2_M_L1:
SLOAD_\Y_1
SGEMV_N_1x2
SSTORE_\Y_1
PTR_ADDI I, I, -1
PTR_ADD Y, Y, INC_Y
PTR_ADDI K, K, 1
bnez I, .L_\XW\()_N_2_M_L1
.L_\XW\()_N_2_M_END:
PTR_SLLI K_LDA, LDA, 1
PTR_SUB K_LDA, K_LDA, M4
PTR_ADD PA0, PA0, K_LDA
PTR_ADD PA1, PA1, K_LDA
PTR_ALSL X, INC_X, X, 1
.L_\XW\()_N_1:
andi J, N, 1
beqz J, .L_END
SLOAD_\X_1
xor K, K, K
move Y, Y_ORG
move I, M
beqz I, .L_END
.align 5
.L_\XW\()_N_1_M_L1:
SLOAD_\Y_1
SGEMV_N_1x1
SSTORE_\Y_1
PTR_ADDI I, I, -1
PTR_ADD Y, Y, INC_Y
PTR_ADDI K, K, 1
bnez I, .L_\XW\()_N_1_M_L1
b .L_END
.endm
PROLOGUE
PTR_LD INC_Y, $sp, 0
push_if_used 17 + 7, 19
PTR_ADDI K, $r0, 0x01
PTR_SUB I, INC_X, K
PTR_SUB J, INC_Y, K
maskeqz I, K, I /* if(inc_x == 1) I = 0; else I = 1; */
maskeqz J, K, J /* if(inc_y == 1) j = 0; else j = 1; */
PTR_ALSL I, I, J, 1
GSLLI , d, LDA, LDA, 2, INC_X, INC_X, 2, INC_Y, INC_Y, 2, M4, M, 2
xvreplve0.w VALPHA, $xr0
move Y_ORG, Y
move PA0, A
#if __loongarch_grlen == 64
GADD , d, PA1, PA0, LDA, PA2, PA1, LDA, PA3, PA2, LDA, PA4, PA3, LDA, \
PA5, PA4, LDA, PA6, PA5, LDA, PA7, PA6, LDA
#elif __loongarch_grlen == 32
GADD , w, PA1, PA0, LDA, PA2, PA1, LDA, PA3, PA2, LDA, PA4, PA3, LDA, \
PA5, PA4, LDA, PA6, PA5, LDA, PA7, PA6, LDA
#else
GADD , d, PA1, PA0, LDA, PA2, PA1, LDA, PA3, PA2, LDA, PA4, PA3, LDA, \
PA5, PA4, LDA, PA6, PA5, LDA, PA7, PA6, LDA
#endif
la.local T0, .L_GAP_TABLE
PTR_ALSL I, I, T0, 1
ld.h K, I, 0
PTR_ADD T0, T0, K
jirl $r0, T0, 0
.L_GAP_TABLE:
.hword .L_GAP_0_0 - .L_GAP_TABLE
.hword .L_GAP_0_1 - .L_GAP_TABLE
.hword .L_GAP_1_0 - .L_GAP_TABLE
.hword .L_GAP_1_1 - .L_GAP_TABLE
.L_GAP_0_0: /* if (inc_x == 1) && (incy == 1) */
SGEMV_N_LASX GAP_0_0, X_8, X_4, X_2, X_1, Y_8, Y_4, Y_1
.L_GAP_0_1: /* if (inc_x == 1) && (incy != 1) */
SGEMV_N_LASX GAP_0_1, X_8, X_4, X_2, X_1, Y_8_GAP, Y_4_GAP, Y_1
.L_GAP_1_0: /* if (inc_x != 1) && (incy == 1) */
SGEMV_N_LASX GAP_1_0, X_8_GAP, X_4_GAP, X_2_GAP, X_1, Y_8, Y_4, Y_1
.L_GAP_1_1: /* if (inc_x != 1) && (incy != 1) */
SGEMV_N_LASX GAP_1_1, X_8_GAP, X_4_GAP, X_2_GAP, X_1, Y_8_GAP, Y_4_GAP, Y_1
.L_END:
pop_if_used 17 + 7, 19
jirl $r0, $r1, 0x0
EPILOGUE

View File

@ -0,0 +1,405 @@
/*******************************************************************************
Copyright (c) 2023, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*******************************************************************************/
#define ASSEMBLER
#include "common.h"
#include "loongarch64_asm.S"
/*********************************************************************
* 2023/08/30 guxiwei
* UTEST : OK
* CTEST : OK
* TEST : OK
*
*
*********************************************************************/
/* int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha,
* FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *buffer)
*/
#define M $r4
#define N $r5
#define ALPHA $f0
#define A $r7
#define LDA $r8
#define X $r9
#define INC_X $r10
#define Y $r11
#define INC_Y $r6
#define J $r12
#define I $r13
#define K $r14
#define PY0 $r14
#define X_ORG $r15
#define PY1 $r16
#define K_LDA $r17
#define PY2 $r18
#define T0 $r19
#define PA0 $r20
#define PA1 $r23
#define PA2 $r24
#define PA3 $r25
#define PA4 $r26
#define PA5 $r27
#define PA6 $r28
#define PA7 $r29
#define M4 $r30
#define VALPHA $xr0
#define X0 $xr1
#define A0 $xr2
#define A1 $xr3
#define A2 $xr4
#define A3 $xr5
#define A4 $xr6
#define A5 $xr7
#define A6 $xr8
#define A7 $xr9
#define TP0 $xr10
#define TP1 $xr11
#define TP2 $xr12
#define TP3 $xr13
#define TP4 $xr14
#define TP5 $xr15
#define TP6 $xr16
#define TP7 $xr17
#define Y0 $xr2
#define Y1 $xr3
#define Y2 $xr4
#define Y3 $xr5
#define Y4 $xr6
#define Y5 $xr7
#define Y6 $xr8
#define Y7 $xr9
.macro ZERO_Y8
GXOR xv, v, TP0, TP0, TP0, TP1, TP1, TP1, TP2, TP2, TP2, TP3, TP3, TP3, \
TP4, TP4, TP4, TP5, TP5, TP5, TP6, TP6, TP6, TP7, TP7, TP7
.endm
.macro ZERO_Y4
GXOR xv, v, TP0, TP0, TP0, TP1, TP1, TP1, TP2, TP2, TP2, TP3, TP3, TP3
.endm
.macro ZERO_Y2
GXOR xv, v, TP0, TP0, TP0, TP1, TP1, TP1
.endm
.macro ZERO_Y1
GXOR xv, v, TP0, TP0, TP0
.endm
.macro SLOAD_X8
GLD xv, , X0, X, 0x00
.endm
.macro SLOAD_X8_GAP
fld.s $f1, X, 0x00
fldx.s $f2, X, INC_X
PTR_ALSL T0, INC_X, X, 1
fld.s $f3, T0, 0x00
fldx.s $f4, T0, INC_X
GINSVE0 xv, w, X0, A0, 1, X0, A1, 2, X0, A2, 3
PTR_ALSL T0, INC_X, X, 2
fld.s $f2, T0, 0x00
fldx.s $f3, T0, INC_X
PTR_ALSL T0, INC_X, T0, 1
fld.s $f4, T0, 0x00
fldx.s $f5, T0, INC_X
GINSVE0 xv, w, X0, A0, 4, X0, A1, 5, X0, A2, 6, X0, A3, 7
.endm
.macro SGEMV_T_8x8
GLD_INC xv, , 0x20, \
A0, PA0, 0, A1, PA1, 0, \
A2, PA2, 0, A3, PA3, 0, \
A4, PA4, 0, A5, PA5, 0, \
A6, PA6, 0, A7, PA7, 0
GMADD xvf, s, TP0, A0, X0, TP0, TP1, A1, X0, TP1, \
TP2, A2, X0, TP2, TP3, A3, X0, TP3, \
TP4, A4, X0, TP4, TP5, A5, X0, TP5, \
TP6, A6, X0, TP6, TP7, A7, X0, TP7
.endm
.macro SGEMV_T_4x8
GLD_INC xv, , 0x20, \
A0, PA0, 0, A1, PA1, 0, \
A2, PA2, 0, A3, PA3, 0
GMADD xvf, s, TP0, A0, X0, TP0, TP1, A1, X0, TP1, \
TP2, A2, X0, TP2, TP3, A3, X0, TP3
.endm
.macro SGEMV_T_2x8
GLD_INC xv, , 0x20, \
A0, PA0, 0, A1, PA1, 0
GMADD xvf, s, TP0, A0, X0, TP0, TP1, A1, X0, TP1
.endm
.macro SGEMV_T_LASX XW:req X8:req, X4:req
PTR_SRLI J, N, 3
beqz J, .L_\XW\()_N_7
PTR_SLLI K_LDA, LDA, 3
PTR_SUB K_LDA, K_LDA, M4
.L_\XW\()_N_L8:
ZERO_Y8
move X, X_ORG
PTR_SRLI I, M, 3
beqz I, .L_\XW\()_M_7
.align 5
.L_\XW\()_M_L8:
SLOAD_\X8
SGEMV_T_8x8
PTR_ADDI I, I, -1
PTR_ALSL X, INC_X, X, 3
bnez I, .L_\XW\()_M_L8
.L_\XW\()_M_7:
// Accumulated
GACC xvf, s, Y0, TP0, Y1, TP1, Y2, TP2, Y3, TP3, Y4, TP4, \
Y5, TP5, Y6, TP6, Y7, TP7
andi I, M, 7
beqz I, .L_\XW\()_M_END
.align 5
.L_\XW\()_M_L1:
fld.s $f1, X, 0x00
fld.s $f10, PA0, 0x00
fld.s $f11, PA1, 0x00
fld.s $f12, PA2, 0x00
fld.s $f13, PA3, 0x00
fld.s $f14, PA4, 0x00
fld.s $f15, PA5, 0x00
fld.s $f16, PA6, 0x00
fld.s $f17, PA7, 0x00
#if __loongarch_grlen == 64
GADDI , d, PA0, PA0, 0x04, PA1, PA1, 0x04, PA2, PA2, 0x04, PA3, PA3, 0x04, \
PA4, PA4, 0x04, PA5, PA5, 0x04, PA6, PA6, 0x04, PA7, PA7, 0x04
#elif __loongarch_grlen == 32
GADDI , w, PA0, PA0, 0x04, PA1, PA1, 0x04, PA2, PA2, 0x04, PA3, PA3, 0x04, \
PA4, PA4, 0x04, PA5, PA5, 0x04, PA6, PA6, 0x04, PA7, PA7, 0x04
#else
GADDI , d, PA0, PA0, 0x04, PA1, PA1, 0x04, PA2, PA2, 0x04, PA3, PA3, 0x04, \
PA4, PA4, 0x04, PA5, PA5, 0x04, PA6, PA6, 0x04, PA7, PA7, 0x04
#endif
GMADD f, s, $f2, $f10, $f1, $f2, $f3, $f11, $f1, $f3, $f4, $f12, $f1, $f4, $f5, $f13, $f1, $f5, \
$f6, $f14, $f1, $f6, $f7, $f15, $f1, $f7, $f8, $f16, $f1, $f8, $f9, $f17, $f1, $f9,
PTR_ADDI I, I, -1
PTR_ADD X, X, INC_X
bnez I, .L_\XW\()_M_L1
.L_\XW\()_M_END:
fld.s $f10, Y, 0x00
fldx.s $f11, Y, INC_Y
PTR_ALSL PY0, INC_Y, Y, 1
fld.s $f12, PY0, 0x00
fldx.s $f13, PY0, INC_Y
PTR_ALSL PY1, INC_Y, Y, 2
fld.s $f14, PY1, 0x00
fldx.s $f15, PY1, INC_Y
PTR_ALSL PY2, INC_Y, PY1, 1
fld.s $f16, PY2, 0x00
fldx.s $f17, PY2, INC_Y
GMADD f, s, $f10, ALPHA, $f2, $f10, $f11, ALPHA, $f3, $f11, $f12, ALPHA, $f4, $f12, $f13, ALPHA, $f5, $f13, \
$f14, ALPHA, $f6, $f14, $f15, ALPHA, $f7, $f15, $f16, ALPHA, $f8, $f16, $f17, ALPHA, $f9, $f17
PTR_ADDI J, J, -1
#if __loongarch_grlen == 64
GADD , d, PA0, PA0, K_LDA, PA1, PA1, K_LDA, PA2, PA2, K_LDA, PA3, PA3, K_LDA, \
PA4, PA4, K_LDA, PA5, PA5, K_LDA, PA6, PA6, K_LDA, PA7, PA7, K_LDA
#elif __loongarch_grlen == 32
GADD , w, PA0, PA0, K_LDA, PA1, PA1, K_LDA, PA2, PA2, K_LDA, PA3, PA3, K_LDA, \
PA4, PA4, K_LDA, PA5, PA5, K_LDA, PA6, PA6, K_LDA, PA7, PA7, K_LDA
#else
GADD , d, PA0, PA0, K_LDA, PA1, PA1, K_LDA, PA2, PA2, K_LDA, PA3, PA3, K_LDA, \
PA4, PA4, K_LDA, PA5, PA5, K_LDA, PA6, PA6, K_LDA, PA7, PA7, K_LDA
#endif
fst.s $f10, Y, 0x00
fstx.s $f11, Y, INC_Y
fst.s $f12, PY0, 0x00
fstx.s $f13, PY0, INC_Y
fst.s $f14, PY1, 0x00
fstx.s $f15, PY1, INC_Y
fst.s $f16, PY2, 0x00
fstx.s $f17, PY2, INC_Y
PTR_ALSL Y, INC_Y, Y, 3
bnez J, .L_\XW\()_N_L8
.L_\XW\()_N_7:
andi J, N, 4
beqz J, .L_\XW\()_N_3
ZERO_Y4
move X, X_ORG
PTR_SRLI I, M, 3
beqz I, .L_\XW\()_N_4_M_7
.align 5
.L_\XW\()_N_4_M_L8:
SLOAD_\X8
SGEMV_T_4x8
PTR_ADDI I, I, -1
PTR_ALSL X, INC_X, X, 3
bnez I, .L_\XW\()_N_4_M_L8
.L_\XW\()_N_4_M_7:
// Accumulated
GACC xvf, s, Y0, TP0, Y1, TP1, Y2, TP2, Y3, TP3
andi I, M, 7
beqz I, .L_\XW\()_N_4_M_END
.align 5
.L_\XW\()_N_4_M_L1:
fld.s $f1, X, 0x00
GLD_INC f, s, 0x04, $f10, PA0, 0x00, $f11, PA1, 0x00, $f12, PA2, 0x00, $f13, PA3, 0x00
GMADD f, s, $f2, $f10, $f1, $f2, $f3, $f11, $f1, $f3, $f4, $f12, $f1, $f4, $f5, $f13, $f1, $f5
PTR_ADDI I, I, -1
PTR_ADD X, X, INC_X
bnez I, .L_\XW\()_N_4_M_L1
.L_\XW\()_N_4_M_END:
fld.s $f10, Y, 0x00
fldx.s $f11, Y, INC_Y
PTR_ALSL PY0, INC_Y, Y, 1
fld.s $f12, PY0, 0x00
fldx.s $f13, PY0, INC_Y
GMADD f, s, $f10, ALPHA, $f2, $f10, $f11, ALPHA, $f3, $f11, $f12, ALPHA, $f4, $f12, $f13, ALPHA, $f5, $f13
PTR_SLLI K_LDA, LDA, 2
PTR_SUB K_LDA, K_LDA, M4
#if __loongarch_grlen == 64
GADD , d, PA0, PA0, K_LDA, PA1, PA1, K_LDA, PA2, PA2, K_LDA, PA3, PA3, K_LDA
#elif __loongarch_grlen == 32
GADD , w, PA0, PA0, K_LDA, PA1, PA1, K_LDA, PA2, PA2, K_LDA, PA3, PA3, K_LDA
#else
GADD , d, PA0, PA0, K_LDA, PA1, PA1, K_LDA, PA2, PA2, K_LDA, PA3, PA3, K_LDA
#endif
fst.s $f10, Y, 0x00
fstx.s $f11, Y, INC_Y
fst.s $f12, PY0, 0x00
fstx.s $f13, PY0, INC_Y
PTR_ALSL Y, INC_Y, Y, 2
.L_\XW\()_N_3:
andi J, N, 2
beqz J, .L_\XW\()_N_1
ZERO_Y2
move X, X_ORG
PTR_SRLI I, M, 3
beqz I, .L_\XW\()_N_2_M_7
.align 5
.L_\XW\()_N_2_M_L8:
SLOAD_\X8
SGEMV_T_2x8
PTR_ADDI I, I, -1
PTR_ALSL X, INC_X, X, 3
bnez I, .L_\XW\()_N_2_M_L8
.L_\XW\()_N_2_M_7:
// Accumulated
GACC xvf, s, Y0, TP0, Y1, TP1
andi I, M, 7
beqz I, .L_\XW\()_N_2_M_END
.align 5
.L_\XW\()_N_2_M_L1:
fld.s $f1, X, 0x00
GLD_INC f, s, 0x04, $f10, PA0, 0x00, $f11, PA1, 0x00
GMADD f, s, $f2, $f10, $f1, $f2, $f3, $f11, $f1, $f3
PTR_ADDI I, I, -1
PTR_ADD X, X, INC_X
bnez I, .L_\XW\()_N_2_M_L1
.L_\XW\()_N_2_M_END:
fld.s $f10, Y, 0x00
fldx.s $f11, Y, INC_Y
GMADD f, s, $f10, ALPHA, $f2, $f10, $f11, ALPHA, $f3, $f11
PTR_SLLI K_LDA, LDA, 1
PTR_SUB K_LDA, K_LDA, M4
#if __loongarch_grlen == 64
GADD , d, PA0, PA0, K_LDA, PA1, PA1, K_LDA
#elif __loongarch_grlen == 32
GADD , w, PA0, PA0, K_LDA, PA1, PA1, K_LDA
#else
GADD , d, PA0, PA0, K_LDA, PA1, PA1, K_LDA
#endif
fst.s $f10, Y, 0x00
fstx.s $f11, Y, INC_Y
PTR_ALSL Y, INC_Y, Y, 1
.L_\XW\()_N_1:
andi J, N, 1
beqz J, .L_END
ZERO_Y1
move X, X_ORG
move I, M
beqz I, .L_END
.align 5
.L_\XW\()_N_1_M_L1:
fld.s $f2, PA0, 0x00
fld.s $f1, X, 0x00
fmadd.s $f10, $f2, $f1, $f10
PTR_ADDI I, I, -1
PTR_ADD X, X, INC_X
PTR_ADDI PA0, PA0, 0x04
bnez I, .L_\XW\()_N_1_M_L1
fld.s $f2, Y, 0x00
fmadd.s $f2, ALPHA, $f10, $f2
fst.s $f2, Y, 0x00
b .L_END
.endm
PROLOGUE
PTR_LD INC_Y, $sp, 0
push_if_used 17 + 8, 18
PTR_ADDI K, $r0, 0x01
PTR_SUB I, INC_X, K
maskeqz I, K, I /* if(inc_x == 1) I = 0; else I = 1; */
GSLLI , d, LDA, LDA, 2, INC_X, INC_X, 2, INC_Y, INC_Y, 2, M4, M, 2
xvreplve0.w VALPHA, $xr0
move X_ORG, X
move PA0, A
#if __loongarch_grlen == 64
GADD , d, PA1, PA0, LDA, PA2, PA1, LDA, PA3, PA2, LDA, PA4, PA3, LDA, \
PA5, PA4, LDA, PA6, PA5, LDA, PA7, PA6, LDA
#elif __loongarch_grlen == 32
GADD , w, PA1, PA0, LDA, PA2, PA1, LDA, PA3, PA2, LDA, PA4, PA3, LDA, \
PA5, PA4, LDA, PA6, PA5, LDA, PA7, PA6, LDA
#else
GADD , d, PA1, PA0, LDA, PA2, PA1, LDA, PA3, PA2, LDA, PA4, PA3, LDA, \
PA5, PA4, LDA, PA6, PA5, LDA, PA7, PA6, LDA
#endif
la.local T0, .L_GAP_TABLE
PTR_ALSL I, I, T0, 1
ld.h K, I, 0
PTR_ADD T0, T0, K
jirl $r0, T0, 0
.L_GAP_TABLE:
.hword .L_GAP_0 - .L_GAP_TABLE
.hword .L_GAP_1 - .L_GAP_TABLE
.L_GAP_0: /* if (incx == 1) */
SGEMV_T_LASX GAP_0, X8, X4
.L_GAP_1: /* if (incx != 1) */
SGEMV_T_LASX GAP_1, X8_GAP, X4_GAP
.L_END:
pop_if_used 17 + 8, 18
jirl $r0, $r1, 0x0
EPILOGUE

View File

@ -1,6 +1,3 @@
ifeq ($(HAVE_GAS), 1)
include $(KERNELDIR)/KERNEL.POWER8
else
#SGEMM_BETA = ../generic/gemm_beta.c
#DGEMM_BETA = ../generic/gemm_beta.c
#CGEMM_BETA = ../generic/zgemm_beta.c
@ -19,8 +16,13 @@ SBGEMMOTCOPYOBJ = sbgemm_otcopy$(TSUFFIX).$(SUFFIX)
STRMMKERNEL = sgemm_kernel_power10.c
DTRMMKERNEL = dgemm_kernel_power10.c
ifeq ($(OSNAME), AIX)
CTRMMKERNEL = ctrmm_kernel_8x4_power8.S
ZTRMMKERNEL = ztrmm_kernel_8x2_power8.S
else
CTRMMKERNEL = cgemm_kernel_power10.S
ZTRMMKERNEL = zgemm_kernel_power10.S
endif
SGEMMKERNEL = sgemm_kernel_power10.c
SGEMMINCOPY = ../generic/gemm_ncopy_16.c
@ -62,10 +64,18 @@ DGEMM_SMALL_K_B0_TT = dgemm_small_kernel_tt_power10.c
DGEMM_SMALL_K_TN = dgemm_small_kernel_tn_power10.c
DGEMM_SMALL_K_B0_TN = dgemm_small_kernel_tn_power10.c
ifeq ($(OSNAME), AIX)
CGEMMKERNEL = cgemm_kernel_8x4_power8.S
else
CGEMMKERNEL = cgemm_kernel_power10.S
endif
#CGEMMKERNEL = cgemm_kernel_8x4_power8.S
CGEMMINCOPY = ../generic/zgemm_ncopy_8.c
ifeq ($(OSNAME), AIX)
CGEMMITCOPY = cgemm_tcopy_8_power8.S
else
CGEMMITCOPY = ../generic/zgemm_tcopy_8.c
endif
CGEMMONCOPY = ../generic/zgemm_ncopy_4.c
CGEMMOTCOPY = ../generic/zgemm_tcopy_4.c
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
@ -73,7 +83,11 @@ CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX)
CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX)
ifeq ($(OSNAME), AIX)
ZGEMMKERNEL = zgemm_kernel_8x2_power8.S
else
ZGEMMKERNEL = zgemm_kernel_power10.S
endif
ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c
ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
ZGEMMINCOPY = ../generic/zgemm_ncopy_8.c
@ -124,6 +138,7 @@ ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
#SMINKERNEL = ../arm/min.c
#DMINKERNEL = ../arm/min.c
#
ifeq ($(C_COMPILER), GCC)
ifneq ($(GCCVERSIONGTEQ9),1)
ISAMAXKERNEL = isamax_power9.S
else
@ -148,6 +163,15 @@ ICAMINKERNEL = icamin_power9.S
else
ICAMINKERNEL = icamin.c
endif
else
ISAMAXKERNEL = isamax.c
IDAMAXKERNEL = idamax.c
ICAMAXKERNEL = icamax.c
IZAMAXKERNEL = izamax.c
ISAMINKERNEL = isamin.c
IDAMINKERNEL = idamin.c
ICAMINKERNEL = icamin.c
endif
IZAMINKERNEL = izamin.c
#
#ISMAXKERNEL = ../arm/imax.c
@ -238,4 +262,3 @@ QCABS_KERNEL = ../generic/cabs.c
#Dump kernel
CGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c
ZGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c
endif

View File

@ -4,7 +4,7 @@
#define ABS_K(a) ((a) > 0 ? (a) : (-(a)))
#endif
#if defined(SKYLAKEX)
#if defined(SKYLAKEX) || defined(COOPERLAKE) || defined(SAPPHIRERAPIDS)
#include "casum_microk_skylakex-2.c"
#endif

View File

@ -4,7 +4,7 @@
#define ABS_K(a) ((a) > 0 ? (a) : (-(a)))
#endif
#if defined(SKYLAKEX)
#if defined(SKYLAKEX) || defined(COOPERLAKE) || defined(SAPPHIRERAPIDS)
#include "dasum_microk_skylakex-2.c"
#elif defined(HASWELL) || defined(ZEN)
#include "dasum_microk_haswell-2.c"

View File

@ -159,7 +159,7 @@ static int dot_thread_function(BLASLONG n, BLASLONG dummy0,
extern int blas_level1_thread_with_return_value(int mode, BLASLONG m, BLASLONG n,
BLASLONG k, void *alpha, void *a, BLASLONG lda, void *b, BLASLONG ldb,
void *c, BLASLONG ldc, int (*function)(), int nthreads);
void *c, BLASLONG ldc, int (*function)(void), int nthreads);
#endif
FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)

View File

@ -169,7 +169,7 @@ static int rot_thread_function(blas_arg_t *args)
return 0;
}
extern int blas_level1_thread(int mode, BLASLONG m, BLASLONG n, BLASLONG k, void *alpha, void *a, BLASLONG lda, void *b, BLASLONG ldb, void *c, BLASLONG ldc, int (*function)(), int nthreads);
extern int blas_level1_thread(int mode, BLASLONG m, BLASLONG n, BLASLONG k, void *alpha, void *a, BLASLONG lda, void *b, BLASLONG ldb, void *c, BLASLONG ldc, int (*function)(void), int nthreads);
#endif
int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT c, FLOAT s)
{

View File

@ -9,7 +9,7 @@
#endif
#if defined(SKYLAKEX)
#if defined(SKYLAKEX) || defined(COOPERLAKE) || defined(SAPPHIRERAPIDS)
#include "sasum_microk_skylakex-2.c"
#elif defined(HASWELL) || defined(ZEN)
#include "sasum_microk_haswell-2.c"

View File

@ -171,7 +171,7 @@ static int rot_thread_function(blas_arg_t *args)
return 0;
}
extern int blas_level1_thread(int mode, BLASLONG m, BLASLONG n, BLASLONG k, void *alpha, void *a, BLASLONG lda, void *b, BLASLONG ldb, void *c, BLASLONG ldc, int (*function)(), int nthreads);
extern int blas_level1_thread(int mode, BLASLONG m, BLASLONG n, BLASLONG k, void *alpha, void *a, BLASLONG lda, void *b, BLASLONG ldb, void *c, BLASLONG ldc, int (*function)(void), int nthreads);
#endif
int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT c, FLOAT s)
{

View File

@ -4,7 +4,7 @@
#define ABS_K(a) ((a) > 0 ? (a) : (-(a)))
#endif
#if defined(SKYLAKEX)
#if defined(SKYLAKEX) || defined(COOPERLAKE) || defined(SAPPHIRERAPIDS)
#include "zasum_microk_skylakex-2.c"
#endif

View File

@ -92,7 +92,7 @@ static void zdot_kernel_8(BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *d)
#if defined(SMP)
extern int blas_level1_thread_with_return_value(int mode, BLASLONG m, BLASLONG n,
BLASLONG k, void *alpha, void *a, BLASLONG lda, void *b, BLASLONG ldb,
void *c, BLASLONG ldc, int (*function)(), int nthreads);
void *c, BLASLONG ldc, int (*function)(void), int nthreads);
#endif

View File

@ -247,7 +247,6 @@ typedef struct Namelist Namelist;
#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
#define sig_die(s, kill) { exit(1); }
#define s_stop(s, n) {exit(0);}
static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
#define z_abs(z) (cabs(Cd(z)))
#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
@ -261,24 +260,7 @@ static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
/* procedure parameter types for -A and -C++ */
#define F2C_proc_par_types 1
#ifdef __cplusplus
typedef logical (*L_fp)(...);
#else
typedef logical (*L_fp)();
#endif
static float spow_ui(float x, integer n) {
float pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
static double dpow_ui(double x, integer n) {
double pow=1.0; unsigned long int u;
if(n != 0) {
@ -291,217 +273,7 @@ static double dpow_ui(double x, integer n) {
}
return pow;
}
#ifdef _MSC_VER
static _Fcomplex cpow_ui(complex x, integer n) {
complex pow={1.0,0.0}; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i;
for(u = n; ; ) {
if(u & 01) pow.r *= x.r, pow.i *= x.i;
if(u >>= 1) x.r *= x.r, x.i *= x.i;
else break;
}
}
_Fcomplex p={pow.r, pow.i};
return p;
}
#else
static _Complex float cpow_ui(_Complex float x, integer n) {
_Complex float pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
#endif
#ifdef _MSC_VER
static _Dcomplex zpow_ui(_Dcomplex x, integer n) {
_Dcomplex pow={1.0,0.0}; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1];
for(u = n; ; ) {
if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1];
if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1];
else break;
}
}
_Dcomplex p = {pow._Val[0], pow._Val[1]};
return p;
}
#else
static _Complex double zpow_ui(_Complex double x, integer n) {
_Complex double pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
#endif
static integer pow_ii(integer x, integer n) {
integer pow; unsigned long int u;
if (n <= 0) {
if (n == 0 || x == 1) pow = 1;
else if (x != -1) pow = x == 0 ? 1/x : 0;
else n = -n;
}
if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
u = n;
for(pow = 1; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
static integer dmaxloc_(double *w, integer s, integer e, integer *n)
{
double m; integer i, mi;
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
if (w[i-1]>m) mi=i ,m=w[i-1];
return mi-s+1;
}
static integer smaxloc_(float *w, integer s, integer e, integer *n)
{
float m; integer i, mi;
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
if (w[i-1]>m) mi=i ,m=w[i-1];
return mi-s+1;
}
static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
integer n = *n_, incx = *incx_, incy = *incy_, i;
#ifdef _MSC_VER
_Fcomplex zdotc = {0.0, 0.0};
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += conjf(Cf(&x[i]))._Val[0] * Cf(&y[i])._Val[0];
zdotc._Val[1] += conjf(Cf(&x[i]))._Val[1] * Cf(&y[i])._Val[1];
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += conjf(Cf(&x[i*incx]))._Val[0] * Cf(&y[i*incy])._Val[0];
zdotc._Val[1] += conjf(Cf(&x[i*incx]))._Val[1] * Cf(&y[i*incy])._Val[1];
}
}
pCf(z) = zdotc;
}
#else
_Complex float zdotc = 0.0;
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
}
}
pCf(z) = zdotc;
}
#endif
static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
integer n = *n_, incx = *incx_, incy = *incy_, i;
#ifdef _MSC_VER
_Dcomplex zdotc = {0.0, 0.0};
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += conj(Cd(&x[i]))._Val[0] * Cd(&y[i])._Val[0];
zdotc._Val[1] += conj(Cd(&x[i]))._Val[1] * Cd(&y[i])._Val[1];
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += conj(Cd(&x[i*incx]))._Val[0] * Cd(&y[i*incy])._Val[0];
zdotc._Val[1] += conj(Cd(&x[i*incx]))._Val[1] * Cd(&y[i*incy])._Val[1];
}
}
pCd(z) = zdotc;
}
#else
_Complex double zdotc = 0.0;
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
}
}
pCd(z) = zdotc;
}
#endif
static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
integer n = *n_, incx = *incx_, incy = *incy_, i;
#ifdef _MSC_VER
_Fcomplex zdotc = {0.0, 0.0};
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += Cf(&x[i])._Val[0] * Cf(&y[i])._Val[0];
zdotc._Val[1] += Cf(&x[i])._Val[1] * Cf(&y[i])._Val[1];
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += Cf(&x[i*incx])._Val[0] * Cf(&y[i*incy])._Val[0];
zdotc._Val[1] += Cf(&x[i*incx])._Val[1] * Cf(&y[i*incy])._Val[1];
}
}
pCf(z) = zdotc;
}
#else
_Complex float zdotc = 0.0;
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += Cf(&x[i]) * Cf(&y[i]);
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
}
}
pCf(z) = zdotc;
}
#endif
static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
integer n = *n_, incx = *incx_, incy = *incy_, i;
#ifdef _MSC_VER
_Dcomplex zdotc = {0.0, 0.0};
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += Cd(&x[i])._Val[0] * Cd(&y[i])._Val[0];
zdotc._Val[1] += Cd(&x[i])._Val[1] * Cd(&y[i])._Val[1];
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += Cd(&x[i*incx])._Val[0] * Cd(&y[i*incy])._Val[0];
zdotc._Val[1] += Cd(&x[i*incx])._Val[1] * Cd(&y[i*incy])._Val[1];
}
}
pCd(z) = zdotc;
}
#else
_Complex double zdotc = 0.0;
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += Cd(&x[i]) * Cd(&y[i]);
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
}
}
pCd(z) = zdotc;
}
#endif
/* -- translated by f2c (version 20000121).
You must link the resulting object file with the libraries:
-lf2c -lm (in that order)

View File

@ -223,7 +223,6 @@ typedef struct Namelist Namelist;
#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
#define sig_die(s, kill) { exit(1); }
#define s_stop(s, n) {exit(0);}
static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
#define z_abs(z) (cabs(Cd(z)))
#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
@ -237,145 +236,5 @@ static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
/* procedure parameter types for -A and -C++ */
#define F2C_proc_par_types 1
#ifdef __cplusplus
typedef logical (*L_fp)(...);
#else
typedef logical (*L_fp)();
#endif
static float spow_ui(float x, integer n) {
float pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
static double dpow_ui(double x, integer n) {
double pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
static _Complex float cpow_ui(_Complex float x, integer n) {
_Complex float pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
static _Complex double zpow_ui(_Complex double x, integer n) {
_Complex double pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
static integer pow_ii(integer x, integer n) {
integer pow; unsigned long int u;
if (n <= 0) {
if (n == 0 || x == 1) pow = 1;
else if (x != -1) pow = x == 0 ? 1/x : 0;
else n = -n;
}
if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
u = n;
for(pow = 1; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
static integer dmaxloc_(double *w, integer s, integer e, integer *n)
{
double m; integer i, mi;
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
if (w[i-1]>m) mi=i ,m=w[i-1];
return mi-s+1;
}
static integer smaxloc_(float *w, integer s, integer e, integer *n)
{
float m; integer i, mi;
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
if (w[i-1]>m) mi=i ,m=w[i-1];
return mi-s+1;
}
static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
integer n = *n_, incx = *incx_, incy = *incy_, i;
_Complex float zdotc = 0.0;
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
}
}
pCf(z) = zdotc;
}
static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
integer n = *n_, incx = *incx_, incy = *incy_, i;
_Complex double zdotc = 0.0;
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
}
}
pCd(z) = zdotc;
}
static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
integer n = *n_, incx = *incx_, incy = *incy_, i;
_Complex float zdotc = 0.0;
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += Cf(&x[i]) * Cf(&y[i]);
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
}
}
pCf(z) = zdotc;
}
static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
integer n = *n_, incx = *incx_, incy = *incy_, i;
_Complex double zdotc = 0.0;
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += Cd(&x[i]) * Cd(&y[i]);
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
}
}
pCd(z) = zdotc;
}
#endif

View File

@ -223,7 +223,6 @@ typedef struct Namelist Namelist;
#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
#define sig_die(s, kill) { exit(1); }
#define s_stop(s, n) {exit(0);}
static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
#define z_abs(z) (cabs(Cd(z)))
#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
@ -237,149 +236,10 @@ static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
/* procedure parameter types for -A and -C++ */
#define F2C_proc_par_types 1
#ifdef __cplusplus
typedef logical (*L_fp)(...);
#else
typedef logical (*L_fp)();
#endif
static float spow_ui(float x, integer n) {
float pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
static double dpow_ui(double x, integer n) {
double pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
static _Complex float cpow_ui(_Complex float x, integer n) {
_Complex float pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
static _Complex double zpow_ui(_Complex double x, integer n) {
_Complex double pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
static integer pow_ii(integer x, integer n) {
integer pow; unsigned long int u;
if (n <= 0) {
if (n == 0 || x == 1) pow = 1;
else if (x != -1) pow = x == 0 ? 1/x : 0;
else n = -n;
}
if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
u = n;
for(pow = 1; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
static integer dmaxloc_(double *w, integer s, integer e, integer *n)
{
double m; integer i, mi;
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
if (w[i-1]>m) mi=i ,m=w[i-1];
return mi-s+1;
}
static integer smaxloc_(float *w, integer s, integer e, integer *n)
{
float m; integer i, mi;
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
if (w[i-1]>m) mi=i ,m=w[i-1];
return mi-s+1;
}
static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
integer n = *n_, incx = *incx_, incy = *incy_, i;
_Complex float zdotc = 0.0;
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
}
}
pCf(z) = zdotc;
}
static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
integer n = *n_, incx = *incx_, incy = *incy_, i;
_Complex double zdotc = 0.0;
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
}
}
pCd(z) = zdotc;
}
static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
integer n = *n_, incx = *incx_, incy = *incy_, i;
_Complex float zdotc = 0.0;
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += Cf(&x[i]) * Cf(&y[i]);
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
}
}
pCf(z) = zdotc;
}
static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
integer n = *n_, incx = *incx_, incy = *incy_, i;
_Complex double zdotc = 0.0;
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += Cd(&x[i]) * Cd(&y[i]);
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
}
}
pCd(z) = zdotc;
}
#endif
/* -- translated by f2c (version 20000121).
/*
You must link the resulting object file with the libraries:
-lf2c -lm (in that order)
*/

View File

@ -247,7 +247,6 @@ typedef struct Namelist Namelist;
#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
#define sig_die(s, kill) { exit(1); }
#define s_stop(s, n) {exit(0);}
static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
#define z_abs(z) (cabs(Cd(z)))
#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
@ -261,248 +260,8 @@ static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
/* procedure parameter types for -A and -C++ */
#define F2C_proc_par_types 1
#ifdef __cplusplus
typedef logical (*L_fp)(...);
#else
typedef logical (*L_fp)();
#endif
static float spow_ui(float x, integer n) {
float pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
static double dpow_ui(double x, integer n) {
double pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
#ifdef _MSC_VER
static _Fcomplex cpow_ui(complex x, integer n) {
complex pow={1.0,0.0}; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i;
for(u = n; ; ) {
if(u & 01) pow.r *= x.r, pow.i *= x.i;
if(u >>= 1) x.r *= x.r, x.i *= x.i;
else break;
}
}
_Fcomplex p={pow.r, pow.i};
return p;
}
#else
static _Complex float cpow_ui(_Complex float x, integer n) {
_Complex float pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
#endif
#ifdef _MSC_VER
static _Dcomplex zpow_ui(_Dcomplex x, integer n) {
_Dcomplex pow={1.0,0.0}; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1];
for(u = n; ; ) {
if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1];
if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1];
else break;
}
}
_Dcomplex p = {pow._Val[0], pow._Val[1]};
return p;
}
#else
static _Complex double zpow_ui(_Complex double x, integer n) {
_Complex double pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
#endif
static integer pow_ii(integer x, integer n) {
integer pow; unsigned long int u;
if (n <= 0) {
if (n == 0 || x == 1) pow = 1;
else if (x != -1) pow = x == 0 ? 1/x : 0;
else n = -n;
}
if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
u = n;
for(pow = 1; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
static integer dmaxloc_(double *w, integer s, integer e, integer *n)
{
double m; integer i, mi;
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
if (w[i-1]>m) mi=i ,m=w[i-1];
return mi-s+1;
}
static integer smaxloc_(float *w, integer s, integer e, integer *n)
{
float m; integer i, mi;
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
if (w[i-1]>m) mi=i ,m=w[i-1];
return mi-s+1;
}
static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
integer n = *n_, incx = *incx_, incy = *incy_, i;
#ifdef _MSC_VER
_Fcomplex zdotc = {0.0, 0.0};
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += conjf(Cf(&x[i]))._Val[0] * Cf(&y[i])._Val[0];
zdotc._Val[1] += conjf(Cf(&x[i]))._Val[1] * Cf(&y[i])._Val[1];
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += conjf(Cf(&x[i*incx]))._Val[0] * Cf(&y[i*incy])._Val[0];
zdotc._Val[1] += conjf(Cf(&x[i*incx]))._Val[1] * Cf(&y[i*incy])._Val[1];
}
}
pCf(z) = zdotc;
}
#else
_Complex float zdotc = 0.0;
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
}
}
pCf(z) = zdotc;
}
#endif
static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
integer n = *n_, incx = *incx_, incy = *incy_, i;
#ifdef _MSC_VER
_Dcomplex zdotc = {0.0, 0.0};
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += conj(Cd(&x[i]))._Val[0] * Cd(&y[i])._Val[0];
zdotc._Val[1] += conj(Cd(&x[i]))._Val[1] * Cd(&y[i])._Val[1];
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += conj(Cd(&x[i*incx]))._Val[0] * Cd(&y[i*incy])._Val[0];
zdotc._Val[1] += conj(Cd(&x[i*incx]))._Val[1] * Cd(&y[i*incy])._Val[1];
}
}
pCd(z) = zdotc;
}
#else
_Complex double zdotc = 0.0;
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
}
}
pCd(z) = zdotc;
}
#endif
static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
integer n = *n_, incx = *incx_, incy = *incy_, i;
#ifdef _MSC_VER
_Fcomplex zdotc = {0.0, 0.0};
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += Cf(&x[i])._Val[0] * Cf(&y[i])._Val[0];
zdotc._Val[1] += Cf(&x[i])._Val[1] * Cf(&y[i])._Val[1];
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += Cf(&x[i*incx])._Val[0] * Cf(&y[i*incy])._Val[0];
zdotc._Val[1] += Cf(&x[i*incx])._Val[1] * Cf(&y[i*incy])._Val[1];
}
}
pCf(z) = zdotc;
}
#else
_Complex float zdotc = 0.0;
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += Cf(&x[i]) * Cf(&y[i]);
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
}
}
pCf(z) = zdotc;
}
#endif
static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
integer n = *n_, incx = *incx_, incy = *incy_, i;
#ifdef _MSC_VER
_Dcomplex zdotc = {0.0, 0.0};
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += Cd(&x[i])._Val[0] * Cd(&y[i])._Val[0];
zdotc._Val[1] += Cd(&x[i])._Val[1] * Cd(&y[i])._Val[1];
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += Cd(&x[i*incx])._Val[0] * Cd(&y[i*incy])._Val[0];
zdotc._Val[1] += Cd(&x[i*incx])._Val[1] * Cd(&y[i*incy])._Val[1];
}
}
pCd(z) = zdotc;
}
#else
_Complex double zdotc = 0.0;
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += Cd(&x[i]) * Cd(&y[i]);
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
}
}
pCd(z) = zdotc;
}
#endif
/* -- translated by f2c (version 20000121).
/*
You must link the resulting object file with the libraries:
-lf2c -lm (in that order)
*/

View File

@ -223,7 +223,6 @@ typedef struct Namelist Namelist;
#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
#define sig_die(s, kill) { exit(1); }
#define s_stop(s, n) {exit(0);}
static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
#define z_abs(z) (cabs(Cd(z)))
#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
@ -237,149 +236,10 @@ static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
/* procedure parameter types for -A and -C++ */
#define F2C_proc_par_types 1
#ifdef __cplusplus
typedef logical (*L_fp)(...);
#else
typedef logical (*L_fp)();
#endif
static float spow_ui(float x, integer n) {
float pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
static double dpow_ui(double x, integer n) {
double pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
static _Complex float cpow_ui(_Complex float x, integer n) {
_Complex float pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
static _Complex double zpow_ui(_Complex double x, integer n) {
_Complex double pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
static integer pow_ii(integer x, integer n) {
integer pow; unsigned long int u;
if (n <= 0) {
if (n == 0 || x == 1) pow = 1;
else if (x != -1) pow = x == 0 ? 1/x : 0;
else n = -n;
}
if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
u = n;
for(pow = 1; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
static integer dmaxloc_(double *w, integer s, integer e, integer *n)
{
double m; integer i, mi;
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
if (w[i-1]>m) mi=i ,m=w[i-1];
return mi-s+1;
}
static integer smaxloc_(float *w, integer s, integer e, integer *n)
{
float m; integer i, mi;
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
if (w[i-1]>m) mi=i ,m=w[i-1];
return mi-s+1;
}
static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
integer n = *n_, incx = *incx_, incy = *incy_, i;
_Complex float zdotc = 0.0;
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
}
}
pCf(z) = zdotc;
}
static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
integer n = *n_, incx = *incx_, incy = *incy_, i;
_Complex double zdotc = 0.0;
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
}
}
pCd(z) = zdotc;
}
static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
integer n = *n_, incx = *incx_, incy = *incy_, i;
_Complex float zdotc = 0.0;
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += Cf(&x[i]) * Cf(&y[i]);
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
}
}
pCf(z) = zdotc;
}
static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
integer n = *n_, incx = *incx_, incy = *incy_, i;
_Complex double zdotc = 0.0;
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += Cd(&x[i]) * Cd(&y[i]);
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
}
}
pCd(z) = zdotc;
}
#endif
/* -- translated by f2c (version 20000121).
/*
You must link the resulting object file with the libraries:
-lf2c -lm (in that order)
*/

View File

@ -247,7 +247,6 @@ typedef struct Namelist Namelist;
#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
#define sig_die(s, kill) { exit(1); }
#define s_stop(s, n) {exit(0);}
static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
#define z_abs(z) (cabs(Cd(z)))
#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
@ -261,11 +260,6 @@ static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
/* procedure parameter types for -A and -C++ */
#define F2C_proc_par_types 1
#ifdef __cplusplus
typedef logical (*L_fp)(...);
#else
typedef logical (*L_fp)();
#endif
static float spow_ui(float x, integer n) {
float pow=1.0; unsigned long int u;
@ -279,229 +273,6 @@ static float spow_ui(float x, integer n) {
}
return pow;
}
static double dpow_ui(double x, integer n) {
double pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
#ifdef _MSC_VER
static _Fcomplex cpow_ui(complex x, integer n) {
complex pow={1.0,0.0}; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i;
for(u = n; ; ) {
if(u & 01) pow.r *= x.r, pow.i *= x.i;
if(u >>= 1) x.r *= x.r, x.i *= x.i;
else break;
}
}
_Fcomplex p={pow.r, pow.i};
return p;
}
#else
static _Complex float cpow_ui(_Complex float x, integer n) {
_Complex float pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
#endif
#ifdef _MSC_VER
static _Dcomplex zpow_ui(_Dcomplex x, integer n) {
_Dcomplex pow={1.0,0.0}; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1];
for(u = n; ; ) {
if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1];
if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1];
else break;
}
}
_Dcomplex p = {pow._Val[0], pow._Val[1]};
return p;
}
#else
static _Complex double zpow_ui(_Complex double x, integer n) {
_Complex double pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
#endif
static integer pow_ii(integer x, integer n) {
integer pow; unsigned long int u;
if (n <= 0) {
if (n == 0 || x == 1) pow = 1;
else if (x != -1) pow = x == 0 ? 1/x : 0;
else n = -n;
}
if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
u = n;
for(pow = 1; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
static integer dmaxloc_(double *w, integer s, integer e, integer *n)
{
double m; integer i, mi;
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
if (w[i-1]>m) mi=i ,m=w[i-1];
return mi-s+1;
}
static integer smaxloc_(float *w, integer s, integer e, integer *n)
{
float m; integer i, mi;
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
if (w[i-1]>m) mi=i ,m=w[i-1];
return mi-s+1;
}
static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
integer n = *n_, incx = *incx_, incy = *incy_, i;
#ifdef _MSC_VER
_Fcomplex zdotc = {0.0, 0.0};
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += conjf(Cf(&x[i]))._Val[0] * Cf(&y[i])._Val[0];
zdotc._Val[1] += conjf(Cf(&x[i]))._Val[1] * Cf(&y[i])._Val[1];
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += conjf(Cf(&x[i*incx]))._Val[0] * Cf(&y[i*incy])._Val[0];
zdotc._Val[1] += conjf(Cf(&x[i*incx]))._Val[1] * Cf(&y[i*incy])._Val[1];
}
}
pCf(z) = zdotc;
}
#else
_Complex float zdotc = 0.0;
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
}
}
pCf(z) = zdotc;
}
#endif
static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
integer n = *n_, incx = *incx_, incy = *incy_, i;
#ifdef _MSC_VER
_Dcomplex zdotc = {0.0, 0.0};
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += conj(Cd(&x[i]))._Val[0] * Cd(&y[i])._Val[0];
zdotc._Val[1] += conj(Cd(&x[i]))._Val[1] * Cd(&y[i])._Val[1];
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += conj(Cd(&x[i*incx]))._Val[0] * Cd(&y[i*incy])._Val[0];
zdotc._Val[1] += conj(Cd(&x[i*incx]))._Val[1] * Cd(&y[i*incy])._Val[1];
}
}
pCd(z) = zdotc;
}
#else
_Complex double zdotc = 0.0;
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
}
}
pCd(z) = zdotc;
}
#endif
static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
integer n = *n_, incx = *incx_, incy = *incy_, i;
#ifdef _MSC_VER
_Fcomplex zdotc = {0.0, 0.0};
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += Cf(&x[i])._Val[0] * Cf(&y[i])._Val[0];
zdotc._Val[1] += Cf(&x[i])._Val[1] * Cf(&y[i])._Val[1];
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += Cf(&x[i*incx])._Val[0] * Cf(&y[i*incy])._Val[0];
zdotc._Val[1] += Cf(&x[i*incx])._Val[1] * Cf(&y[i*incy])._Val[1];
}
}
pCf(z) = zdotc;
}
#else
_Complex float zdotc = 0.0;
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += Cf(&x[i]) * Cf(&y[i]);
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
}
}
pCf(z) = zdotc;
}
#endif
static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
integer n = *n_, incx = *incx_, incy = *incy_, i;
#ifdef _MSC_VER
_Dcomplex zdotc = {0.0, 0.0};
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += Cd(&x[i])._Val[0] * Cd(&y[i])._Val[0];
zdotc._Val[1] += Cd(&x[i])._Val[1] * Cd(&y[i])._Val[1];
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += Cd(&x[i*incx])._Val[0] * Cd(&y[i*incy])._Val[0];
zdotc._Val[1] += Cd(&x[i*incx])._Val[1] * Cd(&y[i*incy])._Val[1];
}
}
pCd(z) = zdotc;
}
#else
_Complex double zdotc = 0.0;
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += Cd(&x[i]) * Cd(&y[i]);
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
}
}
pCd(z) = zdotc;
}
#endif
/* -- translated by f2c (version 20000121).
You must link the resulting object file with the libraries:
-lf2c -lm (in that order)

View File

@ -223,7 +223,6 @@ typedef struct Namelist Namelist;
#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
#define sig_die(s, kill) { exit(1); }
#define s_stop(s, n) {exit(0);}
static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
#define z_abs(z) (cabs(Cd(z)))
#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
@ -237,145 +236,5 @@ static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
/* procedure parameter types for -A and -C++ */
#define F2C_proc_par_types 1
#ifdef __cplusplus
typedef logical (*L_fp)(...);
#else
typedef logical (*L_fp)();
#endif
static float spow_ui(float x, integer n) {
float pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
static double dpow_ui(double x, integer n) {
double pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
static _Complex float cpow_ui(_Complex float x, integer n) {
_Complex float pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
static _Complex double zpow_ui(_Complex double x, integer n) {
_Complex double pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
static integer pow_ii(integer x, integer n) {
integer pow; unsigned long int u;
if (n <= 0) {
if (n == 0 || x == 1) pow = 1;
else if (x != -1) pow = x == 0 ? 1/x : 0;
else n = -n;
}
if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
u = n;
for(pow = 1; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
static integer dmaxloc_(double *w, integer s, integer e, integer *n)
{
double m; integer i, mi;
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
if (w[i-1]>m) mi=i ,m=w[i-1];
return mi-s+1;
}
static integer smaxloc_(float *w, integer s, integer e, integer *n)
{
float m; integer i, mi;
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
if (w[i-1]>m) mi=i ,m=w[i-1];
return mi-s+1;
}
static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
integer n = *n_, incx = *incx_, incy = *incy_, i;
_Complex float zdotc = 0.0;
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
}
}
pCf(z) = zdotc;
}
static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
integer n = *n_, incx = *incx_, incy = *incy_, i;
_Complex double zdotc = 0.0;
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
}
}
pCd(z) = zdotc;
}
static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
integer n = *n_, incx = *incx_, incy = *incy_, i;
_Complex float zdotc = 0.0;
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += Cf(&x[i]) * Cf(&y[i]);
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
}
}
pCf(z) = zdotc;
}
static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
integer n = *n_, incx = *incx_, incy = *incy_, i;
_Complex double zdotc = 0.0;
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += Cd(&x[i]) * Cd(&y[i]);
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
}
}
pCd(z) = zdotc;
}
#endif

View File

@ -39,7 +39,7 @@ void LAPACKE_set_nancheck( int flag )
nancheck_flag = ( flag ) ? 1 : 0;
}
int LAPACKE_get_nancheck( )
int LAPACKE_get_nancheck( void )
{
char* env;
if ( nancheck_flag != -1 ) {

View File

@ -247,7 +247,6 @@ typedef struct Namelist Namelist;
#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
#define sig_die(s, kill) { exit(1); }
#define s_stop(s, n) {exit(0);}
static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
#define z_abs(z) (cabs(Cd(z)))
#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
@ -261,247 +260,7 @@ static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
/* procedure parameter types for -A and -C++ */
#define F2C_proc_par_types 1
#ifdef __cplusplus
typedef logical (*L_fp)(...);
#else
typedef logical (*L_fp)();
#endif
static float spow_ui(float x, integer n) {
float pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
static double dpow_ui(double x, integer n) {
double pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
#ifdef _MSC_VER
static _Fcomplex cpow_ui(complex x, integer n) {
complex pow={1.0,0.0}; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i;
for(u = n; ; ) {
if(u & 01) pow.r *= x.r, pow.i *= x.i;
if(u >>= 1) x.r *= x.r, x.i *= x.i;
else break;
}
}
_Fcomplex p={pow.r, pow.i};
return p;
}
#else
static _Complex float cpow_ui(_Complex float x, integer n) {
_Complex float pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
#endif
#ifdef _MSC_VER
static _Dcomplex zpow_ui(_Dcomplex x, integer n) {
_Dcomplex pow={1.0,0.0}; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1];
for(u = n; ; ) {
if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1];
if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1];
else break;
}
}
_Dcomplex p = {pow._Val[0], pow._Val[1]};
return p;
}
#else
static _Complex double zpow_ui(_Complex double x, integer n) {
_Complex double pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
#endif
static integer pow_ii(integer x, integer n) {
integer pow; unsigned long int u;
if (n <= 0) {
if (n == 0 || x == 1) pow = 1;
else if (x != -1) pow = x == 0 ? 1/x : 0;
else n = -n;
}
if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
u = n;
for(pow = 1; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
static integer dmaxloc_(double *w, integer s, integer e, integer *n)
{
double m; integer i, mi;
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
if (w[i-1]>m) mi=i ,m=w[i-1];
return mi-s+1;
}
static integer smaxloc_(float *w, integer s, integer e, integer *n)
{
float m; integer i, mi;
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
if (w[i-1]>m) mi=i ,m=w[i-1];
return mi-s+1;
}
static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
integer n = *n_, incx = *incx_, incy = *incy_, i;
#ifdef _MSC_VER
_Fcomplex zdotc = {0.0, 0.0};
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += conjf(Cf(&x[i]))._Val[0] * Cf(&y[i])._Val[0];
zdotc._Val[1] += conjf(Cf(&x[i]))._Val[1] * Cf(&y[i])._Val[1];
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += conjf(Cf(&x[i*incx]))._Val[0] * Cf(&y[i*incy])._Val[0];
zdotc._Val[1] += conjf(Cf(&x[i*incx]))._Val[1] * Cf(&y[i*incy])._Val[1];
}
}
pCf(z) = zdotc;
}
#else
_Complex float zdotc = 0.0;
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
}
}
pCf(z) = zdotc;
}
#endif
static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
integer n = *n_, incx = *incx_, incy = *incy_, i;
#ifdef _MSC_VER
_Dcomplex zdotc = {0.0, 0.0};
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += conj(Cd(&x[i]))._Val[0] * Cd(&y[i])._Val[0];
zdotc._Val[1] += conj(Cd(&x[i]))._Val[1] * Cd(&y[i])._Val[1];
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += conj(Cd(&x[i*incx]))._Val[0] * Cd(&y[i*incy])._Val[0];
zdotc._Val[1] += conj(Cd(&x[i*incx]))._Val[1] * Cd(&y[i*incy])._Val[1];
}
}
pCd(z) = zdotc;
}
#else
_Complex double zdotc = 0.0;
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
}
}
pCd(z) = zdotc;
}
#endif
static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
integer n = *n_, incx = *incx_, incy = *incy_, i;
#ifdef _MSC_VER
_Fcomplex zdotc = {0.0, 0.0};
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += Cf(&x[i])._Val[0] * Cf(&y[i])._Val[0];
zdotc._Val[1] += Cf(&x[i])._Val[1] * Cf(&y[i])._Val[1];
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += Cf(&x[i*incx])._Val[0] * Cf(&y[i*incy])._Val[0];
zdotc._Val[1] += Cf(&x[i*incx])._Val[1] * Cf(&y[i*incy])._Val[1];
}
}
pCf(z) = zdotc;
}
#else
_Complex float zdotc = 0.0;
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += Cf(&x[i]) * Cf(&y[i]);
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
}
}
pCf(z) = zdotc;
}
#endif
static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
integer n = *n_, incx = *incx_, incy = *incy_, i;
#ifdef _MSC_VER
_Dcomplex zdotc = {0.0, 0.0};
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += Cd(&x[i])._Val[0] * Cd(&y[i])._Val[0];
zdotc._Val[1] += Cd(&x[i])._Val[1] * Cd(&y[i])._Val[1];
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += Cd(&x[i*incx])._Val[0] * Cd(&y[i*incy])._Val[0];
zdotc._Val[1] += Cd(&x[i*incx])._Val[1] * Cd(&y[i*incy])._Val[1];
}
}
pCd(z) = zdotc;
}
#else
_Complex double zdotc = 0.0;
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += Cd(&x[i]) * Cd(&y[i]);
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
}
}
pCd(z) = zdotc;
}
#endif
/* -- translated by f2c (version 20000121).
You must link the resulting object file with the libraries:
-lf2c -lm (in that order)

View File

@ -247,7 +247,6 @@ typedef struct Namelist Namelist;
#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
#define sig_die(s, kill) { exit(1); }
#define s_stop(s, n) {exit(0);}
static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
#define z_abs(z) (cabs(Cd(z)))
#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
@ -261,247 +260,6 @@ static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
/* procedure parameter types for -A and -C++ */
#define F2C_proc_par_types 1
#ifdef __cplusplus
typedef logical (*L_fp)(...);
#else
typedef logical (*L_fp)();
#endif
static float spow_ui(float x, integer n) {
float pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
static double dpow_ui(double x, integer n) {
double pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
#ifdef _MSC_VER
static _Fcomplex cpow_ui(complex x, integer n) {
complex pow={1.0,0.0}; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i;
for(u = n; ; ) {
if(u & 01) pow.r *= x.r, pow.i *= x.i;
if(u >>= 1) x.r *= x.r, x.i *= x.i;
else break;
}
}
_Fcomplex p={pow.r, pow.i};
return p;
}
#else
static _Complex float cpow_ui(_Complex float x, integer n) {
_Complex float pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
#endif
#ifdef _MSC_VER
static _Dcomplex zpow_ui(_Dcomplex x, integer n) {
_Dcomplex pow={1.0,0.0}; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1];
for(u = n; ; ) {
if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1];
if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1];
else break;
}
}
_Dcomplex p = {pow._Val[0], pow._Val[1]};
return p;
}
#else
static _Complex double zpow_ui(_Complex double x, integer n) {
_Complex double pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
#endif
static integer pow_ii(integer x, integer n) {
integer pow; unsigned long int u;
if (n <= 0) {
if (n == 0 || x == 1) pow = 1;
else if (x != -1) pow = x == 0 ? 1/x : 0;
else n = -n;
}
if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
u = n;
for(pow = 1; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
static integer dmaxloc_(double *w, integer s, integer e, integer *n)
{
double m; integer i, mi;
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
if (w[i-1]>m) mi=i ,m=w[i-1];
return mi-s+1;
}
static integer smaxloc_(float *w, integer s, integer e, integer *n)
{
float m; integer i, mi;
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
if (w[i-1]>m) mi=i ,m=w[i-1];
return mi-s+1;
}
static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
integer n = *n_, incx = *incx_, incy = *incy_, i;
#ifdef _MSC_VER
_Fcomplex zdotc = {0.0, 0.0};
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += conjf(Cf(&x[i]))._Val[0] * Cf(&y[i])._Val[0];
zdotc._Val[1] += conjf(Cf(&x[i]))._Val[1] * Cf(&y[i])._Val[1];
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += conjf(Cf(&x[i*incx]))._Val[0] * Cf(&y[i*incy])._Val[0];
zdotc._Val[1] += conjf(Cf(&x[i*incx]))._Val[1] * Cf(&y[i*incy])._Val[1];
}
}
pCf(z) = zdotc;
}
#else
_Complex float zdotc = 0.0;
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
}
}
pCf(z) = zdotc;
}
#endif
static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
integer n = *n_, incx = *incx_, incy = *incy_, i;
#ifdef _MSC_VER
_Dcomplex zdotc = {0.0, 0.0};
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += conj(Cd(&x[i]))._Val[0] * Cd(&y[i])._Val[0];
zdotc._Val[1] += conj(Cd(&x[i]))._Val[1] * Cd(&y[i])._Val[1];
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += conj(Cd(&x[i*incx]))._Val[0] * Cd(&y[i*incy])._Val[0];
zdotc._Val[1] += conj(Cd(&x[i*incx]))._Val[1] * Cd(&y[i*incy])._Val[1];
}
}
pCd(z) = zdotc;
}
#else
_Complex double zdotc = 0.0;
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
}
}
pCd(z) = zdotc;
}
#endif
static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
integer n = *n_, incx = *incx_, incy = *incy_, i;
#ifdef _MSC_VER
_Fcomplex zdotc = {0.0, 0.0};
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += Cf(&x[i])._Val[0] * Cf(&y[i])._Val[0];
zdotc._Val[1] += Cf(&x[i])._Val[1] * Cf(&y[i])._Val[1];
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += Cf(&x[i*incx])._Val[0] * Cf(&y[i*incy])._Val[0];
zdotc._Val[1] += Cf(&x[i*incx])._Val[1] * Cf(&y[i*incy])._Val[1];
}
}
pCf(z) = zdotc;
}
#else
_Complex float zdotc = 0.0;
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += Cf(&x[i]) * Cf(&y[i]);
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
}
}
pCf(z) = zdotc;
}
#endif
static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
integer n = *n_, incx = *incx_, incy = *incy_, i;
#ifdef _MSC_VER
_Dcomplex zdotc = {0.0, 0.0};
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += Cd(&x[i])._Val[0] * Cd(&y[i])._Val[0];
zdotc._Val[1] += Cd(&x[i])._Val[1] * Cd(&y[i])._Val[1];
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += Cd(&x[i*incx])._Val[0] * Cd(&y[i*incy])._Val[0];
zdotc._Val[1] += Cd(&x[i*incx])._Val[1] * Cd(&y[i*incy])._Val[1];
}
}
pCd(z) = zdotc;
}
#else
_Complex double zdotc = 0.0;
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += Cd(&x[i]) * Cd(&y[i]);
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
}
}
pCd(z) = zdotc;
}
#endif
/* -- translated by f2c (version 20000121).
You must link the resulting object file with the libraries:
-lf2c -lm (in that order)

View File

@ -0,0 +1,479 @@
#include <math.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <complex.h>
#ifdef complex
#undef complex
#endif
#ifdef I
#undef I
#endif
#if defined(_WIN64)
typedef long long BLASLONG;
typedef unsigned long long BLASULONG;
#else
typedef long BLASLONG;
typedef unsigned long BLASULONG;
#endif
#ifdef LAPACK_ILP64
typedef BLASLONG blasint;
#if defined(_WIN64)
#define blasabs(x) llabs(x)
#else
#define blasabs(x) labs(x)
#endif
#else
typedef int blasint;
#define blasabs(x) abs(x)
#endif
typedef blasint integer;
typedef unsigned int uinteger;
typedef char *address;
typedef short int shortint;
typedef float real;
typedef double doublereal;
typedef struct { real r, i; } complex;
typedef struct { doublereal r, i; } doublecomplex;
#ifdef _MSC_VER
static inline _Fcomplex Cf(complex *z) {_Fcomplex zz={z->r , z->i}; return zz;}
static inline _Dcomplex Cd(doublecomplex *z) {_Dcomplex zz={z->r , z->i};return zz;}
static inline _Fcomplex * _pCf(complex *z) {return (_Fcomplex*)z;}
static inline _Dcomplex * _pCd(doublecomplex *z) {return (_Dcomplex*)z;}
#else
static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
#endif
#define pCf(z) (*_pCf(z))
#define pCd(z) (*_pCd(z))
typedef int logical;
typedef short int shortlogical;
typedef char logical1;
typedef char integer1;
#define TRUE_ (1)
#define FALSE_ (0)
/* Extern is for use with -E */
#ifndef Extern
#define Extern extern
#endif
/* I/O stuff */
typedef int flag;
typedef int ftnlen;
typedef int ftnint;
/*external read, write*/
typedef struct
{ flag cierr;
ftnint ciunit;
flag ciend;
char *cifmt;
ftnint cirec;
} cilist;
/*internal read, write*/
typedef struct
{ flag icierr;
char *iciunit;
flag iciend;
char *icifmt;
ftnint icirlen;
ftnint icirnum;
} icilist;
/*open*/
typedef struct
{ flag oerr;
ftnint ounit;
char *ofnm;
ftnlen ofnmlen;
char *osta;
char *oacc;
char *ofm;
ftnint orl;
char *oblnk;
} olist;
/*close*/
typedef struct
{ flag cerr;
ftnint cunit;
char *csta;
} cllist;
/*rewind, backspace, endfile*/
typedef struct
{ flag aerr;
ftnint aunit;
} alist;
/* inquire */
typedef struct
{ flag inerr;
ftnint inunit;
char *infile;
ftnlen infilen;
ftnint *inex; /*parameters in standard's order*/
ftnint *inopen;
ftnint *innum;
ftnint *innamed;
char *inname;
ftnlen innamlen;
char *inacc;
ftnlen inacclen;
char *inseq;
ftnlen inseqlen;
char *indir;
ftnlen indirlen;
char *infmt;
ftnlen infmtlen;
char *inform;
ftnint informlen;
char *inunf;
ftnlen inunflen;
ftnint *inrecl;
ftnint *innrec;
char *inblank;
ftnlen inblanklen;
} inlist;
#define VOID void
union Multitype { /* for multiple entry points */
integer1 g;
shortint h;
integer i;
/* longint j; */
real r;
doublereal d;
complex c;
doublecomplex z;
};
typedef union Multitype Multitype;
struct Vardesc { /* for Namelist */
char *name;
char *addr;
ftnlen *dims;
int type;
};
typedef struct Vardesc Vardesc;
struct Namelist {
char *name;
Vardesc **vars;
int nvars;
};
typedef struct Namelist Namelist;
#define abs(x) ((x) >= 0 ? (x) : -(x))
#define dabs(x) (fabs(x))
#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
#define dmin(a,b) (f2cmin(a,b))
#define dmax(a,b) (f2cmax(a,b))
#define bit_test(a,b) ((a) >> (b) & 1)
#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b)))
#define bit_set(a,b) ((a) | ((uinteger)1 << (b)))
#define abort_() { sig_die("Fortran abort routine called", 1); }
#define c_abs(z) (cabsf(Cf(z)))
#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
#ifdef _MSC_VER
#define c_div(c, a, b) {Cf(c)._Val[0] = (Cf(a)._Val[0]/Cf(b)._Val[0]); Cf(c)._Val[1]=(Cf(a)._Val[1]/Cf(b)._Val[1]);}
#define z_div(c, a, b) {Cd(c)._Val[0] = (Cd(a)._Val[0]/Cd(b)._Val[0]); Cd(c)._Val[1]=(Cd(a)._Val[1]/Cd(b)._Val[1]);}
#else
#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
#endif
#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
#define d_abs(x) (fabs(*(x)))
#define d_acos(x) (acos(*(x)))
#define d_asin(x) (asin(*(x)))
#define d_atan(x) (atan(*(x)))
#define d_atn2(x, y) (atan2(*(x),*(y)))
#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
#define r_cnjg(R, Z) { pCf(R) = conjf(Cf(Z)); }
#define d_cos(x) (cos(*(x)))
#define d_cosh(x) (cosh(*(x)))
#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
#define d_exp(x) (exp(*(x)))
#define d_imag(z) (cimag(Cd(z)))
#define r_imag(z) (cimagf(Cf(z)))
#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
#define d_log(x) (log(*(x)))
#define d_mod(x, y) (fmod(*(x), *(y)))
#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
#define d_nint(x) u_nint(*(x))
#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
#define d_sign(a,b) u_sign(*(a),*(b))
#define r_sign(a,b) u_sign(*(a),*(b))
#define d_sin(x) (sin(*(x)))
#define d_sinh(x) (sinh(*(x)))
#define d_sqrt(x) (sqrt(*(x)))
#define d_tan(x) (tan(*(x)))
#define d_tanh(x) (tanh(*(x)))
#define i_abs(x) abs(*(x))
#define i_dnnt(x) ((integer)u_nint(*(x)))
#define i_len(s, n) (n)
#define i_nint(x) ((integer)u_nint(*(x)))
#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
#define pow_si(B,E) spow_ui(*(B),*(E))
#define pow_ri(B,E) spow_ui(*(B),*(E))
#define pow_di(B,E) dpow_ui(*(B),*(E))
#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; }
#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
#define sig_die(s, kill) { exit(1); }
#define s_stop(s, n) {exit(0);}
#define z_abs(z) (cabs(Cd(z)))
#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
#define myexit_() break;
#define mycycle_() continue;
#define myceiling_(w) {ceil(w)}
#define myhuge_(w) {HUGE_VAL}
#define mymaxloc_(w,s,e,n) dmaxloc_(w,*(s),*(e),n)
/* procedure parameter types for -A and -C++ */
#define F2C_proc_par_types 1
#ifdef __cplusplus
typedef logical (*L_fp)(...);
#else
typedef logical (*L_fp)();
#endif
/* -- translated by f2c (version 20000121).
You must link the resulting object file with the libraries:
-lf2c -lm (in that order)
*/
/* Table of constant values */
static complex c_b1 = {0.f,0.f};
static complex c_b2 = {1.f,0.f};
/* > \brief \b CGELQS */
/* =========== DOCUMENTATION =========== */
/* Online html documentation available at */
/* http://www.netlib.org/lapack/explore-html/ */
/* Definition: */
/* =========== */
/* SUBROUTINE CGELQS( M, N, NRHS, A, LDA, TAU, B, LDB, WORK, LWORK, */
/* INFO ) */
/* INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS */
/* COMPLEX A( LDA, * ), B( LDB, * ), TAU( * ), */
/* $ WORK( LWORK ) */
/* > \par Purpose: */
/* ============= */
/* > */
/* > \verbatim */
/* > */
/* > Compute a minimum-norm solution */
/* > f2cmin || A*X - B || */
/* > using the LQ factorization */
/* > A = L*Q */
/* > computed by CGELQF. */
/* > \endverbatim */
/* Arguments: */
/* ========== */
/* > \param[in] M */
/* > \verbatim */
/* > M is INTEGER */
/* > The number of rows of the matrix A. M >= 0. */
/* > \endverbatim */
/* > */
/* > \param[in] N */
/* > \verbatim */
/* > N is INTEGER */
/* > The number of columns of the matrix A. N >= M >= 0. */
/* > \endverbatim */
/* > */
/* > \param[in] NRHS */
/* > \verbatim */
/* > NRHS is INTEGER */
/* > The number of columns of B. NRHS >= 0. */
/* > \endverbatim */
/* > */
/* > \param[in] A */
/* > \verbatim */
/* > A is COMPLEX array, dimension (LDA,N) */
/* > Details of the LQ factorization of the original matrix A as */
/* > returned by CGELQF. */
/* > \endverbatim */
/* > */
/* > \param[in] LDA */
/* > \verbatim */
/* > LDA is INTEGER */
/* > The leading dimension of the array A. LDA >= M. */
/* > \endverbatim */
/* > */
/* > \param[in] TAU */
/* > \verbatim */
/* > TAU is COMPLEX array, dimension (M) */
/* > Details of the orthogonal matrix Q. */
/* > \endverbatim */
/* > */
/* > \param[in,out] B */
/* > \verbatim */
/* > B is COMPLEX array, dimension (LDB,NRHS) */
/* > On entry, the m-by-nrhs right hand side matrix B. */
/* > On exit, the n-by-nrhs solution matrix X. */
/* > \endverbatim */
/* > */
/* > \param[in] LDB */
/* > \verbatim */
/* > LDB is INTEGER */
/* > The leading dimension of the array B. LDB >= N. */
/* > \endverbatim */
/* > */
/* > \param[out] WORK */
/* > \verbatim */
/* > WORK is COMPLEX array, dimension (LWORK) */
/* > \endverbatim */
/* > */
/* > \param[in] LWORK */
/* > \verbatim */
/* > LWORK is INTEGER */
/* > The length of the array WORK. LWORK must be at least NRHS, */
/* > and should be at least NRHS*NB, where NB is the block size */
/* > for this environment. */
/* > \endverbatim */
/* > */
/* > \param[out] INFO */
/* > \verbatim */
/* > INFO is INTEGER */
/* > = 0: successful exit */
/* > < 0: if INFO = -i, the i-th argument had an illegal value */
/* > \endverbatim */
/* Authors: */
/* ======== */
/* > \author Univ. of Tennessee */
/* > \author Univ. of California Berkeley */
/* > \author Univ. of Colorado Denver */
/* > \author NAG Ltd. */
/* > \ingroup complex_lin */
/* ===================================================================== */
/* Subroutine */ int cgelqs_(integer *m, integer *n, integer *nrhs, complex *
a, integer *lda, complex *tau, complex *b, integer *ldb, complex *
work, integer *lwork, integer *info)
{
/* System generated locals */
integer a_dim1, a_offset, b_dim1, b_offset, i__1;
/* Local variables */
extern /* Subroutine */ int ctrsm_(char *, char *, char *, char *,
integer *, integer *, complex *, complex *, integer *, complex *,
integer *), claset_(char *,
integer *, integer *, complex *, complex *, complex *, integer *), xerbla_(char *, integer *), cunmlq_(char *, char
*, integer *, integer *, integer *, complex *, integer *, complex
*, complex *, integer *, complex *, integer *, integer *);
/* -- LAPACK test routine -- */
/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */
/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
/* ===================================================================== */
/* Test the input parameters. */
/* Parameter adjustments */
a_dim1 = *lda;
a_offset = 1 + a_dim1 * 1;
a -= a_offset;
--tau;
b_dim1 = *ldb;
b_offset = 1 + b_dim1 * 1;
b -= b_offset;
--work;
/* Function Body */
*info = 0;
if (*m < 0) {
*info = -1;
} else if (*n < 0 || *m > *n) {
*info = -2;
} else if (*nrhs < 0) {
*info = -3;
} else if (*lda < f2cmax(1,*m)) {
*info = -5;
} else if (*ldb < f2cmax(1,*n)) {
*info = -8;
} else if (*lwork < 1 || *lwork < *nrhs && *m > 0 && *n > 0) {
*info = -10;
}
if (*info != 0) {
i__1 = -(*info);
xerbla_("CGELQS", &i__1);
return 0;
}
/* Quick return if possible */
if (*n == 0 || *nrhs == 0 || *m == 0) {
return 0;
}
/* Solve L*X = B(1:m,:) */
ctrsm_("Left", "Lower", "No transpose", "Non-unit", m, nrhs, &c_b2, &a[
a_offset], lda, &b[b_offset], ldb);
/* Set B(m+1:n,:) to zero */
if (*m < *n) {
i__1 = *n - *m;
claset_("Full", &i__1, nrhs, &c_b1, &c_b1, &b[*m + 1 + b_dim1], ldb);
}
/* B := Q' * B */
cunmlq_("Left", "Conjugate transpose", n, nrhs, m, &a[a_offset], lda, &
tau[1], &b[b_offset], ldb, &work[1], lwork, info);
return 0;
/* End of CGELQS */
} /* cgelqs_ */

Some files were not shown because too many files have changed in this diff Show More