Merge branch 'develop' into release-0.3.0
This commit is contained in:
commit
e46971b9d5
|
@ -2,6 +2,9 @@ name: continuous build
|
|||
|
||||
on: [push, pull_request]
|
||||
|
||||
permissions:
|
||||
contents: read # to fetch code (actions/checkout)
|
||||
|
||||
jobs:
|
||||
build:
|
||||
runs-on: ${{ matrix.os }}
|
||||
|
@ -34,7 +37,7 @@ jobs:
|
|||
- name: Install Dependencies
|
||||
run: |
|
||||
if [ "$RUNNER_OS" == "Linux" ]; then
|
||||
sudo apt-get install -y gfortran cmake ccache
|
||||
sudo apt-get install -y gfortran cmake ccache libtinfo5
|
||||
elif [ "$RUNNER_OS" == "macOS" ]; then
|
||||
# It looks like "gfortran" isn't working correctly unless "gcc" is re-installed.
|
||||
brew reinstall gcc
|
||||
|
@ -150,6 +153,7 @@ jobs:
|
|||
matrix:
|
||||
msystem: [MINGW64, MINGW32, CLANG64]
|
||||
idx: [int32, int64]
|
||||
build-type: [Release]
|
||||
include:
|
||||
- msystem: MINGW64
|
||||
idx: int32
|
||||
|
@ -173,6 +177,11 @@ jobs:
|
|||
idx64-flags: -DBINARY=64 -DINTERFACE64=1
|
||||
target-prefix: mingw-w64-clang-x86_64
|
||||
c-lapack-flags: -DC_LAPACK=ON
|
||||
- msystem: MINGW64
|
||||
idx: int32
|
||||
target-prefix: mingw-w64-x86_64
|
||||
fc-pkg: mingw-w64-x86_64-gcc-fortran
|
||||
build-type: None
|
||||
exclude:
|
||||
- msystem: MINGW32
|
||||
idx: int64
|
||||
|
@ -215,11 +224,11 @@ jobs:
|
|||
path: C:/msys64/home/runneradmin/.ccache
|
||||
# We include the commit sha in the cache key, as new cache entries are
|
||||
# only created if there is no existing entry for the key yet.
|
||||
key: ccache-msys2-${{ matrix.msystem }}-${{ matrix.idx }}-${{ github.ref }}-${{ github.sha }}
|
||||
key: ccache-msys2-${{ matrix.msystem }}-${{ matrix.idx }}-${{ matrix.build-type }}-${{ github.ref }}-${{ github.sha }}
|
||||
# Restore a matching ccache cache entry. Prefer same branch.
|
||||
restore-keys: |
|
||||
ccache-msys2-${{ matrix.msystem }}-${{ matrix.idx }}-${{ github.ref }}
|
||||
ccache-msys2-${{ matrix.msystem }}-${{ matrix.idx }}
|
||||
ccache-msys2-${{ matrix.msystem }}-${{ matrix.idx }}-${{ matrix.build-type }}-${{ github.ref }}
|
||||
ccache-msys2-${{ matrix.msystem }}-${{ matrix.idx }}-${{ matrix.build-type }}
|
||||
|
||||
- name: Configure ccache
|
||||
# Limit the maximum size and switch on compression to avoid exceeding the total disk or cache quota.
|
||||
|
@ -235,7 +244,8 @@ jobs:
|
|||
- name: Configure OpenBLAS
|
||||
run: |
|
||||
mkdir build && cd build
|
||||
cmake -DBUILD_SHARED_LIBS=ON \
|
||||
cmake -DCMAKE_BUILD_TYPE=${{ matrix.build-type }} \
|
||||
-DBUILD_SHARED_LIBS=ON \
|
||||
-DBUILD_STATIC_LIBS=ON \
|
||||
-DDYNAMIC_ARCH=ON \
|
||||
-DUSE_THREAD=ON \
|
||||
|
@ -257,3 +267,54 @@ jobs:
|
|||
- name: Run tests
|
||||
timeout-minutes: 60
|
||||
run: cd build && ctest
|
||||
|
||||
|
||||
cross_build:
|
||||
runs-on: ubuntu-22.04
|
||||
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
include:
|
||||
- target: mips64el
|
||||
triple: mips64el-linux-gnuabi64
|
||||
opts: DYNAMIC_ARCH=1 TARGET=GENERIC
|
||||
- target: riscv64
|
||||
triple: riscv64-linux-gnu
|
||||
opts: TARGET=RISCV64_GENERIC
|
||||
- target: mipsel
|
||||
triple: mipsel-linux-gnu
|
||||
opts: TARGET=MIPS1004K
|
||||
- target: alpha
|
||||
triple: alpha-linux-gnu
|
||||
opts: TARGET=EV4
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v3
|
||||
|
||||
- name: Install Dependencies
|
||||
run: |
|
||||
sudo apt-get install -y ccache gcc-${{ matrix.triple }} gfortran-${{ matrix.triple }} libgomp1-${{ matrix.target }}-cross
|
||||
|
||||
- name: Compilation cache
|
||||
uses: actions/cache@v3
|
||||
with:
|
||||
path: ~/.ccache
|
||||
key: ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }}-${{ github.sha }}
|
||||
restore-keys: |
|
||||
ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }}
|
||||
ccache-${{ runner.os }}-${{ matrix.target }}
|
||||
|
||||
- name: Configure ccache
|
||||
run: |
|
||||
# Limit the maximum size and switch on compression to avoid exceeding the total disk or cache quota (5 GB).
|
||||
test -d ~/.ccache || mkdir -p ~/.ccache
|
||||
echo "max_size = 300M" > ~/.ccache/ccache.conf
|
||||
echo "compression = true" >> ~/.ccache/ccache.conf
|
||||
ccache -s
|
||||
|
||||
|
||||
- name: Build OpenBLAS
|
||||
run: |
|
||||
make -j$(nproc) HOSTCC="ccache gcc" CC="ccache ${{ matrix.triple }}-gcc" FC="ccache ${{ matrix.triple }}-gfortran" ARCH=${{ matrix.target }} ${{ matrix.opts }}
|
||||
|
|
|
@ -0,0 +1,117 @@
|
|||
name: mips64 qemu test
|
||||
|
||||
on: [push, pull_request]
|
||||
|
||||
permissions:
|
||||
contents: read # to fetch code (actions/checkout)
|
||||
|
||||
jobs:
|
||||
TEST:
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
include:
|
||||
- target: MIPS64_GENERIC
|
||||
triple: mips64el-linux-gnuabi64
|
||||
opts: NO_SHARED=1 TARGET=MIPS64_GENERIC
|
||||
- target: SICORTEX
|
||||
triple: mips64el-linux-gnuabi64
|
||||
opts: NO_SHARED=1 TARGET=SICORTEX
|
||||
- target: I6400
|
||||
triple: mipsisa64r6el-linux-gnuabi64
|
||||
opts: NO_SHARED=1 TARGET=I6400
|
||||
- target: P6600
|
||||
triple: mipsisa64r6el-linux-gnuabi64
|
||||
opts: NO_SHARED=1 TARGET=P6600
|
||||
- target: I6500
|
||||
triple: mipsisa64r6el-linux-gnuabi64
|
||||
opts: NO_SHARED=1 TARGET=I6500
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v3
|
||||
|
||||
- name: install build deps
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install autoconf automake autotools-dev ninja-build make ccache \
|
||||
gcc-${{ matrix.triple }} gfortran-${{ matrix.triple }} libgomp1-mips64el-cross
|
||||
|
||||
- name: checkout qemu
|
||||
uses: actions/checkout@v3
|
||||
with:
|
||||
repository: qemu/qemu
|
||||
path: qemu
|
||||
ref: 79dfa177ae348bb5ab5f97c0915359b13d6186e2
|
||||
|
||||
- name: build qemu
|
||||
run: |
|
||||
cd qemu
|
||||
./configure --prefix=$GITHUB_WORKSPACE/qemu-install --target-list=mips64el-linux-user --disable-system
|
||||
make -j$(nproc)
|
||||
make install
|
||||
|
||||
- name: Compilation cache
|
||||
uses: actions/cache@v3
|
||||
with:
|
||||
path: ~/.ccache
|
||||
key: ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }}-${{ github.sha }}
|
||||
restore-keys: |
|
||||
ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }}
|
||||
ccache-${{ runner.os }}-${{ matrix.target }}
|
||||
|
||||
- name: Configure ccache
|
||||
run: |
|
||||
test -d ~/.ccache || mkdir -p ~/.ccache
|
||||
echo "max_size = 300M" > ~/.ccache/ccache.conf
|
||||
echo "compression = true" >> ~/.ccache/ccache.conf
|
||||
ccache -s
|
||||
|
||||
- name: build OpenBLAS
|
||||
run: make CC='ccache ${{ matrix.triple }}-gcc -static' FC='ccache ${{ matrix.triple }}-gfortran -static' ${{ matrix.opts }} HOSTCC='ccache gcc' -j$(nproc)
|
||||
|
||||
- name: test
|
||||
run: |
|
||||
export PATH=$GITHUB_WORKSPACE/qemu-install/bin/:$PATH
|
||||
qemu-mips64el ./utest/openblas_utest
|
||||
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xscblat1
|
||||
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xdcblat1
|
||||
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xccblat1
|
||||
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xzcblat1
|
||||
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xscblat2 < ./ctest/sin2
|
||||
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xdcblat2 < ./ctest/din2
|
||||
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xccblat2 < ./ctest/cin2
|
||||
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xzcblat2 < ./ctest/zin2
|
||||
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xscblat3 < ./ctest/sin3
|
||||
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xdcblat3 < ./ctest/din3
|
||||
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xccblat3 < ./ctest/cin3
|
||||
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xzcblat3 < ./ctest/zin3
|
||||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/sblat1
|
||||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/dblat1
|
||||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/cblat1
|
||||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/zblat1
|
||||
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/sblat1
|
||||
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/dblat1
|
||||
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/cblat1
|
||||
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/zblat1
|
||||
rm -f ./test/?BLAT2.SUMM
|
||||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/sblat2 < ./test/sblat2.dat
|
||||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/dblat2 < ./test/dblat2.dat
|
||||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/cblat2 < ./test/cblat2.dat
|
||||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/zblat2 < ./test/zblat2.dat
|
||||
rm -f ./test/?BLAT2.SUMM
|
||||
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/sblat2 < ./test/sblat2.dat
|
||||
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/dblat2 < ./test/dblat2.dat
|
||||
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/cblat2 < ./test/cblat2.dat
|
||||
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/zblat2 < ./test/zblat2.dat
|
||||
rm -f ./test/?BLAT3.SUMM
|
||||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/sblat3 < ./test/sblat3.dat
|
||||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/dblat3 < ./test/dblat3.dat
|
||||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/cblat3 < ./test/cblat3.dat
|
||||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/zblat3 < ./test/zblat3.dat
|
||||
rm -f ./test/?BLAT3.SUMM
|
||||
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/sblat3 < ./test/sblat3.dat
|
||||
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/dblat3 < ./test/dblat3.dat
|
||||
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/cblat3 < ./test/cblat3.dat
|
||||
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/zblat3 < ./test/zblat3.dat
|
|
@ -17,6 +17,10 @@ on:
|
|||
# it only makes sense to test if this file has been changed
|
||||
|
||||
name: Nightly-Homebrew-Build
|
||||
|
||||
permissions:
|
||||
contents: read # to fetch code (actions/checkout)
|
||||
|
||||
jobs:
|
||||
build-OpenBLAS-with-Homebrew:
|
||||
runs-on: macos-latest
|
||||
|
@ -28,6 +32,8 @@ jobs:
|
|||
HOMEBREW_NO_AUTO_UPDATE: "ON"
|
||||
HOMEBREW_NO_BOTTLE_SOURCE_FALLBACK: "ON"
|
||||
HOMEBREW_NO_INSTALL_CLEANUP: "ON"
|
||||
HOMEBREW_NO_INSTALLED_DEPENDENTS_CHECK: "ON"
|
||||
HOMEBREW_NO_INSTALL_FROM_API: "ON"
|
||||
|
||||
steps:
|
||||
- name: Random delay for cron job
|
||||
|
|
25
.travis.yml
25
.travis.yml
|
@ -30,7 +30,7 @@ matrix:
|
|||
before_script: &common-before
|
||||
- COMMON_FLAGS="DYNAMIC_ARCH=1 TARGET=POWER8 NUM_THREADS=32"
|
||||
script:
|
||||
- travis_wait 20 make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE
|
||||
- travis_wait 50 make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE
|
||||
- make -C test $COMMON_FLAGS $BTYPE
|
||||
- make -C ctest $COMMON_FLAGS $BTYPE
|
||||
- make -C utest $COMMON_FLAGS $BTYPE
|
||||
|
@ -104,7 +104,7 @@ matrix:
|
|||
- sudo apt-get update
|
||||
- sudo apt-get install gcc-9 gfortran-9 -y
|
||||
script:
|
||||
- travis_wait 20 make QUIET_MAKE=1 BINARY=64 USE_OPENMP=1 CC=gcc-9 FC=gfortran-9
|
||||
- travis_wait 50 make QUIET_MAKE=1 BINARY=64 USE_OPENMP=1 CC=gcc-9 FC=gfortran-9
|
||||
- make -C test $COMMON_FLAGS $BTYPE
|
||||
- make -C ctest $COMMON_FLAGS $BTYPE
|
||||
- make -C utest $COMMON_FLAGS $BTYPE
|
||||
|
@ -121,7 +121,7 @@ matrix:
|
|||
- sudo apt-get update
|
||||
- sudo apt-get install gcc-9 gfortran-9 -y
|
||||
script:
|
||||
- travis_wait 20 make QUIET_MAKE=1 BUILD_BFLOAT16=1 BINARY=64 USE_OPENMP=1 CC=gcc-9 FC=gfortran-9
|
||||
- travis_wait 50 make QUIET_MAKE=1 BUILD_BFLOAT16=1 BINARY=64 USE_OPENMP=1 CC=gcc-9 FC=gfortran-9
|
||||
- make -C test $COMMON_FLAGS $BTYPE
|
||||
- make -C ctest $COMMON_FLAGS $BTYPE
|
||||
- make -C utest $COMMON_FLAGS $BTYPE
|
||||
|
@ -285,6 +285,25 @@ matrix:
|
|||
- gfortran
|
||||
script:
|
||||
- travis_wait 45 make && make lapack-test
|
||||
env:
|
||||
- TARGET_BOX=NEOVERSE_N1
|
||||
|
||||
- &test-neon1-gcc8
|
||||
os: linux
|
||||
arch: arm64
|
||||
dist: focal
|
||||
group: edge
|
||||
virt: lxd
|
||||
compiler: gcc
|
||||
addons:
|
||||
apt:
|
||||
packages:
|
||||
- gcc-8
|
||||
- gfortran-8
|
||||
script:
|
||||
- travis_wait 45 make QUIET_MAKE=1 CC=gcc-8 FC=gfortran-8 DYNAMIC_ARCH=1
|
||||
env:
|
||||
- TARGET_BOX=NEOVERSE_N1-GCC8
|
||||
|
||||
# whitelist
|
||||
branches:
|
||||
|
|
|
@ -8,7 +8,7 @@ project(OpenBLAS C ASM)
|
|||
|
||||
set(OpenBLAS_MAJOR_VERSION 0)
|
||||
set(OpenBLAS_MINOR_VERSION 3)
|
||||
set(OpenBLAS_PATCH_VERSION 21)
|
||||
set(OpenBLAS_PATCH_VERSION 22)
|
||||
|
||||
set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}")
|
||||
|
||||
|
@ -36,6 +36,8 @@ option(USE_LOCKING "Use locks even in single-threaded builds to make them callab
|
|||
|
||||
option(USE_PERL "Use the older PERL scripts for build preparation instead of universal shell scripts" OFF)
|
||||
|
||||
option(NO_WARMUP "Do not run a benchmark on each startup just to find the best location for the memory buffer" ON)
|
||||
|
||||
if(${CMAKE_SYSTEM_NAME} MATCHES "Linux")
|
||||
option(NO_AFFINITY "Disable support for CPU affinity masks to avoid binding processes from e.g. R or numpy/scipy to a single core" ON)
|
||||
else()
|
||||
|
@ -212,10 +214,10 @@ if(NOT NO_LAPACKE)
|
|||
add_library(LAPACKE OBJECT ${LAPACKE_SOURCES})
|
||||
list(APPEND TARGET_OBJS "$<TARGET_OBJECTS:LAPACKE>")
|
||||
endif()
|
||||
if(BUILD_RELAPACK)
|
||||
add_library(RELAPACK OBJECT ${RELA_SOURCES})
|
||||
list(APPEND TARGET_OBJS "$<TARGET_OBJECTS:RELAPACK>")
|
||||
endif()
|
||||
#if(BUILD_RELAPACK)
|
||||
# add_library(RELAPACK OBJECT ${RELA_SOURCES})
|
||||
# list(APPEND TARGET_OBJS "$<TARGET_OBJECTS:RELAPACK>")
|
||||
#endif()
|
||||
set(OpenBLAS_LIBS "")
|
||||
if(BUILD_STATIC_LIBS)
|
||||
add_library(${OpenBLAS_LIBNAME}_static STATIC ${TARGET_OBJS} ${OpenBLAS_DEF_FILE})
|
||||
|
@ -236,7 +238,7 @@ endif()
|
|||
set_target_properties(${OpenBLAS_LIBS} PROPERTIES OUTPUT_NAME ${OpenBLAS_LIBNAME})
|
||||
|
||||
# Android needs to explicitly link against libm
|
||||
if(ANDROID)
|
||||
if (${CMAKE_SYSTEM_NAME} MATCHES "AIX|Android|Linux|FreeBSD|OpenBSD|NetBSD|DragonFly|Darwin")
|
||||
if(BUILD_STATIC_LIBS)
|
||||
target_link_libraries(${OpenBLAS_LIBNAME}_static m)
|
||||
endif()
|
||||
|
@ -396,7 +398,7 @@ if (BUILD_SHARED_LIBS AND NOT ${SYMBOLPREFIX}${SYMBOLSUFFIX} STREQUAL "")
|
|||
message(STATUS "adding suffix ${SYMBOLSUFFIX} to names of exported symbols in ${OpenBLAS_LIBNAME}")
|
||||
endif()
|
||||
|
||||
if (NOT DEFINED USE_PERL)
|
||||
if (NOT USE_PERL)
|
||||
add_custom_command(TARGET ${OpenBLAS_LIBNAME}_shared POST_BUILD
|
||||
COMMAND ${PROJECT_SOURCE_DIR}/exports/gensymbol "objcopy" "${ARCH}" "${BU}" "${EXPRECISION_IN}" "${NO_CBLAS_IN}" "${NO_LAPACK_IN}" "${NO_LAPACKE_IN}" "${NEED2UNDERSCORES_IN}" "${ONLY_CBLAS_IN}" \"${SYMBOLPREFIX}\" \"${SYMBOLSUFFIX}\" "${BUILD_LAPACK_DEPRECATED}" "${BUILD_BFLOAT16}" "${BUILD_SINGLE}" "${BUILD_DOUBLE}" "${BUILD_COMPLEX}" "${BUILD_COMPLEX16}" > ${PROJECT_BINARY_DIR}/objcopy.def
|
||||
COMMAND objcopy -v --redefine-syms ${PROJECT_BINARY_DIR}/objcopy.def ${PROJECT_BINARY_DIR}/lib/lib${OpenBLAS_LIBNAME}.so
|
||||
|
|
|
@ -211,4 +211,5 @@ In chronological order:
|
|||
* PLCT Lab, Institute of Software Chinese Academy of Sciences
|
||||
* [2022-03] Support RISC-V Vector Intrinsic 1.0 version.
|
||||
|
||||
|
||||
* Pablo Romero <https://github.com/pablorcum>
|
||||
* [2022-08] Fix building from sources for QNX
|
|
@ -80,7 +80,7 @@
|
|||
SUN
|
||||
Fujitsu
|
||||
|
||||
4. Suported precision
|
||||
4. Supported precision
|
||||
|
||||
Now the x86/x86_64 version supports 80bit FP precision in addition to
|
||||
normal double precision and single precision. Currently only
|
||||
|
|
15
Makefile
15
Makefile
|
@ -110,6 +110,10 @@ ifeq ($(OSNAME), Darwin)
|
|||
@echo "\"make PREFIX=/your_installation_path/ install\"."
|
||||
@echo
|
||||
@echo "(or set PREFIX in Makefile.rule and run make install."
|
||||
@echo
|
||||
@echo "Note that any flags passed to make during build should also be passed to make install"
|
||||
@echo "to circumvent any install errors."
|
||||
@echo
|
||||
@echo "If you want to move the .dylib to a new location later, make sure you change"
|
||||
@echo "the internal name of the dylib with:"
|
||||
@echo
|
||||
|
@ -118,8 +122,11 @@ endif
|
|||
@echo
|
||||
@echo "To install the library, you can run \"make PREFIX=/path/to/your/installation install\"."
|
||||
@echo
|
||||
@echo "Note that any flags passed to make during build should also be passed to make install"
|
||||
@echo "to circumvent any install errors."
|
||||
@echo
|
||||
|
||||
shared :
|
||||
shared : libs netlib $(RELA)
|
||||
ifneq ($(NO_SHARED), 1)
|
||||
ifeq ($(OSNAME), $(filter $(OSNAME),Linux SunOS Android Haiku FreeBSD DragonFly))
|
||||
@$(MAKE) -C exports so
|
||||
|
@ -143,7 +150,7 @@ ifeq ($(OSNAME), CYGWIN_NT)
|
|||
endif
|
||||
endif
|
||||
|
||||
tests :
|
||||
tests : libs netlib $(RELA) shared
|
||||
ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN)))
|
||||
touch $(LIBNAME)
|
||||
ifndef NO_FBLAS
|
||||
|
@ -271,7 +278,11 @@ prof_lapack : lapack_prebuild
|
|||
lapack_prebuild :
|
||||
ifeq ($(NO_LAPACK), $(filter 0,$(NO_LAPACK)))
|
||||
-@echo "FC = $(FC)" > $(NETLIB_LAPACK_DIR)/make.inc
|
||||
ifeq ($(F_COMPILER), GFORTRAN)
|
||||
-@echo "override FFLAGS = $(LAPACK_FFLAGS) -fno-tree-vectorize" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
else
|
||||
-@echo "override FFLAGS = $(LAPACK_FFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
endif
|
||||
-@echo "FFLAGS_DRV = $(LAPACK_FFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "POPTS = $(LAPACK_FPFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "FFLAGS_NOOPT = -O0 $(LAPACK_NOOPT)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -1,42 +1,24 @@
|
|||
CPP = $(CC) -E
|
||||
RANLIB = ranlib
|
||||
|
||||
ifeq ($(LIBSUBARCH), EV4)
|
||||
LIBNAME = $(LIBPREFIX)_ev4.a
|
||||
LIBNAME_P = $(LIBPREFIX)_ev4_p.a
|
||||
endif
|
||||
|
||||
ifeq ($(LIBSUBARCH), EV5)
|
||||
LIBNAME = $(LIBPREFIX)_ev5.a
|
||||
LIBNAME_P = $(LIBPREFIX)_ev5_p.a
|
||||
endif
|
||||
|
||||
ifeq ($(LIBSUBARCH), EV6)
|
||||
LIBNAME = $(LIBPREFIX)_ev6.a
|
||||
LIBNAME_P = $(LIBPREFIX)_ev6_p.a
|
||||
endif
|
||||
|
||||
ifneq ($(COMPILER), NATIVE)
|
||||
# GCC User
|
||||
ifeq ($(LIBSUBARCH), EV4)
|
||||
OPTION += -DEV4 -mcpu=ev4
|
||||
ifeq ($(CORE), EV4)
|
||||
CCOMMON_OPT += -mcpu=ev4
|
||||
endif
|
||||
ifeq ($(LIBSUBARCH), EV5)
|
||||
OPTION += -DEV5 -mcpu=ev5
|
||||
ifeq ($(CORE), EV5)
|
||||
CCOMMON_OPT += -mcpu=ev5
|
||||
endif
|
||||
ifeq ($(LIBSUBARCH), EV6)
|
||||
OPTION += -DEV6 -mcpu=ev6
|
||||
ifeq ($(CORE), EV6)
|
||||
CCOMMON_OPT += -mcpu=ev6
|
||||
endif
|
||||
else
|
||||
# Compaq Compiler User
|
||||
ifeq ($(LIBSUBARCH), EV4)
|
||||
OPTION += -DEV4 -tune ev4 -arch ev4
|
||||
ifeq ($(CORE), EV4)
|
||||
CCOMMON_OPT += -tune ev4 -arch ev4
|
||||
endif
|
||||
ifeq ($(LIBSUBARCH), EV5)
|
||||
OPTION += -DEV5 -tune ev5 -arch ev5
|
||||
ifeq ($(CORE), EV5)
|
||||
CCOMMON_OPT += -tune ev5 -arch ev5
|
||||
endif
|
||||
ifeq ($(LIBSUBARCH), EV6)
|
||||
OPTION += -DEV6 -tune ev6 -arch ev6
|
||||
ifeq ($(CORE), EV6)
|
||||
CCOMMON_OPT += -tune ev6 -arch ev6
|
||||
endif
|
||||
endif
|
||||
|
||||
|
|
|
@ -89,17 +89,17 @@ endif
|
|||
endif
|
||||
|
||||
# Use a72 tunings because Neoverse-V1 is only available
|
||||
# in GCC>=9.4
|
||||
# in GCC>=10.4
|
||||
ifeq ($(CORE), NEOVERSEV1)
|
||||
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ7) $(ISCLANG)))
|
||||
ifeq ($(GCCVERSIONGTEQ9), 1)
|
||||
ifeq (1, $(filter 1,$(GCCMINORVERSIONGTEQ4) $(GCCVERSIONGTEQ10)))
|
||||
CCOMMON_OPT += -march=armv8.4-a -mtune=neoverse-v1
|
||||
ifeq ($(GCCVERSIONGTEQ10), 1)
|
||||
ifeq (1, $(filter 1,$(GCCMINORVERSIONGTEQ4) $(GCCVERSIONGTEQ11)))
|
||||
CCOMMON_OPT += -march=armv8.4-a+sve -mtune=neoverse-v1
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=armv8.4-a -mtune=neoverse-v1
|
||||
endif
|
||||
else
|
||||
CCOMMON_OPT += -march=armv8.4-a -mtune=native
|
||||
CCOMMON_OPT += -march=armv8.4-a+sve -mtune=native
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=armv8.4-a -mtune=native
|
||||
endif
|
||||
|
@ -119,17 +119,21 @@ endif
|
|||
endif
|
||||
|
||||
# Use a72 tunings because Neoverse-N2 is only available
|
||||
# in GCC>=9.4
|
||||
# in GCC>=10.4
|
||||
ifeq ($(CORE), NEOVERSEN2)
|
||||
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ7) $(ISCLANG)))
|
||||
ifeq ($(GCCVERSIONGTEQ9), 1)
|
||||
ifeq (1, $(filter 1,$(GCCMINORVERSIONGTEQ4) $(GCCVERSIONGTEQ10)))
|
||||
ifeq ($(GCCVERSIONGTEQ10), 1)
|
||||
ifeq (1, $(filter 1,$(GCCMINORVERSIONGTEQ4) $(GCCVERSIONGTEQ11)))
|
||||
ifneq ($(OSNAME), Darwin)
|
||||
CCOMMON_OPT += -march=armv8.5-a+sve+sve2+bf16 -mtune=neoverse-n2
|
||||
else
|
||||
CCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a72
|
||||
endif
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=armv8.5-a+sve+sve2+bf16 -mtune=neoverse-n2
|
||||
endif
|
||||
else
|
||||
CCOMMON_OPT += -march=armv8.5-a -mtune=native
|
||||
CCOMMON_OPT += -march=armv8.5-a+sve -mtune=native
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=armv8.5-a -mtune=native
|
||||
endif
|
||||
|
|
|
@ -14,6 +14,11 @@ OPENBLAS_CMAKE_CONFIG := OpenBLASConfig.cmake
|
|||
OPENBLAS_CMAKE_CONFIG_VERSION := OpenBLASConfigVersion.cmake
|
||||
OPENBLAS_PKGCONFIG_DIR := $(OPENBLAS_LIBRARY_DIR)/pkgconfig
|
||||
PKG_EXTRALIB := $(EXTRALIB)
|
||||
ifeq ($(INTERFACE64),1)
|
||||
SUFFIX64=64
|
||||
endif
|
||||
PKGFILE="$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/$(LIBSONAMEBASE)$(SUFFIX64).pc"
|
||||
|
||||
ifeq ($(USE_OPENMP), 1)
|
||||
ifeq ($(C_COMPILER), PGI)
|
||||
PKG_EXTRALIB += -lomp
|
||||
|
@ -150,13 +155,19 @@ endif
|
|||
endif
|
||||
|
||||
#Generating openblas.pc
|
||||
@echo Generating $(LIBSONAMEBASE).pc in "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)"
|
||||
@echo 'libdir='$(OPENBLAS_LIBRARY_DIR) > "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/$(LIBSONAMEBASE).pc"
|
||||
@echo 'includedir='$(OPENBLAS_INCLUDE_DIR) >> "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/$(LIBSONAMEBASE).pc"
|
||||
@echo 'openblas_config= USE_64BITINT='$(USE_64BITINT) 'DYNAMIC_ARCH='$(DYNAMIC_ARCH) 'DYNAMIC_OLDER='$(DYNAMIC_OLDER) 'NO_CBLAS='$(NO_CBLAS) 'NO_LAPACK='$(NO_LAPACK) 'NO_LAPACKE='$(NO_LAPACKE) 'NO_AFFINITY='$(NO_AFFINITY) 'USE_OPENMP='$(USE_OPENMP) $(CORE) 'MAX_THREADS='$(NUM_THREADS)>> "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/$(LIBSONAMEBASE).pc"
|
||||
@echo 'version='$(VERSION) >> "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/$(LIBSONAMEBASE).pc"
|
||||
@echo 'extralib='$(PKG_EXTRALIB) >> "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/$(LIBSONAMEBASE).pc"
|
||||
@cat openblas.pc.in >> "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/$(LIBSONAMEBASE).pc"
|
||||
ifeq ($(INTERFACE64),1)
|
||||
SUFFIX64=64
|
||||
endif
|
||||
PKGFILE="$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/$(LIBSONAMEBASE)$(SUFFIX64).pc"
|
||||
|
||||
@echo Generating $(LIBSONAMEBASE)$(SUFFIX64).pc in "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)"
|
||||
@echo 'libdir='$(OPENBLAS_LIBRARY_DIR) > "$(PKGFILE)"
|
||||
@echo 'libsuffix='$(SYMBOLSUFFIX) >> "$(PKGFILE)"
|
||||
@echo 'includedir='$(OPENBLAS_INCLUDE_DIR) >> "$(PKGFILE)"
|
||||
@echo 'openblas_config= USE_64BITINT='$(INTERFACE64) 'DYNAMIC_ARCH='$(DYNAMIC_ARCH) 'DYNAMIC_OLDER='$(DYNAMIC_OLDER) 'NO_CBLAS='$(NO_CBLAS) 'NO_LAPACK='$(NO_LAPACK) 'NO_LAPACKE='$(NO_LAPACKE) 'NO_AFFINITY='$(NO_AFFINITY) 'USE_OPENMP='$(USE_OPENMP) $(CORE) 'MAX_THREADS='$(NUM_THREADS)>> "$(PKGFILE)"
|
||||
@echo 'version='$(VERSION) >> "$(PKGFILE)"
|
||||
@echo 'extralib='$(PKG_EXTRALIB) >> "$(PKGFILE)"
|
||||
@cat openblas.pc.in >> "$(PKGFILE)"
|
||||
|
||||
|
||||
#Generating OpenBLASConfig.cmake
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
MSA_FLAGS = -mmsa -mfp64 -mload-store-pairs
|
||||
ifdef BINARY64
|
||||
else
|
||||
endif
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
MSA_FLAGS = -mmsa -mfp64 -mload-store-pairs
|
||||
ifdef BINARY64
|
||||
else
|
||||
endif
|
||||
|
|
|
@ -60,9 +60,9 @@ all: getarch_2nd
|
|||
./getarch_2nd 1 >> $(TARGET_CONF)
|
||||
|
||||
$(TARGET_CONF): c_check$(SCRIPTSUFFIX) f_check$(SCRIPTSUFFIX) getarch
|
||||
./c_check$(SCRIPTSUFFIX) $(TARGET_MAKE) $(TARGET_CONF) $(CC) $(TARGET_FLAGS) $(CFLAGS)
|
||||
./c_check$(SCRIPTSUFFIX) $(TARGET_MAKE) $(TARGET_CONF) "$(CC)" $(TARGET_FLAGS) $(CFLAGS)
|
||||
ifneq ($(ONLY_CBLAS), 1)
|
||||
./f_check$(SCRIPTSUFFIX) $(TARGET_MAKE) $(TARGET_CONF) $(FC) $(TARGET_FLAGS)
|
||||
./f_check$(SCRIPTSUFFIX) $(TARGET_MAKE) $(TARGET_CONF) "$(FC)" $(TARGET_FLAGS)
|
||||
else
|
||||
#When we only build CBLAS, we set NOFORTRAN=2
|
||||
echo "NOFORTRAN=2" >> $(TARGET_MAKE)
|
||||
|
@ -77,8 +77,8 @@ endif
|
|||
|
||||
|
||||
getarch : getarch.c cpuid.S dummy $(CPUIDEMU)
|
||||
avx512=$$(./c_check$(SCRIPTSUFFIX) - - $(CC) $(TARGET_FLAGS) $(CFLAGS) | grep NO_AVX512); \
|
||||
rv64gv=$$(./c_check$(SCRIPTSUFFIX) - - $(CC) $(TARGET_FLAGS) $(CFLAGS) | grep NO_RV64GV); \
|
||||
avx512=$$(./c_check$(SCRIPTSUFFIX) - - "$(CC)" $(TARGET_FLAGS) $(CFLAGS) | grep NO_AVX512); \
|
||||
rv64gv=$$(./c_check$(SCRIPTSUFFIX) - - "$(CC)" $(TARGET_FLAGS) $(CFLAGS) | grep NO_RV64GV); \
|
||||
$(HOSTCC) $(HOST_CFLAGS) $(EXFLAGS) $${avx512:+-D$${avx512}} $${rv64gv:+-D$${rv64gv}} -o $(@F) getarch.c cpuid.S $(CPUIDEMU)
|
||||
|
||||
getarch_2nd : getarch_2nd.c $(TARGET_CONF) dummy
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
#
|
||||
|
||||
# This library's version
|
||||
VERSION = 0.3.21
|
||||
VERSION = 0.3.22
|
||||
|
||||
# If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a
|
||||
# and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library
|
||||
|
@ -131,6 +131,9 @@ BUILD_LAPACK_DEPRECATED = 1
|
|||
|
||||
# Build RecursiveLAPACK on top of LAPACK
|
||||
# BUILD_RELAPACK = 1
|
||||
# Have RecursiveLAPACK actually replace standard LAPACK routines instead of
|
||||
# just adding its equivalents with a RELAPACK_ prefix
|
||||
# RELAPACK_REPLACE = 1
|
||||
|
||||
# If you want to use the legacy threaded Level 3 implementation.
|
||||
# USE_SIMPLE_THREADED_LEVEL3 = 1
|
||||
|
@ -207,7 +210,7 @@ NO_AFFINITY = 1
|
|||
# to the user space. If bigphysarea is enabled, it will use it.
|
||||
# DEVICEDRIVER_ALLOCATION = 1
|
||||
|
||||
# If you need to synchronize FP CSR between threads (for x86/x86_64 only).
|
||||
# If you need to synchronize FP CSR between threads (for x86/x86_64 and aarch64 only).
|
||||
# CONSISTENT_FPCSR = 1
|
||||
|
||||
# If any gemm argument m, n or k is less than or equal to this threshold, gemm will be executed
|
||||
|
|
|
@ -9,6 +9,10 @@ ifndef TOPDIR
|
|||
TOPDIR = .
|
||||
endif
|
||||
|
||||
ifndef RELAPACK_REPLACE
|
||||
RELAPACK_REPLACE=0
|
||||
endif
|
||||
|
||||
# we need to use the host system's architecture for getarch compile options even (and especially) when cross-compiling
|
||||
HOSTARCH := $(shell uname -m)
|
||||
ifeq ($(HOSTARCH), amd64)
|
||||
|
@ -280,8 +284,10 @@ GETARCH_FLAGS += -DHAVE_GAS=$(HAVE_GAS)
|
|||
# Generating Makefile.conf and config.h
|
||||
DUMMY := $(shell $(MAKE) -C $(TOPDIR) -f Makefile.prebuild CC="$(CC)" FC="$(FC)" HOSTCC="$(HOSTCC)" HOST_CFLAGS="$(GETARCH_FLAGS)" CFLAGS="$(CFLAGS)" BINARY=$(BINARY) USE_OPENMP=$(USE_OPENMP) DYNAMIC_ARCH=$(DYNAMIC_ARCH) TARGET_CORE=$(TARGET_CORE) ONLY_CBLAS=$(ONLY_CBLAS) TARGET=$(TARGET) all)
|
||||
|
||||
endif
|
||||
|
||||
ifndef TARGET_CORE
|
||||
include $(TOPDIR)/Makefile.conf
|
||||
-include $(TOPDIR)/Makefile.conf
|
||||
else
|
||||
HAVE_NEON=
|
||||
HAVE_VFP=
|
||||
|
@ -302,7 +308,6 @@ HAVE_FMA3=
|
|||
include $(TOPDIR)/Makefile_kernel.conf
|
||||
endif
|
||||
|
||||
endif
|
||||
|
||||
ifndef NUM_PARALLEL
|
||||
NUM_PARALLEL = 1
|
||||
|
@ -415,7 +420,7 @@ ifeq ($(OSNAME), AIX)
|
|||
EXTRALIB += -lm
|
||||
endif
|
||||
|
||||
ifeq ($(OSNAME), FreeBSD)
|
||||
ifeq ($(OSNAME), $(filter $(OSNAME),FreeBSD OpenBSD NetBSD DragonFly))
|
||||
ifeq ($(ARCH), $(filter $(ARCH),arm arm64))
|
||||
EXTRALIB += -lm
|
||||
endif
|
||||
|
@ -660,8 +665,10 @@ DYNAMIC_CORE += CORTEXA57
|
|||
DYNAMIC_CORE += CORTEXA72
|
||||
DYNAMIC_CORE += CORTEXA73
|
||||
DYNAMIC_CORE += NEOVERSEN1
|
||||
ifneq ($(NO_SVE), 1)
|
||||
DYNAMIC_CORE += NEOVERSEV1
|
||||
DYNAMIC_CORE += NEOVERSEN2
|
||||
endif
|
||||
DYNAMIC_CORE += CORTEXA55
|
||||
DYNAMIC_CORE += FALKOR
|
||||
DYNAMIC_CORE += THUNDERX
|
||||
|
@ -677,7 +684,12 @@ endif
|
|||
endif
|
||||
|
||||
ifeq ($(ARCH), mips64)
|
||||
DYNAMIC_CORE = LOONGSON3R3 LOONGSON3R4
|
||||
DYNAMIC_CORE = LOONGSON3R3 LOONGSON3R4 MIPS64_GENERIC
|
||||
ifdef DYNAMIC_LIST
|
||||
override DYNAMIC_CORE = MIPS64_GENERIC $(DYNAMIC_LIST)
|
||||
XCCOMMON_OPT = -DDYNAMIC_LIST -DDYN_MIPS64_GENERIC
|
||||
XCCOMMON_OPT += $(foreach dcore,$(DYNAMIC_LIST),-DDYN_$(dcore))
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(ARCH), loongarch64)
|
||||
|
@ -818,13 +830,32 @@ endif
|
|||
ifeq ($(ARCH), riscv64)
|
||||
NO_BINARY_MODE = 1
|
||||
BINARY_DEFINED = 1
|
||||
ifdef INTERFACE64
|
||||
ifneq ($(INTERFACE64), 0)
|
||||
ifeq ($(F_COMPILER), GFORTRAN)
|
||||
FCOMMON_OPT += -fdefault-integer-8
|
||||
endif
|
||||
ifeq ($(F_COMPILER), FLANG)
|
||||
FCOMMON_OPT += -i8
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(ARCH), loongarch64)
|
||||
NO_BINARY_MODE = 1
|
||||
BINARY_DEFINED = 1
|
||||
ifdef INTERFACE64
|
||||
ifneq ($(INTERFACE64), 0)
|
||||
ifeq ($(F_COMPILER), GFORTRAN)
|
||||
FCOMMON_OPT += -fdefault-integer-8
|
||||
endif
|
||||
ifeq ($(F_COMPILER), FLANG)
|
||||
FCOMMON_OPT += -i8
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
|
||||
#
|
||||
# C Compiler dependent settings
|
||||
|
@ -856,6 +887,11 @@ CCOMMON_OPT += -mabi=32
|
|||
BINARY_DEFINED = 1
|
||||
endif
|
||||
|
||||
ifneq (, $(filter $(CORE), MIPS64_GENERIC))
|
||||
CCOMMON_OPT += -DNO_MSA
|
||||
FCOMMON_OPT += -DNO_MSA
|
||||
endif
|
||||
|
||||
ifneq (, $(filter $(CORE),LOONGSON3R3 LOONGSON3R4))
|
||||
CCOMMON_OPT += -march=loongson3a
|
||||
FCOMMON_OPT += -march=loongson3a
|
||||
|
@ -932,16 +968,19 @@ endif
|
|||
endif
|
||||
ifdef BINARY64
|
||||
ifeq ($(ARCH), x86_64)
|
||||
ifeq (,$(findstring tp,$(CFLAGS)))
|
||||
ifneq ($(NEWPGI2),1)
|
||||
CCOMMON_OPT += -tp p7-64
|
||||
else
|
||||
CCOMMON_OPT += -tp px
|
||||
endif
|
||||
endif
|
||||
ifneq ($(NEWPGI),1)
|
||||
CCOMMON_OPT += -D__MMX__ -Mnollvm
|
||||
endif
|
||||
else
|
||||
ifeq ($(ARCH), power)
|
||||
ifeq (,$(findstring tp,$(CFLAGS)))
|
||||
ifeq ($(CORE), POWER8)
|
||||
CCOMMON_OPT += -tp pwr8
|
||||
endif
|
||||
|
@ -950,14 +989,17 @@ CCOMMON_OPT += -tp pwr9
|
|||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
else
|
||||
ifneq ($(NEWPGI2),1)
|
||||
ifeq (,$(findstring tp,$(CFLAGS)))
|
||||
CCOMMON_OPT += -tp p7
|
||||
else
|
||||
CCOMMON_OPT += -tp px
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(C_COMPILER), PATHSCALE)
|
||||
ifdef BINARY64
|
||||
|
@ -1349,6 +1391,10 @@ ifeq ($(NO_AVX512), 1)
|
|||
CCOMMON_OPT += -DNO_AVX512
|
||||
endif
|
||||
|
||||
ifeq ($(NO_SVE), 1)
|
||||
CCOMMON_OPT += -DNO_SVE
|
||||
endif
|
||||
|
||||
ifdef SMP
|
||||
CCOMMON_OPT += -DSMP_SERVER
|
||||
|
||||
|
|
|
@ -130,6 +130,28 @@ endif
|
|||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), ZEN)
|
||||
ifdef HAVE_AVX512VL
|
||||
ifndef NO_AVX512
|
||||
CCOMMON_OPT += -march=skylake-avx512
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=skylake-avx512
|
||||
endif
|
||||
ifeq ($(OSNAME), CYGWIN_NT)
|
||||
CCOMMON_OPT += -fno-asynchronous-unwind-tables
|
||||
FCOMMON_OPT += -fno-asynchronous-unwind-tables
|
||||
endif
|
||||
ifeq ($(OSNAME), WINNT)
|
||||
ifeq ($(C_COMPILER), GCC)
|
||||
CCOMMON_OPT += -fno-asynchronous-unwind-tables
|
||||
FCOMMON_OPT += -fno-asynchronous-unwind-tables
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
|
||||
ifdef HAVE_AVX2
|
||||
ifndef NO_AVX2
|
||||
ifeq ($(C_COMPILER), GCC)
|
||||
|
@ -143,6 +165,7 @@ ifeq ($(C_COMPILER), CLANG)
|
|||
CCOMMON_OPT += -mavx2
|
||||
endif
|
||||
endif
|
||||
ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN)))
|
||||
ifeq ($(F_COMPILER), GFORTRAN)
|
||||
# AVX2 support was added in 4.7.0
|
||||
GCCVERSIONGTEQ4 := $(shell expr `$(FC) -dumpversion | cut -f1 -d.` \>= 4)
|
||||
|
@ -159,6 +182,7 @@ endif
|
|||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
endif
|
||||
|
||||
|
|
|
@ -0,0 +1,20 @@
|
|||
# Security Policy
|
||||
|
||||
## Supported Versions
|
||||
|
||||
It is generally recommended to use the latest release as this project
|
||||
does not maintain multiple stable branches and providing packages e.g.
|
||||
for Linux distributions is outside our scope. In particular, versions
|
||||
before 0.3.18 can be assumed to carry the out-of-bounds-read error in
|
||||
the LAPACK ?LARRV family of functions that was the subject of
|
||||
CVE-2021-4048.
|
||||
|
||||
## Reporting a Vulnerability
|
||||
|
||||
If you suspect that you have found a vulnerability - a defect that could
|
||||
be abused to compromise the security of a user's code or systems - please
|
||||
do not use the normal github issue tracker (except perhaps to post a general
|
||||
warning if you deem that necessary). Instead, please contact the project
|
||||
maintainers through the email addresses given in their github user profiles.
|
||||
Defects found in the "lapack-netlib" subtree should ideally be reported to
|
||||
the maintainers of the reference implementation of LAPACK, lapack@icl.utk.edu.
|
|
@ -65,6 +65,7 @@ MIPS1004K
|
|||
MIPS24K
|
||||
|
||||
4.MIPS64 CPU:
|
||||
MIPS64_GENERIC
|
||||
SICORTEX
|
||||
LOONGSON3A
|
||||
LOONGSON3B
|
||||
|
@ -128,3 +129,7 @@ LOONGSON2K1000
|
|||
12. Elbrus E2000:
|
||||
E2K
|
||||
|
||||
13. Alpha
|
||||
EV4
|
||||
EV5
|
||||
EV6
|
||||
|
|
|
@ -141,7 +141,7 @@ jobs:
|
|||
|
||||
- job: OSX_OpenMP
|
||||
pool:
|
||||
vmImage: 'macOS-10.15'
|
||||
vmImage: 'macOS-11'
|
||||
steps:
|
||||
- script: |
|
||||
brew update
|
||||
|
@ -151,15 +151,23 @@ jobs:
|
|||
|
||||
- job: OSX_GCC_Nothreads
|
||||
pool:
|
||||
vmImage: 'macOS-10.15'
|
||||
vmImage: 'macOS-11'
|
||||
steps:
|
||||
- script: |
|
||||
brew update
|
||||
make USE_THREADS=0 CC=gcc-10 FC=gfortran-10
|
||||
|
||||
- job: OSX_GCC12
|
||||
pool:
|
||||
vmImage: 'macOS-latest'
|
||||
steps:
|
||||
- script: |
|
||||
brew update
|
||||
make CC=gcc-12 FC=gfortran-12
|
||||
|
||||
- job: OSX_OpenMP_Clang
|
||||
pool:
|
||||
vmImage: 'macOS-10.15'
|
||||
vmImage: 'macOS-11'
|
||||
variables:
|
||||
LD_LIBRARY_PATH: /usr/local/opt/llvm/lib
|
||||
LIBRARY_PATH: /usr/local/opt/llvm/lib
|
||||
|
@ -172,7 +180,7 @@ jobs:
|
|||
|
||||
- job: OSX_OpenMP_Clang_cmake
|
||||
pool:
|
||||
vmImage: 'macOS-10.15'
|
||||
vmImage: 'macOS-11'
|
||||
variables:
|
||||
LD_LIBRARY_PATH: /usr/local/opt/llvm/lib
|
||||
LIBRARY_PATH: /usr/local/opt/llvm/lib
|
||||
|
@ -188,7 +196,7 @@ jobs:
|
|||
|
||||
- job: OSX_dynarch_cmake
|
||||
pool:
|
||||
vmImage: 'macOS-10.15'
|
||||
vmImage: 'macOS-11'
|
||||
variables:
|
||||
LD_LIBRARY_PATH: /usr/local/opt/llvm/lib
|
||||
LIBRARY_PATH: /usr/local/opt/llvm/lib
|
||||
|
@ -196,13 +204,13 @@ jobs:
|
|||
- script: |
|
||||
mkdir build
|
||||
cd build
|
||||
cmake -DTARGET=CORE2 -DDYNAMIC_ARCH=1 -DCMAKE_C_COMPILER=gcc-10 -DCMAKE_Fortran_COMPILER=gfortran-10 -DBUILD_SHARED_LIBS=ON ..
|
||||
cmake -DTARGET=CORE2 -DDYNAMIC_ARCH=1 -DDYNAMIC_LIST='NEHALEM HASWELL SKYLAKEX' -DCMAKE_C_COMPILER=gcc-10 -DCMAKE_Fortran_COMPILER=gfortran-10 -DBUILD_SHARED_LIBS=ON ..
|
||||
cmake --build .
|
||||
ctest
|
||||
|
||||
- job: OSX_Ifort_Clang
|
||||
pool:
|
||||
vmImage: 'macOS-10.15'
|
||||
vmImage: 'macOS-11'
|
||||
variables:
|
||||
LD_LIBRARY_PATH: /usr/local/opt/llvm/lib
|
||||
MACOS_HPCKIT_URL: https://registrationcenter-download.intel.com/akdlm/irc_nas/17643/m_HPCKit_p_2021.2.0.2903_offline.dmg
|
||||
|
@ -235,7 +243,7 @@ jobs:
|
|||
|
||||
- job: OSX_NDK_ARMV7
|
||||
pool:
|
||||
vmImage: 'macOS-10.15'
|
||||
vmImage: 'macOS-11'
|
||||
steps:
|
||||
- script: |
|
||||
brew update
|
||||
|
@ -255,7 +263,7 @@ jobs:
|
|||
|
||||
- job: OSX_IOS_ARMV7
|
||||
pool:
|
||||
vmImage: 'macOS-10.15'
|
||||
vmImage: 'macOS-11'
|
||||
variables:
|
||||
CC: /Applications/Xcode_12.4.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang
|
||||
CFLAGS: -O2 -mno-thumb -Wno-macro-redefined -isysroot /Applications/Xcode_12.4.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS14.4.sdk -arch armv7 -miphoneos-version-min=5.1
|
||||
|
|
|
@ -74,6 +74,24 @@ static void *huge_malloc(BLASLONG size){
|
|||
|
||||
#endif
|
||||
|
||||
/* Benchmarks should allocate with cacheline (often 64 bytes) alignment
|
||||
to avoid unreliable results. This technique, storing the allocated
|
||||
pointer value just before the aligned memory, doesn't require
|
||||
C11's aligned_alloc for compatibility with older compilers. */
|
||||
static void *aligned_alloc_cacheline(size_t n)
|
||||
{
|
||||
void *p = malloc((size_t)(void *) + n + L1_DATA_LINESIZE - 1);
|
||||
if (p) {
|
||||
void **newp = (void **)
|
||||
(((uintptr_t)p + L1_DATA_LINESIZE) & (uintptr_t)-L1_DATA_LINESIZE);
|
||||
newp[-1] = p;
|
||||
p = newp;
|
||||
}
|
||||
return p;
|
||||
}
|
||||
#define malloc aligned_alloc_cacheline
|
||||
#define free(p) free((p) ? ((void **)(p))[-1] : (p))
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__) || !defined(_POSIX_TIMERS)
|
||||
struct timeval start, stop;
|
||||
#elif defined(__APPLE__)
|
||||
|
|
|
@ -45,7 +45,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *x, *y;
|
||||
FLOAT *x;
|
||||
FLOAT alpha[2] = { 2.0, 2.0 };
|
||||
blasint m, i;
|
||||
blasint inc_x=1,inc_y=1;
|
||||
|
@ -74,10 +74,6 @@ int main(int argc, char *argv[]){
|
|||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef __linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
@ -91,30 +87,20 @@ int main(int argc, char *argv[]){
|
|||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
|
||||
|
||||
for (l=0; l<loops; l++)
|
||||
{
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
|
||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
begin();
|
||||
|
||||
for (l=0; l<loops; l++)
|
||||
{
|
||||
SCAL (&m, alpha, x, &inc_x);
|
||||
|
||||
}
|
||||
end();
|
||||
|
||||
time1 = getsec();
|
||||
|
||||
timeg += time1;
|
||||
|
||||
}
|
||||
|
||||
timeg /= loops;
|
||||
timeg = time1 / loops;
|
||||
|
||||
#ifdef COMPLEX
|
||||
fprintf(stderr, " %10.2f MFlops %10.6f sec\n", 6. * (double)m / timeg * 1.e-6, timeg);
|
||||
|
|
32
c_check
32
c_check
|
@ -31,8 +31,8 @@ flags="$*"
|
|||
|
||||
cross_suffix=""
|
||||
|
||||
if [ "`dirname $compiler_name`" != '.' ]; then
|
||||
cross_suffix="$cross_suffix`dirname $compiler_name`/"
|
||||
if [ "`dirname \"$compiler_name\"`" != '.' ]; then
|
||||
cross_suffix="$cross_suffix`dirname \"$compiler_name\"`/"
|
||||
fi
|
||||
|
||||
bn=`basename $compiler_name`
|
||||
|
@ -162,7 +162,7 @@ fi
|
|||
exit 1
|
||||
}
|
||||
|
||||
have_msa=0
|
||||
no_msa=0
|
||||
if [ "$architecture" = "mips" ] || [ "$architecture" = "mips64" ]; then
|
||||
tmpd="$(mktemp -d)"
|
||||
tmpf="$tmpd/a.c"
|
||||
|
@ -172,11 +172,10 @@ if [ "$architecture" = "mips" ] || [ "$architecture" = "mips64" ]; then
|
|||
printf "void main(void){ __asm__ volatile(%s); }\n" "$code" >> "$tmpf"
|
||||
|
||||
args="$msa_flags -o $tmpf.o $tmpf"
|
||||
have_msa=1
|
||||
{
|
||||
$compiler_name $flags $args >/dev/null 2>&1
|
||||
} || {
|
||||
have_msa=0
|
||||
no_msa=1
|
||||
}
|
||||
|
||||
rm -rf "$tmpd"
|
||||
|
@ -240,6 +239,21 @@ if [ "$architecture" = "riscv64" ]; then
|
|||
rm -rf "$tmpd"
|
||||
fi
|
||||
|
||||
# Probe for SVE support: on arm64, try to compile a translation unit that
# includes <arm_sve.h> with -march=armv8-a+sve. no_sve ends up 1 only when
# that compile fails; on every other architecture it stays 0.
no_sve=0
if [ "$architecture" = "arm64" ]; then
    tmpd="$(mktemp -d)"
    tmpf="$tmpd/a.c"
    printf "#include <arm_sve.h>\n\n int main(void){}\n" >> "$tmpf"
    args=" -march=armv8-a+sve -c -o $tmpf.o $tmpf"
    # $flags and $args are left unquoted on purpose so they split into words.
    if ! $compiler_name $flags $args >/dev/null 2>&1; then
        no_sve=1
    fi
    rm -rf "$tmpd"
fi
|
||||
|
||||
c11_atomics=0
|
||||
case "$data" in
|
||||
*HAVE_C11*)
|
||||
|
@ -375,10 +389,8 @@ done
|
|||
printf "CROSS_SUFFIX=%s\n" "$cross_suffix"
|
||||
[ "$cross" -ne 0 ] && printf "CROSS=1\n"
|
||||
printf "CEXTRALIB=%s %s %s\n" "$linker_L" "$linker_l" "$linker_a"
|
||||
[ "$have_msa" -eq 1 ] && {
|
||||
printf "HAVE_MSA=1\n"
|
||||
printf "MSA_FLAGS=%s\n" "$msa_flags"
|
||||
}
|
||||
[ "$no_msa" -eq 1 ] && printf "NO_MSA=1\n"
|
||||
[ "$no_sve" -eq 1 ] && printf "NO_SVE=1\n"
|
||||
[ "$no_rv64gv" -eq 1 ] && printf "NO_RV64GV=1\n"
|
||||
[ "$no_avx512" -eq 1 ] && printf "NO_AVX512=1\n"
|
||||
[ "$no_avx2" -eq 1 ] && printf "NO_AVX2=1\n"
|
||||
|
@ -396,7 +408,7 @@ compiler=`echo "$compiler" | tr '[[:lower:]]' '[[:upper:]]' `
|
|||
[ "$binformat" = "bin32" ] && printf "#define __32BIT__\t1\n"
|
||||
[ "$binformat" = "bin64" ] && printf "#define __64BIT__\t1\n"
|
||||
[ -n "$need_fu" ] && printf "#define FUNDERSCORE\t%s\n" "$need_fu"
|
||||
[ "$have_msa" -eq 1 ] && printf "#define HAVE_MSA\t1\n"
|
||||
[ "$no_msa" -eq 1 ] && printf "#define NO_MSA\t1\n"
|
||||
[ "$c11_atomics" -eq 1 ] && printf "#define HAVE_C11\t1\n"
|
||||
} >> "$config"
|
||||
|
||||
|
|
|
@ -44,7 +44,10 @@ endif ()
|
|||
|
||||
if (DYNAMIC_ARCH)
|
||||
if (ARM64)
|
||||
set(DYNAMIC_CORE ARMV8 CORTEXA53 CORTEXA55 CORTEXA57 CORTEXA72 CORTEXA73 FALKOR THUNDERX THUNDERX2T99 TSV110 EMAG8180 NEOVERSEN1 NEOVERSEV1 NEOVERSEN2 THUNDERX3T110)
|
||||
set(DYNAMIC_CORE ARMV8 CORTEXA53 CORTEXA55 CORTEXA57 CORTEXA72 CORTEXA73 FALKOR THUNDERX THUNDERX2T99 TSV110 EMAG8180 NEOVERSEN1 THUNDERX3T110)
|
||||
# Only offer the Neoverse V1/N2 targets when the C compiler reports version
# 10 or newer. They are appended as separate list elements: wrapping the
# expansion in quotes would instead glue " NEOVERSEV1 NEOVERSEN2" onto the
# last existing entry, producing one bogus target name.
if (${CMAKE_C_COMPILER_VERSION} VERSION_GREATER 9.99)
  list(APPEND DYNAMIC_CORE NEOVERSEV1 NEOVERSEN2)
endif ()
|
||||
if (DYNAMIC_LIST)
|
||||
set(DYNAMIC_CORE ARMV8 ${DYNAMIC_LIST})
|
||||
endif ()
|
||||
|
@ -132,3 +135,8 @@ if (ARM64)
|
|||
set(BINARY_DEFINED 1)
|
||||
endif ()
|
||||
|
||||
if (${ARCH} STREQUAL "riscv64")
|
||||
set(NO_BINARY_MODE 1)
|
||||
set(BINARY_DEFINED 1)
|
||||
endif ()
|
||||
|
||||
|
|
|
@ -144,6 +144,21 @@ if (${CORE} STREQUAL SAPPHIRERAPIDS)
|
|||
endif ()
|
||||
endif ()
|
||||
|
||||
if (${CORE} STREQUAL ZEN)
|
||||
if (HAVE_AVX512VL)
|
||||
if (NOT DYNAMIC_ARCH)
|
||||
if (NOT NO_AVX512)
|
||||
execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
|
||||
if (${GCC_VERSION} VERSION_GREATER 13.0 OR ${GCC_VERSION} VERSION_EQUAL 13.0)
|
||||
set (CCOMMON_OPT "${CCOMMON_OPT} -march=znver4")
|
||||
else ()
|
||||
set (CCOMMON_OPT "${CCOMMON_OPT} -march=skylake-avx512")
|
||||
endif ()
|
||||
endif ()
|
||||
endif ()
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
if (${CORE} STREQUAL A64FX)
|
||||
if (NOT DYNAMIC_ARCH)
|
||||
execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
|
||||
|
@ -155,6 +170,39 @@ if (${CORE} STREQUAL A64FX)
|
|||
endif ()
|
||||
endif ()
|
||||
|
||||
if (${CORE} STREQUAL NEOVERSEN2)
|
||||
if (NOT DYNAMIC_ARCH)
|
||||
execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
|
||||
if (${GCC_VERSION} VERSION_GREATER 10.4 OR ${GCC_VERSION} VERSION_EQUAL 10.4)
|
||||
set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8.5-a+sve+sve2+bf16 -mtune=neoverse-n2")
|
||||
else ()
|
||||
set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8.2-a+sve")
|
||||
endif()
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
if (${CORE} STREQUAL NEOVERSEV1)
|
||||
if (NOT DYNAMIC_ARCH)
|
||||
execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
|
||||
if (${GCC_VERSION} VERSION_GREATER 10.4 OR ${GCC_VERSION} VERSION_EQUAL 10.4)
|
||||
set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8.4-a+sve -mtune=neoverse-v1")
|
||||
else ()
|
||||
set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8.2-a+sve")
|
||||
endif()
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
# Compiler flags for a fixed (non-DYNAMIC_ARCH) Neoverse N1 build.
# Neoverse N1 is an Armv8.2-A core without the Scalable Vector Extension,
# so +sve must not be requested here: it would let the compiler emit
# instructions this core cannot execute. -mtune=neoverse-n1 is only passed
# when -dumpversion reports 9.4 or newer.
if (${CORE} STREQUAL NEOVERSEN1)
  if (NOT DYNAMIC_ARCH)
    execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
    if (${GCC_VERSION} VERSION_GREATER 9.4 OR ${GCC_VERSION} VERSION_EQUAL 9.4)
      set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8.2-a -mtune=neoverse-n1")
    else ()
      set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8.2-a")
    endif()
  endif ()
endif ()
|
||||
|
||||
if (${CORE} STREQUAL ARMV8SVE)
|
||||
if (NOT DYNAMIC_ARCH)
|
||||
set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8-a+sve")
|
||||
|
|
|
@ -46,7 +46,7 @@ if (${F_COMPILER} STREQUAL "GFORTRAN")
|
|||
set(FCOMMON_OPT "${FCOMMON_OPT} -fno-optimize-sibling-calls")
|
||||
#Don't include -lgfortran, when NO_LAPACK=1 or lsbcc
|
||||
if (NOT NO_LAPACK)
|
||||
set(EXTRALIB "{EXTRALIB} -lgfortran")
|
||||
set(EXTRALIB "${EXTRALIB} -lgfortran")
|
||||
endif ()
|
||||
if (NO_BINARY_MODE)
|
||||
if (MIPS64)
|
||||
|
|
|
@ -22,7 +22,7 @@ set(SCLAUX
|
|||
slasd7.f slasd8.f slasda.f slasdq.f slasdt.f
|
||||
slaset.f slasq1.f slasq2.f slasq3.f slasq4.f slasq5.f slasq6.f
|
||||
slasr.f slasrt.f slassq.f90 slasv2.f spttrf.f sstebz.f sstedc.f
|
||||
ssteqr.f ssterf.f slaisnan.f sisnan.f
|
||||
ssteqr.f ssterf.f slaisnan.f sisnan.f slarmm.f
|
||||
slartgp.f slartgs.f ../INSTALL/sroundup_lwork.f
|
||||
../INSTALL/second_${TIMER}.f)
|
||||
|
||||
|
@ -42,7 +42,7 @@ set(DZLAUX
|
|||
dlasd7.f dlasd8.f dlasda.f dlasdq.f dlasdt.f
|
||||
dlasq1.f dlasq2.f dlasq3.f dlasq4.f dlasq5.f dlasq6.f
|
||||
dlasr.f dlasrt.f dlassq.f90 dlasv2.f dpttrf.f dstebz.f dstedc.f
|
||||
dsteqr.f dsterf.f dlaisnan.f disnan.f
|
||||
dsteqr.f dsterf.f dlaisnan.f disnan.f dlarmm.f
|
||||
dlartgp.f dlartgs.f ../INSTALL/droundup_lwork.f
|
||||
../INSTALL/dlamch.f ../INSTALL/dsecnd_${TIMER}.f)
|
||||
|
||||
|
@ -123,7 +123,8 @@ set(SLASRC
|
|||
ssyevd_2stage.f ssyev_2stage.f ssyevx_2stage.f ssyevr_2stage.f
|
||||
ssbev_2stage.f ssbevx_2stage.f ssbevd_2stage.f ssygv_2stage.f
|
||||
sgesvdq.f slaorhr_col_getrfnp.f
|
||||
slaorhr_col_getrfnp2.f sorgtsqr.f sorgtsqr_row.f sorhr_col.f )
|
||||
slaorhr_col_getrfnp2.f sorgtsqr.f sorgtsqr_row.f sorhr_col.f
|
||||
slatrs3.f strsyl3.f sgelst.f)
|
||||
|
||||
set(SXLASRC sgesvxx.f sgerfsx.f sla_gerfsx_extended.f sla_geamv.f
|
||||
sla_gercond.f sla_gerpvgrw.f ssysvxx.f ssyrfsx.f
|
||||
|
@ -221,7 +222,8 @@ set(CLASRC
|
|||
cheevd_2stage.f cheev_2stage.f cheevx_2stage.f cheevr_2stage.f
|
||||
chbev_2stage.f chbevx_2stage.f chbevd_2stage.f chegv_2stage.f
|
||||
cgesvdq.f claunhr_col_getrfnp.f claunhr_col_getrfnp2.f
|
||||
cungtsqr.f cungtsqr_row.f cunhr_col.f )
|
||||
cungtsqr.f cungtsqr_row.f cunhr_col.f
|
||||
clatrs3.f ctrsyl3.f cgelst.f)
|
||||
|
||||
set(CXLASRC cgesvxx.f cgerfsx.f cla_gerfsx_extended.f cla_geamv.f
|
||||
cla_gercond_c.f cla_gercond_x.f cla_gerpvgrw.f
|
||||
|
@ -313,7 +315,8 @@ set(DLASRC
|
|||
dsyevd_2stage.f dsyev_2stage.f dsyevx_2stage.f dsyevr_2stage.f
|
||||
dsbev_2stage.f dsbevx_2stage.f dsbevd_2stage.f dsygv_2stage.f
|
||||
dcombssq.f dgesvdq.f dlaorhr_col_getrfnp.f
|
||||
dlaorhr_col_getrfnp2.f dorgtsqr.f dorgtsqr_row.f dorhr_col.f )
|
||||
dlaorhr_col_getrfnp2.f dorgtsqr.f dorgtsqr_row.f dorhr_col.f
|
||||
dlatrs3.f dtrsyl3.f dgelst.f)
|
||||
|
||||
set(DXLASRC dgesvxx.f dgerfsx.f dla_gerfsx_extended.f dla_geamv.f
|
||||
dla_gercond.f dla_gerpvgrw.f dsysvxx.f dsyrfsx.f
|
||||
|
@ -415,7 +418,8 @@ set(ZLASRC
|
|||
zheevd_2stage.f zheev_2stage.f zheevx_2stage.f zheevr_2stage.f
|
||||
zhbev_2stage.f zhbevx_2stage.f zhbevd_2stage.f zhegv_2stage.f
|
||||
zgesvdq.f zlaunhr_col_getrfnp.f zlaunhr_col_getrfnp2.f
|
||||
zungtsqr.f zungtsqr_row.f zunhr_col.f)
|
||||
zungtsqr.f zungtsqr_row.f zunhr_col.f
|
||||
zlatrs3.f ztrsyl3.f zgelst.f)
|
||||
|
||||
set(ZXLASRC zgesvxx.f zgerfsx.f zla_gerfsx_extended.f zla_geamv.f
|
||||
zla_gercond_c.f zla_gercond_x.f zla_gerpvgrw.f zsysvxx.f zsyrfsx.f
|
||||
|
@ -519,7 +523,7 @@ set(SCLAUX
|
|||
slaset.c slasq1.c slasq2.c slasq3.c slasq4.c slasq5.c slasq6.c
|
||||
slasr.c slasrt.c slassq.c slasv2.c spttrf.c sstebz.c sstedc.c
|
||||
ssteqr.c ssterf.c slaisnan.c sisnan.c
|
||||
slartgp.c slartgs.c
|
||||
slartgp.c slartgs.c slarmm.c
|
||||
../INSTALL/second_${TIMER}.c)
|
||||
|
||||
set(DZLAUX
|
||||
|
@ -538,7 +542,7 @@ set(DZLAUX
|
|||
dlasq1.c dlasq2.c dlasq3.c dlasq4.c dlasq5.c dlasq6.c
|
||||
dlasr.c dlasrt.c dlassq.c dlasv2.c dpttrf.c dstebz.c dstedc.c
|
||||
dsteqr.c dsterf.c dlaisnan.c disnan.c
|
||||
dlartgp.c dlartgs.c
|
||||
dlartgp.c dlartgs.c dlarmm.c
|
||||
../INSTALL/dlamch.c ../INSTALL/dsecnd_${TIMER}.c)
|
||||
|
||||
set(SLASRC
|
||||
|
@ -617,7 +621,8 @@ set(SLASRC
|
|||
ssyevd_2stage.c ssyev_2stage.c ssyevx_2stage.c ssyevr_2stage.c
|
||||
ssbev_2stage.c ssbevx_2stage.c ssbevd_2stage.c ssygv_2stage.c
|
||||
sgesvdq.c slaorhr_col_getrfnp.c
|
||||
slaorhr_col_getrfnp2.c sorgtsqr.c sorgtsqr_row.c sorhr_col.c )
|
||||
slaorhr_col_getrfnp2.c sorgtsqr.c sorgtsqr_row.c sorhr_col.c
|
||||
slatrs3.c strsyl3.c sgelst.c)
|
||||
|
||||
set(SXLASRC sgesvxx.c sgerfsx.c sla_gerfsx_extended.c sla_geamv.c
|
||||
sla_gercond.c sla_gerpvgrw.c ssysvxx.c ssyrfsx.c
|
||||
|
@ -714,7 +719,8 @@ set(CLASRC
|
|||
cheevd_2stage.c cheev_2stage.c cheevx_2stage.c cheevr_2stage.c
|
||||
chbev_2stage.c chbevx_2stage.c chbevd_2stage.c chegv_2stage.c
|
||||
cgesvdq.c claunhr_col_getrfnp.c claunhr_col_getrfnp2.c
|
||||
cungtsqr.c cungtsqr_row.c cunhr_col.c )
|
||||
cungtsqr.c cungtsqr_row.c cunhr_col.c
|
||||
clatrs3.c ctrsyl3.c cgelst.c)
|
||||
|
||||
set(CXLASRC cgesvxx.c cgerfsx.c cla_gerfsx_extended.c cla_geamv.c
|
||||
cla_gercond_c.c cla_gercond_x.c cla_gerpvgrw.c
|
||||
|
@ -805,7 +811,8 @@ set(DLASRC
|
|||
dsyevd_2stage.c dsyev_2stage.c dsyevx_2stage.c dsyevr_2stage.c
|
||||
dsbev_2stage.c dsbevx_2stage.c dsbevd_2stage.c dsygv_2stage.c
|
||||
dcombssq.c dgesvdq.c dlaorhr_col_getrfnp.c
|
||||
dlaorhr_col_getrfnp2.c dorgtsqr.c dorgtsqr_row.c dorhr_col.c )
|
||||
dlaorhr_col_getrfnp2.c dorgtsqr.c dorgtsqr_row.c dorhr_col.c
|
||||
dlatrs3.c dtrsyl3.c dgelst.c)
|
||||
|
||||
set(DXLASRC dgesvxx.c dgerfsx.c dla_gerfsx_extended.c dla_geamv.c
|
||||
dla_gercond.c dla_gerpvgrw.c dsysvxx.c dsyrfsx.c
|
||||
|
@ -906,7 +913,7 @@ set(ZLASRC
|
|||
zheevd_2stage.c zheev_2stage.c zheevx_2stage.c zheevr_2stage.c
|
||||
zhbev_2stage.c zhbevx_2stage.c zhbevd_2stage.c zhegv_2stage.c
|
||||
zgesvdq.c zlaunhr_col_getrfnp.c zlaunhr_col_getrfnp2.c
|
||||
zungtsqr.c zungtsqr_row.c zunhr_col.c)
|
||||
zungtsqr.c zungtsqr_row.c zunhr_col.c zlatrs3.c ztrsyl3.c zgelst.c)
|
||||
|
||||
set(ZXLASRC zgesvxx.c zgerfsx.c zla_gerfsx_extended.c zla_geamv.c
|
||||
zla_gercond_c.c zla_gercond_x.c zla_gerpvgrw.c zsysvxx.c zsyrfsx.c
|
||||
|
@ -999,6 +1006,9 @@ endforeach ()
|
|||
|
||||
if (NOT C_LAPACK)
|
||||
set_source_files_properties(${LA_SOURCES} PROPERTIES COMPILE_FLAGS "${LAPACK_FFLAGS}")
|
||||
if (${F_COMPILER} STREQUAL "GFORTRAN")
|
||||
set_source_files_properties(${LA_SOURCES} PROPERTIES COMPILE_FLAGS "${LAPACK_FFLAGS} -fno-tree-vectorize")
|
||||
endif()
|
||||
else ()
|
||||
set_source_files_properties(${LA_SOURCES} PROPERTIES COMPILE_FLAGS "${LAPACK_CFLAGS}")
|
||||
endif ()
|
||||
|
|
|
@ -318,6 +318,8 @@ set(CSRC
|
|||
lapacke_clacn2.c
|
||||
lapacke_clag2z.c
|
||||
lapacke_clag2z_work.c
|
||||
lapacke_clangb.c
|
||||
lapacke_clangb_work.c
|
||||
lapacke_clange.c
|
||||
lapacke_clange_work.c
|
||||
lapacke_clanhe.c
|
||||
|
@ -803,6 +805,8 @@ set(DSRC
|
|||
lapacke_dlag2s_work.c
|
||||
lapacke_dlamch.c
|
||||
lapacke_dlamch_work.c
|
||||
lapacke_dlangb.c
|
||||
lapacke_dlangb_work.c
|
||||
lapacke_dlange.c
|
||||
lapacke_dlange_work.c
|
||||
lapacke_dlansy.c
|
||||
|
@ -1381,6 +1385,8 @@ set(SSRC
|
|||
lapacke_slag2d_work.c
|
||||
lapacke_slamch.c
|
||||
lapacke_slamch_work.c
|
||||
lapacke_slangb.c
|
||||
lapacke_slangb_work.c
|
||||
lapacke_slange.c
|
||||
lapacke_slange_work.c
|
||||
lapacke_slansy.c
|
||||
|
@ -2089,6 +2095,8 @@ set(ZSRC
|
|||
lapacke_zlacrm_work.c
|
||||
lapacke_zlag2c.c
|
||||
lapacke_zlag2c_work.c
|
||||
lapacke_zlangb.c
|
||||
lapacke_zlangb_work.c
|
||||
lapacke_zlange.c
|
||||
lapacke_zlange_work.c
|
||||
lapacke_zlanhe.c
|
||||
|
@ -2481,6 +2489,8 @@ set(Utils_SRC
|
|||
lapacke_ctp_nancheck.c lapacke_dtr_trans.c lapacke_str_trans.c lapacke_ztp_trans.c
|
||||
lapacke_ctp_trans.c lapacke_lsame.c lapacke_xerbla.c lapacke_ztr_nancheck.c
|
||||
lapacke_ctr_nancheck.c lapacke_make_complex_double.c lapacke_z_nancheck.c lapacke_ztr_trans.c
|
||||
lapacke_ctz_nancheck.c lapacke_ctz_trans.c lapacke_dtz_nancheck.c lapacke_dtz_trans.c
|
||||
lapacke_stz_nancheck.c lapacke_stz_trans.c lapacke_ztz_nancheck.c lapacke_ztz_trans.c
|
||||
)
|
||||
|
||||
set(LAPACKE_REL_SRC "")
|
||||
|
|
|
@ -2,7 +2,7 @@ libdir=@CMAKE_INSTALL_FULL_LIBDIR@
|
|||
libsuffix=@SUFFIX64_UNDERSCORE@
|
||||
includedir=@CMAKE_INSTALL_FULL_INCLUDEDIR@
|
||||
|
||||
openblas_config=USE_64BITINT=@USE_64BITINT@ NO_CBLAS=@NO_CBLAS@ NO_LAPACK=@NO_LAPACK@ NO_LAPACKE=@NO_LAPACKE@ DYNAMIC_ARCH=@DYNAMIC_ARCH@ DYNAMIC_OLDER=@DYNAMIC_OLDER@ NO_AFFINITY=@NO_AFFINITY@ USE_OPENMP=@USE_OPENMP@ @CORE@ MAX_THREADS=@NUM_THREADS@
|
||||
openblas_config=USE_64BITINT=@INTERFACE64@ NO_CBLAS=@NO_CBLAS@ NO_LAPACK=@NO_LAPACK@ NO_LAPACKE=@NO_LAPACKE@ DYNAMIC_ARCH=@DYNAMIC_ARCH@ DYNAMIC_OLDER=@DYNAMIC_OLDER@ NO_AFFINITY=@NO_AFFINITY@ USE_OPENMP=@USE_OPENMP@ @CORE@ MAX_THREADS=@NUM_THREADS@
|
||||
Name: OpenBLAS
|
||||
Description: OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version
|
||||
Version: @OPENBLAS_VERSION@
|
||||
|
|
|
@ -823,6 +823,41 @@ if (DEFINED CORE AND CMAKE_CROSSCOMPILING AND NOT (${HOST_OS} STREQUAL "WINDOWSS
|
|||
set(CGEMM3M_UNROLL_N 4)
|
||||
set(ZGEMM3M_UNROLL_M 4)
|
||||
set(ZGEMM3M_UNROLL_N 4)
|
||||
elseif ("${TCORE}" STREQUAL "ARMV5")
|
||||
file(APPEND ${TARGET_CONF_TEMP}
|
||||
"#define L1_DATA_SIZE\t65536\n"
|
||||
"#define L1_DATA_LINESIZE\t32\n"
|
||||
"#define L2_SIZE\t512488\n"
|
||||
"#define L2_LINESIZE\t32\n"
|
||||
"#define DTB_DEFAULT_ENTRIES\t64\n"
|
||||
"#define DTB_SIZE\t4096\n"
|
||||
"#define L2_ASSOCIATIVE\t4\n")
|
||||
set(SGEMM_UNROLL_M 2)
|
||||
set(SGEMM_UNROLL_N 2)
|
||||
set(DGEMM_UNROLL_M 2)
|
||||
set(DGEMM_UNROLL_N 2)
|
||||
set(CGEMM_UNROLL_M 2)
|
||||
set(CGEMM_UNROLL_N 2)
|
||||
set(ZGEMM_UNROLL_M 2)
|
||||
set(ZGEMM_UNROLL_N 2)
|
||||
elseif ("${TCORE}" STREQUAL "ARMV6")
|
||||
file(APPEND ${TARGET_CONF_TEMP}
|
||||
"#define L1_DATA_SIZE\t65536\n"
|
||||
"#define L1_DATA_LINESIZE\t32\n"
|
||||
"#define L2_SIZE\t512488\n"
|
||||
"#define L2_LINESIZE\t32\n"
|
||||
"#define DTB_DEFAULT_ENTRIES\t64\n"
|
||||
"#define DTB_SIZE\t4096\n"
|
||||
"#define L2_ASSOCIATIVE\t4\n"
|
||||
"#define HAVE_VFP\n")
|
||||
set(SGEMM_UNROLL_M 4)
|
||||
set(SGEMM_UNROLL_N 2)
|
||||
set(DGEMM_UNROLL_M 4)
|
||||
set(DGEMM_UNROLL_N 2)
|
||||
set(CGEMM_UNROLL_M 2)
|
||||
set(CGEMM_UNROLL_N 2)
|
||||
set(ZGEMM_UNROLL_M 2)
|
||||
set(ZGEMM_UNROLL_N 2)
|
||||
elseif ("${TCORE}" STREQUAL "ARMV7")
|
||||
file(APPEND ${TARGET_CONF_TEMP}
|
||||
"#define L1_DATA_SIZE\t65536\n"
|
||||
|
@ -886,7 +921,11 @@ else ()
|
|||
set(SGEMM_UNROLL_M 8)
|
||||
set(SGEMM_UNROLL_N 8)
|
||||
endif ()
|
||||
if ("${TCORE}" STREQUAL "CORTEXA53")
|
||||
set(DGEMM_UNROLL_M 4)
|
||||
else ()
|
||||
set(DGEMM_UNROLL_M 8)
|
||||
endif ()
|
||||
set(DGEMM_UNROLL_N 4)
|
||||
set(CGEMM_UNROLL_M 8)
|
||||
set(CGEMM_UNROLL_N 4)
|
||||
|
@ -1319,16 +1358,25 @@ else(NOT CMAKE_CROSSCOMPILING)
|
|||
|
||||
set(GETARCH_DIR "${PROJECT_BINARY_DIR}/getarch_build")
|
||||
set(GETARCH_BIN "getarch${CMAKE_EXECUTABLE_SUFFIX}")
|
||||
file(MAKE_DIRECTORY ${GETARCH_DIR})
|
||||
configure_file(${TARGET_CONF_TEMP} ${GETARCH_DIR}/${TARGET_CONF} COPYONLY)
|
||||
file(MAKE_DIRECTORY "${GETARCH_DIR}")
|
||||
configure_file("${TARGET_CONF_TEMP}" "${GETARCH_DIR}/${TARGET_CONF}" COPYONLY)
|
||||
if (NOT "${CMAKE_SYSTEM_NAME}" STREQUAL "WindowsStore")
|
||||
try_compile(GETARCH_RESULT ${GETARCH_DIR}
|
||||
if (CMAKE_ASM_COMPILER_ID STREQUAL "")
|
||||
try_compile(GETARCH_RESULT "${GETARCH_DIR}"
|
||||
SOURCES ${GETARCH_SRC}
|
||||
CMAKE_FLAGS "-DCMAKE_ASM_COMPILER=${CMAKE_C_COMPILER}"
|
||||
COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} -I"${GETARCH_DIR}" -I"${PROJECT_SOURCE_DIR}" -I"${PROJECT_BINARY_DIR}"
|
||||
OUTPUT_VARIABLE GETARCH_LOG
|
||||
COPY_FILE "${PROJECT_BINARY_DIR}/${GETARCH_BIN}"
|
||||
)
|
||||
else()
|
||||
try_compile(GETARCH_RESULT "${GETARCH_DIR}"
|
||||
SOURCES ${GETARCH_SRC}
|
||||
COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} -I"${GETARCH_DIR}" -I"${PROJECT_SOURCE_DIR}" -I"${PROJECT_BINARY_DIR}"
|
||||
OUTPUT_VARIABLE GETARCH_LOG
|
||||
COPY_FILE ${PROJECT_BINARY_DIR}/${GETARCH_BIN}
|
||||
COPY_FILE "${PROJECT_BINARY_DIR}/${GETARCH_BIN}"
|
||||
)
|
||||
|
||||
endif()
|
||||
if (NOT ${GETARCH_RESULT})
|
||||
MESSAGE(FATAL_ERROR "Compiling getarch failed ${GETARCH_LOG}")
|
||||
endif ()
|
||||
|
@ -1357,19 +1405,19 @@ execute_process(COMMAND "${PROJECT_BINARY_DIR}/${GETARCH_BIN}" 1 OUTPUT_VARIABLE
|
|||
message(STATUS "GETARCH results:\n${GETARCH_MAKE_OUT}")
|
||||
|
||||
# append config data from getarch to the TARGET file and read in CMake vars
|
||||
file(APPEND ${TARGET_CONF_TEMP} ${GETARCH_CONF_OUT})
|
||||
file(APPEND "${TARGET_CONF_TEMP}" ${GETARCH_CONF_OUT})
|
||||
ParseGetArchVars(${GETARCH_MAKE_OUT})
|
||||
|
||||
set(GETARCH2_DIR "${PROJECT_BINARY_DIR}/getarch2_build")
|
||||
set(GETARCH2_BIN "getarch_2nd${CMAKE_EXECUTABLE_SUFFIX}")
|
||||
file(MAKE_DIRECTORY ${GETARCH2_DIR})
|
||||
configure_file(${TARGET_CONF_TEMP} ${GETARCH2_DIR}/${TARGET_CONF} COPYONLY)
|
||||
file(MAKE_DIRECTORY "${GETARCH2_DIR}")
|
||||
configure_file("${TARGET_CONF_TEMP}" "${GETARCH2_DIR}/${TARGET_CONF}" COPYONLY)
|
||||
if (NOT "${CMAKE_SYSTEM_NAME}" STREQUAL "WindowsStore")
|
||||
try_compile(GETARCH2_RESULT ${GETARCH2_DIR}
|
||||
SOURCES ${PROJECT_SOURCE_DIR}/getarch_2nd.c
|
||||
try_compile(GETARCH2_RESULT "${GETARCH2_DIR}"
|
||||
SOURCES "${PROJECT_SOURCE_DIR}/getarch_2nd.c"
|
||||
COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} ${GETARCH2_FLAGS} -I"${GETARCH2_DIR}" -I"${PROJECT_SOURCE_DIR}" -I"${PROJECT_BINARY_DIR}"
|
||||
OUTPUT_VARIABLE GETARCH2_LOG
|
||||
COPY_FILE ${PROJECT_BINARY_DIR}/${GETARCH2_BIN}
|
||||
COPY_FILE "${PROJECT_BINARY_DIR}/${GETARCH2_BIN}"
|
||||
)
|
||||
|
||||
if (NOT ${GETARCH2_RESULT})
|
||||
|
@ -1382,9 +1430,9 @@ execute_process(COMMAND "${PROJECT_BINARY_DIR}/${GETARCH2_BIN}" 0 OUTPUT_VARIABL
|
|||
execute_process(COMMAND "${PROJECT_BINARY_DIR}/${GETARCH2_BIN}" 1 OUTPUT_VARIABLE GETARCH2_CONF_OUT)
|
||||
|
||||
# append config data from getarch_2nd to the TARGET file and read in CMake vars
|
||||
file(APPEND ${TARGET_CONF_TEMP} ${GETARCH2_CONF_OUT})
|
||||
file(APPEND "${TARGET_CONF_TEMP}" ${GETARCH2_CONF_OUT})
|
||||
|
||||
configure_file(${TARGET_CONF_TEMP} ${TARGET_CONF_DIR}/${TARGET_CONF} COPYONLY)
|
||||
configure_file("${TARGET_CONF_TEMP}" "${TARGET_CONF_DIR}/${TARGET_CONF}" COPYONLY)
|
||||
|
||||
ParseGetArchVars(${GETARCH2_MAKE_OUT})
|
||||
|
||||
|
|
|
@ -172,9 +172,9 @@ if (DEFINED TARGET)
|
|||
endif()
|
||||
elseif (${CMAKE_C_COMPILER_ID} STREQUAL "Clang" OR ${CMAKE_C_COMPILER_ID} STREQUAL "AppleClang")
|
||||
if (${CMAKE_C_COMPILER_VERSION} VERSION_GREATER 8.99)
|
||||
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=cooperlake")
|
||||
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=cooperlake -exhaustive-register-search")
|
||||
else()
|
||||
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=skylake-avx512")
|
||||
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=skylake-avx512 -exhaustive-register-search")
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
|
@ -188,23 +188,45 @@ if (DEFINED TARGET)
|
|||
endif()
|
||||
elseif (${CMAKE_C_COMPILER_ID} STREQUAL "Clang" OR ${CMAKE_C_COMPILER_ID} STREQUAL "AppleClang")
|
||||
if (${CMAKE_C_COMPILER_VERSION} VERSION_GREATER 12.0)
|
||||
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=sapphirerapids")
|
||||
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=sapphirerapids -exhaustive-register-search")
|
||||
else()
|
||||
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=skylake-avx512")
|
||||
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=skylake-avx512 -exhaustive-register-search")
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
if (${TARGET} STREQUAL SKYLAKEX AND NOT NO_AVX512)
|
||||
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=skylake-avx512")
|
||||
if (${CMAKE_C_COMPILER_ID} STREQUAL "Clang" OR ${CMAKE_C_COMPILER_ID} STREQUAL "AppleClang")
|
||||
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -exhaustive-register-search")
|
||||
endif()
|
||||
if (${TARGET} STREQUAL HASWELL AND NOT NO_AVX2)
|
||||
endif()
|
||||
|
||||
if (((${TARGET} STREQUAL ZEN) AND HAVE_AVX512VL) AND NOT NO_AVX512)
|
||||
if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU")
|
||||
execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
|
||||
if (${CMAKE_C_COMPILER_VERSION} VERSION_GREATER 12.99)
|
||||
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=znver4")
|
||||
else()
|
||||
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=skylake-avx512")
|
||||
endif()
|
||||
elseif (${CMAKE_C_COMPILER_ID} STREQUAL "Clang" OR ${CMAKE_C_COMPILER_ID} STREQUAL "AppleClang")
|
||||
if (${CMAKE_C_COMPILER_VERSION} VERSION_GREATER 15.99)
|
||||
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=znver4")
|
||||
else()
|
||||
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=skylake-avx512")
|
||||
endif()
|
||||
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -exhaustive-register-search")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# AVX2 kernel flags for HASWELL, and for ZEN when AVX512VL is not available.
# GCC gets -mavx2 from version 4.7 onwards; Clang gets -mavx2 -mfma.
# CMAKE_C_COMPILER_ID is "Clang" or "AppleClang" and STREQUAL is
# case-sensitive, so comparing against "CLANG" never matched and the Clang
# branch was dead. The test now mirrors the Clang/AppleClang checks used by
# the other target branches in this file.
if ((${TARGET} STREQUAL HASWELL OR (${TARGET} STREQUAL ZEN AND NOT HAVE_AVX512VL)) AND NOT NO_AVX2)
  if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU")
    execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
    if (${GCC_VERSION} VERSION_GREATER 4.7 OR ${GCC_VERSION} VERSION_EQUAL 4.7)
      set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx2")
    endif()
  elseif (${CMAKE_C_COMPILER_ID} STREQUAL "Clang" OR ${CMAKE_C_COMPILER_ID} STREQUAL "AppleClang")
    set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx2 -mfma")
  endif()
endif()
|
||||
if (DEFINED HAVE_AVX)
|
||||
|
|
8
common.h
8
common.h
|
@ -90,7 +90,7 @@ extern "C" {
|
|||
#endif
|
||||
#include <time.h>
|
||||
|
||||
#ifdef OS_LINUX
|
||||
#if defined(OS_LINUX) || defined(OS_QNX)
|
||||
#include <malloc.h>
|
||||
#include <sched.h>
|
||||
#endif
|
||||
|
@ -107,7 +107,7 @@ extern "C" {
|
|||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef OS_HAIKU
|
||||
#if defined(OS_HAIKU) || defined(OS_QNX)
|
||||
#define NO_SYSV_IPC
|
||||
#endif
|
||||
|
||||
|
@ -387,6 +387,10 @@ typedef int blasint;
|
|||
#endif
|
||||
*/
|
||||
|
||||
#ifdef __EMSCRIPTEN__
|
||||
#define YIELDING
|
||||
#endif
|
||||
|
||||
#ifndef YIELDING
|
||||
#define YIELDING sched_yield()
|
||||
#endif
|
||||
|
|
|
@ -43,7 +43,7 @@
|
|||
|
||||
#define MB asm("mb")
|
||||
#define WMB asm("wmb")
|
||||
#define RMB asm("rmb")
|
||||
#define RMB asm("mb")
|
||||
|
||||
static void __inline blas_lock(unsigned long *address){
|
||||
#ifndef __DECC
|
||||
|
|
|
@ -2612,7 +2612,7 @@
|
|||
#ifndef ASSEMBLER
|
||||
#if !defined(DYNAMIC_ARCH) \
|
||||
&& (defined(ARCH_X86) || defined(ARCH_X86_64) || defined(ARCH_IA64) || defined(ARCH_MIPS64) || defined(ARCH_ARM64) \
|
||||
|| defined(ARCH_LOONGARCH64) || defined(ARCH_E2K))
|
||||
|| defined(ARCH_LOONGARCH64) || defined(ARCH_E2K) || defined(ARCH_ALPHA))
|
||||
extern BLASLONG gemm_offset_a;
|
||||
extern BLASLONG gemm_offset_b;
|
||||
extern BLASLONG sbgemm_p;
|
||||
|
|
|
@ -86,7 +86,9 @@ static inline unsigned int rpcc(void){
|
|||
//__asm__ __volatile__("dmfc0 %0, $25, 1": "=r"(tmp):: "memory");
|
||||
//ret=tmp;
|
||||
__asm__ __volatile__(".set push \n"
|
||||
#if !defined(__mips_isa_rev) || __mips_isa_rev < 2
|
||||
".set mips32r2\n"
|
||||
#endif
|
||||
"rdhwr %0, $2\n"
|
||||
".set pop": "=r"(ret):: "memory");
|
||||
|
||||
|
@ -99,7 +101,9 @@ static inline unsigned int rpcc(void){
|
|||
static inline int WhereAmI(void){
|
||||
int ret=0;
|
||||
__asm__ __volatile__(".set push \n"
|
||||
#if !defined(__mips_isa_rev) || __mips_isa_rev < 2
|
||||
".set mips32r2\n"
|
||||
#endif
|
||||
"rdhwr %0, $0\n"
|
||||
".set pop": "=r"(ret):: "memory");
|
||||
return ret;
|
||||
|
@ -197,9 +201,15 @@ static inline int blas_quickdivide(blasint x, blasint y){
|
|||
|
||||
#if defined(ASSEMBLER) && !defined(NEEDPARAM)
|
||||
|
||||
#if defined(__mips_isa_rev) && __mips_isa_rev >= 6
|
||||
#define ASSEMBLER_ARCH mips64r6
|
||||
#else
|
||||
#define ASSEMBLER_ARCH mips64
|
||||
#endif
|
||||
|
||||
#define PROLOGUE \
|
||||
.text ;\
|
||||
.set mips64 ;\
|
||||
.set ASSEMBLER_ARCH ;\
|
||||
.align 5 ;\
|
||||
.globl REALNAME ;\
|
||||
.ent REALNAME ;\
|
||||
|
|
156
common_param.h
156
common_param.h
|
@ -47,9 +47,10 @@ typedef struct {
|
|||
int dtb_entries;
|
||||
int offsetA, offsetB, align;
|
||||
|
||||
#ifdef BUILD_BFLOAT16
|
||||
#if BUILD_BFLOAT16 == 1
|
||||
int sbgemm_p, sbgemm_q, sbgemm_r;
|
||||
int sbgemm_unroll_m, sbgemm_unroll_n, sbgemm_unroll_mn;
|
||||
int sbgemm_align_k;
|
||||
|
||||
void (*sbstobf16_k) (BLASLONG, float *, BLASLONG, bfloat16 *, BLASLONG);
|
||||
void (*sbdtobf16_k) (BLASLONG, double *, BLASLONG, bfloat16 *, BLASLONG);
|
||||
|
@ -160,51 +161,59 @@ BLASLONG (*isbmin_k) (BLASLONG, float *, BLASLONG);
|
|||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(BUILD_SINGLE) || defined(BUILD_COMPLEX)
|
||||
#if (BUILD_SINGLE == 1) || (BUILD_DOUBLE == 1) || (BUILD_COMPLEX == 1) || (BUILD_COMPLEX16 == 1)
|
||||
int sgemm_p, sgemm_q, sgemm_r;
|
||||
int sgemm_unroll_m, sgemm_unroll_n, sgemm_unroll_mn;
|
||||
#endif
|
||||
|
||||
int exclusive_cache;
|
||||
|
||||
#if defined(BUILD_SINGLE) || defined(BUILD_COMPLEX)
|
||||
#if (BUILD_SINGLE == 1) || (BUILD_COMPLEX == 1)
|
||||
float (*samax_k) (BLASLONG, float *, BLASLONG);
|
||||
float (*samin_k) (BLASLONG, float *, BLASLONG);
|
||||
float (*smax_k) (BLASLONG, float *, BLASLONG);
|
||||
float (*smin_k) (BLASLONG, float *, BLASLONG);
|
||||
#endif
|
||||
|
||||
#if (BUILD_SINGLE==1) || (BUILD_DOUBLE ==1) || (BUILD_COMPLEX==1)
|
||||
BLASLONG (*isamax_k)(BLASLONG, float *, BLASLONG);
|
||||
#endif
|
||||
#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
|
||||
BLASLONG (*isamin_k)(BLASLONG, float *, BLASLONG);
|
||||
BLASLONG (*ismax_k) (BLASLONG, float *, BLASLONG);
|
||||
BLASLONG (*ismin_k) (BLASLONG, float *, BLASLONG);
|
||||
float (*snrm2_k) (BLASLONG, float *, BLASLONG);
|
||||
float (*sasum_k) (BLASLONG, float *, BLASLONG);
|
||||
#endif
|
||||
#ifdef BUILD_SINGLE
|
||||
#if (BUILD_SINGLE==1)
|
||||
float (*ssum_k) (BLASLONG, float *, BLASLONG);
|
||||
#endif
|
||||
#if defined(BUILD_SINGLE) || defined(BUILD_COMPLEX)
|
||||
#if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
|
||||
int (*scopy_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG);
|
||||
float (*sdot_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG);
|
||||
//double (*dsdot_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG);
|
||||
|
||||
int (*srot_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG, float, float);
|
||||
|
||||
#endif
|
||||
#if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
|
||||
int (*saxpy_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
|
||||
#endif
|
||||
#if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1) || (BUILD_COMPLEX16==1)
|
||||
int (*sscal_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
|
||||
#endif
|
||||
#if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
|
||||
int (*sswap_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
|
||||
|
||||
int (*sgemv_n) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
|
||||
int (*sgemv_t) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
|
||||
#endif
|
||||
#ifdef BUILD_SINGLE
|
||||
#if (BUILD_SINGLE==1)
|
||||
int (*sger_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
|
||||
|
||||
int (*ssymv_L) (BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
|
||||
int (*ssymv_U) (BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
|
||||
#endif
|
||||
|
||||
#if defined(BUILD_SINGLE) || defined(BUILD_COMPLEX)
|
||||
#if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
|
||||
#ifdef ARCH_X86_64
|
||||
void (*sgemm_direct) (BLASLONG, BLASLONG, BLASLONG, float *, BLASLONG , float *, BLASLONG , float * , BLASLONG);
|
||||
int (*sgemm_direct_performant) (BLASLONG M, BLASLONG N, BLASLONG K);
|
||||
|
@ -219,7 +228,7 @@ BLASLONG (*ismin_k) (BLASLONG, float *, BLASLONG);
|
|||
int (*sgemm_oncopy )(BLASLONG, BLASLONG, float *, BLASLONG, float *);
|
||||
int (*sgemm_otcopy )(BLASLONG, BLASLONG, float *, BLASLONG, float *);
|
||||
#endif
|
||||
#ifdef BUILD_SINGLE
|
||||
#if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
|
||||
#ifdef SMALL_MATRIX_OPT
|
||||
int (*sgemm_small_matrix_permit)(int transa, int transb, BLASLONG m, BLASLONG n, BLASLONG k, float alpha, float beta);
|
||||
|
||||
|
@ -255,7 +264,8 @@ BLASLONG (*ismin_k) (BLASLONG, float *, BLASLONG);
|
|||
int (*strsm_olnncopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *);
|
||||
int (*strsm_oltucopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *);
|
||||
int (*strsm_oltncopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *);
|
||||
|
||||
#endif
|
||||
#if (BUILD_SINGLE==1)
|
||||
int (*strmm_kernel_RN)(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG, BLASLONG);
|
||||
int (*strmm_kernel_RT)(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG, BLASLONG);
|
||||
int (*strmm_kernel_LN)(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG, BLASLONG);
|
||||
|
@ -287,12 +297,12 @@ BLASLONG (*ismin_k) (BLASLONG, float *, BLASLONG);
|
|||
int (*slaswp_ncopy) (BLASLONG, BLASLONG, BLASLONG, float *, BLASLONG, blasint *, float *);
|
||||
#endif
|
||||
|
||||
#if defined(BUILD_DOUBLE) || defined(BUILD_COMPLEX16)
|
||||
#if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
|
||||
int dgemm_p, dgemm_q, dgemm_r;
|
||||
int dgemm_unroll_m, dgemm_unroll_n, dgemm_unroll_mn;
|
||||
#endif
|
||||
|
||||
#if defined(BUILD_DOUBLE) || defined(BUILD_COMPLEX16)
|
||||
#if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
|
||||
double (*damax_k) (BLASLONG, double *, BLASLONG);
|
||||
double (*damin_k) (BLASLONG, double *, BLASLONG);
|
||||
double (*dmax_k) (BLASLONG, double *, BLASLONG);
|
||||
|
@ -301,23 +311,21 @@ BLASLONG (*idamax_k)(BLASLONG, double *, BLASLONG);
|
|||
BLASLONG (*idamin_k)(BLASLONG, double *, BLASLONG);
|
||||
BLASLONG (*idmax_k) (BLASLONG, double *, BLASLONG);
|
||||
BLASLONG (*idmin_k) (BLASLONG, double *, BLASLONG);
|
||||
#endif
|
||||
|
||||
#if defined(BUILD_DOUBLE) || defined(BUILD_COMPLEX16)
|
||||
double (*dnrm2_k) (BLASLONG, double *, BLASLONG);
|
||||
double (*dasum_k) (BLASLONG, double *, BLASLONG);
|
||||
#endif
|
||||
#ifdef BUILD_DOUBLE
|
||||
#if (BUILD_DOUBLE==1)
|
||||
double (*dsum_k) (BLASLONG, double *, BLASLONG);
|
||||
#endif
|
||||
#if defined(BUILD_DOUBLE) || defined(BUILD_COMPLEX16)
|
||||
#if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
|
||||
int (*dcopy_k) (BLASLONG, double *, BLASLONG, double *, BLASLONG);
|
||||
double (*ddot_k) (BLASLONG, double *, BLASLONG, double *, BLASLONG);
|
||||
#endif
|
||||
#if defined (BUILD_SINGLE) || defined(BUILD_DOUBLE)
|
||||
#if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1)
|
||||
double (*dsdot_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG);
|
||||
#endif
|
||||
#if defined(BUILD_DOUBLE) || defined(BUILD_COMPLEX16)
|
||||
#if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
|
||||
int (*drot_k) (BLASLONG, double *, BLASLONG, double *, BLASLONG, double, double);
|
||||
int (*daxpy_k) (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG);
|
||||
int (*dscal_k) (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG);
|
||||
|
@ -325,13 +333,13 @@ BLASLONG (*idmin_k) (BLASLONG, double *, BLASLONG);
|
|||
int (*dgemv_n) (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *);
|
||||
int (*dgemv_t) (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *);
|
||||
#endif
|
||||
#ifdef BUILD_DOUBLE
|
||||
#if (BUILD_DOUBLE==1)
|
||||
int (*dger_k) (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *);
|
||||
|
||||
int (*dsymv_L) (BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *);
|
||||
int (*dsymv_U) (BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *);
|
||||
#endif
|
||||
#if defined(BUILD_DOUBLE) || defined(BUILD_COMPLEX16)
|
||||
#if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
|
||||
int (*dgemm_kernel )(BLASLONG, BLASLONG, BLASLONG, double, double *, double *, double *, BLASLONG);
|
||||
int (*dgemm_beta )(BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG);
|
||||
|
||||
|
@ -340,7 +348,7 @@ BLASLONG (*idmin_k) (BLASLONG, double *, BLASLONG);
|
|||
int (*dgemm_oncopy )(BLASLONG, BLASLONG, double *, BLASLONG, double *);
|
||||
int (*dgemm_otcopy )(BLASLONG, BLASLONG, double *, BLASLONG, double *);
|
||||
#endif
|
||||
#ifdef BUILD_DOUBLE
|
||||
#if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
|
||||
#ifdef SMALL_MATRIX_OPT
|
||||
int (*dgemm_small_matrix_permit)(int transa, int transb, BLASLONG m, BLASLONG n, BLASLONG k, double alpha, double beta);
|
||||
|
||||
|
@ -354,6 +362,8 @@ BLASLONG (*idmin_k) (BLASLONG, double *, BLASLONG);
|
|||
int (*dgemm_small_kernel_b0_tn )(BLASLONG m, BLASLONG n, BLASLONG k, double * A, BLASLONG lda, double alpha, double * B, BLASLONG ldb, double * C, BLASLONG ldc);
|
||||
int (*dgemm_small_kernel_b0_tt )(BLASLONG m, BLASLONG n, BLASLONG k, double * A, BLASLONG lda, double alpha, double * B, BLASLONG ldb, double * C, BLASLONG ldc);
|
||||
#endif
|
||||
#endif
|
||||
#if (BUILD_DOUBLE==1)
|
||||
int (*dtrsm_kernel_LN)(BLASLONG, BLASLONG, BLASLONG, double, double *, double *, double *, BLASLONG, BLASLONG);
|
||||
int (*dtrsm_kernel_LT)(BLASLONG, BLASLONG, BLASLONG, double, double *, double *, double *, BLASLONG, BLASLONG);
|
||||
int (*dtrsm_kernel_RN)(BLASLONG, BLASLONG, BLASLONG, double, double *, double *, double *, BLASLONG, BLASLONG);
|
||||
|
@ -500,23 +510,25 @@ BLASLONG (*iqmin_k) (BLASLONG, xdouble *, BLASLONG);
|
|||
|
||||
#endif
|
||||
|
||||
#ifdef BUILD_COMPLEX
|
||||
#if (BUILD_COMPLEX==1)
|
||||
int cgemm_p, cgemm_q, cgemm_r;
|
||||
int cgemm_unroll_m, cgemm_unroll_n, cgemm_unroll_mn;
|
||||
|
||||
float (*camax_k) (BLASLONG, float *, BLASLONG);
|
||||
float (*camin_k) (BLASLONG, float *, BLASLONG);
|
||||
|
||||
BLASLONG (*icamax_k)(BLASLONG, float *, BLASLONG);
|
||||
BLASLONG (*icamin_k)(BLASLONG, float *, BLASLONG);
|
||||
|
||||
float (*cnrm2_k) (BLASLONG, float *, BLASLONG);
|
||||
float (*casum_k) (BLASLONG, float *, BLASLONG);
|
||||
float (*csum_k) (BLASLONG, float *, BLASLONG);
|
||||
|
||||
int (*ccopy_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG);
|
||||
openblas_complex_float (*cdotu_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG);
|
||||
openblas_complex_float (*cdotc_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG);
|
||||
int (*csrot_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG, float, float);
|
||||
|
||||
int (*csrot_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG, float, float);
|
||||
int (*caxpy_k) (BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
|
||||
int (*caxpyc_k)(BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
|
||||
int (*cscal_k) (BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
|
||||
|
@ -710,7 +722,7 @@ BLASLONG (*icamin_k)(BLASLONG, float *, BLASLONG);
|
|||
int (*claswp_ncopy) (BLASLONG, BLASLONG, BLASLONG, float *, BLASLONG, blasint *, float *);
|
||||
#endif
|
||||
|
||||
#ifdef BUILD_COMPLEX16
|
||||
#if (BUILD_COMPLEX16 == 1)
|
||||
int zgemm_p, zgemm_q, zgemm_r;
|
||||
int zgemm_unroll_m, zgemm_unroll_n, zgemm_unroll_mn;
|
||||
|
||||
|
@ -1092,34 +1104,34 @@ BLASLONG (*ixamin_k)(BLASLONG, xdouble *, BLASLONG);
|
|||
void (*init)(void);
|
||||
|
||||
int snum_opt, dnum_opt, qnum_opt;
|
||||
#ifdef BUILD_SINGLE
|
||||
#if (BUILD_SINGLE==1)
|
||||
int (*saxpby_k) (BLASLONG, float, float*, BLASLONG,float, float*, BLASLONG);
|
||||
#endif
|
||||
#ifdef BUILD_DOUBLE
|
||||
#if (BUILD_DOUBLE==1)
|
||||
int (*daxpby_k) (BLASLONG, double, double*, BLASLONG,double, double*, BLASLONG);
|
||||
#endif
|
||||
#ifdef BUILD_COMPLEX
|
||||
#if (BUILD_COMPLEX==1)
|
||||
int (*caxpby_k) (BLASLONG, float, float, float*, BLASLONG,float,float, float*, BLASLONG);
|
||||
#endif
|
||||
#ifdef BUILD_COMPLEX16
|
||||
#if (BUILD_COMPLEX16==1)
|
||||
int (*zaxpby_k) (BLASLONG, double, double, double*, BLASLONG,double,double, double*, BLASLONG);
|
||||
#endif
|
||||
|
||||
#ifdef BUILD_SINGLE
|
||||
#if (BUILD_SINGLE==1)
|
||||
int (*somatcopy_k_cn) (BLASLONG, BLASLONG, float, float*, BLASLONG, float*, BLASLONG);
|
||||
int (*somatcopy_k_ct) (BLASLONG, BLASLONG, float, float*, BLASLONG, float*, BLASLONG);
|
||||
int (*somatcopy_k_rn) (BLASLONG, BLASLONG, float, float*, BLASLONG, float*, BLASLONG);
|
||||
int (*somatcopy_k_rt) (BLASLONG, BLASLONG, float, float*, BLASLONG, float*, BLASLONG);
|
||||
#endif
|
||||
|
||||
#ifdef BUILD_DOUBLE
|
||||
#if (BUILD_DOUBLE==1)
|
||||
int (*domatcopy_k_cn) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG);
|
||||
int (*domatcopy_k_ct) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG);
|
||||
int (*domatcopy_k_rn) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG);
|
||||
int (*domatcopy_k_rt) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG);
|
||||
#endif
|
||||
|
||||
#ifdef BUILD_COMPLEX
|
||||
#if (BUILD_COMPLEX==1)
|
||||
int (*comatcopy_k_cn) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG);
|
||||
int (*comatcopy_k_ct) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG);
|
||||
int (*comatcopy_k_rn) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG);
|
||||
|
@ -1131,7 +1143,7 @@ BLASLONG (*ixamin_k)(BLASLONG, xdouble *, BLASLONG);
|
|||
int (*comatcopy_k_rtc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG);
|
||||
#endif
|
||||
|
||||
#ifdef BUILD_COMPLEX16
|
||||
#if (BUILD_COMPLEX16==1)
|
||||
int (*zomatcopy_k_cn) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
|
||||
int (*zomatcopy_k_ct) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
|
||||
int (*zomatcopy_k_rn) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
|
||||
|
@ -1143,21 +1155,21 @@ BLASLONG (*ixamin_k)(BLASLONG, xdouble *, BLASLONG);
|
|||
int (*zomatcopy_k_rtc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
|
||||
#endif
|
||||
|
||||
#ifdef BUILD_SINGLE
|
||||
#if (BUILD_SINGLE==1)
|
||||
int (*simatcopy_k_cn) (BLASLONG, BLASLONG, float, float*, BLASLONG);
|
||||
int (*simatcopy_k_ct) (BLASLONG, BLASLONG, float, float*, BLASLONG);
|
||||
int (*simatcopy_k_rn) (BLASLONG, BLASLONG, float, float*, BLASLONG);
|
||||
int (*simatcopy_k_rt) (BLASLONG, BLASLONG, float, float*, BLASLONG);
|
||||
#endif
|
||||
|
||||
#ifdef BUILD_DOUBLE
|
||||
#if (BUILD_DOUBLE==1)
|
||||
int (*dimatcopy_k_cn) (BLASLONG, BLASLONG, double, double*, BLASLONG);
|
||||
int (*dimatcopy_k_ct) (BLASLONG, BLASLONG, double, double*, BLASLONG);
|
||||
int (*dimatcopy_k_rn) (BLASLONG, BLASLONG, double, double*, BLASLONG);
|
||||
int (*dimatcopy_k_rt) (BLASLONG, BLASLONG, double, double*, BLASLONG);
|
||||
#endif
|
||||
|
||||
#ifdef BUILD_COMPLEX
|
||||
#if (BUILD_COMPLEX==1)
|
||||
int (*cimatcopy_k_cn) (BLASLONG, BLASLONG, float, float, float*, BLASLONG);
|
||||
int (*cimatcopy_k_ct) (BLASLONG, BLASLONG, float, float, float*, BLASLONG);
|
||||
int (*cimatcopy_k_rn) (BLASLONG, BLASLONG, float, float, float*, BLASLONG);
|
||||
|
@ -1169,7 +1181,7 @@ BLASLONG (*ixamin_k)(BLASLONG, xdouble *, BLASLONG);
|
|||
int (*cimatcopy_k_rtc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG);
|
||||
#endif
|
||||
|
||||
#ifdef BUILD_COMPLEX16
|
||||
#if (BUILD_COMPLEX16==1)
|
||||
int (*zimatcopy_k_cn) (BLASLONG, BLASLONG, double, double, double*, BLASLONG);
|
||||
int (*zimatcopy_k_ct) (BLASLONG, BLASLONG, double, double, double*, BLASLONG);
|
||||
int (*zimatcopy_k_rn) (BLASLONG, BLASLONG, double, double, double*, BLASLONG);
|
||||
|
@ -1181,16 +1193,16 @@ BLASLONG (*ixamin_k)(BLASLONG, xdouble *, BLASLONG);
|
|||
int (*zimatcopy_k_rtc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG);
|
||||
#endif
|
||||
|
||||
#ifdef BUILD_SINGLE
|
||||
#if (BUILD_SINGLE==1)
|
||||
int (*sgeadd_k) (BLASLONG, BLASLONG, float, float *, BLASLONG, float, float *, BLASLONG);
|
||||
#endif
|
||||
#ifdef BUILD_DOUBLE
|
||||
#if (BUILD_DOUBLE==1)
|
||||
int (*dgeadd_k) (BLASLONG, BLASLONG, double, double *, BLASLONG, double, double *, BLASLONG);
|
||||
#endif
|
||||
#ifdef BUILD_COMPLEX
|
||||
#if (BUILD_COMPLEX==1)
|
||||
int (*cgeadd_k) (BLASLONG, BLASLONG, float, float, float *, BLASLONG, float, float, float *, BLASLONG);
|
||||
#endif
|
||||
#ifdef BUILD_COMPLEX16
|
||||
#if (BUILD_COMPLEX16==1)
|
||||
int (*zgeadd_k) (BLASLONG, BLASLONG, double, double, double *, BLASLONG, double, double, double *, BLASLONG);
|
||||
#endif
|
||||
} gotoblas_t;
|
||||
|
@ -1206,7 +1218,7 @@ extern gotoblas_t *gotoblas;
|
|||
|
||||
#define HAVE_EX_L2 gotoblas -> exclusive_cache
|
||||
|
||||
#ifdef BUILD_BFLOAT16
|
||||
#if (BUILD_BFLOAT16==1)
|
||||
#define SBGEMM_P gotoblas -> sbgemm_p
|
||||
#define SBGEMM_Q gotoblas -> sbgemm_q
|
||||
#define SBGEMM_R gotoblas -> sbgemm_r
|
||||
|
@ -1215,7 +1227,7 @@ extern gotoblas_t *gotoblas;
|
|||
#define SBGEMM_UNROLL_MN gotoblas -> sbgemm_unroll_mn
|
||||
#endif
|
||||
|
||||
#if defined (BUILD_SINGLE)
|
||||
#if (BUILD_SINGLE==1)
|
||||
#define SGEMM_P gotoblas -> sgemm_p
|
||||
#define SGEMM_Q gotoblas -> sgemm_q
|
||||
#define SGEMM_R gotoblas -> sgemm_r
|
||||
|
@ -1224,30 +1236,14 @@ extern gotoblas_t *gotoblas;
|
|||
#define SGEMM_UNROLL_MN gotoblas -> sgemm_unroll_mn
|
||||
#endif
|
||||
|
||||
#if defined (BUILD_DOUBLE)
|
||||
#if (BUILD_DOUBLE==1)
|
||||
#define DGEMM_P gotoblas -> dgemm_p
|
||||
#define DGEMM_Q gotoblas -> dgemm_q
|
||||
#define DGEMM_R gotoblas -> dgemm_r
|
||||
#define DGEMM_UNROLL_M gotoblas -> dgemm_unroll_m
|
||||
#define DGEMM_UNROLL_N gotoblas -> dgemm_unroll_n
|
||||
#define DGEMM_UNROLL_MN gotoblas -> dgemm_unroll_mn
|
||||
#endif
|
||||
|
||||
#define QGEMM_P gotoblas -> qgemm_p
|
||||
#define QGEMM_Q gotoblas -> qgemm_q
|
||||
#define QGEMM_R gotoblas -> qgemm_r
|
||||
#define QGEMM_UNROLL_M gotoblas -> qgemm_unroll_m
|
||||
#define QGEMM_UNROLL_N gotoblas -> qgemm_unroll_n
|
||||
#define QGEMM_UNROLL_MN gotoblas -> qgemm_unroll_mn
|
||||
|
||||
#ifdef BUILD_COMPLEX
|
||||
#define CGEMM_P gotoblas -> cgemm_p
|
||||
#define CGEMM_Q gotoblas -> cgemm_q
|
||||
#define CGEMM_R gotoblas -> cgemm_r
|
||||
#define CGEMM_UNROLL_M gotoblas -> cgemm_unroll_m
|
||||
#define CGEMM_UNROLL_N gotoblas -> cgemm_unroll_n
|
||||
#define CGEMM_UNROLL_MN gotoblas -> cgemm_unroll_mn
|
||||
#ifndef BUILD_SINGLE
|
||||
#if (BUILD_SINGLE != 1)
|
||||
#define SGEMM_P gotoblas -> sgemm_p
|
||||
#define SGEMM_Q gotoblas -> sgemm_q
|
||||
#define SGEMM_R 1024
|
||||
|
@ -1257,14 +1253,38 @@ extern gotoblas_t *gotoblas;
|
|||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef BUILD_COMPLEX16
|
||||
#define QGEMM_P gotoblas -> qgemm_p
|
||||
#define QGEMM_Q gotoblas -> qgemm_q
|
||||
#define QGEMM_R gotoblas -> qgemm_r
|
||||
#define QGEMM_UNROLL_M gotoblas -> qgemm_unroll_m
|
||||
#define QGEMM_UNROLL_N gotoblas -> qgemm_unroll_n
|
||||
#define QGEMM_UNROLL_MN gotoblas -> qgemm_unroll_mn
|
||||
|
||||
#if (BUILD_COMPLEX==1)
|
||||
#define CGEMM_P gotoblas -> cgemm_p
|
||||
#define CGEMM_Q gotoblas -> cgemm_q
|
||||
#define CGEMM_R gotoblas -> cgemm_r
|
||||
#define CGEMM_UNROLL_M gotoblas -> cgemm_unroll_m
|
||||
#define CGEMM_UNROLL_N gotoblas -> cgemm_unroll_n
|
||||
#define CGEMM_UNROLL_MN gotoblas -> cgemm_unroll_mn
|
||||
#if (BUILD_SINGLE != 1)
|
||||
#define SGEMM_P gotoblas -> sgemm_p
|
||||
#define SGEMM_Q gotoblas -> sgemm_q
|
||||
#define SGEMM_R 1024
|
||||
#define SGEMM_UNROLL_M gotoblas -> sgemm_unroll_m
|
||||
#define SGEMM_UNROLL_N gotoblas -> sgemm_unroll_n
|
||||
#define SGEMM_UNROLL_MN gotoblas -> sgemm_unroll_mn
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if (BUILD_COMPLEX16==1)
|
||||
#define ZGEMM_P gotoblas -> zgemm_p
|
||||
#define ZGEMM_Q gotoblas -> zgemm_q
|
||||
#define ZGEMM_R gotoblas -> zgemm_r
|
||||
#define ZGEMM_UNROLL_M gotoblas -> zgemm_unroll_m
|
||||
#define ZGEMM_UNROLL_N gotoblas -> zgemm_unroll_n
|
||||
#define ZGEMM_UNROLL_MN gotoblas -> zgemm_unroll_mn
|
||||
#ifndef BUILD_DOUBLE
|
||||
#if (BUILD_DOUBLE != 1)
|
||||
#define DGEMM_P gotoblas -> dgemm_p
|
||||
#define DGEMM_Q gotoblas -> dgemm_q
|
||||
#define DGEMM_R 1024
|
||||
|
@ -1272,6 +1292,14 @@ extern gotoblas_t *gotoblas;
|
|||
#define DGEMM_UNROLL_N gotoblas -> dgemm_unroll_n
|
||||
#define DGEMM_UNROLL_MN gotoblas -> dgemm_unroll_mn
|
||||
#endif
|
||||
#if (BUILD_COMPLEX != 1)
|
||||
#define CGEMM_P gotoblas -> cgemm_p
|
||||
#define CGEMM_Q gotoblas -> cgemm_q
|
||||
#define CGEMM_R gotoblas -> cgemm_r
|
||||
#define CGEMM_UNROLL_M gotoblas -> cgemm_unroll_m
|
||||
#define CGEMM_UNROLL_N gotoblas -> cgemm_unroll_n
|
||||
#define CGEMM_UNROLL_MN gotoblas -> cgemm_unroll_mn
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#define XGEMM_P gotoblas -> xgemm_p
|
||||
|
@ -1318,7 +1346,7 @@ extern gotoblas_t *gotoblas;
|
|||
#define HAVE_EX_L2 0
|
||||
#endif
|
||||
|
||||
#ifdef BUILD_BFLOAT16
|
||||
#if (BUILD_BFLOAT16 == 1)
|
||||
#define SBGEMM_P SBGEMM_DEFAULT_P
|
||||
#define SBGEMM_Q SBGEMM_DEFAULT_Q
|
||||
#define SBGEMM_R SBGEMM_DEFAULT_R
|
||||
|
|
|
@ -53,6 +53,7 @@ extern void goto_set_num_threads(int nthreads);
|
|||
/* Global Parameter */
|
||||
extern int blas_cpu_number;
|
||||
extern int blas_num_threads;
|
||||
extern int blas_num_threads_set;
|
||||
extern int blas_omp_linked;
|
||||
|
||||
#define BLAS_LEGACY 0x8000U
|
||||
|
@ -139,7 +140,11 @@ extern int blas_server_avail;
|
|||
static __inline int num_cpu_avail(int level) {
|
||||
|
||||
#ifdef USE_OPENMP
|
||||
int openmp_nthreads=omp_get_max_threads();
|
||||
int openmp_nthreads;
|
||||
if (blas_num_threads_set == 0)
|
||||
openmp_nthreads=omp_get_max_threads();
|
||||
else
|
||||
openmp_nthreads=blas_cpu_number;
|
||||
#endif
|
||||
|
||||
#ifndef USE_OPENMP
|
||||
|
|
|
@ -59,6 +59,11 @@ void get_subarchitecture(void){
|
|||
printf("ev%d", implver() + 4);
|
||||
}
|
||||
|
||||
|
||||
void get_corename(void){
|
||||
printf("EV%d", implver() + 4);
|
||||
}
|
||||
|
||||
void get_subdirname(void){
|
||||
printf("alpha");
|
||||
}
|
||||
|
|
|
@ -202,10 +202,14 @@ int detect(void)
|
|||
return CPU_CORTEXA510;
|
||||
else if (strstr(cpu_part, "0xd47"))
|
||||
return CPU_CORTEXA710;
|
||||
else if (strstr(cpu_part, "0xd4d")) //A715
|
||||
return CPU_CORTEXA710;
|
||||
else if (strstr(cpu_part, "0xd44"))
|
||||
return CPU_CORTEXX1;
|
||||
else if (strstr(cpu_part, "0xd4c"))
|
||||
return CPU_CORTEXX2;
|
||||
else if (strstr(cpu_part, "0xd4e")) //X3
|
||||
return CPU_CORTEXX2;
|
||||
}
|
||||
// Qualcomm
|
||||
else if (strstr(cpu_implementer, "0x51") && strstr(cpu_part, "0xc00"))
|
||||
|
|
|
@ -165,7 +165,9 @@ void get_cpuconfig(void){
|
|||
}else{
|
||||
printf("#define UNKNOWN\n");
|
||||
}
|
||||
if (!get_feature("msa")) printf("#define NO_MSA\n");
|
||||
#ifndef NO_MSA
|
||||
if (get_feature("msa")) printf("#define HAVE_MSA\n");
|
||||
#endif
|
||||
}
|
||||
|
||||
void get_libname(void){
|
||||
|
|
|
@ -71,15 +71,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
/*********************************************************************/
|
||||
|
||||
#define CPU_UNKNOWN 0
|
||||
#define CPU_SICORTEX 1
|
||||
#define CPU_LOONGSON3R3 2
|
||||
#define CPU_LOONGSON3R4 3
|
||||
#define CPU_I6400 4
|
||||
#define CPU_P6600 5
|
||||
#define CPU_I6500 6
|
||||
#define CPU_MIPS64_GENERIC 1
|
||||
#define CPU_SICORTEX 2
|
||||
#define CPU_LOONGSON3R3 3
|
||||
#define CPU_LOONGSON3R4 4
|
||||
#define CPU_I6400 5
|
||||
#define CPU_P6600 6
|
||||
#define CPU_I6500 7
|
||||
|
||||
static char *cpuname[] = {
|
||||
"UNKNOWN",
|
||||
"MIPS64_GENERIC"
|
||||
"SICORTEX",
|
||||
"LOONGSON3R3",
|
||||
"LOONGSON3R4",
|
||||
|
@ -113,8 +115,11 @@ int detect(void){
|
|||
return CPU_SICORTEX;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
return CPU_MIPS64_GENERIC;
|
||||
#else
|
||||
return CPU_UNKNOWN;
|
||||
#endif
|
||||
}
|
||||
|
||||
char *get_corename(void){
|
||||
|
@ -136,8 +141,10 @@ void get_subarchitecture(void){
|
|||
printf("P6600");
|
||||
}else if(detect()==CPU_I6500){
|
||||
printf("I6500");
|
||||
}else{
|
||||
}else if(detect()==CPU_SICORTEX){
|
||||
printf("SICORTEX");
|
||||
}else{
|
||||
printf("MIPS64_GENERIC");
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -201,7 +208,9 @@ void get_cpuconfig(void){
|
|||
printf("#define DTB_SIZE 4096\n");
|
||||
printf("#define L2_ASSOCIATIVE 8\n");
|
||||
}
|
||||
if (!get_feature("msa")) printf("#define NO_MSA\n");
|
||||
#ifndef NO_MSA
|
||||
if (get_feature("msa")) printf("#define HAVE_MSA\n");
|
||||
#endif
|
||||
}
|
||||
|
||||
void get_libname(void){
|
||||
|
@ -215,8 +224,8 @@ void get_libname(void){
|
|||
printf("p6600\n");
|
||||
}else if(detect()==CPU_I6500) {
|
||||
printf("i6500\n");
|
||||
}else{
|
||||
printf("mips64\n");
|
||||
}else {
|
||||
printf("mips64_generic\n");
|
||||
}
|
||||
}
|
||||
|
||||
|
|
23
cpuid_x86.c
23
cpuid_x86.c
|
@ -1544,6 +1544,17 @@ int get_cpuname(void){
|
|||
return CPUTYPE_NEHALEM;
|
||||
}
|
||||
break;
|
||||
case 11: //family 6 exmodel 11
|
||||
switch (model) {
|
||||
case 7: // Raptor Lake
|
||||
if(support_avx2())
|
||||
return CPUTYPE_HASWELL;
|
||||
if(support_avx())
|
||||
return CPUTYPE_SANDYBRIDGE;
|
||||
else
|
||||
return CPUTYPE_NEHALEM;
|
||||
}
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case 0x7:
|
||||
|
@ -2334,6 +2345,18 @@ int get_coretype(void){
|
|||
return CORE_NEHALEM;
|
||||
}
|
||||
|
||||
case 11:
|
||||
switch (model) {
|
||||
case 7: // Raptor Lake
|
||||
#ifndef NO_AVX2
|
||||
if(support_avx2())
|
||||
return CORE_HASWELL;
|
||||
#endif
|
||||
if(support_avx())
|
||||
return CORE_SANDYBRIDGE;
|
||||
else
|
||||
return CORE_NEHALEM;
|
||||
}
|
||||
case 15:
|
||||
if (model <= 0x2) return CORE_NORTHWOOD;
|
||||
else return CORE_PRESCOTT;
|
||||
|
|
5
ctest.c
5
ctest.c
|
@ -173,3 +173,8 @@ HAVE_C11
|
|||
ARCH_E2K
|
||||
#endif
|
||||
|
||||
#if defined(__EMSCRIPTEN__)
|
||||
ARCH_RISCV64
|
||||
OS_WINDOWS
|
||||
#endif
|
||||
|
||||
|
|
|
@ -40,7 +40,7 @@ else()
|
|||
c_${float_char}blas1.c)
|
||||
endif()
|
||||
target_link_libraries(x${float_char}cblat1 ${OpenBLAS_LIBNAME})
|
||||
if(${CMAKE_SYSTEM_NAME} MATCHES "Linux" OR ${CMAKE_SYSTEM_NAME} MATCHES "FreeBSD")
|
||||
if(${CMAKE_SYSTEM_NAME} MATCHES "Linux" OR ${CMAKE_SYSTEM_NAME} MATCHES "FreeBSD" OR ${CMAKE_SYSTEM_NAME} MATCHES "QNX")
|
||||
target_link_libraries(x${float_char}cblat1 m)
|
||||
endif()
|
||||
add_test(NAME "x${float_char}cblat1"
|
||||
|
@ -65,7 +65,7 @@ else()
|
|||
constant.c)
|
||||
endif()
|
||||
target_link_libraries(x${float_char}cblat2 ${OpenBLAS_LIBNAME})
|
||||
if(${CMAKE_SYSTEM_NAME} MATCHES "Linux" OR ${CMAKE_SYSTEM_NAME} MATCHES "FreeBSD")
|
||||
if(${CMAKE_SYSTEM_NAME} MATCHES "Linux" OR ${CMAKE_SYSTEM_NAME} MATCHES "FreeBSD" OR ${CMAKE_SYSTEM_NAME} MATCHES "QNX")
|
||||
target_link_libraries(x${float_char}cblat2 m)
|
||||
endif()
|
||||
add_test(NAME "x${float_char}cblat2"
|
||||
|
@ -90,7 +90,7 @@ else()
|
|||
constant.c)
|
||||
endif()
|
||||
target_link_libraries(x${float_char}cblat3 ${OpenBLAS_LIBNAME})
|
||||
if(${CMAKE_SYSTEM_NAME} MATCHES "Linux" OR ${CMAKE_SYSTEM_NAME} MATCHES "FreeBSD")
|
||||
if(${CMAKE_SYSTEM_NAME} MATCHES "Linux" OR ${CMAKE_SYSTEM_NAME} MATCHES "FreeBSD" OR ${CMAKE_SYSTEM_NAME} MATCHES "QNX")
|
||||
target_link_libraries(x${float_char}cblat3 m)
|
||||
endif()
|
||||
add_test(NAME "x${float_char}cblat3"
|
||||
|
|
|
@ -237,7 +237,7 @@ endif
|
|||
|
||||
ifeq ($(BUILD_DOUBLE),1)
|
||||
# Double real
|
||||
ifeq ($(NOFORTRAN),0)
|
||||
ifeq ($(NOFORTRAN), $(filter 0 2,$(NOFORTRAN)))
|
||||
xdcblat1: $(dtestl1o) c_dblat1.o $(TOPDIR)/$(LIBNAME)
|
||||
$(FC) $(FLDFLAGS) -o xdcblat1 c_dblat1.o $(dtestl1o) $(LIB) $(EXTRALIB) $(CEXTRALIB)
|
||||
xdcblat2: $(dtestl2o) c_dblat2.o $(TOPDIR)/$(LIBNAME)
|
||||
|
@ -256,7 +256,7 @@ endif
|
|||
|
||||
ifeq ($(BUILD_COMPLEX),1)
|
||||
# Single complex
|
||||
ifeq ($(NOFORTRAN),0)
|
||||
ifeq ($(NOFORTRAN), $(filter 0 2,$(NOFORTRAN)))
|
||||
xccblat1: $(ctestl1o) c_cblat1.o $(TOPDIR)/$(LIBNAME)
|
||||
$(FC) $(FLDFLAGS) -o xccblat1 c_cblat1.o $(ctestl1o) $(LIB) $(EXTRALIB) $(CEXTRALIB)
|
||||
xccblat2: $(ctestl2o) c_cblat2.o $(TOPDIR)/$(LIBNAME)
|
||||
|
@ -278,7 +278,7 @@ endif
|
|||
|
||||
ifeq ($(BUILD_COMPLEX16),1)
|
||||
# Double complex
|
||||
ifeq ($(NOFORTRAN),0)
|
||||
ifeq ($(NOFORTRAN), $(filter 0 2,$(NOFORTRAN)))
|
||||
xzcblat1: $(ztestl1o) c_zblat1.o $(TOPDIR)/$(LIBNAME)
|
||||
$(FC) $(FLDFLAGS) -o xzcblat1 c_zblat1.o $(ztestl1o) $(LIB) $(EXTRALIB) $(CEXTRALIB)
|
||||
xzcblat2: $(ztestl2o) c_zblat2.o $(TOPDIR)/$(LIBNAME)
|
||||
|
|
|
@ -969,7 +969,7 @@ real *sfac;
|
|||
1.17 };
|
||||
|
||||
/* Local variables */
|
||||
extern /* Subroutine */ srottest_();
|
||||
extern /* Subroutine */ void srottest_();
|
||||
static integer i__, k, ksize;
|
||||
extern /* Subroutine */ int stest_(), srotmtest_();
|
||||
static integer ki, kn;
|
||||
|
|
|
@ -304,6 +304,15 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
|
|||
while (gemm_p * min_l > l2size) gemm_p -= GEMM_UNROLL_M;
|
||||
}
|
||||
|
||||
BLASLONG pad_min_l = min_l;
|
||||
#if defined(HALF)
|
||||
#if defined(DYNAMIC_ARCH)
|
||||
pad_min_l = (min_l + gotoblas->sbgemm_align_k - 1) & ~(gotoblas->sbgemm_align_k-1);
|
||||
#else
|
||||
pad_min_l = (min_l + SBGEMM_ALIGN_K - 1) & ~(SBGEMM_ALIGN_K - 1);;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* First, we have to move data A to L2 cache */
|
||||
min_i = m_to - m_from;
|
||||
l1stride = 1;
|
||||
|
@ -350,7 +359,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
|
|||
START_RPCC();
|
||||
|
||||
OCOPY_OPERATION(min_l, min_jj, b, ldb, ls, jjs,
|
||||
sb + min_l * (jjs - js) * COMPSIZE * l1stride);
|
||||
sb + pad_min_l * (jjs - js) * COMPSIZE * l1stride);
|
||||
|
||||
STOP_RPCC(outercost);
|
||||
|
||||
|
@ -358,10 +367,10 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
|
|||
|
||||
#if !defined(XDOUBLE) || !defined(QUAD_PRECISION)
|
||||
KERNEL_OPERATION(min_i, min_jj, min_l, alpha,
|
||||
sa, sb + min_l * (jjs - js) * COMPSIZE * l1stride, c, ldc, m_from, jjs);
|
||||
sa, sb + pad_min_l * (jjs - js) * COMPSIZE * l1stride, c, ldc, m_from, jjs);
|
||||
#else
|
||||
KERNEL_OPERATION(min_i, min_jj, min_l, (void *)&xalpha,
|
||||
sa, sb + min_l * (jjs - js) * COMPSIZE * l1stride, c, ldc, m_from, jjs);
|
||||
sa, sb + pad_min_l * (jjs - js) * COMPSIZE * l1stride, c, ldc, m_from, jjs);
|
||||
#endif
|
||||
|
||||
STOP_RPCC(kernelcost);
|
||||
|
|
|
@ -325,6 +325,16 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
|
|||
if (min_l > GEMM_Q) min_l = (min_l + 1) / 2;
|
||||
}
|
||||
|
||||
BLASLONG pad_min_l = min_l;
|
||||
|
||||
#if defined(HALF)
|
||||
#if defined(DYNAMIC_ARCH)
|
||||
pad_min_l = (min_l + gotoblas->sbgemm_align_k - 1) & ~(gotoblas->sbgemm_align_k-1);
|
||||
#else
|
||||
/* Round min_l up to a multiple of SBGEMM_ALIGN_K (mask form assumes the alignment is a power of two). */
pad_min_l = (min_l + SBGEMM_ALIGN_K - 1) & ~(SBGEMM_ALIGN_K - 1);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* Determine step size in m
|
||||
* Note: We are currently on the first step in m
|
||||
*/
|
||||
|
@ -382,13 +392,13 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
|
|||
/* Copy part of local region of B into workspace */
|
||||
START_RPCC();
|
||||
OCOPY_OPERATION(min_l, min_jj, b, ldb, ls, jjs,
|
||||
buffer[bufferside] + min_l * (jjs - js) * COMPSIZE * l1stride);
|
||||
buffer[bufferside] + pad_min_l * (jjs - js) * COMPSIZE * l1stride);
|
||||
STOP_RPCC(copy_B);
|
||||
|
||||
/* Apply kernel with local region of A and part of local region of B */
|
||||
START_RPCC();
|
||||
KERNEL_OPERATION(min_i, min_jj, min_l, alpha,
|
||||
sa, buffer[bufferside] + min_l * (jjs - js) * COMPSIZE * l1stride,
|
||||
sa, buffer[bufferside] + pad_min_l * (jjs - js) * COMPSIZE * l1stride,
|
||||
c, ldc, m_from, jjs);
|
||||
STOP_RPCC(kernel);
|
||||
|
||||
|
|
|
@ -470,9 +470,13 @@ blas_queue_t *tscq;
|
|||
#endif
|
||||
|
||||
#ifdef CONSISTENT_FPCSR
|
||||
#ifdef __aarch64__
|
||||
__asm__ __volatile__ ("msr fpcr, %0" : : "r" (queue -> sse_mode));
|
||||
#else
|
||||
__asm__ __volatile__ ("ldmxcsr %0" : : "m" (queue -> sse_mode));
|
||||
__asm__ __volatile__ ("fldcw %0" : : "m" (queue -> x87_mode));
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef MONITOR
|
||||
main_status[cpu] = MAIN_RUNNING1;
|
||||
|
@ -746,9 +750,13 @@ int exec_blas_async(BLASLONG pos, blas_queue_t *queue){
|
|||
queue -> position = pos;
|
||||
|
||||
#ifdef CONSISTENT_FPCSR
|
||||
#ifdef __aarch64__
|
||||
__asm__ __volatile__ ("mrs %0, fpcr" : "=r" (queue -> sse_mode));
|
||||
#else
|
||||
__asm__ __volatile__ ("fnstcw %0" : "=m" (queue -> x87_mode));
|
||||
__asm__ __volatile__ ("stmxcsr %0" : "=m" (queue -> sse_mode));
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(OS_LINUX) && !defined(NO_AFFINITY) && !defined(PARAMTEST)
|
||||
|
||||
|
|
|
@ -69,6 +69,8 @@
|
|||
|
||||
int blas_server_avail = 0;
|
||||
|
||||
extern int openblas_omp_adaptive_env();
|
||||
|
||||
static void * blas_thread_buffer[MAX_PARALLEL_NUMBER][MAX_CPU_NUMBER];
|
||||
#ifdef HAVE_C11
|
||||
static atomic_bool blas_buffer_inuse[MAX_PARALLEL_NUMBER];
|
||||
|
@ -98,6 +100,8 @@ static void adjust_thread_buffers() {
|
|||
|
||||
void goto_set_num_threads(int num_threads) {
|
||||
|
||||
blas_num_threads_set = 1;
|
||||
if (num_threads < 0) blas_num_threads_set = 0;
|
||||
if (num_threads < 1) num_threads = blas_num_threads;
|
||||
|
||||
if (num_threads > MAX_CPU_NUMBER) num_threads = MAX_CPU_NUMBER;
|
||||
|
@ -108,8 +112,6 @@ void goto_set_num_threads(int num_threads) {
|
|||
|
||||
blas_cpu_number = num_threads;
|
||||
|
||||
omp_set_num_threads(blas_cpu_number);
|
||||
|
||||
adjust_thread_buffers();
|
||||
#if defined(ARCH_MIPS64)
|
||||
//set parameters for different number of threads.
|
||||
|
@ -282,8 +284,12 @@ static void exec_threads(blas_queue_t *queue, int buf_index){
|
|||
sb = queue -> sb;
|
||||
|
||||
#ifdef CONSISTENT_FPCSR
|
||||
#ifdef __aarch64__
|
||||
__asm__ __volatile__ ("msr fpcr, %0" : : "r" (queue -> sse_mode));
|
||||
#else
|
||||
__asm__ __volatile__ ("ldmxcsr %0" : : "m" (queue -> sse_mode));
|
||||
__asm__ __volatile__ ("fldcw %0" : : "m" (queue -> x87_mode));
|
||||
#endif
|
||||
#endif
|
||||
|
||||
if ((sa == NULL) && (sb == NULL) && ((queue -> mode & BLAS_PTHREAD) == 0)) {
|
||||
|
@ -381,8 +387,12 @@ int exec_blas(BLASLONG num, blas_queue_t *queue){
|
|||
|
||||
#ifdef CONSISTENT_FPCSR
|
||||
for (i = 0; i < num; i ++) {
|
||||
#ifdef __aarch64__
|
||||
__asm__ __volatile__ ("mrs %0, fpcr" : "=r" (queue[i].sse_mode));
|
||||
#else
|
||||
__asm__ __volatile__ ("fnstcw %0" : "=m" (queue[i].x87_mode));
|
||||
__asm__ __volatile__ ("stmxcsr %0" : "=m" (queue[i].sse_mode));
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
|
|
|
@ -278,12 +278,15 @@ static DWORD WINAPI blas_thread_server(void *arg){
|
|||
} else
|
||||
#endif
|
||||
if ((queue -> mode & BLAS_PREC) == BLAS_DOUBLE){
|
||||
#ifdef BUILD_DOUBLE
|
||||
sb = (void *)(((BLASLONG)sa + ((DGEMM_P * DGEMM_Q * sizeof(double)
|
||||
+ GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
|
||||
|
||||
#endif
|
||||
} else if ((queue -> mode & BLAS_PREC) == BLAS_SINGLE) {
|
||||
#ifdef BUILD_SINGLE
|
||||
sb = (void *)(((BLASLONG)sa + ((SGEMM_P * SGEMM_Q * sizeof(float)
|
||||
+ GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
|
||||
#endif
|
||||
} else {
|
||||
/* Other types in future */
|
||||
}
|
||||
|
@ -295,11 +298,15 @@ static DWORD WINAPI blas_thread_server(void *arg){
|
|||
} else
|
||||
#endif
|
||||
if ((queue -> mode & BLAS_PREC) == BLAS_DOUBLE){
|
||||
#ifdef BUILD_COMPLEX16
|
||||
sb = (void *)(((BLASLONG)sa + ((ZGEMM_P * ZGEMM_Q * 2 * sizeof(double)
|
||||
+ GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
|
||||
#endif
|
||||
} else if ((queue -> mode & BLAS_PREC) == BLAS_SINGLE) {
|
||||
#ifdef BUILD_COMPLEX
|
||||
sb = (void *)(((BLASLONG)sa + ((CGEMM_P * CGEMM_Q * 2 * sizeof(float)
|
||||
+ GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
|
||||
#endif
|
||||
} else {
|
||||
/* Other types in future */
|
||||
}
|
||||
|
|
|
@ -1018,7 +1018,7 @@ static gotoblas_t *force_coretype(char *coretype){
|
|||
char message[128];
|
||||
//char mname[20];
|
||||
|
||||
for ( i=1 ; i <= 24; i++)
|
||||
for ( i=1 ; i <= 25; i++)
|
||||
{
|
||||
if (!strncasecmp(coretype,corename[i],20))
|
||||
{
|
||||
|
|
|
@ -125,8 +125,13 @@ extern gotoblas_t gotoblas_THUNDERX2T99;
|
|||
extern gotoblas_t gotoblas_TSV110;
|
||||
extern gotoblas_t gotoblas_EMAG8180;
|
||||
extern gotoblas_t gotoblas_NEOVERSEN1;
|
||||
#ifndef NO_SVE
|
||||
extern gotoblas_t gotoblas_NEOVERSEV1;
|
||||
extern gotoblas_t gotoblas_NEOVERSEN2;
|
||||
#else
|
||||
#define gotoblas_NEOVERSEV1 gotoblas_ARMV8
|
||||
#define gotoblas_NEOVERSEN2 gotoblas_ARMV8
|
||||
#endif
|
||||
extern gotoblas_t gotoblas_THUNDERX3T110;
|
||||
extern gotoblas_t gotoblas_CORTEXA55;
|
||||
#endif
|
||||
|
@ -237,7 +242,7 @@ static gotoblas_t *get_coretype(void) {
|
|||
p = (char *) NULL ;
|
||||
infile = fopen("/sys/devices/system/cpu/cpu0/regs/identification/midr_el1","r");
|
||||
if (!infile) return NULL;
|
||||
fgets(buffer, sizeof(buffer), infile);
|
||||
(void)fgets(buffer, sizeof(buffer), infile);
|
||||
midr_el1=strtoul(buffer,NULL,16);
|
||||
fclose(infile);
|
||||
#else
|
||||
|
@ -274,10 +279,12 @@ static gotoblas_t *get_coretype(void) {
|
|||
return &gotoblas_CORTEXA73;
|
||||
case 0xd0c: // Neoverse N1
|
||||
return &gotoblas_NEOVERSEN1;
|
||||
#ifndef NO_SVE
|
||||
case 0xd49:
|
||||
return &gotoblas_NEOVERSEN2;
|
||||
case 0xd40:
|
||||
return &gotoblas_NEOVERSEV1;
|
||||
#endif
|
||||
case 0xd05: // Cortex A55
|
||||
return &gotoblas_CORTEXA55;
|
||||
}
|
||||
|
|
|
@ -38,22 +38,48 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#include <sys/resource.h>
|
||||
#include "common.h"
|
||||
|
||||
#if (defined OS_LINUX || defined OS_ANDROID)
|
||||
#include <asm/hwcap.h>
|
||||
#include <sys/auxv.h>
|
||||
|
||||
#ifndef HWCAP_LOONGSON_CPUCFG
|
||||
#define HWCAP_LOONGSON_CPUCFG (1 << 14)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef DYNAMIC_LIST
|
||||
extern gotoblas_t gotoblas_MIPS64_GENERIC;
|
||||
#ifdef DYN_LOONGSON3R3
|
||||
extern gotoblas_t gotoblas_LOONGSON3R3;
|
||||
#else
|
||||
#define gotoblas_LOONGSON3R3 gotoblas_MIPS64_GENERIC
|
||||
#endif
|
||||
#ifdef DYN_LOONGSON3R4
|
||||
extern gotoblas_t gotoblas_LOONGSON3R4;
|
||||
#else
|
||||
#define gotoblas_LOONGSON3R4 gotoblas_MIPS64_GENERIC
|
||||
#endif
|
||||
#else
|
||||
extern gotoblas_t gotoblas_LOONGSON3R3;
|
||||
extern gotoblas_t gotoblas_LOONGSON3R4;
|
||||
extern gotoblas_t gotoblas_MIPS64_GENERIC;
|
||||
#endif
|
||||
|
||||
extern void openblas_warning(int verbose, const char * msg);
|
||||
|
||||
#define NUM_CORETYPES 2
|
||||
#define NUM_CORETYPES 3
|
||||
|
||||
/* Core names indexed by core type: 0 = MIPS64_GENERIC, 1 = loongson3r3,
 * 2 = loongson3r4, and index NUM_CORETYPES holds the "UNKNOWN" fallback.
 * Every entry needs its own trailing comma: adjacent string literals are
 * concatenated by the compiler, which would shift all following indices. */
static char *corename[] = {
|
||||
"MIPS64_GENERIC",
|
||||
"loongson3r3",
|
||||
"loongson3r4",
|
||||
"UNKNOWN"
|
||||
};
|
||||
|
||||
char *gotoblas_corename(void) {
|
||||
if (gotoblas == &gotoblas_LOONGSON3R3) return corename[0];
|
||||
if (gotoblas == &gotoblas_LOONGSON3R4) return corename[1];
|
||||
if (gotoblas == &gotoblas_MIPS64_GENERIC) return corename[0];
|
||||
if (gotoblas == &gotoblas_LOONGSON3R3) return corename[1];
|
||||
if (gotoblas == &gotoblas_LOONGSON3R4) return corename[2];
|
||||
return corename[NUM_CORETYPES];
|
||||
}
|
||||
|
||||
|
@ -73,77 +99,32 @@ static gotoblas_t *force_coretype(char *coretype) {
|
|||
|
||||
switch (found)
|
||||
{
|
||||
case 0: return (&gotoblas_LOONGSON3R3);
|
||||
case 1: return (&gotoblas_LOONGSON3R4);
|
||||
case 0: return (&gotoblas_MIPS64_GENERIC);
|
||||
case 1: return (&gotoblas_LOONGSON3R3);
|
||||
case 2: return (&gotoblas_LOONGSON3R4);
|
||||
}
|
||||
snprintf(message, 128, "Core not found: %s\n", coretype);
|
||||
openblas_warning(1, message);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
#if (defined OS_LINUX || defined OS_ANDROID)
|
||||
#define MMI_MASK 0x00000010
|
||||
#define MSA_MASK 0x00000020
|
||||
|
||||
int fd[2];
|
||||
int support_cpucfg;
|
||||
|
||||
static void handler(int signum)
|
||||
{
|
||||
close(fd[1]);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
/* Brief : Function to check if cpucfg supported on loongson
|
||||
* Return: 1 supported
|
||||
* 0 not supported
|
||||
*/
|
||||
static int cpucfg_test(void) {
|
||||
pid_t pid;
|
||||
int status = 0;
|
||||
|
||||
support_cpucfg = 0;
|
||||
pipe(fd);
|
||||
pid = fork();
|
||||
if (pid == 0) { /* Subprocess */
|
||||
struct sigaction act;
|
||||
close(fd[0]);
|
||||
/* Set signal action for SIGILL. */
|
||||
act.sa_handler = handler;
|
||||
sigaction(SIGILL,&act,NULL);
|
||||
|
||||
/* Execute cpucfg in subprocess. */
|
||||
__asm__ volatile(
|
||||
".insn \n\t"
|
||||
".word (0xc8080118) \n\t"
|
||||
:::
|
||||
);
|
||||
support_cpucfg = 1;
|
||||
write(fd[1],&support_cpucfg,sizeof(support_cpucfg));
|
||||
close(fd[1]);
|
||||
exit(0);
|
||||
} else if (pid > 0){ /* Parent process*/
|
||||
close(fd[1]);
|
||||
if ((waitpid(pid,&status,0) <= 0) ||
|
||||
(read(fd[0],&support_cpucfg,sizeof(support_cpucfg)) <= 0))
|
||||
support_cpucfg = 0;
|
||||
close(fd[0]);
|
||||
} else {
|
||||
support_cpucfg = 0;
|
||||
}
|
||||
|
||||
return support_cpucfg;
|
||||
}
|
||||
|
||||
static gotoblas_t *get_coretype_from_cpucfg(void) {
|
||||
int flag = 0;
|
||||
__asm__ volatile(
|
||||
".set push \n\t"
|
||||
".set noat \n\t"
|
||||
".insn \n\t"
|
||||
"dli $8, 0x01 \n\t"
|
||||
".word (0xc9084918) \n\t"
|
||||
"usw $9, 0x00(%0) \n\t"
|
||||
"dli $1, 0x01 \n\t"
|
||||
".word (0xc8080118) \n\t"
|
||||
"move %0, $1 \n\t"
|
||||
".set pop \n\t"
|
||||
: "=r"(flag)
|
||||
:
|
||||
:
|
||||
: "r"(&flag)
|
||||
: "memory"
|
||||
);
|
||||
if (flag & MSA_MASK)
|
||||
return (&gotoblas_LOONGSON3R4);
|
||||
|
@ -153,7 +134,7 @@ static gotoblas_t *get_coretype_from_cpucfg(void) {
|
|||
}
|
||||
|
||||
static gotoblas_t *get_coretype_from_cpuinfo(void) {
|
||||
#ifdef linux
|
||||
#ifdef __linux
|
||||
FILE *infile;
|
||||
char buffer[512], *p;
|
||||
|
||||
|
@ -178,15 +159,17 @@ static gotoblas_t *get_coretype_from_cpuinfo(void) {
|
|||
#endif
|
||||
return NULL;
|
||||
}
|
||||
#endif
|
||||
|
||||
static gotoblas_t *get_coretype(void) {
|
||||
int ret = 0;
|
||||
|
||||
ret = cpucfg_test();
|
||||
if (ret == 1)
|
||||
#if (!defined OS_LINUX && !defined OS_ANDROID)
|
||||
return NULL;
|
||||
#else
|
||||
/* Probe with the CPUCFG instruction only when the kernel advertises it in
 * AT_HWCAP; otherwise fall through to the /proc/cpuinfo based detection.
 * NOTE(review): assumes HWCAP_LOONGSON_CPUCFG set means CPUCFG is usable - confirm. */
if (getauxval(AT_HWCAP) & HWCAP_LOONGSON_CPUCFG)
|
||||
return get_coretype_from_cpucfg();
|
||||
else
|
||||
return get_coretype_from_cpuinfo();
|
||||
#endif
|
||||
}
|
||||
|
||||
void gotoblas_dynamic_init(void) {
|
||||
|
@ -208,9 +191,9 @@ void gotoblas_dynamic_init(void) {
|
|||
|
||||
if (gotoblas == NULL)
|
||||
{
|
||||
snprintf(coremsg, 128, "Falling back to loongson3r3 core\n");
|
||||
/* No core could be selected: warn and fall back to the generic MIPS64 target. */
snprintf(coremsg, 128, "Falling back to MIPS64_GENERIC\n");
|
||||
openblas_warning(1, coremsg);
|
||||
gotoblas = &gotoblas_LOONGSON3R3;
|
||||
gotoblas = &gotoblas_MIPS64_GENERIC;
|
||||
}
|
||||
|
||||
if (gotoblas && gotoblas->init) {
|
||||
|
|
|
@ -823,6 +823,8 @@ void gotoblas_affinity_init(void) {
|
|||
|
||||
if (numprocs == 0) numprocs = readenv_atoi("OMP_NUM_THREADS");
|
||||
|
||||
if (numprocs == 0) numprocs = readenv_atoi("OPENBLAS_DEFAULT_NUM_THREADS");
|
||||
|
||||
numnodes = 1;
|
||||
|
||||
if (numprocs == 1) {
|
||||
|
|
|
@ -249,8 +249,11 @@ int get_num_procs(void) {
|
|||
|
||||
#if defined(USE_OPENMP)
|
||||
#if _OPENMP >= 201511
|
||||
int i,n;
|
||||
n = 0;
|
||||
ret = omp_get_num_places();
|
||||
if (ret >0 ) nums = ret;
|
||||
if (ret > 0) for (i=0; i<ret;i++) n+= omp_get_place_num_procs(i);
|
||||
if (n > 0) nums = n;
|
||||
#endif
|
||||
return (nums > 0 ? nums : 2);
|
||||
#endif
|
||||
|
@ -419,6 +422,8 @@ This value is equal or large than blas_cpu_number. This means some threads are s
|
|||
*/
|
||||
int blas_num_threads = 0;
|
||||
|
||||
int blas_num_threads_set = 0;
|
||||
|
||||
int goto_get_num_procs (void) {
|
||||
return blas_cpu_number;
|
||||
}
|
||||
|
@ -1820,8 +1825,11 @@ int get_num_procs(void) {
|
|||
#if defined(USE_OPENMP)
|
||||
/* if (omp_get_proc_bind() != omp_proc_bind_false) */
|
||||
#if _OPENMP >= 201511
|
||||
int i,n;
|
||||
n = 0;
|
||||
ret = omp_get_num_places();
|
||||
if (ret >0 ) nums = ret;
|
||||
if (ret > 0) for (i=0;i<ret;i++) n+= omp_get_place_num_procs(i);
|
||||
if (n > 0) nums = n;
|
||||
#endif
|
||||
return (nums > 0 ? nums :2);
|
||||
#endif
|
||||
|
@ -1988,6 +1996,8 @@ This value is equal or large than blas_cpu_number. This means some threads are s
|
|||
*/
|
||||
int blas_num_threads = 0;
|
||||
|
||||
int blas_num_threads_set = 0;
|
||||
|
||||
int goto_get_num_procs (void) {
|
||||
return blas_cpu_number;
|
||||
}
|
||||
|
|
|
@ -283,6 +283,7 @@ The numbers of threads in the thread pool.
|
|||
This value is equal or large than blas_cpu_number. This means some threads are sleep.
|
||||
*/
|
||||
int blas_num_threads = 0;
|
||||
int blas_num_threads_set = 0;
|
||||
|
||||
int goto_get_num_procs (void) {
|
||||
return blas_cpu_number;
|
||||
|
|
|
@ -66,9 +66,15 @@ void openblas_read_env() {
|
|||
if(ret<0) ret=0;
|
||||
openblas_env_thread_timeout=(unsigned int)ret;
|
||||
|
||||
ret=0;
|
||||
if (readenv(p,"OPENBLAS_DEFAULT_NUM_THREADS")) ret = atoi(p);
|
||||
if(ret<0) ret=0;
|
||||
openblas_env_openblas_num_threads=ret;
|
||||
|
||||
ret=0;
|
||||
if (readenv(p,"OPENBLAS_NUM_THREADS")) ret = atoi(p);
|
||||
if(ret<0) ret=0;
|
||||
if(ret != 0 || openblas_env_openblas_num_threads == 0)
|
||||
openblas_env_openblas_num_threads=ret;
|
||||
|
||||
ret=0;
|
||||
|
|
|
@ -4000,6 +4000,22 @@ case "$p1" in
|
|||
no_underscore_objs="$no_underscore_objs $misc_common_objs"
|
||||
|
||||
printf 'int main(void){\n'
|
||||
for obj in $underscore_objs; do
|
||||
[ "$obj" != "xerbla" ] && printf 'extern void %s%s%s%s();\n' \
|
||||
"$symbolprefix" "$obj" "$bu" "$symbolsuffix"
|
||||
done
|
||||
|
||||
for obj in $need_2underscore_objs; do
|
||||
printf 'extern void %s%s%s%s%s();\n' \
|
||||
"$symbolprefix" "$obj" "$bu" "$bu" "$symbolsuffix"
|
||||
done
|
||||
|
||||
for obj in $no_underscore_objs; do
|
||||
printf 'extern void %s%s%s();\n' \
|
||||
"$symbolprefix" "$obj" "$symbolsuffix"
|
||||
done
|
||||
|
||||
printf '\n'
|
||||
for obj in $underscore_objs; do
|
||||
[ "$obj" != "xerbla" ] && printf '%s%s%s%s();\n' \
|
||||
"$symbolprefix" "$obj" "$bu" "$symbolsuffix"
|
||||
|
|
|
@ -3955,6 +3955,18 @@ if ($ARGV[0] eq "linktest") {
|
|||
@no_underscore_objs = (@no_underscore_objs, @misc_common_objs);
|
||||
|
||||
print "int main(void){\n";
|
||||
foreach $objs (@underscore_objs) {
|
||||
print "extern void ", $symbolprefix, $objs, $bu, $symbolsuffix, "();\n" if $objs ne "xerbla";
|
||||
}
|
||||
|
||||
foreach $objs (@need_2underscore_objs) {
|
||||
print "extern void ", $symbolprefix, $objs, $bu, $bu, $symbolsuffix, "();\n";
|
||||
}
|
||||
|
||||
foreach $objs (@no_underscore_objs) {
|
||||
print "extern void ", $symbolprefix, $objs, $symbolsuffix, "();\n";
|
||||
}
|
||||
|
||||
foreach $objs (@underscore_objs) {
|
||||
print $symbolprefix, $objs, $bu, $symbolsuffix, "();\n" if $objs ne "xerbla";
|
||||
}
|
||||
|
|
8
f_check
8
f_check
|
@ -82,7 +82,7 @@ else
|
|||
vendor=FUJITSU
|
||||
openmp='-Kopenmp'
|
||||
;;
|
||||
*Cray*)
|
||||
*Hewlett*)
|
||||
vendor=CRAY
|
||||
openmp='-fopenmp'
|
||||
;;
|
||||
|
@ -102,7 +102,7 @@ else
|
|||
vendor=FLANG
|
||||
openmp='-fopenmp'
|
||||
;;
|
||||
*ifx*)
|
||||
*ifort*|*ifx*)
|
||||
vendor=INTEL
|
||||
openmp='-fopenmp'
|
||||
;;
|
||||
|
@ -117,6 +117,10 @@ else
|
|||
esac
|
||||
fi
|
||||
;;
|
||||
*Cray*)
|
||||
vendor=CRAY
|
||||
openmp='-fopenmp'
|
||||
;;
|
||||
*g95*)
|
||||
vendor=G95
|
||||
openmp=''
|
||||
|
|
11
f_check.pl
11
f_check.pl
|
@ -76,11 +76,6 @@ if ($compiler eq "") {
|
|||
$vendor = FUJITSU;
|
||||
$openmp = "-Kopenmp";
|
||||
|
||||
} elsif ($data =~ /Cray/) {
|
||||
|
||||
$vendor = CRAY;
|
||||
$openmp = "-fopenmp";
|
||||
|
||||
} elsif ($data =~ /GNU/ || $data =~ /GCC/ ) {
|
||||
|
||||
$data =~ s/\(+.*?\)+//g;
|
||||
|
@ -95,7 +90,7 @@ if ($compiler eq "") {
|
|||
if ($compiler =~ /flang/) {
|
||||
$vendor = FLANG;
|
||||
$openmp = "-fopenmp";
|
||||
} elsif ($compiler =~ /ifx/) {
|
||||
} elsif ($compiler =~ /ifort/ || $compiler =~ /ifx/) {
|
||||
$vendor = INTEL;
|
||||
$openmp = "-fopenmp";
|
||||
} elsif ($compiler =~ /pgf/ || $compiler =~ /nvf/) {
|
||||
|
@ -106,6 +101,10 @@ if ($compiler eq "") {
|
|||
$openmp = "";
|
||||
}
|
||||
}
|
||||
} elsif ($data =~ /Cray/) {
|
||||
|
||||
$vendor = CRAY;
|
||||
$openmp = "-fopenmp";
|
||||
|
||||
}
|
||||
|
||||
|
|
76
getarch.c
76
getarch.c
|
@ -131,6 +131,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
/* #define FORCE_PPC440 */
|
||||
/* #define FORCE_PPC440FP2 */
|
||||
/* #define FORCE_CELL */
|
||||
/* #define FORCE_MIPS64_GENERIC */
|
||||
/* #define FORCE_SICORTEX */
|
||||
/* #define FORCE_LOONGSON3R3 */
|
||||
/* #define FORCE_LOONGSON3R4 */
|
||||
|
@ -146,6 +147,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
/* #define FORCE_SPARCV7 */
|
||||
/* #define FORCE_ZARCH_GENERIC */
|
||||
/* #define FORCE_Z13 */
|
||||
/* #define FORCE_EV4 */
|
||||
/* #define FORCE_EV5 */
|
||||
/* #define FORCE_EV6 */
|
||||
/* #define FORCE_GENERIC */
|
||||
|
||||
#ifdef FORCE_P2
|
||||
|
@ -915,6 +919,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#define CORENAME "CELL"
|
||||
#endif
|
||||
|
||||
#ifdef FORCE_MIPS64_GENERIC
|
||||
#define FORCE
|
||||
#define ARCHITECTURE "MIPS"
|
||||
#define SUBARCHITECTURE "MIPS64_GENERIC"
|
||||
#define SUBDIRNAME "mips64"
|
||||
#define ARCHCONFIG "-DMIPS64_GENERIC " \
|
||||
"-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \
|
||||
"-DL2_SIZE=1048576 -DL2_LINESIZE=32 " \
|
||||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 "
|
||||
#define LIBNAME "mips64_generic"
|
||||
#define CORENAME "MIPS64_GENERIC"
|
||||
#else
|
||||
#endif
|
||||
|
||||
#ifdef FORCE_SICORTEX
|
||||
#define FORCE
|
||||
#define ARCHITECTURE "MIPS"
|
||||
|
@ -951,7 +969,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#define ARCHCONFIG "-DLOONGSON3R4 " \
|
||||
"-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \
|
||||
"-DL2_SIZE=512488 -DL2_LINESIZE=32 " \
|
||||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 "
|
||||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 -DHAVE_MSA"
|
||||
#define LIBNAME "loongson3r4"
|
||||
#define CORENAME "LOONGSON3R4"
|
||||
#else
|
||||
|
@ -965,7 +983,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#define ARCHCONFIG "-DLOONGSON3R5 " \
|
||||
"-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 " \
|
||||
"-DL2_SIZE=1048576 -DL2_LINESIZE=64 " \
|
||||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=16 "
|
||||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=16 -DHAVE_MSA"
|
||||
#define LIBNAME "loongson3r5"
|
||||
#define CORENAME "LOONGSON3R5"
|
||||
#else
|
||||
|
@ -979,7 +997,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#define ARCHCONFIG "-DLOONGSON2K1000 " \
|
||||
"-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 " \
|
||||
"-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
|
||||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=16 "
|
||||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=16 -DHAVE_MSA"
|
||||
#define LIBNAME "loongson2k1000"
|
||||
#define CORENAME "LOONGSON2K1000"
|
||||
#else
|
||||
|
@ -993,7 +1011,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#define ARCHCONFIG "-DLOONGSONGENERIC " \
|
||||
"-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 " \
|
||||
"-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
|
||||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=16 "
|
||||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=16 -DHAVE_MSA"
|
||||
#define LIBNAME "loongsongeneric"
|
||||
#define CORENAME "LOONGSONGENERIC"
|
||||
#else
|
||||
|
@ -1007,7 +1025,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#define ARCHCONFIG "-DI6400 " \
|
||||
"-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \
|
||||
"-DL2_SIZE=1048576 -DL2_LINESIZE=32 " \
|
||||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 "
|
||||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 -DHAVE_MSA "
|
||||
#define LIBNAME "i6400"
|
||||
#define CORENAME "I6400"
|
||||
#else
|
||||
|
@ -1035,7 +1053,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#define ARCHCONFIG "-DP5600 " \
|
||||
"-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \
|
||||
"-DL2_SIZE=1048576 -DL2_LINESIZE=32 " \
|
||||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 -DNO_MSA"
|
||||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8"
|
||||
#define LIBNAME "p5600"
|
||||
#define CORENAME "P5600"
|
||||
#else
|
||||
|
@ -1049,7 +1067,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#define ARCHCONFIG "-DMIPS1004K " \
|
||||
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=32 " \
|
||||
"-DL2_SIZE=262144 -DL2_LINESIZE=32 " \
|
||||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 -DNO_MSA"
|
||||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8"
|
||||
#define LIBNAME "mips1004K"
|
||||
#define CORENAME "MIPS1004K"
|
||||
#else
|
||||
|
@ -1063,7 +1081,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#define ARCHCONFIG "-DMIPS24K " \
|
||||
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=32 " \
|
||||
"-DL2_SIZE=32768 -DL2_LINESIZE=32 " \
|
||||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 -DNO_MSA"
|
||||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8"
|
||||
#define LIBNAME "mips24K"
|
||||
#define CORENAME "MIPS24K"
|
||||
#else
|
||||
|
@ -1077,7 +1095,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#define ARCHCONFIG "-DI6500 " \
|
||||
"-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \
|
||||
"-DL2_SIZE=1048576 -DL2_LINESIZE=32 " \
|
||||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 "
|
||||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 -DHAVE_MSA"
|
||||
#define LIBNAME "i6500"
|
||||
#define CORENAME "I6500"
|
||||
#else
|
||||
|
@ -1392,7 +1410,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
"-DL2_SIZE=1048576 -DL2_LINESIZE=64 -DL2_ASSOCIATIVE=16 " \
|
||||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
|
||||
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DHAVE_SVE -DARMV8 " \
|
||||
"-march=armv8.4-a -mtune=neoverse-v1"
|
||||
"-march=armv8.4-a+sve -mtune=neoverse-v1"
|
||||
#define LIBNAME "neoversev1"
|
||||
#define CORENAME "NEOVERSEV1"
|
||||
#endif
|
||||
|
@ -1601,6 +1619,42 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#define CORENAME "Z14"
|
||||
#endif
|
||||
|
||||
#ifdef FORCE_EV4
|
||||
#define FORCE
|
||||
#define ARCHITECTURE "ALPHA"
|
||||
#define SUBARCHITECTURE "ev4"
|
||||
#define ARCHCONFIG "-DEV4 " \
|
||||
"-DL1_DATA_SIZE=16384 -DL1_DATA_LINESIZE=32 " \
|
||||
"-DL2_SIZE=2097152 -DL2_LINESIZE=32 " \
|
||||
"-DDTB_DEFAULT_ENTRIES=32 -DDTB_SIZE=8192 "
|
||||
#define LIBNAME "ev4"
|
||||
#define CORENAME "EV4"
|
||||
#endif
|
||||
|
||||
#ifdef FORCE_EV5
|
||||
#define FORCE
|
||||
#define ARCHITECTURE "ALPHA"
|
||||
#define SUBARCHITECTURE "ev5"
|
||||
#define ARCHCONFIG "-DEV5 " \
|
||||
"-DL1_DATA_SIZE=16384 -DL1_DATA_LINESIZE=32 " \
|
||||
"-DL2_SIZE=2097152 -DL2_LINESIZE=64 " \
|
||||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=8192 "
|
||||
#define LIBNAME "ev5"
|
||||
#define CORENAME "EV5"
|
||||
#endif
|
||||
|
||||
#ifdef FORCE_EV6
|
||||
#define FORCE
|
||||
#define ARCHITECTURE "ALPHA"
|
||||
#define SUBARCHITECTURE "ev6"
|
||||
#define ARCHCONFIG "-DEV6 " \
|
||||
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
|
||||
"-DL2_SIZE=4194304 -DL2_LINESIZE=64 " \
|
||||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=8192 "
|
||||
#define LIBNAME "ev6"
|
||||
#define CORENAME "EV6"
|
||||
#endif
|
||||
|
||||
#ifdef FORCE_C910V
|
||||
#define FORCE
|
||||
#define ARCHITECTURE "RISCV64"
|
||||
|
@ -1777,7 +1831,7 @@ int main(int argc, char *argv[]){
|
|||
#ifdef FORCE
|
||||
printf("CORE=%s\n", CORENAME);
|
||||
#else
|
||||
#if defined(INTEL_AMD) || defined(POWER) || defined(__mips__) || defined(__arm__) || defined(__aarch64__) || defined(ZARCH) || defined(sparc) || defined(__loongarch__) || defined(__riscv)
|
||||
#if defined(INTEL_AMD) || defined(POWER) || defined(__mips__) || defined(__arm__) || defined(__aarch64__) || defined(ZARCH) || defined(sparc) || defined(__loongarch__) || defined(__riscv) || defined(__alpha__)
|
||||
printf("CORE=%s\n", get_corename());
|
||||
#endif
|
||||
#endif
|
||||
|
|
|
@ -53,7 +53,7 @@ set(BLAS2_COMPLEX_ONLY_MANGLED_SOURCES
|
|||
# these do not have separate 'z' sources
|
||||
set(BLAS3_SOURCES
|
||||
gemm.c symm.c
|
||||
trsm.c syrk.c syr2k.c
|
||||
trsm.c syrk.c syr2k.c gemmt.c
|
||||
)
|
||||
|
||||
set(BLAS3_MANGLED_SOURCES
|
||||
|
@ -189,7 +189,16 @@ if (NOT DEFINED NO_LAPACK)
|
|||
)
|
||||
|
||||
GenerateNamedObjects("${LAPACK_SOURCES}")
|
||||
if (NOT RELAPACK_REPLACE)
|
||||
GenerateNamedObjects("${LAPACK_MANGLED_SOURCES}" "" "" 0 "" "" 0 3)
|
||||
else ()
|
||||
GenerateNamedObjects("lapack/getrs.c" "" "" 0 "" "" 0 3)
|
||||
GenerateNamedObjects("lapack/getf2.c" "" "" 0 "" "" 0 3)
|
||||
GenerateNamedObjects("lapack/potf2.c" "" "" 0 "" "" 0 3)
|
||||
GenerateNamedObjects("lapack/laswp.c" "" "" 0 "" "" 0 3)
|
||||
GenerateNamedObjects("lapack/lauu2.c" "" "" 0 "" "" 0 3)
|
||||
GenerateNamedObjects("lapack/trti2.c" "" "" 0 "" "" 0 3)
|
||||
endif()
|
||||
endif ()
|
||||
|
||||
if ( BUILD_COMPLEX AND NOT BUILD_SINGLE)
|
||||
|
|
|
@ -44,12 +44,12 @@ SBLAS3OBJS = \
|
|||
sgemm.$(SUFFIX) ssymm.$(SUFFIX) strmm.$(SUFFIX) \
|
||||
strsm.$(SUFFIX) ssyrk.$(SUFFIX) ssyr2k.$(SUFFIX) \
|
||||
somatcopy.$(SUFFIX) simatcopy.$(SUFFIX)\
|
||||
sgeadd.$(SUFFIX)
|
||||
sgeadd.$(SUFFIX) sgemmt.$(SUFFIX)
|
||||
|
||||
ifeq ($(BUILD_BFLOAT16),1)
|
||||
SBBLAS1OBJS = sbdot.$(SUFFIX)
|
||||
SBBLAS2OBJS = sbgemv.$(SUFFIX)
|
||||
SBBLAS3OBJS = sbgemm.$(SUFFIX)
|
||||
SBBLAS3OBJS = sbgemm.$(SUFFIX) sbgemmt.$(SUFFIX)
|
||||
SBEXTOBJS = sbstobf16.$(SUFFIX) sbdtobf16.$(SUFFIX) sbf16tos.$(SUFFIX) dbf16tod.$(SUFFIX)
|
||||
endif
|
||||
|
||||
|
@ -76,7 +76,7 @@ DBLAS3OBJS = \
|
|||
dgemm.$(SUFFIX) dsymm.$(SUFFIX) dtrmm.$(SUFFIX) \
|
||||
dtrsm.$(SUFFIX) dsyrk.$(SUFFIX) dsyr2k.$(SUFFIX) \
|
||||
domatcopy.$(SUFFIX) dimatcopy.$(SUFFIX)\
|
||||
dgeadd.$(SUFFIX)
|
||||
dgeadd.$(SUFFIX) dgemmt.$(SUFFIX)
|
||||
|
||||
CBLAS1OBJS = \
|
||||
caxpy.$(SUFFIX) caxpyc.$(SUFFIX) cswap.$(SUFFIX) \
|
||||
|
@ -92,8 +92,9 @@ CBLAS2OBJS = \
|
|||
cgemv.$(SUFFIX) cgeru.$(SUFFIX) cgerc.$(SUFFIX) \
|
||||
ctrsv.$(SUFFIX) ctrmv.$(SUFFIX) \
|
||||
csyr2.$(SUFFIX) cgbmv.$(SUFFIX) \
|
||||
csbmv.$(SUFFIX) \
|
||||
cspr2.$(SUFFIX) \
|
||||
csbmv.$(SUFFIX) cspmv.$(SUFFIX) \
|
||||
cspr.$(SUFFIX) cspr2.$(SUFFIX) \
|
||||
csymv.$(SUFFIX) csyr.$(SUFFIX) \
|
||||
ctbsv.$(SUFFIX) ctbmv.$(SUFFIX) \
|
||||
ctpsv.$(SUFFIX) ctpmv.$(SUFFIX) \
|
||||
chemv.$(SUFFIX) chbmv.$(SUFFIX) \
|
||||
|
@ -105,7 +106,7 @@ CBLAS3OBJS = \
|
|||
ctrsm.$(SUFFIX) csyrk.$(SUFFIX) csyr2k.$(SUFFIX) \
|
||||
chemm.$(SUFFIX) cherk.$(SUFFIX) cher2k.$(SUFFIX) \
|
||||
comatcopy.$(SUFFIX) cimatcopy.$(SUFFIX)\
|
||||
cgeadd.$(SUFFIX)
|
||||
cgeadd.$(SUFFIX) cgemmt.$(SUFFIX)
|
||||
|
||||
ZBLAS1OBJS = \
|
||||
zaxpy.$(SUFFIX) zaxpyc.$(SUFFIX) zswap.$(SUFFIX) \
|
||||
|
@ -121,8 +122,9 @@ ZBLAS2OBJS = \
|
|||
zgemv.$(SUFFIX) zgeru.$(SUFFIX) zgerc.$(SUFFIX) \
|
||||
ztrsv.$(SUFFIX) ztrmv.$(SUFFIX) \
|
||||
zsyr2.$(SUFFIX) zgbmv.$(SUFFIX) \
|
||||
zsbmv.$(SUFFIX) \
|
||||
zspr2.$(SUFFIX) \
|
||||
zsbmv.$(SUFFIX) zspmv.$(SUFFIX) \
|
||||
zspr.$(SUFFIX) zspr2.$(SUFFIX) \
|
||||
zsymv.$(SUFFIX) zsyr.$(SUFFIX) \
|
||||
ztbsv.$(SUFFIX) ztbmv.$(SUFFIX) \
|
||||
ztpsv.$(SUFFIX) ztpmv.$(SUFFIX) \
|
||||
zhemv.$(SUFFIX) zhbmv.$(SUFFIX) \
|
||||
|
@ -134,7 +136,7 @@ ZBLAS3OBJS = \
|
|||
ztrsm.$(SUFFIX) zsyrk.$(SUFFIX) zsyr2k.$(SUFFIX) \
|
||||
zhemm.$(SUFFIX) zherk.$(SUFFIX) zher2k.$(SUFFIX) \
|
||||
zomatcopy.$(SUFFIX) zimatcopy.$(SUFFIX)\
|
||||
zgeadd.$(SUFFIX)
|
||||
zgeadd.$(SUFFIX) zgemmt.$(SUFFIX)
|
||||
|
||||
ifeq ($(SUPPORT_GEMM3M), 1)
|
||||
|
||||
|
@ -281,12 +283,12 @@ CSBLAS2OBJS = \
|
|||
CSBLAS3OBJS = \
|
||||
cblas_sgemm.$(SUFFIX) cblas_ssymm.$(SUFFIX) cblas_strmm.$(SUFFIX) cblas_strsm.$(SUFFIX) \
|
||||
cblas_ssyrk.$(SUFFIX) cblas_ssyr2k.$(SUFFIX) cblas_somatcopy.$(SUFFIX) cblas_simatcopy.$(SUFFIX)\
|
||||
cblas_sgeadd.$(SUFFIX)
|
||||
cblas_sgeadd.$(SUFFIX) cblas_sgemmt.$(SUFFIX)
|
||||
|
||||
ifeq ($(BUILD_BFLOAT16),1)
|
||||
CSBBLAS1OBJS = cblas_sbdot.$(SUFFIX)
|
||||
CSBBLAS2OBJS = cblas_sbgemv.$(SUFFIX)
|
||||
CSBBLAS3OBJS = cblas_sbgemm.$(SUFFIX)
|
||||
CSBBLAS3OBJS = cblas_sbgemm.$(SUFFIX) cblas_sbgemmt.$(SUFFIX)
|
||||
CSBEXTOBJS = cblas_sbstobf16.$(SUFFIX) cblas_sbdtobf16.$(SUFFIX) cblas_sbf16tos.$(SUFFIX) cblas_dbf16tod.$(SUFFIX)
|
||||
endif
|
||||
|
||||
|
@ -306,7 +308,7 @@ CDBLAS2OBJS = \
|
|||
CDBLAS3OBJS += \
|
||||
cblas_dgemm.$(SUFFIX) cblas_dsymm.$(SUFFIX) cblas_dtrmm.$(SUFFIX) cblas_dtrsm.$(SUFFIX) \
|
||||
cblas_dsyrk.$(SUFFIX) cblas_dsyr2k.$(SUFFIX) cblas_domatcopy.$(SUFFIX) cblas_dimatcopy.$(SUFFIX) \
|
||||
cblas_dgeadd.$(SUFFIX)
|
||||
cblas_dgeadd.$(SUFFIX) cblas_dgemmt.$(SUFFIX)
|
||||
|
||||
CCBLAS1OBJS = \
|
||||
cblas_icamax.$(SUFFIX) cblas_icamin.$(SUFFIX) cblas_scasum.$(SUFFIX) cblas_caxpy.$(SUFFIX) \
|
||||
|
@ -331,7 +333,7 @@ CCBLAS3OBJS = \
|
|||
cblas_csyrk.$(SUFFIX) cblas_csyr2k.$(SUFFIX) \
|
||||
cblas_chemm.$(SUFFIX) cblas_cherk.$(SUFFIX) cblas_cher2k.$(SUFFIX) \
|
||||
cblas_comatcopy.$(SUFFIX) cblas_cimatcopy.$(SUFFIX)\
|
||||
cblas_cgeadd.$(SUFFIX)
|
||||
cblas_cgeadd.$(SUFFIX) cblas_cgemmt.$(SUFFIX)
|
||||
|
||||
CXERBLAOBJ = \
|
||||
cblas_xerbla.$(SUFFIX)
|
||||
|
@ -362,7 +364,7 @@ CZBLAS3OBJS = \
|
|||
cblas_zsyrk.$(SUFFIX) cblas_zsyr2k.$(SUFFIX) \
|
||||
cblas_zhemm.$(SUFFIX) cblas_zherk.$(SUFFIX) cblas_zher2k.$(SUFFIX)\
|
||||
cblas_zomatcopy.$(SUFFIX) cblas_zimatcopy.$(SUFFIX) \
|
||||
cblas_zgeadd.$(SUFFIX)
|
||||
cblas_zgeadd.$(SUFFIX) cblas_zgemmt.$(SUFFIX)
|
||||
|
||||
|
||||
ifeq ($(SUPPORT_GEMM3M), 1)
|
||||
|
@ -1300,6 +1302,8 @@ xhpr2.$(SUFFIX) xhpr2.$(PSUFFIX) : zhpr2.c
|
|||
ifeq ($(BUILD_BFLOAT16),1)
|
||||
sbgemm.$(SUFFIX) sbgemm.$(PSUFFIX) : gemm.c ../param.h
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
# Both the regular and the profiled (PSUFFIX) sbgemmt objects are built from gemmt.c.
sbgemmt.$(SUFFIX) sbgemmt.$(PSUFFIX) : gemmt.c ../param.h
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
endif
|
||||
|
||||
sgemm.$(SUFFIX) sgemm.$(PSUFFIX) : gemm.c ../param.h
|
||||
|
@ -1320,6 +1324,24 @@ zgemm.$(SUFFIX) zgemm.$(PSUFFIX) : gemm.c ../param.h
|
|||
xgemm.$(SUFFIX) xgemm.$(PSUFFIX) : gemm.c ../param.h
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
sgemmt.$(SUFFIX) sgemmt.$(PSUFFIX) : gemmt.c ../param.h
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
dgemmt.$(SUFFIX) dgemmt.$(PSUFFIX) : gemmt.c ../param.h
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
qgemmt.$(SUFFIX) qgemmt.$(PSUFFIX) : gemmt.c ../param.h
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
cgemmt.$(SUFFIX) cgemmt.$(PSUFFIX) : gemmt.c ../param.h
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
zgemmt.$(SUFFIX) zgemmt.$(PSUFFIX) : gemmt.c ../param.h
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
xgemmt.$(SUFFIX) xgemmt.$(PSUFFIX) : gemmt.c ../param.h
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
ssymm.$(SUFFIX) ssymm.$(PSUFFIX) : symm.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
|
@ -1907,6 +1929,23 @@ cblas_cgemm.$(SUFFIX) cblas_cgemm.$(PSUFFIX) : gemm.c ../param.h
|
|||
cblas_zgemm.$(SUFFIX) cblas_zgemm.$(PSUFFIX) : gemm.c ../param.h
|
||||
$(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
cblas_sgemmt.$(SUFFIX) cblas_sgemmt.$(PSUFFIX) : gemmt.c ../param.h
|
||||
$(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
ifeq ($(BUILD_BFLOAT16),1)
|
||||
cblas_sbgemmt.$(SUFFIX) cblas_sbgemmt.$(PSUFFIX) : gemmt.c ../param.h
|
||||
$(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F)
|
||||
endif
|
||||
|
||||
cblas_dgemmt.$(SUFFIX) cblas_dgemmt.$(PSUFFIX) : gemmt.c ../param.h
|
||||
$(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
cblas_cgemmt.$(SUFFIX) cblas_cgemmt.$(PSUFFIX) : gemmt.c ../param.h
|
||||
$(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
cblas_zgemmt.$(SUFFIX) cblas_zgemmt.$(PSUFFIX) : gemmt.c ../param.h
|
||||
$(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
cblas_ssymm.$(SUFFIX) cblas_ssymm.$(PSUFFIX) : symm.c
|
||||
$(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
|
|
|
@ -0,0 +1,589 @@
|
|||
/*********************************************************************/
|
||||
/* Copyright 2022, The OpenBLAS Project. */
|
||||
/* All rights reserved. */
|
||||
/* */
|
||||
/* Redistribution and use in source and binary forms, with or */
|
||||
/* without modification, are permitted provided that the following */
|
||||
/* conditions are met: */
|
||||
/* */
|
||||
/* 1. Redistributions of source code must retain the above */
|
||||
/* copyright notice, this list of conditions and the following */
|
||||
/* disclaimer. */
|
||||
/* */
|
||||
/* 2. Redistributions in binary form must reproduce the above */
|
||||
/* copyright notice, this list of conditions and the following */
|
||||
/* disclaimer in the documentation and/or other materials */
|
||||
/* provided with the distribution. */
|
||||
/* */
|
||||
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
|
||||
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
|
||||
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
|
||||
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
|
||||
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
|
||||
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
|
||||
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
|
||||
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
|
||||
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
|
||||
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
|
||||
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
|
||||
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
|
||||
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
||||
/* POSSIBILITY OF SUCH DAMAGE. */
|
||||
/* */
|
||||
/*********************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include "common.h"
|
||||
#ifdef FUNCTION_PROFILE
|
||||
#include "functable.h"
|
||||
#endif
|
||||
|
||||
#ifndef COMPLEX
|
||||
#define SMP_THRESHOLD_MIN 65536.0
|
||||
#ifdef XDOUBLE
|
||||
#define ERROR_NAME "QGEMT "
|
||||
#elif defined(DOUBLE)
|
||||
#define ERROR_NAME "DGEMT "
|
||||
#elif defined(BFLOAT16)
|
||||
#define ERROR_NAME "SBGEMT "
|
||||
#else
|
||||
#define ERROR_NAME "SGEMT "
|
||||
#endif
|
||||
#else
|
||||
#define SMP_THRESHOLD_MIN 8192.0
|
||||
#ifdef XDOUBLE
|
||||
#define ERROR_NAME "XGEMT "
|
||||
#elif defined(DOUBLE)
|
||||
#define ERROR_NAME "ZGEMT "
|
||||
#else
|
||||
#define ERROR_NAME "CGEMT "
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifndef GEMM_MULTITHREAD_THRESHOLD
|
||||
#define GEMM_MULTITHREAD_THRESHOLD 4
|
||||
#endif
|
||||
|
||||
#ifndef CBLAS
|
||||
|
||||
void NAME(char *UPLO, char *TRANSA, char *TRANSB,
|
||||
blasint * M, blasint * N, blasint * K,
|
||||
FLOAT * Alpha,
|
||||
IFLOAT * a, blasint * ldA,
|
||||
IFLOAT * b, blasint * ldB, FLOAT * Beta, FLOAT * c, blasint * ldC)
|
||||
{
|
||||
|
||||
blasint m, n, k;
|
||||
blasint lda, ldb, ldc;
|
||||
int transa, transb, uplo;
|
||||
blasint info;
|
||||
|
||||
char transA, transB, Uplo;
|
||||
IFLOAT *buffer;
|
||||
IFLOAT *aa, *bb;
|
||||
FLOAT *cc;
|
||||
#if defined(COMPLEX)
|
||||
FLOAT alpha_r, alpha_i, beta_r, beta_i;
|
||||
#else
|
||||
FLOAT alpha, beta;
|
||||
#endif
|
||||
|
||||
PRINT_DEBUG_NAME;
|
||||
|
||||
m = *M;
|
||||
n = *N;
|
||||
k = *K;
|
||||
|
||||
#if defined(COMPLEX)
|
||||
FLOAT *alpha = Alpha;
|
||||
alpha_r = *(Alpha + 0);
|
||||
alpha_i = *(Alpha + 1);
|
||||
|
||||
beta_r = *(Beta + 0);
|
||||
beta_i = *(Beta + 1);
|
||||
#else
|
||||
alpha = *Alpha;
|
||||
beta = *Beta;
|
||||
#endif
|
||||
|
||||
lda = *ldA;
|
||||
ldb = *ldB;
|
||||
ldc = *ldC;
|
||||
|
||||
transA = *TRANSA;
|
||||
transB = *TRANSB;
|
||||
Uplo = *UPLO;
|
||||
TOUPPER(transA);
|
||||
TOUPPER(transB);
|
||||
TOUPPER(Uplo);
|
||||
|
||||
transa = -1;
|
||||
transb = -1;
|
||||
uplo = -1;
|
||||
|
||||
if (transA == 'N')
|
||||
transa = 0;
|
||||
if (transA == 'T')
|
||||
transa = 1;
|
||||
#ifndef COMPLEX
|
||||
if (transA == 'R')
|
||||
transa = 0;
|
||||
if (transA == 'C')
|
||||
transa = 1;
|
||||
#else
|
||||
if (transA == 'R')
|
||||
transa = 2;
|
||||
if (transA == 'C')
|
||||
transa = 3;
|
||||
#endif
|
||||
|
||||
if (transB == 'N')
|
||||
transb = 0;
|
||||
if (transB == 'T')
|
||||
transb = 1;
|
||||
#ifndef COMPLEX
|
||||
if (transB == 'R')
|
||||
transb = 0;
|
||||
if (transB == 'C')
|
||||
transb = 1;
|
||||
#else
|
||||
if (transB == 'R')
|
||||
transb = 2;
|
||||
if (transB == 'C')
|
||||
transb = 3;
|
||||
#endif
|
||||
|
||||
if (Uplo == 'U')
|
||||
uplo = 0;
|
||||
if (Uplo == 'L')
|
||||
uplo = 1;
|
||||
|
||||
info = 0;
|
||||
|
||||
if (uplo < 0)
|
||||
info = 14;
|
||||
if (ldc < m)
|
||||
info = 13;
|
||||
if (k < 0)
|
||||
info = 5;
|
||||
if (n < 0)
|
||||
info = 4;
|
||||
if (m < 0)
|
||||
info = 3;
|
||||
if (transb < 0)
|
||||
info = 2;
|
||||
if (transa < 0)
|
||||
info = 1;
|
||||
|
||||
if (info) {
|
||||
BLASFUNC(xerbla) (ERROR_NAME, &info, sizeof(ERROR_NAME));
|
||||
return;
|
||||
}
|
||||
#else
|
||||
|
||||
void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
|
||||
enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, blasint M,
|
||||
blasint N, blasint k,
|
||||
#ifndef COMPLEX
|
||||
FLOAT alpha,
|
||||
IFLOAT * A, blasint LDA,
|
||||
IFLOAT * B, blasint LDB, FLOAT beta, FLOAT * c, blasint ldc)
|
||||
{
|
||||
#else
|
||||
void *valpha,
|
||||
void *va, blasint LDA,
|
||||
void *vb, blasint LDB, void *vbeta, void *vc, blasint ldc)
|
||||
{
|
||||
FLOAT *alpha = (FLOAT *) valpha;
|
||||
FLOAT *beta = (FLOAT *) vbeta;
|
||||
FLOAT *A = (FLOAT *) va;
|
||||
FLOAT *B = (FLOAT *) vb;
|
||||
FLOAT *c = (FLOAT *) vc;
|
||||
#endif
|
||||
FLOAT *aa, *bb, *cc;
|
||||
|
||||
int transa, transb, uplo;
|
||||
blasint info;
|
||||
blasint m, n, lda, ldb;
|
||||
FLOAT *a, *b;
|
||||
XFLOAT *buffer;
|
||||
|
||||
PRINT_DEBUG_CNAME;
|
||||
|
||||
transa = -1;
|
||||
transb = -1;
|
||||
info = 0;
|
||||
|
||||
if (order == CblasColMajor) {
|
||||
|
||||
if (TransA == CblasNoTrans)
|
||||
transa = 0;
|
||||
if (TransA == CblasTrans)
|
||||
transa = 1;
|
||||
#ifndef COMPLEX
|
||||
if (TransA == CblasConjNoTrans)
|
||||
transa = 0;
|
||||
if (TransA == CblasConjTrans)
|
||||
transa = 1;
|
||||
#else
|
||||
if (TransA == CblasConjNoTrans)
|
||||
transa = 2;
|
||||
if (TransA == CblasConjTrans)
|
||||
transa = 3;
|
||||
#endif
|
||||
if (TransB == CblasNoTrans)
|
||||
transb = 0;
|
||||
if (TransB == CblasTrans)
|
||||
transb = 1;
|
||||
#ifndef COMPLEX
|
||||
if (TransB == CblasConjNoTrans)
|
||||
transb = 0;
|
||||
if (TransB == CblasConjTrans)
|
||||
transb = 1;
|
||||
#else
|
||||
if (TransB == CblasConjNoTrans)
|
||||
transb = 2;
|
||||
if (TransB == CblasConjTrans)
|
||||
transb = 3;
|
||||
#endif
|
||||
|
||||
m = M;
|
||||
n = N;
|
||||
|
||||
a = (void *)A;
|
||||
b = (void *)B;
|
||||
lda = LDA;
|
||||
ldb = LDB;
|
||||
|
||||
info = -1;
|
||||
|
||||
if (ldc < m)
|
||||
info = 13;
|
||||
if (k < 0)
|
||||
info = 5;
|
||||
if (n < 0)
|
||||
info = 4;
|
||||
if (m < 0)
|
||||
info = 3;
|
||||
if (transb < 0)
|
||||
info = 2;
|
||||
if (transa < 0)
|
||||
info = 1;
|
||||
}
|
||||
|
||||
if (order == CblasRowMajor) {
|
||||
m = N;
|
||||
n = M;
|
||||
|
||||
a = (void *)B;
|
||||
b = (void *)A;
|
||||
|
||||
lda = LDB;
|
||||
ldb = LDA;
|
||||
|
||||
if (TransB == CblasNoTrans)
|
||||
transa = 0;
|
||||
if (TransB == CblasTrans)
|
||||
transa = 1;
|
||||
#ifndef COMPLEX
|
||||
if (TransB == CblasConjNoTrans)
|
||||
transa = 0;
|
||||
if (TransB == CblasConjTrans)
|
||||
transa = 1;
|
||||
#else
|
||||
if (TransB == CblasConjNoTrans)
|
||||
transa = 2;
|
||||
if (TransB == CblasConjTrans)
|
||||
transa = 3;
|
||||
#endif
|
||||
if (TransA == CblasNoTrans)
|
||||
transb = 0;
|
||||
if (TransA == CblasTrans)
|
||||
transb = 1;
|
||||
#ifndef COMPLEX
|
||||
if (TransA == CblasConjNoTrans)
|
||||
transb = 0;
|
||||
if (TransA == CblasConjTrans)
|
||||
transb = 1;
|
||||
#else
|
||||
if (TransA == CblasConjNoTrans)
|
||||
transb = 2;
|
||||
if (TransA == CblasConjTrans)
|
||||
transb = 3;
|
||||
#endif
|
||||
|
||||
info = -1;
|
||||
|
||||
if (ldc < m)
|
||||
info = 13;
|
||||
if (k < 0)
|
||||
info = 5;
|
||||
if (n < 0)
|
||||
info = 4;
|
||||
if (m < 0)
|
||||
info = 3;
|
||||
if (transb < 0)
|
||||
info = 2;
|
||||
if (transa < 0)
|
||||
info = 1;
|
||||
|
||||
}
|
||||
|
||||
uplo = -1;
|
||||
if (Uplo == CblasUpper)
|
||||
uplo = 0;
|
||||
if (Uplo == CblasLower)
|
||||
uplo = 1;
|
||||
if (uplo < 0)
|
||||
info = 14;
|
||||
|
||||
if (info >= 0) {
|
||||
BLASFUNC(xerbla) (ERROR_NAME, &info, sizeof(ERROR_NAME));
|
||||
return;
|
||||
}
|
||||
#if defined(COMPLEX)
|
||||
FLOAT alpha_r = *(alpha + 0);
|
||||
FLOAT alpha_i = *(alpha + 1);
|
||||
|
||||
FLOAT beta_r = *(beta + 0);
|
||||
FLOAT beta_i = *(beta + 1);
|
||||
#endif
|
||||
|
||||
#endif
|
||||
int buffer_size;
|
||||
blasint l;
|
||||
blasint i, j;
|
||||
|
||||
#ifdef SMP
|
||||
int nthreads;
|
||||
#endif
|
||||
|
||||
#if defined(COMPLEX)
|
||||
|
||||
#ifdef SMP
|
||||
static int (*gemv_thread[]) (BLASLONG, BLASLONG, FLOAT *, FLOAT *,
|
||||
BLASLONG, FLOAT *, BLASLONG, FLOAT *,
|
||||
BLASLONG, FLOAT *, int) = {
|
||||
#ifdef XDOUBLE
|
||||
xgemv_thread_n, xgemv_thread_t, xgemv_thread_r, xgemv_thread_c,
|
||||
xgemv_thread_o, xgemv_thread_u, xgemv_thread_s,
|
||||
xgemv_thread_d,
|
||||
#elif defined DOUBLE
|
||||
zgemv_thread_n, zgemv_thread_t, zgemv_thread_r, zgemv_thread_c,
|
||||
zgemv_thread_o, zgemv_thread_u, zgemv_thread_s,
|
||||
zgemv_thread_d,
|
||||
#else
|
||||
cgemv_thread_n, cgemv_thread_t, cgemv_thread_r, cgemv_thread_c,
|
||||
cgemv_thread_o, cgemv_thread_u, cgemv_thread_s,
|
||||
cgemv_thread_d,
|
||||
#endif
|
||||
};
|
||||
#endif
|
||||
|
||||
int (*gemv[]) (BLASLONG, BLASLONG, BLASLONG, FLOAT, FLOAT, FLOAT *,
|
||||
BLASLONG, FLOAT *, BLASLONG, FLOAT *, BLASLONG,
|
||||
FLOAT *) = {
|
||||
GEMV_N, GEMV_T, GEMV_R, GEMV_C, GEMV_O, GEMV_U, GEMV_S, GEMV_D,};
|
||||
|
||||
#else
|
||||
|
||||
#ifdef SMP
|
||||
static int (*gemv_thread[]) (BLASLONG, BLASLONG, FLOAT, FLOAT *,
|
||||
BLASLONG, FLOAT *, BLASLONG, FLOAT *,
|
||||
BLASLONG, FLOAT *, int) = {
|
||||
#ifdef XDOUBLE
|
||||
qgemv_thread_n, qgemv_thread_t,
|
||||
#elif defined DOUBLE
|
||||
dgemv_thread_n, dgemv_thread_t,
|
||||
#else
|
||||
sgemv_thread_n, sgemv_thread_t,
|
||||
#endif
|
||||
};
|
||||
#endif
|
||||
int (*gemv[]) (BLASLONG, BLASLONG, BLASLONG, FLOAT, FLOAT *, BLASLONG,
|
||||
FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *) = {
|
||||
GEMV_N, GEMV_T,};
|
||||
|
||||
#endif
|
||||
|
||||
if ((m == 0) || (n == 0))
|
||||
return;
|
||||
|
||||
IDEBUG_START;
|
||||
|
||||
FUNCTION_PROFILE_START();
|
||||
|
||||
const blasint incb = (transb == 0) ? 1 : ldb;
|
||||
|
||||
if (uplo == 1) {
|
||||
for (i = 0; i < n; i++) {
|
||||
j = n - i;
|
||||
|
||||
l = j;
|
||||
#if defined(COMPLEX)
|
||||
aa = a + i * 2;
|
||||
bb = b + i * ldb * 2;
|
||||
if (transa) {
|
||||
l = k;
|
||||
aa = a + lda * i * 2;
|
||||
bb = b + i * 2;
|
||||
}
|
||||
cc = c + i * 2 * ldc + i * 2;
|
||||
#else
|
||||
aa = a + i;
|
||||
bb = b + i * ldb;
|
||||
if (transa) {
|
||||
l = k;
|
||||
aa = a + lda * i;
|
||||
bb = b + i;
|
||||
}
|
||||
cc = c + i * ldc + i;
|
||||
#endif
|
||||
|
||||
#if defined(COMPLEX)
|
||||
if (beta_r != ONE || beta_i != ZERO)
|
||||
SCAL_K(l, 0, 0, beta_r, beta_i, cc, 1, NULL, 0,
|
||||
NULL, 0);
|
||||
|
||||
/* alpha == 0: nothing to accumulate into this column, but the remaining
 * columns still need their beta scaling, so move on to the next column
 * (as the real-valued branch does) instead of leaving the routine. */
if (alpha_r == ZERO && alpha_i == ZERO)
|
||||
continue;
|
||||
#else
|
||||
if (beta != ONE)
|
||||
SCAL_K(l, 0, 0, beta, cc, 1, NULL, 0, NULL, 0);
|
||||
|
||||
if (alpha == ZERO)
|
||||
continue;
|
||||
#endif
|
||||
|
||||
IDEBUG_START;
|
||||
|
||||
FUNCTION_PROFILE_START();
|
||||
|
||||
buffer_size = j + k + 128 / sizeof(FLOAT);
|
||||
#ifdef WINDOWS_ABI
|
||||
buffer_size += 160 / sizeof(FLOAT);
|
||||
#endif
|
||||
// for alignment
|
||||
buffer_size = (buffer_size + 3) & ~3;
|
||||
STACK_ALLOC(buffer_size, FLOAT, buffer);
|
||||
|
||||
#ifdef SMP
|
||||
|
||||
if (1L * j * k < 2304L * GEMM_MULTITHREAD_THRESHOLD)
|
||||
nthreads = 1;
|
||||
else
|
||||
nthreads = num_cpu_avail(2);
|
||||
|
||||
if (nthreads == 1) {
|
||||
#endif
|
||||
|
||||
#if defined(COMPLEX)
|
||||
(gemv[(int)transa]) (j, k, 0, alpha_r, alpha_i,
|
||||
aa, lda, bb, incb, cc, 1,
|
||||
buffer);
|
||||
#else
|
||||
(gemv[(int)transa]) (j, k, 0, alpha, aa, lda,
|
||||
bb, incb, cc, 1, buffer);
|
||||
#endif
|
||||
#ifdef SMP
|
||||
} else {
|
||||
|
||||
(gemv_thread[(int)transa]) (j, k, alpha, aa,
|
||||
lda, bb, incb, cc,
|
||||
1, buffer,
|
||||
nthreads);
|
||||
|
||||
}
|
||||
#endif
|
||||
|
||||
STACK_FREE(buffer);
|
||||
}
|
||||
} else {
|
||||
|
||||
for (i = 0; i < n; i++) {
|
||||
j = i + 1;
|
||||
|
||||
l = j;
|
||||
#if defined COMPLEX
|
||||
bb = b + i * ldb * 2;
|
||||
if (transa) {
|
||||
l = k;
|
||||
bb = b + i * 2;
|
||||
}
|
||||
cc = c + i * 2 * ldc;
|
||||
#else
|
||||
bb = b + i * ldb;
|
||||
if (transa) {
|
||||
l = k;
|
||||
bb = b + i;
|
||||
}
|
||||
cc = c + i * ldc;
|
||||
#endif
|
||||
|
||||
#if defined(COMPLEX)
|
||||
if (beta_r != ONE || beta_i != ZERO)
|
||||
SCAL_K(l, 0, 0, beta_r, beta_i, cc, 1, NULL, 0,
|
||||
NULL, 0);
|
||||
|
||||
/* alpha == 0: nothing to accumulate into this column, but the remaining
 * columns still need their beta scaling, so move on to the next column
 * (as the real-valued branch does) instead of leaving the routine. */
if (alpha_r == ZERO && alpha_i == ZERO)
|
||||
continue;
|
||||
#else
|
||||
if (beta != ONE)
|
||||
SCAL_K(l, 0, 0, beta, cc, 1, NULL, 0, NULL, 0);
|
||||
|
||||
if (alpha == ZERO)
|
||||
continue;
|
||||
#endif
|
||||
IDEBUG_START;
|
||||
|
||||
FUNCTION_PROFILE_START();
|
||||
|
||||
buffer_size = j + k + 128 / sizeof(FLOAT);
|
||||
#ifdef WINDOWS_ABI
|
||||
buffer_size += 160 / sizeof(FLOAT);
|
||||
#endif
|
||||
// for alignment
|
||||
buffer_size = (buffer_size + 3) & ~3;
|
||||
STACK_ALLOC(buffer_size, FLOAT, buffer);
|
||||
|
||||
#ifdef SMP
|
||||
|
||||
if (1L * j * k < 2304L * GEMM_MULTITHREAD_THRESHOLD)
|
||||
nthreads = 1;
|
||||
else
|
||||
nthreads = num_cpu_avail(2);
|
||||
|
||||
if (nthreads == 1) {
|
||||
#endif
|
||||
|
||||
#if defined(COMPLEX)
|
||||
(gemv[(int)transa]) (j, k, 0, alpha_r, alpha_i,
|
||||
a, lda, bb, incb, cc, 1,
|
||||
buffer);
|
||||
#else
|
||||
(gemv[(int)transa]) (j, k, 0, alpha, a, lda, bb,
|
||||
incb, cc, 1, buffer);
|
||||
#endif
|
||||
|
||||
#ifdef SMP
|
||||
} else {
|
||||
|
||||
(gemv_thread[(int)transa]) (j, k, alpha, a, lda,
|
||||
bb, incb, cc, 1,
|
||||
buffer, nthreads);
|
||||
|
||||
}
|
||||
#endif
|
||||
|
||||
STACK_FREE(buffer);
|
||||
}
|
||||
}
|
||||
FUNCTION_PROFILE_END(COMPSIZE * COMPSIZE,
|
||||
args.m * args.k + args.k * args.n +
|
||||
args.m * args.n, 2 * args.m * args.n * args.k);
|
||||
|
||||
IDEBUG_END;
|
||||
|
||||
return;
|
||||
}
|
|
@ -44,6 +44,7 @@
|
|||
#endif
|
||||
|
||||
#ifndef COMPLEX
|
||||
#define SMP_THRESHOLD_MIN 65536.
|
||||
#ifdef XDOUBLE
|
||||
#define ERROR_NAME "QSYMM "
|
||||
#elif defined(DOUBLE)
|
||||
|
@ -52,6 +53,7 @@
|
|||
#define ERROR_NAME "SSYMM "
|
||||
#endif
|
||||
#else
|
||||
#define SMP_THRESHOLD_MIN 8192.
|
||||
#ifndef GEMM3M
|
||||
#ifndef HEMM
|
||||
#ifdef XDOUBLE
|
||||
|
@ -91,6 +93,10 @@
|
|||
#endif
|
||||
#endif
|
||||
|
||||
#ifndef GEMM_MULTITHREAD_THRESHOLD
|
||||
#define GEMM_MULTITHREAD_THRESHOLD 4
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef SMP
|
||||
#ifndef COMPLEX
|
||||
|
@ -159,7 +165,9 @@ void NAME(char *SIDE, char *UPLO,
|
|||
#if defined(SMP) && !defined(NO_AFFINITY)
|
||||
int nodes;
|
||||
#endif
|
||||
|
||||
# if defined(SMP)
|
||||
/* Work estimate compared against the multithreading threshold. Kept as a
 * double because the estimate is evaluated in floating point and can
 * exceed INT_MAX for large matrices; narrowing it to int is undefined. */
double MN;
|
||||
#endif
|
||||
blasint info;
|
||||
int side;
|
||||
int uplo;
|
||||
|
@ -255,6 +263,9 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo,
|
|||
#if defined(SMP) && !defined(NO_AFFINITY)
|
||||
int nodes;
|
||||
#endif
|
||||
#if defined(SMP)
|
||||
/* Work estimate compared against the multithreading threshold. Kept as a
 * double because the estimate is evaluated in floating point and can
 * exceed INT_MAX for large matrices; narrowing it to int is undefined. */
double MN;
|
||||
#endif
|
||||
|
||||
PRINT_DEBUG_CNAME;
|
||||
|
||||
|
@ -375,15 +386,18 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo,
|
|||
|
||||
#ifdef SMP
|
||||
args.common = NULL;
|
||||
MN = 2.* (double) args.m * (double)args.m * (double) args.n;
|
||||
if (MN <= (SMP_THRESHOLD_MIN * (double) GEMM_MULTITHREAD_THRESHOLD) ) {
|
||||
args.nthreads = 1;
|
||||
} else {
|
||||
args.nthreads = num_cpu_avail(3);
|
||||
|
||||
}
|
||||
if (args.nthreads == 1) {
|
||||
#endif
|
||||
|
||||
(symm[(side << 1) | uplo ])(&args, NULL, NULL, sa, sb, 0);
|
||||
|
||||
#ifdef SMP
|
||||
|
||||
} else {
|
||||
|
||||
#ifndef NO_AFFINITY
|
||||
|
|
|
@ -180,6 +180,9 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint n, FLOAT alpha,
|
|||
buffer = (FLOAT *)blas_memory_alloc(1);
|
||||
|
||||
#ifdef SMP
|
||||
if (n <200)
|
||||
nthreads=1;
|
||||
else
|
||||
nthreads = num_cpu_avail(2);
|
||||
|
||||
if (nthreads == 1) {
|
||||
|
|
|
@ -368,6 +368,9 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Tr
|
|||
mode |= (uplo << BLAS_UPLO_SHIFT);
|
||||
|
||||
args.common = NULL;
|
||||
if (args.n*args.k <1000)
|
||||
args.nthreads =1 ;
|
||||
else
|
||||
args.nthreads = num_cpu_avail(3);
|
||||
|
||||
if (args.nthreads == 1) {
|
||||
|
|
|
@ -44,6 +44,7 @@
|
|||
#endif
|
||||
|
||||
#ifndef COMPLEX
|
||||
#define SMP_THRESHOLD_MIN 109944.
|
||||
#ifdef XDOUBLE
|
||||
#define ERROR_NAME "QSYRK "
|
||||
#elif defined(DOUBLE)
|
||||
|
@ -52,6 +53,7 @@
|
|||
#define ERROR_NAME "SSYRK "
|
||||
#endif
|
||||
#else
|
||||
#define SMP_THRESHOLD_MIN 14824.
|
||||
#ifndef HEMM
|
||||
#ifdef XDOUBLE
|
||||
#define ERROR_NAME "XSYRK "
|
||||
|
@ -71,6 +73,10 @@
|
|||
#endif
|
||||
#endif
|
||||
|
||||
#ifndef GEMM_MULTITHREAD_THRESHOLD
|
||||
#define GEMM_MULTITHREAD_THRESHOLD 4
|
||||
#endif
|
||||
|
||||
static int (*syrk[])(blas_arg_t *, BLASLONG *, BLASLONG *, FLOAT *, FLOAT *, BLASLONG) = {
|
||||
#ifndef HEMM
|
||||
SYRK_UN, SYRK_UC, SYRK_LN, SYRK_LC,
|
||||
|
@ -101,6 +107,7 @@ void NAME(char *UPLO, char *TRANS,
|
|||
FLOAT *sa, *sb;
|
||||
|
||||
#ifdef SMP
|
||||
/* Work estimate compared against the multithreading threshold. Kept as a
 * double because the estimate is evaluated in floating point and can
 * exceed INT_MAX for large matrices; narrowing it to int is undefined. */
double NNK;
|
||||
#ifdef USE_SIMPLE_THREADED_LEVEL3
|
||||
#ifndef COMPLEX
|
||||
#ifdef XDOUBLE
|
||||
|
@ -225,6 +232,8 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Tr
|
|||
FLOAT *sa, *sb;
|
||||
|
||||
#ifdef SMP
|
||||
/* Work estimate compared against the multithreading threshold. Kept as a
 * double because the estimate is evaluated in floating point and can
 * exceed INT_MAX for large matrices; narrowing it to int is undefined. */
double NNK;
|
||||
|
||||
#ifdef USE_SIMPLE_THREADED_LEVEL3
|
||||
#ifndef COMPLEX
|
||||
#ifdef XDOUBLE
|
||||
|
@ -354,18 +363,13 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Tr
|
|||
#endif
|
||||
|
||||
args.common = NULL;
|
||||
#ifndef COMPLEX
|
||||
#ifdef DOUBLE
|
||||
if (args.n < 100)
|
||||
#else
|
||||
if (args.n < 200)
|
||||
#endif
|
||||
#else
|
||||
if (args.n < 65)
|
||||
#endif
|
||||
|
||||
NNK = (double)(args.n+1)*(double)args.n*(double)args.k;
|
||||
if (NNK <= (SMP_THRESHOLD_MIN * GEMM_MULTITHREAD_THRESHOLD)) {
|
||||
args.nthreads = 1;
|
||||
else
|
||||
} else {
|
||||
args.nthreads = num_cpu_avail(3);
|
||||
}
|
||||
|
||||
if (args.nthreads == 1) {
|
||||
#endif
|
||||
|
@ -373,7 +377,6 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Tr
|
|||
(syrk[(uplo << 1) | trans ])(&args, NULL, NULL, sa, sb, 0);
|
||||
|
||||
#ifdef SMP
|
||||
|
||||
} else {
|
||||
|
||||
#ifndef USE_SIMPLE_THREADED_LEVEL3
|
||||
|
|
|
@ -181,7 +181,7 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, int n, FLOAT alpha, FLO
|
|||
alpha_i * x[i * 2 + 0] + alpha_r * x[i * 2 + 1],
|
||||
x, 1, a, 1, NULL, 0);
|
||||
}
|
||||
a += lda;
|
||||
a += lda * 2;
|
||||
}
|
||||
} else {
|
||||
for (i = 0; i < n; i++){
|
||||
|
@ -191,7 +191,7 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, int n, FLOAT alpha, FLO
|
|||
alpha_i * x[i * 2 + 0] + alpha_r * x[i * 2 + 1],
|
||||
x + i * 2, 1, a, 1, NULL, 0);
|
||||
}
|
||||
a += 2 + lda;
|
||||
a += 2 + lda * 2;
|
||||
}
|
||||
}
|
||||
return;
|
||||
|
|
|
@ -238,6 +238,72 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS)
|
|||
GenerateNamedObjects("${KERNELDIR}/${DGEMMOTCOPY}" "DOUBLE" "${DGEMMOTCOPYOBJ}" false "" "" true "DOUBLE")
|
||||
endif ()
|
||||
GenerateNamedObjects("${KERNELDIR}/${DGEMM_BETA}" "" "gemm_beta" false "" "" false "DOUBLE")
|
||||
GenerateNamedObjects("generic/neg_tcopy_${DGEMM_UNROLL_M}.c" "" "neg_tcopy" false "" ${TSUFFIX} false "DOUBLE")
|
||||
GenerateNamedObjects("generic/laswp_ncopy_${DGEMM_UNROLL_N}.c" "" "laswp_ncopy" false "" ${TSUFFIX} false "DOUBLE")
|
||||
if (SMALL_MATRIX_OPT)
|
||||
if (NOT DEFINED DGEMM_SMALL_M_PERMIT)
|
||||
set(DGEMM_SMALL_M_PERMIT ../generic/gemm_small_matrix_permit.c)
|
||||
endif ()
|
||||
if (NOT DEFINED DGEMM_SMALL_K_NN)
|
||||
set(DGEMM_SMALL_K_NN ../generic/gemm_small_matrix_kernel_nn.c)
|
||||
endif ()
|
||||
if (NOT DEFINED DGEMM_SMALL_K_NT)
|
||||
set(DGEMM_SMALL_K_NT ../generic/gemm_small_matrix_kernel_nt.c)
|
||||
endif ()
|
||||
if (NOT DEFINED DGEMM_SMALL_K_TN)
|
||||
set(DGEMM_SMALL_K_TN ../generic/gemm_small_matrix_kernel_tn.c)
|
||||
endif ()
|
||||
if (NOT DEFINED DGEMM_SMALL_K_TT)
|
||||
set(DGEMM_SMALL_K_TT ../generic/gemm_small_matrix_kernel_tt.c)
|
||||
endif ()
|
||||
if (NOT DEFINED DGEMM_SMALL_K_B0_NN)
|
||||
set(DGEMM_SMALL_K_B0_NN ../generic/gemm_small_matrix_kernel_nn.c)
|
||||
endif ()
|
||||
if (NOT DEFINED DGEMM_SMALL_K_B0_NT)
|
||||
set(DGEMM_SMALL_K_B0_NT ../generic/gemm_small_matrix_kernel_nt.c)
|
||||
endif ()
|
||||
if (NOT DEFINED DGEMM_SMALL_K_B0_TN)
|
||||
set(DGEMM_SMALL_K_B0_TN ../generic/gemm_small_matrix_kernel_tn.c)
|
||||
endif ()
|
||||
if (NOT DEFINED DGEMM_SMALL_K_B0_TT)
|
||||
set(DGEMM_SMALL_K_B0_TT ../generic/gemm_small_matrix_kernel_tt.c)
|
||||
endif ()
|
||||
|
||||
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_M_PERMIT}" "" "gemm_small_matrix_permit" false "" "" false "DOUBLE")
|
||||
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_NN}" "NN" "gemm_small_kernel_nn" false "" "" false "DOUBLE")
|
||||
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_NN}" "NR" "gemm_small_kernel_nr" false "" "" false "DOUBLE")
|
||||
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_NN}" "RN" "gemm_small_kernel_rn" false "" "" false "DOUBLE")
|
||||
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_NN}" "RR" "gemm_small_kernel_rr" false "" "" false "DOUBLE")
|
||||
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_NT}" "NT" "gemm_small_kernel_nt" false "" "" false "DOUBLE")
|
||||
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_NT}" "NC" "gemm_small_kernel_nc" false "" "" false "DOUBLE")
|
||||
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_NT}" "RT" "gemm_small_kernel_rt" false "" "" false "DOUBLE")
|
||||
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_NT}" "RC" "gemm_small_kernel_rc" false "" "" false "DOUBLE")
|
||||
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_TN}" "TN" "gemm_small_kernel_tn" false "" "" false "DOUBLE")
|
||||
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_TN}" "TR" "gemm_small_kernel_tr" false "" "" false "DOUBLE")
|
||||
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_TN}" "CN" "gemm_small_kernel_cn" false "" "" false "DOUBLE")
|
||||
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_TN}" "CR" "gemm_small_kernel_cr" false "" "" false "DOUBLE")
|
||||
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_TT}" "TT" "gemm_small_kernel_tt" false "" "" false "DOUBLE")
|
||||
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_TT}" "TC" "gemm_small_kernel_tc" false "" "" false "DOUBLE")
|
||||
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_TT}" "CT" "gemm_small_kernel_ct" false "" "" false "DOUBLE")
|
||||
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_TT}" "CC" "gemm_small_kernel_cc" false "" "" false "DOUBLE")
|
||||
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_NN}" "NN;B0" "gemm_small_kernel_b0_nn" false "" "" false "DOUBLE")
|
||||
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_NN}" "NR;B0" "gemm_small_kernel_b0_nr" false "" "" false "DOUBLE")
|
||||
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_NN}" "RN;B0" "gemm_small_kernel_b0_rn" false "" "" false "DOUBLE")
|
||||
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_NN}" "RR;B0" "gemm_small_kernel_b0_rr" false "" "" false "DOUBLE")
|
||||
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_NT}" "NT;B0" "gemm_small_kernel_b0_nt" false "" "" false "DOUBLE")
|
||||
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_NT}" "NC;B0" "gemm_small_kernel_b0_nc" false "" "" false "DOUBLE")
|
||||
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_NT}" "RT;B0" "gemm_small_kernel_b0_rt" false "" "" false "DOUBLE")
|
||||
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_NT}" "RC;B0" "gemm_small_kernel_b0_rc" false "" "" false "DOUBLE")
|
||||
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_TN}" "TN;B0" "gemm_small_kernel_b0_tn" false "" "" false "DOUBLE")
|
||||
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_TN}" "TR;B0" "gemm_small_kernel_b0_tr" false "" "" false "DOUBLE")
|
||||
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_TN}" "CN;B0" "gemm_small_kernel_b0_cn" false "" "" false "DOUBLE")
|
||||
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_TN}" "CR;B0" "gemm_small_kernel_b0_cr" false "" "" false "DOUBLE")
|
||||
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_TT}" "TT;B0" "gemm_small_kernel_b0_tt" false "" "" false "DOUBLE")
|
||||
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_TT}" "TC;B0" "gemm_small_kernel_b0_tc" false "" "" false "DOUBLE")
|
||||
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_TT}" "CT;B0" "gemm_small_kernel_b0_ct" false "" "" false "DOUBLE")
|
||||
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_TT}" "CC;B0" "gemm_small_kernel_b0_cc" false "" "" false "DOUBLE")
|
||||
endif ()
|
||||
|
||||
endif ()
|
||||
if ((BUILD_DOUBLE OR BUILD_COMPLEX) AND NOT BUILD_SINGLE)
|
||||
GenerateNamedObjects("${KERNELDIR}/${SGEMMKERNEL}" "" "gemm_kernel" false "" "" false "SINGLE")
|
||||
|
@ -825,7 +891,7 @@ endif ()
|
|||
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEADD_KERNEL}" "" "geadd_k" false "" "" false ${float_type})
|
||||
endforeach ()
|
||||
|
||||
if (BUILD_DOUBLE AND NOT BUILD_SINGLE)
|
||||
if ((BUILD_DOUBLE OR BUILD_COMPLEX) AND NOT BUILD_SINGLE)
|
||||
GenerateNamedObjects("${KERNELDIR}/${STRSMKERNEL_LN}" "UPPER;LN;TRSMKERNEL" "trsm_kernel_LN" false "" "" false "SINGLE")
|
||||
GenerateNamedObjects("${KERNELDIR}/${STRSMKERNEL_LT}" "LT;TRSMKERNEL" "trsm_kernel_LT" false "" "" false "SINGLE")
|
||||
GenerateNamedObjects("${KERNELDIR}/${STRSMKERNEL_RN}" "UPPER;RN;TRSMKERNEL" "trsm_kernel_RN" false "" "" false "SINGLE")
|
||||
|
@ -849,6 +915,45 @@ endif ()
|
|||
GenerateNamedObjects("generic/trsm_ltcopy_${SGEMM_UNROLL_M}.c" "LOWER" "trsm_iltncopy" false "" "" false "SINGLE")
|
||||
GenerateNamedObjects("generic/trsm_ltcopy_${SGEMM_UNROLL_N}.c" "OUTER;LOWER;UNIT" "trsm_oltucopy" false "" "" false "SINGLE")
|
||||
GenerateNamedObjects("generic/trsm_ltcopy_${SGEMM_UNROLL_N}.c" "OUTER;LOWER" "trsm_oltncopy" false "" "" false "SINGLE")
|
||||
if (SMALL_MATRIX_OPT)
|
||||
if (NOT DEFINED SGEMM_SMALL_M_PERMIT)
|
||||
set(SGEMM_SMALL_M_PERMIT ../generic/gemm_small_matrix_permit.c)
|
||||
endif ()
|
||||
if (NOT DEFINED SGEMM_SMALL_K_NN)
|
||||
set(SGEMM_SMALL_K_NN ../generic/gemm_small_matrix_kernel_nn.c)
|
||||
endif ()
|
||||
if (NOT DEFINED SGEMM_SMALL_K_NT)
|
||||
set(SGEMM_SMALL_K_NT ../generic/gemm_small_matrix_kernel_nt.c)
|
||||
endif ()
|
||||
if (NOT DEFINED SGEMM_SMALL_K_TN)
|
||||
set(SGEMM_SMALL_K_TN ../generic/gemm_small_matrix_kernel_tn.c)
|
||||
endif ()
|
||||
if (NOT DEFINED SGEMM_SMALL_K_TT)
|
||||
set(SGEMM_SMALL_K_TT ../generic/gemm_small_matrix_kernel_tt.c)
|
||||
endif ()
|
||||
if (NOT DEFINED SGEMM_SMALL_K_B0_NN)
|
||||
set(SGEMM_SMALL_K_B0_NN ../generic/gemm_small_matrix_kernel_nn.c)
|
||||
endif ()
|
||||
if (NOT DEFINED SGEMM_SMALL_K_B0_NT)
|
||||
set(SGEMM_SMALL_K_B0_NT ../generic/gemm_small_matrix_kernel_nt.c)
|
||||
endif ()
|
||||
if (NOT DEFINED SGEMM_SMALL_K_B0_TN)
|
||||
set(SGEMM_SMALL_K_B0_TN ../generic/gemm_small_matrix_kernel_tn.c)
|
||||
endif ()
|
||||
if (NOT DEFINED SGEMM_SMALL_K_B0_TT)
|
||||
set(SGEMM_SMALL_K_B0_TT ../generic/gemm_small_matrix_kernel_tt.c)
|
||||
endif ()
|
||||
GenerateNamedObjects("${KERNELDIR}/${SGEMM_SMALL_M_PERMIT}" "" "gemm_small_matrix_permit" false "" "" false "SINGLE")
|
||||
GenerateNamedObjects("${KERNELDIR}/${SGEMM_SMALL_K_NN}" "" "gemm_small_kernel_nn" false "" "" false "SINGLE")
|
||||
GenerateNamedObjects("${KERNELDIR}/${SGEMM_SMALL_K_NT}" "" "gemm_small_kernel_nt" false "" "" false "SINGLE")
|
||||
GenerateNamedObjects("${KERNELDIR}/${SGEMM_SMALL_K_TN}" "" "gemm_small_kernel_tn" false "" "" false "SINGLE")
|
||||
GenerateNamedObjects("${KERNELDIR}/${SGEMM_SMALL_K_TT}" "" "gemm_small_kernel_tt" false "" "" false "SINGLE")
|
||||
GenerateNamedObjects("${KERNELDIR}/${SGEMM_SMALL_K_B0_NN}" "B0" "gemm_small_kernel_b0_nn" false "" "" false "SINGLE")
|
||||
GenerateNamedObjects("${KERNELDIR}/${SGEMM_SMALL_K_B0_NT}" "B0" "gemm_small_kernel_b0_nt" false "" "" false "SINGLE")
|
||||
GenerateNamedObjects("${KERNELDIR}/${SGEMM_SMALL_K_B0_TN}" "B0" "gemm_small_kernel_b0_tn" false "" "" false "SINGLE")
|
||||
GenerateNamedObjects("${KERNELDIR}/${SGEMM_SMALL_K_B0_TT}" "B0" "gemm_small_kernel_b0_tt" false "" "" false "SINGLE")
|
||||
endif ()
|
||||
|
||||
endif ()
|
||||
|
||||
# Makefile.LA
|
||||
|
@ -878,25 +983,25 @@ endif ()
|
|||
endforeach()
|
||||
if (BUILD_COMPLEX AND NOT BUILD_SINGLE)
|
||||
if (NOT DEFINED SNEG_TCOPY)
|
||||
set(SNEG_TCOPY ../generic/neg_tcopy_${${float_char}GEMM_UNROLL_M}.c)
|
||||
set(SNEG_TCOPY ../generic/neg_tcopy_${SGEMM_UNROLL_M}.c)
|
||||
endif ()
|
||||
|
||||
if (NOT DEFINED SLASWP_NCOPY)
|
||||
set(SLASWP_NCOPY ../generic/laswp_ncopy_${${float_char}GEMM_UNROLL_N}.c)
|
||||
set(SLASWP_NCOPY ../generic/laswp_ncopy_${SGEMM_UNROLL_N}.c)
|
||||
endif ()
|
||||
GenerateNamedObjects("${KERNELDIR}/${SNEG_TCOPY}" "" "neg_tcopy" false "" "" false "SINGLE")
|
||||
GenerateNamedObjects("${KERNELDIR}/${SLASWP_NCOPY}" "" "laswp_ncopy" false "" "" false "SINGLE")
|
||||
endif()
|
||||
if (BUILD_COMPLEX16 AND NOT BUILD_DOUBLE)
|
||||
if (NOT DEFINED DNEG_TCOPY)
|
||||
set(DNEG_TCOPY ../generic/neg_tcopy_${${float_char}GEMM_UNROLL_M}.c)
|
||||
set(DNEG_TCOPY ../generic/neg_tcopy_${DGEMM_UNROLL_M}.c)
|
||||
endif ()
|
||||
|
||||
if (NOT DEFINED DLASWP_NCOPY)
|
||||
set(DLASWP_NCOPY ../generic/laswp_ncopy_${${float_char}GEMM_UNROLL_N}.c)
|
||||
set(DLASWP_NCOPY ../generic/laswp_ncopy_${DGEMM_UNROLL_N}.c)
|
||||
endif ()
|
||||
GenerateNamedObjects("${KERNELDIR}/${DNEG_TCOPY}_${DGEMM_UNROLL_M}.c" "" "neg_tcopy" false "" "" false "DOUBLE")
|
||||
GenerateNamedObjects("${KERNELDIR}/${DLASWP_NCOPY}_${DGEMM_UNROLL_N}.c" "" "laswp_ncopy" false "" "" false "DOUBLE")
|
||||
GenerateNamedObjects("${KERNELDIR}/${DNEG_TCOPY}" "" "neg_tcopy" false "" "" false "DOUBLE")
|
||||
GenerateNamedObjects("${KERNELDIR}/${DLASWP_NCOPY}" "" "laswp_ncopy" false "" "" false "DOUBLE")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
|
@ -979,10 +1084,117 @@ endif ()
|
|||
endif ()
|
||||
|
||||
if (BUILD_COMPLEX16 AND NOT BUILD_DOUBLE)
|
||||
GenerateNamedObjects("${KERNELDIR}/${DAMAXKERNEL}" "USE_ABS" "amax_k" false "" "" false "DOUBLE")
|
||||
GenerateNamedObjects("${KERNELDIR}/${DAMINKERNEL}" "USE_ABS;USE_MIN" "amin_k" false "" "" false "DOUBLE")
|
||||
GenerateNamedObjects("${KERNELDIR}/${DASUMKERNEL}" "" "asum_k" false "" "" false "DOUBLE")
|
||||
if (DEFINED DMAXKERNEL)
|
||||
GenerateNamedObjects("${KERNELDIR}/${DMAXKERNEL}" "" "max_k" false "" "" false "DOUBLE")
|
||||
endif ()
|
||||
if (DEFINED DMINKERNEL)
|
||||
GenerateNamedObjects("${KERNELDIR}/${DMINKERNEL}" "USE_MIN" "min_k" false "" "" false "DOUBLE")
|
||||
endif ()
|
||||
if (DEFINED IDMINKERNEL)
|
||||
GenerateNamedObjects("${KERNELDIR}/${IDMINKERNEL}" "USE_MIN" "i*min_k" false "" "" false "DOUBLE")
|
||||
endif ()
|
||||
if (DEFINED IDMAXKERNEL)
|
||||
GenerateNamedObjects("${KERNELDIR}/${IDMAXKERNEL}" "" "i*max_k" false "" "" false "DOUBLE")
|
||||
endif ()
|
||||
GenerateNamedObjects("${KERNELDIR}/${IDAMAXKERNEL}" "USE_ABS" "i*amax_k" false "" "" false "DOUBLE")
|
||||
GenerateNamedObjects("${KERNELDIR}/${IDAMINKERNEL}" "USE_ABS;USE_MIN" "i*amin_k" false "" "" false "DOUBLE")
|
||||
GenerateNamedObjects("${KERNELDIR}/${DSCALKERNEL}" "" "scal_k" false "" "" false "DOUBLE")
|
||||
GenerateNamedObjects("${KERNELDIR}/${DCOPYKERNEL}" "C_INTERFACE" "copy_k" false "" "" false "DOUBLE")
|
||||
GenerateNamedObjects("${KERNELDIR}/${DNRM2KERNEL}" "" "nrm2_k" false "" "" false "DOUBLE")
|
||||
GenerateNamedObjects("${KERNELDIR}/${DROTKERNEL}" "" "rot_k" false "" "" false "DOUBLE")
|
||||
GenerateNamedObjects("${KERNELDIR}/${DDOTKERNEL}" "" "dot_k" false "" "" false "DOUBLE")
|
||||
GenerateNamedObjects("${KERNELDIR}/${DSWAPKERNEL}" "" "swap_k" false "" "" false "DOUBLE")
|
||||
GenerateNamedObjects("${KERNELDIR}/${DAXPYKERNEL}" "" "axpy_k" false "" "" false "DOUBLE")
|
||||
|
||||
GenerateNamedObjects("${KERNELDIR}/${DGEMVNKERNEL}" "" "gemv_n" false "" "" false "DOUBLE")
|
||||
GenerateNamedObjects("${KERNELDIR}/${DGEMVTKERNEL}" "TRANS" "gemv_t" false "" "" false "DOUBLE")
|
||||
GenerateNamedObjects("${KERNELDIR}/${DGEMMKERNEL}" "" "gemm_kernel" false "" "" false "DOUBLE")
|
||||
if (DGEMMINCOPY)
|
||||
GenerateNamedObjects("${KERNELDIR}/${DGEMMINCOPY}" "DOUBLE" "${DGEMMINCOPYOBJ}" false "" "" true "DOUBLE")
|
||||
endif ()
|
||||
if (DGEMMITCOPY)
|
||||
GenerateNamedObjects("${KERNELDIR}/${DGEMMITCOPY}" "DOUBLE" "${DGEMMITCOPYOBJ}" false "" "" true "DOUBLE")
|
||||
endif ()
|
||||
if (DGEMMONCOPY)
|
||||
GenerateNamedObjects("${KERNELDIR}/${DGEMMONCOPY}" "DOUBLE" "${DGEMMONCOPYOBJ}" false "" "" true "DOUBLE")
|
||||
endif ()
|
||||
if (DGEMMOTCOPY)
|
||||
GenerateNamedObjects("${KERNELDIR}/${DGEMMOTCOPY}" "DOUBLE" "${DGEMMOTCOPYOBJ}" false "" "" true "DOUBLE")
|
||||
endif ()
|
||||
GenerateNamedObjects("${KERNELDIR}/${DGEMM_BETA}" "" "gemm_beta" false "" "" false "DOUBLE")
|
||||
|
||||
GenerateNamedObjects("generic/neg_tcopy_${DGEMM_UNROLL_M}.c" "" "neg_tcopy" false "" ${TSUFFIX} false "DOUBLE")
|
||||
GenerateNamedObjects("generic/laswp_ncopy_${DGEMM_UNROLL_N}.c" "" "laswp_ncopy" false "" ${TSUFFIX} false "DOUBLE")
|
||||
if (SMALL_MATRIX_OPT)
|
||||
if (NOT DEFINED DGEMM_SMALL_M_PERMIT)
|
||||
set(DGEMM_SMALL_M_PERMIT ../generic/gemm_small_matrix_permit.c)
|
||||
endif ()
|
||||
if (BUILD_COMPLEX16 AND NOT BUILD_COMPLEX)
|
||||
if (NOT DEFINED DGEMM_SMALL_K_NN)
|
||||
set(DGEMM_SMALL_K_NN ../generic/gemm_small_matrix_kernel_nn.c)
|
||||
endif ()
|
||||
if (NOT DEFINED DGEMM_SMALL_K_NT)
|
||||
set(DGEMM_SMALL_K_NT ../generic/gemm_small_matrix_kernel_nt.c)
|
||||
endif ()
|
||||
if (NOT DEFINED DGEMM_SMALL_K_TN)
|
||||
set(DGEMM_SMALL_K_TN ../generic/gemm_small_matrix_kernel_tn.c)
|
||||
endif ()
|
||||
if (NOT DEFINED DGEMM_SMALL_K_TT)
|
||||
set(DGEMM_SMALL_K_TT ../generic/gemm_small_matrix_kernel_tt.c)
|
||||
endif ()
|
||||
if (NOT DEFINED DGEMM_SMALL_K_B0_NN)
|
||||
set(DGEMM_SMALL_K_B0_NN ../generic/gemm_small_matrix_kernel_nn.c)
|
||||
endif ()
|
||||
if (NOT DEFINED DGEMM_SMALL_K_B0_NT)
|
||||
set(DGEMM_SMALL_K_B0_NT ../generic/gemm_small_matrix_kernel_nt.c)
|
||||
endif ()
|
||||
if (NOT DEFINED DGEMM_SMALL_K_B0_TN)
|
||||
set(DGEMM_SMALL_K_B0_TN ../generic/gemm_small_matrix_kernel_tn.c)
|
||||
endif ()
|
||||
if (NOT DEFINED DGEMM_SMALL_K_B0_TT)
|
||||
set(DGEMM_SMALL_K_B0_TT ../generic/gemm_small_matrix_kernel_tt.c)
|
||||
endif ()
|
||||
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_M_PERMIT}" "" "gemm_small_matrix_permit" false "" "" false "DOUBLE")
|
||||
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_NN}" "NN" "gemm_small_kernel_nn" false "" "" false "DOUBLE")
|
||||
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_NN}" "NR" "gemm_small_kernel_nr" false "" "" false "DOUBLE")
|
||||
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_NN}" "RN" "gemm_small_kernel_rn" false "" "" false "DOUBLE")
|
||||
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_NN}" "RR" "gemm_small_kernel_rr" false "" "" false "DOUBLE")
|
||||
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_NT}" "NT" "gemm_small_kernel_nt" false "" "" false "DOUBLE")
|
||||
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_NT}" "NC" "gemm_small_kernel_nc" false "" "" false "DOUBLE")
|
||||
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_NT}" "RT" "gemm_small_kernel_rt" false "" "" false "DOUBLE")
|
||||
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_NT}" "RC" "gemm_small_kernel_rc" false "" "" false "DOUBLE")
|
||||
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_TN}" "TN" "gemm_small_kernel_tn" false "" "" false "DOUBLE")
|
||||
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_TN}" "TR" "gemm_small_kernel_tr" false "" "" false "DOUBLE")
|
||||
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_TN}" "CN" "gemm_small_kernel_cn" false "" "" false "DOUBLE")
|
||||
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_TN}" "CR" "gemm_small_kernel_cr" false "" "" false "DOUBLE")
|
||||
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_TT}" "TT" "gemm_small_kernel_tt" false "" "" false "DOUBLE")
|
||||
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_TT}" "TC" "gemm_small_kernel_tc" false "" "" false "DOUBLE")
|
||||
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_TT}" "CT" "gemm_small_kernel_ct" false "" "" false "DOUBLE")
|
||||
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_TT}" "CC" "gemm_small_kernel_cc" false "" "" false "DOUBLE")
|
||||
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_NN}" "NN;B0" "gemm_small_kernel_b0_nn" false "" "" false "DOUBLE")
|
||||
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_NN}" "NR;B0" "gemm_small_kernel_b0_nr" false "" "" false "DOUBLE")
|
||||
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_NN}" "RN;B0" "gemm_small_kernel_b0_rn" false "" "" false "DOUBLE")
|
||||
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_NN}" "RR;B0" "gemm_small_kernel_b0_rr" false "" "" false "DOUBLE")
|
||||
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_NT}" "NT;B0" "gemm_small_kernel_b0_nt" false "" "" false "DOUBLE")
|
||||
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_NT}" "NC;B0" "gemm_small_kernel_b0_nc" false "" "" false "DOUBLE")
|
||||
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_NT}" "RT;B0" "gemm_small_kernel_b0_rt" false "" "" false "DOUBLE")
|
||||
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_NT}" "RC;B0" "gemm_small_kernel_b0_rc" false "" "" false "DOUBLE")
|
||||
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_TN}" "TN;B0" "gemm_small_kernel_b0_tn" false "" "" false "DOUBLE")
|
||||
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_TN}" "TR;B0" "gemm_small_kernel_b0_tr" false "" "" false "DOUBLE")
|
||||
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_TN}" "CN;B0" "gemm_small_kernel_b0_cn" false "" "" false "DOUBLE")
|
||||
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_TN}" "CR;B0" "gemm_small_kernel_b0_cr" false "" "" false "DOUBLE")
|
||||
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_TT}" "TT;B0" "gemm_small_kernel_b0_tt" false "" "" false "DOUBLE")
|
||||
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_TT}" "TC;B0" "gemm_small_kernel_b0_tc" false "" "" false "DOUBLE")
|
||||
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_TT}" "CT;B0" "gemm_small_kernel_b0_ct" false "" "" false "DOUBLE")
|
||||
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_TT}" "CC;B0" "gemm_small_kernel_b0_cc" false "" "" false "DOUBLE")
|
||||
endif ()
|
||||
endif ()
|
||||
if (BUILD_COMPLEX16 AND NOT BUILD_SINGLE)
|
||||
GenerateNamedObjects("${KERNELDIR}/${SSCALKERNEL}" "" "scal_k" false "" "" false "SINGLE")
|
||||
endif()
|
||||
if (BUILD_COMPLEX160 AND NOT BUILD_COMPLEX)
|
||||
GenerateNamedObjects("${KERNELDIR}/${CAMAXKERNEL}" "USE_ABS" "amax_k" false "" "" false "COMPLEX")
|
||||
GenerateNamedObjects("${KERNELDIR}/${CAMINKERNEL}" "USE_ABS;USE_MIN" "amin_k" false "" "" false "COMPLEX")
|
||||
if (DEFINED CMAXKERNEL)
|
||||
|
@ -1047,6 +1259,68 @@ endif ()
|
|||
GenerateNamedObjects("${KERNELDIR}/${CGEMMOTCOPY}" "COMPLEX" "${CGEMMOTCOPYOBJ}" false "" "" true "COMPLEX")
|
||||
endif ()
|
||||
GenerateNamedObjects("${KERNELDIR}/${CGEMM_BETA}" "" "gemm_beta" false "" "" false "COMPLEX")
|
||||
if (SMALL_MATRIX_OPT)
|
||||
if (NOT DEFINED CGEMM_SMALL_M_PERMIT)
|
||||
set(CGEMM_SMALL_M_PERMIT ../generic/zgemm_small_matrix_permit)
|
||||
endif ()
|
||||
if (NOT DEFINED CGEMM_SMALL_K_NN)
|
||||
set(CGEMM_SMALL_K_NN ../generic/zgemm_small_matrix_kernel_nn)
|
||||
endif ()
|
||||
if (NOT DEFINED CGEMM_SMALL_K_NT)
|
||||
set(CGEMM_SMALL_K_NT ../generic/zgemm_small_matrix_kernel_nt)
|
||||
endif ()
|
||||
if (NOT DEFINED CGEMM_SMALL_K_TN)
|
||||
set(CGEMM_SMALL_K_TN ../generic/zgemm_small_matrix_kernel_tn)
|
||||
endif ()
|
||||
if (NOT DEFINED CGEMM_SMALL_K_TT)
|
||||
set(CGEMM_SMALL_K_TT ../generic/zgemm_small_matrix_kernel_tt)
|
||||
endif ()
|
||||
if (NOT DEFINED CGEMM_SMALL_K_B0_NN)
|
||||
set(CGEMM_SMALL_K_B0_NN ../generic/zgemm_small_matrix_kernel_nn)
|
||||
endif ()
|
||||
if (NOT DEFINED CGEMM_SMALL_K_B0_NT)
|
||||
set(CGEMM_SMALL_K_B0_NT ../generic/zgemm_small_matrix_kernel_nt)
|
||||
endif ()
|
||||
if (NOT DEFINED CGEMM_SMALL_K_B0_TN)
|
||||
set(CGEMM_SMALL_K_B0_TN ../generic/zgemm_small_matrix_kernel_tn)
|
||||
endif ()
|
||||
if (NOT DEFINED CGEMM_SMALL_K_B0_TT)
|
||||
set(CGEMM_SMALL_K_B0_TT ../generic/zgemm_small_matrix_kernel_tt)
|
||||
endif ()
|
||||
GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_M_PERMIT}.c" "" "gemm_small_matrix_permit" false "" "" false "COMPLEX")
|
||||
GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_NN}.c" "NN" "gemm_small_kernel_nn" false "" "" false "COMPLEX")
|
||||
GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_NN}.c" "NR" "gemm_small_kernel_nr" false "" "" false "COMPLEX")
|
||||
GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_NN}.c" "RN" "gemm_small_kernel_rn" false "" "" false "COMPLEX")
|
||||
GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_NN}.c" "RR" "gemm_small_kernel_rr" false "" "" false "COMPLEX")
|
||||
GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_NT}.c" "NT" "gemm_small_kernel_nt" false "" "" false "COMPLEX")
|
||||
GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_NT}.c" "NC" "gemm_small_kernel_nc" false "" "" false "COMPLEX")
|
||||
GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_NT}.c" "RT" "gemm_small_kernel_rt" false "" "" false "COMPLEX")
|
||||
GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_NT}.c" "RC" "gemm_small_kernel_rc" false "" "" false "COMPLEX")
|
||||
GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_TN}.c" "TN" "gemm_small_kernel_tn" false "" "" false "COMPLEX")
|
||||
GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_TN}.c" "TR" "gemm_small_kernel_tr" false "" "" false "COMPLEX")
|
||||
GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_TN}.c" "CN" "gemm_small_kernel_cn" false "" "" false "COMPLEX")
|
||||
GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_TN}.c" "CR" "gemm_small_kernel_cr" false "" "" false "COMPLEX")
|
||||
GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_TT}.c" "TT" "gemm_small_kernel_tt" false "" "" false "COMPLEX")
|
||||
GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_TT}.c" "TC" "gemm_small_kernel_tc" false "" "" false "COMPLEX")
|
||||
GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_TT}.c" "CT" "gemm_small_kernel_ct" false "" "" false "COMPLEX")
|
||||
GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_TT}.c" "CC" "gemm_small_kernel_cc" false "" "" false "COMPLEX")
|
||||
GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_B0_NN}.c" "NN;B0" "gemm_small_kernel_b0_nn" false "" "" false "COMPLEX")
|
||||
GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_B0_NN}.c" "NR;B0" "gemm_small_kernel_b0_nr" false "" "" false "COMPLEX")
|
||||
GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_B0_NN}.c" "RN;B0" "gemm_small_kernel_b0_rn" false "" "" false "COMPLEX")
|
||||
GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_B0_NN}.c" "RR;B0" "gemm_small_kernel_b0_rr" false "" "" false "COMPLEX")
|
||||
GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_B0_NT}.c" "NT;B0" "gemm_small_kernel_b0_nt" false "" "" false "COMPLEX")
|
||||
GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_B0_NT}.c" "NC;B0" "gemm_small_kernel_b0_nc" false "" "" false "COMPLEX")
|
||||
GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_B0_NT}.c" "RT;B0" "gemm_small_kernel_b0_rt" false "" "" false "COMPLEX")
|
||||
GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_B0_NT}.c" "RC;B0" "gemm_small_kernel_b0_rc" false "" "" false "COMPLEX")
|
||||
GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_B0_TN}.c" "TN;B0" "gemm_small_kernel_b0_tn" false "" "" false "COMPLEX")
|
||||
GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_B0_TN}.c" "TR;B0" "gemm_small_kernel_b0_tr" false "" "" false "COMPLEX")
|
||||
GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_B0_TN}.c" "CN;B0" "gemm_small_kernel_b0_cn" false "" "" false "COMPLEX")
|
||||
GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_B0_TN}.c" "CR;B0" "gemm_small_kernel_b0_cr" false "" "" false "COMPLEX")
|
||||
GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_B0_TT}.c" "TT;B0" "gemm_small_kernel_b0_tt" false "" "" false "COMPLEX")
|
||||
GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_B0_TT}.c" "TC;B0" "gemm_small_kernel_b0_tc" false "" "" false "COMPLEX")
|
||||
GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_B0_TT}.c" "CT;B0" "gemm_small_kernel_b0_ct" false "" "" false "COMPLEX")
|
||||
GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_B0_TT}.c" "CC;B0" "gemm_small_kernel_b0_cc" false "" "" false "COMPLEX")
|
||||
endif ()
|
||||
GenerateNamedObjects("generic/ztrsm_uncopy_${CGEMM_UNROLL_M}.c" "UNIT" "trsm_iunucopy" false "" ${TSUFFIX} false "COMPLEX")
|
||||
GenerateNamedObjects("generic/ztrsm_uncopy_${CGEMM_UNROLL_M}.c" "" "trsm_iunncopy" false "" ${TSUFFIX} false "COMPLEX")
|
||||
GenerateNamedObjects("generic/ztrsm_uncopy_${CGEMM_UNROLL_N}.c" "OUTER;UNIT" "trsm_ounucopy" false "" ${TSUFFIX} false "COMPLEX")
|
||||
|
|
|
@ -23,7 +23,7 @@ ifeq ($(C_COMPILER), CLANG)
|
|||
# Any clang posing as gcc 4.2 should be new enough (3.4 or later)
|
||||
GCCVERSIONCHECK := $(GCCVERSIONGT4)$(GCCVERSIONGTEQ4)$(GCCMINORVERSIONGTEQ2)
|
||||
ifeq ($(GCCVERSIONCHECK), $(filter $(GCCVERSIONCHECK), 011 110 111))
|
||||
AVX2OPT = -mavx2
|
||||
AVX2OPT = -mavx2 -mfma
|
||||
endif
|
||||
endif
|
||||
ifdef NO_AVX2
|
||||
|
@ -73,6 +73,8 @@ else ifeq ($(TARGET_CORE), SKYLAKEX)
|
|||
endif
|
||||
else ifeq ($(TARGET_CORE), HASWELL)
|
||||
override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE) $(AVX2OPT)
|
||||
else ifeq ($(TARGET_CORE), ZEN)
|
||||
override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE) $(AVX2OPT)
|
||||
else ifeq ($(TARGET_CORE), LOONGSON3R4)
|
||||
override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE) $(MSA_FLAGS)
|
||||
else
|
||||
|
|
|
@ -207,9 +207,12 @@ ifneq "$(or $(BUILD_SINGLE), $(BUILD_DOUBLE), $(BUILD_COMPLEX))" ""
|
|||
SBLASOBJS += \
|
||||
sgemv_n$(TSUFFIX).$(SUFFIX) sgemv_t$(TSUFFIX).$(SUFFIX)
|
||||
endif
|
||||
ifneq "$(or $(BUILD_SINGLE), $(BUILD_DOUBLE))" ""
|
||||
SBLASOBJS += \
|
||||
ssymv_U$(TSUFFIX).$(SUFFIX) ssymv_L$(TSUFFIX).$(SUFFIX)
|
||||
endif
|
||||
ifeq ($(BUILD_SINGLE),1)
|
||||
SBLASOBJS += \
|
||||
ssymv_U$(TSUFFIX).$(SUFFIX) ssymv_L$(TSUFFIX).$(SUFFIX) \
|
||||
sger_k$(TSUFFIX).$(SUFFIX)
|
||||
endif
|
||||
ifeq ($(BUILD_DOUBLE),1)
|
||||
|
@ -359,8 +362,7 @@ $(KDIR)xgemv_d$(TSUFFIX).$(SUFFIX) $(KDIR)xgemv_d$(TSUFFIX).$(PSUFFIX) : $(KERNE
|
|||
$(CC) -c $(CFLAGS) -DXDOUBLE -DCOMPLEX -DTRANS -DCONJ -DXCONJ $< -o $@
|
||||
|
||||
|
||||
ifeq ($(BUILD_SINGLE),1)
|
||||
|
||||
ifneq "$(or (BUILD_SINGLE),$(BUILD_DOUBLE))" ""
|
||||
$(KDIR)ssymv_U$(TSUFFIX).$(SUFFIX) $(KDIR)ssymv_U$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SSYMV_U_KERNEL) $(SSYMV_U_PARAM)
|
||||
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -ULOWER $< -o $@
|
||||
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
|
||||
#define ASSEMBLER
|
||||
#include "common.h"
|
||||
#include "version.h"
|
||||
|
||||
|
||||
#define N $16
|
||||
#define X $17
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
|
||||
#define ASSEMBLER
|
||||
#include "common.h"
|
||||
#include "version.h"
|
||||
|
||||
|
||||
#define PREFETCHSIZE 88
|
||||
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
|
||||
#define ASSEMBLER
|
||||
#include "common.h"
|
||||
#include "version.h"
|
||||
|
||||
|
||||
#define PREFETCHSIZE 40
|
||||
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
|
||||
#define ASSEMBLER
|
||||
#include "common.h"
|
||||
#include "version.h"
|
||||
|
||||
|
||||
.set noat
|
||||
.set noreorder
|
||||
|
|
|
@ -39,7 +39,7 @@
|
|||
#define ASSEMBLER
|
||||
|
||||
#include "common.h"
|
||||
#include "version.h"
|
||||
|
||||
|
||||
#define PREFETCH_SIZE 80
|
||||
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
|
||||
#define ASSEMBLER
|
||||
#include "common.h"
|
||||
#include "version.h"
|
||||
|
||||
|
||||
#define N $16
|
||||
#define X $17
|
||||
|
|
|
@ -42,7 +42,7 @@
|
|||
#define ASSEMBLER
|
||||
|
||||
#include "common.h"
|
||||
#include "version.h"
|
||||
|
||||
|
||||
.globl NAME
|
||||
.ent NAME
|
||||
|
|
|
@ -39,7 +39,7 @@
|
|||
#define ASSEMBLER
|
||||
|
||||
#include "common.h"
|
||||
#include "version.h"
|
||||
|
||||
|
||||
#define PREFETCH_SIZE 80
|
||||
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
|
||||
#define ASSEMBLER
|
||||
#include "common.h"
|
||||
#include "version.h"
|
||||
|
||||
|
||||
#define PREFETCHSIZE 88
|
||||
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
|
||||
#define ASSEMBLER
|
||||
#include "common.h"
|
||||
#include "version.h"
|
||||
|
||||
|
||||
.set noat
|
||||
.set noreorder
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
|
||||
#define ASSEMBLER
|
||||
#include "common.h"
|
||||
#include "version.h"
|
||||
|
||||
|
||||
#if !defined(EV4) && !defined(EV5) && !defined(EV6)
|
||||
#error "Architecture is not specified."
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
|
||||
#define ASSEMBLER
|
||||
#include "common.h"
|
||||
#include "version.h"
|
||||
|
||||
|
||||
#define STACKSIZE 64
|
||||
#define PREFETCHSIZE 32
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
|
||||
#define ASSEMBLER
|
||||
#include "common.h"
|
||||
#include "version.h"
|
||||
|
||||
|
||||
#define STACKSIZE 64
|
||||
#define PREFETCHSIZE 32
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
|
||||
#define ASSEMBLER
|
||||
#include "common.h"
|
||||
#include "version.h"
|
||||
|
||||
|
||||
#define N $16
|
||||
#define X $17
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
|
||||
#define ASSEMBLER
|
||||
#include "common.h"
|
||||
#include "version.h"
|
||||
|
||||
|
||||
#define N $16
|
||||
#define X $17
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
|
||||
#define ASSEMBLER
|
||||
#include "common.h"
|
||||
#include "version.h"
|
||||
|
||||
|
||||
#define N $16
|
||||
#define X $17
|
||||
|
|
|
@ -36,7 +36,7 @@
|
|||
/* or implied, of The University of Texas at Austin. */
|
||||
/*********************************************************************/
|
||||
|
||||
#include "version.h"
|
||||
|
||||
|
||||
.set noat
|
||||
.set noreorder
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
|
||||
#define ASSEMBLER
|
||||
#include "common.h"
|
||||
#include "version.h"
|
||||
|
||||
|
||||
#define N $16
|
||||
#define X $17
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
|
||||
#define ASSEMBLER
|
||||
#include "common.h"
|
||||
#include "version.h"
|
||||
|
||||
|
||||
#define N $16
|
||||
#define X $17
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
|
||||
#define ASSEMBLER
|
||||
#include "common.h"
|
||||
#include "version.h"
|
||||
|
||||
|
||||
#define PREFETCHSIZE 88
|
||||
|
||||
|
|
|
@ -39,7 +39,7 @@
|
|||
#define ASSEMBLER
|
||||
|
||||
#include "common.h"
|
||||
#include "version.h"
|
||||
|
||||
|
||||
#define PREFETCH_SIZE 80
|
||||
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
|
||||
#define ASSEMBLER
|
||||
#include "common.h"
|
||||
#include "version.h"
|
||||
|
||||
|
||||
#define PREFETCHSIZE 88
|
||||
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
|
||||
#define ASSEMBLER
|
||||
#include "common.h"
|
||||
#include "version.h"
|
||||
|
||||
|
||||
PROLOGUE
|
||||
PROFCODE
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
|
||||
#define ASSEMBLER
|
||||
#include "common.h"
|
||||
#include "version.h"
|
||||
|
||||
|
||||
#if !defined(EV4) && !defined(EV5) && !defined(EV6)
|
||||
#error "Architecture is not specified."
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
|
||||
#define ASSEMBLER
|
||||
#include "common.h"
|
||||
#include "version.h"
|
||||
|
||||
|
||||
#if !defined(EV4) && !defined(EV5) && !defined(EV6)
|
||||
#error "Architecture is not specified."
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue