Merge branch 'develop' into release-0.3.0

This commit is contained in:
Martin Kroeker 2023-03-26 23:34:17 +02:00 committed by GitHub
commit e46971b9d5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2603 changed files with 155622 additions and 112440 deletions

View File

@ -2,6 +2,9 @@ name: continuous build
on: [push, pull_request] on: [push, pull_request]
permissions:
contents: read # to fetch code (actions/checkout)
jobs: jobs:
build: build:
runs-on: ${{ matrix.os }} runs-on: ${{ matrix.os }}
@ -34,7 +37,7 @@ jobs:
- name: Install Dependencies - name: Install Dependencies
run: | run: |
if [ "$RUNNER_OS" == "Linux" ]; then if [ "$RUNNER_OS" == "Linux" ]; then
sudo apt-get install -y gfortran cmake ccache sudo apt-get install -y gfortran cmake ccache libtinfo5
elif [ "$RUNNER_OS" == "macOS" ]; then elif [ "$RUNNER_OS" == "macOS" ]; then
# It looks like "gfortran" isn't working correctly unless "gcc" is re-installed. # It looks like "gfortran" isn't working correctly unless "gcc" is re-installed.
brew reinstall gcc brew reinstall gcc
@ -150,6 +153,7 @@ jobs:
matrix: matrix:
msystem: [MINGW64, MINGW32, CLANG64] msystem: [MINGW64, MINGW32, CLANG64]
idx: [int32, int64] idx: [int32, int64]
build-type: [Release]
include: include:
- msystem: MINGW64 - msystem: MINGW64
idx: int32 idx: int32
@ -173,6 +177,11 @@ jobs:
idx64-flags: -DBINARY=64 -DINTERFACE64=1 idx64-flags: -DBINARY=64 -DINTERFACE64=1
target-prefix: mingw-w64-clang-x86_64 target-prefix: mingw-w64-clang-x86_64
c-lapack-flags: -DC_LAPACK=ON c-lapack-flags: -DC_LAPACK=ON
- msystem: MINGW64
idx: int32
target-prefix: mingw-w64-x86_64
fc-pkg: mingw-w64-x86_64-gcc-fortran
build-type: None
exclude: exclude:
- msystem: MINGW32 - msystem: MINGW32
idx: int64 idx: int64
@ -215,11 +224,11 @@ jobs:
path: C:/msys64/home/runneradmin/.ccache path: C:/msys64/home/runneradmin/.ccache
# We include the commit sha in the cache key, as new cache entries are # We include the commit sha in the cache key, as new cache entries are
# only created if there is no existing entry for the key yet. # only created if there is no existing entry for the key yet.
key: ccache-msys2-${{ matrix.msystem }}-${{ matrix.idx }}-${{ github.ref }}-${{ github.sha }} key: ccache-msys2-${{ matrix.msystem }}-${{ matrix.idx }}-${{ matrix.build-type }}-${{ github.ref }}-${{ github.sha }}
# Restore a matching ccache cache entry. Prefer same branch. # Restore a matching ccache cache entry. Prefer same branch.
restore-keys: | restore-keys: |
ccache-msys2-${{ matrix.msystem }}-${{ matrix.idx }}-${{ github.ref }} ccache-msys2-${{ matrix.msystem }}-${{ matrix.idx }}-${{ matrix.build-type }}-${{ github.ref }}
ccache-msys2-${{ matrix.msystem }}-${{ matrix.idx }} ccache-msys2-${{ matrix.msystem }}-${{ matrix.idx }}-${{ matrix.build-type }}
- name: Configure ccache - name: Configure ccache
# Limit the maximum size and switch on compression to avoid exceeding the total disk or cache quota. # Limit the maximum size and switch on compression to avoid exceeding the total disk or cache quota.
@ -235,7 +244,8 @@ jobs:
- name: Configure OpenBLAS - name: Configure OpenBLAS
run: | run: |
mkdir build && cd build mkdir build && cd build
cmake -DBUILD_SHARED_LIBS=ON \ cmake -DCMAKE_BUILD_TYPE=${{ matrix.build-type }} \
-DBUILD_SHARED_LIBS=ON \
-DBUILD_STATIC_LIBS=ON \ -DBUILD_STATIC_LIBS=ON \
-DDYNAMIC_ARCH=ON \ -DDYNAMIC_ARCH=ON \
-DUSE_THREAD=ON \ -DUSE_THREAD=ON \
@ -257,3 +267,54 @@ jobs:
- name: Run tests - name: Run tests
timeout-minutes: 60 timeout-minutes: 60
run: cd build && ctest run: cd build && ctest
cross_build:
runs-on: ubuntu-22.04
strategy:
fail-fast: false
matrix:
include:
- target: mips64el
triple: mips64el-linux-gnuabi64
opts: DYNAMIC_ARCH=1 TARGET=GENERIC
- target: riscv64
triple: riscv64-linux-gnu
opts: TARGET=RISCV64_GENERIC
- target: mipsel
triple: mipsel-linux-gnu
opts: TARGET=MIPS1004K
- target: alpha
triple: alpha-linux-gnu
opts: TARGET=EV4
steps:
- name: Checkout repository
uses: actions/checkout@v3
- name: Install Dependencies
run: |
sudo apt-get install -y ccache gcc-${{ matrix.triple }} gfortran-${{ matrix.triple }} libgomp1-${{ matrix.target }}-cross
- name: Compilation cache
uses: actions/cache@v3
with:
path: ~/.ccache
key: ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }}-${{ github.sha }}
restore-keys: |
ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }}
ccache-${{ runner.os }}-${{ matrix.target }}
- name: Configure ccache
run: |
# Limit the maximum size and switch on compression to avoid exceeding the total disk or cache quota (5 GB).
test -d ~/.ccache || mkdir -p ~/.ccache
echo "max_size = 300M" > ~/.ccache/ccache.conf
echo "compression = true" >> ~/.ccache/ccache.conf
ccache -s
- name: Build OpenBLAS
run: |
make -j$(nproc) HOSTCC="ccache gcc" CC="ccache ${{ matrix.triple }}-gcc" FC="ccache ${{ matrix.triple }}-gfortran" ARCH=${{ matrix.target }} ${{ matrix.opts }}

117
.github/workflows/mips64.yml vendored Normal file
View File

@ -0,0 +1,117 @@
name: mips64 qemu test
on: [push, pull_request]
permissions:
contents: read # to fetch code (actions/checkout)
jobs:
TEST:
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
include:
- target: MIPS64_GENERIC
triple: mips64el-linux-gnuabi64
opts: NO_SHARED=1 TARGET=MIPS64_GENERIC
- target: SICORTEX
triple: mips64el-linux-gnuabi64
opts: NO_SHARED=1 TARGET=SICORTEX
- target: I6400
triple: mipsisa64r6el-linux-gnuabi64
opts: NO_SHARED=1 TARGET=I6400
- target: P6600
triple: mipsisa64r6el-linux-gnuabi64
opts: NO_SHARED=1 TARGET=P6600
- target: I6500
triple: mipsisa64r6el-linux-gnuabi64
opts: NO_SHARED=1 TARGET=I6500
steps:
- name: Checkout repository
uses: actions/checkout@v3
- name: install build deps
run: |
sudo apt-get update
sudo apt-get install autoconf automake autotools-dev ninja-build make ccache \
gcc-${{ matrix.triple }} gfortran-${{ matrix.triple }} libgomp1-mips64el-cross
- name: checkout qemu
uses: actions/checkout@v3
with:
repository: qemu/qemu
path: qemu
ref: 79dfa177ae348bb5ab5f97c0915359b13d6186e2
- name: build qemu
run: |
cd qemu
./configure --prefix=$GITHUB_WORKSPACE/qemu-install --target-list=mips64el-linux-user --disable-system
make -j$(nproc)
make install
- name: Compilation cache
uses: actions/cache@v3
with:
path: ~/.ccache
key: ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }}-${{ github.sha }}
restore-keys: |
ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }}
ccache-${{ runner.os }}-${{ matrix.target }}
- name: Configure ccache
run: |
test -d ~/.ccache || mkdir -p ~/.ccache
echo "max_size = 300M" > ~/.ccache/ccache.conf
echo "compression = true" >> ~/.ccache/ccache.conf
ccache -s
- name: build OpenBLAS
run: make CC='ccache ${{ matrix.triple }}-gcc -static' FC='ccache ${{ matrix.triple }}-gfortran -static' ${{ matrix.opts }} HOSTCC='ccache gcc' -j$(nproc)
- name: test
run: |
export PATH=$GITHUB_WORKSPACE/qemu-install/bin/:$PATH
qemu-mips64el ./utest/openblas_utest
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xscblat1
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xdcblat1
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xccblat1
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xzcblat1
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xscblat2 < ./ctest/sin2
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xdcblat2 < ./ctest/din2
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xccblat2 < ./ctest/cin2
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xzcblat2 < ./ctest/zin2
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xscblat3 < ./ctest/sin3
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xdcblat3 < ./ctest/din3
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xccblat3 < ./ctest/cin3
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xzcblat3 < ./ctest/zin3
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/sblat1
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/dblat1
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/cblat1
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/zblat1
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/sblat1
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/dblat1
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/cblat1
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/zblat1
rm -f ./test/?BLAT2.SUMM
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/sblat2 < ./test/sblat2.dat
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/dblat2 < ./test/dblat2.dat
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/cblat2 < ./test/cblat2.dat
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/zblat2 < ./test/zblat2.dat
rm -f ./test/?BLAT2.SUMM
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/sblat2 < ./test/sblat2.dat
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/dblat2 < ./test/dblat2.dat
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/cblat2 < ./test/cblat2.dat
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/zblat2 < ./test/zblat2.dat
rm -f ./test/?BLAT3.SUMM
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/sblat3 < ./test/sblat3.dat
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/dblat3 < ./test/dblat3.dat
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/cblat3 < ./test/cblat3.dat
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/zblat3 < ./test/zblat3.dat
rm -f ./test/?BLAT3.SUMM
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/sblat3 < ./test/sblat3.dat
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/dblat3 < ./test/dblat3.dat
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/cblat3 < ./test/cblat3.dat
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/zblat3 < ./test/zblat3.dat

View File

@ -17,6 +17,10 @@ on:
# it only makes sense to test if this file has been changed # it only makes sense to test if this file has been changed
name: Nightly-Homebrew-Build name: Nightly-Homebrew-Build
permissions:
contents: read # to fetch code (actions/checkout)
jobs: jobs:
build-OpenBLAS-with-Homebrew: build-OpenBLAS-with-Homebrew:
runs-on: macos-latest runs-on: macos-latest
@ -28,6 +32,8 @@ jobs:
HOMEBREW_NO_AUTO_UPDATE: "ON" HOMEBREW_NO_AUTO_UPDATE: "ON"
HOMEBREW_NO_BOTTLE_SOURCE_FALLBACK: "ON" HOMEBREW_NO_BOTTLE_SOURCE_FALLBACK: "ON"
HOMEBREW_NO_INSTALL_CLEANUP: "ON" HOMEBREW_NO_INSTALL_CLEANUP: "ON"
HOMEBREW_NO_INSTALLED_DEPENDENTS_CHECK: "ON"
HOMEBREW_NO_INSTALL_FROM_API: "ON"
steps: steps:
- name: Random delay for cron job - name: Random delay for cron job

View File

@ -30,7 +30,7 @@ matrix:
before_script: &common-before before_script: &common-before
- COMMON_FLAGS="DYNAMIC_ARCH=1 TARGET=POWER8 NUM_THREADS=32" - COMMON_FLAGS="DYNAMIC_ARCH=1 TARGET=POWER8 NUM_THREADS=32"
script: script:
- travis_wait 20 make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE - travis_wait 50 make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE
- make -C test $COMMON_FLAGS $BTYPE - make -C test $COMMON_FLAGS $BTYPE
- make -C ctest $COMMON_FLAGS $BTYPE - make -C ctest $COMMON_FLAGS $BTYPE
- make -C utest $COMMON_FLAGS $BTYPE - make -C utest $COMMON_FLAGS $BTYPE
@ -104,7 +104,7 @@ matrix:
- sudo apt-get update - sudo apt-get update
- sudo apt-get install gcc-9 gfortran-9 -y - sudo apt-get install gcc-9 gfortran-9 -y
script: script:
- travis_wait 20 make QUIET_MAKE=1 BINARY=64 USE_OPENMP=1 CC=gcc-9 FC=gfortran-9 - travis_wait 50 make QUIET_MAKE=1 BINARY=64 USE_OPENMP=1 CC=gcc-9 FC=gfortran-9
- make -C test $COMMON_FLAGS $BTYPE - make -C test $COMMON_FLAGS $BTYPE
- make -C ctest $COMMON_FLAGS $BTYPE - make -C ctest $COMMON_FLAGS $BTYPE
- make -C utest $COMMON_FLAGS $BTYPE - make -C utest $COMMON_FLAGS $BTYPE
@ -121,7 +121,7 @@ matrix:
- sudo apt-get update - sudo apt-get update
- sudo apt-get install gcc-9 gfortran-9 -y - sudo apt-get install gcc-9 gfortran-9 -y
script: script:
- travis_wait 20 make QUIET_MAKE=1 BUILD_BFLOAT16=1 BINARY=64 USE_OPENMP=1 CC=gcc-9 FC=gfortran-9 - travis_wait 50 make QUIET_MAKE=1 BUILD_BFLOAT16=1 BINARY=64 USE_OPENMP=1 CC=gcc-9 FC=gfortran-9
- make -C test $COMMON_FLAGS $BTYPE - make -C test $COMMON_FLAGS $BTYPE
- make -C ctest $COMMON_FLAGS $BTYPE - make -C ctest $COMMON_FLAGS $BTYPE
- make -C utest $COMMON_FLAGS $BTYPE - make -C utest $COMMON_FLAGS $BTYPE
@ -285,6 +285,25 @@ matrix:
- gfortran - gfortran
script: script:
- travis_wait 45 make && make lapack-test - travis_wait 45 make && make lapack-test
env:
- TARGET_BOX=NEOVERSE_N1
- &test-neon1-gcc8
os: linux
arch: arm64
dist: focal
group: edge
virt: lxd
compiler: gcc
addons:
apt:
packages:
- gcc-8
- gfortran-8
script:
- travis_wait 45 make QUIET_MAKE=1 CC=gcc-8 FC=gfortran-8 DYNAMIC_ARCH=1
env:
- TARGET_BOX=NEOVERSE_N1-GCC8
# whitelist # whitelist
branches: branches:

View File

@ -8,7 +8,7 @@ project(OpenBLAS C ASM)
set(OpenBLAS_MAJOR_VERSION 0) set(OpenBLAS_MAJOR_VERSION 0)
set(OpenBLAS_MINOR_VERSION 3) set(OpenBLAS_MINOR_VERSION 3)
set(OpenBLAS_PATCH_VERSION 21) set(OpenBLAS_PATCH_VERSION 22)
set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}") set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}")
@ -36,6 +36,8 @@ option(USE_LOCKING "Use locks even in single-threaded builds to make them callab
option(USE_PERL "Use the older PERL scripts for build preparation instead of universal shell scripts" OFF) option(USE_PERL "Use the older PERL scripts for build preparation instead of universal shell scripts" OFF)
option(NO_WARMUP "Do not run a benchmark on each startup just to find the best location for the memory buffer" ON)
if(${CMAKE_SYSTEM_NAME} MATCHES "Linux") if(${CMAKE_SYSTEM_NAME} MATCHES "Linux")
option(NO_AFFINITY "Disable support for CPU affinity masks to avoid binding processes from e.g. R or numpy/scipy to a single core" ON) option(NO_AFFINITY "Disable support for CPU affinity masks to avoid binding processes from e.g. R or numpy/scipy to a single core" ON)
else() else()
@ -212,10 +214,10 @@ if(NOT NO_LAPACKE)
add_library(LAPACKE OBJECT ${LAPACKE_SOURCES}) add_library(LAPACKE OBJECT ${LAPACKE_SOURCES})
list(APPEND TARGET_OBJS "$<TARGET_OBJECTS:LAPACKE>") list(APPEND TARGET_OBJS "$<TARGET_OBJECTS:LAPACKE>")
endif() endif()
if(BUILD_RELAPACK) #if(BUILD_RELAPACK)
add_library(RELAPACK OBJECT ${RELA_SOURCES}) # add_library(RELAPACK OBJECT ${RELA_SOURCES})
list(APPEND TARGET_OBJS "$<TARGET_OBJECTS:RELAPACK>") # list(APPEND TARGET_OBJS "$<TARGET_OBJECTS:RELAPACK>")
endif() #endif()
set(OpenBLAS_LIBS "") set(OpenBLAS_LIBS "")
if(BUILD_STATIC_LIBS) if(BUILD_STATIC_LIBS)
add_library(${OpenBLAS_LIBNAME}_static STATIC ${TARGET_OBJS} ${OpenBLAS_DEF_FILE}) add_library(${OpenBLAS_LIBNAME}_static STATIC ${TARGET_OBJS} ${OpenBLAS_DEF_FILE})
@ -236,7 +238,7 @@ endif()
set_target_properties(${OpenBLAS_LIBS} PROPERTIES OUTPUT_NAME ${OpenBLAS_LIBNAME}) set_target_properties(${OpenBLAS_LIBS} PROPERTIES OUTPUT_NAME ${OpenBLAS_LIBNAME})
# Android needs to explicitly link against libm # Android needs to explicitly link against libm
if(ANDROID) if (${CMAKE_SYSTEM_NAME} MATCHES "AIX|Android|Linux|FreeBSD|OpenBSD|NetBSD|DragonFly|Darwin")
if(BUILD_STATIC_LIBS) if(BUILD_STATIC_LIBS)
target_link_libraries(${OpenBLAS_LIBNAME}_static m) target_link_libraries(${OpenBLAS_LIBNAME}_static m)
endif() endif()
@ -396,7 +398,7 @@ if (BUILD_SHARED_LIBS AND NOT ${SYMBOLPREFIX}${SYMBOLSUFFIX} STREQUAL "")
message(STATUS "adding suffix ${SYMBOLSUFFIX} to names of exported symbols in ${OpenBLAS_LIBNAME}") message(STATUS "adding suffix ${SYMBOLSUFFIX} to names of exported symbols in ${OpenBLAS_LIBNAME}")
endif() endif()
if (NOT DEFINED USE_PERL) if (NOT USE_PERL)
add_custom_command(TARGET ${OpenBLAS_LIBNAME}_shared POST_BUILD add_custom_command(TARGET ${OpenBLAS_LIBNAME}_shared POST_BUILD
COMMAND ${PROJECT_SOURCE_DIR}/exports/gensymbol "objcopy" "${ARCH}" "${BU}" "${EXPRECISION_IN}" "${NO_CBLAS_IN}" "${NO_LAPACK_IN}" "${NO_LAPACKE_IN}" "${NEED2UNDERSCORES_IN}" "${ONLY_CBLAS_IN}" \"${SYMBOLPREFIX}\" \"${SYMBOLSUFFIX}\" "${BUILD_LAPACK_DEPRECATED}" "${BUILD_BFLOAT16}" "${BUILD_SINGLE}" "${BUILD_DOUBLE}" "${BUILD_COMPLEX}" "${BUILD_COMPLEX16}" > ${PROJECT_BINARY_DIR}/objcopy.def COMMAND ${PROJECT_SOURCE_DIR}/exports/gensymbol "objcopy" "${ARCH}" "${BU}" "${EXPRECISION_IN}" "${NO_CBLAS_IN}" "${NO_LAPACK_IN}" "${NO_LAPACKE_IN}" "${NEED2UNDERSCORES_IN}" "${ONLY_CBLAS_IN}" \"${SYMBOLPREFIX}\" \"${SYMBOLSUFFIX}\" "${BUILD_LAPACK_DEPRECATED}" "${BUILD_BFLOAT16}" "${BUILD_SINGLE}" "${BUILD_DOUBLE}" "${BUILD_COMPLEX}" "${BUILD_COMPLEX16}" > ${PROJECT_BINARY_DIR}/objcopy.def
COMMAND objcopy -v --redefine-syms ${PROJECT_BINARY_DIR}/objcopy.def ${PROJECT_BINARY_DIR}/lib/lib${OpenBLAS_LIBNAME}.so COMMAND objcopy -v --redefine-syms ${PROJECT_BINARY_DIR}/objcopy.def ${PROJECT_BINARY_DIR}/lib/lib${OpenBLAS_LIBNAME}.so

View File

@ -211,4 +211,5 @@ In chronological order:
* PLCT Lab, Institute of Software Chinese Academy of Sciences * PLCT Lab, Institute of Software Chinese Academy of Sciences
* [2022-03] Support RISC-V Vector Intrinisc 1.0 version. * [2022-03] Support RISC-V Vector Intrinisc 1.0 version.
* Pablo Romero <https://github.com/pablorcum>
* [2022-08] Fix building from sources for QNX

View File

@ -80,7 +80,7 @@
SUN SUN
Fujitsu Fujitsu
4. Suported precision 4. Supported precision
Now x86/x86_64 version support 80bit FP precision in addition to Now x86/x86_64 version support 80bit FP precision in addition to
normal double presicion and single precision. Currently only normal double presicion and single precision. Currently only

View File

@ -110,6 +110,10 @@ ifeq ($(OSNAME), Darwin)
@echo "\"make PREFIX=/your_installation_path/ install\"." @echo "\"make PREFIX=/your_installation_path/ install\"."
@echo @echo
@echo "(or set PREFIX in Makefile.rule and run make install." @echo "(or set PREFIX in Makefile.rule and run make install."
@echo
@echo "Note that any flags passed to make during build should also be passed to make install"
@echo "to circumvent any install errors."
@echo
@echo "If you want to move the .dylib to a new location later, make sure you change" @echo "If you want to move the .dylib to a new location later, make sure you change"
@echo "the internal name of the dylib with:" @echo "the internal name of the dylib with:"
@echo @echo
@ -118,8 +122,11 @@ endif
@echo @echo
@echo "To install the library, you can run \"make PREFIX=/path/to/your/installation install\"." @echo "To install the library, you can run \"make PREFIX=/path/to/your/installation install\"."
@echo @echo
@echo "Note that any flags passed to make during build should also be passed to make install"
@echo "to circumvent any install errors."
@echo
shared : shared : libs netlib $(RELA)
ifneq ($(NO_SHARED), 1) ifneq ($(NO_SHARED), 1)
ifeq ($(OSNAME), $(filter $(OSNAME),Linux SunOS Android Haiku FreeBSD DragonFly)) ifeq ($(OSNAME), $(filter $(OSNAME),Linux SunOS Android Haiku FreeBSD DragonFly))
@$(MAKE) -C exports so @$(MAKE) -C exports so
@ -143,7 +150,7 @@ ifeq ($(OSNAME), CYGWIN_NT)
endif endif
endif endif
tests : tests : libs netlib $(RELA) shared
ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN))) ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN)))
touch $(LIBNAME) touch $(LIBNAME)
ifndef NO_FBLAS ifndef NO_FBLAS
@ -271,7 +278,11 @@ prof_lapack : lapack_prebuild
lapack_prebuild : lapack_prebuild :
ifeq ($(NO_LAPACK), $(filter 0,$(NO_LAPACK))) ifeq ($(NO_LAPACK), $(filter 0,$(NO_LAPACK)))
-@echo "FC = $(FC)" > $(NETLIB_LAPACK_DIR)/make.inc -@echo "FC = $(FC)" > $(NETLIB_LAPACK_DIR)/make.inc
ifeq ($(F_COMPILER), GFORTRAN)
-@echo "override FFLAGS = $(LAPACK_FFLAGS) -fno-tree-vectorize" >> $(NETLIB_LAPACK_DIR)/make.inc
else
-@echo "override FFLAGS = $(LAPACK_FFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc -@echo "override FFLAGS = $(LAPACK_FFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc
endif
-@echo "FFLAGS_DRV = $(LAPACK_FFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc -@echo "FFLAGS_DRV = $(LAPACK_FFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "POPTS = $(LAPACK_FPFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc -@echo "POPTS = $(LAPACK_FPFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "FFLAGS_NOOPT = -O0 $(LAPACK_NOOPT)" >> $(NETLIB_LAPACK_DIR)/make.inc -@echo "FFLAGS_NOOPT = -O0 $(LAPACK_NOOPT)" >> $(NETLIB_LAPACK_DIR)/make.inc

5089
Makefile.L3 Normal file

File diff suppressed because it is too large Load Diff

View File

@ -1,42 +1,24 @@
CPP = $(CC) -E
RANLIB = ranlib
ifeq ($(LIBSUBARCH), EV4)
LIBNAME = $(LIBPREFIX)_ev4.a
LIBNAME_P = $(LIBPREFIX)_ev4_p.a
endif
ifeq ($(LIBSUBARCH), EV5)
LIBNAME = $(LIBPREFIX)_ev5.a
LIBNAME_P = $(LIBPREFIX)_ev5_p.a
endif
ifeq ($(LIBSUBARCH), EV6)
LIBNAME = $(LIBPREFIX)_ev6.a
LIBNAME_P = $(LIBPREFIX)_ev6_p.a
endif
ifneq ($(COMPILER), NATIVE) ifneq ($(COMPILER), NATIVE)
# GCC User # GCC User
ifeq ($(LIBSUBARCH), EV4) ifeq ($(CORE), EV4)
OPTION += -DEV4 -mcpu=ev4 CCOMMON_OPT += -mcpu=ev4
endif endif
ifeq ($(LIBSUBARCH), EV5) ifeq ($(CORE), EV5)
OPTION += -DEV5 -mcpu=ev5 CCOMMON_OPT += -mcpu=ev5
endif endif
ifeq ($(LIBSUBARCH), EV6) ifeq ($(CORE), EV6)
OPTION += -DEV6 -mcpu=ev6 CCOMMON_OPT += -mcpu=ev6
endif endif
else else
# Compaq Compiler User # Compaq Compiler User
ifeq ($(LIBSUBARCH), EV4) ifeq ($(CORE), EV4)
OPTION += -DEV4 -tune ev4 -arch ev4 CCOMMON_OPT += -tune ev4 -arch ev4
endif endif
ifeq ($(LIBSUBARCH), EV5) ifeq ($(CORE), EV5)
OPTION += -DEV5 -tune ev5 -arch ev5 CCOMMON_OPT += -tune ev5 -arch ev5
endif endif
ifeq ($(LIBSUBARCH), EV6) ifeq ($(CORE), EV6)
OPTION += -DEV6 -tune ev6 -arch ev6 CCOMMON_OPT += -tune ev6 -arch ev6
endif endif
endif endif

View File

@ -89,17 +89,17 @@ endif
endif endif
# Use a72 tunings because Neoverse-V1 is only available # Use a72 tunings because Neoverse-V1 is only available
# in GCC>=9.4 # in GCC>=10.4
ifeq ($(CORE), NEOVERSEV1) ifeq ($(CORE), NEOVERSEV1)
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ7) $(ISCLANG))) ifeq (1, $(filter 1,$(GCCVERSIONGTEQ7) $(ISCLANG)))
ifeq ($(GCCVERSIONGTEQ9), 1) ifeq ($(GCCVERSIONGTEQ10), 1)
ifeq (1, $(filter 1,$(GCCMINORVERSIONGTEQ4) $(GCCVERSIONGTEQ10))) ifeq (1, $(filter 1,$(GCCMINORVERSIONGTEQ4) $(GCCVERSIONGTEQ11)))
CCOMMON_OPT += -march=armv8.4-a -mtune=neoverse-v1 CCOMMON_OPT += -march=armv8.4-a+sve -mtune=neoverse-v1
ifneq ($(F_COMPILER), NAG) ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8.4-a -mtune=neoverse-v1 FCOMMON_OPT += -march=armv8.4-a -mtune=neoverse-v1
endif endif
else else
CCOMMON_OPT += -march=armv8.4-a -mtune=native CCOMMON_OPT += -march=armv8.4-a+sve -mtune=native
ifneq ($(F_COMPILER), NAG) ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8.4-a -mtune=native FCOMMON_OPT += -march=armv8.4-a -mtune=native
endif endif
@ -119,17 +119,21 @@ endif
endif endif
# Use a72 tunings because Neoverse-N2 is only available # Use a72 tunings because Neoverse-N2 is only available
# in GCC>=9.4 # in GCC>=10.4
ifeq ($(CORE), NEOVERSEN2) ifeq ($(CORE), NEOVERSEN2)
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ7) $(ISCLANG))) ifeq (1, $(filter 1,$(GCCVERSIONGTEQ7) $(ISCLANG)))
ifeq ($(GCCVERSIONGTEQ9), 1) ifeq ($(GCCVERSIONGTEQ10), 1)
ifeq (1, $(filter 1,$(GCCMINORVERSIONGTEQ4) $(GCCVERSIONGTEQ10))) ifeq (1, $(filter 1,$(GCCMINORVERSIONGTEQ4) $(GCCVERSIONGTEQ11)))
ifneq ($(OSNAME), Darwin)
CCOMMON_OPT += -march=armv8.5-a+sve+sve2+bf16 -mtune=neoverse-n2 CCOMMON_OPT += -march=armv8.5-a+sve+sve2+bf16 -mtune=neoverse-n2
else
CCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a72
endif
ifneq ($(F_COMPILER), NAG) ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8.5-a+sve+sve2+bf16 -mtune=neoverse-n2 FCOMMON_OPT += -march=armv8.5-a+sve+sve2+bf16 -mtune=neoverse-n2
endif endif
else else
CCOMMON_OPT += -march=armv8.5-a -mtune=native CCOMMON_OPT += -march=armv8.5-a+sve -mtune=native
ifneq ($(F_COMPILER), NAG) ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8.5-a -mtune=native FCOMMON_OPT += -march=armv8.5-a -mtune=native
endif endif

View File

@ -14,6 +14,11 @@ OPENBLAS_CMAKE_CONFIG := OpenBLASConfig.cmake
OPENBLAS_CMAKE_CONFIG_VERSION := OpenBLASConfigVersion.cmake OPENBLAS_CMAKE_CONFIG_VERSION := OpenBLASConfigVersion.cmake
OPENBLAS_PKGCONFIG_DIR := $(OPENBLAS_LIBRARY_DIR)/pkgconfig OPENBLAS_PKGCONFIG_DIR := $(OPENBLAS_LIBRARY_DIR)/pkgconfig
PKG_EXTRALIB := $(EXTRALIB) PKG_EXTRALIB := $(EXTRALIB)
ifeq ($(INTERFACE64),1)
SUFFIX64=64
endif
PKGFILE="$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/$(LIBSONAMEBASE)$(SUFFIX64).pc"
ifeq ($(USE_OPENMP), 1) ifeq ($(USE_OPENMP), 1)
ifeq ($(C_COMPILER), PGI) ifeq ($(C_COMPILER), PGI)
PKG_EXTRALIB += -lomp PKG_EXTRALIB += -lomp
@ -150,13 +155,19 @@ endif
endif endif
#Generating openblas.pc #Generating openblas.pc
@echo Generating $(LIBSONAMEBASE).pc in "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)" ifeq ($(INTERFACE64),1)
@echo 'libdir='$(OPENBLAS_LIBRARY_DIR) > "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/$(LIBSONAMEBASE).pc" SUFFIX64=64
@echo 'includedir='$(OPENBLAS_INCLUDE_DIR) >> "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/$(LIBSONAMEBASE).pc" endif
@echo 'openblas_config= USE_64BITINT='$(USE_64BITINT) 'DYNAMIC_ARCH='$(DYNAMIC_ARCH) 'DYNAMIC_OLDER='$(DYNAMIC_OLDER) 'NO_CBLAS='$(NO_CBLAS) 'NO_LAPACK='$(NO_LAPACK) 'NO_LAPACKE='$(NO_LAPACKE) 'NO_AFFINITY='$(NO_AFFINITY) 'USE_OPENMP='$(USE_OPENMP) $(CORE) 'MAX_THREADS='$(NUM_THREADS)>> "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/$(LIBSONAMEBASE).pc" PKGFILE="$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/$(LIBSONAMEBASE)$(SUFFIX64).pc"
@echo 'version='$(VERSION) >> "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/$(LIBSONAMEBASE).pc"
@echo 'extralib='$(PKG_EXTRALIB) >> "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/$(LIBSONAMEBASE).pc" @echo Generating $(LIBSONAMEBASE)$(SUFFIX64).pc in "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)"
@cat openblas.pc.in >> "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/$(LIBSONAMEBASE).pc" @echo 'libdir='$(OPENBLAS_LIBRARY_DIR) > "$(PKGFILE)"
@echo 'libsuffix='$(SYMBOLSUFFIX) >> "$(PKGFILE)"
@echo 'includedir='$(OPENBLAS_INCLUDE_DIR) >> "$(PKGFILE)"
@echo 'openblas_config= USE_64BITINT='$(INTERFACE64) 'DYNAMIC_ARCH='$(DYNAMIC_ARCH) 'DYNAMIC_OLDER='$(DYNAMIC_OLDER) 'NO_CBLAS='$(NO_CBLAS) 'NO_LAPACK='$(NO_LAPACK) 'NO_LAPACKE='$(NO_LAPACKE) 'NO_AFFINITY='$(NO_AFFINITY) 'USE_OPENMP='$(USE_OPENMP) $(CORE) 'MAX_THREADS='$(NUM_THREADS)>> "$(PKGFILE)"
@echo 'version='$(VERSION) >> "$(PKGFILE)"
@echo 'extralib='$(PKG_EXTRALIB) >> "$(PKGFILE)"
@cat openblas.pc.in >> "$(PKGFILE)"
#Generating OpenBLASConfig.cmake #Generating OpenBLASConfig.cmake

View File

@ -1,3 +1,4 @@
MSA_FLAGS = -mmsa -mfp64 -mload-store-pairs
ifdef BINARY64 ifdef BINARY64
else else
endif endif

View File

@ -1,3 +1,4 @@
MSA_FLAGS = -mmsa -mfp64 -mload-store-pairs
ifdef BINARY64 ifdef BINARY64
else else
endif endif

View File

@ -60,9 +60,9 @@ all: getarch_2nd
./getarch_2nd 1 >> $(TARGET_CONF) ./getarch_2nd 1 >> $(TARGET_CONF)
$(TARGET_CONF): c_check$(SCRIPTSUFFIX) f_check$(SCRIPTSUFFIX) getarch $(TARGET_CONF): c_check$(SCRIPTSUFFIX) f_check$(SCRIPTSUFFIX) getarch
./c_check$(SCRIPTSUFFIX) $(TARGET_MAKE) $(TARGET_CONF) $(CC) $(TARGET_FLAGS) $(CFLAGS) ./c_check$(SCRIPTSUFFIX) $(TARGET_MAKE) $(TARGET_CONF) "$(CC)" $(TARGET_FLAGS) $(CFLAGS)
ifneq ($(ONLY_CBLAS), 1) ifneq ($(ONLY_CBLAS), 1)
./f_check$(SCRIPTSUFFIX) $(TARGET_MAKE) $(TARGET_CONF) $(FC) $(TARGET_FLAGS) ./f_check$(SCRIPTSUFFIX) $(TARGET_MAKE) $(TARGET_CONF) "$(FC)" $(TARGET_FLAGS)
else else
#When we only build CBLAS, we set NOFORTRAN=2 #When we only build CBLAS, we set NOFORTRAN=2
echo "NOFORTRAN=2" >> $(TARGET_MAKE) echo "NOFORTRAN=2" >> $(TARGET_MAKE)
@ -77,8 +77,8 @@ endif
getarch : getarch.c cpuid.S dummy $(CPUIDEMU) getarch : getarch.c cpuid.S dummy $(CPUIDEMU)
avx512=$$(./c_check$(SCRIPTSUFFIX) - - $(CC) $(TARGET_FLAGS) $(CFLAGS) | grep NO_AVX512); \ avx512=$$(./c_check$(SCRIPTSUFFIX) - - "$(CC)" $(TARGET_FLAGS) $(CFLAGS) | grep NO_AVX512); \
rv64gv=$$(./c_check$(SCRIPTSUFFIX) - - $(CC) $(TARGET_FLAGS) $(CFLAGS) | grep NO_RV64GV); \ rv64gv=$$(./c_check$(SCRIPTSUFFIX) - - "$(CC)" $(TARGET_FLAGS) $(CFLAGS) | grep NO_RV64GV); \
$(HOSTCC) $(HOST_CFLAGS) $(EXFLAGS) $${avx512:+-D$${avx512}} $${rv64gv:+-D$${rv64gv}} -o $(@F) getarch.c cpuid.S $(CPUIDEMU) $(HOSTCC) $(HOST_CFLAGS) $(EXFLAGS) $${avx512:+-D$${avx512}} $${rv64gv:+-D$${rv64gv}} -o $(@F) getarch.c cpuid.S $(CPUIDEMU)
getarch_2nd : getarch_2nd.c $(TARGET_CONF) dummy getarch_2nd : getarch_2nd.c $(TARGET_CONF) dummy

View File

@ -3,7 +3,7 @@
# #
# This library's version # This library's version
VERSION = 0.3.21 VERSION = 0.3.22
# If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a # If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a
# and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library # and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library
@ -131,6 +131,9 @@ BUILD_LAPACK_DEPRECATED = 1
# Build RecursiveLAPACK on top of LAPACK # Build RecursiveLAPACK on top of LAPACK
# BUILD_RELAPACK = 1 # BUILD_RELAPACK = 1
# Have RecursiveLAPACK actually replace standard LAPACK routines instead of
# just adding its equivalents with a RELAPACK_ prefix
# RELAPACK_REPLACE = 1
# If you want to use the legacy threaded Level 3 implementation. # If you want to use the legacy threaded Level 3 implementation.
# USE_SIMPLE_THREADED_LEVEL3 = 1 # USE_SIMPLE_THREADED_LEVEL3 = 1
@ -207,7 +210,7 @@ NO_AFFINITY = 1
# to the user space. If bigphysarea is enabled, it will use it. # to the user space. If bigphysarea is enabled, it will use it.
# DEVICEDRIVER_ALLOCATION = 1 # DEVICEDRIVER_ALLOCATION = 1
# If you need to synchronize FP CSR between threads (for x86/x86_64 only). # If you need to synchronize FP CSR between threads (for x86/x86_64 and aarch64 only).
# CONSISTENT_FPCSR = 1 # CONSISTENT_FPCSR = 1
# If any gemm argument m, n or k is less or equal this threshold, gemm will be execute # If any gemm argument m, n or k is less or equal this threshold, gemm will be execute

View File

@ -9,6 +9,10 @@ ifndef TOPDIR
TOPDIR = . TOPDIR = .
endif endif
ifndef RELAPACK_REPLACE
RELAPACK_REPLACE=0
endif
# we need to use the host system's architecture for getarch compile options even especially when cross-compiling # we need to use the host system's architecture for getarch compile options even especially when cross-compiling
HOSTARCH := $(shell uname -m) HOSTARCH := $(shell uname -m)
ifeq ($(HOSTARCH), amd64) ifeq ($(HOSTARCH), amd64)
@ -280,8 +284,10 @@ GETARCH_FLAGS += -DHAVE_GAS=$(HAVE_GAS)
# Generating Makefile.conf and config.h # Generating Makefile.conf and config.h
DUMMY := $(shell $(MAKE) -C $(TOPDIR) -f Makefile.prebuild CC="$(CC)" FC="$(FC)" HOSTCC="$(HOSTCC)" HOST_CFLAGS="$(GETARCH_FLAGS)" CFLAGS="$(CFLAGS)" BINARY=$(BINARY) USE_OPENMP=$(USE_OPENMP) DYNAMIC_ARCH=$(DYNAMIC_ARCH) TARGET_CORE=$(TARGET_CORE) ONLY_CBLAS=$(ONLY_CBLAS) TARGET=$(TARGET) all) DUMMY := $(shell $(MAKE) -C $(TOPDIR) -f Makefile.prebuild CC="$(CC)" FC="$(FC)" HOSTCC="$(HOSTCC)" HOST_CFLAGS="$(GETARCH_FLAGS)" CFLAGS="$(CFLAGS)" BINARY=$(BINARY) USE_OPENMP=$(USE_OPENMP) DYNAMIC_ARCH=$(DYNAMIC_ARCH) TARGET_CORE=$(TARGET_CORE) ONLY_CBLAS=$(ONLY_CBLAS) TARGET=$(TARGET) all)
endif
ifndef TARGET_CORE ifndef TARGET_CORE
include $(TOPDIR)/Makefile.conf -include $(TOPDIR)/Makefile.conf
else else
HAVE_NEON= HAVE_NEON=
HAVE_VFP= HAVE_VFP=
@ -302,7 +308,6 @@ HAVE_FMA3=
include $(TOPDIR)/Makefile_kernel.conf include $(TOPDIR)/Makefile_kernel.conf
endif endif
endif
ifndef NUM_PARALLEL ifndef NUM_PARALLEL
NUM_PARALLEL = 1 NUM_PARALLEL = 1
@ -415,7 +420,7 @@ ifeq ($(OSNAME), AIX)
EXTRALIB += -lm EXTRALIB += -lm
endif endif
ifeq ($(OSNAME), FreeBSD) ifeq ($(OSNAME), $(filter $(OSNAME),FreeBSD OpenBSD NetBSD DragonFly))
ifeq ($(ARCH), $(filter $(ARCH),arm arm64)) ifeq ($(ARCH), $(filter $(ARCH),arm arm64))
EXTRALIB += -lm EXTRALIB += -lm
endif endif
@ -660,8 +665,10 @@ DYNAMIC_CORE += CORTEXA57
DYNAMIC_CORE += CORTEXA72 DYNAMIC_CORE += CORTEXA72
DYNAMIC_CORE += CORTEXA73 DYNAMIC_CORE += CORTEXA73
DYNAMIC_CORE += NEOVERSEN1 DYNAMIC_CORE += NEOVERSEN1
ifneq ($(NO_SVE), 1)
DYNAMIC_CORE += NEOVERSEV1 DYNAMIC_CORE += NEOVERSEV1
DYNAMIC_CORE += NEOVERSEN2 DYNAMIC_CORE += NEOVERSEN2
endif
DYNAMIC_CORE += CORTEXA55 DYNAMIC_CORE += CORTEXA55
DYNAMIC_CORE += FALKOR DYNAMIC_CORE += FALKOR
DYNAMIC_CORE += THUNDERX DYNAMIC_CORE += THUNDERX
@ -677,7 +684,12 @@ endif
endif endif
ifeq ($(ARCH), mips64) ifeq ($(ARCH), mips64)
DYNAMIC_CORE = LOONGSON3R3 LOONGSON3R4 DYNAMIC_CORE = LOONGSON3R3 LOONGSON3R4 MIPS64_GENERIC
ifdef DYNAMIC_LIST
override DYNAMIC_CORE = MIPS64_GENERIC $(DYNAMIC_LIST)
XCCOMMON_OPT = -DDYNAMIC_LIST -DDYN_MIPS64_GENERIC
XCCOMMON_OPT += $(foreach dcore,$(DYNAMIC_LIST),-DDYN_$(dcore))
endif
endif endif
ifeq ($(ARCH), loongarch64) ifeq ($(ARCH), loongarch64)
@ -818,13 +830,32 @@ endif
ifeq ($(ARCH), riscv64) ifeq ($(ARCH), riscv64)
NO_BINARY_MODE = 1 NO_BINARY_MODE = 1
BINARY_DEFINED = 1 BINARY_DEFINED = 1
ifdef INTERFACE64
ifneq ($(INTERFACE64), 0)
ifeq ($(F_COMPILER), GFORTRAN)
FCOMMON_OPT += -fdefault-integer-8
endif
ifeq ($(F_COMPILER), FLANG)
FCOMMON_OPT += -i8
endif
endif
endif
endif endif
ifeq ($(ARCH), loongarch64) ifeq ($(ARCH), loongarch64)
NO_BINARY_MODE = 1 NO_BINARY_MODE = 1
BINARY_DEFINED = 1 BINARY_DEFINED = 1
ifdef INTERFACE64
ifneq ($(INTERFACE64), 0)
ifeq ($(F_COMPILER), GFORTRAN)
FCOMMON_OPT += -fdefault-integer-8
endif
ifeq ($(F_COMPILER), FLANG)
FCOMMON_OPT += -i8
endif
endif
endif
endif endif
# #
# C Compiler dependent settings # C Compiler dependent settings
@ -856,6 +887,11 @@ CCOMMON_OPT += -mabi=32
BINARY_DEFINED = 1 BINARY_DEFINED = 1
endif endif
ifneq (, $(filter $(CORE), MIPS64_GENERIC))
CCOMMON_OPT += -DNO_MSA
FCOMMON_OPT += -DNO_MSA
endif
ifneq (, $(filter $(CORE),LOONGSON3R3 LOONGSON3R4)) ifneq (, $(filter $(CORE),LOONGSON3R3 LOONGSON3R4))
CCOMMON_OPT += -march=loongson3a CCOMMON_OPT += -march=loongson3a
FCOMMON_OPT += -march=loongson3a FCOMMON_OPT += -march=loongson3a
@ -932,16 +968,19 @@ endif
endif endif
ifdef BINARY64 ifdef BINARY64
ifeq ($(ARCH), x86_64) ifeq ($(ARCH), x86_64)
ifeq (,$(findstring tp,$(CFLAGS)))
ifneq ($(NEWPGI2),1) ifneq ($(NEWPGI2),1)
CCOMMON_OPT += -tp p7-64 CCOMMON_OPT += -tp p7-64
else else
CCOMMON_OPT += -tp px CCOMMON_OPT += -tp px
endif endif
endif
ifneq ($(NEWPGI),1) ifneq ($(NEWPGI),1)
CCOMMON_OPT += -D__MMX__ -Mnollvm CCOMMON_OPT += -D__MMX__ -Mnollvm
endif endif
else else
ifeq ($(ARCH), power) ifeq ($(ARCH), power)
ifeq (,$(findstring tp,$(CFLAGS)))
ifeq ($(CORE), POWER8) ifeq ($(CORE), POWER8)
CCOMMON_OPT += -tp pwr8 CCOMMON_OPT += -tp pwr8
endif endif
@ -950,14 +989,17 @@ CCOMMON_OPT += -tp pwr9
endif endif
endif endif
endif endif
endif
else else
ifneq ($(NEWPGI2),1) ifneq ($(NEWPGI2),1)
ifeq (,$(findstring tp,$(CFLAGS)))
CCOMMON_OPT += -tp p7 CCOMMON_OPT += -tp p7
else else
CCOMMON_OPT += -tp px CCOMMON_OPT += -tp px
endif endif
endif endif
endif endif
endif
ifeq ($(C_COMPILER), PATHSCALE) ifeq ($(C_COMPILER), PATHSCALE)
ifdef BINARY64 ifdef BINARY64
@ -1349,6 +1391,10 @@ ifeq ($(NO_AVX512), 1)
CCOMMON_OPT += -DNO_AVX512 CCOMMON_OPT += -DNO_AVX512
endif endif
ifeq ($(NO_SVE), 1)
CCOMMON_OPT += -DNO_SVE
endif
ifdef SMP ifdef SMP
CCOMMON_OPT += -DSMP_SERVER CCOMMON_OPT += -DSMP_SERVER

View File

@ -130,6 +130,28 @@ endif
endif endif
endif endif
ifeq ($(CORE), ZEN)
ifdef HAVE_AVX512VL
ifndef NO_AVX512
CCOMMON_OPT += -march=skylake-avx512
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=skylake-avx512
endif
ifeq ($(OSNAME), CYGWIN_NT)
CCOMMON_OPT += -fno-asynchronous-unwind-tables
FCOMMON_OPT += -fno-asynchronous-unwind-tables
endif
ifeq ($(OSNAME), WINNT)
ifeq ($(C_COMPILER), GCC)
CCOMMON_OPT += -fno-asynchronous-unwind-tables
FCOMMON_OPT += -fno-asynchronous-unwind-tables
endif
endif
endif
endif
endif
ifdef HAVE_AVX2 ifdef HAVE_AVX2
ifndef NO_AVX2 ifndef NO_AVX2
ifeq ($(C_COMPILER), GCC) ifeq ($(C_COMPILER), GCC)
@ -143,6 +165,7 @@ ifeq ($(C_COMPILER), CLANG)
CCOMMON_OPT += -mavx2 CCOMMON_OPT += -mavx2
endif endif
endif endif
ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN)))
ifeq ($(F_COMPILER), GFORTRAN) ifeq ($(F_COMPILER), GFORTRAN)
# AVX2 support was added in 4.7.0 # AVX2 support was added in 4.7.0
GCCVERSIONGTEQ4 := $(shell expr `$(FC) -dumpversion | cut -f1 -d.` \>= 4) GCCVERSIONGTEQ4 := $(shell expr `$(FC) -dumpversion | cut -f1 -d.` \>= 4)
@ -159,6 +182,7 @@ endif
endif endif
endif endif
endif endif
endif
endif endif

20
SECURITY.md Normal file
View File

@ -0,0 +1,20 @@
# Security Policy
## Supported Versions
It is generally recommended to use the latest release as this project
does not maintain multiple stable branches and providing packages e.g.
for Linux distributions is outside our scope. In particular, versions
before 0.3.18 can be assumed to carry the out-of-bounds-read error in
the LAPACK ?LARRV family of functions that was the subject of
CVE-2021-4048
## Reporting a Vulnerability
If you suspect that you have found a vulnerability - a defect that could
be abused to compromise the security of a user's code or systems - please
do not use the normal github issue tracker (except perhaps to post a general
warning if you deem that necessary). Instead, please contact the project
maintainers through the email addresses given in their github user profiles.
Defects found in the "lapack-netlib" subtree should ideally be reported to
the maintainers of the reference implementation of LAPACK, lapack@icl.itk.edu

View File

@ -65,6 +65,7 @@ MIPS1004K
MIPS24K MIPS24K
4.MIPS64 CPU: 4.MIPS64 CPU:
MIPS64_GENERIC
SICORTEX SICORTEX
LOONGSON3A LOONGSON3A
LOONGSON3B LOONGSON3B
@ -128,3 +129,7 @@ LOONGSON2K1000
12. Elbrus E2000: 12. Elbrus E2000:
E2K E2K
13. Alpha
EV4
EV5
EV6

View File

@ -141,7 +141,7 @@ jobs:
- job: OSX_OpenMP - job: OSX_OpenMP
pool: pool:
vmImage: 'macOS-10.15' vmImage: 'macOS-11'
steps: steps:
- script: | - script: |
brew update brew update
@ -151,15 +151,23 @@ jobs:
- job: OSX_GCC_Nothreads - job: OSX_GCC_Nothreads
pool: pool:
vmImage: 'macOS-10.15' vmImage: 'macOS-11'
steps: steps:
- script: | - script: |
brew update brew update
make USE_THREADS=0 CC=gcc-10 FC=gfortran-10 make USE_THREADS=0 CC=gcc-10 FC=gfortran-10
- job: OSX_GCC12
pool:
vmImage: 'macOS-latest'
steps:
- script: |
brew update
make CC=gcc-12 FC=gfortran-12
- job: OSX_OpenMP_Clang - job: OSX_OpenMP_Clang
pool: pool:
vmImage: 'macOS-10.15' vmImage: 'macOS-11'
variables: variables:
LD_LIBRARY_PATH: /usr/local/opt/llvm/lib LD_LIBRARY_PATH: /usr/local/opt/llvm/lib
LIBRARY_PATH: /usr/local/opt/llvm/lib LIBRARY_PATH: /usr/local/opt/llvm/lib
@ -172,7 +180,7 @@ jobs:
- job: OSX_OpenMP_Clang_cmake - job: OSX_OpenMP_Clang_cmake
pool: pool:
vmImage: 'macOS-10.15' vmImage: 'macOS-11'
variables: variables:
LD_LIBRARY_PATH: /usr/local/opt/llvm/lib LD_LIBRARY_PATH: /usr/local/opt/llvm/lib
LIBRARY_PATH: /usr/local/opt/llvm/lib LIBRARY_PATH: /usr/local/opt/llvm/lib
@ -188,7 +196,7 @@ jobs:
- job: OSX_dynarch_cmake - job: OSX_dynarch_cmake
pool: pool:
vmImage: 'macOS-10.15' vmImage: 'macOS-11'
variables: variables:
LD_LIBRARY_PATH: /usr/local/opt/llvm/lib LD_LIBRARY_PATH: /usr/local/opt/llvm/lib
LIBRARY_PATH: /usr/local/opt/llvm/lib LIBRARY_PATH: /usr/local/opt/llvm/lib
@ -196,13 +204,13 @@ jobs:
- script: | - script: |
mkdir build mkdir build
cd build cd build
cmake -DTARGET=CORE2 -DDYNAMIC_ARCH=1 -DCMAKE_C_COMPILER=gcc-10 -DCMAKE_Fortran_COMPILER=gfortran-10 -DBUILD_SHARED_LIBS=ON .. cmake -DTARGET=CORE2 -DDYNAMIC_ARCH=1 -DDYNAMIC_LIST='NEHALEM HASWELL SKYLAKEX' -DCMAKE_C_COMPILER=gcc-10 -DCMAKE_Fortran_COMPILER=gfortran-10 -DBUILD_SHARED_LIBS=ON ..
cmake --build . cmake --build .
ctest ctest
- job: OSX_Ifort_Clang - job: OSX_Ifort_Clang
pool: pool:
vmImage: 'macOS-10.15' vmImage: 'macOS-11'
variables: variables:
LD_LIBRARY_PATH: /usr/local/opt/llvm/lib LD_LIBRARY_PATH: /usr/local/opt/llvm/lib
MACOS_HPCKIT_URL: https://registrationcenter-download.intel.com/akdlm/irc_nas/17643/m_HPCKit_p_2021.2.0.2903_offline.dmg MACOS_HPCKIT_URL: https://registrationcenter-download.intel.com/akdlm/irc_nas/17643/m_HPCKit_p_2021.2.0.2903_offline.dmg
@ -235,7 +243,7 @@ jobs:
- job: OSX_NDK_ARMV7 - job: OSX_NDK_ARMV7
pool: pool:
vmImage: 'macOS-10.15' vmImage: 'macOS-11'
steps: steps:
- script: | - script: |
brew update brew update
@ -255,7 +263,7 @@ jobs:
- job: OSX_IOS_ARMV7 - job: OSX_IOS_ARMV7
pool: pool:
vmImage: 'macOS-10.15' vmImage: 'macOS-11'
variables: variables:
CC: /Applications/Xcode_12.4.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang CC: /Applications/Xcode_12.4.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang
CFLAGS: -O2 -mno-thumb -Wno-macro-redefined -isysroot /Applications/Xcode_12.4.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS14.4.sdk -arch armv7 -miphoneos-version-min=5.1 CFLAGS: -O2 -mno-thumb -Wno-macro-redefined -isysroot /Applications/Xcode_12.4.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS14.4.sdk -arch armv7 -miphoneos-version-min=5.1

File diff suppressed because it is too large Load Diff

View File

@ -1,133 +1,133 @@
/*************************************************************************** /***************************************************************************
Copyright (c) 2016, The OpenBLAS Project Copyright (c) 2016, The OpenBLAS Project
All rights reserved. All rights reserved.
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are modification, are permitted provided that the following conditions are
met: met:
1. Redistributions of source code must retain the above copyright 1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer. notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright 2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the the documentation and/or other materials provided with the
distribution. distribution.
3. Neither the name of the OpenBLAS project nor the names of 3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products its contributors may be used to endorse or promote products
derived from this software without specific prior written permission. derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/
#include "bench.h" #include "bench.h"
#undef AMAX #undef AMAX
#ifdef COMPLEX #ifdef COMPLEX
#ifdef DOUBLE #ifdef DOUBLE
#define AMAX BLASFUNC(dzamax) #define AMAX BLASFUNC(dzamax)
#else #else
#define AMAX BLASFUNC(scamax) #define AMAX BLASFUNC(scamax)
#endif #endif
#else #else
#ifdef DOUBLE #ifdef DOUBLE
#define AMAX BLASFUNC(damax) #define AMAX BLASFUNC(damax)
#else #else
#define AMAX BLASFUNC(samax) #define AMAX BLASFUNC(samax)
#endif #endif
#endif #endif
int main(int argc, char *argv[]) int main(int argc, char *argv[])
{ {
FLOAT *x; FLOAT *x;
blasint m, i; blasint m, i;
blasint inc_x = 1; blasint inc_x = 1;
int loops = 1; int loops = 1;
int l; int l;
char *p; char *p;
int from = 1; int from = 1;
int to = 200; int to = 200;
int step = 1; int step = 1;
double time1, timeg; double time1, timeg;
argc--; argc--;
argv++; argv++;
if (argc > 0) if (argc > 0)
{ {
from = atol(*argv); from = atol(*argv);
argc--; argc--;
argv++; argv++;
} }
if (argc > 0) if (argc > 0)
{ {
to = MAX(atol(*argv), from); to = MAX(atol(*argv), from);
argc--; argc--;
argv++; argv++;
} }
if (argc > 0) if (argc > 0)
{ {
step = atol(*argv); step = atol(*argv);
argc--; argc--;
argv++; argv++;
} }
if ((p = getenv("OPENBLAS_LOOPS"))) if ((p = getenv("OPENBLAS_LOOPS")))
loops = atoi(p); loops = atoi(p);
if ((p = getenv("OPENBLAS_INCX"))) if ((p = getenv("OPENBLAS_INCX")))
inc_x = atoi(p); inc_x = atoi(p);
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step, inc_x, loops); fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step, inc_x, loops);
if ((x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL) if ((x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL)
{ {
fprintf(stderr, "Out of Memory!!\n"); fprintf(stderr, "Out of Memory!!\n");
exit(1); exit(1);
} }
#ifdef __linux #ifdef __linux
srandom(getpid()); srandom(getpid());
#endif #endif
fprintf(stderr, " SIZE Flops\n"); fprintf(stderr, " SIZE Flops\n");
for (m = from; m <= to; m += step) for (m = from; m <= to; m += step)
{ {
timeg = 0; timeg = 0;
fprintf(stderr, " %6d : ", (int)m); fprintf(stderr, " %6d : ", (int)m);
for (l = 0; l < loops; l++) for (l = 0; l < loops; l++)
{ {
for (i = 0; i < m * COMPSIZE * abs(inc_x); i++) for (i = 0; i < m * COMPSIZE * abs(inc_x); i++)
{ {
x[i] = ((FLOAT)rand() / (FLOAT)RAND_MAX) - 0.5; x[i] = ((FLOAT)rand() / (FLOAT)RAND_MAX) - 0.5;
} }
begin(); begin();
AMAX(&m, x, &inc_x); AMAX(&m, x, &inc_x);
end(); end();
timeg += getsec(); timeg += getsec();
} }
timeg /= loops; timeg /= loops;
fprintf(stderr, fprintf(stderr,
" %10.2f MFlops %10.6f sec\n", " %10.2f MFlops %10.6f sec\n",
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg); COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg);
} }
return 0; return 0;
} }
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); // void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

View File

@ -1,137 +1,137 @@
/*************************************************************************** /***************************************************************************
Copyright (c) 2016, The OpenBLAS Project Copyright (c) 2016, The OpenBLAS Project
All rights reserved. All rights reserved.
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are modification, are permitted provided that the following conditions are
met: met:
1. Redistributions of source code must retain the above copyright 1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer. notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright 2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the the documentation and/or other materials provided with the
distribution. distribution.
3. Neither the name of the OpenBLAS project nor the names of 3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products its contributors may be used to endorse or promote products
derived from this software without specific prior written permission. derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/
#include "bench.h" #include "bench.h"
#undef AMIN #undef AMIN
#ifdef COMPLEX #ifdef COMPLEX
#ifdef DOUBLE #ifdef DOUBLE
#define AMIN BLASFUNC(dzamin) #define AMIN BLASFUNC(dzamin)
#else #else
#define AMIN BLASFUNC(scamin) #define AMIN BLASFUNC(scamin)
#endif #endif
#else #else
#ifdef DOUBLE #ifdef DOUBLE
#define AMIN BLASFUNC(damin) #define AMIN BLASFUNC(damin)
#else #else
#define AMIN BLASFUNC(samin) #define AMIN BLASFUNC(samin)
#endif #endif
#endif #endif
int main(int argc, char *argv[]) int main(int argc, char *argv[])
{ {
FLOAT *x; FLOAT *x;
blasint m, i; blasint m, i;
blasint inc_x = 1; blasint inc_x = 1;
int loops = 1; int loops = 1;
int l; int l;
char *p; char *p;
int from = 1; int from = 1;
int to = 200; int to = 200;
int step = 1; int step = 1;
double time1, timeg; double time1, timeg;
argc--; argc--;
argv++; argv++;
if (argc > 0) if (argc > 0)
{ {
from = atol(*argv); from = atol(*argv);
argc--; argc--;
argv++; argv++;
} }
if (argc > 0) if (argc > 0)
{ {
to = MAX(atol(*argv), from); to = MAX(atol(*argv), from);
argc--; argc--;
argv++; argv++;
} }
if (argc > 0) if (argc > 0)
{ {
step = atol(*argv); step = atol(*argv);
argc--; argc--;
argv++; argv++;
} }
if ((p = getenv("OPENBLAS_LOOPS"))) if ((p = getenv("OPENBLAS_LOOPS")))
loops = atoi(p); loops = atoi(p);
if ((p = getenv("OPENBLAS_INCX"))) if ((p = getenv("OPENBLAS_INCX")))
inc_x = atoi(p); inc_x = atoi(p);
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step, inc_x, loops); fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step, inc_x, loops);
if ((x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL) if ((x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL)
{ {
fprintf(stderr, "Out of Memory!!\n"); fprintf(stderr, "Out of Memory!!\n");
exit(1); exit(1);
} }
#ifdef __linux #ifdef __linux
srandom(getpid()); srandom(getpid());
#endif #endif
fprintf(stderr, " SIZE Flops\n"); fprintf(stderr, " SIZE Flops\n");
for (m = from; m <= to; m += step) for (m = from; m <= to; m += step)
{ {
timeg = 0; timeg = 0;
fprintf(stderr, " %6d : ", (int)m); fprintf(stderr, " %6d : ", (int)m);
for (l = 0; l < loops; l++) for (l = 0; l < loops; l++)
{ {
for (i = 0; i < m * COMPSIZE * abs(inc_x); i++) for (i = 0; i < m * COMPSIZE * abs(inc_x); i++)
{ {
x[i] = ((FLOAT)rand() / (FLOAT)RAND_MAX) - 0.5; x[i] = ((FLOAT)rand() / (FLOAT)RAND_MAX) - 0.5;
} }
begin(); begin();
AMIN(&m, x, &inc_x); AMIN(&m, x, &inc_x);
end(); end();
timeg += getsec(); timeg += getsec();
} }
timeg /= loops; timeg /= loops;
fprintf(stderr, fprintf(stderr,
" %10.2f MFlops %10.6f sec\n", " %10.2f MFlops %10.6f sec\n",
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg); COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg);
} }
return 0; return 0;
} }
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); // void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

View File

@ -74,6 +74,24 @@ static void *huge_malloc(BLASLONG size){
#endif #endif
/* Benchmarks should allocate with cacheline (often 64 bytes) alignment
to avoid unreliable results. This technique, storing the allocated
pointer value just before the aligned memory, doesn't require
C11's aligned_alloc for compatibility with older compilers. */
static void *aligned_alloc_cacheline(size_t n)
{
void *p = malloc((size_t)(void *) + n + L1_DATA_LINESIZE - 1);
if (p) {
void **newp = (void **)
(((uintptr_t)p + L1_DATA_LINESIZE) & (uintptr_t)-L1_DATA_LINESIZE);
newp[-1] = p;
p = newp;
}
return p;
}
#define malloc aligned_alloc_cacheline
#define free(p) free((p) ? ((void **)(p))[-1] : (p))
#if defined(__WIN32__) || defined(__WIN64__) || !defined(_POSIX_TIMERS) #if defined(__WIN32__) || defined(__WIN64__) || !defined(_POSIX_TIMERS)
struct timeval start, stop; struct timeval start, stop;
#elif defined(__APPLE__) #elif defined(__APPLE__)

View File

@ -1,134 +1,134 @@
/*************************************************************************** /***************************************************************************
Copyright (c) 2014, The OpenBLAS Project Copyright (c) 2014, The OpenBLAS Project
All rights reserved. All rights reserved.
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are modification, are permitted provided that the following conditions are
met: met:
1. Redistributions of source code must retain the above copyright 1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer. notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright 2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the the documentation and/or other materials provided with the
distribution. distribution.
3. Neither the name of the OpenBLAS project nor the names of 3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products its contributors may be used to endorse or promote products
derived from this software without specific prior written permission. derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/
#include "bench.h" #include "bench.h"
#undef HBMV #undef HBMV
#ifdef DOUBLE #ifdef DOUBLE
#define HBMV BLASFUNC(zhbmv) #define HBMV BLASFUNC(zhbmv)
#else #else
#define HBMV BLASFUNC(chbmv) #define HBMV BLASFUNC(chbmv)
#endif #endif
int main(int argc, char *argv[]){ int main(int argc, char *argv[]){
FLOAT *a, *x, *y; FLOAT *a, *x, *y;
FLOAT alpha[] = {1.0, 1.0}; FLOAT alpha[] = {1.0, 1.0};
FLOAT beta [] = {0.0, 0.0}; FLOAT beta [] = {0.0, 0.0};
blasint k = 1; blasint k = 1;
char uplo='L'; char uplo='L';
blasint m, i, j; blasint m, i, j;
blasint inc_x=1, inc_y=1; blasint inc_x=1, inc_y=1;
int loops = 1; int loops = 1;
int l; int l;
char *p; char *p;
int from = 1; int from = 1;
int to = 200; int to = 200;
int step = 1; int step = 1;
double time1,timeg; double time1,timeg;
argc--;argv++; argc--;argv++;
if (argc > 0) { from = atol(*argv); argc--; argv++;} if (argc > 0) { from = atol(*argv); argc--; argv++;}
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;} if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
if (argc > 0) { step = atol(*argv); argc--; argv++;} if (argc > 0) { step = atol(*argv); argc--; argv++;}
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p); if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p); if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
if ((p = getenv("OPENBLAS_INCY"))) inc_y = atoi(p); if ((p = getenv("OPENBLAS_INCY"))) inc_y = atoi(p);
if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p; if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p;
if ((p = getenv("OPENBLAS_K"))) k = atoi(p); if ((p = getenv("OPENBLAS_K"))) k = atoi(p);
fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = '%c' k = %d Inc_x = %d Inc_y = %d Loops = %d\n", fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = '%c' k = %d Inc_x = %d Inc_y = %d Loops = %d\n",
from, to, step, uplo, k, inc_x, inc_y, loops); from, to, step, uplo, k, inc_x, inc_y, loops);
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL) { if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL) {
fprintf(stderr,"Out of Memory!!\n"); fprintf(stderr,"Out of Memory!!\n");
exit(1); exit(1);
} }
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL) { if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL) {
fprintf(stderr,"Out of Memory!!\n"); fprintf(stderr,"Out of Memory!!\n");
exit(1); exit(1);
} }
if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL) { if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL) {
fprintf(stderr,"Out of Memory!!\n"); fprintf(stderr,"Out of Memory!!\n");
exit(1); exit(1);
} }
#ifdef __linux #ifdef __linux
srandom(getpid()); srandom(getpid());
#endif #endif
fprintf(stderr, " SIZE Flops\n"); fprintf(stderr, " SIZE Flops\n");
for(m = from; m <= to; m += step) { for(m = from; m <= to; m += step) {
timeg=0; timeg=0;
fprintf(stderr, " %6dx%d : ", (int)m, (int)m); fprintf(stderr, " %6dx%d : ", (int)m, (int)m);
for(j = 0; j < m; j++) { for(j = 0; j < m; j++) {
for(i = 0; i < m * COMPSIZE; i++) { for(i = 0; i < m * COMPSIZE; i++) {
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
} }
} }
for (l = 0; l < loops; l++) { for (l = 0; l < loops; l++) {
for (i = 0; i < m * COMPSIZE * abs(inc_x); i++) { for (i = 0; i < m * COMPSIZE * abs(inc_x); i++) {
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
} }
for (i = 0; i < m * COMPSIZE * abs(inc_y); i++) { for (i = 0; i < m * COMPSIZE * abs(inc_y); i++) {
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
} }
begin(); begin();
HBMV (&uplo, &m, &k, alpha, a, &m, x, &inc_x, beta, y, &inc_y ); HBMV (&uplo, &m, &k, alpha, a, &m, x, &inc_x, beta, y, &inc_y );
end(); end();
timeg += getsec(); timeg += getsec();
} }
timeg /= loops; timeg /= loops;
fprintf(stderr, " %10.2f MFlops\n", fprintf(stderr, " %10.2f MFlops\n",
COMPSIZE * COMPSIZE * 2. * (double)(2 * k + 1) * (double)m / timeg * 1.e-6); COMPSIZE * COMPSIZE * 2. * (double)(2 * k + 1) * (double)m / timeg * 1.e-6);
} }
return 0; return 0;
} }
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); // void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

View File

@ -1,133 +1,133 @@
/*************************************************************************** /***************************************************************************
Copyright (c) 2014, The OpenBLAS Project Copyright (c) 2014, The OpenBLAS Project
All rights reserved. All rights reserved.
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are modification, are permitted provided that the following conditions are
met: met:
1. Redistributions of source code must retain the above copyright 1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer. notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright 2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the the documentation and/or other materials provided with the
distribution. distribution.
3. Neither the name of the OpenBLAS project nor the names of 3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products its contributors may be used to endorse or promote products
derived from this software without specific prior written permission. derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/
#include "bench.h" #include "bench.h"
#undef HPMV #undef HPMV
#ifdef DOUBLE #ifdef DOUBLE
#define HPMV BLASFUNC(zhpmv) #define HPMV BLASFUNC(zhpmv)
#else #else
#define HPMV BLASFUNC(chpmv) #define HPMV BLASFUNC(chpmv)
#endif #endif
int main(int argc, char *argv[]){ int main(int argc, char *argv[]){
FLOAT *a, *x, *y; FLOAT *a, *x, *y;
FLOAT alpha[] = {1.0, 1.0}; FLOAT alpha[] = {1.0, 1.0};
FLOAT beta [] = {1.0, 1.0}; FLOAT beta [] = {1.0, 1.0};
char uplo='L'; char uplo='L';
blasint m, i, j; blasint m, i, j;
blasint inc_x=1, inc_y=1; blasint inc_x=1, inc_y=1;
int loops = 1; int loops = 1;
int l; int l;
char *p; char *p;
int from = 1; int from = 1;
int to = 200; int to = 200;
int step = 1; int step = 1;
double time1,timeg; double time1,timeg;
argc--;argv++; argc--;argv++;
if (argc > 0) { from = atol(*argv); argc--; argv++;} if (argc > 0) { from = atol(*argv); argc--; argv++;}
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;} if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
if (argc > 0) { step = atol(*argv); argc--; argv++;} if (argc > 0) { step = atol(*argv); argc--; argv++;}
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p); if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p); if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
if ((p = getenv("OPENBLAS_INCY"))) inc_y = atoi(p); if ((p = getenv("OPENBLAS_INCY"))) inc_y = atoi(p);
if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p; if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p;
fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = '%c' Inc_x = %d Inc_y = %d Loops = %d\n", from, to, step,uplo,inc_x,inc_y,loops); fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = '%c' Inc_x = %d Inc_y = %d Loops = %d\n", from, to, step,uplo,inc_x,inc_y,loops);
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL) { if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL) {
fprintf(stderr,"Out of Memory!!\n"); fprintf(stderr,"Out of Memory!!\n");
exit(1); exit(1);
} }
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL) { if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL) {
fprintf(stderr,"Out of Memory!!\n"); fprintf(stderr,"Out of Memory!!\n");
exit(1); exit(1);
} }
if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL) { if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL) {
fprintf(stderr,"Out of Memory!!\n"); fprintf(stderr,"Out of Memory!!\n");
exit(1); exit(1);
} }
#ifdef __linux #ifdef __linux
srandom(getpid()); srandom(getpid());
#endif #endif
fprintf(stderr, " SIZE Flops\n"); fprintf(stderr, " SIZE Flops\n");
for(m = from; m <= to; m += step) { for(m = from; m <= to; m += step) {
timeg=0; timeg=0;
fprintf(stderr, " %6dx%d : ", (int)m, (int)m); fprintf(stderr, " %6dx%d : ", (int)m, (int)m);
for(j = 0; j < m; j++) { for(j = 0; j < m; j++) {
for(i = 0; i < m * COMPSIZE; i++) { for(i = 0; i < m * COMPSIZE; i++) {
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
} }
} }
for (l = 0; l < loops; l++) { for (l = 0; l < loops; l++) {
for (i = 0; i < m * COMPSIZE * abs(inc_x); i++) { for (i = 0; i < m * COMPSIZE * abs(inc_x); i++) {
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
} }
for (i = 0; i < m * COMPSIZE * abs(inc_y); i++) { for (i = 0; i < m * COMPSIZE * abs(inc_y); i++) {
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
} }
begin(); begin();
HPMV (&uplo, &m, alpha, a, x, &inc_x, beta, y, &inc_y ); HPMV (&uplo, &m, alpha, a, x, &inc_x, beta, y, &inc_y );
end(); end();
time1 = getsec(); time1 = getsec();
timeg += time1; timeg += time1;
} }
timeg /= loops; timeg /= loops;
fprintf(stderr, " %10.2f MFlops\n", fprintf(stderr, " %10.2f MFlops\n",
COMPSIZE * COMPSIZE * 2. * (double)m * (double)m / timeg * 1.e-6); COMPSIZE * COMPSIZE * 2. * (double)m * (double)m / timeg * 1.e-6);
} }
return 0; return 0;
} }
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); // void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

View File

@ -1,120 +1,120 @@
/*************************************************************************** /***************************************************************************
Copyright (c) 2016, The OpenBLAS Project Copyright (c) 2016, The OpenBLAS Project
All rights reserved. All rights reserved.
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are modification, are permitted provided that the following conditions are
met: met:
1. Redistributions of source code must retain the above copyright 1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer. notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright 2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the the documentation and/or other materials provided with the
distribution. distribution.
3. Neither the name of the OpenBLAS project nor the names of 3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products its contributors may be used to endorse or promote products
derived from this software without specific prior written permission. derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/
#include "bench.h" #include "bench.h"
#undef IAMIN #undef IAMIN
#ifdef COMPLEX #ifdef COMPLEX
#ifdef DOUBLE #ifdef DOUBLE
#define IAMIN BLASFUNC(izamin) #define IAMIN BLASFUNC(izamin)
#else #else
#define IAMIN BLASFUNC(icamin) #define IAMIN BLASFUNC(icamin)
#endif #endif
#else #else
#ifdef DOUBLE #ifdef DOUBLE
#define IAMIN BLASFUNC(idamin) #define IAMIN BLASFUNC(idamin)
#else #else
#define IAMIN BLASFUNC(isamin) #define IAMIN BLASFUNC(isamin)
#endif #endif
#endif #endif
int main(int argc, char *argv[]){ int main(int argc, char *argv[]){
FLOAT *x; FLOAT *x;
blasint m, i; blasint m, i;
blasint inc_x=1; blasint inc_x=1;
int loops = 1; int loops = 1;
int l; int l;
char *p; char *p;
int from = 1; int from = 1;
int to = 200; int to = 200;
int step = 1; int step = 1;
double time1,timeg; double time1,timeg;
argc--;argv++; argc--;argv++;
if (argc > 0) { from = atol(*argv); argc--; argv++;} if (argc > 0) { from = atol(*argv); argc--; argv++;}
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;} if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
if (argc > 0) { step = atol(*argv); argc--; argv++;} if (argc > 0) { step = atol(*argv); argc--; argv++;}
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p); if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p); if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step,inc_x,loops); fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step,inc_x,loops);
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){ if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1); fprintf(stderr,"Out of Memory!!\n");exit(1);
} }
#ifdef __linux #ifdef __linux
srandom(getpid()); srandom(getpid());
#endif #endif
fprintf(stderr, " SIZE Flops\n"); fprintf(stderr, " SIZE Flops\n");
for(m = from; m <= to; m += step) for(m = from; m <= to; m += step)
{ {
timeg=0; timeg=0;
fprintf(stderr, " %6d : ", (int)m); fprintf(stderr, " %6d : ", (int)m);
for (l=0; l<loops; l++) for (l=0; l<loops; l++)
{ {
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){ for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
} }
begin(); begin();
IAMIN (&m, x, &inc_x); IAMIN (&m, x, &inc_x);
end(); end();
time1 = getsec(); time1 = getsec();
timeg += time1; timeg += time1;
} }
timeg /= loops; timeg /= loops;
fprintf(stderr, fprintf(stderr,
" %10.2f MFlops %10.6f sec\n", " %10.2f MFlops %10.6f sec\n",
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg); COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg);
} }
return 0; return 0;
} }
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); // void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

View File

@ -1,114 +1,114 @@
/*************************************************************************** /***************************************************************************
Copyright (c) 2016, The OpenBLAS Project Copyright (c) 2016, The OpenBLAS Project
All rights reserved. All rights reserved.
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are modification, are permitted provided that the following conditions are
met: met:
1. Redistributions of source code must retain the above copyright 1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer. notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright 2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the the documentation and/or other materials provided with the
distribution. distribution.
3. Neither the name of the OpenBLAS project nor the names of 3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products its contributors may be used to endorse or promote products
derived from this software without specific prior written permission. derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/
#include "bench.h" #include "bench.h"
#undef IMAX #undef IMAX
#ifndef COMPLEX #ifndef COMPLEX
#ifdef DOUBLE #ifdef DOUBLE
#define IMAX BLASFUNC(idmax) #define IMAX BLASFUNC(idmax)
#else #else
#define IMAX BLASFUNC(ismax) #define IMAX BLASFUNC(ismax)
#endif #endif
#endif #endif
int main(int argc, char *argv[]){ int main(int argc, char *argv[]){
FLOAT *x; FLOAT *x;
blasint m, i; blasint m, i;
blasint inc_x=1; blasint inc_x=1;
int loops = 1; int loops = 1;
int l; int l;
char *p; char *p;
int from = 1; int from = 1;
int to = 200; int to = 200;
int step = 1; int step = 1;
double time1,timeg; double time1,timeg;
argc--;argv++; argc--;argv++;
if (argc > 0) { from = atol(*argv); argc--; argv++;} if (argc > 0) { from = atol(*argv); argc--; argv++;}
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;} if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
if (argc > 0) { step = atol(*argv); argc--; argv++;} if (argc > 0) { step = atol(*argv); argc--; argv++;}
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p); if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p); if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step,inc_x,loops); fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step,inc_x,loops);
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){ if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1); fprintf(stderr,"Out of Memory!!\n");exit(1);
} }
#ifdef __linux #ifdef __linux
srandom(getpid()); srandom(getpid());
#endif #endif
fprintf(stderr, " SIZE Flops\n"); fprintf(stderr, " SIZE Flops\n");
for(m = from; m <= to; m += step) for(m = from; m <= to; m += step)
{ {
timeg=0; timeg=0;
fprintf(stderr, " %6d : ", (int)m); fprintf(stderr, " %6d : ", (int)m);
for (l=0; l<loops; l++) for (l=0; l<loops; l++)
{ {
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){ for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
} }
begin(); begin();
IMAX (&m, x, &inc_x); IMAX (&m, x, &inc_x);
end(); end();
time1 = getsec(); time1 = getsec();
timeg += time1; timeg += time1;
} }
timeg /= loops; timeg /= loops;
fprintf(stderr, fprintf(stderr,
" %10.2f MFlops %10.6f sec\n", " %10.2f MFlops %10.6f sec\n",
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg); COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg);
} }
return 0; return 0;
} }
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); // void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

View File

@ -1,114 +1,114 @@
/*************************************************************************** /***************************************************************************
Copyright (c) 2016, The OpenBLAS Project Copyright (c) 2016, The OpenBLAS Project
All rights reserved. All rights reserved.
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are modification, are permitted provided that the following conditions are
met: met:
1. Redistributions of source code must retain the above copyright 1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer. notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright 2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the the documentation and/or other materials provided with the
distribution. distribution.
3. Neither the name of the OpenBLAS project nor the names of 3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products its contributors may be used to endorse or promote products
derived from this software without specific prior written permission. derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/
#include "bench.h" #include "bench.h"
#undef IMIN #undef IMIN
#ifndef COMPLEX #ifndef COMPLEX
#ifdef DOUBLE #ifdef DOUBLE
#define IMIN BLASFUNC(idmin) #define IMIN BLASFUNC(idmin)
#else #else
#define IMIN BLASFUNC(ismin) #define IMIN BLASFUNC(ismin)
#endif #endif
#endif #endif
int main(int argc, char *argv[]){ int main(int argc, char *argv[]){
FLOAT *x; FLOAT *x;
blasint m, i; blasint m, i;
blasint inc_x=1; blasint inc_x=1;
int loops = 1; int loops = 1;
int l; int l;
char *p; char *p;
int from = 1; int from = 1;
int to = 200; int to = 200;
int step = 1; int step = 1;
double time1,timeg; double time1,timeg;
argc--;argv++; argc--;argv++;
if (argc > 0) { from = atol(*argv); argc--; argv++;} if (argc > 0) { from = atol(*argv); argc--; argv++;}
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;} if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
if (argc > 0) { step = atol(*argv); argc--; argv++;} if (argc > 0) { step = atol(*argv); argc--; argv++;}
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p); if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p); if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step,inc_x,loops); fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step,inc_x,loops);
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){ if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1); fprintf(stderr,"Out of Memory!!\n");exit(1);
} }
#ifdef __linux #ifdef __linux
srandom(getpid()); srandom(getpid());
#endif #endif
fprintf(stderr, " SIZE Flops\n"); fprintf(stderr, " SIZE Flops\n");
for(m = from; m <= to; m += step) for(m = from; m <= to; m += step)
{ {
timeg=0; timeg=0;
fprintf(stderr, " %6d : ", (int)m); fprintf(stderr, " %6d : ", (int)m);
for (l=0; l<loops; l++) for (l=0; l<loops; l++)
{ {
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){ for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
} }
begin(); begin();
IMIN (&m, x, &inc_x); IMIN (&m, x, &inc_x);
end(); end();
time1 = getsec(); time1 = getsec();
timeg += time1; timeg += time1;
} }
timeg /= loops; timeg /= loops;
fprintf(stderr, fprintf(stderr,
" %10.2f MFlops %10.6f sec\n", " %10.2f MFlops %10.6f sec\n",
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg); COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg);
} }
return 0; return 0;
} }
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); // void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

View File

@ -1,113 +1,113 @@
/*************************************************************************** /***************************************************************************
Copyright (c) 2016, The OpenBLAS Project Copyright (c) 2016, The OpenBLAS Project
All rights reserved. All rights reserved.
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are modification, are permitted provided that the following conditions are
met: met:
1. Redistributions of source code must retain the above copyright 1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer. notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright 2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the the documentation and/or other materials provided with the
distribution. distribution.
3. Neither the name of the OpenBLAS project nor the names of 3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products its contributors may be used to endorse or promote products
derived from this software without specific prior written permission. derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/
#include "bench.h" #include "bench.h"
#undef NAMAX #undef NAMAX
#ifndef COMPLEX #ifndef COMPLEX
#ifdef DOUBLE #ifdef DOUBLE
#define NAMAX BLASFUNC(dmax) #define NAMAX BLASFUNC(dmax)
#else #else
#define NAMAX BLASFUNC(smax) #define NAMAX BLASFUNC(smax)
#endif #endif
#endif #endif
int main(int argc, char *argv[]){ int main(int argc, char *argv[]){
FLOAT *x; FLOAT *x;
blasint m, i; blasint m, i;
blasint inc_x=1; blasint inc_x=1;
int loops = 1; int loops = 1;
int l; int l;
char *p; char *p;
int from = 1; int from = 1;
int to = 200; int to = 200;
int step = 1; int step = 1;
double time1,timeg; double time1,timeg;
argc--;argv++; argc--;argv++;
if (argc > 0) { from = atol(*argv); argc--; argv++;} if (argc > 0) { from = atol(*argv); argc--; argv++;}
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;} if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
if (argc > 0) { step = atol(*argv); argc--; argv++;} if (argc > 0) { step = atol(*argv); argc--; argv++;}
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p); if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p); if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step,inc_x,loops); fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step,inc_x,loops);
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){ if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1); fprintf(stderr,"Out of Memory!!\n");exit(1);
} }
#ifdef __linux #ifdef __linux
srandom(getpid()); srandom(getpid());
#endif #endif
fprintf(stderr, " SIZE Flops\n"); fprintf(stderr, " SIZE Flops\n");
for(m = from; m <= to; m += step) for(m = from; m <= to; m += step)
{ {
timeg=0; timeg=0;
fprintf(stderr, " %6d : ", (int)m); fprintf(stderr, " %6d : ", (int)m);
for (l=0; l<loops; l++) for (l=0; l<loops; l++)
{ {
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){ for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
} }
begin(); begin();
NAMAX (&m, x, &inc_x); NAMAX (&m, x, &inc_x);
end(); end();
time1 = getsec(); time1 = getsec();
timeg += time1; timeg += time1;
} }
timeg /= loops; timeg /= loops;
fprintf(stderr, fprintf(stderr,
" %10.2f MFlops %10.6f sec\n", " %10.2f MFlops %10.6f sec\n",
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg); COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg);
} }
return 0; return 0;
} }
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); // void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

View File

@ -1,113 +1,113 @@
/*************************************************************************** /***************************************************************************
Copyright (c) 2016, The OpenBLAS Project Copyright (c) 2016, The OpenBLAS Project
All rights reserved. All rights reserved.
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are modification, are permitted provided that the following conditions are
met: met:
1. Redistributions of source code must retain the above copyright 1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer. notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright 2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the the documentation and/or other materials provided with the
distribution. distribution.
3. Neither the name of the OpenBLAS project nor the names of 3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products its contributors may be used to endorse or promote products
derived from this software without specific prior written permission. derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/
#include "bench.h" #include "bench.h"
#undef NAMIN #undef NAMIN
#ifndef COMPLEX #ifndef COMPLEX
#ifdef DOUBLE #ifdef DOUBLE
#define NAMIN BLASFUNC(dmin) #define NAMIN BLASFUNC(dmin)
#else #else
#define NAMIN BLASFUNC(smin) #define NAMIN BLASFUNC(smin)
#endif #endif
#endif #endif
int main(int argc, char *argv[]){ int main(int argc, char *argv[]){
FLOAT *x; FLOAT *x;
blasint m, i; blasint m, i;
blasint inc_x=1; blasint inc_x=1;
int loops = 1; int loops = 1;
int l; int l;
char *p; char *p;
int from = 1; int from = 1;
int to = 200; int to = 200;
int step = 1; int step = 1;
double time1,timeg; double time1,timeg;
argc--;argv++; argc--;argv++;
if (argc > 0) { from = atol(*argv); argc--; argv++;} if (argc > 0) { from = atol(*argv); argc--; argv++;}
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;} if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
if (argc > 0) { step = atol(*argv); argc--; argv++;} if (argc > 0) { step = atol(*argv); argc--; argv++;}
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p); if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p); if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step,inc_x,loops); fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step,inc_x,loops);
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){ if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1); fprintf(stderr,"Out of Memory!!\n");exit(1);
} }
#ifdef __linux #ifdef __linux
srandom(getpid()); srandom(getpid());
#endif #endif
fprintf(stderr, " SIZE Flops\n"); fprintf(stderr, " SIZE Flops\n");
for(m = from; m <= to; m += step) for(m = from; m <= to; m += step)
{ {
timeg=0; timeg=0;
fprintf(stderr, " %6d : ", (int)m); fprintf(stderr, " %6d : ", (int)m);
for (l=0; l<loops; l++) for (l=0; l<loops; l++)
{ {
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){ for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
} }
begin(); begin();
NAMIN (&m, x, &inc_x); NAMIN (&m, x, &inc_x);
end(); end();
time1 = getsec(); time1 = getsec();
timeg += time1; timeg += time1;
} }
timeg /= loops; timeg /= loops;
fprintf(stderr, fprintf(stderr,
" %10.2f MFlops %10.6f sec\n", " %10.2f MFlops %10.6f sec\n",
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg); COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg);
} }
return 0; return 0;
} }
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); // void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

View File

@ -1,138 +1,138 @@
/*************************************************************************** /***************************************************************************
Copyright (c) 2014, The OpenBLAS Project Copyright (c) 2014, The OpenBLAS Project
All rights reserved. All rights reserved.
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are modification, are permitted provided that the following conditions are
met: met:
1. Redistributions of source code must retain the above copyright 1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer. notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright 2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the the documentation and/or other materials provided with the
distribution. distribution.
3. Neither the name of the OpenBLAS project nor the names of 3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products its contributors may be used to endorse or promote products
derived from this software without specific prior written permission. derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/
#include "bench.h" #include "bench.h"
#undef ROTM #undef ROTM
#ifdef DOUBLE #ifdef DOUBLE
#define ROTM BLASFUNC(drotm) #define ROTM BLASFUNC(drotm)
#else #else
#define ROTM BLASFUNC(srotm) #define ROTM BLASFUNC(srotm)
#endif #endif
int main(int argc, char *argv[]) int main(int argc, char *argv[])
{ {
FLOAT *x, *y; FLOAT *x, *y;
// FLOAT result; // FLOAT result;
blasint m, i; blasint m, i;
blasint inc_x = 1, inc_y = 1; blasint inc_x = 1, inc_y = 1;
FLOAT param[5] = {1, 2.0, 3.0, 4.0, 5.0}; FLOAT param[5] = {1, 2.0, 3.0, 4.0, 5.0};
int loops = 1; int loops = 1;
int l; int l;
char *p; char *p;
int from = 1; int from = 1;
int to = 200; int to = 200;
int step = 1; int step = 1;
double time1, timeg; double time1, timeg;
argc--; argc--;
argv++; argv++;
if (argc > 0) { if (argc > 0) {
from = atol(*argv); from = atol(*argv);
argc--; argc--;
argv++; argv++;
} }
if (argc > 0) { if (argc > 0) {
to = MAX(atol(*argv), from); to = MAX(atol(*argv), from);
argc--; argc--;
argv++; argv++;
} }
if (argc > 0) { if (argc > 0) {
step = atol(*argv); step = atol(*argv);
argc--; argc--;
argv++; argv++;
} }
if ((p = getenv("OPENBLAS_LOOPS"))) if ((p = getenv("OPENBLAS_LOOPS")))
loops = atoi(p); loops = atoi(p);
if ((p = getenv("OPENBLAS_INCX"))) if ((p = getenv("OPENBLAS_INCX")))
inc_x = atoi(p); inc_x = atoi(p);
if ((p = getenv("OPENBLAS_INCY"))) if ((p = getenv("OPENBLAS_INCY")))
inc_y = atoi(p); inc_y = atoi(p);
fprintf( fprintf(
stderr, stderr,
"From : %3d To : %3d Step = %3d Inc_x = %d Inc_y = %d Loops = %d\n", "From : %3d To : %3d Step = %3d Inc_x = %d Inc_y = %d Loops = %d\n",
from, to, step, inc_x, inc_y, loops); from, to, step, inc_x, inc_y, loops);
if ((x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == if ((x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) ==
NULL) { NULL) {
fprintf(stderr, "Out of Memory!!\n"); fprintf(stderr, "Out of Memory!!\n");
exit(1); exit(1);
} }
if ((y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == if ((y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) ==
NULL) { NULL) {
fprintf(stderr, "Out of Memory!!\n"); fprintf(stderr, "Out of Memory!!\n");
exit(1); exit(1);
} }
#ifdef __linux #ifdef __linux
srandom(getpid()); srandom(getpid());
#endif #endif
fprintf(stderr, " SIZE Flops\n"); fprintf(stderr, " SIZE Flops\n");
for (m = from; m <= to; m += step) { for (m = from; m <= to; m += step) {
timeg = 0; timeg = 0;
fprintf(stderr, " %6d : ", (int)m); fprintf(stderr, " %6d : ", (int)m);
for (i = 0; i < m * COMPSIZE * abs(inc_x); i++) { for (i = 0; i < m * COMPSIZE * abs(inc_x); i++) {
x[i] = ((FLOAT)rand() / (FLOAT)RAND_MAX) - 0.5; x[i] = ((FLOAT)rand() / (FLOAT)RAND_MAX) - 0.5;
} }
for (i = 0; i < m * COMPSIZE * abs(inc_y); i++) { for (i = 0; i < m * COMPSIZE * abs(inc_y); i++) {
y[i] = ((FLOAT)rand() / (FLOAT)RAND_MAX) - 0.5; y[i] = ((FLOAT)rand() / (FLOAT)RAND_MAX) - 0.5;
} }
for (l = 0; l < loops; l++) { for (l = 0; l < loops; l++) {
begin(); begin();
ROTM(&m, x, &inc_x, y, &inc_y, param); ROTM(&m, x, &inc_x, y, &inc_y, param);
end(); end();
time1 = getsec(); time1 = getsec();
timeg += time1; timeg += time1;
} }
timeg /= loops; timeg /= loops;
fprintf(stderr, " %10.2f MFlops %10.6f sec\n", fprintf(stderr, " %10.2f MFlops %10.6f sec\n",
COMPSIZE * COMPSIZE * 6. * (double)m / timeg * 1.e-6, timeg); COMPSIZE * COMPSIZE * 6. * (double)m / timeg * 1.e-6, timeg);
} }
return 0; return 0;
} }

View File

@ -45,7 +45,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
int main(int argc, char *argv[]){ int main(int argc, char *argv[]){
FLOAT *x, *y; FLOAT *x;
FLOAT alpha[2] = { 2.0, 2.0 }; FLOAT alpha[2] = { 2.0, 2.0 };
blasint m, i; blasint m, i;
blasint inc_x=1,inc_y=1; blasint inc_x=1,inc_y=1;
@ -74,10 +74,6 @@ int main(int argc, char *argv[]){
fprintf(stderr,"Out of Memory!!\n");exit(1); fprintf(stderr,"Out of Memory!!\n");exit(1);
} }
if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
#ifdef __linux #ifdef __linux
srandom(getpid()); srandom(getpid());
#endif #endif
@ -91,30 +87,20 @@ int main(int argc, char *argv[]){
fprintf(stderr, " %6d : ", (int)m); fprintf(stderr, " %6d : ", (int)m);
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
begin();
for (l=0; l<loops; l++) for (l=0; l<loops; l++)
{ {
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
begin();
SCAL (&m, alpha, x, &inc_x); SCAL (&m, alpha, x, &inc_x);
}
end();
end(); time1 = getsec();
time1 = getsec(); timeg = time1 / loops;
timeg += time1;
}
timeg /= loops;
#ifdef COMPLEX #ifdef COMPLEX
fprintf(stderr, " %10.2f MFlops %10.6f sec\n", 6. * (double)m / timeg * 1.e-6, timeg); fprintf(stderr, " %10.2f MFlops %10.6f sec\n", 6. * (double)m / timeg * 1.e-6, timeg);

View File

@ -1,146 +1,146 @@
/*************************************************************************** /***************************************************************************
Copyright (c) 2014, The OpenBLAS Project Copyright (c) 2014, The OpenBLAS Project
All rights reserved. All rights reserved.
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are modification, are permitted provided that the following conditions are
met: met:
1. Redistributions of source code must retain the above copyright 1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer. notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright 2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the the documentation and/or other materials provided with the
distribution. distribution.
3. Neither the name of the OpenBLAS project nor the names of 3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products its contributors may be used to endorse or promote products
derived from this software without specific prior written permission. derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/
#include "bench.h" #include "bench.h"
#undef SPMV #undef SPMV
#ifndef COMPLEX #ifndef COMPLEX
#ifdef DOUBLE #ifdef DOUBLE
#define SPMV BLASFUNC(dspmv) #define SPMV BLASFUNC(dspmv)
#else #else
#define SPMV BLASFUNC(sspmv) #define SPMV BLASFUNC(sspmv)
#endif #endif
#else #else
#ifdef DOUBLE #ifdef DOUBLE
#define SPMV BLASFUNC(zspmv) #define SPMV BLASFUNC(zspmv)
#else #else
#define SPMV BLASFUNC(cspmv) #define SPMV BLASFUNC(cspmv)
#endif #endif
#endif #endif
int main(int argc, char *argv[]){ int main(int argc, char *argv[]){
FLOAT *a, *x, *y; FLOAT *a, *x, *y;
FLOAT alpha[] = {1.0, 1.0}; FLOAT alpha[] = {1.0, 1.0};
FLOAT beta [] = {1.0, 1.0}; FLOAT beta [] = {1.0, 1.0};
char uplo='L'; char uplo='L';
blasint m, i, j; blasint m, i, j;
blasint inc_x=1,inc_y=1; blasint inc_x=1,inc_y=1;
int loops = 1; int loops = 1;
int l; int l;
char *p; char *p;
int from = 1; int from = 1;
int to = 200; int to = 200;
int step = 1; int step = 1;
double time1,timeg; double time1,timeg;
argc--;argv++; argc--;argv++;
if (argc > 0) { from = atol(*argv); argc--; argv++;} if (argc > 0) { from = atol(*argv); argc--; argv++;}
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;} if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
if (argc > 0) { step = atol(*argv); argc--; argv++;} if (argc > 0) { step = atol(*argv); argc--; argv++;}
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p); if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p); if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
if ((p = getenv("OPENBLAS_INCY"))) inc_y = atoi(p); if ((p = getenv("OPENBLAS_INCY"))) inc_y = atoi(p);
if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p; if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p;
fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = '%c' Inc_x = %d Inc_y = %d Loops = %d\n", from, to, step,uplo,inc_x,inc_y,loops); fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = '%c' Inc_x = %d Inc_y = %d Loops = %d\n", from, to, step,uplo,inc_x,inc_y,loops);
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){ if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1); fprintf(stderr,"Out of Memory!!\n");exit(1);
} }
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){ if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1); fprintf(stderr,"Out of Memory!!\n");exit(1);
} }
if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL){ if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1); fprintf(stderr,"Out of Memory!!\n");exit(1);
} }
#ifdef __linux #ifdef __linux
srandom(getpid()); srandom(getpid());
#endif #endif
fprintf(stderr, " SIZE Flops\n"); fprintf(stderr, " SIZE Flops\n");
for(m = from; m <= to; m += step) for(m = from; m <= to; m += step)
{ {
timeg=0; timeg=0;
fprintf(stderr, " %6dx%d : ", (int)m,(int)m); fprintf(stderr, " %6dx%d : ", (int)m,(int)m);
for(j = 0; j < m; j++){ for(j = 0; j < m; j++){
for(i = 0; i < m * COMPSIZE; i++){ for(i = 0; i < m * COMPSIZE; i++){
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
} }
} }
for (l=0; l<loops; l++) for (l=0; l<loops; l++)
{ {
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){ for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
} }
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){ for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
} }
begin(); begin();
SPMV (&uplo, &m, alpha, a, x, &inc_x, beta, y, &inc_y ); SPMV (&uplo, &m, alpha, a, x, &inc_x, beta, y, &inc_y );
end(); end();
time1 = getsec(); time1 = getsec();
timeg += time1; timeg += time1;
} }
timeg /= loops; timeg /= loops;
fprintf(stderr, fprintf(stderr,
" %10.2f MFlops\n", " %10.2f MFlops\n",
COMPSIZE * COMPSIZE * 2. * (double)m * (double)m / timeg * 1.e-6); COMPSIZE * COMPSIZE * 2. * (double)m * (double)m / timeg * 1.e-6);
} }
return 0; return 0;
} }
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); // void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

32
c_check
View File

@ -31,8 +31,8 @@ flags="$*"
cross_suffix="" cross_suffix=""
if [ "`dirname $compiler_name`" != '.' ]; then if [ "`dirname \"$compiler_name\"`" != '.' ]; then
cross_suffix="$cross_suffix`dirname $compiler_name`/" cross_suffix="$cross_suffix`dirname \"$compiler_name\"`/"
fi fi
bn=`basename $compiler_name` bn=`basename $compiler_name`
@ -162,7 +162,7 @@ fi
exit 1 exit 1
} }
have_msa=0 no_msa=0
if [ "$architecture" = "mips" ] || [ "$architecture" = "mips64" ]; then if [ "$architecture" = "mips" ] || [ "$architecture" = "mips64" ]; then
tmpd="$(mktemp -d)" tmpd="$(mktemp -d)"
tmpf="$tmpd/a.c" tmpf="$tmpd/a.c"
@ -172,11 +172,10 @@ if [ "$architecture" = "mips" ] || [ "$architecture" = "mips64" ]; then
printf "void main(void){ __asm__ volatile(%s); }\n" "$code" >> "$tmpf" printf "void main(void){ __asm__ volatile(%s); }\n" "$code" >> "$tmpf"
args="$msa_flags -o $tmpf.o $tmpf" args="$msa_flags -o $tmpf.o $tmpf"
have_msa=1
{ {
$compiler_name $flags $args >/dev/null 2>&1 $compiler_name $flags $args >/dev/null 2>&1
} || { } || {
have_msa=0 no_msa=1
} }
rm -rf "$tmpd" rm -rf "$tmpd"
@ -240,6 +239,21 @@ if [ "$architecture" = "riscv64" ]; then
rm -rf "$tmpd" rm -rf "$tmpd"
fi fi
no_sve=0
if [ "$architecture" = "arm64" ]; then
tmpd=`mktemp -d`
tmpf="$tmpd/a.c"
printf "#include <arm_sve.h>\n\n int main(void){}\n">> "$tmpf"
args=" -march=armv8-a+sve -c -o $tmpf.o $tmpf"
no_sve=0
{
$compiler_name $flags $args >/dev/null 2>&1
} || {
no_sve=1
}
rm -rf "$tmpd"
fi
c11_atomics=0 c11_atomics=0
case "$data" in case "$data" in
*HAVE_C11*) *HAVE_C11*)
@ -375,10 +389,8 @@ done
printf "CROSS_SUFFIX=%s\n" "$cross_suffix" printf "CROSS_SUFFIX=%s\n" "$cross_suffix"
[ "$cross" -ne 0 ] && printf "CROSS=1\n" [ "$cross" -ne 0 ] && printf "CROSS=1\n"
printf "CEXTRALIB=%s %s %s\n" "$linker_L" "$linker_l" "$linker_a" printf "CEXTRALIB=%s %s %s\n" "$linker_L" "$linker_l" "$linker_a"
[ "$have_msa" -eq 1 ] && { [ "$no_msa" -eq 1 ] && printf "NO_MSA=1\n"
printf "HAVE_MSA=1\n" [ "$no_sve" -eq 1 ] && printf "NO_SVE=1\n"
printf "MSA_FLAGS=%s\n" "$msa_flags"
}
[ "$no_rv64gv" -eq 1 ] && printf "NO_RV64GV=1\n" [ "$no_rv64gv" -eq 1 ] && printf "NO_RV64GV=1\n"
[ "$no_avx512" -eq 1 ] && printf "NO_AVX512=1\n" [ "$no_avx512" -eq 1 ] && printf "NO_AVX512=1\n"
[ "$no_avx2" -eq 1 ] && printf "NO_AVX2=1\n" [ "$no_avx2" -eq 1 ] && printf "NO_AVX2=1\n"
@ -396,7 +408,7 @@ compiler=`echo "$compiler" | tr '[[:lower:]]' '[[:upper:]]' `
[ "$binformat" = "bin32" ] && printf "#define __32BIT__\t1\n" [ "$binformat" = "bin32" ] && printf "#define __32BIT__\t1\n"
[ "$binformat" = "bin64" ] && printf "#define __64BIT__\t1\n" [ "$binformat" = "bin64" ] && printf "#define __64BIT__\t1\n"
[ -n "$need_fu" ] && printf "#define FUNDERSCORE\t%s\n" "$need_fu" [ -n "$need_fu" ] && printf "#define FUNDERSCORE\t%s\n" "$need_fu"
[ "$have_msa" -eq 1 ] && printf "#define HAVE_MSA\t1\n" [ "$no_msa" -eq 1 ] && printf "#define NO_MSA\t1\n"
[ "$c11_atomics" -eq 1 ] && printf "#define HAVE_C11\t1\n" [ "$c11_atomics" -eq 1 ] && printf "#define HAVE_C11\t1\n"
} >> "$config" } >> "$config"

View File

@ -44,9 +44,12 @@ endif ()
if (DYNAMIC_ARCH) if (DYNAMIC_ARCH)
if (ARM64) if (ARM64)
set(DYNAMIC_CORE ARMV8 CORTEXA53 CORTEXA55 CORTEXA57 CORTEXA72 CORTEXA73 FALKOR THUNDERX THUNDERX2T99 TSV110 EMAG8180 NEOVERSEN1 NEOVERSEV1 NEOVERSEN2 THUNDERX3T110) set(DYNAMIC_CORE ARMV8 CORTEXA53 CORTEXA55 CORTEXA57 CORTEXA72 CORTEXA73 FALKOR THUNDERX THUNDERX2T99 TSV110 EMAG8180 NEOVERSEN1 THUNDERX3T110)
if (${CMAKE_C_COMPILER_VERSION} VERSION_GREATER 9.99)
set(DYNAMIC_CORE "${DYNAMIC_CORE} NEOVERSEV1 NEOVERSEN2")
endif ()
if (DYNAMIC_LIST) if (DYNAMIC_LIST)
set(DYNAMIC_CORE ARMV8 ${DYNAMIC_LIST}) set(DYNAMIC_CORE ARMV8 ${DYNAMIC_LIST})
endif () endif ()
endif () endif ()
@ -132,3 +135,8 @@ if (ARM64)
set(BINARY_DEFINED 1) set(BINARY_DEFINED 1)
endif () endif ()
if (${ARCH} STREQUAL "riscv64")
set(NO_BINARY_MODE 1)
set(BINARY_DEFINED 1)
endif ()

View File

@ -144,6 +144,21 @@ if (${CORE} STREQUAL SAPPHIRERAPIDS)
endif () endif ()
endif () endif ()
if (${CORE} STREQUAL ZEN)
if (HAVE_AVX512VL)
if (NOT DYNAMIC_ARCH)
if (NOT NO_AVX512)
execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
if (${GCC_VERSION} VERSION_GREATER 13.0 OR ${GCC_VERSION} VERSION_EQUAL 13.0)
set (CCOMMON_OPT "${CCOMMON_OPT} -march=znver4")
else ()
set (CCOMMON_OPT "${CCOMMON_OPT} -march=skylake-avx512")
endif ()
endif ()
endif ()
endif ()
endif ()
if (${CORE} STREQUAL A64FX) if (${CORE} STREQUAL A64FX)
if (NOT DYNAMIC_ARCH) if (NOT DYNAMIC_ARCH)
execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION) execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
@ -155,6 +170,39 @@ if (${CORE} STREQUAL A64FX)
endif () endif ()
endif () endif ()
if (${CORE} STREQUAL NEOVERSEN2)
if (NOT DYNAMIC_ARCH)
execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
if (${GCC_VERSION} VERSION_GREATER 10.4 OR ${GCC_VERSION} VERSION_EQUAL 10.4)
set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8.5-a+sve+sve2+bf16 -mtune=neoverse-n2")
else ()
set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8.2-a+sve")
endif()
endif ()
endif ()
if (${CORE} STREQUAL NEOVERSEV1)
if (NOT DYNAMIC_ARCH)
execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
if (${GCC_VERSION} VERSION_GREATER 10.4 OR ${GCC_VERSION} VERSION_EQUAL 10.4)
set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8.4-a+sve -mtune=neoverse-v1")
else ()
set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8.2-a+sve")
endif()
endif ()
endif ()
if (${CORE} STREQUAL NEOVERSEN1)
if (NOT DYNAMIC_ARCH)
execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
if (${GCC_VERSION} VERSION_GREATER 9.4 OR ${GCC_VERSION} VERSION_EQUAL 9.4)
set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8.2-a+sve -mtune=neoverse-n1")
else ()
set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8.2-a+sve")
endif()
endif ()
endif ()
if (${CORE} STREQUAL ARMV8SVE) if (${CORE} STREQUAL ARMV8SVE)
if (NOT DYNAMIC_ARCH) if (NOT DYNAMIC_ARCH)
set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8-a+sve") set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8-a+sve")

View File

@ -46,7 +46,7 @@ if (${F_COMPILER} STREQUAL "GFORTRAN")
set(FCOMMON_OPT "${FCOMMON_OPT} -fno-optimize-sibling-calls") set(FCOMMON_OPT "${FCOMMON_OPT} -fno-optimize-sibling-calls")
#Don't include -lgfortran, when NO_LAPACK=1 or lsbcc #Don't include -lgfortran, when NO_LAPACK=1 or lsbcc
if (NOT NO_LAPACK) if (NOT NO_LAPACK)
set(EXTRALIB "{EXTRALIB} -lgfortran") set(EXTRALIB "${EXTRALIB} -lgfortran")
endif () endif ()
if (NO_BINARY_MODE) if (NO_BINARY_MODE)
if (MIPS64) if (MIPS64)

File diff suppressed because it is too large Load Diff

View File

@ -318,6 +318,8 @@ set(CSRC
lapacke_clacn2.c lapacke_clacn2.c
lapacke_clag2z.c lapacke_clag2z.c
lapacke_clag2z_work.c lapacke_clag2z_work.c
lapacke_clangb.c
lapacke_clangb_work.c
lapacke_clange.c lapacke_clange.c
lapacke_clange_work.c lapacke_clange_work.c
lapacke_clanhe.c lapacke_clanhe.c
@ -803,6 +805,8 @@ set(DSRC
lapacke_dlag2s_work.c lapacke_dlag2s_work.c
lapacke_dlamch.c lapacke_dlamch.c
lapacke_dlamch_work.c lapacke_dlamch_work.c
lapacke_dlangb.c
lapacke_dlangb_work.c
lapacke_dlange.c lapacke_dlange.c
lapacke_dlange_work.c lapacke_dlange_work.c
lapacke_dlansy.c lapacke_dlansy.c
@ -1381,6 +1385,8 @@ set(SSRC
lapacke_slag2d_work.c lapacke_slag2d_work.c
lapacke_slamch.c lapacke_slamch.c
lapacke_slamch_work.c lapacke_slamch_work.c
lapacke_slangb.c
lapacke_slangb_work.c
lapacke_slange.c lapacke_slange.c
lapacke_slange_work.c lapacke_slange_work.c
lapacke_slansy.c lapacke_slansy.c
@ -2089,6 +2095,8 @@ set(ZSRC
lapacke_zlacrm_work.c lapacke_zlacrm_work.c
lapacke_zlag2c.c lapacke_zlag2c.c
lapacke_zlag2c_work.c lapacke_zlag2c_work.c
lapacke_zlangb.c
lapacke_zlangb_work.c
lapacke_zlange.c lapacke_zlange.c
lapacke_zlange_work.c lapacke_zlange_work.c
lapacke_zlanhe.c lapacke_zlanhe.c
@ -2481,6 +2489,8 @@ set(Utils_SRC
lapacke_ctp_nancheck.c lapacke_dtr_trans.c lapacke_str_trans.c lapacke_ztp_trans.c lapacke_ctp_nancheck.c lapacke_dtr_trans.c lapacke_str_trans.c lapacke_ztp_trans.c
lapacke_ctp_trans.c lapacke_lsame.c lapacke_xerbla.c lapacke_ztr_nancheck.c lapacke_ctp_trans.c lapacke_lsame.c lapacke_xerbla.c lapacke_ztr_nancheck.c
lapacke_ctr_nancheck.c lapacke_make_complex_double.c lapacke_z_nancheck.c lapacke_ztr_trans.c lapacke_ctr_nancheck.c lapacke_make_complex_double.c lapacke_z_nancheck.c lapacke_ztr_trans.c
lapacke_ctz_nancheck.c lapacke_ctz_trans.c lapacke_dtz_nancheck.c lapacke_dtz_trans.c
lapacke_stz_nancheck.c lapacke_stz_trans.c lapacke_ztz_nancheck.c lapacke_ztz_trans.c
) )
set(LAPACKE_REL_SRC "") set(LAPACKE_REL_SRC "")

View File

@ -2,7 +2,7 @@ libdir=@CMAKE_INSTALL_FULL_LIBDIR@
libsuffix=@SUFFIX64_UNDERSCORE@ libsuffix=@SUFFIX64_UNDERSCORE@
includedir=@CMAKE_INSTALL_FULL_INCLUDEDIR@ includedir=@CMAKE_INSTALL_FULL_INCLUDEDIR@
openblas_config=USE_64BITINT=@USE_64BITINT@ NO_CBLAS=@NO_CBLAS@ NO_LAPACK=@NO_LAPACK@ NO_LAPACKE=@NO_LAPACKE@ DYNAMIC_ARCH=@DYNAMIC_ARCH@ DYNAMIC_OLDER=@DYNAMIC_OLDER@ NO_AFFINITY=@NO_AFFINITY@ USE_OPENMP=@USE_OPENMP@ @CORE@ MAX_THREADS=@NUM_THREADS@ openblas_config=USE_64BITINT=@INTERFACE64@ NO_CBLAS=@NO_CBLAS@ NO_LAPACK=@NO_LAPACK@ NO_LAPACKE=@NO_LAPACKE@ DYNAMIC_ARCH=@DYNAMIC_ARCH@ DYNAMIC_OLDER=@DYNAMIC_OLDER@ NO_AFFINITY=@NO_AFFINITY@ USE_OPENMP=@USE_OPENMP@ @CORE@ MAX_THREADS=@NUM_THREADS@
Name: OpenBLAS Name: OpenBLAS
Description: OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version Description: OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version
Version: @OPENBLAS_VERSION@ Version: @OPENBLAS_VERSION@

View File

@ -823,6 +823,41 @@ if (DEFINED CORE AND CMAKE_CROSSCOMPILING AND NOT (${HOST_OS} STREQUAL "WINDOWSS
set(CGEMM3M_UNROLL_N 4) set(CGEMM3M_UNROLL_N 4)
set(ZGEMM3M_UNROLL_M 4) set(ZGEMM3M_UNROLL_M 4)
set(ZGEMM3M_UNROLL_N 4) set(ZGEMM3M_UNROLL_N 4)
elseif ("${TCORE}" STREQUAL "ARMV5")
file(APPEND ${TARGET_CONF_TEMP}
"#define L1_DATA_SIZE\t65536\n"
"#define L1_DATA_LINESIZE\t32\n"
"#define L2_SIZE\t512488\n"
"#define L2_LINESIZE\t32\n"
"#define DTB_DEFAULT_ENTRIES\t64\n"
"#define DTB_SIZE\t4096\n"
"#define L2_ASSOCIATIVE\t4\n")
set(SGEMM_UNROLL_M 2)
set(SGEMM_UNROLL_N 2)
set(DGEMM_UNROLL_M 2)
set(DGEMM_UNROLL_N 2)
set(CGEMM_UNROLL_M 2)
set(CGEMM_UNROLL_N 2)
set(ZGEMM_UNROLL_M 2)
set(ZGEMM_UNROLL_N 2)
elseif ("${TCORE}" STREQUAL "ARMV6")
file(APPEND ${TARGET_CONF_TEMP}
"#define L1_DATA_SIZE\t65536\n"
"#define L1_DATA_LINESIZE\t32\n"
"#define L2_SIZE\t512488\n"
"#define L2_LINESIZE\t32\n"
"#define DTB_DEFAULT_ENTRIES\t64\n"
"#define DTB_SIZE\t4096\n"
"#define L2_ASSOCIATIVE\t4\n"
"#define HAVE_VFP\n")
set(SGEMM_UNROLL_M 4)
set(SGEMM_UNROLL_N 2)
set(DGEMM_UNROLL_M 4)
set(DGEMM_UNROLL_N 2)
set(CGEMM_UNROLL_M 2)
set(CGEMM_UNROLL_N 2)
set(ZGEMM_UNROLL_M 2)
set(ZGEMM_UNROLL_N 2)
elseif ("${TCORE}" STREQUAL "ARMV7") elseif ("${TCORE}" STREQUAL "ARMV7")
file(APPEND ${TARGET_CONF_TEMP} file(APPEND ${TARGET_CONF_TEMP}
"#define L1_DATA_SIZE\t65536\n" "#define L1_DATA_SIZE\t65536\n"
@ -886,7 +921,11 @@ else ()
set(SGEMM_UNROLL_M 8) set(SGEMM_UNROLL_M 8)
set(SGEMM_UNROLL_N 8) set(SGEMM_UNROLL_N 8)
endif () endif ()
if ("${TCORE}" STREQUAL "CORTEXA53")
set(DGEMM_UNROLL_M 4)
else ()
set(DGEMM_UNROLL_M 8) set(DGEMM_UNROLL_M 8)
endif ()
set(DGEMM_UNROLL_N 4) set(DGEMM_UNROLL_N 4)
set(CGEMM_UNROLL_M 8) set(CGEMM_UNROLL_M 8)
set(CGEMM_UNROLL_N 4) set(CGEMM_UNROLL_N 4)
@ -1319,16 +1358,25 @@ else(NOT CMAKE_CROSSCOMPILING)
set(GETARCH_DIR "${PROJECT_BINARY_DIR}/getarch_build") set(GETARCH_DIR "${PROJECT_BINARY_DIR}/getarch_build")
set(GETARCH_BIN "getarch${CMAKE_EXECUTABLE_SUFFIX}") set(GETARCH_BIN "getarch${CMAKE_EXECUTABLE_SUFFIX}")
file(MAKE_DIRECTORY ${GETARCH_DIR}) file(MAKE_DIRECTORY "${GETARCH_DIR}")
configure_file(${TARGET_CONF_TEMP} ${GETARCH_DIR}/${TARGET_CONF} COPYONLY) configure_file("${TARGET_CONF_TEMP}" "${GETARCH_DIR}/${TARGET_CONF}" COPYONLY)
if (NOT "${CMAKE_SYSTEM_NAME}" STREQUAL "WindowsStore") if (NOT "${CMAKE_SYSTEM_NAME}" STREQUAL "WindowsStore")
try_compile(GETARCH_RESULT ${GETARCH_DIR} if (CMAKE_ASM_COMPILER_ID STREQUAL "")
SOURCES ${GETARCH_SRC} try_compile(GETARCH_RESULT "${GETARCH_DIR}"
COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} -I"${GETARCH_DIR}" -I"${PROJECT_SOURCE_DIR}" -I"${PROJECT_BINARY_DIR}" SOURCES ${GETARCH_SRC}
OUTPUT_VARIABLE GETARCH_LOG CMAKE_FLAGS "-DCMAKE_ASM_COMPILER=${CMAKE_C_COMPILER}"
COPY_FILE ${PROJECT_BINARY_DIR}/${GETARCH_BIN} COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} -I"${GETARCH_DIR}" -I"${PROJECT_SOURCE_DIR}" -I"${PROJECT_BINARY_DIR}"
) OUTPUT_VARIABLE GETARCH_LOG
COPY_FILE "${PROJECT_BINARY_DIR}/${GETARCH_BIN}"
)
else()
try_compile(GETARCH_RESULT "${GETARCH_DIR}"
SOURCES ${GETARCH_SRC}
COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} -I"${GETARCH_DIR}" -I"${PROJECT_SOURCE_DIR}" -I"${PROJECT_BINARY_DIR}"
OUTPUT_VARIABLE GETARCH_LOG
COPY_FILE "${PROJECT_BINARY_DIR}/${GETARCH_BIN}"
)
endif()
if (NOT ${GETARCH_RESULT}) if (NOT ${GETARCH_RESULT})
MESSAGE(FATAL_ERROR "Compiling getarch failed ${GETARCH_LOG}") MESSAGE(FATAL_ERROR "Compiling getarch failed ${GETARCH_LOG}")
endif () endif ()
@ -1357,19 +1405,19 @@ execute_process(COMMAND "${PROJECT_BINARY_DIR}/${GETARCH_BIN}" 1 OUTPUT_VARIABLE
message(STATUS "GETARCH results:\n${GETARCH_MAKE_OUT}") message(STATUS "GETARCH results:\n${GETARCH_MAKE_OUT}")
# append config data from getarch to the TARGET file and read in CMake vars # append config data from getarch to the TARGET file and read in CMake vars
file(APPEND ${TARGET_CONF_TEMP} ${GETARCH_CONF_OUT}) file(APPEND "${TARGET_CONF_TEMP}" ${GETARCH_CONF_OUT})
ParseGetArchVars(${GETARCH_MAKE_OUT}) ParseGetArchVars(${GETARCH_MAKE_OUT})
set(GETARCH2_DIR "${PROJECT_BINARY_DIR}/getarch2_build") set(GETARCH2_DIR "${PROJECT_BINARY_DIR}/getarch2_build")
set(GETARCH2_BIN "getarch_2nd${CMAKE_EXECUTABLE_SUFFIX}") set(GETARCH2_BIN "getarch_2nd${CMAKE_EXECUTABLE_SUFFIX}")
file(MAKE_DIRECTORY ${GETARCH2_DIR}) file(MAKE_DIRECTORY "${GETARCH2_DIR}")
configure_file(${TARGET_CONF_TEMP} ${GETARCH2_DIR}/${TARGET_CONF} COPYONLY) configure_file("${TARGET_CONF_TEMP}" "${GETARCH2_DIR}/${TARGET_CONF}" COPYONLY)
if (NOT "${CMAKE_SYSTEM_NAME}" STREQUAL "WindowsStore") if (NOT "${CMAKE_SYSTEM_NAME}" STREQUAL "WindowsStore")
try_compile(GETARCH2_RESULT ${GETARCH2_DIR} try_compile(GETARCH2_RESULT "${GETARCH2_DIR}"
SOURCES ${PROJECT_SOURCE_DIR}/getarch_2nd.c SOURCES "${PROJECT_SOURCE_DIR}/getarch_2nd.c"
COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} ${GETARCH2_FLAGS} -I"${GETARCH2_DIR}" -I"${PROJECT_SOURCE_DIR}" -I"${PROJECT_BINARY_DIR}" COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} ${GETARCH2_FLAGS} -I"${GETARCH2_DIR}" -I"${PROJECT_SOURCE_DIR}" -I"${PROJECT_BINARY_DIR}"
OUTPUT_VARIABLE GETARCH2_LOG OUTPUT_VARIABLE GETARCH2_LOG
COPY_FILE ${PROJECT_BINARY_DIR}/${GETARCH2_BIN} COPY_FILE "${PROJECT_BINARY_DIR}/${GETARCH2_BIN}"
) )
if (NOT ${GETARCH2_RESULT}) if (NOT ${GETARCH2_RESULT})
@ -1382,9 +1430,9 @@ execute_process(COMMAND "${PROJECT_BINARY_DIR}/${GETARCH2_BIN}" 0 OUTPUT_VARIABL
execute_process(COMMAND "${PROJECT_BINARY_DIR}/${GETARCH2_BIN}" 1 OUTPUT_VARIABLE GETARCH2_CONF_OUT) execute_process(COMMAND "${PROJECT_BINARY_DIR}/${GETARCH2_BIN}" 1 OUTPUT_VARIABLE GETARCH2_CONF_OUT)
# append config data from getarch_2nd to the TARGET file and read in CMake vars # append config data from getarch_2nd to the TARGET file and read in CMake vars
file(APPEND ${TARGET_CONF_TEMP} ${GETARCH2_CONF_OUT}) file(APPEND "${TARGET_CONF_TEMP}" ${GETARCH2_CONF_OUT})
configure_file(${TARGET_CONF_TEMP} ${TARGET_CONF_DIR}/${TARGET_CONF} COPYONLY) configure_file("${TARGET_CONF_TEMP}" "${TARGET_CONF_DIR}/${TARGET_CONF}" COPYONLY)
ParseGetArchVars(${GETARCH2_MAKE_OUT}) ParseGetArchVars(${GETARCH2_MAKE_OUT})

View File

@ -172,9 +172,9 @@ if (DEFINED TARGET)
endif() endif()
elseif (${CMAKE_C_COMPILER_ID} STREQUAL "Clang" OR ${CMAKE_C_COMPILER_ID} STREQUAL "AppleClang") elseif (${CMAKE_C_COMPILER_ID} STREQUAL "Clang" OR ${CMAKE_C_COMPILER_ID} STREQUAL "AppleClang")
if (${CMAKE_C_COMPILER_VERSION} VERSION_GREATER 8.99) if (${CMAKE_C_COMPILER_VERSION} VERSION_GREATER 8.99)
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=cooperlake") set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=cooperlake -exhaustive-register-search")
else() else()
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=skylake-avx512") set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=skylake-avx512 -exhaustive-register-search")
endif() endif()
endif() endif()
endif() endif()
@ -188,23 +188,45 @@ if (DEFINED TARGET)
endif() endif()
elseif (${CMAKE_C_COMPILER_ID} STREQUAL "Clang" OR ${CMAKE_C_COMPILER_ID} STREQUAL "AppleClang") elseif (${CMAKE_C_COMPILER_ID} STREQUAL "Clang" OR ${CMAKE_C_COMPILER_ID} STREQUAL "AppleClang")
if (${CMAKE_C_COMPILER_VERSION} VERSION_GREATER 12.0) if (${CMAKE_C_COMPILER_VERSION} VERSION_GREATER 12.0)
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=sapphirerapids") set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=sapphirerapids -exhaustive-register-search")
else() else()
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=skylake-avx512") set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=skylake-avx512 -exhaustive-register-search")
endif() endif()
endif() endif()
endif() endif()
if (${TARGET} STREQUAL SKYLAKEX AND NOT NO_AVX512) if (${TARGET} STREQUAL SKYLAKEX AND NOT NO_AVX512)
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=skylake-avx512") set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=skylake-avx512")
if (${CMAKE_C_COMPILER_ID} STREQUAL "Clang" OR ${CMAKE_C_COMPILER_ID} STREQUAL "AppleClang")
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -exhaustive-register-search")
endif()
endif() endif()
if (${TARGET} STREQUAL HASWELL AND NOT NO_AVX2)
if (((${TARGET} STREQUAL ZEN) AND HAVE_AVX512VL) AND NOT NO_AVX512)
if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU")
execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
if (${CMAKE_C_COMPILER_VERSION} VERSION_GREATER 12.99)
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=znver4")
else()
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=skylake-avx512")
endif()
elseif (${CMAKE_C_COMPILER_ID} STREQUAL "Clang" OR ${CMAKE_C_COMPILER_ID} STREQUAL "AppleClang")
if (${CMAKE_C_COMPILER_VERSION} VERSION_GREATER 15.99)
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=znver4")
else()
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=skylake-avx512")
endif()
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -exhaustive-register-search")
endif()
endif()
if ((${TARGET} STREQUAL HASWELL OR (${TARGET} STREQUAL ZEN AND NOT HAVE_AVX512VL)) AND NOT NO_AVX2)
if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU") if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU")
execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION) execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
if (${GCC_VERSION} VERSION_GREATER 4.7 OR ${GCC_VERSION} VERSION_EQUAL 4.7) if (${GCC_VERSION} VERSION_GREATER 4.7 OR ${GCC_VERSION} VERSION_EQUAL 4.7)
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx2") set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx2")
endif() endif()
elseif (${CMAKE_C_COMPILER_ID} STREQUAL "CLANG") elseif (${CMAKE_C_COMPILER_ID} STREQUAL "CLANG")
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx2") set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx2 -mfma")
endif() endif()
endif() endif()
if (DEFINED HAVE_AVX) if (DEFINED HAVE_AVX)

View File

@ -90,7 +90,7 @@ extern "C" {
#endif #endif
#include <time.h> #include <time.h>
#ifdef OS_LINUX #if defined(OS_LINUX) || defined(OS_QNX)
#include <malloc.h> #include <malloc.h>
#include <sched.h> #include <sched.h>
#endif #endif
@ -107,7 +107,7 @@ extern "C" {
#endif #endif
#endif #endif
#ifdef OS_HAIKU #if defined(OS_HAIKU) || defined(OS_QNX)
#define NO_SYSV_IPC #define NO_SYSV_IPC
#endif #endif
@ -387,6 +387,10 @@ typedef int blasint;
#endif #endif
*/ */
#ifdef __EMSCRIPTEN__
#define YIELDING
#endif
#ifndef YIELDING #ifndef YIELDING
#define YIELDING sched_yield() #define YIELDING sched_yield()
#endif #endif

View File

@ -43,7 +43,7 @@
#define MB asm("mb") #define MB asm("mb")
#define WMB asm("wmb") #define WMB asm("wmb")
#define RMB asm("rmb") #define RMB asm("mb")
static void __inline blas_lock(unsigned long *address){ static void __inline blas_lock(unsigned long *address){
#ifndef __DECC #ifndef __DECC

View File

@ -2612,7 +2612,7 @@
#ifndef ASSEMBLER #ifndef ASSEMBLER
#if !defined(DYNAMIC_ARCH) \ #if !defined(DYNAMIC_ARCH) \
&& (defined(ARCH_X86) || defined(ARCH_X86_64) || defined(ARCH_IA64) || defined(ARCH_MIPS64) || defined(ARCH_ARM64) \ && (defined(ARCH_X86) || defined(ARCH_X86_64) || defined(ARCH_IA64) || defined(ARCH_MIPS64) || defined(ARCH_ARM64) \
|| defined(ARCH_LOONGARCH64) || defined(ARCH_E2K)) || defined(ARCH_LOONGARCH64) || defined(ARCH_E2K) || defined(ARCH_ALPHA))
extern BLASLONG gemm_offset_a; extern BLASLONG gemm_offset_a;
extern BLASLONG gemm_offset_b; extern BLASLONG gemm_offset_b;
extern BLASLONG sbgemm_p; extern BLASLONG sbgemm_p;

View File

@ -86,7 +86,9 @@ static inline unsigned int rpcc(void){
//__asm__ __volatile__("dmfc0 %0, $25, 1": "=r"(tmp):: "memory"); //__asm__ __volatile__("dmfc0 %0, $25, 1": "=r"(tmp):: "memory");
//ret=tmp; //ret=tmp;
__asm__ __volatile__(".set push \n" __asm__ __volatile__(".set push \n"
#if !defined(__mips_isa_rev) || __mips_isa_rev < 2
".set mips32r2\n" ".set mips32r2\n"
#endif
"rdhwr %0, $2\n" "rdhwr %0, $2\n"
".set pop": "=r"(ret):: "memory"); ".set pop": "=r"(ret):: "memory");
@ -99,7 +101,9 @@ static inline unsigned int rpcc(void){
static inline int WhereAmI(void){ static inline int WhereAmI(void){
int ret=0; int ret=0;
__asm__ __volatile__(".set push \n" __asm__ __volatile__(".set push \n"
#if !defined(__mips_isa_rev) || __mips_isa_rev < 2
".set mips32r2\n" ".set mips32r2\n"
#endif
"rdhwr %0, $0\n" "rdhwr %0, $0\n"
".set pop": "=r"(ret):: "memory"); ".set pop": "=r"(ret):: "memory");
return ret; return ret;
@ -197,9 +201,15 @@ static inline int blas_quickdivide(blasint x, blasint y){
#if defined(ASSEMBLER) && !defined(NEEDPARAM) #if defined(ASSEMBLER) && !defined(NEEDPARAM)
#if defined(__mips_isa_rev) && __mips_isa_rev >= 6
#define ASSEMBLER_ARCH mips64r6
#else
#define ASSEMBLER_ARCH mips64
#endif
#define PROLOGUE \ #define PROLOGUE \
.text ;\ .text ;\
.set mips64 ;\ .set ASSEMBLER_ARCH ;\
.align 5 ;\ .align 5 ;\
.globl REALNAME ;\ .globl REALNAME ;\
.ent REALNAME ;\ .ent REALNAME ;\

View File

@ -47,9 +47,10 @@ typedef struct {
int dtb_entries; int dtb_entries;
int offsetA, offsetB, align; int offsetA, offsetB, align;
#ifdef BUILD_BFLOAT16 #if BUILD_BFLOAT16 == 1
int sbgemm_p, sbgemm_q, sbgemm_r; int sbgemm_p, sbgemm_q, sbgemm_r;
int sbgemm_unroll_m, sbgemm_unroll_n, sbgemm_unroll_mn; int sbgemm_unroll_m, sbgemm_unroll_n, sbgemm_unroll_mn;
int sbgemm_align_k;
void (*sbstobf16_k) (BLASLONG, float *, BLASLONG, bfloat16 *, BLASLONG); void (*sbstobf16_k) (BLASLONG, float *, BLASLONG, bfloat16 *, BLASLONG);
void (*sbdtobf16_k) (BLASLONG, double *, BLASLONG, bfloat16 *, BLASLONG); void (*sbdtobf16_k) (BLASLONG, double *, BLASLONG, bfloat16 *, BLASLONG);
@ -160,51 +161,59 @@ BLASLONG (*isbmin_k) (BLASLONG, float *, BLASLONG);
#endif #endif
#endif #endif
#if defined(BUILD_SINGLE) || defined(BUILD_COMPLEX) #if (BUILD_SINGLE == 1) || (BUILD_DOUBLE == 1) || (BUILD_COMPLEX == 1) || (BUILD_COMPLEX16 == 1)
int sgemm_p, sgemm_q, sgemm_r; int sgemm_p, sgemm_q, sgemm_r;
int sgemm_unroll_m, sgemm_unroll_n, sgemm_unroll_mn; int sgemm_unroll_m, sgemm_unroll_n, sgemm_unroll_mn;
#endif #endif
int exclusive_cache; int exclusive_cache;
#if defined(BUILD_SINGLE) || defined(BUILD_COMPLEX) #if (BUILD_SINGLE == 1) || (BUILD_COMPLEX == 1)
float (*samax_k) (BLASLONG, float *, BLASLONG); float (*samax_k) (BLASLONG, float *, BLASLONG);
float (*samin_k) (BLASLONG, float *, BLASLONG); float (*samin_k) (BLASLONG, float *, BLASLONG);
float (*smax_k) (BLASLONG, float *, BLASLONG); float (*smax_k) (BLASLONG, float *, BLASLONG);
float (*smin_k) (BLASLONG, float *, BLASLONG); float (*smin_k) (BLASLONG, float *, BLASLONG);
#endif
#if (BUILD_SINGLE==1) || (BUILD_DOUBLE ==1) || (BUILD_COMPLEX==1)
BLASLONG (*isamax_k)(BLASLONG, float *, BLASLONG); BLASLONG (*isamax_k)(BLASLONG, float *, BLASLONG);
#endif
#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
BLASLONG (*isamin_k)(BLASLONG, float *, BLASLONG); BLASLONG (*isamin_k)(BLASLONG, float *, BLASLONG);
BLASLONG (*ismax_k) (BLASLONG, float *, BLASLONG); BLASLONG (*ismax_k) (BLASLONG, float *, BLASLONG);
BLASLONG (*ismin_k) (BLASLONG, float *, BLASLONG); BLASLONG (*ismin_k) (BLASLONG, float *, BLASLONG);
float (*snrm2_k) (BLASLONG, float *, BLASLONG); float (*snrm2_k) (BLASLONG, float *, BLASLONG);
float (*sasum_k) (BLASLONG, float *, BLASLONG); float (*sasum_k) (BLASLONG, float *, BLASLONG);
#endif #endif
#ifdef BUILD_SINGLE #if (BUILD_SINGLE==1)
float (*ssum_k) (BLASLONG, float *, BLASLONG); float (*ssum_k) (BLASLONG, float *, BLASLONG);
#endif #endif
#if defined(BUILD_SINGLE) || defined(BUILD_COMPLEX) #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
int (*scopy_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG); int (*scopy_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG);
float (*sdot_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG); float (*sdot_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG);
//double (*dsdot_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG); //double (*dsdot_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG);
int (*srot_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG, float, float); int (*srot_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG, float, float);
#endif
#if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
int (*saxpy_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG); int (*saxpy_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
#endif
#if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1) || (BUILD_COMPLEX16==1)
int (*sscal_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG); int (*sscal_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
#endif
#if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
int (*sswap_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG); int (*sswap_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
int (*sgemv_n) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *); int (*sgemv_n) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
int (*sgemv_t) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *); int (*sgemv_t) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
#endif #endif
#ifdef BUILD_SINGLE #if (BUILD_SINGLE==1)
int (*sger_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *); int (*sger_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
int (*ssymv_L) (BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *); int (*ssymv_L) (BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
int (*ssymv_U) (BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *); int (*ssymv_U) (BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
#endif #endif
#if defined(BUILD_SINGLE) || defined(BUILD_COMPLEX) #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
#ifdef ARCH_X86_64 #ifdef ARCH_X86_64
void (*sgemm_direct) (BLASLONG, BLASLONG, BLASLONG, float *, BLASLONG , float *, BLASLONG , float * , BLASLONG); void (*sgemm_direct) (BLASLONG, BLASLONG, BLASLONG, float *, BLASLONG , float *, BLASLONG , float * , BLASLONG);
int (*sgemm_direct_performant) (BLASLONG M, BLASLONG N, BLASLONG K); int (*sgemm_direct_performant) (BLASLONG M, BLASLONG N, BLASLONG K);
@ -219,7 +228,7 @@ BLASLONG (*ismin_k) (BLASLONG, float *, BLASLONG);
int (*sgemm_oncopy )(BLASLONG, BLASLONG, float *, BLASLONG, float *); int (*sgemm_oncopy )(BLASLONG, BLASLONG, float *, BLASLONG, float *);
int (*sgemm_otcopy )(BLASLONG, BLASLONG, float *, BLASLONG, float *); int (*sgemm_otcopy )(BLASLONG, BLASLONG, float *, BLASLONG, float *);
#endif #endif
#ifdef BUILD_SINGLE #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
#ifdef SMALL_MATRIX_OPT #ifdef SMALL_MATRIX_OPT
int (*sgemm_small_matrix_permit)(int transa, int transb, BLASLONG m, BLASLONG n, BLASLONG k, float alpha, float beta); int (*sgemm_small_matrix_permit)(int transa, int transb, BLASLONG m, BLASLONG n, BLASLONG k, float alpha, float beta);
@ -255,7 +264,8 @@ BLASLONG (*ismin_k) (BLASLONG, float *, BLASLONG);
int (*strsm_olnncopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *); int (*strsm_olnncopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *);
int (*strsm_oltucopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *); int (*strsm_oltucopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *);
int (*strsm_oltncopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *); int (*strsm_oltncopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *);
#endif
#if (BUILD_SINGLE==1)
int (*strmm_kernel_RN)(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG, BLASLONG); int (*strmm_kernel_RN)(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG, BLASLONG);
int (*strmm_kernel_RT)(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG, BLASLONG); int (*strmm_kernel_RT)(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG, BLASLONG);
int (*strmm_kernel_LN)(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG, BLASLONG); int (*strmm_kernel_LN)(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG, BLASLONG);
@ -287,12 +297,12 @@ BLASLONG (*ismin_k) (BLASLONG, float *, BLASLONG);
int (*slaswp_ncopy) (BLASLONG, BLASLONG, BLASLONG, float *, BLASLONG, blasint *, float *); int (*slaswp_ncopy) (BLASLONG, BLASLONG, BLASLONG, float *, BLASLONG, blasint *, float *);
#endif #endif
#if defined(BUILD_DOUBLE) || defined(BUILD_COMPLEX16) #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
int dgemm_p, dgemm_q, dgemm_r; int dgemm_p, dgemm_q, dgemm_r;
int dgemm_unroll_m, dgemm_unroll_n, dgemm_unroll_mn; int dgemm_unroll_m, dgemm_unroll_n, dgemm_unroll_mn;
#endif #endif
#if defined(BUILD_DOUBLE) || defined(BUILD_COMPLEX16) #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
double (*damax_k) (BLASLONG, double *, BLASLONG); double (*damax_k) (BLASLONG, double *, BLASLONG);
double (*damin_k) (BLASLONG, double *, BLASLONG); double (*damin_k) (BLASLONG, double *, BLASLONG);
double (*dmax_k) (BLASLONG, double *, BLASLONG); double (*dmax_k) (BLASLONG, double *, BLASLONG);
@ -301,23 +311,21 @@ BLASLONG (*idamax_k)(BLASLONG, double *, BLASLONG);
BLASLONG (*idamin_k)(BLASLONG, double *, BLASLONG); BLASLONG (*idamin_k)(BLASLONG, double *, BLASLONG);
BLASLONG (*idmax_k) (BLASLONG, double *, BLASLONG); BLASLONG (*idmax_k) (BLASLONG, double *, BLASLONG);
BLASLONG (*idmin_k) (BLASLONG, double *, BLASLONG); BLASLONG (*idmin_k) (BLASLONG, double *, BLASLONG);
#endif
#if defined(BUILD_DOUBLE) || defined(BUILD_COMPLEX16)
double (*dnrm2_k) (BLASLONG, double *, BLASLONG); double (*dnrm2_k) (BLASLONG, double *, BLASLONG);
double (*dasum_k) (BLASLONG, double *, BLASLONG); double (*dasum_k) (BLASLONG, double *, BLASLONG);
#endif #endif
#ifdef BUILD_DOUBLE #if (BUILD_DOUBLE==1)
double (*dsum_k) (BLASLONG, double *, BLASLONG); double (*dsum_k) (BLASLONG, double *, BLASLONG);
#endif #endif
#if defined(BUILD_DOUBLE) || defined(BUILD_COMPLEX16) #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
int (*dcopy_k) (BLASLONG, double *, BLASLONG, double *, BLASLONG); int (*dcopy_k) (BLASLONG, double *, BLASLONG, double *, BLASLONG);
double (*ddot_k) (BLASLONG, double *, BLASLONG, double *, BLASLONG); double (*ddot_k) (BLASLONG, double *, BLASLONG, double *, BLASLONG);
#endif #endif
#if defined (BUILD_SINGLE) || defined(BUILD_DOUBLE) #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1)
double (*dsdot_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG); double (*dsdot_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG);
#endif #endif
#if defined(BUILD_DOUBLE) || defined(BUILD_COMPLEX16) #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
int (*drot_k) (BLASLONG, double *, BLASLONG, double *, BLASLONG, double, double); int (*drot_k) (BLASLONG, double *, BLASLONG, double *, BLASLONG, double, double);
int (*daxpy_k) (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG); int (*daxpy_k) (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG);
int (*dscal_k) (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG); int (*dscal_k) (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG);
@ -325,13 +333,13 @@ BLASLONG (*idmin_k) (BLASLONG, double *, BLASLONG);
int (*dgemv_n) (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *); int (*dgemv_n) (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *);
int (*dgemv_t) (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *); int (*dgemv_t) (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *);
#endif #endif
#ifdef BUILD_DOUBLE #if (BUILD_DOUBLE==1)
int (*dger_k) (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *); int (*dger_k) (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *);
int (*dsymv_L) (BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *); int (*dsymv_L) (BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *);
int (*dsymv_U) (BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *); int (*dsymv_U) (BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *);
#endif #endif
#if defined(BUILD_DOUBLE) || defined(BUILD_COMPLEX16) #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
int (*dgemm_kernel )(BLASLONG, BLASLONG, BLASLONG, double, double *, double *, double *, BLASLONG); int (*dgemm_kernel )(BLASLONG, BLASLONG, BLASLONG, double, double *, double *, double *, BLASLONG);
int (*dgemm_beta )(BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG); int (*dgemm_beta )(BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG);
@ -340,7 +348,7 @@ BLASLONG (*idmin_k) (BLASLONG, double *, BLASLONG);
int (*dgemm_oncopy )(BLASLONG, BLASLONG, double *, BLASLONG, double *); int (*dgemm_oncopy )(BLASLONG, BLASLONG, double *, BLASLONG, double *);
int (*dgemm_otcopy )(BLASLONG, BLASLONG, double *, BLASLONG, double *); int (*dgemm_otcopy )(BLASLONG, BLASLONG, double *, BLASLONG, double *);
#endif #endif
#ifdef BUILD_DOUBLE #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
#ifdef SMALL_MATRIX_OPT #ifdef SMALL_MATRIX_OPT
int (*dgemm_small_matrix_permit)(int transa, int transb, BLASLONG m, BLASLONG n, BLASLONG k, double alpha, double beta); int (*dgemm_small_matrix_permit)(int transa, int transb, BLASLONG m, BLASLONG n, BLASLONG k, double alpha, double beta);
@ -354,6 +362,8 @@ BLASLONG (*idmin_k) (BLASLONG, double *, BLASLONG);
int (*dgemm_small_kernel_b0_tn )(BLASLONG m, BLASLONG n, BLASLONG k, double * A, BLASLONG lda, double alpha, double * B, BLASLONG ldb, double * C, BLASLONG ldc); int (*dgemm_small_kernel_b0_tn )(BLASLONG m, BLASLONG n, BLASLONG k, double * A, BLASLONG lda, double alpha, double * B, BLASLONG ldb, double * C, BLASLONG ldc);
int (*dgemm_small_kernel_b0_tt )(BLASLONG m, BLASLONG n, BLASLONG k, double * A, BLASLONG lda, double alpha, double * B, BLASLONG ldb, double * C, BLASLONG ldc); int (*dgemm_small_kernel_b0_tt )(BLASLONG m, BLASLONG n, BLASLONG k, double * A, BLASLONG lda, double alpha, double * B, BLASLONG ldb, double * C, BLASLONG ldc);
#endif #endif
#endif
#if (BUILD_DOUBLE==1)
int (*dtrsm_kernel_LN)(BLASLONG, BLASLONG, BLASLONG, double, double *, double *, double *, BLASLONG, BLASLONG); int (*dtrsm_kernel_LN)(BLASLONG, BLASLONG, BLASLONG, double, double *, double *, double *, BLASLONG, BLASLONG);
int (*dtrsm_kernel_LT)(BLASLONG, BLASLONG, BLASLONG, double, double *, double *, double *, BLASLONG, BLASLONG); int (*dtrsm_kernel_LT)(BLASLONG, BLASLONG, BLASLONG, double, double *, double *, double *, BLASLONG, BLASLONG);
int (*dtrsm_kernel_RN)(BLASLONG, BLASLONG, BLASLONG, double, double *, double *, double *, BLASLONG, BLASLONG); int (*dtrsm_kernel_RN)(BLASLONG, BLASLONG, BLASLONG, double, double *, double *, double *, BLASLONG, BLASLONG);
@ -500,23 +510,25 @@ BLASLONG (*iqmin_k) (BLASLONG, xdouble *, BLASLONG);
#endif #endif
#ifdef BUILD_COMPLEX #if (BUILD_COMPLEX==1)
int cgemm_p, cgemm_q, cgemm_r; int cgemm_p, cgemm_q, cgemm_r;
int cgemm_unroll_m, cgemm_unroll_n, cgemm_unroll_mn; int cgemm_unroll_m, cgemm_unroll_n, cgemm_unroll_mn;
float (*camax_k) (BLASLONG, float *, BLASLONG); float (*camax_k) (BLASLONG, float *, BLASLONG);
float (*camin_k) (BLASLONG, float *, BLASLONG); float (*camin_k) (BLASLONG, float *, BLASLONG);
BLASLONG (*icamax_k)(BLASLONG, float *, BLASLONG);
BLASLONG (*icamax_k)(BLASLONG, float *, BLASLONG);
BLASLONG (*icamin_k)(BLASLONG, float *, BLASLONG); BLASLONG (*icamin_k)(BLASLONG, float *, BLASLONG);
float (*cnrm2_k) (BLASLONG, float *, BLASLONG); float (*cnrm2_k) (BLASLONG, float *, BLASLONG);
float (*casum_k) (BLASLONG, float *, BLASLONG); float (*casum_k) (BLASLONG, float *, BLASLONG);
float (*csum_k) (BLASLONG, float *, BLASLONG); float (*csum_k) (BLASLONG, float *, BLASLONG);
int (*ccopy_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG); int (*ccopy_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG);
openblas_complex_float (*cdotu_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG); openblas_complex_float (*cdotu_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG);
openblas_complex_float (*cdotc_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG); openblas_complex_float (*cdotc_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG);
int (*csrot_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG, float, float);
int (*csrot_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG, float, float);
int (*caxpy_k) (BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG); int (*caxpy_k) (BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
int (*caxpyc_k)(BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG); int (*caxpyc_k)(BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
int (*cscal_k) (BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG); int (*cscal_k) (BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
@ -710,7 +722,7 @@ BLASLONG (*icamin_k)(BLASLONG, float *, BLASLONG);
int (*claswp_ncopy) (BLASLONG, BLASLONG, BLASLONG, float *, BLASLONG, blasint *, float *); int (*claswp_ncopy) (BLASLONG, BLASLONG, BLASLONG, float *, BLASLONG, blasint *, float *);
#endif #endif
#ifdef BUILD_COMPLEX16 #if (BUILD_COMPLEX16 == 1)
int zgemm_p, zgemm_q, zgemm_r; int zgemm_p, zgemm_q, zgemm_r;
int zgemm_unroll_m, zgemm_unroll_n, zgemm_unroll_mn; int zgemm_unroll_m, zgemm_unroll_n, zgemm_unroll_mn;
@ -1092,34 +1104,34 @@ BLASLONG (*ixamin_k)(BLASLONG, xdouble *, BLASLONG);
void (*init)(void); void (*init)(void);
int snum_opt, dnum_opt, qnum_opt; int snum_opt, dnum_opt, qnum_opt;
#ifdef BUILD_SINGLE #if (BUILD_SINGLE==1)
int (*saxpby_k) (BLASLONG, float, float*, BLASLONG,float, float*, BLASLONG); int (*saxpby_k) (BLASLONG, float, float*, BLASLONG,float, float*, BLASLONG);
#endif #endif
#ifdef BUILD_DOUBLE #if (BUILD_DOUBLE==1)
int (*daxpby_k) (BLASLONG, double, double*, BLASLONG,double, double*, BLASLONG); int (*daxpby_k) (BLASLONG, double, double*, BLASLONG,double, double*, BLASLONG);
#endif #endif
#ifdef BUILD_COMPLEX #if (BUILD_COMPLEX==1)
int (*caxpby_k) (BLASLONG, float, float, float*, BLASLONG,float,float, float*, BLASLONG); int (*caxpby_k) (BLASLONG, float, float, float*, BLASLONG,float,float, float*, BLASLONG);
#endif #endif
#ifdef BUILD_COMPLEX16 #if (BUILD_COMPLEX16==1)
int (*zaxpby_k) (BLASLONG, double, double, double*, BLASLONG,double,double, double*, BLASLONG); int (*zaxpby_k) (BLASLONG, double, double, double*, BLASLONG,double,double, double*, BLASLONG);
#endif #endif
#ifdef BUILD_SINGLE #if (BUILD_SINGLE==1)
int (*somatcopy_k_cn) (BLASLONG, BLASLONG, float, float*, BLASLONG, float*, BLASLONG); int (*somatcopy_k_cn) (BLASLONG, BLASLONG, float, float*, BLASLONG, float*, BLASLONG);
int (*somatcopy_k_ct) (BLASLONG, BLASLONG, float, float*, BLASLONG, float*, BLASLONG); int (*somatcopy_k_ct) (BLASLONG, BLASLONG, float, float*, BLASLONG, float*, BLASLONG);
int (*somatcopy_k_rn) (BLASLONG, BLASLONG, float, float*, BLASLONG, float*, BLASLONG); int (*somatcopy_k_rn) (BLASLONG, BLASLONG, float, float*, BLASLONG, float*, BLASLONG);
int (*somatcopy_k_rt) (BLASLONG, BLASLONG, float, float*, BLASLONG, float*, BLASLONG); int (*somatcopy_k_rt) (BLASLONG, BLASLONG, float, float*, BLASLONG, float*, BLASLONG);
#endif #endif
#ifdef BUILD_DOUBLE #if (BUILD_DOUBLE==1)
int (*domatcopy_k_cn) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG); int (*domatcopy_k_cn) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG);
int (*domatcopy_k_ct) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG); int (*domatcopy_k_ct) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG);
int (*domatcopy_k_rn) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG); int (*domatcopy_k_rn) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG);
int (*domatcopy_k_rt) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG); int (*domatcopy_k_rt) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG);
#endif #endif
#ifdef BUILD_COMPLEX #if (BUILD_COMPLEX==1)
int (*comatcopy_k_cn) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG); int (*comatcopy_k_cn) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG);
int (*comatcopy_k_ct) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG); int (*comatcopy_k_ct) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG);
int (*comatcopy_k_rn) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG); int (*comatcopy_k_rn) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG);
@ -1131,7 +1143,7 @@ BLASLONG (*ixamin_k)(BLASLONG, xdouble *, BLASLONG);
int (*comatcopy_k_rtc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG); int (*comatcopy_k_rtc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG);
#endif #endif
#ifdef BUILD_COMPLEX16 #if (BUILD_COMPLEX16==1)
int (*zomatcopy_k_cn) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG); int (*zomatcopy_k_cn) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
int (*zomatcopy_k_ct) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG); int (*zomatcopy_k_ct) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
int (*zomatcopy_k_rn) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG); int (*zomatcopy_k_rn) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
@ -1143,21 +1155,21 @@ BLASLONG (*ixamin_k)(BLASLONG, xdouble *, BLASLONG);
int (*zomatcopy_k_rtc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG); int (*zomatcopy_k_rtc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
#endif #endif
#ifdef BUILD_SINGLE #if (BUILD_SINGLE==1)
int (*simatcopy_k_cn) (BLASLONG, BLASLONG, float, float*, BLASLONG); int (*simatcopy_k_cn) (BLASLONG, BLASLONG, float, float*, BLASLONG);
int (*simatcopy_k_ct) (BLASLONG, BLASLONG, float, float*, BLASLONG); int (*simatcopy_k_ct) (BLASLONG, BLASLONG, float, float*, BLASLONG);
int (*simatcopy_k_rn) (BLASLONG, BLASLONG, float, float*, BLASLONG); int (*simatcopy_k_rn) (BLASLONG, BLASLONG, float, float*, BLASLONG);
int (*simatcopy_k_rt) (BLASLONG, BLASLONG, float, float*, BLASLONG); int (*simatcopy_k_rt) (BLASLONG, BLASLONG, float, float*, BLASLONG);
#endif #endif
#ifdef BUILD_DOUBLE #if (BUILD_DOUBLE==1)
int (*dimatcopy_k_cn) (BLASLONG, BLASLONG, double, double*, BLASLONG); int (*dimatcopy_k_cn) (BLASLONG, BLASLONG, double, double*, BLASLONG);
int (*dimatcopy_k_ct) (BLASLONG, BLASLONG, double, double*, BLASLONG); int (*dimatcopy_k_ct) (BLASLONG, BLASLONG, double, double*, BLASLONG);
int (*dimatcopy_k_rn) (BLASLONG, BLASLONG, double, double*, BLASLONG); int (*dimatcopy_k_rn) (BLASLONG, BLASLONG, double, double*, BLASLONG);
int (*dimatcopy_k_rt) (BLASLONG, BLASLONG, double, double*, BLASLONG); int (*dimatcopy_k_rt) (BLASLONG, BLASLONG, double, double*, BLASLONG);
#endif #endif
#ifdef BUILD_COMPLEX #if (BUILD_COMPLEX==1)
int (*cimatcopy_k_cn) (BLASLONG, BLASLONG, float, float, float*, BLASLONG); int (*cimatcopy_k_cn) (BLASLONG, BLASLONG, float, float, float*, BLASLONG);
int (*cimatcopy_k_ct) (BLASLONG, BLASLONG, float, float, float*, BLASLONG); int (*cimatcopy_k_ct) (BLASLONG, BLASLONG, float, float, float*, BLASLONG);
int (*cimatcopy_k_rn) (BLASLONG, BLASLONG, float, float, float*, BLASLONG); int (*cimatcopy_k_rn) (BLASLONG, BLASLONG, float, float, float*, BLASLONG);
@ -1169,7 +1181,7 @@ BLASLONG (*ixamin_k)(BLASLONG, xdouble *, BLASLONG);
int (*cimatcopy_k_rtc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG); int (*cimatcopy_k_rtc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG);
#endif #endif
#ifdef BUILD_COMPLEX16 #if (BUILD_COMPLEX16==1)
int (*zimatcopy_k_cn) (BLASLONG, BLASLONG, double, double, double*, BLASLONG); int (*zimatcopy_k_cn) (BLASLONG, BLASLONG, double, double, double*, BLASLONG);
int (*zimatcopy_k_ct) (BLASLONG, BLASLONG, double, double, double*, BLASLONG); int (*zimatcopy_k_ct) (BLASLONG, BLASLONG, double, double, double*, BLASLONG);
int (*zimatcopy_k_rn) (BLASLONG, BLASLONG, double, double, double*, BLASLONG); int (*zimatcopy_k_rn) (BLASLONG, BLASLONG, double, double, double*, BLASLONG);
@ -1181,16 +1193,16 @@ BLASLONG (*ixamin_k)(BLASLONG, xdouble *, BLASLONG);
int (*zimatcopy_k_rtc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG); int (*zimatcopy_k_rtc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG);
#endif #endif
#ifdef BUILD_SINGLE #if (BUILD_SINGLE==1)
int (*sgeadd_k) (BLASLONG, BLASLONG, float, float *, BLASLONG, float, float *, BLASLONG); int (*sgeadd_k) (BLASLONG, BLASLONG, float, float *, BLASLONG, float, float *, BLASLONG);
#endif #endif
#ifdef BUILD_DOUBLE #if (BUILD_DOUBLE==1)
int (*dgeadd_k) (BLASLONG, BLASLONG, double, double *, BLASLONG, double, double *, BLASLONG); int (*dgeadd_k) (BLASLONG, BLASLONG, double, double *, BLASLONG, double, double *, BLASLONG);
#endif #endif
#ifdef BUILD_COMPLEX #if (BUILD_COMPLEX==1)
int (*cgeadd_k) (BLASLONG, BLASLONG, float, float, float *, BLASLONG, float, float, float *, BLASLONG); int (*cgeadd_k) (BLASLONG, BLASLONG, float, float, float *, BLASLONG, float, float, float *, BLASLONG);
#endif #endif
#ifdef BUILD_COMPLEX16 #if (BUILD_COMPLEX16==1)
int (*zgeadd_k) (BLASLONG, BLASLONG, double, double, double *, BLASLONG, double, double, double *, BLASLONG); int (*zgeadd_k) (BLASLONG, BLASLONG, double, double, double *, BLASLONG, double, double, double *, BLASLONG);
#endif #endif
} gotoblas_t; } gotoblas_t;
@ -1206,7 +1218,7 @@ extern gotoblas_t *gotoblas;
#define HAVE_EX_L2 gotoblas -> exclusive_cache #define HAVE_EX_L2 gotoblas -> exclusive_cache
#ifdef BUILD_BFLOAT16 #if (BUILD_BFLOAT16==1)
#define SBGEMM_P gotoblas -> sbgemm_p #define SBGEMM_P gotoblas -> sbgemm_p
#define SBGEMM_Q gotoblas -> sbgemm_q #define SBGEMM_Q gotoblas -> sbgemm_q
#define SBGEMM_R gotoblas -> sbgemm_r #define SBGEMM_R gotoblas -> sbgemm_r
@ -1215,7 +1227,7 @@ extern gotoblas_t *gotoblas;
#define SBGEMM_UNROLL_MN gotoblas -> sbgemm_unroll_mn #define SBGEMM_UNROLL_MN gotoblas -> sbgemm_unroll_mn
#endif #endif
#if defined (BUILD_SINGLE) #if (BUILD_SINGLE==1)
#define SGEMM_P gotoblas -> sgemm_p #define SGEMM_P gotoblas -> sgemm_p
#define SGEMM_Q gotoblas -> sgemm_q #define SGEMM_Q gotoblas -> sgemm_q
#define SGEMM_R gotoblas -> sgemm_r #define SGEMM_R gotoblas -> sgemm_r
@ -1224,30 +1236,14 @@ extern gotoblas_t *gotoblas;
#define SGEMM_UNROLL_MN gotoblas -> sgemm_unroll_mn #define SGEMM_UNROLL_MN gotoblas -> sgemm_unroll_mn
#endif #endif
#if defined (BUILD_DOUBLE) #if (BUILD_DOUBLE==1)
#define DGEMM_P gotoblas -> dgemm_p #define DGEMM_P gotoblas -> dgemm_p
#define DGEMM_Q gotoblas -> dgemm_q #define DGEMM_Q gotoblas -> dgemm_q
#define DGEMM_R gotoblas -> dgemm_r #define DGEMM_R gotoblas -> dgemm_r
#define DGEMM_UNROLL_M gotoblas -> dgemm_unroll_m #define DGEMM_UNROLL_M gotoblas -> dgemm_unroll_m
#define DGEMM_UNROLL_N gotoblas -> dgemm_unroll_n #define DGEMM_UNROLL_N gotoblas -> dgemm_unroll_n
#define DGEMM_UNROLL_MN gotoblas -> dgemm_unroll_mn #define DGEMM_UNROLL_MN gotoblas -> dgemm_unroll_mn
#endif #if (BUILD_SINGLE != 1)
#define QGEMM_P gotoblas -> qgemm_p
#define QGEMM_Q gotoblas -> qgemm_q
#define QGEMM_R gotoblas -> qgemm_r
#define QGEMM_UNROLL_M gotoblas -> qgemm_unroll_m
#define QGEMM_UNROLL_N gotoblas -> qgemm_unroll_n
#define QGEMM_UNROLL_MN gotoblas -> qgemm_unroll_mn
#ifdef BUILD_COMPLEX
#define CGEMM_P gotoblas -> cgemm_p
#define CGEMM_Q gotoblas -> cgemm_q
#define CGEMM_R gotoblas -> cgemm_r
#define CGEMM_UNROLL_M gotoblas -> cgemm_unroll_m
#define CGEMM_UNROLL_N gotoblas -> cgemm_unroll_n
#define CGEMM_UNROLL_MN gotoblas -> cgemm_unroll_mn
#ifndef BUILD_SINGLE
#define SGEMM_P gotoblas -> sgemm_p #define SGEMM_P gotoblas -> sgemm_p
#define SGEMM_Q gotoblas -> sgemm_q #define SGEMM_Q gotoblas -> sgemm_q
#define SGEMM_R 1024 #define SGEMM_R 1024
@ -1257,14 +1253,38 @@ extern gotoblas_t *gotoblas;
#endif #endif
#endif #endif
#ifdef BUILD_COMPLEX16 #define QGEMM_P gotoblas -> qgemm_p
#define QGEMM_Q gotoblas -> qgemm_q
#define QGEMM_R gotoblas -> qgemm_r
#define QGEMM_UNROLL_M gotoblas -> qgemm_unroll_m
#define QGEMM_UNROLL_N gotoblas -> qgemm_unroll_n
#define QGEMM_UNROLL_MN gotoblas -> qgemm_unroll_mn
#if (BUILD_COMPLEX==1)
#define CGEMM_P gotoblas -> cgemm_p
#define CGEMM_Q gotoblas -> cgemm_q
#define CGEMM_R gotoblas -> cgemm_r
#define CGEMM_UNROLL_M gotoblas -> cgemm_unroll_m
#define CGEMM_UNROLL_N gotoblas -> cgemm_unroll_n
#define CGEMM_UNROLL_MN gotoblas -> cgemm_unroll_mn
#if (BUILD_SINGLE != 1)
#define SGEMM_P gotoblas -> sgemm_p
#define SGEMM_Q gotoblas -> sgemm_q
#define SGEMM_R 1024
#define SGEMM_UNROLL_M gotoblas -> sgemm_unroll_m
#define SGEMM_UNROLL_N gotoblas -> sgemm_unroll_n
#define SGEMM_UNROLL_MN gotoblas -> sgemm_unroll_mn
#endif
#endif
#if (BUILD_COMPLEX16==1)
#define ZGEMM_P gotoblas -> zgemm_p #define ZGEMM_P gotoblas -> zgemm_p
#define ZGEMM_Q gotoblas -> zgemm_q #define ZGEMM_Q gotoblas -> zgemm_q
#define ZGEMM_R gotoblas -> zgemm_r #define ZGEMM_R gotoblas -> zgemm_r
#define ZGEMM_UNROLL_M gotoblas -> zgemm_unroll_m #define ZGEMM_UNROLL_M gotoblas -> zgemm_unroll_m
#define ZGEMM_UNROLL_N gotoblas -> zgemm_unroll_n #define ZGEMM_UNROLL_N gotoblas -> zgemm_unroll_n
#define ZGEMM_UNROLL_MN gotoblas -> zgemm_unroll_mn #define ZGEMM_UNROLL_MN gotoblas -> zgemm_unroll_mn
#ifndef BUILD_DOUBLE #if (BUILD_DOUBLE != 1)
#define DGEMM_P gotoblas -> dgemm_p #define DGEMM_P gotoblas -> dgemm_p
#define DGEMM_Q gotoblas -> dgemm_q #define DGEMM_Q gotoblas -> dgemm_q
#define DGEMM_R 1024 #define DGEMM_R 1024
@ -1272,6 +1292,14 @@ extern gotoblas_t *gotoblas;
#define DGEMM_UNROLL_N gotoblas -> dgemm_unroll_n #define DGEMM_UNROLL_N gotoblas -> dgemm_unroll_n
#define DGEMM_UNROLL_MN gotoblas -> dgemm_unroll_mn #define DGEMM_UNROLL_MN gotoblas -> dgemm_unroll_mn
#endif #endif
#if (BUILD_COMPLEX != 1)
#define CGEMM_P gotoblas -> cgemm_p
#define CGEMM_Q gotoblas -> cgemm_q
#define CGEMM_R gotoblas -> cgemm_r
#define CGEMM_UNROLL_M gotoblas -> cgemm_unroll_m
#define CGEMM_UNROLL_N gotoblas -> cgemm_unroll_n
#define CGEMM_UNROLL_MN gotoblas -> cgemm_unroll_mn
#endif
#endif #endif
#define XGEMM_P gotoblas -> xgemm_p #define XGEMM_P gotoblas -> xgemm_p
@ -1318,7 +1346,7 @@ extern gotoblas_t *gotoblas;
#define HAVE_EX_L2 0 #define HAVE_EX_L2 0
#endif #endif
#ifdef BUILD_BFLOAT16 #if (BUILD_BFLOAT16 == 1)
#define SBGEMM_P SBGEMM_DEFAULT_P #define SBGEMM_P SBGEMM_DEFAULT_P
#define SBGEMM_Q SBGEMM_DEFAULT_Q #define SBGEMM_Q SBGEMM_DEFAULT_Q
#define SBGEMM_R SBGEMM_DEFAULT_R #define SBGEMM_R SBGEMM_DEFAULT_R

View File

@ -53,6 +53,7 @@ extern void goto_set_num_threads(int nthreads);
/* Global Parameter */ /* Global Parameter */
extern int blas_cpu_number; extern int blas_cpu_number;
extern int blas_num_threads; extern int blas_num_threads;
extern int blas_num_threads_set;
extern int blas_omp_linked; extern int blas_omp_linked;
#define BLAS_LEGACY 0x8000U #define BLAS_LEGACY 0x8000U
@ -139,7 +140,11 @@ extern int blas_server_avail;
static __inline int num_cpu_avail(int level) { static __inline int num_cpu_avail(int level) {
#ifdef USE_OPENMP #ifdef USE_OPENMP
int openmp_nthreads=omp_get_max_threads(); int openmp_nthreads;
if (blas_num_threads_set == 0)
openmp_nthreads=omp_get_max_threads();
else
openmp_nthreads=blas_cpu_number;
#endif #endif
#ifndef USE_OPENMP #ifndef USE_OPENMP

View File

@ -59,6 +59,11 @@ void get_subarchitecture(void){
printf("ev%d", implver() + 4); printf("ev%d", implver() + 4);
} }
void get_corename(void){
printf("EV%d", implver() + 4);
}
void get_subdirname(void){ void get_subdirname(void){
printf("alpha"); printf("alpha");
} }

View File

@ -202,10 +202,14 @@ int detect(void)
return CPU_CORTEXA510; return CPU_CORTEXA510;
else if (strstr(cpu_part, "0xd47")) else if (strstr(cpu_part, "0xd47"))
return CPU_CORTEXA710; return CPU_CORTEXA710;
else if (strstr(cpu_part, "0xd4d")) //A715
return CPU_CORTEXA710;
else if (strstr(cpu_part, "0xd44")) else if (strstr(cpu_part, "0xd44"))
return CPU_CORTEXX1; return CPU_CORTEXX1;
else if (strstr(cpu_part, "0xd4c")) else if (strstr(cpu_part, "0xd4c"))
return CPU_CORTEXX2; return CPU_CORTEXX2;
else if (strstr(cpu_part, "0xd4e")) //X3
return CPU_CORTEXX2;
} }
// Qualcomm // Qualcomm
else if (strstr(cpu_implementer, "0x51") && strstr(cpu_part, "0xc00")) else if (strstr(cpu_implementer, "0x51") && strstr(cpu_part, "0xc00"))

View File

@ -165,7 +165,9 @@ void get_cpuconfig(void){
}else{ }else{
printf("#define UNKNOWN\n"); printf("#define UNKNOWN\n");
} }
if (!get_feature("msa")) printf("#define NO_MSA\n"); #ifndef NO_MSA
if (get_feature("msa")) printf("#define HAVE_MSA\n");
#endif
} }
void get_libname(void){ void get_libname(void){

View File

@ -70,16 +70,18 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
/* or implied, of The University of Texas at Austin. */ /* or implied, of The University of Texas at Austin. */
/*********************************************************************/ /*********************************************************************/
#define CPU_UNKNOWN 0 #define CPU_UNKNOWN 0
#define CPU_SICORTEX 1 #define CPU_MIPS64_GENERIC 1
#define CPU_LOONGSON3R3 2 #define CPU_SICORTEX 2
#define CPU_LOONGSON3R4 3 #define CPU_LOONGSON3R3 3
#define CPU_I6400 4 #define CPU_LOONGSON3R4 4
#define CPU_P6600 5 #define CPU_I6400 5
#define CPU_I6500 6 #define CPU_P6600 6
#define CPU_I6500 7
static char *cpuname[] = { static char *cpuname[] = {
"UNKNOWN", "UNKNOWN",
"MIPS64_GENERIC"
"SICORTEX", "SICORTEX",
"LOONGSON3R3", "LOONGSON3R3",
"LOONGSON3R4", "LOONGSON3R4",
@ -113,8 +115,11 @@ int detect(void){
return CPU_SICORTEX; return CPU_SICORTEX;
} }
} }
return CPU_MIPS64_GENERIC;
#else
return CPU_UNKNOWN;
#endif #endif
return CPU_UNKNOWN;
} }
char *get_corename(void){ char *get_corename(void){
@ -136,9 +141,11 @@ void get_subarchitecture(void){
printf("P6600"); printf("P6600");
}else if(detect()==CPU_I6500){ }else if(detect()==CPU_I6500){
printf("I6500"); printf("I6500");
}else{ }else if(detect()==CPU_SICORTEX){
printf("SICORTEX"); printf("SICORTEX");
} }else{
printf("MIPS64_GENERIC");
}
} }
void get_subdirname(void){ void get_subdirname(void){
@ -201,7 +208,9 @@ void get_cpuconfig(void){
printf("#define DTB_SIZE 4096\n"); printf("#define DTB_SIZE 4096\n");
printf("#define L2_ASSOCIATIVE 8\n"); printf("#define L2_ASSOCIATIVE 8\n");
} }
if (!get_feature("msa")) printf("#define NO_MSA\n"); #ifndef NO_MSA
if (get_feature("msa")) printf("#define HAVE_MSA\n");
#endif
} }
void get_libname(void){ void get_libname(void){
@ -215,8 +224,8 @@ void get_libname(void){
printf("p6600\n"); printf("p6600\n");
}else if(detect()==CPU_I6500) { }else if(detect()==CPU_I6500) {
printf("i6500\n"); printf("i6500\n");
}else{ }else {
printf("mips64\n"); printf("mips64_generic\n");
} }
} }

View File

@ -1544,6 +1544,17 @@ int get_cpuname(void){
return CPUTYPE_NEHALEM; return CPUTYPE_NEHALEM;
} }
break; break;
case 11: //family 6 exmodel 11
switch (model) {
case 7: // Raptor Lake
if(support_avx2())
return CPUTYPE_HASWELL;
if(support_avx())
return CPUTYPE_SANDYBRIDGE;
else
return CPUTYPE_NEHALEM;
}
break;
} }
break; break;
case 0x7: case 0x7:
@ -2334,6 +2345,18 @@ int get_coretype(void){
return CORE_NEHALEM; return CORE_NEHALEM;
} }
case 11:
switch (model) {
case 7: // Raptor Lake
#ifndef NO_AVX2
if(support_avx2())
return CORE_HASWELL;
#endif
if(support_avx())
return CORE_SANDYBRIDGE;
else
return CORE_NEHALEM;
}
case 15: case 15:
if (model <= 0x2) return CORE_NORTHWOOD; if (model <= 0x2) return CORE_NORTHWOOD;
else return CORE_PRESCOTT; else return CORE_PRESCOTT;

View File

@ -173,3 +173,8 @@ HAVE_C11
ARCH_E2K ARCH_E2K
#endif #endif
#if defined(__EMSCRIPTEN__)
ARCH_RISCV64
OS_WINDOWS
#endif

View File

@ -40,7 +40,7 @@ else()
c_${float_char}blas1.c) c_${float_char}blas1.c)
endif() endif()
target_link_libraries(x${float_char}cblat1 ${OpenBLAS_LIBNAME}) target_link_libraries(x${float_char}cblat1 ${OpenBLAS_LIBNAME})
if(${CMAKE_SYSTEM_NAME} MATCHES "Linux" OR ${CMAKE_SYSTEM_NAME} MATCHES "FreeBSD") if(${CMAKE_SYSTEM_NAME} MATCHES "Linux" OR ${CMAKE_SYSTEM_NAME} MATCHES "FreeBSD" OR ${CMAKE_SYSTEM_NAME} MATCHES "QNX")
target_link_libraries(x${float_char}cblat1 m) target_link_libraries(x${float_char}cblat1 m)
endif() endif()
add_test(NAME "x${float_char}cblat1" add_test(NAME "x${float_char}cblat1"
@ -65,7 +65,7 @@ else()
constant.c) constant.c)
endif() endif()
target_link_libraries(x${float_char}cblat2 ${OpenBLAS_LIBNAME}) target_link_libraries(x${float_char}cblat2 ${OpenBLAS_LIBNAME})
if(${CMAKE_SYSTEM_NAME} MATCHES "Linux" OR ${CMAKE_SYSTEM_NAME} MATCHES "FreeBSD") if(${CMAKE_SYSTEM_NAME} MATCHES "Linux" OR ${CMAKE_SYSTEM_NAME} MATCHES "FreeBSD" OR ${CMAKE_SYSTEM_NAME} MATCHES "QNX")
target_link_libraries(x${float_char}cblat2 m) target_link_libraries(x${float_char}cblat2 m)
endif() endif()
add_test(NAME "x${float_char}cblat2" add_test(NAME "x${float_char}cblat2"
@ -90,7 +90,7 @@ else()
constant.c) constant.c)
endif() endif()
target_link_libraries(x${float_char}cblat3 ${OpenBLAS_LIBNAME}) target_link_libraries(x${float_char}cblat3 ${OpenBLAS_LIBNAME})
if(${CMAKE_SYSTEM_NAME} MATCHES "Linux" OR ${CMAKE_SYSTEM_NAME} MATCHES "FreeBSD") if(${CMAKE_SYSTEM_NAME} MATCHES "Linux" OR ${CMAKE_SYSTEM_NAME} MATCHES "FreeBSD" OR ${CMAKE_SYSTEM_NAME} MATCHES "QNX")
target_link_libraries(x${float_char}cblat3 m) target_link_libraries(x${float_char}cblat3 m)
endif() endif()
add_test(NAME "x${float_char}cblat3" add_test(NAME "x${float_char}cblat3"

View File

@ -237,7 +237,7 @@ endif
ifeq ($(BUILD_DOUBLE),1) ifeq ($(BUILD_DOUBLE),1)
# Double real # Double real
ifeq ($(NOFORTRAN),0) ifeq ($(NOFORTRAN), $(filter 0 2,$(NOFORTRAN)))
xdcblat1: $(dtestl1o) c_dblat1.o $(TOPDIR)/$(LIBNAME) xdcblat1: $(dtestl1o) c_dblat1.o $(TOPDIR)/$(LIBNAME)
$(FC) $(FLDFLAGS) -o xdcblat1 c_dblat1.o $(dtestl1o) $(LIB) $(EXTRALIB) $(CEXTRALIB) $(FC) $(FLDFLAGS) -o xdcblat1 c_dblat1.o $(dtestl1o) $(LIB) $(EXTRALIB) $(CEXTRALIB)
xdcblat2: $(dtestl2o) c_dblat2.o $(TOPDIR)/$(LIBNAME) xdcblat2: $(dtestl2o) c_dblat2.o $(TOPDIR)/$(LIBNAME)
@ -256,7 +256,7 @@ endif
ifeq ($(BUILD_COMPLEX),1) ifeq ($(BUILD_COMPLEX),1)
# Single complex # Single complex
ifeq ($(NOFORTRAN),0) ifeq ($(NOFORTRAN), $(filter 0 2,$(NOFORTRAN)))
xccblat1: $(ctestl1o) c_cblat1.o $(TOPDIR)/$(LIBNAME) xccblat1: $(ctestl1o) c_cblat1.o $(TOPDIR)/$(LIBNAME)
$(FC) $(FLDFLAGS) -o xccblat1 c_cblat1.o $(ctestl1o) $(LIB) $(EXTRALIB) $(CEXTRALIB) $(FC) $(FLDFLAGS) -o xccblat1 c_cblat1.o $(ctestl1o) $(LIB) $(EXTRALIB) $(CEXTRALIB)
xccblat2: $(ctestl2o) c_cblat2.o $(TOPDIR)/$(LIBNAME) xccblat2: $(ctestl2o) c_cblat2.o $(TOPDIR)/$(LIBNAME)
@ -278,7 +278,7 @@ endif
ifeq ($(BUILD_COMPLEX16),1) ifeq ($(BUILD_COMPLEX16),1)
# Double complex # Double complex
ifeq ($(NOFORTRAN),0) ifeq ($(NOFORTRAN), $(filter 0 2,$(NOFORTRAN)))
xzcblat1: $(ztestl1o) c_zblat1.o $(TOPDIR)/$(LIBNAME) xzcblat1: $(ztestl1o) c_zblat1.o $(TOPDIR)/$(LIBNAME)
$(FC) $(FLDFLAGS) -o xzcblat1 c_zblat1.o $(ztestl1o) $(LIB) $(EXTRALIB) $(CEXTRALIB) $(FC) $(FLDFLAGS) -o xzcblat1 c_zblat1.o $(ztestl1o) $(LIB) $(EXTRALIB) $(CEXTRALIB)
xzcblat2: $(ztestl2o) c_zblat2.o $(TOPDIR)/$(LIBNAME) xzcblat2: $(ztestl2o) c_zblat2.o $(TOPDIR)/$(LIBNAME)

View File

@ -969,7 +969,7 @@ real *sfac;
1.17 }; 1.17 };
/* Local variables */ /* Local variables */
extern /* Subroutine */ srottest_(); extern /* Subroutine */ void srottest_();
static integer i__, k, ksize; static integer i__, k, ksize;
extern /* Subroutine */ int stest_(), srotmtest_(); extern /* Subroutine */ int stest_(), srotmtest_();
static integer ki, kn; static integer ki, kn;

View File

@ -304,6 +304,15 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
while (gemm_p * min_l > l2size) gemm_p -= GEMM_UNROLL_M; while (gemm_p * min_l > l2size) gemm_p -= GEMM_UNROLL_M;
} }
BLASLONG pad_min_l = min_l;
#if defined(HALF)
#if defined(DYNAMIC_ARCH)
pad_min_l = (min_l + gotoblas->sbgemm_align_k - 1) & ~(gotoblas->sbgemm_align_k-1);
#else
pad_min_l = (min_l + SBGEMM_ALIGN_K - 1) & ~(SBGEMM_ALIGN_K - 1);;
#endif
#endif
/* First, we have to move data A to L2 cache */ /* First, we have to move data A to L2 cache */
min_i = m_to - m_from; min_i = m_to - m_from;
l1stride = 1; l1stride = 1;
@ -350,7 +359,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
START_RPCC(); START_RPCC();
OCOPY_OPERATION(min_l, min_jj, b, ldb, ls, jjs, OCOPY_OPERATION(min_l, min_jj, b, ldb, ls, jjs,
sb + min_l * (jjs - js) * COMPSIZE * l1stride); sb + pad_min_l * (jjs - js) * COMPSIZE * l1stride);
STOP_RPCC(outercost); STOP_RPCC(outercost);
@ -358,10 +367,10 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
#if !defined(XDOUBLE) || !defined(QUAD_PRECISION) #if !defined(XDOUBLE) || !defined(QUAD_PRECISION)
KERNEL_OPERATION(min_i, min_jj, min_l, alpha, KERNEL_OPERATION(min_i, min_jj, min_l, alpha,
sa, sb + min_l * (jjs - js) * COMPSIZE * l1stride, c, ldc, m_from, jjs); sa, sb + pad_min_l * (jjs - js) * COMPSIZE * l1stride, c, ldc, m_from, jjs);
#else #else
KERNEL_OPERATION(min_i, min_jj, min_l, (void *)&xalpha, KERNEL_OPERATION(min_i, min_jj, min_l, (void *)&xalpha,
sa, sb + min_l * (jjs - js) * COMPSIZE * l1stride, c, ldc, m_from, jjs); sa, sb + pad_min_l * (jjs - js) * COMPSIZE * l1stride, c, ldc, m_from, jjs);
#endif #endif
STOP_RPCC(kernelcost); STOP_RPCC(kernelcost);

View File

@ -324,6 +324,16 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
} else { } else {
if (min_l > GEMM_Q) min_l = (min_l + 1) / 2; if (min_l > GEMM_Q) min_l = (min_l + 1) / 2;
} }
BLASLONG pad_min_l = min_l;
#if defined(HALF)
#if defined(DYNAMIC_ARCH)
pad_min_l = (min_l + gotoblas->sbgemm_align_k - 1) & ~(gotoblas->sbgemm_align_k-1);
#else
pad_min_l = (min_l + SBGEMM_ALIGN_K - 1) & ~(SBGEMM_ALIGN_K - 1);;
#endif
#endif
/* Determine step size in m /* Determine step size in m
* Note: We are currently on the first step in m * Note: We are currently on the first step in m
@ -382,13 +392,13 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
/* Copy part of local region of B into workspace */ /* Copy part of local region of B into workspace */
START_RPCC(); START_RPCC();
OCOPY_OPERATION(min_l, min_jj, b, ldb, ls, jjs, OCOPY_OPERATION(min_l, min_jj, b, ldb, ls, jjs,
buffer[bufferside] + min_l * (jjs - js) * COMPSIZE * l1stride); buffer[bufferside] + pad_min_l * (jjs - js) * COMPSIZE * l1stride);
STOP_RPCC(copy_B); STOP_RPCC(copy_B);
/* Apply kernel with local region of A and part of local region of B */ /* Apply kernel with local region of A and part of local region of B */
START_RPCC(); START_RPCC();
KERNEL_OPERATION(min_i, min_jj, min_l, alpha, KERNEL_OPERATION(min_i, min_jj, min_l, alpha,
sa, buffer[bufferside] + min_l * (jjs - js) * COMPSIZE * l1stride, sa, buffer[bufferside] + pad_min_l * (jjs - js) * COMPSIZE * l1stride,
c, ldc, m_from, jjs); c, ldc, m_from, jjs);
STOP_RPCC(kernel); STOP_RPCC(kernel);

View File

@ -470,9 +470,13 @@ blas_queue_t *tscq;
#endif #endif
#ifdef CONSISTENT_FPCSR #ifdef CONSISTENT_FPCSR
#ifdef __aarch64__
__asm__ __volatile__ ("msr fpcr, %0" : : "r" (queue -> sse_mode));
#else
__asm__ __volatile__ ("ldmxcsr %0" : : "m" (queue -> sse_mode)); __asm__ __volatile__ ("ldmxcsr %0" : : "m" (queue -> sse_mode));
__asm__ __volatile__ ("fldcw %0" : : "m" (queue -> x87_mode)); __asm__ __volatile__ ("fldcw %0" : : "m" (queue -> x87_mode));
#endif #endif
#endif
#ifdef MONITOR #ifdef MONITOR
main_status[cpu] = MAIN_RUNNING1; main_status[cpu] = MAIN_RUNNING1;
@ -746,9 +750,13 @@ int exec_blas_async(BLASLONG pos, blas_queue_t *queue){
queue -> position = pos; queue -> position = pos;
#ifdef CONSISTENT_FPCSR #ifdef CONSISTENT_FPCSR
#ifdef __aarch64__
__asm__ __volatile__ ("mrs %0, fpcr" : "=r" (queue -> sse_mode));
#else
__asm__ __volatile__ ("fnstcw %0" : "=m" (queue -> x87_mode)); __asm__ __volatile__ ("fnstcw %0" : "=m" (queue -> x87_mode));
__asm__ __volatile__ ("stmxcsr %0" : "=m" (queue -> sse_mode)); __asm__ __volatile__ ("stmxcsr %0" : "=m" (queue -> sse_mode));
#endif #endif
#endif
#if defined(OS_LINUX) && !defined(NO_AFFINITY) && !defined(PARAMTEST) #if defined(OS_LINUX) && !defined(NO_AFFINITY) && !defined(PARAMTEST)

View File

@ -69,6 +69,8 @@
int blas_server_avail = 0; int blas_server_avail = 0;
extern int openblas_omp_adaptive_env();
static void * blas_thread_buffer[MAX_PARALLEL_NUMBER][MAX_CPU_NUMBER]; static void * blas_thread_buffer[MAX_PARALLEL_NUMBER][MAX_CPU_NUMBER];
#ifdef HAVE_C11 #ifdef HAVE_C11
static atomic_bool blas_buffer_inuse[MAX_PARALLEL_NUMBER]; static atomic_bool blas_buffer_inuse[MAX_PARALLEL_NUMBER];
@ -98,6 +100,8 @@ static void adjust_thread_buffers() {
void goto_set_num_threads(int num_threads) { void goto_set_num_threads(int num_threads) {
blas_num_threads_set = 1;
if (num_threads < 0) blas_num_threads_set = 0;
if (num_threads < 1) num_threads = blas_num_threads; if (num_threads < 1) num_threads = blas_num_threads;
if (num_threads > MAX_CPU_NUMBER) num_threads = MAX_CPU_NUMBER; if (num_threads > MAX_CPU_NUMBER) num_threads = MAX_CPU_NUMBER;
@ -108,8 +112,6 @@ void goto_set_num_threads(int num_threads) {
blas_cpu_number = num_threads; blas_cpu_number = num_threads;
omp_set_num_threads(blas_cpu_number);
adjust_thread_buffers(); adjust_thread_buffers();
#if defined(ARCH_MIPS64) #if defined(ARCH_MIPS64)
//set parameters for different number of threads. //set parameters for different number of threads.
@ -282,8 +284,12 @@ static void exec_threads(blas_queue_t *queue, int buf_index){
sb = queue -> sb; sb = queue -> sb;
#ifdef CONSISTENT_FPCSR #ifdef CONSISTENT_FPCSR
#ifdef __aarch64__
__asm__ __volatile__ ("msr fpcr, %0" : : "r" (queue -> sse_mode));
#else
__asm__ __volatile__ ("ldmxcsr %0" : : "m" (queue -> sse_mode)); __asm__ __volatile__ ("ldmxcsr %0" : : "m" (queue -> sse_mode));
__asm__ __volatile__ ("fldcw %0" : : "m" (queue -> x87_mode)); __asm__ __volatile__ ("fldcw %0" : : "m" (queue -> x87_mode));
#endif
#endif #endif
if ((sa == NULL) && (sb == NULL) && ((queue -> mode & BLAS_PTHREAD) == 0)) { if ((sa == NULL) && (sb == NULL) && ((queue -> mode & BLAS_PTHREAD) == 0)) {
@ -381,8 +387,12 @@ int exec_blas(BLASLONG num, blas_queue_t *queue){
#ifdef CONSISTENT_FPCSR #ifdef CONSISTENT_FPCSR
for (i = 0; i < num; i ++) { for (i = 0; i < num; i ++) {
#ifdef __aarch64__
__asm__ __volatile__ ("mrs %0, fpcr" : "=r" (queue[i].sse_mode));
#else
__asm__ __volatile__ ("fnstcw %0" : "=m" (queue[i].x87_mode)); __asm__ __volatile__ ("fnstcw %0" : "=m" (queue[i].x87_mode));
__asm__ __volatile__ ("stmxcsr %0" : "=m" (queue[i].sse_mode)); __asm__ __volatile__ ("stmxcsr %0" : "=m" (queue[i].sse_mode));
#endif
} }
#endif #endif

View File

@ -278,12 +278,15 @@ static DWORD WINAPI blas_thread_server(void *arg){
} else } else
#endif #endif
if ((queue -> mode & BLAS_PREC) == BLAS_DOUBLE){ if ((queue -> mode & BLAS_PREC) == BLAS_DOUBLE){
#ifdef BUILD_DOUBLE
sb = (void *)(((BLASLONG)sa + ((DGEMM_P * DGEMM_Q * sizeof(double) sb = (void *)(((BLASLONG)sa + ((DGEMM_P * DGEMM_Q * sizeof(double)
+ GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B); + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
#endif
} else if ((queue -> mode & BLAS_PREC) == BLAS_SINGLE) { } else if ((queue -> mode & BLAS_PREC) == BLAS_SINGLE) {
#ifdef BUILD_SINGLE
sb = (void *)(((BLASLONG)sa + ((SGEMM_P * SGEMM_Q * sizeof(float) sb = (void *)(((BLASLONG)sa + ((SGEMM_P * SGEMM_Q * sizeof(float)
+ GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B); + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
#endif
} else { } else {
/* Other types in future */ /* Other types in future */
} }
@ -295,11 +298,15 @@ static DWORD WINAPI blas_thread_server(void *arg){
} else } else
#endif #endif
if ((queue -> mode & BLAS_PREC) == BLAS_DOUBLE){ if ((queue -> mode & BLAS_PREC) == BLAS_DOUBLE){
#ifdef BUILD_COMPLEX16
sb = (void *)(((BLASLONG)sa + ((ZGEMM_P * ZGEMM_Q * 2 * sizeof(double) sb = (void *)(((BLASLONG)sa + ((ZGEMM_P * ZGEMM_Q * 2 * sizeof(double)
+ GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B); + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
#endif
} else if ((queue -> mode & BLAS_PREC) == BLAS_SINGLE) { } else if ((queue -> mode & BLAS_PREC) == BLAS_SINGLE) {
#ifdef BUILD_COMPLEX
sb = (void *)(((BLASLONG)sa + ((CGEMM_P * CGEMM_Q * 2 * sizeof(float) sb = (void *)(((BLASLONG)sa + ((CGEMM_P * CGEMM_Q * 2 * sizeof(float)
+ GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B); + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
#endif
} else { } else {
/* Other types in future */ /* Other types in future */
} }

View File

@ -1018,7 +1018,7 @@ static gotoblas_t *force_coretype(char *coretype){
char message[128]; char message[128];
//char mname[20]; //char mname[20];
for ( i=1 ; i <= 24; i++) for ( i=1 ; i <= 25; i++)
{ {
if (!strncasecmp(coretype,corename[i],20)) if (!strncasecmp(coretype,corename[i],20))
{ {

View File

@ -125,8 +125,13 @@ extern gotoblas_t gotoblas_THUNDERX2T99;
extern gotoblas_t gotoblas_TSV110; extern gotoblas_t gotoblas_TSV110;
extern gotoblas_t gotoblas_EMAG8180; extern gotoblas_t gotoblas_EMAG8180;
extern gotoblas_t gotoblas_NEOVERSEN1; extern gotoblas_t gotoblas_NEOVERSEN1;
#ifndef NO_SVE
extern gotoblas_t gotoblas_NEOVERSEV1; extern gotoblas_t gotoblas_NEOVERSEV1;
extern gotoblas_t gotoblas_NEOVERSEN2; extern gotoblas_t gotoblas_NEOVERSEN2;
#else
#define gotoblas_NEOVERSEV1 gotoblas_ARMV8
#define gotoblas_NEOVERSEN2 gotoblas_ARMV8
#endif
extern gotoblas_t gotoblas_THUNDERX3T110; extern gotoblas_t gotoblas_THUNDERX3T110;
extern gotoblas_t gotoblas_CORTEXA55; extern gotoblas_t gotoblas_CORTEXA55;
#endif #endif
@ -237,7 +242,7 @@ static gotoblas_t *get_coretype(void) {
p = (char *) NULL ; p = (char *) NULL ;
infile = fopen("/sys/devices/system/cpu/cpu0/regs/identification/midr_el1","r"); infile = fopen("/sys/devices/system/cpu/cpu0/regs/identification/midr_el1","r");
if (!infile) return NULL; if (!infile) return NULL;
fgets(buffer, sizeof(buffer), infile); (void)fgets(buffer, sizeof(buffer), infile);
midr_el1=strtoul(buffer,NULL,16); midr_el1=strtoul(buffer,NULL,16);
fclose(infile); fclose(infile);
#else #else
@ -274,10 +279,12 @@ static gotoblas_t *get_coretype(void) {
return &gotoblas_CORTEXA73; return &gotoblas_CORTEXA73;
case 0xd0c: // Neoverse N1 case 0xd0c: // Neoverse N1
return &gotoblas_NEOVERSEN1; return &gotoblas_NEOVERSEN1;
#ifndef NO_SVE
case 0xd49: case 0xd49:
return &gotoblas_NEOVERSEN2; return &gotoblas_NEOVERSEN2;
case 0xd40: case 0xd40:
return &gotoblas_NEOVERSEV1; return &gotoblas_NEOVERSEV1;
#endif
case 0xd05: // Cortex A55 case 0xd05: // Cortex A55
return &gotoblas_CORTEXA55; return &gotoblas_CORTEXA55;
} }

View File

@ -38,22 +38,48 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <sys/resource.h> #include <sys/resource.h>
#include "common.h" #include "common.h"
#if (defined OS_LINUX || defined OS_ANDROID)
#include <asm/hwcap.h>
#include <sys/auxv.h>
#ifndef HWCAP_LOONGSON_CPUCFG
#define HWCAP_LOONGSON_CPUCFG (1 << 14)
#endif
#endif
#ifdef DYNAMIC_LIST
extern gotoblas_t gotoblas_MIPS64_GENERIC;
#ifdef DYN_LOONGSON3R3
extern gotoblas_t gotoblas_LOONGSON3R3;
#else
#define gotoblas_LOONGSON3R3 gotoblas_MIPS64_GENERIC
#endif
#ifdef DYN_LOONGSON3R4
extern gotoblas_t gotoblas_LOONGSON3R4;
#else
#define gotoblas_LOONGSON3R4 gotoblas_MIPS64_GENERIC
#endif
#else
extern gotoblas_t gotoblas_LOONGSON3R3; extern gotoblas_t gotoblas_LOONGSON3R3;
extern gotoblas_t gotoblas_LOONGSON3R4; extern gotoblas_t gotoblas_LOONGSON3R4;
extern gotoblas_t gotoblas_MIPS64_GENERIC;
#endif
extern void openblas_warning(int verbose, const char * msg); extern void openblas_warning(int verbose, const char * msg);
#define NUM_CORETYPES 2 #define NUM_CORETYPES 3
static char *corename[] = { static char *corename[] = {
"MIPS64_GENERIC"
"loongson3r3", "loongson3r3",
"loongson3r4", "loongson3r4",
"UNKNOWN" "UNKNOWN"
}; };
char *gotoblas_corename(void) { char *gotoblas_corename(void) {
if (gotoblas == &gotoblas_LOONGSON3R3) return corename[0]; if (gotoblas == &gotoblas_MIPS64_GENERIC) return corename[0];
if (gotoblas == &gotoblas_LOONGSON3R4) return corename[1]; if (gotoblas == &gotoblas_LOONGSON3R3) return corename[1];
if (gotoblas == &gotoblas_LOONGSON3R4) return corename[2];
return corename[NUM_CORETYPES]; return corename[NUM_CORETYPES];
} }
@ -73,77 +99,32 @@ static gotoblas_t *force_coretype(char *coretype) {
switch (found) switch (found)
{ {
case 0: return (&gotoblas_LOONGSON3R3); case 0: return (&gotoblas_MIPS64_GENERIC);
case 1: return (&gotoblas_LOONGSON3R4); case 1: return (&gotoblas_LOONGSON3R3);
case 2: return (&gotoblas_LOONGSON3R4);
} }
snprintf(message, 128, "Core not found: %s\n", coretype); snprintf(message, 128, "Core not found: %s\n", coretype);
openblas_warning(1, message); openblas_warning(1, message);
return NULL; return NULL;
} }
#if (defined OS_LINUX || defined OS_ANDROID)
#define MMI_MASK 0x00000010 #define MMI_MASK 0x00000010
#define MSA_MASK 0x00000020 #define MSA_MASK 0x00000020
int fd[2];
int support_cpucfg;
static void handler(int signum)
{
close(fd[1]);
exit(1);
}
/* Brief : Function to check if cpucfg supported on loongson
* Return: 1 supported
* 0 not supported
*/
static int cpucfg_test(void) {
pid_t pid;
int status = 0;
support_cpucfg = 0;
pipe(fd);
pid = fork();
if (pid == 0) { /* Subprocess */
struct sigaction act;
close(fd[0]);
/* Set signal action for SIGILL. */
act.sa_handler = handler;
sigaction(SIGILL,&act,NULL);
/* Execute cpucfg in subprocess. */
__asm__ volatile(
".insn \n\t"
".word (0xc8080118) \n\t"
:::
);
support_cpucfg = 1;
write(fd[1],&support_cpucfg,sizeof(support_cpucfg));
close(fd[1]);
exit(0);
} else if (pid > 0){ /* Parent process*/
close(fd[1]);
if ((waitpid(pid,&status,0) <= 0) ||
(read(fd[0],&support_cpucfg,sizeof(support_cpucfg)) <= 0))
support_cpucfg = 0;
close(fd[0]);
} else {
support_cpucfg = 0;
}
return support_cpucfg;
}
static gotoblas_t *get_coretype_from_cpucfg(void) { static gotoblas_t *get_coretype_from_cpucfg(void) {
int flag = 0; int flag = 0;
__asm__ volatile( __asm__ volatile(
".set push \n\t"
".set noat \n\t"
".insn \n\t" ".insn \n\t"
"dli $8, 0x01 \n\t" "dli $1, 0x01 \n\t"
".word (0xc9084918) \n\t" ".word (0xc8080118) \n\t"
"usw $9, 0x00(%0) \n\t" "move %0, $1 \n\t"
".set pop \n\t"
: "=r"(flag)
:
: :
: "r"(&flag)
: "memory"
); );
if (flag & MSA_MASK) if (flag & MSA_MASK)
return (&gotoblas_LOONGSON3R4); return (&gotoblas_LOONGSON3R4);
@ -153,7 +134,7 @@ static gotoblas_t *get_coretype_from_cpucfg(void) {
} }
static gotoblas_t *get_coretype_from_cpuinfo(void) { static gotoblas_t *get_coretype_from_cpuinfo(void) {
#ifdef linux #ifdef __linux
FILE *infile; FILE *infile;
char buffer[512], *p; char buffer[512], *p;
@ -176,17 +157,19 @@ static gotoblas_t *get_coretype_from_cpuinfo(void) {
return NULL; return NULL;
} }
#endif #endif
return NULL; return NULL;
} }
#endif
static gotoblas_t *get_coretype(void) { static gotoblas_t *get_coretype(void) {
int ret = 0; #if (!defined OS_LINUX && !defined OS_ANDROID)
return NULL;
ret = cpucfg_test(); #else
if (ret == 1) if (!(getauxval(AT_HWCAP) & HWCAP_LOONGSON_CPUCFG))
return get_coretype_from_cpucfg(); return get_coretype_from_cpucfg();
else else
return get_coretype_from_cpuinfo(); return get_coretype_from_cpuinfo();
#endif
} }
void gotoblas_dynamic_init(void) { void gotoblas_dynamic_init(void) {
@ -208,9 +191,9 @@ void gotoblas_dynamic_init(void) {
if (gotoblas == NULL) if (gotoblas == NULL)
{ {
snprintf(coremsg, 128, "Falling back to loongson3r3 core\n"); snprintf(coremsg, 128, "Falling back to MIPS64_GENEIRC\n");
openblas_warning(1, coremsg); openblas_warning(1, coremsg);
gotoblas = &gotoblas_LOONGSON3R3; gotoblas = &gotoblas_MIPS64_GENERIC;
} }
if (gotoblas && gotoblas->init) { if (gotoblas && gotoblas->init) {

View File

@ -823,6 +823,8 @@ void gotoblas_affinity_init(void) {
if (numprocs == 0) numprocs = readenv_atoi("OMP_NUM_THREADS"); if (numprocs == 0) numprocs = readenv_atoi("OMP_NUM_THREADS");
if (numprocs == 0) numprocs = readenv_atoi("OPENBLAS_DEFAULT_NUM_THREADS");
numnodes = 1; numnodes = 1;
if (numprocs == 1) { if (numprocs == 1) {

View File

@ -249,8 +249,11 @@ int get_num_procs(void) {
#if defined(USE_OPENMP) #if defined(USE_OPENMP)
#if _OPENMP >= 201511 #if _OPENMP >= 201511
int i,n;
n = 0;
ret = omp_get_num_places(); ret = omp_get_num_places();
if (ret >0 ) nums = ret; if (ret > 0) for (i=0; i<ret;i++) n+= omp_get_place_num_procs(i);
if (n > 0) nums = n;
#endif #endif
return (nums > 0 ? nums : 2); return (nums > 0 ? nums : 2);
#endif #endif
@ -419,6 +422,8 @@ This value is equal or large than blas_cpu_number. This means some threads are s
*/ */
int blas_num_threads = 0; int blas_num_threads = 0;
int blas_num_threads_set = 0;
int goto_get_num_procs (void) { int goto_get_num_procs (void) {
return blas_cpu_number; return blas_cpu_number;
} }
@ -1820,8 +1825,11 @@ int get_num_procs(void) {
#if defined(USE_OPENMP) #if defined(USE_OPENMP)
/* if (omp_get_proc_bind() != omp_proc_bind_false) */ /* if (omp_get_proc_bind() != omp_proc_bind_false) */
#if _OPENMP >= 201511 #if _OPENMP >= 201511
int i,n;
n = 0;
ret = omp_get_num_places(); ret = omp_get_num_places();
if (ret >0 ) nums = ret; if (ret > 0) for (i=0;i<ret;i++) n+= omp_get_place_num_procs(i);
if (n > 0) nums = n;
#endif #endif
return (nums > 0 ? nums :2); return (nums > 0 ? nums :2);
#endif #endif
@ -1988,6 +1996,8 @@ This value is equal or large than blas_cpu_number. This means some threads are s
*/ */
int blas_num_threads = 0; int blas_num_threads = 0;
int blas_num_threads_set = 0;
int goto_get_num_procs (void) { int goto_get_num_procs (void) {
return blas_cpu_number; return blas_cpu_number;
} }

View File

@ -283,6 +283,7 @@ The numbers of threads in the thread pool.
This value is equal or large than blas_cpu_number. This means some threads are sleep. This value is equal or large than blas_cpu_number. This means some threads are sleep.
*/ */
int blas_num_threads = 0; int blas_num_threads = 0;
int blas_num_threads_set = 0;
int goto_get_num_procs (void) { int goto_get_num_procs (void) {
return blas_cpu_number; return blas_cpu_number;

View File

@ -67,10 +67,16 @@ void openblas_read_env() {
openblas_env_thread_timeout=(unsigned int)ret; openblas_env_thread_timeout=(unsigned int)ret;
ret=0; ret=0;
if (readenv(p,"OPENBLAS_NUM_THREADS")) ret = atoi(p); if (readenv(p,"OPENBLAS_DEFAULT_NUM_THREADS")) ret = atoi(p);
if(ret<0) ret=0; if(ret<0) ret=0;
openblas_env_openblas_num_threads=ret; openblas_env_openblas_num_threads=ret;
ret=0;
if (readenv(p,"OPENBLAS_NUM_THREADS")) ret = atoi(p);
if(ret<0) ret=0;
if(ret != 0 || openblas_env_openblas_num_threads == 0)
openblas_env_openblas_num_threads=ret;
ret=0; ret=0;
if (readenv(p,"GOTO_NUM_THREADS")) ret = atoi(p); if (readenv(p,"GOTO_NUM_THREADS")) ret = atoi(p);
if(ret<0) ret=0; if(ret<0) ret=0;

View File

@ -4000,6 +4000,22 @@ case "$p1" in
no_underscore_objs="$no_underscore_objs $misc_common_objs" no_underscore_objs="$no_underscore_objs $misc_common_objs"
printf 'int main(void){\n' printf 'int main(void){\n'
for obj in $underscore_objs; do
[ "$obj" != "xerbla" ] && printf 'extern void %s%s%s%s();\n' \
"$symbolprefix" "$obj" "$bu" "$symbolsuffix"
done
for obj in $need_2underscore_objs; do
printf 'extern void %s%s%s%s%s();\n' \
"$symbolprefix" "$obj" "$bu" "$bu" "$symbolsuffix"
done
for obj in $no_underscore_objs; do
printf 'extern void %s%s%s();\n' \
"$symbolprefix" "$obj" "$symbolsuffix"
done
printf '\n'
for obj in $underscore_objs; do for obj in $underscore_objs; do
[ "$obj" != "xerbla" ] && printf '%s%s%s%s();\n' \ [ "$obj" != "xerbla" ] && printf '%s%s%s%s();\n' \
"$symbolprefix" "$obj" "$bu" "$symbolsuffix" "$symbolprefix" "$obj" "$bu" "$symbolsuffix"

View File

@ -3955,6 +3955,18 @@ if ($ARGV[0] eq "linktest") {
@no_underscore_objs = (@no_underscore_objs, @misc_common_objs); @no_underscore_objs = (@no_underscore_objs, @misc_common_objs);
print "int main(void){\n"; print "int main(void){\n";
foreach $objs (@underscore_objs) {
print "extern void ", $symbolprefix, $objs, $bu, $symbolsuffix, "();\n" if $objs ne "xerbla";
}
foreach $objs (@need_2underscore_objs) {
print "extern void ", $symbolprefix, $objs, $bu, $bu, $symbolsuffix, "();\n";
}
foreach $objs (@no_underscore_objs) {
print "extern void ", $symbolprefix, $objs, $symbolsuffix, "();\n";
}
foreach $objs (@underscore_objs) { foreach $objs (@underscore_objs) {
print $symbolprefix, $objs, $bu, $symbolsuffix, "();\n" if $objs ne "xerbla"; print $symbolprefix, $objs, $bu, $symbolsuffix, "();\n" if $objs ne "xerbla";
} }

10
f_check
View File

@ -82,10 +82,10 @@ else
vendor=FUJITSU vendor=FUJITSU
openmp='-Kopenmp' openmp='-Kopenmp'
;; ;;
*Cray*) *Hewlett*)
vendor=CRAY vendor=CRAY
openmp='-fopenmp' openmp='-fopenmp'
;; ;;
*GNU*|*GCC*) *GNU*|*GCC*)
v="${data#*GCC: *\) }" v="${data#*GCC: *\) }"
@ -102,7 +102,7 @@ else
vendor=FLANG vendor=FLANG
openmp='-fopenmp' openmp='-fopenmp'
;; ;;
*ifx*) *ifort*|*ifx*)
vendor=INTEL vendor=INTEL
openmp='-fopenmp' openmp='-fopenmp'
;; ;;
@ -117,6 +117,10 @@ else
esac esac
fi fi
;; ;;
*Cray*)
vendor=CRAY
openmp='-fopenmp'
;;
*g95*) *g95*)
vendor=G95 vendor=G95
openmp='' openmp=''

View File

@ -76,11 +76,6 @@ if ($compiler eq "") {
$vendor = FUJITSU; $vendor = FUJITSU;
$openmp = "-Kopenmp"; $openmp = "-Kopenmp";
} elsif ($data =~ /Cray/) {
$vendor = CRAY;
$openmp = "-fopenmp";
} elsif ($data =~ /GNU/ || $data =~ /GCC/ ) { } elsif ($data =~ /GNU/ || $data =~ /GCC/ ) {
$data =~ s/\(+.*?\)+//g; $data =~ s/\(+.*?\)+//g;
@ -95,7 +90,7 @@ if ($compiler eq "") {
if ($compiler =~ /flang/) { if ($compiler =~ /flang/) {
$vendor = FLANG; $vendor = FLANG;
$openmp = "-fopenmp"; $openmp = "-fopenmp";
} elsif ($compiler =~ /ifx/) { } elsif ($compiler =~ /ifort/ || $compiler =~ /ifx/) {
$vendor = INTEL; $vendor = INTEL;
$openmp = "-fopenmp"; $openmp = "-fopenmp";
} elsif ($compiler =~ /pgf/ || $compiler =~ /nvf/) { } elsif ($compiler =~ /pgf/ || $compiler =~ /nvf/) {
@ -106,6 +101,10 @@ if ($compiler eq "") {
$openmp = ""; $openmp = "";
} }
} }
} elsif ($data =~ /Cray/) {
$vendor = CRAY;
$openmp = "-fopenmp";
} }

View File

@ -131,6 +131,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
/* #define FORCE_PPC440 */ /* #define FORCE_PPC440 */
/* #define FORCE_PPC440FP2 */ /* #define FORCE_PPC440FP2 */
/* #define FORCE_CELL */ /* #define FORCE_CELL */
/* #define FORCE_MIPS64_GENERIC */
/* #define FORCE_SICORTEX */ /* #define FORCE_SICORTEX */
/* #define FORCE_LOONGSON3R3 */ /* #define FORCE_LOONGSON3R3 */
/* #define FORCE_LOONGSON3R4 */ /* #define FORCE_LOONGSON3R4 */
@ -146,6 +147,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
/* #define FORCE_SPARCV7 */ /* #define FORCE_SPARCV7 */
/* #define FORCE_ZARCH_GENERIC */ /* #define FORCE_ZARCH_GENERIC */
/* #define FORCE_Z13 */ /* #define FORCE_Z13 */
/* #define FORCE_EV4 */
/* #define FORCE_EV5 */
/* #define FORCE_EV6 */
/* #define FORCE_GENERIC */ /* #define FORCE_GENERIC */
#ifdef FORCE_P2 #ifdef FORCE_P2
@ -915,6 +919,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define CORENAME "CELL" #define CORENAME "CELL"
#endif #endif
#ifdef FORCE_MIPS64_GENERIC
#define FORCE
#define ARCHITECTURE "MIPS"
#define SUBARCHITECTURE "MIPS64_GENERIC"
#define SUBDIRNAME "mips64"
#define ARCHCONFIG "-DMIPS64_GENERIC " \
"-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \
"-DL2_SIZE=1048576 -DL2_LINESIZE=32 " \
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 "
#define LIBNAME "mips64_generic"
#define CORENAME "MIPS64_GENERIC"
#else
#endif
#ifdef FORCE_SICORTEX #ifdef FORCE_SICORTEX
#define FORCE #define FORCE
#define ARCHITECTURE "MIPS" #define ARCHITECTURE "MIPS"
@ -951,7 +969,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define ARCHCONFIG "-DLOONGSON3R4 " \ #define ARCHCONFIG "-DLOONGSON3R4 " \
"-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \ "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \
"-DL2_SIZE=512488 -DL2_LINESIZE=32 " \ "-DL2_SIZE=512488 -DL2_LINESIZE=32 " \
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 -DHAVE_MSA"
#define LIBNAME "loongson3r4" #define LIBNAME "loongson3r4"
#define CORENAME "LOONGSON3R4" #define CORENAME "LOONGSON3R4"
#else #else
@ -965,7 +983,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define ARCHCONFIG "-DLOONGSON3R5 " \ #define ARCHCONFIG "-DLOONGSON3R5 " \
"-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 " \ "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 " \
"-DL2_SIZE=1048576 -DL2_LINESIZE=64 " \ "-DL2_SIZE=1048576 -DL2_LINESIZE=64 " \
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=16 " "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=16 -DHAVE_MSA"
#define LIBNAME "loongson3r5" #define LIBNAME "loongson3r5"
#define CORENAME "LOONGSON3R5" #define CORENAME "LOONGSON3R5"
#else #else
@ -979,7 +997,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define ARCHCONFIG "-DLOONGSON2K1000 " \ #define ARCHCONFIG "-DLOONGSON2K1000 " \
"-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 " \ "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 " \
"-DL2_SIZE=262144 -DL2_LINESIZE=64 " \ "-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=16 " "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=16 -DHAVE_MSA"
#define LIBNAME "loongson2k1000" #define LIBNAME "loongson2k1000"
#define CORENAME "LOONGSON2K1000" #define CORENAME "LOONGSON2K1000"
#else #else
@ -993,7 +1011,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define ARCHCONFIG "-DLOONGSONGENERIC " \ #define ARCHCONFIG "-DLOONGSONGENERIC " \
"-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 " \ "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 " \
"-DL2_SIZE=262144 -DL2_LINESIZE=64 " \ "-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=16 " "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=16 -DHAVE_MSA"
#define LIBNAME "loongsongeneric" #define LIBNAME "loongsongeneric"
#define CORENAME "LOONGSONGENERIC" #define CORENAME "LOONGSONGENERIC"
#else #else
@ -1007,7 +1025,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define ARCHCONFIG "-DI6400 " \ #define ARCHCONFIG "-DI6400 " \
"-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \ "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \
"-DL2_SIZE=1048576 -DL2_LINESIZE=32 " \ "-DL2_SIZE=1048576 -DL2_LINESIZE=32 " \
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 " "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 -DHAVE_MSA "
#define LIBNAME "i6400" #define LIBNAME "i6400"
#define CORENAME "I6400" #define CORENAME "I6400"
#else #else
@ -1035,7 +1053,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define ARCHCONFIG "-DP5600 " \ #define ARCHCONFIG "-DP5600 " \
"-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \ "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \
"-DL2_SIZE=1048576 -DL2_LINESIZE=32 " \ "-DL2_SIZE=1048576 -DL2_LINESIZE=32 " \
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 -DNO_MSA" "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8"
#define LIBNAME "p5600" #define LIBNAME "p5600"
#define CORENAME "P5600" #define CORENAME "P5600"
#else #else
@ -1049,7 +1067,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define ARCHCONFIG "-DMIPS1004K " \ #define ARCHCONFIG "-DMIPS1004K " \
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=32 " \ "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=32 " \
"-DL2_SIZE=262144 -DL2_LINESIZE=32 " \ "-DL2_SIZE=262144 -DL2_LINESIZE=32 " \
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 -DNO_MSA" "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8"
#define LIBNAME "mips1004K" #define LIBNAME "mips1004K"
#define CORENAME "MIPS1004K" #define CORENAME "MIPS1004K"
#else #else
@ -1063,7 +1081,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define ARCHCONFIG "-DMIPS24K " \ #define ARCHCONFIG "-DMIPS24K " \
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=32 " \ "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=32 " \
"-DL2_SIZE=32768 -DL2_LINESIZE=32 " \ "-DL2_SIZE=32768 -DL2_LINESIZE=32 " \
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 -DNO_MSA" "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8"
#define LIBNAME "mips24K" #define LIBNAME "mips24K"
#define CORENAME "MIPS24K" #define CORENAME "MIPS24K"
#else #else
@ -1077,7 +1095,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define ARCHCONFIG "-DI6500 " \ #define ARCHCONFIG "-DI6500 " \
"-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \ "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \
"-DL2_SIZE=1048576 -DL2_LINESIZE=32 " \ "-DL2_SIZE=1048576 -DL2_LINESIZE=32 " \
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 " "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 -DHAVE_MSA"
#define LIBNAME "i6500" #define LIBNAME "i6500"
#define CORENAME "I6500" #define CORENAME "I6500"
#else #else
@ -1392,7 +1410,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"-DL2_SIZE=1048576 -DL2_LINESIZE=64 -DL2_ASSOCIATIVE=16 " \ "-DL2_SIZE=1048576 -DL2_LINESIZE=64 -DL2_ASSOCIATIVE=16 " \
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \ "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DHAVE_SVE -DARMV8 " \ "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DHAVE_SVE -DARMV8 " \
"-march=armv8.4-a -mtune=neoverse-v1" "-march=armv8.4-a+sve -mtune=neoverse-v1"
#define LIBNAME "neoversev1" #define LIBNAME "neoversev1"
#define CORENAME "NEOVERSEV1" #define CORENAME "NEOVERSEV1"
#endif #endif
@ -1601,6 +1619,42 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define CORENAME "Z14" #define CORENAME "Z14"
#endif #endif
#ifdef FORCE_EV4
#define FORCE
#define ARCHITECTURE "ALPHA"
#define SUBARCHITECTURE "ev4"
#define ARCHCONFIG "-DEV4 " \
"-DL1_DATA_SIZE=16384 -DL1_DATA_LINESIZE=32 " \
"-DL2_SIZE=2097152 -DL2_LINESIZE=32 " \
"-DDTB_DEFAULT_ENTRIES=32 -DDTB_SIZE=8192 "
#define LIBNAME "ev4"
#define CORENAME "EV4"
#endif
#ifdef FORCE_EV5
#define FORCE
#define ARCHITECTURE "ALPHA"
#define SUBARCHITECTURE "ev5"
#define ARCHCONFIG "-DEV5 " \
"-DL1_DATA_SIZE=16384 -DL1_DATA_LINESIZE=32 " \
"-DL2_SIZE=2097152 -DL2_LINESIZE=64 " \
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=8192 "
#define LIBNAME "ev5"
#define CORENAME "EV5"
#endif
#ifdef FORCE_EV6
#define FORCE
#define ARCHITECTURE "ALPHA"
#define SUBARCHITECTURE "ev6"
#define ARCHCONFIG "-DEV6 " \
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
"-DL2_SIZE=4194304 -DL2_LINESIZE=64 " \
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=8192 "
#define LIBNAME "ev6"
#define CORENAME "EV6"
#endif
#ifdef FORCE_C910V #ifdef FORCE_C910V
#define FORCE #define FORCE
#define ARCHITECTURE "RISCV64" #define ARCHITECTURE "RISCV64"
@ -1777,7 +1831,7 @@ int main(int argc, char *argv[]){
#ifdef FORCE #ifdef FORCE
printf("CORE=%s\n", CORENAME); printf("CORE=%s\n", CORENAME);
#else #else
#if defined(INTEL_AMD) || defined(POWER) || defined(__mips__) || defined(__arm__) || defined(__aarch64__) || defined(ZARCH) || defined(sparc) || defined(__loongarch__) || defined(__riscv) #if defined(INTEL_AMD) || defined(POWER) || defined(__mips__) || defined(__arm__) || defined(__aarch64__) || defined(ZARCH) || defined(sparc) || defined(__loongarch__) || defined(__riscv) || defined(__alpha__)
printf("CORE=%s\n", get_corename()); printf("CORE=%s\n", get_corename());
#endif #endif
#endif #endif

View File

@ -53,7 +53,7 @@ set(BLAS2_COMPLEX_ONLY_MANGLED_SOURCES
# these do not have separate 'z' sources # these do not have separate 'z' sources
set(BLAS3_SOURCES set(BLAS3_SOURCES
gemm.c symm.c gemm.c symm.c
trsm.c syrk.c syr2k.c trsm.c syrk.c syr2k.c gemmt.c
) )
set(BLAS3_MANGLED_SOURCES set(BLAS3_MANGLED_SOURCES
@ -189,7 +189,16 @@ if (NOT DEFINED NO_LAPACK)
) )
GenerateNamedObjects("${LAPACK_SOURCES}") GenerateNamedObjects("${LAPACK_SOURCES}")
if (NOT RELAPACK_REPLACE)
GenerateNamedObjects("${LAPACK_MANGLED_SOURCES}" "" "" 0 "" "" 0 3) GenerateNamedObjects("${LAPACK_MANGLED_SOURCES}" "" "" 0 "" "" 0 3)
else ()
GenerateNamedObjects("lapack/getrs.c" "" "" 0 "" "" 0 3)
GenerateNamedObjects("lapack/getf2.c" "" "" 0 "" "" 0 3)
GenerateNamedObjects("lapack/potf2.c" "" "" 0 "" "" 0 3)
GenerateNamedObjects("lapack/laswp.c" "" "" 0 "" "" 0 3)
GenerateNamedObjects("lapack/lauu2.c" "" "" 0 "" "" 0 3)
GenerateNamedObjects("lapack/trti2.c" "" "" 0 "" "" 0 3)
endif()
endif () endif ()
if ( BUILD_COMPLEX AND NOT BUILD_SINGLE) if ( BUILD_COMPLEX AND NOT BUILD_SINGLE)

View File

@ -44,12 +44,12 @@ SBLAS3OBJS = \
sgemm.$(SUFFIX) ssymm.$(SUFFIX) strmm.$(SUFFIX) \ sgemm.$(SUFFIX) ssymm.$(SUFFIX) strmm.$(SUFFIX) \
strsm.$(SUFFIX) ssyrk.$(SUFFIX) ssyr2k.$(SUFFIX) \ strsm.$(SUFFIX) ssyrk.$(SUFFIX) ssyr2k.$(SUFFIX) \
somatcopy.$(SUFFIX) simatcopy.$(SUFFIX)\ somatcopy.$(SUFFIX) simatcopy.$(SUFFIX)\
sgeadd.$(SUFFIX) sgeadd.$(SUFFIX) sgemmt.$(SUFFIX)
ifeq ($(BUILD_BFLOAT16),1) ifeq ($(BUILD_BFLOAT16),1)
SBBLAS1OBJS = sbdot.$(SUFFIX) SBBLAS1OBJS = sbdot.$(SUFFIX)
SBBLAS2OBJS = sbgemv.$(SUFFIX) SBBLAS2OBJS = sbgemv.$(SUFFIX)
SBBLAS3OBJS = sbgemm.$(SUFFIX) SBBLAS3OBJS = sbgemm.$(SUFFIX) sbgemmt.$(SUFFIX)
SBEXTOBJS = sbstobf16.$(SUFFIX) sbdtobf16.$(SUFFIX) sbf16tos.$(SUFFIX) dbf16tod.$(SUFFIX) SBEXTOBJS = sbstobf16.$(SUFFIX) sbdtobf16.$(SUFFIX) sbf16tos.$(SUFFIX) dbf16tod.$(SUFFIX)
endif endif
@ -76,7 +76,7 @@ DBLAS3OBJS = \
dgemm.$(SUFFIX) dsymm.$(SUFFIX) dtrmm.$(SUFFIX) \ dgemm.$(SUFFIX) dsymm.$(SUFFIX) dtrmm.$(SUFFIX) \
dtrsm.$(SUFFIX) dsyrk.$(SUFFIX) dsyr2k.$(SUFFIX) \ dtrsm.$(SUFFIX) dsyrk.$(SUFFIX) dsyr2k.$(SUFFIX) \
domatcopy.$(SUFFIX) dimatcopy.$(SUFFIX)\ domatcopy.$(SUFFIX) dimatcopy.$(SUFFIX)\
dgeadd.$(SUFFIX) dgeadd.$(SUFFIX) dgemmt.$(SUFFIX)
CBLAS1OBJS = \ CBLAS1OBJS = \
caxpy.$(SUFFIX) caxpyc.$(SUFFIX) cswap.$(SUFFIX) \ caxpy.$(SUFFIX) caxpyc.$(SUFFIX) cswap.$(SUFFIX) \
@ -92,8 +92,9 @@ CBLAS2OBJS = \
cgemv.$(SUFFIX) cgeru.$(SUFFIX) cgerc.$(SUFFIX) \ cgemv.$(SUFFIX) cgeru.$(SUFFIX) cgerc.$(SUFFIX) \
ctrsv.$(SUFFIX) ctrmv.$(SUFFIX) \ ctrsv.$(SUFFIX) ctrmv.$(SUFFIX) \
csyr2.$(SUFFIX) cgbmv.$(SUFFIX) \ csyr2.$(SUFFIX) cgbmv.$(SUFFIX) \
csbmv.$(SUFFIX) \ csbmv.$(SUFFIX) cspmv.$(SUFFIX) \
cspr2.$(SUFFIX) \ cspr.$(SUFFIX) cspr2.$(SUFFIX) \
csymv.$(SUFFIX) csyr.$(SUFFIX) \
ctbsv.$(SUFFIX) ctbmv.$(SUFFIX) \ ctbsv.$(SUFFIX) ctbmv.$(SUFFIX) \
ctpsv.$(SUFFIX) ctpmv.$(SUFFIX) \ ctpsv.$(SUFFIX) ctpmv.$(SUFFIX) \
chemv.$(SUFFIX) chbmv.$(SUFFIX) \ chemv.$(SUFFIX) chbmv.$(SUFFIX) \
@ -105,7 +106,7 @@ CBLAS3OBJS = \
ctrsm.$(SUFFIX) csyrk.$(SUFFIX) csyr2k.$(SUFFIX) \ ctrsm.$(SUFFIX) csyrk.$(SUFFIX) csyr2k.$(SUFFIX) \
chemm.$(SUFFIX) cherk.$(SUFFIX) cher2k.$(SUFFIX) \ chemm.$(SUFFIX) cherk.$(SUFFIX) cher2k.$(SUFFIX) \
comatcopy.$(SUFFIX) cimatcopy.$(SUFFIX)\ comatcopy.$(SUFFIX) cimatcopy.$(SUFFIX)\
cgeadd.$(SUFFIX) cgeadd.$(SUFFIX) cgemmt.$(SUFFIX)
ZBLAS1OBJS = \ ZBLAS1OBJS = \
zaxpy.$(SUFFIX) zaxpyc.$(SUFFIX) zswap.$(SUFFIX) \ zaxpy.$(SUFFIX) zaxpyc.$(SUFFIX) zswap.$(SUFFIX) \
@ -121,8 +122,9 @@ ZBLAS2OBJS = \
zgemv.$(SUFFIX) zgeru.$(SUFFIX) zgerc.$(SUFFIX) \ zgemv.$(SUFFIX) zgeru.$(SUFFIX) zgerc.$(SUFFIX) \
ztrsv.$(SUFFIX) ztrmv.$(SUFFIX) \ ztrsv.$(SUFFIX) ztrmv.$(SUFFIX) \
zsyr2.$(SUFFIX) zgbmv.$(SUFFIX) \ zsyr2.$(SUFFIX) zgbmv.$(SUFFIX) \
zsbmv.$(SUFFIX) \ zsbmv.$(SUFFIX) zspmv.$(SUFFIX) \
zspr2.$(SUFFIX) \ zspr.$(SUFFIX) zspr2.$(SUFFIX) \
zsymv.$(SUFFIX) zsyr.$(SUFFIX) \
ztbsv.$(SUFFIX) ztbmv.$(SUFFIX) \ ztbsv.$(SUFFIX) ztbmv.$(SUFFIX) \
ztpsv.$(SUFFIX) ztpmv.$(SUFFIX) \ ztpsv.$(SUFFIX) ztpmv.$(SUFFIX) \
zhemv.$(SUFFIX) zhbmv.$(SUFFIX) \ zhemv.$(SUFFIX) zhbmv.$(SUFFIX) \
@ -134,7 +136,7 @@ ZBLAS3OBJS = \
ztrsm.$(SUFFIX) zsyrk.$(SUFFIX) zsyr2k.$(SUFFIX) \ ztrsm.$(SUFFIX) zsyrk.$(SUFFIX) zsyr2k.$(SUFFIX) \
zhemm.$(SUFFIX) zherk.$(SUFFIX) zher2k.$(SUFFIX) \ zhemm.$(SUFFIX) zherk.$(SUFFIX) zher2k.$(SUFFIX) \
zomatcopy.$(SUFFIX) zimatcopy.$(SUFFIX)\ zomatcopy.$(SUFFIX) zimatcopy.$(SUFFIX)\
zgeadd.$(SUFFIX) zgeadd.$(SUFFIX) zgemmt.$(SUFFIX)
ifeq ($(SUPPORT_GEMM3M), 1) ifeq ($(SUPPORT_GEMM3M), 1)
@ -281,12 +283,12 @@ CSBLAS2OBJS = \
CSBLAS3OBJS = \ CSBLAS3OBJS = \
cblas_sgemm.$(SUFFIX) cblas_ssymm.$(SUFFIX) cblas_strmm.$(SUFFIX) cblas_strsm.$(SUFFIX) \ cblas_sgemm.$(SUFFIX) cblas_ssymm.$(SUFFIX) cblas_strmm.$(SUFFIX) cblas_strsm.$(SUFFIX) \
cblas_ssyrk.$(SUFFIX) cblas_ssyr2k.$(SUFFIX) cblas_somatcopy.$(SUFFIX) cblas_simatcopy.$(SUFFIX)\ cblas_ssyrk.$(SUFFIX) cblas_ssyr2k.$(SUFFIX) cblas_somatcopy.$(SUFFIX) cblas_simatcopy.$(SUFFIX)\
cblas_sgeadd.$(SUFFIX) cblas_sgeadd.$(SUFFIX) cblas_sgemmt.$(SUFFIX)
ifeq ($(BUILD_BFLOAT16),1) ifeq ($(BUILD_BFLOAT16),1)
CSBBLAS1OBJS = cblas_sbdot.$(SUFFIX) CSBBLAS1OBJS = cblas_sbdot.$(SUFFIX)
CSBBLAS2OBJS = cblas_sbgemv.$(SUFFIX) CSBBLAS2OBJS = cblas_sbgemv.$(SUFFIX)
CSBBLAS3OBJS = cblas_sbgemm.$(SUFFIX) CSBBLAS3OBJS = cblas_sbgemm.$(SUFFIX) cblas_sbgemmt.$(SUFFIX)
CSBEXTOBJS = cblas_sbstobf16.$(SUFFIX) cblas_sbdtobf16.$(SUFFIX) cblas_sbf16tos.$(SUFFIX) cblas_dbf16tod.$(SUFFIX) CSBEXTOBJS = cblas_sbstobf16.$(SUFFIX) cblas_sbdtobf16.$(SUFFIX) cblas_sbf16tos.$(SUFFIX) cblas_dbf16tod.$(SUFFIX)
endif endif
@ -306,7 +308,7 @@ CDBLAS2OBJS = \
CDBLAS3OBJS += \ CDBLAS3OBJS += \
cblas_dgemm.$(SUFFIX) cblas_dsymm.$(SUFFIX) cblas_dtrmm.$(SUFFIX) cblas_dtrsm.$(SUFFIX) \ cblas_dgemm.$(SUFFIX) cblas_dsymm.$(SUFFIX) cblas_dtrmm.$(SUFFIX) cblas_dtrsm.$(SUFFIX) \
cblas_dsyrk.$(SUFFIX) cblas_dsyr2k.$(SUFFIX) cblas_domatcopy.$(SUFFIX) cblas_dimatcopy.$(SUFFIX) \ cblas_dsyrk.$(SUFFIX) cblas_dsyr2k.$(SUFFIX) cblas_domatcopy.$(SUFFIX) cblas_dimatcopy.$(SUFFIX) \
cblas_dgeadd.$(SUFFIX) cblas_dgeadd.$(SUFFIX) cblas_dgemmt.$(SUFFIX)
CCBLAS1OBJS = \ CCBLAS1OBJS = \
cblas_icamax.$(SUFFIX) cblas_icamin.$(SUFFIX) cblas_scasum.$(SUFFIX) cblas_caxpy.$(SUFFIX) \ cblas_icamax.$(SUFFIX) cblas_icamin.$(SUFFIX) cblas_scasum.$(SUFFIX) cblas_caxpy.$(SUFFIX) \
@ -331,7 +333,7 @@ CCBLAS3OBJS = \
cblas_csyrk.$(SUFFIX) cblas_csyr2k.$(SUFFIX) \ cblas_csyrk.$(SUFFIX) cblas_csyr2k.$(SUFFIX) \
cblas_chemm.$(SUFFIX) cblas_cherk.$(SUFFIX) cblas_cher2k.$(SUFFIX) \ cblas_chemm.$(SUFFIX) cblas_cherk.$(SUFFIX) cblas_cher2k.$(SUFFIX) \
cblas_comatcopy.$(SUFFIX) cblas_cimatcopy.$(SUFFIX)\ cblas_comatcopy.$(SUFFIX) cblas_cimatcopy.$(SUFFIX)\
cblas_cgeadd.$(SUFFIX) cblas_cgeadd.$(SUFFIX) cblas_cgemmt.$(SUFFIX)
CXERBLAOBJ = \ CXERBLAOBJ = \
cblas_xerbla.$(SUFFIX) cblas_xerbla.$(SUFFIX)
@ -362,7 +364,7 @@ CZBLAS3OBJS = \
cblas_zsyrk.$(SUFFIX) cblas_zsyr2k.$(SUFFIX) \ cblas_zsyrk.$(SUFFIX) cblas_zsyr2k.$(SUFFIX) \
cblas_zhemm.$(SUFFIX) cblas_zherk.$(SUFFIX) cblas_zher2k.$(SUFFIX)\ cblas_zhemm.$(SUFFIX) cblas_zherk.$(SUFFIX) cblas_zher2k.$(SUFFIX)\
cblas_zomatcopy.$(SUFFIX) cblas_zimatcopy.$(SUFFIX) \ cblas_zomatcopy.$(SUFFIX) cblas_zimatcopy.$(SUFFIX) \
cblas_zgeadd.$(SUFFIX) cblas_zgeadd.$(SUFFIX) cblas_zgemmt.$(SUFFIX)
ifeq ($(SUPPORT_GEMM3M), 1) ifeq ($(SUPPORT_GEMM3M), 1)
@ -1300,6 +1302,8 @@ xhpr2.$(SUFFIX) xhpr2.$(PSUFFIX) : zhpr2.c
ifeq ($(BUILD_BFLOAT16),1) ifeq ($(BUILD_BFLOAT16),1)
sbgemm.$(SUFFIX) sbgemm.$(PSUFFIX) : gemm.c ../param.h sbgemm.$(SUFFIX) sbgemm.$(PSUFFIX) : gemm.c ../param.h
$(CC) -c $(CFLAGS) $< -o $(@F) $(CC) -c $(CFLAGS) $< -o $(@F)
sbgemmt.$(SUFFIX) sbgemm.$(PSUFFIX) : gemmt.c ../param.h
$(CC) -c $(CFLAGS) $< -o $(@F)
endif endif
sgemm.$(SUFFIX) sgemm.$(PSUFFIX) : gemm.c ../param.h sgemm.$(SUFFIX) sgemm.$(PSUFFIX) : gemm.c ../param.h
@ -1320,6 +1324,24 @@ zgemm.$(SUFFIX) zgemm.$(PSUFFIX) : gemm.c ../param.h
xgemm.$(SUFFIX) xgemm.$(PSUFFIX) : gemm.c ../param.h xgemm.$(SUFFIX) xgemm.$(PSUFFIX) : gemm.c ../param.h
$(CC) -c $(CFLAGS) $< -o $(@F) $(CC) -c $(CFLAGS) $< -o $(@F)
sgemmt.$(SUFFIX) sgemmt.$(PSUFFIX) : gemmt.c ../param.h
$(CC) -c $(CFLAGS) $< -o $(@F)
dgemmt.$(SUFFIX) dgemmt.$(PSUFFIX) : gemmt.c ../param.h
$(CC) -c $(CFLAGS) $< -o $(@F)
qgemmt.$(SUFFIX) qgemmt.$(PSUFFIX) : gemmt.c ../param.h
$(CC) -c $(CFLAGS) $< -o $(@F)
cgemmt.$(SUFFIX) cgemmt.$(PSUFFIX) : gemmt.c ../param.h
$(CC) -c $(CFLAGS) $< -o $(@F)
zgemmt.$(SUFFIX) zgemmt.$(PSUFFIX) : gemmt.c ../param.h
$(CC) -c $(CFLAGS) $< -o $(@F)
xgemmt.$(SUFFIX) xgemmt.$(PSUFFIX) : gemmt.c ../param.h
$(CC) -c $(CFLAGS) $< -o $(@F)
ssymm.$(SUFFIX) ssymm.$(PSUFFIX) : symm.c ssymm.$(SUFFIX) ssymm.$(PSUFFIX) : symm.c
$(CC) -c $(CFLAGS) $< -o $(@F) $(CC) -c $(CFLAGS) $< -o $(@F)
@ -1907,6 +1929,23 @@ cblas_cgemm.$(SUFFIX) cblas_cgemm.$(PSUFFIX) : gemm.c ../param.h
cblas_zgemm.$(SUFFIX) cblas_zgemm.$(PSUFFIX) : gemm.c ../param.h cblas_zgemm.$(SUFFIX) cblas_zgemm.$(PSUFFIX) : gemm.c ../param.h
$(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F)
cblas_sgemmt.$(SUFFIX) cblas_sgemmt.$(PSUFFIX) : gemmt.c ../param.h
$(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F)
ifeq ($(BUILD_BFLOAT16),1)
cblas_sbgemmt.$(SUFFIX) cblas_sbgemmt.$(PSUFFIX) : gemmt.c ../param.h
$(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F)
endif
cblas_dgemmt.$(SUFFIX) cblas_dgemmt.$(PSUFFIX) : gemmt.c ../param.h
$(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F)
cblas_cgemmt.$(SUFFIX) cblas_cgemmt.$(PSUFFIX) : gemmt.c ../param.h
$(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F)
cblas_zgemmt.$(SUFFIX) cblas_zgemmt.$(PSUFFIX) : gemmt.c ../param.h
$(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F)
cblas_ssymm.$(SUFFIX) cblas_ssymm.$(PSUFFIX) : symm.c cblas_ssymm.$(SUFFIX) cblas_ssymm.$(PSUFFIX) : symm.c
$(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F)

589
interface/gemmt.c Normal file
View File

@ -0,0 +1,589 @@
/*********************************************************************/
/* Copyright 2022, The OpenBLAS Project. */
/* All rights reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the following */
/* conditions are met: */
/* */
/* 1. Redistributions of source code must retain the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer. */
/* */
/* 2. Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/* POSSIBILITY OF SUCH DAMAGE. */
/* */
/*********************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include "common.h"
#ifdef FUNCTION_PROFILE
#include "functable.h"
#endif
#ifndef COMPLEX
#define SMP_THRESHOLD_MIN 65536.0
#ifdef XDOUBLE
#define ERROR_NAME "QGEMT "
#elif defined(DOUBLE)
#define ERROR_NAME "DGEMT "
#elif defined(BFLOAT16)
#define ERROR_NAME "SBGEMT "
#else
#define ERROR_NAME "SGEMT "
#endif
#else
#define SMP_THRESHOLD_MIN 8192.0
#ifdef XDOUBLE
#define ERROR_NAME "XGEMT "
#elif defined(DOUBLE)
#define ERROR_NAME "ZGEMT "
#else
#define ERROR_NAME "CGEMT "
#endif
#endif
#ifndef GEMM_MULTITHREAD_THRESHOLD
#define GEMM_MULTITHREAD_THRESHOLD 4
#endif
#ifndef CBLAS
void NAME(char *UPLO, char *TRANSA, char *TRANSB,
blasint * M, blasint * N, blasint * K,
FLOAT * Alpha,
IFLOAT * a, blasint * ldA,
IFLOAT * b, blasint * ldB, FLOAT * Beta, FLOAT * c, blasint * ldC)
{
blasint m, n, k;
blasint lda, ldb, ldc;
int transa, transb, uplo;
blasint info;
char transA, transB, Uplo;
IFLOAT *buffer;
IFLOAT *aa, *bb;
FLOAT *cc;
#if defined(COMPLEX)
FLOAT alpha_r, alpha_i, beta_r, beta_i;
#else
FLOAT alpha, beta;
#endif
PRINT_DEBUG_NAME;
m = *M;
n = *N;
k = *K;
#if defined(COMPLEX)
FLOAT *alpha = Alpha;
alpha_r = *(Alpha + 0);
alpha_i = *(Alpha + 1);
beta_r = *(Beta + 0);
beta_i = *(Beta + 1);
#else
alpha = *Alpha;
beta = *Beta;
#endif
lda = *ldA;
ldb = *ldB;
ldc = *ldC;
transA = *TRANSA;
transB = *TRANSB;
Uplo = *UPLO;
TOUPPER(transA);
TOUPPER(transB);
TOUPPER(Uplo);
transa = -1;
transb = -1;
uplo = -1;
if (transA == 'N')
transa = 0;
if (transA == 'T')
transa = 1;
#ifndef COMPLEX
if (transA == 'R')
transa = 0;
if (transA == 'C')
transa = 1;
#else
if (transA == 'R')
transa = 2;
if (transA == 'C')
transa = 3;
#endif
if (transB == 'N')
transb = 0;
if (transB == 'T')
transb = 1;
#ifndef COMPLEX
if (transB == 'R')
transb = 0;
if (transB == 'C')
transb = 1;
#else
if (transB == 'R')
transb = 2;
if (transB == 'C')
transb = 3;
#endif
if (Uplo == 'U')
uplo = 0;
if (Uplo == 'L')
uplo = 1;
info = 0;
if (uplo < 0)
info = 14;
if (ldc < m)
info = 13;
if (k < 0)
info = 5;
if (n < 0)
info = 4;
if (m < 0)
info = 3;
if (transb < 0)
info = 2;
if (transa < 0)
info = 1;
if (info) {
BLASFUNC(xerbla) (ERROR_NAME, &info, sizeof(ERROR_NAME));
return;
}
#else
void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, blasint M,
blasint N, blasint k,
#ifndef COMPLEX
FLOAT alpha,
IFLOAT * A, blasint LDA,
IFLOAT * B, blasint LDB, FLOAT beta, FLOAT * c, blasint ldc)
{
#else
void *valpha,
void *va, blasint LDA,
void *vb, blasint LDB, void *vbeta, void *vc, blasint ldc)
{
FLOAT *alpha = (FLOAT *) valpha;
FLOAT *beta = (FLOAT *) vbeta;
FLOAT *A = (FLOAT *) va;
FLOAT *B = (FLOAT *) vb;
FLOAT *c = (FLOAT *) vc;
#endif
FLOAT *aa, *bb, *cc;
int transa, transb, uplo;
blasint info;
blasint m, n, lda, ldb;
FLOAT *a, *b;
XFLOAT *buffer;
PRINT_DEBUG_CNAME;
transa = -1;
transb = -1;
info = 0;
if (order == CblasColMajor) {
if (TransA == CblasNoTrans)
transa = 0;
if (TransA == CblasTrans)
transa = 1;
#ifndef COMPLEX
if (TransA == CblasConjNoTrans)
transa = 0;
if (TransA == CblasConjTrans)
transa = 1;
#else
if (TransA == CblasConjNoTrans)
transa = 2;
if (TransA == CblasConjTrans)
transa = 3;
#endif
if (TransB == CblasNoTrans)
transb = 0;
if (TransB == CblasTrans)
transb = 1;
#ifndef COMPLEX
if (TransB == CblasConjNoTrans)
transb = 0;
if (TransB == CblasConjTrans)
transb = 1;
#else
if (TransB == CblasConjNoTrans)
transb = 2;
if (TransB == CblasConjTrans)
transb = 3;
#endif
m = M;
n = N;
a = (void *)A;
b = (void *)B;
lda = LDA;
ldb = LDB;
info = -1;
if (ldc < m)
info = 13;
if (k < 0)
info = 5;
if (n < 0)
info = 4;
if (m < 0)
info = 3;
if (transb < 0)
info = 2;
if (transa < 0)
info = 1;
}
if (order == CblasRowMajor) {
m = N;
n = M;
a = (void *)B;
b = (void *)A;
lda = LDB;
ldb = LDA;
if (TransB == CblasNoTrans)
transa = 0;
if (TransB == CblasTrans)
transa = 1;
#ifndef COMPLEX
if (TransB == CblasConjNoTrans)
transa = 0;
if (TransB == CblasConjTrans)
transa = 1;
#else
if (TransB == CblasConjNoTrans)
transa = 2;
if (TransB == CblasConjTrans)
transa = 3;
#endif
if (TransA == CblasNoTrans)
transb = 0;
if (TransA == CblasTrans)
transb = 1;
#ifndef COMPLEX
if (TransA == CblasConjNoTrans)
transb = 0;
if (TransA == CblasConjTrans)
transb = 1;
#else
if (TransA == CblasConjNoTrans)
transb = 2;
if (TransA == CblasConjTrans)
transb = 3;
#endif
info = -1;
if (ldc < m)
info = 13;
if (k < 0)
info = 5;
if (n < 0)
info = 4;
if (m < 0)
info = 3;
if (transb < 0)
info = 2;
if (transa < 0)
info = 1;
}
uplo = -1;
if (Uplo == CblasUpper)
uplo = 0;
if (Uplo == CblasLower)
uplo = 1;
if (uplo < 0)
info = 14;
if (info >= 0) {
BLASFUNC(xerbla) (ERROR_NAME, &info, sizeof(ERROR_NAME));
return;
}
#if defined(COMPLEX)
FLOAT alpha_r = *(alpha + 0);
FLOAT alpha_i = *(alpha + 1);
FLOAT beta_r = *(beta + 0);
FLOAT beta_i = *(beta + 1);
#endif
#endif
int buffer_size;
blasint l;
blasint i, j;
#ifdef SMP
int nthreads;
#endif
#if defined(COMPLEX)
#ifdef SMP
static int (*gemv_thread[]) (BLASLONG, BLASLONG, FLOAT *, FLOAT *,
BLASLONG, FLOAT *, BLASLONG, FLOAT *,
BLASLONG, FLOAT *, int) = {
#ifdef XDOUBLE
xgemv_thread_n, xgemv_thread_t, xgemv_thread_r, xgemv_thread_c,
xgemv_thread_o, xgemv_thread_u, xgemv_thread_s,
xgemv_thread_d,
#elif defined DOUBLE
zgemv_thread_n, zgemv_thread_t, zgemv_thread_r, zgemv_thread_c,
zgemv_thread_o, zgemv_thread_u, zgemv_thread_s,
zgemv_thread_d,
#else
cgemv_thread_n, cgemv_thread_t, cgemv_thread_r, cgemv_thread_c,
cgemv_thread_o, cgemv_thread_u, cgemv_thread_s,
cgemv_thread_d,
#endif
};
#endif
int (*gemv[]) (BLASLONG, BLASLONG, BLASLONG, FLOAT, FLOAT, FLOAT *,
BLASLONG, FLOAT *, BLASLONG, FLOAT *, BLASLONG,
FLOAT *) = {
GEMV_N, GEMV_T, GEMV_R, GEMV_C, GEMV_O, GEMV_U, GEMV_S, GEMV_D,};
#else
#ifdef SMP
static int (*gemv_thread[]) (BLASLONG, BLASLONG, FLOAT, FLOAT *,
BLASLONG, FLOAT *, BLASLONG, FLOAT *,
BLASLONG, FLOAT *, int) = {
#ifdef XDOUBLE
qgemv_thread_n, qgemv_thread_t,
#elif defined DOUBLE
dgemv_thread_n, dgemv_thread_t,
#else
sgemv_thread_n, sgemv_thread_t,
#endif
};
#endif
int (*gemv[]) (BLASLONG, BLASLONG, BLASLONG, FLOAT, FLOAT *, BLASLONG,
FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *) = {
GEMV_N, GEMV_T,};
#endif
if ((m == 0) || (n == 0))
return;
IDEBUG_START;
FUNCTION_PROFILE_START();
const blasint incb = (transb == 0) ? 1 : ldb;
if (uplo == 1) {
for (i = 0; i < n; i++) {
j = n - i;
l = j;
#if defined(COMPLEX)
aa = a + i * 2;
bb = b + i * ldb * 2;
if (transa) {
l = k;
aa = a + lda * i * 2;
bb = b + i * 2;
}
cc = c + i * 2 * ldc + i * 2;
#else
aa = a + i;
bb = b + i * ldb;
if (transa) {
l = k;
aa = a + lda * i;
bb = b + i;
}
cc = c + i * ldc + i;
#endif
#if defined(COMPLEX)
if (beta_r != ONE || beta_i != ZERO)
SCAL_K(l, 0, 0, beta_r, beta_i, cc, 1, NULL, 0,
NULL, 0);
if (alpha_r == ZERO && alpha_i == ZERO)
return;
#else
if (beta != ONE)
SCAL_K(l, 0, 0, beta, cc, 1, NULL, 0, NULL, 0);
if (alpha == ZERO)
continue;
#endif
IDEBUG_START;
FUNCTION_PROFILE_START();
buffer_size = j + k + 128 / sizeof(FLOAT);
#ifdef WINDOWS_ABI
buffer_size += 160 / sizeof(FLOAT);
#endif
// for alignment
buffer_size = (buffer_size + 3) & ~3;
STACK_ALLOC(buffer_size, FLOAT, buffer);
#ifdef SMP
if (1L * j * k < 2304L * GEMM_MULTITHREAD_THRESHOLD)
nthreads = 1;
else
nthreads = num_cpu_avail(2);
if (nthreads == 1) {
#endif
#if defined(COMPLEX)
(gemv[(int)transa]) (j, k, 0, alpha_r, alpha_i,
aa, lda, bb, incb, cc, 1,
buffer);
#else
(gemv[(int)transa]) (j, k, 0, alpha, aa, lda,
bb, incb, cc, 1, buffer);
#endif
#ifdef SMP
} else {
(gemv_thread[(int)transa]) (j, k, alpha, aa,
lda, bb, incb, cc,
1, buffer,
nthreads);
}
#endif
STACK_FREE(buffer);
}
} else {
for (i = 0; i < n; i++) {
j = i + 1;
l = j;
#if defined COMPLEX
bb = b + i * ldb * 2;
if (transa) {
l = k;
bb = b + i * 2;
}
cc = c + i * 2 * ldc;
#else
bb = b + i * ldb;
if (transa) {
l = k;
bb = b + i;
}
cc = c + i * ldc;
#endif
#if defined(COMPLEX)
if (beta_r != ONE || beta_i != ZERO)
SCAL_K(l, 0, 0, beta_r, beta_i, cc, 1, NULL, 0,
NULL, 0);
if (alpha_r == ZERO && alpha_i == ZERO)
return;
#else
if (beta != ONE)
SCAL_K(l, 0, 0, beta, cc, 1, NULL, 0, NULL, 0);
if (alpha == ZERO)
continue;
#endif
IDEBUG_START;
FUNCTION_PROFILE_START();
buffer_size = j + k + 128 / sizeof(FLOAT);
#ifdef WINDOWS_ABI
buffer_size += 160 / sizeof(FLOAT);
#endif
// for alignment
buffer_size = (buffer_size + 3) & ~3;
STACK_ALLOC(buffer_size, FLOAT, buffer);
#ifdef SMP
if (1L * j * k < 2304L * GEMM_MULTITHREAD_THRESHOLD)
nthreads = 1;
else
nthreads = num_cpu_avail(2);
if (nthreads == 1) {
#endif
#if defined(COMPLEX)
(gemv[(int)transa]) (j, k, 0, alpha_r, alpha_i,
a, lda, bb, incb, cc, 1,
buffer);
#else
(gemv[(int)transa]) (j, k, 0, alpha, a, lda, bb,
incb, cc, 1, buffer);
#endif
#ifdef SMP
} else {
(gemv_thread[(int)transa]) (j, k, alpha, a, lda,
bb, incb, cc, 1,
buffer, nthreads);
}
#endif
STACK_FREE(buffer);
}
}
FUNCTION_PROFILE_END(COMPSIZE * COMPSIZE,
args.m * args.k + args.k * args.n +
args.m * args.n, 2 * args.m * args.n * args.k);
IDEBUG_END;
return;
}

View File

@ -44,6 +44,7 @@
#endif #endif
#ifndef COMPLEX #ifndef COMPLEX
#define SMP_THRESHOLD_MIN 65536.
#ifdef XDOUBLE #ifdef XDOUBLE
#define ERROR_NAME "QSYMM " #define ERROR_NAME "QSYMM "
#elif defined(DOUBLE) #elif defined(DOUBLE)
@ -52,6 +53,7 @@
#define ERROR_NAME "SSYMM " #define ERROR_NAME "SSYMM "
#endif #endif
#else #else
#define SMP_THRESHOLD_MIN 8192.
#ifndef GEMM3M #ifndef GEMM3M
#ifndef HEMM #ifndef HEMM
#ifdef XDOUBLE #ifdef XDOUBLE
@ -91,6 +93,10 @@
#endif #endif
#endif #endif
#ifndef GEMM_MULTITHREAD_THRESHOLD
#define GEMM_MULTITHREAD_THRESHOLD 4
#endif
#ifdef SMP #ifdef SMP
#ifndef COMPLEX #ifndef COMPLEX
@ -159,7 +165,9 @@ void NAME(char *SIDE, char *UPLO,
#if defined(SMP) && !defined(NO_AFFINITY) #if defined(SMP) && !defined(NO_AFFINITY)
int nodes; int nodes;
#endif #endif
# if defined(SMP)
int MN;
#endif
blasint info; blasint info;
int side; int side;
int uplo; int uplo;
@ -255,6 +263,9 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo,
#if defined(SMP) && !defined(NO_AFFINITY) #if defined(SMP) && !defined(NO_AFFINITY)
int nodes; int nodes;
#endif #endif
#if defined(SMP)
int MN;
#endif
PRINT_DEBUG_CNAME; PRINT_DEBUG_CNAME;
@ -375,15 +386,18 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo,
#ifdef SMP #ifdef SMP
args.common = NULL; args.common = NULL;
args.nthreads = num_cpu_avail(3); MN = 2.* (double) args.m * (double)args.m * (double) args.n;
if (MN <= (SMP_THRESHOLD_MIN * (double) GEMM_MULTITHREAD_THRESHOLD) ) {
args.nthreads = 1;
} else {
args.nthreads = num_cpu_avail(3);
}
if (args.nthreads == 1) { if (args.nthreads == 1) {
#endif #endif
(symm[(side << 1) | uplo ])(&args, NULL, NULL, sa, sb, 0); (symm[(side << 1) | uplo ])(&args, NULL, NULL, sa, sb, 0);
#ifdef SMP #ifdef SMP
} else { } else {
#ifndef NO_AFFINITY #ifndef NO_AFFINITY

View File

@ -180,6 +180,9 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint n, FLOAT alpha,
buffer = (FLOAT *)blas_memory_alloc(1); buffer = (FLOAT *)blas_memory_alloc(1);
#ifdef SMP #ifdef SMP
if (n <200)
nthreads=1;
else
nthreads = num_cpu_avail(2); nthreads = num_cpu_avail(2);
if (nthreads == 1) { if (nthreads == 1) {

View File

@ -368,6 +368,9 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Tr
mode |= (uplo << BLAS_UPLO_SHIFT); mode |= (uplo << BLAS_UPLO_SHIFT);
args.common = NULL; args.common = NULL;
if (args.n*args.k <1000)
args.nthreads =1 ;
else
args.nthreads = num_cpu_avail(3); args.nthreads = num_cpu_avail(3);
if (args.nthreads == 1) { if (args.nthreads == 1) {

View File

@ -44,6 +44,7 @@
#endif #endif
#ifndef COMPLEX #ifndef COMPLEX
#define SMP_THRESHOLD_MIN 109944.
#ifdef XDOUBLE #ifdef XDOUBLE
#define ERROR_NAME "QSYRK " #define ERROR_NAME "QSYRK "
#elif defined(DOUBLE) #elif defined(DOUBLE)
@ -52,6 +53,7 @@
#define ERROR_NAME "SSYRK " #define ERROR_NAME "SSYRK "
#endif #endif
#else #else
#define SMP_THRESHOLD_MIN 14824.
#ifndef HEMM #ifndef HEMM
#ifdef XDOUBLE #ifdef XDOUBLE
#define ERROR_NAME "XSYRK " #define ERROR_NAME "XSYRK "
@ -71,6 +73,10 @@
#endif #endif
#endif #endif
#ifndef GEMM_MULTITHREAD_THRESHOLD
#define GEMM_MULTITHREAD_THRESHOLD 4
#endif
static int (*syrk[])(blas_arg_t *, BLASLONG *, BLASLONG *, FLOAT *, FLOAT *, BLASLONG) = { static int (*syrk[])(blas_arg_t *, BLASLONG *, BLASLONG *, FLOAT *, FLOAT *, BLASLONG) = {
#ifndef HEMM #ifndef HEMM
SYRK_UN, SYRK_UC, SYRK_LN, SYRK_LC, SYRK_UN, SYRK_UC, SYRK_LN, SYRK_LC,
@ -101,6 +107,7 @@ void NAME(char *UPLO, char *TRANS,
FLOAT *sa, *sb; FLOAT *sa, *sb;
#ifdef SMP #ifdef SMP
int NNK;
#ifdef USE_SIMPLE_THREADED_LEVEL3 #ifdef USE_SIMPLE_THREADED_LEVEL3
#ifndef COMPLEX #ifndef COMPLEX
#ifdef XDOUBLE #ifdef XDOUBLE
@ -225,6 +232,8 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Tr
FLOAT *sa, *sb; FLOAT *sa, *sb;
#ifdef SMP #ifdef SMP
int NNK;
#ifdef USE_SIMPLE_THREADED_LEVEL3 #ifdef USE_SIMPLE_THREADED_LEVEL3
#ifndef COMPLEX #ifndef COMPLEX
#ifdef XDOUBLE #ifdef XDOUBLE
@ -354,18 +363,13 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Tr
#endif #endif
args.common = NULL; args.common = NULL;
#ifndef COMPLEX
#ifdef DOUBLE NNK = (double)(args.n+1)*(double)args.n*(double)args.k;
if (args.n < 100) if (NNK <= (SMP_THRESHOLD_MIN * GEMM_MULTITHREAD_THRESHOLD)) {
#else
if (args.n < 200)
#endif
#else
if (args.n < 65)
#endif
args.nthreads = 1; args.nthreads = 1;
else } else {
args.nthreads = num_cpu_avail(3); args.nthreads = num_cpu_avail(3);
}
if (args.nthreads == 1) { if (args.nthreads == 1) {
#endif #endif
@ -373,7 +377,6 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Tr
(syrk[(uplo << 1) | trans ])(&args, NULL, NULL, sa, sb, 0); (syrk[(uplo << 1) | trans ])(&args, NULL, NULL, sa, sb, 0);
#ifdef SMP #ifdef SMP
} else { } else {
#ifndef USE_SIMPLE_THREADED_LEVEL3 #ifndef USE_SIMPLE_THREADED_LEVEL3

View File

@ -181,7 +181,7 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, int n, FLOAT alpha, FLO
alpha_i * x[i * 2 + 0] + alpha_r * x[i * 2 + 1], alpha_i * x[i * 2 + 0] + alpha_r * x[i * 2 + 1],
x, 1, a, 1, NULL, 0); x, 1, a, 1, NULL, 0);
} }
a += lda; a += lda * 2;
} }
} else { } else {
for (i = 0; i < n; i++){ for (i = 0; i < n; i++){
@ -191,7 +191,7 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, int n, FLOAT alpha, FLO
alpha_i * x[i * 2 + 0] + alpha_r * x[i * 2 + 1], alpha_i * x[i * 2 + 0] + alpha_r * x[i * 2 + 1],
x + i * 2, 1, a, 1, NULL, 0); x + i * 2, 1, a, 1, NULL, 0);
} }
a += 2 + lda; a += 2 + lda * 2;
} }
} }
return; return;

View File

@ -237,8 +237,74 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS)
if (DGEMMOTCOPY) if (DGEMMOTCOPY)
GenerateNamedObjects("${KERNELDIR}/${DGEMMOTCOPY}" "DOUBLE" "${DGEMMOTCOPYOBJ}" false "" "" true "DOUBLE") GenerateNamedObjects("${KERNELDIR}/${DGEMMOTCOPY}" "DOUBLE" "${DGEMMOTCOPYOBJ}" false "" "" true "DOUBLE")
endif () endif ()
GenerateNamedObjects("${KERNELDIR}/${DGEMM_BETA}" "" "gemm_beta" false "" "" false "DOUBLE") GenerateNamedObjects("${KERNELDIR}/${DGEMM_BETA}" "" "gemm_beta" false "" "" false "DOUBLE")
endif () GenerateNamedObjects("generic/neg_tcopy_${DGEMM_UNROLL_M}.c" "" "neg_tcopy" false "" ${TSUFFIX} false "DOUBLE")
GenerateNamedObjects("generic/laswp_ncopy_${DGEMM_UNROLL_N}.c" "" "laswp_ncopy" false "" ${TSUFFIX} false "DOUBLE")
if (SMALL_MATRIX_OPT)
if (NOT DEFINED DGEMM_SMALL_M_PERMIT)
set(DGEMM_SMALL_M_PERMIT ../generic/gemm_small_matrix_permit.c)
endif ()
if (NOT DEFINED DGEMM_SMALL_K_NN)
set(DGEMM_SMALL_K_NN ../generic/gemm_small_matrix_kernel_nn.c)
endif ()
if (NOT DEFINED DGEMM_SMALL_K_NT)
set(DGEMM_SMALL_K_NT ../generic/gemm_small_matrix_kernel_nt.c)
endif ()
if (NOT DEFINED DGEMM_SMALL_K_TN)
set(DGEMM_SMALL_K_TN ../generic/gemm_small_matrix_kernel_tn.c)
endif ()
if (NOT DEFINED DGEMM_SMALL_K_TT)
set(DGEMM_SMALL_K_TT ../generic/gemm_small_matrix_kernel_tt.c)
endif ()
if (NOT DEFINED DGEMM_SMALL_K_B0_NN)
set(DGEMM_SMALL_K_B0_NN ../generic/gemm_small_matrix_kernel_nn.c)
endif ()
if (NOT DEFINED DGEMM_SMALL_K_B0_NT)
set(DGEMM_SMALL_K_B0_NT ../generic/gemm_small_matrix_kernel_nt.c)
endif ()
if (NOT DEFINED DGEMM_SMALL_K_B0_TN)
set(DGEMM_SMALL_K_B0_TN ../generic/gemm_small_matrix_kernel_tn.c)
endif ()
if (NOT DEFINED DGEMM_SMALL_K_B0_TT)
set(DGEMM_SMALL_K_B0_TT ../generic/gemm_small_matrix_kernel_tt.c)
endif ()
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_M_PERMIT}" "" "gemm_small_matrix_permit" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_NN}" "NN" "gemm_small_kernel_nn" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_NN}" "NR" "gemm_small_kernel_nr" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_NN}" "RN" "gemm_small_kernel_rn" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_NN}" "RR" "gemm_small_kernel_rr" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_NT}" "NT" "gemm_small_kernel_nt" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_NT}" "NC" "gemm_small_kernel_nc" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_NT}" "RT" "gemm_small_kernel_rt" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_NT}" "RC" "gemm_small_kernel_rc" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_TN}" "TN" "gemm_small_kernel_tn" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_TN}" "TR" "gemm_small_kernel_tr" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_TN}" "CN" "gemm_small_kernel_cn" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_TN}" "CR" "gemm_small_kernel_cr" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_TT}" "TT" "gemm_small_kernel_tt" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_TT}" "TC" "gemm_small_kernel_tc" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_TT}" "CT" "gemm_small_kernel_ct" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_TT}" "CC" "gemm_small_kernel_cc" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_NN}" "NN;B0" "gemm_small_kernel_b0_nn" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_NN}" "NR;B0" "gemm_small_kernel_b0_nr" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_NN}" "RN;B0" "gemm_small_kernel_b0_rn" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_NN}" "RR;B0" "gemm_small_kernel_b0_rr" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_NT}" "NT;B0" "gemm_small_kernel_b0_nt" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_NT}" "NC;B0" "gemm_small_kernel_b0_nc" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_NT}" "RT;B0" "gemm_small_kernel_b0_rt" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_NT}" "RC;B0" "gemm_small_kernel_b0_rc" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_TN}" "TN;B0" "gemm_small_kernel_b0_tn" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_TN}" "TR;B0" "gemm_small_kernel_b0_tr" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_TN}" "CN;B0" "gemm_small_kernel_b0_cn" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_TN}" "CR;B0" "gemm_small_kernel_b0_cr" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_TT}" "TT;B0" "gemm_small_kernel_b0_tt" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_TT}" "TC;B0" "gemm_small_kernel_b0_tc" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_TT}" "CT;B0" "gemm_small_kernel_b0_ct" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_TT}" "CC;B0" "gemm_small_kernel_b0_cc" false "" "" false "DOUBLE")
endif ()
endif ()
if ((BUILD_DOUBLE OR BUILD_COMPLEX) AND NOT BUILD_SINGLE) if ((BUILD_DOUBLE OR BUILD_COMPLEX) AND NOT BUILD_SINGLE)
GenerateNamedObjects("${KERNELDIR}/${SGEMMKERNEL}" "" "gemm_kernel" false "" "" false "SINGLE") GenerateNamedObjects("${KERNELDIR}/${SGEMMKERNEL}" "" "gemm_kernel" false "" "" false "SINGLE")
if (SGEMMINCOPY) if (SGEMMINCOPY)
@ -825,7 +891,7 @@ endif ()
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEADD_KERNEL}" "" "geadd_k" false "" "" false ${float_type}) GenerateNamedObjects("${KERNELDIR}/${${float_char}GEADD_KERNEL}" "" "geadd_k" false "" "" false ${float_type})
endforeach () endforeach ()
if (BUILD_DOUBLE AND NOT BUILD_SINGLE) if ((BUILD_DOUBLE OR BUILD_COMPLEX) AND NOT BUILD_SINGLE)
GenerateNamedObjects("${KERNELDIR}/${STRSMKERNEL_LN}" "UPPER;LN;TRSMKERNEL" "trsm_kernel_LN" false "" "" false "SINGLE") GenerateNamedObjects("${KERNELDIR}/${STRSMKERNEL_LN}" "UPPER;LN;TRSMKERNEL" "trsm_kernel_LN" false "" "" false "SINGLE")
GenerateNamedObjects("${KERNELDIR}/${STRSMKERNEL_LT}" "LT;TRSMKERNEL" "trsm_kernel_LT" false "" "" false "SINGLE") GenerateNamedObjects("${KERNELDIR}/${STRSMKERNEL_LT}" "LT;TRSMKERNEL" "trsm_kernel_LT" false "" "" false "SINGLE")
GenerateNamedObjects("${KERNELDIR}/${STRSMKERNEL_RN}" "UPPER;RN;TRSMKERNEL" "trsm_kernel_RN" false "" "" false "SINGLE") GenerateNamedObjects("${KERNELDIR}/${STRSMKERNEL_RN}" "UPPER;RN;TRSMKERNEL" "trsm_kernel_RN" false "" "" false "SINGLE")
@ -849,6 +915,45 @@ endif ()
GenerateNamedObjects("generic/trsm_ltcopy_${SGEMM_UNROLL_M}.c" "LOWER" "trsm_iltncopy" false "" "" false "SINGLE") GenerateNamedObjects("generic/trsm_ltcopy_${SGEMM_UNROLL_M}.c" "LOWER" "trsm_iltncopy" false "" "" false "SINGLE")
GenerateNamedObjects("generic/trsm_ltcopy_${SGEMM_UNROLL_N}.c" "OUTER;LOWER;UNIT" "trsm_oltucopy" false "" "" false "SINGLE") GenerateNamedObjects("generic/trsm_ltcopy_${SGEMM_UNROLL_N}.c" "OUTER;LOWER;UNIT" "trsm_oltucopy" false "" "" false "SINGLE")
GenerateNamedObjects("generic/trsm_ltcopy_${SGEMM_UNROLL_N}.c" "OUTER;LOWER" "trsm_oltncopy" false "" "" false "SINGLE") GenerateNamedObjects("generic/trsm_ltcopy_${SGEMM_UNROLL_N}.c" "OUTER;LOWER" "trsm_oltncopy" false "" "" false "SINGLE")
if (SMALL_MATRIX_OPT)
if (NOT DEFINED SGEMM_SMALL_M_PERMIT)
set(SGEMM_SMALL_M_PERMIT ../generic/gemm_small_matrix_permit.c)
endif ()
if (NOT DEFINED SGEMM_SMALL_K_NN)
set(SGEMM_SMALL_K_NN ../generic/gemm_small_matrix_kernel_nn.c)
endif ()
if (NOT DEFINED SGEMM_SMALL_K_NT)
set(SGEMM_SMALL_K_NT ../generic/gemm_small_matrix_kernel_nt.c)
endif ()
if (NOT DEFINED SGEMM_SMALL_K_TN)
set(SGEMM_SMALL_K_TN ../generic/gemm_small_matrix_kernel_tn.c)
endif ()
if (NOT DEFINED SGEMM_SMALL_K_TT)
set(SGEMM_SMALL_K_TT ../generic/gemm_small_matrix_kernel_tt.c)
endif ()
if (NOT DEFINED SGEMM_SMALL_K_B0_NN)
set(SGEMM_SMALL_K_B0_NN ../generic/gemm_small_matrix_kernel_nn.c)
endif ()
if (NOT DEFINED SGEMM_SMALL_K_B0_NT)
set(SGEMM_SMALL_K_B0_NT ../generic/gemm_small_matrix_kernel_nt.c)
endif ()
if (NOT DEFINED SGEMM_SMALL_K_B0_TN)
set(SGEMM_SMALL_K_B0_TN ../generic/gemm_small_matrix_kernel_tn.c)
endif ()
if (NOT DEFINED SGEMM_SMALL_K_B0_TT)
set(SGEMM_SMALL_K_B0_TT ../generic/gemm_small_matrix_kernel_tt.c)
endif ()
GenerateNamedObjects("${KERNELDIR}/${SGEMM_SMALL_M_PERMIT}" "" "gemm_small_matrix_permit" false "" "" false "SINGLE")
GenerateNamedObjects("${KERNELDIR}/${SGEMM_SMALL_K_NN}" "" "gemm_small_kernel_nn" false "" "" false "SINGLE")
GenerateNamedObjects("${KERNELDIR}/${SGEMM_SMALL_K_NT}" "" "gemm_small_kernel_nt" false "" "" false "SINGLE")
GenerateNamedObjects("${KERNELDIR}/${SGEMM_SMALL_K_TN}" "" "gemm_small_kernel_tn" false "" "" false "SINGLE")
GenerateNamedObjects("${KERNELDIR}/${SGEMM_SMALL_K_TT}" "" "gemm_small_kernel_tt" false "" "" false "SINGLE")
GenerateNamedObjects("${KERNELDIR}/${SGEMM_SMALL_K_B0_NN}" "B0" "gemm_small_kernel_b0_nn" false "" "" false "SINGLE")
GenerateNamedObjects("${KERNELDIR}/${SGEMM_SMALL_K_B0_NT}" "B0" "gemm_small_kernel_b0_nt" false "" "" false "SINGLE")
GenerateNamedObjects("${KERNELDIR}/${SGEMM_SMALL_K_B0_TN}" "B0" "gemm_small_kernel_b0_tn" false "" "" false "SINGLE")
GenerateNamedObjects("${KERNELDIR}/${SGEMM_SMALL_K_B0_TT}" "B0" "gemm_small_kernel_b0_tt" false "" "" false "SINGLE")
endif ()
endif () endif ()
# Makefile.LA # Makefile.LA
@ -878,25 +983,25 @@ endif ()
endforeach() endforeach()
if (BUILD_COMPLEX AND NOT BUILD_SINGLE) if (BUILD_COMPLEX AND NOT BUILD_SINGLE)
if (NOT DEFINED SNEG_TCOPY) if (NOT DEFINED SNEG_TCOPY)
set(SNEG_TCOPY ../generic/neg_tcopy_${${float_char}GEMM_UNROLL_M}.c) set(SNEG_TCOPY ../generic/neg_tcopy_${SGEMM_UNROLL_M}.c)
endif () endif ()
if (NOT DEFINED SLASWP_NCOPY) if (NOT DEFINED SLASWP_NCOPY)
set(SLASWP_NCOPY ../generic/laswp_ncopy_${${float_char}GEMM_UNROLL_N}.c) set(SLASWP_NCOPY ../generic/laswp_ncopy_${SGEMM_UNROLL_N}.c)
endif () endif ()
GenerateNamedObjects("${KERNELDIR}/${SNEG_TCOPY}" "" "neg_tcopy" false "" "" false "SINGLE") GenerateNamedObjects("${KERNELDIR}/${SNEG_TCOPY}" "" "neg_tcopy" false "" "" false "SINGLE")
GenerateNamedObjects("${KERNELDIR}/${SLASWP_NCOPY}" "" "laswp_ncopy" false "" "" false "SINGLE") GenerateNamedObjects("${KERNELDIR}/${SLASWP_NCOPY}" "" "laswp_ncopy" false "" "" false "SINGLE")
endif() endif()
if (BUILD_COMPLEX16 AND NOT BUILD_DOUBLE) if (BUILD_COMPLEX16 AND NOT BUILD_DOUBLE)
if (NOT DEFINED DNEG_TCOPY) if (NOT DEFINED DNEG_TCOPY)
set(DNEG_TCOPY ../generic/neg_tcopy_${${float_char}GEMM_UNROLL_M}.c) set(DNEG_TCOPY ../generic/neg_tcopy_${DGEMM_UNROLL_M}.c)
endif () endif ()
if (NOT DEFINED DLASWP_NCOPY) if (NOT DEFINED DLASWP_NCOPY)
set(DLASWP_NCOPY ../generic/laswp_ncopy_${${float_char}GEMM_UNROLL_N}.c) set(DLASWP_NCOPY ../generic/laswp_ncopy_${DGEMM_UNROLL_N}.c)
endif () endif ()
GenerateNamedObjects("${KERNELDIR}/${DNEG_TCOPY}_${DGEMM_UNROLL_M}.c" "" "neg_tcopy" false "" "" false "DOUBLE") GenerateNamedObjects("${KERNELDIR}/${DNEG_TCOPY}" "" "neg_tcopy" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DLASWP_NCOPY}_${DGEMM_UNROLL_N}.c" "" "laswp_ncopy" false "" "" false "DOUBLE") GenerateNamedObjects("${KERNELDIR}/${DLASWP_NCOPY}" "" "laswp_ncopy" false "" "" false "DOUBLE")
endif() endif()
endif() endif()
@ -979,10 +1084,117 @@ endif ()
endif () endif ()
if (BUILD_COMPLEX16 AND NOT BUILD_DOUBLE) if (BUILD_COMPLEX16 AND NOT BUILD_DOUBLE)
GenerateNamedObjects("${KERNELDIR}/${DAMAXKERNEL}" "USE_ABS" "amax_k" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DAMINKERNEL}" "USE_ABS;USE_MIN" "amin_k" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DASUMKERNEL}" "" "asum_k" false "" "" false "DOUBLE")
if (DEFINED DMAXKERNEL)
GenerateNamedObjects("${KERNELDIR}/${DMAXKERNEL}" "" "max_k" false "" "" false "DOUBLE")
endif ()
if (DEFINED DMINKERNEL)
GenerateNamedObjects("${KERNELDIR}/${DMINKERNEL}" "USE_MIN" "min_k" false "" "" false "DOUBLE")
endif ()
if (DEFINED IDMINKERNEL)
GenerateNamedObjects("${KERNELDIR}/${IDMINKERNEL}" "USE_MIN" "i*min_k" false "" "" false "DOUBLE")
endif ()
if (DEFINED IDMAXKERNEL)
GenerateNamedObjects("${KERNELDIR}/${IDMAXKERNEL}" "" "i*max_k" false "" "" false "DOUBLE")
endif ()
GenerateNamedObjects("${KERNELDIR}/${IDAMAXKERNEL}" "USE_ABS" "i*amax_k" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${IDAMINKERNEL}" "USE_ABS;USE_MIN" "i*amin_k" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DSCALKERNEL}" "" "scal_k" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DCOPYKERNEL}" "C_INTERFACE" "copy_k" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DNRM2KERNEL}" "" "nrm2_k" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DROTKERNEL}" "" "rot_k" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DDOTKERNEL}" "" "dot_k" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DSWAPKERNEL}" "" "swap_k" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DAXPYKERNEL}" "" "axpy_k" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMVNKERNEL}" "" "gemv_n" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMVTKERNEL}" "TRANS" "gemv_t" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMMKERNEL}" "" "gemm_kernel" false "" "" false "DOUBLE")
if (DGEMMINCOPY)
GenerateNamedObjects("${KERNELDIR}/${DGEMMINCOPY}" "DOUBLE" "${DGEMMINCOPYOBJ}" false "" "" true "DOUBLE")
endif ()
if (DGEMMITCOPY)
GenerateNamedObjects("${KERNELDIR}/${DGEMMITCOPY}" "DOUBLE" "${DGEMMITCOPYOBJ}" false "" "" true "DOUBLE")
endif ()
if (DGEMMONCOPY)
GenerateNamedObjects("${KERNELDIR}/${DGEMMONCOPY}" "DOUBLE" "${DGEMMONCOPYOBJ}" false "" "" true "DOUBLE")
endif ()
if (DGEMMOTCOPY)
GenerateNamedObjects("${KERNELDIR}/${DGEMMOTCOPY}" "DOUBLE" "${DGEMMOTCOPYOBJ}" false "" "" true "DOUBLE")
endif ()
GenerateNamedObjects("${KERNELDIR}/${DGEMM_BETA}" "" "gemm_beta" false "" "" false "DOUBLE")
GenerateNamedObjects("generic/neg_tcopy_${DGEMM_UNROLL_M}.c" "" "neg_tcopy" false "" ${TSUFFIX} false "DOUBLE") GenerateNamedObjects("generic/neg_tcopy_${DGEMM_UNROLL_M}.c" "" "neg_tcopy" false "" ${TSUFFIX} false "DOUBLE")
GenerateNamedObjects("generic/laswp_ncopy_${DGEMM_UNROLL_N}.c" "" "laswp_ncopy" false "" ${TSUFFIX} false "DOUBLE") GenerateNamedObjects("generic/laswp_ncopy_${DGEMM_UNROLL_N}.c" "" "laswp_ncopy" false "" ${TSUFFIX} false "DOUBLE")
if (SMALL_MATRIX_OPT)
if (NOT DEFINED DGEMM_SMALL_M_PERMIT)
set(DGEMM_SMALL_M_PERMIT ../generic/gemm_small_matrix_permit.c)
endif ()
if (NOT DEFINED DGEMM_SMALL_K_NN)
set(DGEMM_SMALL_K_NN ../generic/gemm_small_matrix_kernel_nn.c)
endif ()
if (NOT DEFINED DGEMM_SMALL_K_NT)
set(DGEMM_SMALL_K_NT ../generic/gemm_small_matrix_kernel_nt.c)
endif ()
if (NOT DEFINED DGEMM_SMALL_K_TN)
set(DGEMM_SMALL_K_TN ../generic/gemm_small_matrix_kernel_tn.c)
endif ()
if (NOT DEFINED DGEMM_SMALL_K_TT)
set(DGEMM_SMALL_K_TT ../generic/gemm_small_matrix_kernel_tt.c)
endif ()
if (NOT DEFINED DGEMM_SMALL_K_B0_NN)
set(DGEMM_SMALL_K_B0_NN ../generic/gemm_small_matrix_kernel_nn.c)
endif ()
if (NOT DEFINED DGEMM_SMALL_K_B0_NT)
set(DGEMM_SMALL_K_B0_NT ../generic/gemm_small_matrix_kernel_nt.c)
endif ()
if (NOT DEFINED DGEMM_SMALL_K_B0_TN)
set(DGEMM_SMALL_K_B0_TN ../generic/gemm_small_matrix_kernel_tn.c)
endif ()
if (NOT DEFINED DGEMM_SMALL_K_B0_TT)
set(DGEMM_SMALL_K_B0_TT ../generic/gemm_small_matrix_kernel_tt.c)
endif ()
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_M_PERMIT}" "" "gemm_small_matrix_permit" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_NN}" "NN" "gemm_small_kernel_nn" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_NN}" "NR" "gemm_small_kernel_nr" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_NN}" "RN" "gemm_small_kernel_rn" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_NN}" "RR" "gemm_small_kernel_rr" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_NT}" "NT" "gemm_small_kernel_nt" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_NT}" "NC" "gemm_small_kernel_nc" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_NT}" "RT" "gemm_small_kernel_rt" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_NT}" "RC" "gemm_small_kernel_rc" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_TN}" "TN" "gemm_small_kernel_tn" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_TN}" "TR" "gemm_small_kernel_tr" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_TN}" "CN" "gemm_small_kernel_cn" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_TN}" "CR" "gemm_small_kernel_cr" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_TT}" "TT" "gemm_small_kernel_tt" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_TT}" "TC" "gemm_small_kernel_tc" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_TT}" "CT" "gemm_small_kernel_ct" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_TT}" "CC" "gemm_small_kernel_cc" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_NN}" "NN;B0" "gemm_small_kernel_b0_nn" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_NN}" "NR;B0" "gemm_small_kernel_b0_nr" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_NN}" "RN;B0" "gemm_small_kernel_b0_rn" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_NN}" "RR;B0" "gemm_small_kernel_b0_rr" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_NT}" "NT;B0" "gemm_small_kernel_b0_nt" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_NT}" "NC;B0" "gemm_small_kernel_b0_nc" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_NT}" "RT;B0" "gemm_small_kernel_b0_rt" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_NT}" "RC;B0" "gemm_small_kernel_b0_rc" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_TN}" "TN;B0" "gemm_small_kernel_b0_tn" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_TN}" "TR;B0" "gemm_small_kernel_b0_tr" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_TN}" "CN;B0" "gemm_small_kernel_b0_cn" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_TN}" "CR;B0" "gemm_small_kernel_b0_cr" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_TT}" "TT;B0" "gemm_small_kernel_b0_tt" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_TT}" "TC;B0" "gemm_small_kernel_b0_tc" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_TT}" "CT;B0" "gemm_small_kernel_b0_ct" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_TT}" "CC;B0" "gemm_small_kernel_b0_cc" false "" "" false "DOUBLE")
endif ()
endif () endif ()
if (BUILD_COMPLEX16 AND NOT BUILD_COMPLEX) if (BUILD_COMPLEX16 AND NOT BUILD_SINGLE)
GenerateNamedObjects("${KERNELDIR}/${SSCALKERNEL}" "" "scal_k" false "" "" false "SINGLE")
endif()
if (BUILD_COMPLEX160 AND NOT BUILD_COMPLEX)
GenerateNamedObjects("${KERNELDIR}/${CAMAXKERNEL}" "USE_ABS" "amax_k" false "" "" false "COMPLEX") GenerateNamedObjects("${KERNELDIR}/${CAMAXKERNEL}" "USE_ABS" "amax_k" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CAMINKERNEL}" "USE_ABS;USE_MIN" "amin_k" false "" "" false "COMPLEX") GenerateNamedObjects("${KERNELDIR}/${CAMINKERNEL}" "USE_ABS;USE_MIN" "amin_k" false "" "" false "COMPLEX")
if (DEFINED CMAXKERNEL) if (DEFINED CMAXKERNEL)
@ -1046,7 +1258,69 @@ endif ()
if (CGEMMOTCOPY) if (CGEMMOTCOPY)
GenerateNamedObjects("${KERNELDIR}/${CGEMMOTCOPY}" "COMPLEX" "${CGEMMOTCOPYOBJ}" false "" "" true "COMPLEX") GenerateNamedObjects("${KERNELDIR}/${CGEMMOTCOPY}" "COMPLEX" "${CGEMMOTCOPYOBJ}" false "" "" true "COMPLEX")
endif () endif ()
GenerateNamedObjects("${KERNELDIR}/${CGEMM_BETA}" "" "gemm_beta" false "" "" false "COMPLEX") GenerateNamedObjects("${KERNELDIR}/${CGEMM_BETA}" "" "gemm_beta" false "" "" false "COMPLEX")
if (SMALL_MATRIX_OPT)
if (NOT DEFINED CGEMM_SMALL_M_PERMIT)
set(CGEMM_SMALL_M_PERMIT ../generic/zgemm_small_matrix_permit)
endif ()
if (NOT DEFINED CGEMM_SMALL_K_NN)
set(CGEMM_SMALL_K_NN ../generic/zgemm_small_matrix_kernel_nn)
endif ()
if (NOT DEFINED CGEMM_SMALL_K_NT)
set(CGEMM_SMALL_K_NT ../generic/zgemm_small_matrix_kernel_nt)
endif ()
if (NOT DEFINED CGEMM_SMALL_K_TN)
set(CGEMM_SMALL_K_TN ../generic/zgemm_small_matrix_kernel_tn)
endif ()
if (NOT DEFINED CGEMM_SMALL_K_TT)
set(CGEMM_SMALL_K_TT ../generic/zgemm_small_matrix_kernel_tt)
endif ()
if (NOT DEFINED CGEMM_SMALL_K_B0_NN)
set(CGEMM_SMALL_K_B0_NN ../generic/zgemm_small_matrix_kernel_nn)
endif ()
if (NOT DEFINED CGEMM_SMALL_K_B0_NT)
set(CGEMM_SMALL_K_B0_NT ../generic/zgemm_small_matrix_kernel_nt)
endif ()
if (NOT DEFINED CGEMM_SMALL_K_B0_TN)
set(CGEMM_SMALL_K_B0_TN ../generic/zgemm_small_matrix_kernel_tn)
endif ()
if (NOT DEFINED CGEMM_SMALL_K_B0_TT)
set(CGEMM_SMALL_K_B0_TT ../generic/zgemm_small_matrix_kernel_tt)
endif ()
GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_M_PERMIT}.c" "" "gemm_small_matrix_permit" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_NN}.c" "NN" "gemm_small_kernel_nn" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_NN}.c" "NR" "gemm_small_kernel_nr" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_NN}.c" "RN" "gemm_small_kernel_rn" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_NN}.c" "RR" "gemm_small_kernel_rr" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_NT}.c" "NT" "gemm_small_kernel_nt" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_NT}.c" "NC" "gemm_small_kernel_nc" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_NT}.c" "RT" "gemm_small_kernel_rt" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_NT}.c" "RC" "gemm_small_kernel_rc" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_TN}.c" "TN" "gemm_small_kernel_tn" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_TN}.c" "TR" "gemm_small_kernel_tr" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_TN}.c" "CN" "gemm_small_kernel_cn" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_TN}.c" "CR" "gemm_small_kernel_cr" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_TT}.c" "TT" "gemm_small_kernel_tt" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_TT}.c" "TC" "gemm_small_kernel_tc" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_TT}.c" "CT" "gemm_small_kernel_ct" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_TT}.c" "CC" "gemm_small_kernel_cc" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_B0_NN}.c" "NN;B0" "gemm_small_kernel_b0_nn" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_B0_NN}.c" "NR;B0" "gemm_small_kernel_b0_nr" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_B0_NN}.c" "RN;B0" "gemm_small_kernel_b0_rn" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_B0_NN}.c" "RR;B0" "gemm_small_kernel_b0_rr" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_B0_NT}.c" "NT;B0" "gemm_small_kernel_b0_nt" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_B0_NT}.c" "NC;B0" "gemm_small_kernel_b0_nc" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_B0_NT}.c" "RT;B0" "gemm_small_kernel_b0_rt" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_B0_NT}.c" "RC;B0" "gemm_small_kernel_b0_rc" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_B0_TN}.c" "TN;B0" "gemm_small_kernel_b0_tn" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_B0_TN}.c" "TR;B0" "gemm_small_kernel_b0_tr" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_B0_TN}.c" "CN;B0" "gemm_small_kernel_b0_cn" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_B0_TN}.c" "CR;B0" "gemm_small_kernel_b0_cr" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_B0_TT}.c" "TT;B0" "gemm_small_kernel_b0_tt" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_B0_TT}.c" "TC;B0" "gemm_small_kernel_b0_tc" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_B0_TT}.c" "CT;B0" "gemm_small_kernel_b0_ct" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_B0_TT}.c" "CC;B0" "gemm_small_kernel_b0_cc" false "" "" false "COMPLEX")
endif ()
GenerateNamedObjects("generic/ztrsm_uncopy_${CGEMM_UNROLL_M}.c" "UNIT" "trsm_iunucopy" false "" ${TSUFFIX} false "COMPLEX") GenerateNamedObjects("generic/ztrsm_uncopy_${CGEMM_UNROLL_M}.c" "UNIT" "trsm_iunucopy" false "" ${TSUFFIX} false "COMPLEX")
GenerateNamedObjects("generic/ztrsm_uncopy_${CGEMM_UNROLL_M}.c" "" "trsm_iunncopy" false "" ${TSUFFIX} false "COMPLEX") GenerateNamedObjects("generic/ztrsm_uncopy_${CGEMM_UNROLL_M}.c" "" "trsm_iunncopy" false "" ${TSUFFIX} false "COMPLEX")
GenerateNamedObjects("generic/ztrsm_uncopy_${CGEMM_UNROLL_N}.c" "OUTER;UNIT" "trsm_ounucopy" false "" ${TSUFFIX} false "COMPLEX") GenerateNamedObjects("generic/ztrsm_uncopy_${CGEMM_UNROLL_N}.c" "OUTER;UNIT" "trsm_ounucopy" false "" ${TSUFFIX} false "COMPLEX")

View File

@ -23,7 +23,7 @@ ifeq ($(C_COMPILER), CLANG)
# Any clang posing as gcc 4.2 should be new enough (3.4 or later) # Any clang posing as gcc 4.2 should be new enough (3.4 or later)
GCCVERSIONCHECK := $(GCCVERSIONGT4)$(GCCVERSIONGTEQ4)$(GCCMINORVERSIONGTEQ2) GCCVERSIONCHECK := $(GCCVERSIONGT4)$(GCCVERSIONGTEQ4)$(GCCMINORVERSIONGTEQ2)
ifeq ($(GCCVERSIONCHECK), $(filter $(GCCVERSIONCHECK), 011 110 111)) ifeq ($(GCCVERSIONCHECK), $(filter $(GCCVERSIONCHECK), 011 110 111))
AVX2OPT = -mavx2 AVX2OPT = -mavx2 -mfma
endif endif
endif endif
ifdef NO_AVX2 ifdef NO_AVX2
@ -73,6 +73,8 @@ else ifeq ($(TARGET_CORE), SKYLAKEX)
endif endif
else ifeq ($(TARGET_CORE), HASWELL) else ifeq ($(TARGET_CORE), HASWELL)
override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE) $(AVX2OPT) override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE) $(AVX2OPT)
else ifeq ($(TARGET_CORE), ZEN)
override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE) $(AVX2OPT)
else ifeq ($(TARGET_CORE), LOONGSON3R4) else ifeq ($(TARGET_CORE), LOONGSON3R4)
override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE) $(MSA_FLAGS) override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE) $(MSA_FLAGS)
else else

View File

@ -207,9 +207,12 @@ ifneq "$(or $(BUILD_SINGLE), $(BUILD_DOUBLE), $(BUILD_COMPLEX))" ""
SBLASOBJS += \ SBLASOBJS += \
sgemv_n$(TSUFFIX).$(SUFFIX) sgemv_t$(TSUFFIX).$(SUFFIX) sgemv_n$(TSUFFIX).$(SUFFIX) sgemv_t$(TSUFFIX).$(SUFFIX)
endif endif
ifneq "$(or $(BUILD_SINGLE), $(BUILD_DOUBLE))" ""
SBLASOBJS += \
ssymv_U$(TSUFFIX).$(SUFFIX) ssymv_L$(TSUFFIX).$(SUFFIX)
endif
ifeq ($(BUILD_SINGLE),1) ifeq ($(BUILD_SINGLE),1)
SBLASOBJS += \ SBLASOBJS += \
ssymv_U$(TSUFFIX).$(SUFFIX) ssymv_L$(TSUFFIX).$(SUFFIX) \
sger_k$(TSUFFIX).$(SUFFIX) sger_k$(TSUFFIX).$(SUFFIX)
endif endif
ifeq ($(BUILD_DOUBLE),1) ifeq ($(BUILD_DOUBLE),1)
@ -359,8 +362,7 @@ $(KDIR)xgemv_d$(TSUFFIX).$(SUFFIX) $(KDIR)xgemv_d$(TSUFFIX).$(PSUFFIX) : $(KERNE
$(CC) -c $(CFLAGS) -DXDOUBLE -DCOMPLEX -DTRANS -DCONJ -DXCONJ $< -o $@ $(CC) -c $(CFLAGS) -DXDOUBLE -DCOMPLEX -DTRANS -DCONJ -DXCONJ $< -o $@
ifeq ($(BUILD_SINGLE),1) ifneq "$(or (BUILD_SINGLE),$(BUILD_DOUBLE))" ""
$(KDIR)ssymv_U$(TSUFFIX).$(SUFFIX) $(KDIR)ssymv_U$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SSYMV_U_KERNEL) $(SSYMV_U_PARAM) $(KDIR)ssymv_U$(TSUFFIX).$(SUFFIX) $(KDIR)ssymv_U$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SSYMV_U_KERNEL) $(SSYMV_U_PARAM)
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -ULOWER $< -o $@ $(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -ULOWER $< -o $@

View File

@ -38,7 +38,7 @@
#define ASSEMBLER #define ASSEMBLER
#include "common.h" #include "common.h"
#include "version.h"
#define N $16 #define N $16
#define X $17 #define X $17

View File

@ -38,7 +38,7 @@
#define ASSEMBLER #define ASSEMBLER
#include "common.h" #include "common.h"
#include "version.h"
#define PREFETCHSIZE 88 #define PREFETCHSIZE 88

View File

@ -38,7 +38,7 @@
#define ASSEMBLER #define ASSEMBLER
#include "common.h" #include "common.h"
#include "version.h"
#define PREFETCHSIZE 40 #define PREFETCHSIZE 40

View File

@ -38,7 +38,7 @@
#define ASSEMBLER #define ASSEMBLER
#include "common.h" #include "common.h"
#include "version.h"
.set noat .set noat
.set noreorder .set noreorder

View File

@ -39,7 +39,7 @@
#define ASSEMBLER #define ASSEMBLER
#include "common.h" #include "common.h"
#include "version.h"
#define PREFETCH_SIZE 80 #define PREFETCH_SIZE 80

View File

@ -38,7 +38,7 @@
#define ASSEMBLER #define ASSEMBLER
#include "common.h" #include "common.h"
#include "version.h"
#define N $16 #define N $16
#define X $17 #define X $17

View File

@ -42,7 +42,7 @@
#define ASSEMBLER #define ASSEMBLER
#include "common.h" #include "common.h"
#include "version.h"
.globl NAME .globl NAME
.ent NAME .ent NAME

View File

@ -39,7 +39,7 @@
#define ASSEMBLER #define ASSEMBLER
#include "common.h" #include "common.h"
#include "version.h"
#define PREFETCH_SIZE 80 #define PREFETCH_SIZE 80

View File

@ -38,7 +38,7 @@
#define ASSEMBLER #define ASSEMBLER
#include "common.h" #include "common.h"
#include "version.h"
#define PREFETCHSIZE 88 #define PREFETCHSIZE 88

View File

@ -38,7 +38,7 @@
#define ASSEMBLER #define ASSEMBLER
#include "common.h" #include "common.h"
#include "version.h"
.set noat .set noat
.set noreorder .set noreorder

View File

@ -38,7 +38,7 @@
#define ASSEMBLER #define ASSEMBLER
#include "common.h" #include "common.h"
#include "version.h"
#if !defined(EV4) && !defined(EV5) && !defined(EV6) #if !defined(EV4) && !defined(EV5) && !defined(EV6)
#error "Architecture is not specified." #error "Architecture is not specified."

View File

@ -38,7 +38,7 @@
#define ASSEMBLER #define ASSEMBLER
#include "common.h" #include "common.h"
#include "version.h"
#define STACKSIZE 64 #define STACKSIZE 64
#define PREFETCHSIZE 32 #define PREFETCHSIZE 32

View File

@ -38,7 +38,7 @@
#define ASSEMBLER #define ASSEMBLER
#include "common.h" #include "common.h"
#include "version.h"
#define STACKSIZE 64 #define STACKSIZE 64
#define PREFETCHSIZE 32 #define PREFETCHSIZE 32

Some files were not shown because too many files have changed in this diff Show More