Merge branch 'OpenMathLib:develop' into issue4728

This commit is contained in:
Martin Kroeker 2024-06-21 09:35:56 +02:00 committed by GitHub
commit a2ee4b1966
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
16 changed files with 166 additions and 104 deletions

View File

@ -33,10 +33,8 @@ jobs:
- name: Install APT deps
run: |
sudo add-apt-repository ppa:savoury1/virtualisation
sudo apt-get update
sudo apt-get install autoconf automake autotools-dev ninja-build make ccache \
qemu-user-static
sudo apt-get install autoconf automake autotools-dev ninja-build make ccache
- name: Download and install loongarch64-toolchain
run: |
@ -44,6 +42,20 @@ jobs:
#wget https://github.com/loongson/build-tools/releases/download/2023.08.08/CLFS-loongarch64-8.1-x86_64-cross-tools-gcc-glibc.tar.xz
tar -xf CLFS-loongarch64-8.1-x86_64-cross-tools-gcc-glibc.tar.xz -C /opt
- name: Checkout qemu
uses: actions/checkout@v3
with:
repository: qemu/qemu
path: qemu
ref: master
- name: Install qemu
run: |
cd qemu
./configure --prefix=$GITHUB_WORKSPACE/qemu-install --target-list=loongarch64-linux-user --disable-system --static
make -j$(nproc)
make install
- name: Set env
run: |
echo "LD_LIBRARY_PATH=/opt/cross-tools/target/usr/lib64:/opt/cross-tools/loongarch64-unknown-linux-gnu/lib64:$LD_LIBRARY_PATH" >> $GITHUB_ENV
@ -76,45 +88,46 @@ jobs:
- name: Test
run: |
qemu-loongarch64-static ./utest/openblas_utest
qemu-loongarch64-static ./utest/openblas_utest_ext
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xscblat1
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xdcblat1
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xccblat1
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xzcblat1
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xscblat2 < ./ctest/sin2
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xdcblat2 < ./ctest/din2
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xccblat2 < ./ctest/cin2
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xzcblat2 < ./ctest/zin2
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xscblat3 < ./ctest/sin3
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xdcblat3 < ./ctest/din3
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xccblat3 < ./ctest/cin3
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xzcblat3 < ./ctest/zin3
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/sblat1
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/dblat1
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/cblat1
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/zblat1
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/sblat1
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/dblat1
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/cblat1
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/zblat1
export PATH=$GITHUB_WORKSPACE/qemu-install/bin/:$PATH
qemu-loongarch64 ./utest/openblas_utest
qemu-loongarch64 ./utest/openblas_utest_ext
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xscblat1
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xdcblat1
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xccblat1
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xzcblat1
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xscblat2 < ./ctest/sin2
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xdcblat2 < ./ctest/din2
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xccblat2 < ./ctest/cin2
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xzcblat2 < ./ctest/zin2
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xscblat3 < ./ctest/sin3
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xdcblat3 < ./ctest/din3
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xccblat3 < ./ctest/cin3
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xzcblat3 < ./ctest/zin3
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/sblat1
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/dblat1
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/cblat1
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/zblat1
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/sblat1
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/dblat1
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/cblat1
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/zblat1
rm -f ./test/?BLAT2.SUMM
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/sblat2 < ./test/sblat2.dat
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/dblat2 < ./test/dblat2.dat
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/cblat2 < ./test/cblat2.dat
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/zblat2 < ./test/zblat2.dat
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/sblat2 < ./test/sblat2.dat
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/dblat2 < ./test/dblat2.dat
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/cblat2 < ./test/cblat2.dat
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/zblat2 < ./test/zblat2.dat
rm -f ./test/?BLAT2.SUMM
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/sblat2 < ./test/sblat2.dat
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/dblat2 < ./test/dblat2.dat
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/cblat2 < ./test/cblat2.dat
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/zblat2 < ./test/zblat2.dat
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/sblat2 < ./test/sblat2.dat
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/dblat2 < ./test/dblat2.dat
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/cblat2 < ./test/cblat2.dat
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/zblat2 < ./test/zblat2.dat
rm -f ./test/?BLAT3.SUMM
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/sblat3 < ./test/sblat3.dat
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/dblat3 < ./test/dblat3.dat
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/cblat3 < ./test/cblat3.dat
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/zblat3 < ./test/zblat3.dat
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/sblat3 < ./test/sblat3.dat
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/dblat3 < ./test/dblat3.dat
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/cblat3 < ./test/cblat3.dat
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/zblat3 < ./test/zblat3.dat
rm -f ./test/?BLAT3.SUMM
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/sblat3 < ./test/sblat3.dat
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/dblat3 < ./test/dblat3.dat
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/cblat3 < ./test/cblat3.dat
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/zblat3 < ./test/zblat3.dat
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/sblat3 < ./test/sblat3.dat
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/dblat3 < ./test/dblat3.dat
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/cblat3 < ./test/cblat3.dat
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/zblat3 < ./test/zblat3.dat

View File

@ -34,18 +34,30 @@ jobs:
- name: Install APT deps
run: |
sudo add-apt-repository ppa:savoury1/virtualisation
sudo apt-get update
sudo apt-get install autoconf automake autotools-dev ninja-build make ccache \
qemu-user-static
sudo apt-get install autoconf automake autotools-dev ninja-build make ccache
- name: Download and install loongarch64-toolchain
run: |
wget http://ftp.loongnix.cn/toolchain/llvm/llvm8/clang+llvm_8.0.1-6_amd64-linux-gnu_debian-10.tar.gz
wget http://ftp.loongnix.cn/toolchain/gcc/release/loongarch/gcc8/loongson-gnu-toolchain-8.3-x86_64-loongarch64-linux-gnu-rc1.3.tar.xz
wget https://github.com/XiWeiGu/loongarch64_toolchain/releases/download/V0.1/clang+llvm_8.0.1-6_amd64-linux-gnu_debian-10.tar.gz
wget https://github.com/XiWeiGu/loongarch64_toolchain/releases/download/V0.1/loongson-gnu-toolchain-8.3-x86_64-loongarch64-linux-gnu-rc1.3.tar.xz
tar -xf clang+llvm_8.0.1-6_amd64-linux-gnu_debian-10.tar.gz -C /opt
tar -xf loongson-gnu-toolchain-8.3-x86_64-loongarch64-linux-gnu-rc1.3.tar.xz -C /opt
- name: Checkout qemu
uses: actions/checkout@v3
with:
repository: qemu/qemu
path: qemu
ref: master
- name: Install qemu
run: |
cd qemu
./configure --prefix=$GITHUB_WORKSPACE/qemu-install --target-list=loongarch64-linux-user --disable-system --static
make -j$(nproc)
make install
- name: Set env
run: |
echo "PATH=$GITHUB_WORKSPACE:/opt/clang+llvm_8.0.1-6_amd64-linux-gnu_debian-10/bin:/opt/loongson-gnu-toolchain-8.3-x86_64-loongarch64-linux-gnu-rc1.3/bin:$PATH" >> $GITHUB_ENV
@ -77,46 +89,47 @@ jobs:
- name: Test
run: |
qemu-loongarch64-static ./utest/openblas_utest
qemu-loongarch64-static ./utest/openblas_utest_ext
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xscblat1
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xdcblat1
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xccblat1
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xzcblat1
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xscblat2 < ./ctest/sin2
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xdcblat2 < ./ctest/din2
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xccblat2 < ./ctest/cin2
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xzcblat2 < ./ctest/zin2
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xscblat3 < ./ctest/sin3
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xdcblat3 < ./ctest/din3
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xccblat3 < ./ctest/cin3
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xzcblat3 < ./ctest/zin3
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/sblat1
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/dblat1
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/cblat1
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/zblat1
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/sblat1
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/dblat1
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/cblat1
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/zblat1
export PATH=$GITHUB_WORKSPACE/qemu-install/bin/:$PATH
qemu-loongarch64 ./utest/openblas_utest
qemu-loongarch64 ./utest/openblas_utest_ext
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xscblat1
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xdcblat1
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xccblat1
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xzcblat1
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xscblat2 < ./ctest/sin2
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xdcblat2 < ./ctest/din2
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xccblat2 < ./ctest/cin2
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xzcblat2 < ./ctest/zin2
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xscblat3 < ./ctest/sin3
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xdcblat3 < ./ctest/din3
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xccblat3 < ./ctest/cin3
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xzcblat3 < ./ctest/zin3
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/sblat1
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/dblat1
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/cblat1
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/zblat1
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/sblat1
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/dblat1
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/cblat1
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/zblat1
rm -f ./test/?BLAT2.SUMM
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/sblat2 < ./test/sblat2.dat
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/dblat2 < ./test/dblat2.dat
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/cblat2 < ./test/cblat2.dat
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/zblat2 < ./test/zblat2.dat
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/sblat2 < ./test/sblat2.dat
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/dblat2 < ./test/dblat2.dat
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/cblat2 < ./test/cblat2.dat
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/zblat2 < ./test/zblat2.dat
rm -f ./test/?BLAT2.SUMM
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/sblat2 < ./test/sblat2.dat
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/dblat2 < ./test/dblat2.dat
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/cblat2 < ./test/cblat2.dat
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/zblat2 < ./test/zblat2.dat
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/sblat2 < ./test/sblat2.dat
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/dblat2 < ./test/dblat2.dat
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/cblat2 < ./test/cblat2.dat
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/zblat2 < ./test/zblat2.dat
rm -f ./test/?BLAT3.SUMM
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/sblat3 < ./test/sblat3.dat
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/dblat3 < ./test/dblat3.dat
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/cblat3 < ./test/cblat3.dat
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/zblat3 < ./test/zblat3.dat
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/sblat3 < ./test/sblat3.dat
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/dblat3 < ./test/dblat3.dat
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/cblat3 < ./test/cblat3.dat
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/zblat3 < ./test/zblat3.dat
rm -f ./test/?BLAT3.SUMM
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/sblat3 < ./test/sblat3.dat
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/dblat3 < ./test/dblat3.dat
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/cblat3 < ./test/cblat3.dat
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/zblat3 < ./test/zblat3.dat
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/sblat3 < ./test/sblat3.dat
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/dblat3 < ./test/dblat3.dat
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/cblat3 < ./test/cblat3.dat
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/zblat3 < ./test/zblat3.dat

View File

@ -6,6 +6,7 @@ include(CheckCCompilerFlag)
if (${CMAKE_C_COMPILER_ID} MATCHES "IntelLLVM")
set(CCOMMON_OPT "${CCOMMON_OPT} -fp-model=consistent")
set(GCC_VERSION 100)
endif ()
if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU" OR ${CMAKE_C_COMPILER_ID} STREQUAL "LSB" OR ${CMAKE_C_COMPILER_ID} MATCHES "Clang")

View File

@ -117,12 +117,12 @@ if (${F_COMPILER} STREQUAL "GFORTRAN" OR ${F_COMPILER} STREQUAL "F95" OR CMAKE_F
endif ()
endif ()
if (${F_COMPILER} STREQUAL "INTEL")
if (${F_COMPILER} STREQUAL "INTEL" OR CMAKE_Fortran_COMPILER_ID MATCHES "Intel")
set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_INTEL")
if (INTERFACE64)
set(FCOMMON_OPT "${FCOMMON_OPT} -i8")
endif ()
set(FCOMMON_OPT "${FCOMMON_OPT} -recursive")
set(FCOMMON_OPT "${FCOMMON_OPT} -recursive -fp-model=consistent")
if (USE_OPENMP)
set(FCOMMON_OPT "${FCOMMON_OPT} -openmp")
endif ()

View File

@ -2525,6 +2525,7 @@ int get_coretype(void){
case 0x7:
switch (exmodel) {
case 5:
case 6:
if (support_avx2())
return CORE_ZEN;
else

View File

@ -570,6 +570,8 @@ static int gemm_driver(blas_arg_t *args, BLASLONG *range_m, BLASLONG
InitializeCriticalSection((PCRITICAL_SECTION)&level3_lock);
#else
static pthread_mutex_t level3_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t level3_wakeup = PTHREAD_COND_INITIALIZER;
volatile static BLASLONG CPU_AVAILABLE = MAX_CPU_NUMBER;
#endif
blas_arg_t newarg;
@ -639,6 +641,12 @@ static int gemm_driver(blas_arg_t *args, BLASLONG *range_m, BLASLONG
EnterCriticalSection((PCRITICAL_SECTION)&level3_lock);
#else
pthread_mutex_lock(&level3_lock);
while(CPU_AVAILABLE < nthreads) {
pthread_cond_wait(&level3_wakeup, &level3_lock);
}
CPU_AVAILABLE -= nthreads;
WMB;
pthread_mutex_unlock(&level3_lock);
#endif
#ifdef USE_ALLOC_HEAP
@ -783,6 +791,10 @@ static int gemm_driver(blas_arg_t *args, BLASLONG *range_m, BLASLONG
#elif defined(OS_WINDOWS)
LeaveCriticalSection((PCRITICAL_SECTION)&level3_lock);
#else
pthread_mutex_lock(&level3_lock);
CPU_AVAILABLE += nthreads;
WMB;
pthread_cond_signal(&level3_wakeup);
pthread_mutex_unlock(&level3_lock);
#endif

View File

@ -927,6 +927,7 @@ static gotoblas_t *get_coretype(void){
case 0x7:
switch (exmodel) {
case 5:
case 6:
if (support_avx2())
return &gotoblas_ZEN;
else

View File

@ -137,7 +137,7 @@ endif ()
foreach (float_type ${FLOAT_TYPES})
if (${float_type} STREQUAL "COMPLEX" OR ${float_type} STREQUAL "ZCOMPLEX")
GenerateNamedObjects("zaxpy.c" "" "axpyc" ${CBLAS_FLAG} "" "" false ${float_type})
GenerateNamedObjects("zaxpy.c" "CONJ" "axpyc" ${CBLAS_FLAG} "" "" false ${float_type})
GenerateNamedObjects("zger.c" "" "geru" ${CBLAS_FLAG} "" "" false ${float_type})
GenerateNamedObjects("zger.c" "CONJ" "gerc" ${CBLAS_FLAG} "" "" false ${float_type})

View File

@ -108,7 +108,12 @@ dot_kernel_sve(BLASLONG n, FLOAT* x, FLOAT* y)
[N_] "r" (n),
[X_] "r" (x),
[Y_] "r" (y)
:);
: "cc",
"memory",
"x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
"x8", "x9", "x10", "x11", "x12", "x13", "d1",
"z0", "z1"
);
return ret;
}

View File

@ -292,7 +292,10 @@ static void zdot_compute(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLON
: "cc",
"memory",
"x0", "x1", "x2", "x3", "x4", "x5",
"d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7"
"d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7",
"v16", "v17", "v18", "v19", "v20", "v21", "v22",
"v23", "v24", "v25", "v26", "v27", "v28", "v29",
"v30", "v31"
);
cf=OPENBLAS_MAKE_COMPLEX_FLOAT(dotr, doti);

View File

@ -144,10 +144,11 @@ void CNAME(BLASLONG n, FLOAT_TYPE * in, BLASLONG inc_in, bfloat16 * out, BLASLON
if (inc_in == 0 || inc_out == 0 || n <= 100000) {
nthreads = 1;
} else {
nthreads = num_cpu_avail(1);
if (n/100000 < 100) {
nthreads = 4;
} else {
nthreads = 16;
nthreads = MAX(nthreads,4);
// } else {
// nthreads = MAX(nthreads,16);
}
}

View File

@ -2772,7 +2772,7 @@ c LIWEDC = 12
RESULT( NTEST ) = ULPINV
RESULT( NTEST+1 ) = ULPINV
RESULT( NTEST+2 ) = ULPINV
GO TO 700
GO TO 1750
END IF
END IF
*
@ -2797,13 +2797,13 @@ c LIWEDC = 12
RETURN
ELSE
RESULT( NTEST ) = ULPINV
GO TO 700
GO TO 1750
END IF
END IF
*
IF( M3.EQ.0 .AND. N.GT.0 ) THEN
RESULT( NTEST ) = ULPINV
GO TO 700
GO TO 1750
END IF
*
* Do test 78 (or +54)
@ -2819,6 +2819,8 @@ c LIWEDC = 12
$ MAX( UNFL, TEMP3*ULP )
*
CALL DLACPY( ' ', N, N, V, LDU, A, LDA )
*
1750 CONTINUE
*
1720 CONTINUE
*

View File

@ -534,8 +534,8 @@
$ / ' 2: norm( I - Q'' Q ) / ( m ulp )',
$ / ' 3: norm( I - PT PT'' ) / ( n ulp )',
$ / ' 4: norm( Y - Q'' C ) / ( norm(Y) max(m,nrhs) ulp )' )
9968 FORMAT( / ' Tests performed: See sdrvst.f' )
9967 FORMAT( / ' Tests performed: See cdrvst.f' )
9968 FORMAT( / ' Tests performed: See ddrvst.f' )
9967 FORMAT( / ' Tests performed: See zdrvst.f' )
*
* End of DLAHD2
*

View File

@ -2772,7 +2772,7 @@ c LIWEDC = 12
RESULT( NTEST ) = ULPINV
RESULT( NTEST+1 ) = ULPINV
RESULT( NTEST+2 ) = ULPINV
GO TO 700
GO TO 1750
END IF
END IF
*
@ -2797,13 +2797,13 @@ c LIWEDC = 12
RETURN
ELSE
RESULT( NTEST ) = ULPINV
GO TO 700
GO TO 1750
END IF
END IF
*
IF( M3.EQ.0 .AND. N.GT.0 ) THEN
RESULT( NTEST ) = ULPINV
GO TO 700
GO TO 1750
END IF
*
* Do test 78 (or +54)
@ -2819,6 +2819,8 @@ c LIWEDC = 12
$ MAX( UNFL, TEMP3*ULP )
*
CALL SLACPY( ' ', N, N, V, LDU, A, LDA )
*
1750 CONTINUE
*
1720 CONTINUE
*

View File

@ -33,6 +33,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <sys/types.h>
#include <sys/wait.h>
#include <errno.h>
#include <cblas.h>
#include "openblas_utest.h"
@ -41,7 +42,7 @@ static void* xmalloc(size_t n)
void* tmp;
tmp = malloc(n);
if (tmp == NULL) {
fprintf(stderr, "You are about to die\n");
fprintf(stderr, "Failed to allocate memory for the testcase.\n");
exit(1);
} else {
return tmp;
@ -103,6 +104,7 @@ exit(0);
fork_pid = fork();
if (fork_pid == -1) {
perror("fork");
CTEST_ERR("Failed to fork process.");
} else if (fork_pid == 0) {
// Compute a DGEMM product in the child process to check that the
@ -113,7 +115,8 @@ exit(0);
// recursively
fork_pid_nested = fork();
if (fork_pid_nested == -1) {
CTEST_ERR("Failed to fork process.");
perror("fork");
CTEST_ERR("Failed to fork nested process.");
exit(1);
} else if (fork_pid_nested == 0) {
check_dgemm(a, b, d, c, n);

View File

@ -33,6 +33,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <sys/types.h>
#include <sys/wait.h>
#include <errno.h>
#include <cblas.h>
#ifdef USE_OPENMP
#include <omp.h>
@ -44,7 +45,7 @@ static void* xmalloc(size_t n)
void* tmp;
tmp = malloc(n);
if (tmp == NULL) {
fprintf(stderr, "You are about to die\n");
fprintf(stderr, "Failed to allocate memory for the test payload.\n");
exit(1);
} else {
return tmp;
@ -114,7 +115,11 @@ exit(0);
fork_pid = fork();
if (fork_pid == -1) {
CTEST_ERR("Failed to fork process.");
perror("fork");
CTEST_ERR("Failed to fork subprocesses in a loop.");
#ifdef USE_OPENMP
CTEST_ERR("Number of OpenMP threads was %d in this attempt.",i);
#endif
} else if (fork_pid == 0) {
// Just pretend to do something, e.g. call `uname`, then exit
exit(0);