diff --git a/.github/workflows/loongarch64.yml b/.github/workflows/loongarch64.yml index b23c81f96..da7f6c9a0 100644 --- a/.github/workflows/loongarch64.yml +++ b/.github/workflows/loongarch64.yml @@ -33,10 +33,8 @@ jobs: - name: Install APT deps run: | - sudo add-apt-repository ppa:savoury1/virtualisation sudo apt-get update - sudo apt-get install autoconf automake autotools-dev ninja-build make ccache \ - qemu-user-static + sudo apt-get install autoconf automake autotools-dev ninja-build make ccache - name: Download and install loongarch64-toolchain run: | @@ -44,6 +42,20 @@ jobs: #wget https://github.com/loongson/build-tools/releases/download/2023.08.08/CLFS-loongarch64-8.1-x86_64-cross-tools-gcc-glibc.tar.xz tar -xf CLFS-loongarch64-8.1-x86_64-cross-tools-gcc-glibc.tar.xz -C /opt + - name: Checkout qemu + uses: actions/checkout@v3 + with: + repository: qemu/qemu + path: qemu + ref: master + + - name: Install qemu + run: | + cd qemu + ./configure --prefix=$GITHUB_WORKSPACE/qemu-install --target-list=loongarch64-linux-user --disable-system --static + make -j$(nproc) + make install + - name: Set env run: | echo "LD_LIBRARY_PATH=/opt/cross-tools/target/usr/lib64:/opt/cross-tools/loongarch64-unknown-linux-gnu/lib64:$LD_LIBRARY_PATH" >> $GITHUB_ENV @@ -76,45 +88,46 @@ jobs: - name: Test run: | - qemu-loongarch64-static ./utest/openblas_utest - qemu-loongarch64-static ./utest/openblas_utest_ext - OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xscblat1 - OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xdcblat1 - OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xccblat1 - OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xzcblat1 - OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xscblat2 < ./ctest/sin2 - OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xdcblat2 < ./ctest/din2 - OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xccblat2 < ./ctest/cin2 - OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xzcblat2 < ./ctest/zin2 - OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xscblat3 < ./ctest/sin3 - OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xdcblat3 < ./ctest/din3 - OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xccblat3 < ./ctest/cin3 - OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xzcblat3 < ./ctest/zin3 - OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/sblat1 - OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/dblat1 - OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/cblat1 - OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/zblat1 - OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/sblat1 - OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/dblat1 - OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/cblat1 - OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/zblat1 + export PATH=$GITHUB_WORKSPACE/qemu-install/bin/:$PATH + qemu-loongarch64 ./utest/openblas_utest + qemu-loongarch64 ./utest/openblas_utest_ext + OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xscblat1 + OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xdcblat1 + OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xccblat1 + OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xzcblat1 + OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xscblat2 < ./ctest/sin2 + OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xdcblat2 < ./ctest/din2 + OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xccblat2 < ./ctest/cin2 + OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xzcblat2 < ./ctest/zin2 + OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xscblat3 < ./ctest/sin3 + OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xdcblat3 < ./ctest/din3 + OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xccblat3 < ./ctest/cin3 + OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xzcblat3 < ./ctest/zin3 + OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/sblat1 + OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/dblat1 + OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/cblat1 + OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/zblat1 + OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/sblat1 + OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/dblat1 + OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/cblat1 + OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/zblat1 rm -f ./test/?BLAT2.SUMM - OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/sblat2 < ./test/sblat2.dat - OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/dblat2 < ./test/dblat2.dat - OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/cblat2 < ./test/cblat2.dat - OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/zblat2 < ./test/zblat2.dat + OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/sblat2 < ./test/sblat2.dat + OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/dblat2 < ./test/dblat2.dat + OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/cblat2 < ./test/cblat2.dat + OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/zblat2 < ./test/zblat2.dat rm -f ./test/?BLAT2.SUMM - OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/sblat2 < ./test/sblat2.dat - OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/dblat2 < ./test/dblat2.dat - OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/cblat2 < ./test/cblat2.dat - OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/zblat2 < ./test/zblat2.dat + OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/sblat2 < ./test/sblat2.dat + OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/dblat2 < ./test/dblat2.dat + OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/cblat2 < ./test/cblat2.dat + OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/zblat2 < ./test/zblat2.dat rm -f ./test/?BLAT3.SUMM - OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/sblat3 < ./test/sblat3.dat - OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/dblat3 < ./test/dblat3.dat - OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/cblat3 < ./test/cblat3.dat - OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/zblat3 < ./test/zblat3.dat + OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/sblat3 < ./test/sblat3.dat + OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/dblat3 < ./test/dblat3.dat + OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/cblat3 < ./test/cblat3.dat + OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/zblat3 < ./test/zblat3.dat rm -f ./test/?BLAT3.SUMM - OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/sblat3 < ./test/sblat3.dat - OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/dblat3 < ./test/dblat3.dat - OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/cblat3 < ./test/cblat3.dat - OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/zblat3 < ./test/zblat3.dat + OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/sblat3 < ./test/sblat3.dat + OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/dblat3 < ./test/dblat3.dat + OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/cblat3 < ./test/cblat3.dat + OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/zblat3 < ./test/zblat3.dat diff --git a/.github/workflows/loongarch64_clang.yml b/.github/workflows/loongarch64_clang.yml index b32e4d731..d08e56f62 100644 --- a/.github/workflows/loongarch64_clang.yml +++ b/.github/workflows/loongarch64_clang.yml @@ -34,18 +34,30 @@ jobs: - name: Install APT deps run: | - sudo add-apt-repository ppa:savoury1/virtualisation sudo apt-get update - sudo apt-get install autoconf automake autotools-dev ninja-build make ccache \ - qemu-user-static + sudo apt-get install autoconf automake autotools-dev ninja-build make ccache - name: Download and install loongarch64-toolchain run: | - wget http://ftp.loongnix.cn/toolchain/llvm/llvm8/clang+llvm_8.0.1-6_amd64-linux-gnu_debian-10.tar.gz - wget http://ftp.loongnix.cn/toolchain/gcc/release/loongarch/gcc8/loongson-gnu-toolchain-8.3-x86_64-loongarch64-linux-gnu-rc1.3.tar.xz + wget https://github.com/XiWeiGu/loongarch64_toolchain/releases/download/V0.1/clang+llvm_8.0.1-6_amd64-linux-gnu_debian-10.tar.gz + wget https://github.com/XiWeiGu/loongarch64_toolchain/releases/download/V0.1/loongson-gnu-toolchain-8.3-x86_64-loongarch64-linux-gnu-rc1.3.tar.xz tar -xf clang+llvm_8.0.1-6_amd64-linux-gnu_debian-10.tar.gz -C /opt tar -xf loongson-gnu-toolchain-8.3-x86_64-loongarch64-linux-gnu-rc1.3.tar.xz -C /opt + - name: Checkout qemu + uses: actions/checkout@v3 + with: + repository: qemu/qemu + path: qemu + ref: master + + - name: Install qemu + run: | + cd qemu + ./configure --prefix=$GITHUB_WORKSPACE/qemu-install --target-list=loongarch64-linux-user --disable-system --static + make -j$(nproc) + make install + - name: Set env run: | echo "PATH=$GITHUB_WORKSPACE:/opt/clang+llvm_8.0.1-6_amd64-linux-gnu_debian-10/bin:/opt/loongson-gnu-toolchain-8.3-x86_64-loongarch64-linux-gnu-rc1.3/bin:$PATH" >> $GITHUB_ENV @@ -77,46 +89,47 @@ jobs: - name: Test run: | - qemu-loongarch64-static ./utest/openblas_utest - qemu-loongarch64-static ./utest/openblas_utest_ext - OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xscblat1 - OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xdcblat1 - OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xccblat1 - OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xzcblat1 - OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xscblat2 < ./ctest/sin2 - OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xdcblat2 < ./ctest/din2 - OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xccblat2 < ./ctest/cin2 - OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xzcblat2 < ./ctest/zin2 - OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xscblat3 < ./ctest/sin3 - OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xdcblat3 < ./ctest/din3 - OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xccblat3 < ./ctest/cin3 - OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xzcblat3 < ./ctest/zin3 - OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/sblat1 - OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/dblat1 - OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/cblat1 - OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/zblat1 - OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/sblat1 - OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/dblat1 - OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/cblat1 - OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/zblat1 + export PATH=$GITHUB_WORKSPACE/qemu-install/bin/:$PATH + qemu-loongarch64 ./utest/openblas_utest + qemu-loongarch64 ./utest/openblas_utest_ext + OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xscblat1 + OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xdcblat1 + OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xccblat1 + OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xzcblat1 + OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xscblat2 < ./ctest/sin2 + OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xdcblat2 < ./ctest/din2 + OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xccblat2 < ./ctest/cin2 + OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xzcblat2 < ./ctest/zin2 + OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xscblat3 < ./ctest/sin3 + OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xdcblat3 < ./ctest/din3 + OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xccblat3 < ./ctest/cin3 + OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xzcblat3 < ./ctest/zin3 + OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/sblat1 + OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/dblat1 + OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/cblat1 + OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/zblat1 + OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/sblat1 + OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/dblat1 + OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/cblat1 + OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/zblat1 rm -f ./test/?BLAT2.SUMM - OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/sblat2 < ./test/sblat2.dat - OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/dblat2 < ./test/dblat2.dat - OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/cblat2 < ./test/cblat2.dat - OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/zblat2 < ./test/zblat2.dat + OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/sblat2 < ./test/sblat2.dat + OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/dblat2 < ./test/dblat2.dat + OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/cblat2 < ./test/cblat2.dat + OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/zblat2 < ./test/zblat2.dat rm -f ./test/?BLAT2.SUMM - OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/sblat2 < ./test/sblat2.dat - OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/dblat2 < ./test/dblat2.dat - OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/cblat2 < ./test/cblat2.dat - OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/zblat2 < ./test/zblat2.dat + OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/sblat2 < ./test/sblat2.dat + OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/dblat2 < ./test/dblat2.dat + OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/cblat2 < ./test/cblat2.dat + OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/zblat2 < ./test/zblat2.dat rm -f ./test/?BLAT3.SUMM - OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/sblat3 < ./test/sblat3.dat - OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/dblat3 < ./test/dblat3.dat - OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/cblat3 < ./test/cblat3.dat - OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/zblat3 < ./test/zblat3.dat + OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/sblat3 < ./test/sblat3.dat + OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/dblat3 < ./test/dblat3.dat + OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/cblat3 < ./test/cblat3.dat + OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/zblat3 < ./test/zblat3.dat rm -f ./test/?BLAT3.SUMM - OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/sblat3 < ./test/sblat3.dat - OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/dblat3 < ./test/dblat3.dat - OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/cblat3 < ./test/cblat3.dat - OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/zblat3 < ./test/zblat3.dat + OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/sblat3 < ./test/sblat3.dat + OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/dblat3 < ./test/dblat3.dat + OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/cblat3 < ./test/cblat3.dat + OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/zblat3 < ./test/zblat3.dat diff --git a/cmake/cc.cmake b/cmake/cc.cmake index 9dce9ea35..775239e1c 100644 --- a/cmake/cc.cmake +++ b/cmake/cc.cmake @@ -6,6 +6,7 @@ include(CheckCCompilerFlag) if (${CMAKE_C_COMPILER_ID} MATCHES "IntelLLVM") set(CCOMMON_OPT "${CCOMMON_OPT} -fp-model=consistent") + set(GCC_VERSION 100) endif () if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU" OR ${CMAKE_C_COMPILER_ID} STREQUAL "LSB" OR ${CMAKE_C_COMPILER_ID} MATCHES "Clang") diff --git a/cmake/fc.cmake b/cmake/fc.cmake index 69246385c..8798ce8b4 100644 --- a/cmake/fc.cmake +++ b/cmake/fc.cmake @@ -117,12 +117,12 @@ if (${F_COMPILER} STREQUAL "GFORTRAN" OR ${F_COMPILER} STREQUAL "F95" OR CMAKE_F endif () endif () -if (${F_COMPILER} STREQUAL "INTEL") +if (${F_COMPILER} STREQUAL "INTEL" OR CMAKE_Fortran_COMPILER_ID MATCHES "Intel") set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_INTEL") if (INTERFACE64) set(FCOMMON_OPT "${FCOMMON_OPT} -i8") endif () - set(FCOMMON_OPT "${FCOMMON_OPT} -recursive") + set(FCOMMON_OPT "${FCOMMON_OPT} -recursive -fp-model=consistent") if (USE_OPENMP) set(FCOMMON_OPT "${FCOMMON_OPT} -openmp") endif () diff --git a/cpuid_x86.c b/cpuid_x86.c index 40735c563..e157232cf 100644 --- a/cpuid_x86.c +++ b/cpuid_x86.c @@ -2525,6 +2525,7 @@ int get_coretype(void){ case 0x7: switch (exmodel) { case 5: + case 6: if (support_avx2()) return CORE_ZEN; else diff --git a/driver/level3/level3_thread.c b/driver/level3/level3_thread.c index 9fec7afca..ddb39abd6 100644 --- a/driver/level3/level3_thread.c +++ b/driver/level3/level3_thread.c @@ -570,6 +570,8 @@ static int gemm_driver(blas_arg_t *args, BLASLONG *range_m, BLASLONG InitializeCriticalSection((PCRITICAL_SECTION)&level3_lock); #else static pthread_mutex_t level3_lock = PTHREAD_MUTEX_INITIALIZER; + static pthread_cond_t level3_wakeup = PTHREAD_COND_INITIALIZER; + volatile static BLASLONG CPU_AVAILABLE = MAX_CPU_NUMBER; #endif blas_arg_t newarg; @@ -639,6 +641,12 @@ static int gemm_driver(blas_arg_t *args, BLASLONG *range_m, BLASLONG EnterCriticalSection((PCRITICAL_SECTION)&level3_lock); #else pthread_mutex_lock(&level3_lock); + while(CPU_AVAILABLE < nthreads) { + pthread_cond_wait(&level3_wakeup, &level3_lock); + } + CPU_AVAILABLE -= nthreads; + WMB; + pthread_mutex_unlock(&level3_lock); #endif #ifdef USE_ALLOC_HEAP @@ -783,6 +791,10 @@ static int gemm_driver(blas_arg_t *args, BLASLONG *range_m, BLASLONG #elif defined(OS_WINDOWS) LeaveCriticalSection((PCRITICAL_SECTION)&level3_lock); #else + pthread_mutex_lock(&level3_lock); + CPU_AVAILABLE += nthreads; + WMB; + pthread_cond_signal(&level3_wakeup); pthread_mutex_unlock(&level3_lock); #endif diff --git a/driver/others/dynamic.c b/driver/others/dynamic.c index e3f905265..1f7142009 100644 --- a/driver/others/dynamic.c +++ b/driver/others/dynamic.c @@ -927,6 +927,7 @@ static gotoblas_t *get_coretype(void){ case 0x7: switch (exmodel) { case 5: + case 6: if (support_avx2()) return &gotoblas_ZEN; else diff --git a/interface/CMakeLists.txt b/interface/CMakeLists.txt index 05b893b8c..449072bae 100644 --- a/interface/CMakeLists.txt +++ b/interface/CMakeLists.txt @@ -137,7 +137,7 @@ endif () foreach (float_type ${FLOAT_TYPES}) if (${float_type} STREQUAL "COMPLEX" OR ${float_type} STREQUAL "ZCOMPLEX") - GenerateNamedObjects("zaxpy.c" "" "axpyc" ${CBLAS_FLAG} "" "" false ${float_type}) + GenerateNamedObjects("zaxpy.c" "CONJ" "axpyc" ${CBLAS_FLAG} "" "" false ${float_type}) GenerateNamedObjects("zger.c" "" "geru" ${CBLAS_FLAG} "" "" false ${float_type}) GenerateNamedObjects("zger.c" "CONJ" "gerc" ${CBLAS_FLAG} "" "" false ${float_type}) diff --git a/kernel/arm64/dot_kernel_sve.c b/kernel/arm64/dot_kernel_sve.c index 16f4cd537..bc9975214 100644 --- a/kernel/arm64/dot_kernel_sve.c +++ b/kernel/arm64/dot_kernel_sve.c @@ -108,7 +108,12 @@ dot_kernel_sve(BLASLONG n, FLOAT* x, FLOAT* y) [N_] "r" (n), [X_] "r" (x), [Y_] "r" (y) - :); + : "cc", + "memory", + "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", + "x8", "x9", "x10", "x11", "x12", "x13", "d1", + "z0", "z1" + ); return ret; } diff --git a/kernel/arm64/zdot_thunderx2t99.c b/kernel/arm64/zdot_thunderx2t99.c index 6f65e5cfd..d48392412 100644 --- a/kernel/arm64/zdot_thunderx2t99.c +++ b/kernel/arm64/zdot_thunderx2t99.c @@ -292,7 +292,10 @@ static void zdot_compute(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLON : "cc", "memory", "x0", "x1", "x2", "x3", "x4", "x5", - "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7" + "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", + "v16", "v17", "v18", "v19", "v20", "v21", "v22", + "v23", "v24", "v25", "v26", "v27", "v28", "v29", + "v30", "v31" ); cf=OPENBLAS_MAKE_COMPLEX_FLOAT(dotr, doti); diff --git a/kernel/x86_64/tobf16.c b/kernel/x86_64/tobf16.c index a88fdcc2e..e8e228955 100644 --- a/kernel/x86_64/tobf16.c +++ b/kernel/x86_64/tobf16.c @@ -144,10 +144,11 @@ void CNAME(BLASLONG n, FLOAT_TYPE * in, BLASLONG inc_in, bfloat16 * out, BLASLON if (inc_in == 0 || inc_out == 0 || n <= 100000) { nthreads = 1; } else { + nthreads = num_cpu_avail(1); if (n/100000 < 100) { - nthreads = 4; - } else { - nthreads = 16; + nthreads = MAX(nthreads,4); +// } else { +// nthreads = MAX(nthreads,16); } } diff --git a/lapack-netlib/TESTING/EIG/ddrvst.f b/lapack-netlib/TESTING/EIG/ddrvst.f index 805fd8271..311684238 100644 --- a/lapack-netlib/TESTING/EIG/ddrvst.f +++ b/lapack-netlib/TESTING/EIG/ddrvst.f @@ -2772,7 +2772,7 @@ c LIWEDC = 12 RESULT( NTEST ) = ULPINV RESULT( NTEST+1 ) = ULPINV RESULT( NTEST+2 ) = ULPINV - GO TO 700 + GO TO 1750 END IF END IF * @@ -2797,13 +2797,13 @@ c LIWEDC = 12 RETURN ELSE RESULT( NTEST ) = ULPINV - GO TO 700 + GO TO 1750 END IF END IF * IF( M3.EQ.0 .AND. N.GT.0 ) THEN RESULT( NTEST ) = ULPINV - GO TO 700 + GO TO 1750 END IF * * Do test 78 (or +54) @@ -2819,6 +2819,8 @@ c LIWEDC = 12 $ MAX( UNFL, TEMP3*ULP ) * CALL DLACPY( ' ', N, N, V, LDU, A, LDA ) +* + 1750 CONTINUE * 1720 CONTINUE * diff --git a/lapack-netlib/TESTING/EIG/dlahd2.f b/lapack-netlib/TESTING/EIG/dlahd2.f index a6c65635f..cccbd2537 100644 --- a/lapack-netlib/TESTING/EIG/dlahd2.f +++ b/lapack-netlib/TESTING/EIG/dlahd2.f @@ -534,8 +534,8 @@ $ / ' 2: norm( I - Q'' Q ) / ( m ulp )', $ / ' 3: norm( I - PT PT'' ) / ( n ulp )', $ / ' 4: norm( Y - Q'' C ) / ( norm(Y) max(m,nrhs) ulp )' ) - 9968 FORMAT( / ' Tests performed: See sdrvst.f' ) - 9967 FORMAT( / ' Tests performed: See cdrvst.f' ) + 9968 FORMAT( / ' Tests performed: See ddrvst.f' ) + 9967 FORMAT( / ' Tests performed: See zdrvst.f' ) * * End of DLAHD2 * diff --git a/lapack-netlib/TESTING/EIG/sdrvst.f b/lapack-netlib/TESTING/EIG/sdrvst.f index be6d33cee..2d02f54ed 100644 --- a/lapack-netlib/TESTING/EIG/sdrvst.f +++ b/lapack-netlib/TESTING/EIG/sdrvst.f @@ -2772,7 +2772,7 @@ c LIWEDC = 12 RESULT( NTEST ) = ULPINV RESULT( NTEST+1 ) = ULPINV RESULT( NTEST+2 ) = ULPINV - GO TO 700 + GO TO 1750 END IF END IF * @@ -2797,13 +2797,13 @@ c LIWEDC = 12 RETURN ELSE RESULT( NTEST ) = ULPINV - GO TO 700 + GO TO 1750 END IF END IF * IF( M3.EQ.0 .AND. N.GT.0 ) THEN RESULT( NTEST ) = ULPINV - GO TO 700 + GO TO 1750 END IF * * Do test 78 (or +54) @@ -2819,6 +2819,8 @@ c LIWEDC = 12 $ MAX( UNFL, TEMP3*ULP ) * CALL SLACPY( ' ', N, N, V, LDU, A, LDA ) +* + 1750 CONTINUE * 1720 CONTINUE * diff --git a/utest/test_fork.c b/utest/test_fork.c index 558026031..44a619349 100644 --- a/utest/test_fork.c +++ b/utest/test_fork.c @@ -33,6 +33,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include +#include #include #include "openblas_utest.h" @@ -41,7 +42,7 @@ static void* xmalloc(size_t n) void* tmp; tmp = malloc(n); if (tmp == NULL) { - fprintf(stderr, "You are about to die\n"); + fprintf(stderr, "Failed to allocate memory for the testcase.\n"); exit(1); } else { return tmp; @@ -103,6 +104,7 @@ exit(0); fork_pid = fork(); if (fork_pid == -1) { + perror("fork"); CTEST_ERR("Failed to fork process."); } else if (fork_pid == 0) { // Compute a DGEMM product in the child process to check that the @@ -113,7 +115,8 @@ exit(0); // recursively fork_pid_nested = fork(); if (fork_pid_nested == -1) { - CTEST_ERR("Failed to fork process."); + perror("fork"); + CTEST_ERR("Failed to fork nested process."); exit(1); } else if (fork_pid_nested == 0) { check_dgemm(a, b, d, c, n); diff --git a/utest/test_post_fork.c b/utest/test_post_fork.c index 6d640aebb..d6e87f2ba 100644 --- a/utest/test_post_fork.c +++ b/utest/test_post_fork.c @@ -33,6 +33,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include +#include #include #ifdef USE_OPENMP #include @@ -44,7 +45,7 @@ static void* xmalloc(size_t n) void* tmp; tmp = malloc(n); if (tmp == NULL) { - fprintf(stderr, "You are about to die\n"); + fprintf(stderr, "Failed to allocate memory for the test payload.\n"); exit(1); } else { return tmp; @@ -114,7 +115,11 @@ exit(0); fork_pid = fork(); if (fork_pid == -1) { - CTEST_ERR("Failed to fork process."); + perror("fork"); + CTEST_ERR("Failed to fork subprocesses in a loop."); +#ifdef USE_OPENMP + CTEST_ERR("Number of OpenMP threads was %d in this attempt.",i); +#endif } else if (fork_pid == 0) { // Just pretend to do something, e.g. call `uname`, then exit exit(0);