From 19d4f90c44c3a5bd244c188997fa5a5c57c6fcc0 Mon Sep 17 00:00:00 2001 From: Jiaxun Yang Date: Sun, 31 Jul 2022 18:17:07 +0100 Subject: [PATCH 01/19] Use auxv to detect CPUCFG on mips/loongson It's safer and easier than SIGILL. Signed-off-by: Jiaxun Yang --- driver/others/dynamic_mips64.c | 80 +++++++++------------------------- 1 file changed, 21 insertions(+), 59 deletions(-) diff --git a/driver/others/dynamic_mips64.c b/driver/others/dynamic_mips64.c index 9fd19d739..d50b2dc27 100644 --- a/driver/others/dynamic_mips64.c +++ b/driver/others/dynamic_mips64.c @@ -38,6 +38,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include "common.h" +#if (defined OS_LINUX || defined OS_ANDROID) +#include +#include + +#ifndef HWCAP_LOONGSON_CPUCFG +#define HWCAP_LOONGSON_CPUCFG (1 << 14) +#endif +#endif + extern gotoblas_t gotoblas_LOONGSON3R3; extern gotoblas_t gotoblas_LOONGSON3R4; @@ -81,59 +90,10 @@ static gotoblas_t *force_coretype(char *coretype) { return NULL; } +#if (defined OS_LINUX || defined OS_ANDROID) #define MMI_MASK 0x00000010 #define MSA_MASK 0x00000020 -int fd[2]; -int support_cpucfg; - -static void handler(int signum) -{ - close(fd[1]); - exit(1); -} - -/* Brief : Function to check if cpucfg supported on loongson - * Return: 1 supported - * 0 not supported - */ -static int cpucfg_test(void) { - pid_t pid; - int status = 0; - - support_cpucfg = 0; - pipe(fd); - pid = fork(); - if (pid == 0) { /* Subprocess */ - struct sigaction act; - close(fd[0]); - /* Set signal action for SIGILL. */ - act.sa_handler = handler; - sigaction(SIGILL,&act,NULL); - - /* Execute cpucfg in subprocess. 
*/ - __asm__ volatile( - ".insn \n\t" - ".word (0xc8080118) \n\t" - ::: - ); - support_cpucfg = 1; - write(fd[1],&support_cpucfg,sizeof(support_cpucfg)); - close(fd[1]); - exit(0); - } else if (pid > 0){ /* Parent process*/ - close(fd[1]); - if ((waitpid(pid,&status,0) <= 0) || - (read(fd[0],&support_cpucfg,sizeof(support_cpucfg)) <= 0)) - support_cpucfg = 0; - close(fd[0]); - } else { - support_cpucfg = 0; - } - - return support_cpucfg; -} - static gotoblas_t *get_coretype_from_cpucfg(void) { int flag = 0; __asm__ volatile( @@ -153,7 +113,7 @@ static gotoblas_t *get_coretype_from_cpucfg(void) { } static gotoblas_t *get_coretype_from_cpuinfo(void) { -#ifdef linux +#ifdef __linux FILE *infile; char buffer[512], *p; @@ -176,17 +136,19 @@ static gotoblas_t *get_coretype_from_cpuinfo(void) { return NULL; } #endif - return NULL; + return NULL; } +#endif static gotoblas_t *get_coretype(void) { - int ret = 0; - - ret = cpucfg_test(); - if (ret == 1) - return get_coretype_from_cpucfg(); - else - return get_coretype_from_cpuinfo(); +#if (!defined OS_LINUX && !defined OS_ANDROID) + return NULL; +#else + if (getauxval(AT_HWCAP) & HWCAP_LOONGSON_CPUCFG) /* execute CPUCFG only when the kernel advertises it; otherwise parse /proc/cpuinfo */ + return get_coretype_from_cpucfg(); + else + return get_coretype_from_cpuinfo(); +#endif } void gotoblas_dynamic_init(void) { From b633eb79f23f921ed4f22ca7cd49e65afb378248 Mon Sep 17 00:00:00 2001 From: Jiaxun Yang Date: Sun, 31 Jul 2022 18:42:51 +0100 Subject: [PATCH 02/19] Use $at as temporary register for mips/loongson CPUCFG read Some compilers (namely LLVM) are not happy with clobbering registers in inline assembly. Use $at as temporary register and explicitly use noat hint. 
Signed-off-by: Jiaxun Yang --- driver/others/dynamic_mips64.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/driver/others/dynamic_mips64.c b/driver/others/dynamic_mips64.c index d50b2dc27..d80e81356 100644 --- a/driver/others/dynamic_mips64.c +++ b/driver/others/dynamic_mips64.c @@ -97,13 +97,16 @@ static gotoblas_t *force_coretype(char *coretype) { static gotoblas_t *get_coretype_from_cpucfg(void) { int flag = 0; __asm__ volatile( + ".set push \n\t" + ".set noat \n\t" ".insn \n\t" - "dli $8, 0x01 \n\t" - ".word (0xc9084918) \n\t" - "usw $9, 0x00(%0) \n\t" + "dli $1, 0x01 \n\t" + ".word (0xc8280918) \n\t" /* cpucfg $1, $1: 0xc8080118 | (1 << 21) | (1 << 11), same layout as the old $8 -> $9 word 0xc9084918 */ + "move %0, $1 \n\t" + ".set pop \n\t" + : "=r"(flag) + : : - : "r"(&flag) - : "memory" ); if (flag & MSA_MASK) return (&gotoblas_LOONGSON3R4); From 38e9c969c4bb59387ba900d47f834eb8aa52282c Mon Sep 17 00:00:00 2001 From: haanhvu Date: Wed, 10 Aug 2022 16:28:50 +0700 Subject: [PATCH 03/19] Clarify the installation guide in the output of make --- Makefile | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/Makefile b/Makefile index 967ab1bb6..289f0eca5 100644 --- a/Makefile +++ b/Makefile @@ -110,6 +110,10 @@ ifeq ($(OSNAME), Darwin) @echo "\"make PREFIX=/your_installation_path/ install\"." @echo @echo "(or set PREFIX in Makefile.rule and run make install." + @echo + @echo "Note that any flags passed to make during build should also be passed to make install" + @echo "to circumvent any install errors." + @echo @echo "If you want to move the .dylib to a new location later, make sure you change" @echo "the internal name of the dylib with:" @echo @@ -118,6 +122,9 @@ endif @echo @echo "To install the library, you can run \"make PREFIX=/path/to/your/installation install\"." @echo + @echo "Note that any flags passed to make during build should also be passed to make install" + @echo "to circumvent any install errors." 
+ @echo shared : ifneq ($(NO_SHARED), 1) From aaaecdbf193964c7e378feadef04b207bfe30b71 Mon Sep 17 00:00:00 2001 From: Jiaxun Yang Date: Thu, 11 Aug 2022 14:45:27 +0100 Subject: [PATCH 04/19] Allow CC to contain space Signed-off-by: Jiaxun Yang --- Makefile.prebuild | 8 ++++---- c_check | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/Makefile.prebuild b/Makefile.prebuild index c59e9049c..5dd7dfa4e 100644 --- a/Makefile.prebuild +++ b/Makefile.prebuild @@ -60,9 +60,9 @@ all: getarch_2nd ./getarch_2nd 1 >> $(TARGET_CONF) $(TARGET_CONF): c_check$(SCRIPTSUFFIX) f_check$(SCRIPTSUFFIX) getarch - ./c_check$(SCRIPTSUFFIX) $(TARGET_MAKE) $(TARGET_CONF) $(CC) $(TARGET_FLAGS) $(CFLAGS) + ./c_check$(SCRIPTSUFFIX) $(TARGET_MAKE) $(TARGET_CONF) "$(CC)" "$(TARGET_FLAGS) $(CFLAGS)" ifneq ($(ONLY_CBLAS), 1) - ./f_check$(SCRIPTSUFFIX) $(TARGET_MAKE) $(TARGET_CONF) $(FC) $(TARGET_FLAGS) + ./f_check$(SCRIPTSUFFIX) $(TARGET_MAKE) $(TARGET_CONF) "$(FC)" "$(TARGET_FLAGS)" else #When we only build CBLAS, we set NOFORTRAN=2 echo "NOFORTRAN=2" >> $(TARGET_MAKE) @@ -77,8 +77,8 @@ endif getarch : getarch.c cpuid.S dummy $(CPUIDEMU) - avx512=$$(./c_check$(SCRIPTSUFFIX) - - $(CC) $(TARGET_FLAGS) $(CFLAGS) | grep NO_AVX512); \ - rv64gv=$$(./c_check$(SCRIPTSUFFIX) - - $(CC) $(TARGET_FLAGS) $(CFLAGS) | grep NO_RV64GV); \ + avx512=$$(./c_check$(SCRIPTSUFFIX) - - "$(CC)" "$(TARGET_FLAGS) $(CFLAGS)" | grep NO_AVX512); \ + rv64gv=$$(./c_check$(SCRIPTSUFFIX) - - "$(CC)" "$(TARGET_FLAGS) $(CFLAGS)" | grep NO_RV64GV); \ $(HOSTCC) $(HOST_CFLAGS) $(EXFLAGS) $${avx512:+-D$${avx512}} $${rv64gv:+-D$${rv64gv}} -o $(@F) getarch.c cpuid.S $(CPUIDEMU) getarch_2nd : getarch_2nd.c $(TARGET_CONF) dummy diff --git a/c_check b/c_check index 01d4f4a7c..d2bd47524 100755 --- a/c_check +++ b/c_check @@ -31,8 +31,8 @@ flags="$*" cross_suffix="" -if [ "`dirname $compiler_name`" != '.' ]; then - cross_suffix="$cross_suffix`dirname $compiler_name`/" +if [ "`dirname \"$compiler_name\"`" != '.' 
]; then + cross_suffix="$cross_suffix`dirname \"$compiler_name\"`/" fi bn=`basename $compiler_name` From 1569a43f7e610fca5d551793e5c06fe5b81aee62 Mon Sep 17 00:00:00 2001 From: Jiaxun Yang Date: Mon, 8 Aug 2022 20:14:05 +0100 Subject: [PATCH 05/19] GitHub Actions: Add cross compile tests Add cross compile tests without running checks. Currently only mips64el, riscv64, mipsel, alpha is wired up. Just help us make sure those less popular CPUs are not messed up by changes. Signed-off-by: Jiaxun Yang --- .github/workflows/dynamic_arch.yml | 50 ++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/.github/workflows/dynamic_arch.yml b/.github/workflows/dynamic_arch.yml index 153c63045..c34b0c462 100644 --- a/.github/workflows/dynamic_arch.yml +++ b/.github/workflows/dynamic_arch.yml @@ -257,3 +257,53 @@ jobs: - name: Run tests timeout-minutes: 60 run: cd build && ctest + + cross_build: + runs-on: ubuntu-22.04 + + strategy: + fail-fast: false + matrix: + include: + - target: mips64el + triple: mips64el-linux-gnuabi64 + opts: DYNAMIC_ARCH=1 + - target: riscv64 + triple: riscv64-linux-gnu + opts: TARGET=RISCV64_GENERIC + - target: mipsel + triple: mipsel-linux-gnu + opts: TARGET=MIPS1004K + - target: alpha + triple: alpha-linux-gnu + opts: TARGET=EV4 + + steps: + - name: Checkout repository + uses: actions/checkout@v3 + + - name: Install Dependencies + run: | + sudo apt-get install -y ccache gcc-${{ matrix.triple }} gfortran-${{ matrix.triple }} libgomp1-${{ matrix.target }}-cross + + - name: Compilation cache + uses: actions/cache@v3 + with: + path: ~/.ccache + key: ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }}-${{ github.sha }} + restore-keys: | + ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }} + ccache-${{ runner.os }}-${{ matrix.target }} + + - name: Configure ccache + run: | + # Limit the maximum size and switch on compression to avoid exceeding the total disk or cache quota (5 GB). 
+ test -d ~/.ccache || mkdir -p ~/.ccache + echo "max_size = 300M" > ~/.ccache/ccache.conf + echo "compression = true" >> ~/.ccache/ccache.conf + ccache -s + + + - name: Build OpenBLAS + run: | + make -j$(nproc) HOSTCC="ccache gcc" CC="ccache ${{ matrix.triple }}-gcc" FC="ccache ${{ matrix.triple }}-gfortran" ARCH=${{ matrix.target }} ${{ matrix.opts }} From 99f82f1f81e86c5080ce9454323dfe2292d303fd Mon Sep 17 00:00:00 2001 From: Jiaxun Yang Date: Thu, 11 Aug 2022 14:46:29 +0100 Subject: [PATCH 06/19] alpha: Use mb for rmb Alpha has never had an rmb instruction. Just use mb for rmb. Signed-off-by: Jiaxun Yang --- common_alpha.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common_alpha.h b/common_alpha.h index f1ea8ff94..021eb93ae 100644 --- a/common_alpha.h +++ b/common_alpha.h @@ -43,7 +43,7 @@ #define MB asm("mb") #define WMB asm("wmb") -#define RMB asm("rmb") +#define RMB asm("mb") static void __inline blas_lock(unsigned long *address){ #ifndef __DECC From fa14bdb26d45040e9a4aa5fb78de33b5f68c46cf Mon Sep 17 00:00:00 2001 From: Jiaxun Yang Date: Thu, 11 Aug 2022 14:47:39 +0100 Subject: [PATCH 07/19] Entitle missing declaration for alpha Signed-off-by: Jiaxun Yang --- common_macro.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common_macro.h b/common_macro.h index d2fa822c2..3226d0f11 100644 --- a/common_macro.h +++ b/common_macro.h @@ -2612,7 +2612,7 @@ #ifndef ASSEMBLER #if !defined(DYNAMIC_ARCH) \ && (defined(ARCH_X86) || defined(ARCH_X86_64) || defined(ARCH_IA64) || defined(ARCH_MIPS64) || defined(ARCH_ARM64) \ - || defined(ARCH_LOONGARCH64) || defined(ARCH_E2K)) + || defined(ARCH_LOONGARCH64) || defined(ARCH_E2K) || defined(ARCH_ALPHA)) extern BLASLONG gemm_offset_a; extern BLASLONG gemm_offset_b; extern BLASLONG sbgemm_p; From 50c4eeb97dd12e8f7fd5e801ea29b42682778340 Mon Sep 17 00:00:00 2001 From: Jiaxun Yang Date: Thu, 11 Aug 2022 14:49:36 +0100 Subject: [PATCH 08/19] alpha: Remove include of version.h It will be 
defined by preprocessor argument. Signed-off-by: Jiaxun Yang --- kernel/alpha/amax.S | 2 +- kernel/alpha/asum.S | 2 +- kernel/alpha/axpy.S | 2 +- kernel/alpha/cabs.S | 2 +- kernel/alpha/cnrm2.S | 2 +- kernel/alpha/copy.S | 2 +- kernel/alpha/cscal.S | 2 +- kernel/alpha/dnrm2.S | 2 +- kernel/alpha/dot.S | 2 +- kernel/alpha/gemm_beta.S | 2 +- kernel/alpha/gemm_kernel_4x4.S | 2 +- kernel/alpha/gemv_n.S | 2 +- kernel/alpha/gemv_t.S | 2 +- kernel/alpha/iamax.S | 2 +- kernel/alpha/imax.S | 2 +- kernel/alpha/izamax.S | 2 +- kernel/alpha/lsame.S | 2 +- kernel/alpha/max.S | 2 +- kernel/alpha/rot.S | 2 +- kernel/alpha/scal.S | 2 +- kernel/alpha/snrm2.S | 2 +- kernel/alpha/sum.S | 2 +- kernel/alpha/swap.S | 2 +- kernel/alpha/trsm_kernel_4x4_LN.S | 2 +- kernel/alpha/trsm_kernel_4x4_LT.S | 2 +- kernel/alpha/trsm_kernel_4x4_RT.S | 2 +- kernel/alpha/zamax.S | 2 +- kernel/alpha/zasum.S | 2 +- kernel/alpha/zaxpy.S | 2 +- kernel/alpha/zdot.S | 2 +- kernel/alpha/zgemm_beta.S | 2 +- kernel/alpha/zgemm_kernel_2x2.S | 2 +- kernel/alpha/zgemv_n.S | 2 +- kernel/alpha/zgemv_t.S | 2 +- kernel/alpha/znrm2.S | 2 +- kernel/alpha/zrot.S | 2 +- kernel/alpha/zscal.S | 2 +- kernel/alpha/zsum.S | 2 +- kernel/alpha/zswap.S | 2 +- kernel/alpha/ztrsm_kernel_2x2_LN.S | 2 +- kernel/alpha/ztrsm_kernel_2x2_LT.S | 2 +- kernel/alpha/ztrsm_kernel_2x2_RT.S | 2 +- 42 files changed, 42 insertions(+), 42 deletions(-) diff --git a/kernel/alpha/amax.S b/kernel/alpha/amax.S index e528adc07..88635e8ec 100644 --- a/kernel/alpha/amax.S +++ b/kernel/alpha/amax.S @@ -38,7 +38,7 @@ #define ASSEMBLER #include "common.h" -#include "version.h" + #define N $16 #define X $17 diff --git a/kernel/alpha/asum.S b/kernel/alpha/asum.S index b312d064b..54725b5cc 100644 --- a/kernel/alpha/asum.S +++ b/kernel/alpha/asum.S @@ -38,7 +38,7 @@ #define ASSEMBLER #include "common.h" -#include "version.h" + #define PREFETCHSIZE 88 diff --git a/kernel/alpha/axpy.S b/kernel/alpha/axpy.S index 1007b063b..403b89df1 100644 --- 
a/kernel/alpha/axpy.S +++ b/kernel/alpha/axpy.S @@ -38,7 +38,7 @@ #define ASSEMBLER #include "common.h" -#include "version.h" + #define PREFETCHSIZE 40 diff --git a/kernel/alpha/cabs.S b/kernel/alpha/cabs.S index 5fa27af53..79b92836b 100644 --- a/kernel/alpha/cabs.S +++ b/kernel/alpha/cabs.S @@ -38,7 +38,7 @@ #define ASSEMBLER #include "common.h" -#include "version.h" + .set noat .set noreorder diff --git a/kernel/alpha/cnrm2.S b/kernel/alpha/cnrm2.S index bd1ab8782..445eaa7ea 100644 --- a/kernel/alpha/cnrm2.S +++ b/kernel/alpha/cnrm2.S @@ -39,7 +39,7 @@ #define ASSEMBLER #include "common.h" -#include "version.h" + #define PREFETCH_SIZE 80 diff --git a/kernel/alpha/copy.S b/kernel/alpha/copy.S index 749039c9e..315a02b1e 100644 --- a/kernel/alpha/copy.S +++ b/kernel/alpha/copy.S @@ -38,7 +38,7 @@ #define ASSEMBLER #include "common.h" -#include "version.h" + #define N $16 #define X $17 diff --git a/kernel/alpha/cscal.S b/kernel/alpha/cscal.S index bba3137a9..a09306a1c 100644 --- a/kernel/alpha/cscal.S +++ b/kernel/alpha/cscal.S @@ -42,7 +42,7 @@ #define ASSEMBLER #include "common.h" -#include "version.h" + .globl NAME .ent NAME diff --git a/kernel/alpha/dnrm2.S b/kernel/alpha/dnrm2.S index 0dfb64924..c71a8e3c9 100644 --- a/kernel/alpha/dnrm2.S +++ b/kernel/alpha/dnrm2.S @@ -39,7 +39,7 @@ #define ASSEMBLER #include "common.h" -#include "version.h" + #define PREFETCH_SIZE 80 diff --git a/kernel/alpha/dot.S b/kernel/alpha/dot.S index 330196c78..fe84c719f 100644 --- a/kernel/alpha/dot.S +++ b/kernel/alpha/dot.S @@ -38,7 +38,7 @@ #define ASSEMBLER #include "common.h" -#include "version.h" + #define PREFETCHSIZE 88 diff --git a/kernel/alpha/gemm_beta.S b/kernel/alpha/gemm_beta.S index 44b2fada1..e234a3216 100644 --- a/kernel/alpha/gemm_beta.S +++ b/kernel/alpha/gemm_beta.S @@ -38,7 +38,7 @@ #define ASSEMBLER #include "common.h" -#include "version.h" + .set noat .set noreorder diff --git a/kernel/alpha/gemm_kernel_4x4.S b/kernel/alpha/gemm_kernel_4x4.S index 
c55d817df..8fda1ab5a 100644 --- a/kernel/alpha/gemm_kernel_4x4.S +++ b/kernel/alpha/gemm_kernel_4x4.S @@ -38,7 +38,7 @@ #define ASSEMBLER #include "common.h" -#include "version.h" + #if !defined(EV4) && !defined(EV5) && !defined(EV6) #error "Architecture is not specified." diff --git a/kernel/alpha/gemv_n.S b/kernel/alpha/gemv_n.S index 3e9d1d7fb..0fcd5b865 100644 --- a/kernel/alpha/gemv_n.S +++ b/kernel/alpha/gemv_n.S @@ -38,7 +38,7 @@ #define ASSEMBLER #include "common.h" -#include "version.h" + #define STACKSIZE 64 #define PREFETCHSIZE 32 diff --git a/kernel/alpha/gemv_t.S b/kernel/alpha/gemv_t.S index ea95546e8..f9432486f 100644 --- a/kernel/alpha/gemv_t.S +++ b/kernel/alpha/gemv_t.S @@ -38,7 +38,7 @@ #define ASSEMBLER #include "common.h" -#include "version.h" + #define STACKSIZE 64 #define PREFETCHSIZE 32 diff --git a/kernel/alpha/iamax.S b/kernel/alpha/iamax.S index 2be5d5d08..384df07e6 100644 --- a/kernel/alpha/iamax.S +++ b/kernel/alpha/iamax.S @@ -38,7 +38,7 @@ #define ASSEMBLER #include "common.h" -#include "version.h" + #define N $16 #define X $17 diff --git a/kernel/alpha/imax.S b/kernel/alpha/imax.S index d8958c86a..785751075 100644 --- a/kernel/alpha/imax.S +++ b/kernel/alpha/imax.S @@ -38,7 +38,7 @@ #define ASSEMBLER #include "common.h" -#include "version.h" + #define N $16 #define X $17 diff --git a/kernel/alpha/izamax.S b/kernel/alpha/izamax.S index c932581ae..d85b909e1 100644 --- a/kernel/alpha/izamax.S +++ b/kernel/alpha/izamax.S @@ -38,7 +38,7 @@ #define ASSEMBLER #include "common.h" -#include "version.h" + #define N $16 #define X $17 diff --git a/kernel/alpha/lsame.S b/kernel/alpha/lsame.S index 082f79082..b1a7d5b70 100644 --- a/kernel/alpha/lsame.S +++ b/kernel/alpha/lsame.S @@ -36,7 +36,7 @@ /* or implied, of The University of Texas at Austin. 
*/ /*********************************************************************/ -#include "version.h" + .set noat .set noreorder diff --git a/kernel/alpha/max.S b/kernel/alpha/max.S index af1b8fb85..935f27718 100644 --- a/kernel/alpha/max.S +++ b/kernel/alpha/max.S @@ -38,7 +38,7 @@ #define ASSEMBLER #include "common.h" -#include "version.h" + #define N $16 #define X $17 diff --git a/kernel/alpha/rot.S b/kernel/alpha/rot.S index d1656d7e3..7a0991015 100644 --- a/kernel/alpha/rot.S +++ b/kernel/alpha/rot.S @@ -38,7 +38,7 @@ #define ASSEMBLER #include "common.h" -#include "version.h" + #define N $16 #define X $17 diff --git a/kernel/alpha/scal.S b/kernel/alpha/scal.S index 2d95801c8..db6959520 100644 --- a/kernel/alpha/scal.S +++ b/kernel/alpha/scal.S @@ -38,7 +38,7 @@ #define ASSEMBLER #include "common.h" -#include "version.h" + #define PREFETCHSIZE 88 diff --git a/kernel/alpha/snrm2.S b/kernel/alpha/snrm2.S index 0dfb64924..c71a8e3c9 100644 --- a/kernel/alpha/snrm2.S +++ b/kernel/alpha/snrm2.S @@ -39,7 +39,7 @@ #define ASSEMBLER #include "common.h" -#include "version.h" + #define PREFETCH_SIZE 80 diff --git a/kernel/alpha/sum.S b/kernel/alpha/sum.S index 3902817a7..adc4ca5a1 100644 --- a/kernel/alpha/sum.S +++ b/kernel/alpha/sum.S @@ -38,7 +38,7 @@ #define ASSEMBLER #include "common.h" -#include "version.h" + #define PREFETCHSIZE 88 diff --git a/kernel/alpha/swap.S b/kernel/alpha/swap.S index 9e21990c4..34e58a72a 100644 --- a/kernel/alpha/swap.S +++ b/kernel/alpha/swap.S @@ -38,7 +38,7 @@ #define ASSEMBLER #include "common.h" -#include "version.h" + PROLOGUE PROFCODE diff --git a/kernel/alpha/trsm_kernel_4x4_LN.S b/kernel/alpha/trsm_kernel_4x4_LN.S index 600b4e255..be5062244 100644 --- a/kernel/alpha/trsm_kernel_4x4_LN.S +++ b/kernel/alpha/trsm_kernel_4x4_LN.S @@ -38,7 +38,7 @@ #define ASSEMBLER #include "common.h" -#include "version.h" + #if !defined(EV4) && !defined(EV5) && !defined(EV6) #error "Architecture is not specified." 
diff --git a/kernel/alpha/trsm_kernel_4x4_LT.S b/kernel/alpha/trsm_kernel_4x4_LT.S index 81436d034..dfc7e98aa 100644 --- a/kernel/alpha/trsm_kernel_4x4_LT.S +++ b/kernel/alpha/trsm_kernel_4x4_LT.S @@ -38,7 +38,7 @@ #define ASSEMBLER #include "common.h" -#include "version.h" + #if !defined(EV4) && !defined(EV5) && !defined(EV6) #error "Architecture is not specified." diff --git a/kernel/alpha/trsm_kernel_4x4_RT.S b/kernel/alpha/trsm_kernel_4x4_RT.S index 71d6c43fa..d77ccc61b 100644 --- a/kernel/alpha/trsm_kernel_4x4_RT.S +++ b/kernel/alpha/trsm_kernel_4x4_RT.S @@ -38,7 +38,7 @@ #define ASSEMBLER #include "common.h" -#include "version.h" + #if !defined(EV4) && !defined(EV5) && !defined(EV6) #error "Architecture is not specified." diff --git a/kernel/alpha/zamax.S b/kernel/alpha/zamax.S index f1ea18d2d..96502a7a9 100644 --- a/kernel/alpha/zamax.S +++ b/kernel/alpha/zamax.S @@ -38,7 +38,7 @@ #define ASSEMBLER #include "common.h" -#include "version.h" + #define N $16 #define X $17 diff --git a/kernel/alpha/zasum.S b/kernel/alpha/zasum.S index 67ed78584..37a1c234a 100644 --- a/kernel/alpha/zasum.S +++ b/kernel/alpha/zasum.S @@ -38,7 +38,7 @@ #define ASSEMBLER #include "common.h" -#include "version.h" + #define PREFETCHSIZE 88 diff --git a/kernel/alpha/zaxpy.S b/kernel/alpha/zaxpy.S index 1416769a1..1494c7fc0 100644 --- a/kernel/alpha/zaxpy.S +++ b/kernel/alpha/zaxpy.S @@ -38,7 +38,7 @@ #define ASSEMBLER #include "common.h" -#include "version.h" + #define PREFETCHSIZE 40 diff --git a/kernel/alpha/zdot.S b/kernel/alpha/zdot.S index 78dcae668..724526407 100644 --- a/kernel/alpha/zdot.S +++ b/kernel/alpha/zdot.S @@ -38,7 +38,7 @@ #define ASSEMBLER #include "common.h" -#include "version.h" + #define PREFETCHSIZE 88 diff --git a/kernel/alpha/zgemm_beta.S b/kernel/alpha/zgemm_beta.S index f7ca347f1..fcabe48d0 100644 --- a/kernel/alpha/zgemm_beta.S +++ b/kernel/alpha/zgemm_beta.S @@ -38,7 +38,7 @@ #define ASSEMBLER #include "common.h" -#include "version.h" + .set noat .set 
noreorder diff --git a/kernel/alpha/zgemm_kernel_2x2.S b/kernel/alpha/zgemm_kernel_2x2.S index 67ba6d108..e56a3e10d 100644 --- a/kernel/alpha/zgemm_kernel_2x2.S +++ b/kernel/alpha/zgemm_kernel_2x2.S @@ -38,7 +38,7 @@ #define ASSEMBLER #include "common.h" -#include "version.h" + #if !defined(EV4) && !defined(EV5) && !defined(EV6) #error "Architecture is not specified." diff --git a/kernel/alpha/zgemv_n.S b/kernel/alpha/zgemv_n.S index fd602a3eb..2ebb918d5 100644 --- a/kernel/alpha/zgemv_n.S +++ b/kernel/alpha/zgemv_n.S @@ -38,7 +38,7 @@ #define ASSEMBLER #include "common.h" -#include "version.h" + #define STACKSIZE 64 #define PREFETCHSIZE 32 diff --git a/kernel/alpha/zgemv_t.S b/kernel/alpha/zgemv_t.S index bac56eb3f..96d8caa27 100644 --- a/kernel/alpha/zgemv_t.S +++ b/kernel/alpha/zgemv_t.S @@ -38,7 +38,7 @@ #define ASSEMBLER #include "common.h" -#include "version.h" + #define STACKSIZE 64 #define PREFETCHSIZE 32 diff --git a/kernel/alpha/znrm2.S b/kernel/alpha/znrm2.S index bd1ab8782..445eaa7ea 100644 --- a/kernel/alpha/znrm2.S +++ b/kernel/alpha/znrm2.S @@ -39,7 +39,7 @@ #define ASSEMBLER #include "common.h" -#include "version.h" + #define PREFETCH_SIZE 80 diff --git a/kernel/alpha/zrot.S b/kernel/alpha/zrot.S index afcdf12b4..61fe4f3d9 100644 --- a/kernel/alpha/zrot.S +++ b/kernel/alpha/zrot.S @@ -38,7 +38,7 @@ #define ASSEMBLER #include "common.h" -#include "version.h" + #define N $16 #define X $17 diff --git a/kernel/alpha/zscal.S b/kernel/alpha/zscal.S index 1a2ac10b3..bed3033f8 100644 --- a/kernel/alpha/zscal.S +++ b/kernel/alpha/zscal.S @@ -38,7 +38,7 @@ #define ASSEMBLER #include "common.h" -#include "version.h" + #define PREFETCHSIZE 88 diff --git a/kernel/alpha/zsum.S b/kernel/alpha/zsum.S index 1ad0eb137..5c51bbc6f 100644 --- a/kernel/alpha/zsum.S +++ b/kernel/alpha/zsum.S @@ -38,7 +38,7 @@ #define ASSEMBLER #include "common.h" -#include "version.h" + #define PREFETCHSIZE 88 diff --git a/kernel/alpha/zswap.S b/kernel/alpha/zswap.S index 
a12a2c7a7..02be94115 100644 --- a/kernel/alpha/zswap.S +++ b/kernel/alpha/zswap.S @@ -38,7 +38,7 @@ #define ASSEMBLER #include "common.h" -#include "version.h" + PROLOGUE PROFCODE diff --git a/kernel/alpha/ztrsm_kernel_2x2_LN.S b/kernel/alpha/ztrsm_kernel_2x2_LN.S index dcbe4e236..44d46daa7 100644 --- a/kernel/alpha/ztrsm_kernel_2x2_LN.S +++ b/kernel/alpha/ztrsm_kernel_2x2_LN.S @@ -38,7 +38,7 @@ #define ASSEMBLER #include "common.h" -#include "version.h" + #if !defined(EV4) && !defined(EV5) && !defined(EV6) #error "Architecture is not specified." diff --git a/kernel/alpha/ztrsm_kernel_2x2_LT.S b/kernel/alpha/ztrsm_kernel_2x2_LT.S index e0c82026e..f17987faf 100644 --- a/kernel/alpha/ztrsm_kernel_2x2_LT.S +++ b/kernel/alpha/ztrsm_kernel_2x2_LT.S @@ -38,7 +38,7 @@ #define ASSEMBLER #include "common.h" -#include "version.h" + #if !defined(EV4) && !defined(EV5) && !defined(EV6) #error "Architecture is not specified." diff --git a/kernel/alpha/ztrsm_kernel_2x2_RT.S b/kernel/alpha/ztrsm_kernel_2x2_RT.S index e890f599d..90b56c954 100644 --- a/kernel/alpha/ztrsm_kernel_2x2_RT.S +++ b/kernel/alpha/ztrsm_kernel_2x2_RT.S @@ -38,7 +38,7 @@ #define ASSEMBLER #include "common.h" -#include "version.h" + #if !defined(EV4) && !defined(EV5) && !defined(EV6) #error "Architecture is not specified." 
From a03ed065e1d693642732f9135cdd9cbe70b8cb1b Mon Sep 17 00:00:00 2001 From: Jiaxun Yang Date: Thu, 11 Aug 2022 14:51:57 +0100 Subject: [PATCH 09/19] Wire up alpha in new build system Signed-off-by: Jiaxun Yang --- Makefile.alpha | 42 ++++++++++++------------------------------ cpuid_alpha.c | 5 +++++ getarch.c | 41 ++++++++++++++++++++++++++++++++++++++++- 3 files changed, 57 insertions(+), 31 deletions(-) diff --git a/Makefile.alpha b/Makefile.alpha index bd4f4d58b..97e4d757e 100644 --- a/Makefile.alpha +++ b/Makefile.alpha @@ -1,42 +1,24 @@ -CPP = $(CC) -E -RANLIB = ranlib - -ifeq ($(LIBSUBARCH), EV4) -LIBNAME = $(LIBPREFIX)_ev4.a -LIBNAME_P = $(LIBPREFIX)_ev4_p.a -endif - -ifeq ($(LIBSUBARCH), EV5) -LIBNAME = $(LIBPREFIX)_ev5.a -LIBNAME_P = $(LIBPREFIX)_ev5_p.a -endif - -ifeq ($(LIBSUBARCH), EV6) -LIBNAME = $(LIBPREFIX)_ev6.a -LIBNAME_P = $(LIBPREFIX)_ev6_p.a -endif - ifneq ($(COMPILER), NATIVE) # GCC User -ifeq ($(LIBSUBARCH), EV4) -OPTION += -DEV4 -mcpu=ev4 +ifeq ($(CORE), EV4) +CCOMMON_OPT += -mcpu=ev4 endif -ifeq ($(LIBSUBARCH), EV5) -OPTION += -DEV5 -mcpu=ev5 +ifeq ($(CORE), EV5) +CCOMMON_OPT += -mcpu=ev5 endif -ifeq ($(LIBSUBARCH), EV6) -OPTION += -DEV6 -mcpu=ev6 +ifeq ($(CORE), EV6) +CCOMMON_OPT += -mcpu=ev6 endif else # Compaq Compiler User -ifeq ($(LIBSUBARCH), EV4) -OPTION += -DEV4 -tune ev4 -arch ev4 +ifeq ($(CORE), EV4) +CCOMMON_OPT += -tune ev4 -arch ev4 endif -ifeq ($(LIBSUBARCH), EV5) -OPTION += -DEV5 -tune ev5 -arch ev5 +ifeq ($(CORE), EV5) +CCOMMON_OPT += -tune ev5 -arch ev5 endif -ifeq ($(LIBSUBARCH), EV6) -OPTION += -DEV6 -tune ev6 -arch ev6 +ifeq ($(CORE), EV6) +CCOMMON_OPT += -tune ev6 -arch ev6 endif endif diff --git a/cpuid_alpha.c b/cpuid_alpha.c index 58dccdefc..e0e019af2 100644 --- a/cpuid_alpha.c +++ b/cpuid_alpha.c @@ -59,6 +59,11 @@ void get_subarchitecture(void){ printf("ev%d", implver() + 4); } + +void get_corename(void){ + printf("EV%d", implver() + 4); +} + void get_subdirname(void){ printf("alpha"); } diff --git a/getarch.c 
b/getarch.c index 7761551ea..403f3ef7b 100644 --- a/getarch.c +++ b/getarch.c @@ -146,6 +146,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. /* #define FORCE_SPARCV7 */ /* #define FORCE_ZARCH_GENERIC */ /* #define FORCE_Z13 */ +/* #define FORCE_EV4 */ +/* #define FORCE_EV5 */ +/* #define FORCE_EV6 */ /* #define FORCE_GENERIC */ #ifdef FORCE_P2 @@ -1601,6 +1604,42 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define CORENAME "Z14" #endif +#ifdef FORCE_EV4 +#define FORCE +#define ARCHITECTURE "ALPHA" +#define SUBARCHITECTURE "ev4" +#define ARCHCONFIG "-DEV4 " \ + "-DL1_DATA_SIZE=16384 -DL1_DATA_LINESIZE=32 " \ + "-DL2_SIZE=2097152 -DL2_LINESIZE=32 " \ + "-DDTB_DEFAULT_ENTRIES=32 -DDTB_SIZE=8192 " +#define LIBNAME "ev4" +#define CORENAME "EV4" +#endif + +#ifdef FORCE_EV5 +#define FORCE +#define ARCHITECTURE "ALPHA" +#define SUBARCHITECTURE "ev5" +#define ARCHCONFIG "-DEV5 " \ + "-DL1_DATA_SIZE=16384 -DL1_DATA_LINESIZE=32 " \ + "-DL2_SIZE=2097152 -DL2_LINESIZE=64 " \ + "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=8192 " +#define LIBNAME "ev5" +#define CORENAME "EV5" +#endif + +#ifdef FORCE_EV6 +#define FORCE +#define ARCHITECTURE "ALPHA" +#define SUBARCHITECTURE "ev6" +#define ARCHCONFIG "-DEV6 " \ + "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \ + "-DL2_SIZE=4194304 -DL2_LINESIZE=64 " \ + "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=8192 " +#define LIBNAME "ev6" +#define CORENAME "EV6" +#endif + #ifdef FORCE_C910V #define FORCE #define ARCHITECTURE "RISCV64" @@ -1777,7 +1816,7 @@ int main(int argc, char *argv[]){ #ifdef FORCE printf("CORE=%s\n", CORENAME); #else -#if defined(INTEL_AMD) || defined(POWER) || defined(__mips__) || defined(__arm__) || defined(__aarch64__) || defined(ZARCH) || defined(sparc) || defined(__loongarch__) || defined(__riscv) +#if defined(INTEL_AMD) || defined(POWER) || defined(__mips__) || defined(__arm__) || defined(__aarch64__) || defined(ZARCH) || defined(sparc) || defined(__loongarch__) || 
defined(__riscv) || defined(__alpha__) printf("CORE=%s\n", get_corename()); #endif #endif From 407af4b6aacfe070ac3ed7a72b293363cc711e8c Mon Sep 17 00:00:00 2001 From: Jiaxun Yang Date: Thu, 11 Aug 2022 14:52:43 +0100 Subject: [PATCH 10/19] Document alpha targets Signed-off-by: Jiaxun Yang --- TargetList.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/TargetList.txt b/TargetList.txt index d17caf480..6274c4a74 100644 --- a/TargetList.txt +++ b/TargetList.txt @@ -128,3 +128,7 @@ LOONGSON2K1000 12. Elbrus E2000: E2K +13. Alpha +EV4 +EV5 +EV6 From 704a024df48dedc5d69ab585081bbb54fa5c46cd Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Thu, 11 Aug 2022 16:37:23 +0200 Subject: [PATCH 11/19] Fix C99-style declaration of loop variable --- lapack-netlib/SRC/iparam2stage.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/lapack-netlib/SRC/iparam2stage.c b/lapack-netlib/SRC/iparam2stage.c index 1b6c4c8b8..7ba938dcd 100644 --- a/lapack-netlib/SRC/iparam2stage.c +++ b/lapack-netlib/SRC/iparam2stage.c @@ -717,11 +717,12 @@ integer iparam2stage_(integer *ispec, char *name__, char *opts, integer *ni, ret_val = -1; // s_copy(subnam, name__, (ftnlen)12, name_len); -strncpy(subnam,name__,13); -subnam[13]='\0'; -for (int i=0;i<13;i++) subnam[i]=toupper(subnam[i]); - //fprintf(stderr,"iparam2stage, name__ gelesen #%s#\n",name__); -//fprintf(stderr,"iparam2stage, subnam gelesen #%s#\n",subnam); + strncpy(subnam,name__,13); + subnam[13]='\0'; + { + int i; + for (i=0;i<13;i++) subnam[i]=toupper(subnam[i]); + } #if 0 From 3fc06b6e7e5febed4c41cb6d2109935828acbbe9 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Thu, 11 Aug 2022 16:42:02 +0200 Subject: [PATCH 12/19] Double the wait time for ppc jobs in Travis CI --- .travis.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index 531377456..a4edad726 100644 --- a/.travis.yml +++ b/.travis.yml @@ -30,7 +30,7 @@ matrix: before_script: &common-before 
- COMMON_FLAGS="DYNAMIC_ARCH=1 TARGET=POWER8 NUM_THREADS=32" script: - - travis_wait 20 make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE + - travis_wait 40 make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE - make -C test $COMMON_FLAGS $BTYPE - make -C ctest $COMMON_FLAGS $BTYPE - make -C utest $COMMON_FLAGS $BTYPE @@ -104,7 +104,7 @@ matrix: - sudo apt-get update - sudo apt-get install gcc-9 gfortran-9 -y script: - - travis_wait 20 make QUIET_MAKE=1 BINARY=64 USE_OPENMP=1 CC=gcc-9 FC=gfortran-9 + - travis_wait 40 make QUIET_MAKE=1 BINARY=64 USE_OPENMP=1 CC=gcc-9 FC=gfortran-9 - make -C test $COMMON_FLAGS $BTYPE - make -C ctest $COMMON_FLAGS $BTYPE - make -C utest $COMMON_FLAGS $BTYPE @@ -121,7 +121,7 @@ matrix: - sudo apt-get update - sudo apt-get install gcc-9 gfortran-9 -y script: - - travis_wait 20 make QUIET_MAKE=1 BUILD_BFLOAT16=1 BINARY=64 USE_OPENMP=1 CC=gcc-9 FC=gfortran-9 + - travis_wait 40 make QUIET_MAKE=1 BUILD_BFLOAT16=1 BINARY=64 USE_OPENMP=1 CC=gcc-9 FC=gfortran-9 - make -C test $COMMON_FLAGS $BTYPE - make -C ctest $COMMON_FLAGS $BTYPE - make -C utest $COMMON_FLAGS $BTYPE From a50b29c540c25baf8f788131cc905ebe0575f253 Mon Sep 17 00:00:00 2001 From: Jiaxun Yang Date: Sun, 31 Jul 2022 19:15:57 +0100 Subject: [PATCH 13/19] Provide a fallback MIPS64_GENERIC target It is really dangerous to fallback to Loongson core on other MIPS64 processors. 
Signed-off-by: Jiaxun Yang --- Makefile.system | 7 +++++- TargetList.txt | 1 + cpuid_mips64.c | 31 ++++++++++++++++---------- driver/others/dynamic_mips64.c | 18 +++++++++------ getarch.c | 15 +++++++++++++ kernel/mips64/KERNEL | 40 ++++++++++++++++++++-------------- param.h | 2 +- 7 files changed, 77 insertions(+), 37 deletions(-) diff --git a/Makefile.system b/Makefile.system index 3be5efa0c..526be236d 100644 --- a/Makefile.system +++ b/Makefile.system @@ -677,7 +677,7 @@ endif endif ifeq ($(ARCH), mips64) -DYNAMIC_CORE = LOONGSON3R3 LOONGSON3R4 +DYNAMIC_CORE = LOONGSON3R3 LOONGSON3R4 MIPS64_GENERIC endif ifeq ($(ARCH), loongarch64) @@ -856,6 +856,11 @@ CCOMMON_OPT += -mabi=32 BINARY_DEFINED = 1 endif +ifneq (, $(filter $(CORE), MIPS64_GENERIC)) +CCOMMON_OPT += -DNO_MSA +FCOMMON_OPT += -DNO_MSA +endif + ifneq (, $(filter $(CORE),LOONGSON3R3 LOONGSON3R4)) CCOMMON_OPT += -march=loongson3a FCOMMON_OPT += -march=loongson3a diff --git a/TargetList.txt b/TargetList.txt index d17caf480..d81cc29fa 100644 --- a/TargetList.txt +++ b/TargetList.txt @@ -65,6 +65,7 @@ MIPS1004K MIPS24K 4.MIPS64 CPU: +MIPS64_GENERIC SICORTEX LOONGSON3A LOONGSON3B diff --git a/cpuid_mips64.c b/cpuid_mips64.c index 8753ee3f0..e5f68b5a1 100644 --- a/cpuid_mips64.c +++ b/cpuid_mips64.c @@ -70,16 +70,18 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. /* or implied, of The University of Texas at Austin. 
*/ /*********************************************************************/ -#define CPU_UNKNOWN 0 -#define CPU_SICORTEX 1 -#define CPU_LOONGSON3R3 2 -#define CPU_LOONGSON3R4 3 -#define CPU_I6400 4 -#define CPU_P6600 5 -#define CPU_I6500 6 +#define CPU_UNKNOWN 0 +#define CPU_MIPS64_GENERIC 1 +#define CPU_SICORTEX 2 +#define CPU_LOONGSON3R3 3 +#define CPU_LOONGSON3R4 4 +#define CPU_I6400 5 +#define CPU_P6600 6 +#define CPU_I6500 7 static char *cpuname[] = { "UNKNOWN", + "MIPS64_GENERIC", "SICORTEX", "LOONGSON3R3", "LOONGSON3R4", @@ -113,8 +115,11 @@ int detect(void){ return CPU_SICORTEX; } } + + return CPU_MIPS64_GENERIC; +#else + return CPU_UNKNOWN; #endif - return CPU_UNKNOWN; } char *get_corename(void){ @@ -136,9 +141,11 @@ void get_subarchitecture(void){ printf("P6600"); }else if(detect()==CPU_I6500){ printf("I6500"); - }else{ + }else if(detect()==CPU_SICORTEX){ printf("SICORTEX"); - } + }else{ + printf("MIPS64_GENERIC"); + } } void get_subdirname(void){ @@ -215,8 +222,8 @@ void get_libname(void){ printf("p6600\n"); }else if(detect()==CPU_I6500) { printf("i6500\n"); - }else{ - printf("mips64\n"); + }else { + printf("mips64_generic\n"); } } diff --git a/driver/others/dynamic_mips64.c b/driver/others/dynamic_mips64.c index d80e81356..7a1d5228c 100644 --- a/driver/others/dynamic_mips64.c +++ b/driver/others/dynamic_mips64.c @@ -49,20 +49,23 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
extern gotoblas_t gotoblas_LOONGSON3R3; extern gotoblas_t gotoblas_LOONGSON3R4; +extern gotoblas_t gotoblas_MIPS64_GENERIC; extern void openblas_warning(int verbose, const char * msg); -#define NUM_CORETYPES 2 +#define NUM_CORETYPES 3 static char *corename[] = { + "MIPS64_GENERIC", "loongson3r3", "loongson3r4", "UNKNOWN" }; char *gotoblas_corename(void) { - if (gotoblas == &gotoblas_LOONGSON3R3) return corename[0]; - if (gotoblas == &gotoblas_LOONGSON3R4) return corename[1]; + if (gotoblas == &gotoblas_MIPS64_GENERIC) return corename[0]; + if (gotoblas == &gotoblas_LOONGSON3R3) return corename[1]; + if (gotoblas == &gotoblas_LOONGSON3R4) return corename[2]; return corename[NUM_CORETYPES]; } @@ -82,8 +85,9 @@ static gotoblas_t *force_coretype(char *coretype) { switch (found) { - case 0: return (&gotoblas_LOONGSON3R3); - case 1: return (&gotoblas_LOONGSON3R4); + case 0: return (&gotoblas_MIPS64_GENERIC); + case 1: return (&gotoblas_LOONGSON3R3); + case 2: return (&gotoblas_LOONGSON3R4); } snprintf(message, 128, "Core not found: %s\n", coretype); openblas_warning(1, message); @@ -173,9 +177,9 @@ void gotoblas_dynamic_init(void) { if (gotoblas == NULL) { - snprintf(coremsg, 128, "Falling back to loongson3r3 core\n"); + snprintf(coremsg, 128, "Falling back to MIPS64_GENERIC\n"); openblas_warning(1, coremsg); - gotoblas = &gotoblas_LOONGSON3R3; + gotoblas = &gotoblas_MIPS64_GENERIC; } if (gotoblas && gotoblas->init) { diff --git a/getarch.c b/getarch.c index 7761551ea..a76c89185 100644 --- a/getarch.c +++ b/getarch.c @@ -131,6 +131,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. /* #define FORCE_PPC440 */ /* #define FORCE_PPC440FP2 */ /* #define FORCE_CELL */ +/* #define FORCE_MIPS64_GENERIC */ /* #define FORCE_SICORTEX */ /* #define FORCE_LOONGSON3R3 */ /* #define FORCE_LOONGSON3R4 */ @@ -915,6 +916,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define CORENAME "CELL" #endif +#ifdef FORCE_MIPS64_GENERIC +#define FORCE +#define ARCHITECTURE "MIPS" +#define SUBARCHITECTURE "MIPS64_GENERIC" +#define SUBDIRNAME "mips64" +#define ARCHCONFIG "-DMIPS64_GENERIC " \ + "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \ + "-DL2_SIZE=1048576 -DL2_LINESIZE=32 " \ + "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 " +#define LIBNAME "mips64_generic" +#define CORENAME "MIPS64_GENERIC" +#else +#endif + #ifdef FORCE_SICORTEX #define FORCE #define ARCHITECTURE "MIPS" diff --git a/kernel/mips64/KERNEL b/kernel/mips64/KERNEL index 97ef3692c..54939a9ef 100644 --- a/kernel/mips64/KERNEL +++ b/kernel/mips64/KERNEL @@ -42,50 +42,58 @@ endif ifndef SGEMMKERNEL SGEMMKERNEL = gemm_kernel.S +ifneq ($(SGEMM_UNROLL_M), $(SGEMM_UNROLL_N)) SGEMMINCOPY = ../generic/gemm_ncopy_2.c SGEMMITCOPY = ../generic/gemm_tcopy_2.c +SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX) +SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX) +endif SGEMMONCOPY = ../generic/gemm_ncopy_8.c SGEMMOTCOPY = ../generic/gemm_tcopy_8.c -SGEMMINCOPYOBJ = sgemm_incopy.o -SGEMMITCOPYOBJ = sgemm_itcopy.o -SGEMMONCOPYOBJ = sgemm_oncopy.o -SGEMMOTCOPYOBJ = sgemm_otcopy.o +SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX) +SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX) endif ifndef DGEMMKERNEL DGEMMKERNEL = gemm_kernel.S +ifneq ($(DGEMM_UNROLL_M), $(DGEMM_UNROLL_N)) DGEMMINCOPY = ../generic/gemm_ncopy_2.c DGEMMITCOPY = ../generic/gemm_tcopy_2.c +DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX) +DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX) +endif DGEMMONCOPY = ../generic/gemm_ncopy_8.c DGEMMOTCOPY = ../generic/gemm_tcopy_8.c -DGEMMINCOPYOBJ = dgemm_incopy.o -DGEMMITCOPYOBJ = dgemm_itcopy.o -DGEMMONCOPYOBJ = dgemm_oncopy.o -DGEMMOTCOPYOBJ = dgemm_otcopy.o +DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX) +DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX) endif ifndef CGEMMKERNEL CGEMMKERNEL = zgemm_kernel.S +ifneq ($(CGEMM_UNROLL_M), $(CGEMM_UNROLL_N)) 
CGEMMINCOPY = ../generic/zgemm_ncopy_1.c CGEMMITCOPY = ../generic/zgemm_tcopy_1.c +CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX) +CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX) +endif CGEMMONCOPY = ../generic/zgemm_ncopy_4.c CGEMMOTCOPY = ../generic/zgemm_tcopy_4.c -CGEMMINCOPYOBJ = cgemm_incopy.o -CGEMMITCOPYOBJ = cgemm_itcopy.o -CGEMMONCOPYOBJ = cgemm_oncopy.o -CGEMMOTCOPYOBJ = cgemm_otcopy.o +CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX) +CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX) endif ifndef ZGEMMKERNEL ZGEMMKERNEL = zgemm_kernel.S +ifneq ($(ZGEMM_UNROLL_M), $(ZGEMM_UNROLL_N)) ZGEMMINCOPY = ../generic/zgemm_ncopy_1.c ZGEMMITCOPY = ../generic/zgemm_tcopy_1.c +ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX) +ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX) +endif ZGEMMONCOPY = ../generic/zgemm_ncopy_4.c ZGEMMOTCOPY = ../generic/zgemm_tcopy_4.c -ZGEMMINCOPYOBJ = zgemm_incopy.o -ZGEMMITCOPYOBJ = zgemm_itcopy.o -ZGEMMONCOPYOBJ = zgemm_oncopy.o -ZGEMMOTCOPYOBJ = zgemm_otcopy.o +ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX) +ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX) endif ifndef SGEMM_BETA diff --git a/param.h b/param.h index dc02147d8..b9b9a55e8 100644 --- a/param.h +++ b/param.h @@ -2945,7 +2945,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define SYMV_P 16 #endif -#if defined(P5600) || defined(MIPS1004K) || defined(MIPS24K) || defined(I6400) || defined(P6600) || defined(I6500) +#if defined(MIPS64_GENERIC) || defined(P5600) || defined(MIPS1004K) || defined(MIPS24K) || defined(I6400) || defined(P6600) || defined(I6500) #define SNUMOPT 2 #define DNUMOPT 2 From fae9368f14c78d6cb2679e04a5fab9f2691609a1 Mon Sep 17 00:00:00 2001 From: Jiaxun Yang Date: Sat, 6 Aug 2022 10:13:36 +0100 Subject: [PATCH 14/19] Implement DYNAMIC_LIST for MIPS64 Signed-off-by: Jiaxun Yang --- Makefile.system | 5 +++++ driver/others/dynamic_mips64.c | 14 ++++++++++++++ 2 files changed, 19 insertions(+) diff --git a/Makefile.system b/Makefile.system index 526be236d..fa952b2f4 100644 --- a/Makefile.system +++ b/Makefile.system @@ -678,6 +678,11 @@ endif ifeq ($(ARCH), mips64) DYNAMIC_CORE = LOONGSON3R3 LOONGSON3R4 MIPS64_GENERIC +ifdef DYNAMIC_LIST +override DYNAMIC_CORE = MIPS64_GENERIC $(DYNAMIC_LIST) +XCCOMMON_OPT = -DDYNAMIC_LIST -DDYN_MIPS64_GENERIC +XCCOMMON_OPT += $(foreach dcore,$(DYNAMIC_LIST),-DDYN_$(dcore)) +endif endif ifeq ($(ARCH), loongarch64) diff --git a/driver/others/dynamic_mips64.c b/driver/others/dynamic_mips64.c index 7a1d5228c..7fc347b0c 100644 --- a/driver/others/dynamic_mips64.c +++ b/driver/others/dynamic_mips64.c @@ -47,9 +47,23 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#endif #endif +#ifdef DYNAMIC_LIST +extern gotoblas_t gotoblas_MIPS64_GENERIC; +#ifdef DYN_LOONGSON3R3 +extern gotoblas_t gotoblas_LOONGSON3R3; +#else +#define gotoblas_LOONGSON3R3 gotoblas_MIPS64_GENERIC +#endif +#ifdef DYN_LOONGSON3R4 +extern gotoblas_t gotoblas_LOONGSON3R4; +#else +#define gotoblas_LOONGSON3R4 gotoblas_MIPS64_GENERIC +#endif +#else extern gotoblas_t gotoblas_LOONGSON3R3; extern gotoblas_t gotoblas_LOONGSON3R4; extern gotoblas_t gotoblas_MIPS64_GENERIC; +#endif extern void openblas_warning(int verbose, const char * msg); From 4197c354fa1a8c279002f936befa926b1876b123 Mon Sep 17 00:00:00 2001 From: Jiaxun Yang Date: Sat, 6 Aug 2022 10:27:59 +0100 Subject: [PATCH 15/19] Set proper assembler arch for MIPS64 Signed-off-by: Jiaxun Yang --- common_mips64.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/common_mips64.h b/common_mips64.h index 287459e7d..006cf33e4 100644 --- a/common_mips64.h +++ b/common_mips64.h @@ -86,7 +86,9 @@ static inline unsigned int rpcc(void){ //__asm__ __volatile__("dmfc0 %0, $25, 1": "=r"(tmp):: "memory"); //ret=tmp; __asm__ __volatile__(".set push \n" +#if !defined(__mips_isa_rev) || __mips_isa_rev < 2 ".set mips32r2\n" +#endif "rdhwr %0, $2\n" ".set pop": "=r"(ret):: "memory"); @@ -99,7 +101,9 @@ static inline unsigned int rpcc(void){ static inline int WhereAmI(void){ int ret=0; __asm__ __volatile__(".set push \n" +#if !defined(__mips_isa_rev) || __mips_isa_rev < 2 ".set mips32r2\n" +#endif "rdhwr %0, $0\n" ".set pop": "=r"(ret):: "memory"); return ret; @@ -197,9 +201,15 @@ static inline int blas_quickdivide(blasint x, blasint y){ #if defined(ASSEMBLER) && !defined(NEEDPARAM) +#if defined(__mips_isa_rev) && __mips_isa_rev >= 6 +#define ASSEMBLER_ARCH mips64r6 +#else +#define ASSEMBLER_ARCH mips64 +#endif + #define PROLOGUE \ .text ;\ - .set mips64 ;\ + .set ASSEMBLER_ARCH ;\ .align 5 ;\ .globl REALNAME ;\ .ent REALNAME ;\ From f703846ad9400a8ea175cb8dd43e18c152aeab93 Mon Sep 17 00:00:00 2001 
From: Martin Kroeker Date: Sat, 13 Aug 2022 11:38:27 +0200 Subject: [PATCH 16/19] Add function prototypes --- exports/gensymbol | 16 ++++++++++++++++ exports/gensymbol.pl | 12 ++++++++++++ 2 files changed, 28 insertions(+) diff --git a/exports/gensymbol b/exports/gensymbol index 83222a215..f05de626f 100755 --- a/exports/gensymbol +++ b/exports/gensymbol @@ -4000,6 +4000,22 @@ case "$p1" in no_underscore_objs="$no_underscore_objs $misc_common_objs" printf 'int main(void){\n' + for obj in $underscore_objs; do + [ "$obj" != "xerbla" ] && printf 'extern void %s%s%s%s();\n' \ + "$symbolprefix" "$obj" "$bu" "$symbolsuffix" + done + + for obj in $need_2underscore_objs; do + printf 'extern void %s%s%s%s%s();\n' \ + "$symbolprefix" "$obj" "$bu" "$bu" "$symbolsuffix" + done + + for obj in $no_underscore_objs; do + printf 'extern void %s%s%s();\n' \ + "$symbolprefix" "$obj" "$symbolsuffix" + done + + printf '\n' for obj in $underscore_objs; do [ "$obj" != "xerbla" ] && printf '%s%s%s%s();\n' \ "$symbolprefix" "$obj" "$bu" "$symbolsuffix" diff --git a/exports/gensymbol.pl b/exports/gensymbol.pl index ac62bc058..e38a3cc89 100644 --- a/exports/gensymbol.pl +++ b/exports/gensymbol.pl @@ -3955,6 +3955,18 @@ if ($ARGV[0] eq "linktest") { @no_underscore_objs = (@no_underscore_objs, @misc_common_objs); print "int main(void){\n"; + foreach $objs (@underscore_objs) { + print "extern void ", $symbolprefix, $objs, $bu, $symbolsuffix, "();\n" if $objs ne "xerbla"; + } + + foreach $objs (@need_2underscore_objs) { + print "extern void ", $symbolprefix, $objs, $bu, $bu, $symbolsuffix, "();\n"; + } + + foreach $objs (@no_underscore_objs) { + print "extern void ", $symbolprefix, $objs, $symbolsuffix, "();\n"; + } + foreach $objs (@underscore_objs) { print $symbolprefix, $objs, $bu, $symbolsuffix, "();\n" if $objs ne "xerbla"; } From f2cd238450b45f2ac0cd92eb1a8a951b8b7522b4 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sat, 13 Aug 2022 20:03:16 +0200 Subject: [PATCH 17/19] Restore 
conditional compilation and fix for parallel make in LAPACK MATGEN (#3733) * Restore NOPARALLEL and conditional compilation --- lapack-netlib/TESTING/MATGEN/Makefile | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/lapack-netlib/TESTING/MATGEN/Makefile b/lapack-netlib/TESTING/MATGEN/Makefile index 62a215b58..822a1eee0 100644 --- a/lapack-netlib/TESTING/MATGEN/Makefile +++ b/lapack-netlib/TESTING/MATGEN/Makefile @@ -40,27 +40,40 @@ ifneq ($(C_LAPACK), 1) $(FC) $(FFLAGS) -c -o $@ $< endif +ifneq "$(or $(BUILD_SINGLE),$(BUILD_COMPLEX))" "" SCATGEN = slatm1.o slatm7.o slaran.o slarnd.o +endif +ifeq ($(BUILD_SINGLE),1) SMATGEN = slatms.o slatme.o slatmr.o slatmt.o \ slagge.o slagsy.o slakf2.o slarge.o slaror.o slarot.o slatm2.o \ slatm3.o slatm5.o slatm6.o slahilb.o +endif +ifeq ($(BUILD_COMPLEX),1) CMATGEN = clatms.o clatme.o clatmr.o clatmt.o \ clagge.o claghe.o clagsy.o clakf2.o clarge.o claror.o clarot.o \ clatm1.o clarnd.o clatm2.o clatm3.o clatm5.o clatm6.o clahilb.o +endif +ifneq "$(or $(BUILD_DOUBLE),$(BUILD_COMPLEX16))" "" DZATGEN = dlatm1.o dlatm7.o dlaran.o dlarnd.o +endif +ifeq ($(BUILD_DOUBLE),1) DMATGEN = dlatms.o dlatme.o dlatmr.o dlatmt.o \ dlagge.o dlagsy.o dlakf2.o dlarge.o dlaror.o dlarot.o dlatm2.o \ dlatm3.o dlatm5.o dlatm6.o dlahilb.o +endif +ifeq ($(BUILD_COMPLEX16),1) ZMATGEN = zlatms.o zlatme.o zlatmr.o zlatmt.o \ zlagge.o zlaghe.o zlagsy.o zlakf2.o zlarge.o zlaror.o zlarot.o \ zlatm1.o zlarnd.o zlatm2.o zlatm3.o zlatm5.o zlatm6.o zlahilb.o +endif .PHONY: all +.NOTPARALLEL: all: $(TMGLIB) ALLOBJ = $(SMATGEN) $(CMATGEN) $(SCATGEN) $(DMATGEN) $(ZMATGEN) \ @@ -107,9 +120,17 @@ cleanlib: rm -f $(TMGLIB) ifneq ($(C_LAPACK), 1) +ifeq ($(filter $(BUILD_SINGLE) $(BUILD_COMPLEX),1),) slaran.o: slaran.f ; $(FC) $(FFLAGS_NOOPT) -c -o $@ $< +endif +ifeq ($(filter $(BUILD_DOUBLE) $(BUILD_COMPLEX16),1),) dlaran.o: dlaran.f ; $(FC) $(FFLAGS_NOOPT) -c -o $@ $< +endif else +ifeq ($(filter $(BUILD_SINGLE) $(BUILD_COMPLEX),1),) slaran.o: 
slaran.c ; $(CC) $(CFLAGS) -O0 -c -o $@ $< +endif +ifeq ($(filter $(BUILD_DOUBLE) $(BUILD_COMPLEX16),1),) dlaran.o: dlaran.c ; $(CC) $(CFLAGS) -O0 -c -o $@ $< endif +endif From 84a5f0e2eb84bd02a09e00d30d888f162a49e84b Mon Sep 17 00:00:00 2001 From: Pablo Romero Date: Fri, 26 Aug 2022 11:44:11 +0200 Subject: [PATCH 18/19] Fixes #3743. --- common.h | 4 ++-- ctest/CMakeLists.txt | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/common.h b/common.h index 00d1d0baf..e6002d322 100644 --- a/common.h +++ b/common.h @@ -90,7 +90,7 @@ extern "C" { #endif #include -#ifdef OS_LINUX +#if defined(OS_LINUX) || defined(OS_QNX) #include #include #endif @@ -107,7 +107,7 @@ extern "C" { #endif #endif -#ifdef OS_HAIKU +#if defined(OS_HAIKU) || defined(OS_QNX) #define NO_SYSV_IPC #endif diff --git a/ctest/CMakeLists.txt b/ctest/CMakeLists.txt index e779fb168..91338b73b 100644 --- a/ctest/CMakeLists.txt +++ b/ctest/CMakeLists.txt @@ -40,7 +40,7 @@ else() c_${float_char}blas1.c) endif() target_link_libraries(x${float_char}cblat1 ${OpenBLAS_LIBNAME}) - if(${CMAKE_SYSTEM_NAME} MATCHES "Linux" OR ${CMAKE_SYSTEM_NAME} MATCHES "FreeBSD") + if(${CMAKE_SYSTEM_NAME} MATCHES "Linux" OR ${CMAKE_SYSTEM_NAME} MATCHES "FreeBSD" OR ${CMAKE_SYSTEM_NAME} MATCHES "QNX") target_link_libraries(x${float_char}cblat1 m) endif() add_test(NAME "x${float_char}cblat1" @@ -65,7 +65,7 @@ else() constant.c) endif() target_link_libraries(x${float_char}cblat2 ${OpenBLAS_LIBNAME}) - if(${CMAKE_SYSTEM_NAME} MATCHES "Linux" OR ${CMAKE_SYSTEM_NAME} MATCHES "FreeBSD") + if(${CMAKE_SYSTEM_NAME} MATCHES "Linux" OR ${CMAKE_SYSTEM_NAME} MATCHES "FreeBSD" OR ${CMAKE_SYSTEM_NAME} MATCHES "QNX") target_link_libraries(x${float_char}cblat2 m) endif() add_test(NAME "x${float_char}cblat2" @@ -90,7 +90,7 @@ else() constant.c) endif() target_link_libraries(x${float_char}cblat3 ${OpenBLAS_LIBNAME}) - if(${CMAKE_SYSTEM_NAME} MATCHES "Linux" OR ${CMAKE_SYSTEM_NAME} MATCHES "FreeBSD") + if(${CMAKE_SYSTEM_NAME} 
MATCHES "Linux" OR ${CMAKE_SYSTEM_NAME} MATCHES "FreeBSD" OR ${CMAKE_SYSTEM_NAME} MATCHES "QNX") target_link_libraries(x${float_char}cblat3 m) endif() add_test(NAME "x${float_char}cblat3" From 1b1f781cf986376cb28020d6e5dab9c35b40919e Mon Sep 17 00:00:00 2001 From: Pablo Romero Date: Fri, 26 Aug 2022 11:45:23 +0200 Subject: [PATCH 19/19] Added name and details to contributors' list. --- CONTRIBUTORS.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md index 1714d90c8..f5e9dda91 100644 --- a/CONTRIBUTORS.md +++ b/CONTRIBUTORS.md @@ -211,4 +211,5 @@ In chronological order: * PLCT Lab, Institute of Software Chinese Academy of Sciences * [2022-03] Support RISC-V Vector Intrinisc 1.0 version. - \ No newline at end of file +* Pablo Romero + * [2022-08] Fix building from sources for QNX \ No newline at end of file