diff --git a/.github/workflows/dynamic_arch.yml b/.github/workflows/dynamic_arch.yml index 153c63045..c34b0c462 100644 --- a/.github/workflows/dynamic_arch.yml +++ b/.github/workflows/dynamic_arch.yml @@ -257,3 +257,54 @@ jobs: - name: Run tests timeout-minutes: 60 run: cd build && ctest + + cross_build: + runs-on: ubuntu-22.04 + + strategy: + fail-fast: false + matrix: + include: + - target: mips64el + triple: mips64el-linux-gnuabi64 + opts: DYNAMIC_ARCH=1 + - target: riscv64 + triple: riscv64-linux-gnu + opts: TARGET=RISCV64_GENERIC + - target: mipsel + triple: mipsel-linux-gnu + opts: TARGET=MIPS1004K + - target: alpha + triple: alpha-linux-gnu + opts: TARGET=EV4 + + steps: + - name: Checkout repository + uses: actions/checkout@v3 + + - name: Install Dependencies + run: | + sudo apt-get update + sudo apt-get install -y ccache gcc-${{ matrix.triple }} gfortran-${{ matrix.triple }} libgomp1-${{ matrix.target }}-cross + + - name: Compilation cache + uses: actions/cache@v3 + with: + path: ~/.ccache + key: ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }}-${{ github.sha }} + restore-keys: | + ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }} + ccache-${{ runner.os }}-${{ matrix.target }} + + - name: Configure ccache + run: | + # Limit the maximum size and switch on compression to avoid exceeding the total disk or cache quota (5 GB).
+ test -d ~/.ccache || mkdir -p ~/.ccache + echo "max_size = 300M" > ~/.ccache/ccache.conf + echo "compression = true" >> ~/.ccache/ccache.conf + ccache -s + + + - name: Build OpenBLAS + run: | + make -j$(nproc) HOSTCC="ccache gcc" CC="ccache ${{ matrix.triple }}-gcc" FC="ccache ${{ matrix.triple }}-gfortran" ARCH=${{ matrix.target }} ${{ matrix.opts }} diff --git a/.travis.yml b/.travis.yml index 531377456..a4edad726 100644 --- a/.travis.yml +++ b/.travis.yml @@ -30,7 +30,7 @@ matrix: before_script: &common-before - COMMON_FLAGS="DYNAMIC_ARCH=1 TARGET=POWER8 NUM_THREADS=32" script: - - travis_wait 20 make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE + - travis_wait 40 make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE - make -C test $COMMON_FLAGS $BTYPE - make -C ctest $COMMON_FLAGS $BTYPE - make -C utest $COMMON_FLAGS $BTYPE @@ -104,7 +104,7 @@ matrix: - sudo apt-get update - sudo apt-get install gcc-9 gfortran-9 -y script: - - travis_wait 20 make QUIET_MAKE=1 BINARY=64 USE_OPENMP=1 CC=gcc-9 FC=gfortran-9 + - travis_wait 40 make QUIET_MAKE=1 BINARY=64 USE_OPENMP=1 CC=gcc-9 FC=gfortran-9 - make -C test $COMMON_FLAGS $BTYPE - make -C ctest $COMMON_FLAGS $BTYPE - make -C utest $COMMON_FLAGS $BTYPE @@ -121,7 +121,7 @@ matrix: - sudo apt-get update - sudo apt-get install gcc-9 gfortran-9 -y script: - - travis_wait 20 make QUIET_MAKE=1 BUILD_BFLOAT16=1 BINARY=64 USE_OPENMP=1 CC=gcc-9 FC=gfortran-9 + - travis_wait 40 make QUIET_MAKE=1 BUILD_BFLOAT16=1 BINARY=64 USE_OPENMP=1 CC=gcc-9 FC=gfortran-9 - make -C test $COMMON_FLAGS $BTYPE - make -C ctest $COMMON_FLAGS $BTYPE - make -C utest $COMMON_FLAGS $BTYPE diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md index 1714d90c8..f5e9dda91 100644 --- a/CONTRIBUTORS.md +++ b/CONTRIBUTORS.md @@ -211,4 +211,5 @@ In chronological order: * PLCT Lab, Institute of Software Chinese Academy of Sciences * [2022-03] Support RISC-V Vector Intrinisc 1.0 version. 
- \ No newline at end of file +* Pablo Romero + * [2022-08] Fix building from sources for QNX \ No newline at end of file diff --git a/Makefile b/Makefile index 967ab1bb6..289f0eca5 100644 --- a/Makefile +++ b/Makefile @@ -110,6 +110,10 @@ ifeq ($(OSNAME), Darwin) @echo "\"make PREFIX=/your_installation_path/ install\"." @echo @echo "(or set PREFIX in Makefile.rule and run make install." + @echo + @echo "Note that any flags passed to make during build should also be passed to make install" + @echo "to circumvent any install errors." + @echo @echo "If you want to move the .dylib to a new location later, make sure you change" @echo "the internal name of the dylib with:" @echo @@ -118,6 +122,9 @@ endif @echo @echo "To install the library, you can run \"make PREFIX=/path/to/your/installation install\"." @echo + @echo "Note that any flags passed to make during build should also be passed to make install" + @echo "to circumvent any install errors." + @echo shared : ifneq ($(NO_SHARED), 1) diff --git a/Makefile.alpha b/Makefile.alpha index bd4f4d58b..97e4d757e 100644 --- a/Makefile.alpha +++ b/Makefile.alpha @@ -1,42 +1,24 @@ -CPP = $(CC) -E -RANLIB = ranlib - -ifeq ($(LIBSUBARCH), EV4) -LIBNAME = $(LIBPREFIX)_ev4.a -LIBNAME_P = $(LIBPREFIX)_ev4_p.a -endif - -ifeq ($(LIBSUBARCH), EV5) -LIBNAME = $(LIBPREFIX)_ev5.a -LIBNAME_P = $(LIBPREFIX)_ev5_p.a -endif - -ifeq ($(LIBSUBARCH), EV6) -LIBNAME = $(LIBPREFIX)_ev6.a -LIBNAME_P = $(LIBPREFIX)_ev6_p.a -endif - ifneq ($(COMPILER), NATIVE) # GCC User -ifeq ($(LIBSUBARCH), EV4) -OPTION += -DEV4 -mcpu=ev4 +ifeq ($(CORE), EV4) +CCOMMON_OPT += -mcpu=ev4 endif -ifeq ($(LIBSUBARCH), EV5) -OPTION += -DEV5 -mcpu=ev5 +ifeq ($(CORE), EV5) +CCOMMON_OPT += -mcpu=ev5 endif -ifeq ($(LIBSUBARCH), EV6) -OPTION += -DEV6 -mcpu=ev6 +ifeq ($(CORE), EV6) +CCOMMON_OPT += -mcpu=ev6 endif else # Compaq Compiler User -ifeq ($(LIBSUBARCH), EV4) -OPTION += -DEV4 -tune ev4 -arch ev4 +ifeq ($(CORE), EV4) +CCOMMON_OPT += -tune ev4 -arch ev4 endif -ifeq 
($(LIBSUBARCH), EV5) -OPTION += -DEV5 -tune ev5 -arch ev5 +ifeq ($(CORE), EV5) +CCOMMON_OPT += -tune ev5 -arch ev5 endif -ifeq ($(LIBSUBARCH), EV6) -OPTION += -DEV6 -tune ev6 -arch ev6 +ifeq ($(CORE), EV6) +CCOMMON_OPT += -tune ev6 -arch ev6 endif endif diff --git a/Makefile.prebuild b/Makefile.prebuild index 0d12b9761..69cac82ac 100644 --- a/Makefile.prebuild +++ b/Makefile.prebuild @@ -60,9 +60,9 @@ all: getarch_2nd ./getarch_2nd 1 >> $(TARGET_CONF) $(TARGET_CONF): c_check$(SCRIPTSUFFIX) f_check$(SCRIPTSUFFIX) getarch - ./c_check$(SCRIPTSUFFIX) $(TARGET_MAKE) $(TARGET_CONF) $(CC) $(TARGET_FLAGS) $(CFLAGS) + ./c_check$(SCRIPTSUFFIX) $(TARGET_MAKE) $(TARGET_CONF) "$(CC)" "$(TARGET_FLAGS) $(CFLAGS)" ifneq ($(ONLY_CBLAS), 1) - ./f_check$(SCRIPTSUFFIX) $(TARGET_MAKE) $(TARGET_CONF) $(FC) $(TARGET_FLAGS) + ./f_check$(SCRIPTSUFFIX) $(TARGET_MAKE) $(TARGET_CONF) "$(FC)" "$(TARGET_FLAGS)" else #When we only build CBLAS, we set NOFORTRAN=2 echo "NOFORTRAN=2" >> $(TARGET_MAKE) @@ -77,8 +77,8 @@ endif getarch : getarch.c cpuid.S dummy $(CPUIDEMU) - avx512=$$(./c_check$(SCRIPTSUFFIX) - - $(CC) $(TARGET_FLAGS) $(CFLAGS) | grep NO_AVX512); \ - rv64gv=$$(./c_check$(SCRIPTSUFFIX) - - $(CC) $(TARGET_FLAGS) $(CFLAGS) | grep NO_RV64GV); \ + avx512=$$(./c_check$(SCRIPTSUFFIX) - - "$(CC)" "$(TARGET_FLAGS) $(CFLAGS)" | grep NO_AVX512); \ + rv64gv=$$(./c_check$(SCRIPTSUFFIX) - - "$(CC)" "$(TARGET_FLAGS) $(CFLAGS)" | grep NO_RV64GV); \ $(HOSTCC) $(HOST_CFLAGS) $(EXFLAGS) $${avx512:+-D$${avx512}} $${rv64gv:+-D$${rv64gv}} -lm -o $(@F) getarch.c cpuid.S $(CPUIDEMU) getarch_2nd : getarch_2nd.c $(TARGET_CONF) dummy diff --git a/Makefile.system b/Makefile.system index 36e4ef05e..8c1bc8c4e 100644 --- a/Makefile.system +++ b/Makefile.system @@ -677,7 +677,12 @@ endif endif ifeq ($(ARCH), mips64) -DYNAMIC_CORE = LOONGSON3R3 LOONGSON3R4 +DYNAMIC_CORE = LOONGSON3R3 LOONGSON3R4 MIPS64_GENERIC +ifdef DYNAMIC_LIST +override DYNAMIC_CORE = MIPS64_GENERIC $(DYNAMIC_LIST) +XCCOMMON_OPT = -DDYNAMIC_LIST 
-DDYN_MIPS64_GENERIC +XCCOMMON_OPT += $(foreach dcore,$(DYNAMIC_LIST),-DDYN_$(dcore)) +endif endif ifeq ($(ARCH), loongarch64) @@ -856,6 +861,11 @@ CCOMMON_OPT += -mabi=32 BINARY_DEFINED = 1 endif +ifneq (, $(filter $(CORE), MIPS64_GENERIC)) +CCOMMON_OPT += -DNO_MSA +FCOMMON_OPT += -DNO_MSA +endif + ifneq (, $(filter $(CORE),LOONGSON3R3 LOONGSON3R4)) CCOMMON_OPT += -march=loongson3a FCOMMON_OPT += -march=loongson3a diff --git a/TargetList.txt b/TargetList.txt index 631998353..dce9fa765 100644 --- a/TargetList.txt +++ b/TargetList.txt @@ -65,6 +65,7 @@ MIPS1004K MIPS24K 4.MIPS64 CPU: +MIPS64_GENERIC SICORTEX LOONGSON3A LOONGSON3B @@ -128,3 +129,7 @@ LA264 12. Elbrus E2000: E2K +13. Alpha +EV4 +EV5 +EV6 diff --git a/c_check b/c_check index 7f72b05b2..22c5f18ef 100755 --- a/c_check +++ b/c_check @@ -31,8 +31,8 @@ flags="$*" cross_suffix="" -if [ "`dirname $compiler_name`" != '.' ]; then - cross_suffix="$cross_suffix`dirname $compiler_name`/" +if [ "`dirname \"$compiler_name\"`" != '.' ]; then + cross_suffix="$cross_suffix`dirname \"$compiler_name\"`/" fi bn=`basename $compiler_name` diff --git a/common.h b/common.h index 00d1d0baf..e6002d322 100644 --- a/common.h +++ b/common.h @@ -90,7 +90,7 @@ extern "C" { #endif #include -#ifdef OS_LINUX +#if defined(OS_LINUX) || defined(OS_QNX) #include #include #endif @@ -107,7 +107,7 @@ extern "C" { #endif #endif -#ifdef OS_HAIKU +#if defined(OS_HAIKU) || defined(OS_QNX) #define NO_SYSV_IPC #endif diff --git a/common_alpha.h b/common_alpha.h index f1ea8ff94..021eb93ae 100644 --- a/common_alpha.h +++ b/common_alpha.h @@ -43,7 +43,7 @@ #define MB asm("mb") #define WMB asm("wmb") -#define RMB asm("rmb") +#define RMB asm("mb") static void __inline blas_lock(unsigned long *address){ #ifndef __DECC diff --git a/common_macro.h b/common_macro.h index d2fa822c2..3226d0f11 100644 --- a/common_macro.h +++ b/common_macro.h @@ -2612,7 +2612,7 @@ #ifndef ASSEMBLER #if !defined(DYNAMIC_ARCH) \ && (defined(ARCH_X86) || defined(ARCH_X86_64) || 
defined(ARCH_IA64) || defined(ARCH_MIPS64) || defined(ARCH_ARM64) \ - || defined(ARCH_LOONGARCH64) || defined(ARCH_E2K)) + || defined(ARCH_LOONGARCH64) || defined(ARCH_E2K) || defined(ARCH_ALPHA)) extern BLASLONG gemm_offset_a; extern BLASLONG gemm_offset_b; extern BLASLONG sbgemm_p; diff --git a/common_mips64.h b/common_mips64.h index 287459e7d..006cf33e4 100644 --- a/common_mips64.h +++ b/common_mips64.h @@ -86,7 +86,9 @@ static inline unsigned int rpcc(void){ //__asm__ __volatile__("dmfc0 %0, $25, 1": "=r"(tmp):: "memory"); //ret=tmp; __asm__ __volatile__(".set push \n" +#if !defined(__mips_isa_rev) || __mips_isa_rev < 2 ".set mips32r2\n" +#endif "rdhwr %0, $2\n" ".set pop": "=r"(ret):: "memory"); @@ -99,7 +101,9 @@ static inline unsigned int rpcc(void){ static inline int WhereAmI(void){ int ret=0; __asm__ __volatile__(".set push \n" +#if !defined(__mips_isa_rev) || __mips_isa_rev < 2 ".set mips32r2\n" +#endif "rdhwr %0, $0\n" ".set pop": "=r"(ret):: "memory"); return ret; @@ -197,9 +201,15 @@ static inline int blas_quickdivide(blasint x, blasint y){ #if defined(ASSEMBLER) && !defined(NEEDPARAM) +#if defined(__mips_isa_rev) && __mips_isa_rev >= 6 +#define ASSEMBLER_ARCH mips64r6 +#else +#define ASSEMBLER_ARCH mips64 +#endif + #define PROLOGUE \ .text ;\ - .set mips64 ;\ + .set ASSEMBLER_ARCH ;\ .align 5 ;\ .globl REALNAME ;\ .ent REALNAME ;\ diff --git a/cpuid_alpha.c b/cpuid_alpha.c index 58dccdefc..e0e019af2 100644 --- a/cpuid_alpha.c +++ b/cpuid_alpha.c @@ -59,6 +59,11 @@ void get_subarchitecture(void){ printf("ev%d", implver() + 4); } + +void get_corename(void){ + printf("EV%d", implver() + 4); +} + void get_subdirname(void){ printf("alpha"); } diff --git a/cpuid_mips64.c b/cpuid_mips64.c index 8753ee3f0..e5f68b5a1 100644 --- a/cpuid_mips64.c +++ b/cpuid_mips64.c @@ -70,16 +70,18 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. /* or implied, of The University of Texas at Austin. 
*/ /*********************************************************************/ -#define CPU_UNKNOWN 0 -#define CPU_SICORTEX 1 -#define CPU_LOONGSON3R3 2 -#define CPU_LOONGSON3R4 3 -#define CPU_I6400 4 -#define CPU_P6600 5 -#define CPU_I6500 6 +#define CPU_UNKNOWN 0 +#define CPU_MIPS64_GENERIC 1 +#define CPU_SICORTEX 2 +#define CPU_LOONGSON3R3 3 +#define CPU_LOONGSON3R4 4 +#define CPU_I6400 5 +#define CPU_P6600 6 +#define CPU_I6500 7 static char *cpuname[] = { "UNKNOWN", + "MIPS64_GENERIC", "SICORTEX", "LOONGSON3R3", "LOONGSON3R4", @@ -113,8 +115,11 @@ int detect(void){ return CPU_SICORTEX; } } + + return CPU_MIPS64_GENERIC; +#else + return CPU_UNKNOWN; #endif - return CPU_UNKNOWN; } char *get_corename(void){ @@ -136,9 +141,11 @@ void get_subarchitecture(void){ printf("P6600"); }else if(detect()==CPU_I6500){ printf("I6500"); - }else{ + }else if(detect()==CPU_SICORTEX){ printf("SICORTEX"); - } + }else{ + printf("MIPS64_GENERIC"); + } } void get_subdirname(void){ @@ -215,8 +222,8 @@ void get_libname(void){ printf("p6600\n"); }else if(detect()==CPU_I6500) { printf("i6500\n"); - }else{ - printf("mips64\n"); + }else { + printf("mips64_generic\n"); } } diff --git a/ctest/CMakeLists.txt b/ctest/CMakeLists.txt index e779fb168..91338b73b 100644 --- a/ctest/CMakeLists.txt +++ b/ctest/CMakeLists.txt @@ -40,7 +40,7 @@ else() c_${float_char}blas1.c) endif() target_link_libraries(x${float_char}cblat1 ${OpenBLAS_LIBNAME}) - if(${CMAKE_SYSTEM_NAME} MATCHES "Linux" OR ${CMAKE_SYSTEM_NAME} MATCHES "FreeBSD") + if(${CMAKE_SYSTEM_NAME} MATCHES "Linux" OR ${CMAKE_SYSTEM_NAME} MATCHES "FreeBSD" OR ${CMAKE_SYSTEM_NAME} MATCHES "QNX") target_link_libraries(x${float_char}cblat1 m) endif() add_test(NAME "x${float_char}cblat1" @@ -65,7 +65,7 @@ else() constant.c) endif() target_link_libraries(x${float_char}cblat2 ${OpenBLAS_LIBNAME}) - if(${CMAKE_SYSTEM_NAME} MATCHES "Linux" OR ${CMAKE_SYSTEM_NAME} MATCHES "FreeBSD") + if(${CMAKE_SYSTEM_NAME} MATCHES "Linux" OR ${CMAKE_SYSTEM_NAME} MATCHES
"FreeBSD" OR ${CMAKE_SYSTEM_NAME} MATCHES "QNX") target_link_libraries(x${float_char}cblat2 m) endif() add_test(NAME "x${float_char}cblat2" @@ -90,7 +90,7 @@ else() constant.c) endif() target_link_libraries(x${float_char}cblat3 ${OpenBLAS_LIBNAME}) - if(${CMAKE_SYSTEM_NAME} MATCHES "Linux" OR ${CMAKE_SYSTEM_NAME} MATCHES "FreeBSD") + if(${CMAKE_SYSTEM_NAME} MATCHES "Linux" OR ${CMAKE_SYSTEM_NAME} MATCHES "FreeBSD" OR ${CMAKE_SYSTEM_NAME} MATCHES "QNX") target_link_libraries(x${float_char}cblat3 m) endif() add_test(NAME "x${float_char}cblat3" diff --git a/driver/others/dynamic_mips64.c b/driver/others/dynamic_mips64.c index 9fd19d739..7fc347b0c 100644 --- a/driver/others/dynamic_mips64.c +++ b/driver/others/dynamic_mips64.c @@ -38,22 +38,48 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include "common.h" +#if (defined OS_LINUX || defined OS_ANDROID) +#include +#include + +#ifndef HWCAP_LOONGSON_CPUCFG +#define HWCAP_LOONGSON_CPUCFG (1 << 14) +#endif +#endif + +#ifdef DYNAMIC_LIST +extern gotoblas_t gotoblas_MIPS64_GENERIC; +#ifdef DYN_LOONGSON3R3 +extern gotoblas_t gotoblas_LOONGSON3R3; +#else +#define gotoblas_LOONGSON3R3 gotoblas_MIPS64_GENERIC +#endif +#ifdef DYN_LOONGSON3R4 +extern gotoblas_t gotoblas_LOONGSON3R4; +#else +#define gotoblas_LOONGSON3R4 gotoblas_MIPS64_GENERIC +#endif +#else extern gotoblas_t gotoblas_LOONGSON3R3; extern gotoblas_t gotoblas_LOONGSON3R4; +extern gotoblas_t gotoblas_MIPS64_GENERIC; +#endif extern void openblas_warning(int verbose, const char * msg); -#define NUM_CORETYPES 2 +#define NUM_CORETYPES 3 static char *corename[] = { + "MIPS64_GENERIC", "loongson3r3", "loongson3r4", "UNKNOWN" }; char *gotoblas_corename(void) { - if (gotoblas == &gotoblas_LOONGSON3R3) return corename[0]; - if (gotoblas == &gotoblas_LOONGSON3R4) return corename[1]; + if (gotoblas == &gotoblas_MIPS64_GENERIC) return corename[0]; + if (gotoblas == &gotoblas_LOONGSON3R3) return corename[1]; + if (gotoblas ==
&gotoblas_LOONGSON3R4) return corename[2]; return corename[NUM_CORETYPES]; } @@ -73,77 +99,32 @@ static gotoblas_t *force_coretype(char *coretype) { switch (found) { - case 0: return (&gotoblas_LOONGSON3R3); - case 1: return (&gotoblas_LOONGSON3R4); + case 0: return (&gotoblas_MIPS64_GENERIC); + case 1: return (&gotoblas_LOONGSON3R3); + case 2: return (&gotoblas_LOONGSON3R4); } snprintf(message, 128, "Core not found: %s\n", coretype); openblas_warning(1, message); return NULL; } +#if (defined OS_LINUX || defined OS_ANDROID) #define MMI_MASK 0x00000010 #define MSA_MASK 0x00000020 -int fd[2]; -int support_cpucfg; - -static void handler(int signum) -{ - close(fd[1]); - exit(1); -} - -/* Brief : Function to check if cpucfg supported on loongson - * Return: 1 supported - * 0 not supported - */ -static int cpucfg_test(void) { - pid_t pid; - int status = 0; - - support_cpucfg = 0; - pipe(fd); - pid = fork(); - if (pid == 0) { /* Subprocess */ - struct sigaction act; - close(fd[0]); - /* Set signal action for SIGILL. */ - act.sa_handler = handler; - sigaction(SIGILL,&act,NULL); - - /* Execute cpucfg in subprocess. 
*/ - __asm__ volatile( - ".insn \n\t" - ".word (0xc8080118) \n\t" - ::: - ); - support_cpucfg = 1; - write(fd[1],&support_cpucfg,sizeof(support_cpucfg)); - close(fd[1]); - exit(0); - } else if (pid > 0){ /* Parent process*/ - close(fd[1]); - if ((waitpid(pid,&status,0) <= 0) || - (read(fd[0],&support_cpucfg,sizeof(support_cpucfg)) <= 0)) - support_cpucfg = 0; - close(fd[0]); - } else { - support_cpucfg = 0; - } - - return support_cpucfg; -} - static gotoblas_t *get_coretype_from_cpucfg(void) { int flag = 0; __asm__ volatile( + ".set push \n\t" + ".set noat \n\t" ".insn \n\t" - "dli $8, 0x01 \n\t" - ".word (0xc9084918) \n\t" - "usw $9, 0x00(%0) \n\t" + "dli $1, 0x01 \n\t" + ".word (0xc8080118) \n\t" + "move %0, $1 \n\t" + ".set pop \n\t" + : "=r"(flag) + : : - : "r"(&flag) - : "memory" ); if (flag & MSA_MASK) return (&gotoblas_LOONGSON3R4); @@ -153,7 +134,7 @@ static gotoblas_t *get_coretype_from_cpucfg(void) { } static gotoblas_t *get_coretype_from_cpuinfo(void) { -#ifdef linux +#ifdef __linux FILE *infile; char buffer[512], *p; @@ -176,17 +157,19 @@ static gotoblas_t *get_coretype_from_cpuinfo(void) { return NULL; } #endif - return NULL; + return NULL; } +#endif static gotoblas_t *get_coretype(void) { - int ret = 0; - - ret = cpucfg_test(); - if (ret == 1) - return get_coretype_from_cpucfg(); - else - return get_coretype_from_cpuinfo(); +#if (!defined OS_LINUX && !defined OS_ANDROID) + return NULL; +#else + if (getauxval(AT_HWCAP) & HWCAP_LOONGSON_CPUCFG) /* only execute the raw CPUCFG opcode when the kernel advertises it; otherwise parse /proc/cpuinfo */ + return get_coretype_from_cpucfg(); + else + return get_coretype_from_cpuinfo(); +#endif } void gotoblas_dynamic_init(void) { @@ -208,9 +191,9 @@ void gotoblas_dynamic_init(void) { if (gotoblas == NULL) { - snprintf(coremsg, 128, "Falling back to loongson3r3 core\n"); + snprintf(coremsg, 128, "Falling back to MIPS64_GENERIC\n"); openblas_warning(1, coremsg); - gotoblas = &gotoblas_LOONGSON3R3; + gotoblas = &gotoblas_MIPS64_GENERIC; } if (gotoblas && gotoblas->init) { diff --git a/exports/gensymbol
b/exports/gensymbol index 83222a215..f05de626f 100755 --- a/exports/gensymbol +++ b/exports/gensymbol @@ -4000,6 +4000,22 @@ case "$p1" in no_underscore_objs="$no_underscore_objs $misc_common_objs" printf 'int main(void){\n' + for obj in $underscore_objs; do + [ "$obj" != "xerbla" ] && printf 'extern void %s%s%s%s();\n' \ + "$symbolprefix" "$obj" "$bu" "$symbolsuffix" + done + + for obj in $need_2underscore_objs; do + printf 'extern void %s%s%s%s%s();\n' \ + "$symbolprefix" "$obj" "$bu" "$bu" "$symbolsuffix" + done + + for obj in $no_underscore_objs; do + printf 'extern void %s%s%s();\n' \ + "$symbolprefix" "$obj" "$symbolsuffix" + done + + printf '\n' for obj in $underscore_objs; do [ "$obj" != "xerbla" ] && printf '%s%s%s%s();\n' \ "$symbolprefix" "$obj" "$bu" "$symbolsuffix" diff --git a/exports/gensymbol.pl b/exports/gensymbol.pl index ac62bc058..e38a3cc89 100644 --- a/exports/gensymbol.pl +++ b/exports/gensymbol.pl @@ -3955,6 +3955,18 @@ if ($ARGV[0] eq "linktest") { @no_underscore_objs = (@no_underscore_objs, @misc_common_objs); print "int main(void){\n"; + foreach $objs (@underscore_objs) { + print "extern void ", $symbolprefix, $objs, $bu, $symbolsuffix, "();\n" if $objs ne "xerbla"; + } + + foreach $objs (@need_2underscore_objs) { + print "extern void ", $symbolprefix, $objs, $bu, $bu, $symbolsuffix, "();\n"; + } + + foreach $objs (@no_underscore_objs) { + print "extern void ", $symbolprefix, $objs, $symbolsuffix, "();\n"; + } + foreach $objs (@underscore_objs) { print $symbolprefix, $objs, $bu, $symbolsuffix, "();\n" if $objs ne "xerbla"; } diff --git a/getarch.c b/getarch.c index 1113d02ce..16ead9d84 100644 --- a/getarch.c +++ b/getarch.c @@ -131,6 +131,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
/* #define FORCE_PPC440 */ /* #define FORCE_PPC440FP2 */ /* #define FORCE_CELL */ +/* #define FORCE_MIPS64_GENERIC */ /* #define FORCE_SICORTEX */ /* #define FORCE_LOONGSON3R3 */ /* #define FORCE_LOONGSON3R4 */ @@ -146,6 +147,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. /* #define FORCE_SPARCV7 */ /* #define FORCE_ZARCH_GENERIC */ /* #define FORCE_Z13 */ +/* #define FORCE_EV4 */ +/* #define FORCE_EV5 */ +/* #define FORCE_EV6 */ /* #define FORCE_GENERIC */ #ifdef FORCE_P2 @@ -915,6 +919,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define CORENAME "CELL" #endif +#ifdef FORCE_MIPS64_GENERIC +#define FORCE +#define ARCHITECTURE "MIPS" +#define SUBARCHITECTURE "MIPS64_GENERIC" +#define SUBDIRNAME "mips64" +#define ARCHCONFIG "-DMIPS64_GENERIC " \ + "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \ + "-DL2_SIZE=1048576 -DL2_LINESIZE=32 " \ + "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 " +#define LIBNAME "mips64_generic" +#define CORENAME "MIPS64_GENERIC" +#else +#endif + #ifdef FORCE_SICORTEX #define FORCE #define ARCHITECTURE "MIPS" @@ -1601,6 +1619,42 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define CORENAME "Z14" #endif +#ifdef FORCE_EV4 +#define FORCE +#define ARCHITECTURE "ALPHA" +#define SUBARCHITECTURE "ev4" +#define ARCHCONFIG "-DEV4 " \ + "-DL1_DATA_SIZE=16384 -DL1_DATA_LINESIZE=32 " \ + "-DL2_SIZE=2097152 -DL2_LINESIZE=32 " \ + "-DDTB_DEFAULT_ENTRIES=32 -DDTB_SIZE=8192 " +#define LIBNAME "ev4" +#define CORENAME "EV4" +#endif + +#ifdef FORCE_EV5 +#define FORCE +#define ARCHITECTURE "ALPHA" +#define SUBARCHITECTURE "ev5" +#define ARCHCONFIG "-DEV5 " \ + "-DL1_DATA_SIZE=16384 -DL1_DATA_LINESIZE=32 " \ + "-DL2_SIZE=2097152 -DL2_LINESIZE=64 " \ + "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=8192 " +#define LIBNAME "ev5" +#define CORENAME "EV5" +#endif + +#ifdef FORCE_EV6 +#define FORCE +#define ARCHITECTURE "ALPHA" +#define SUBARCHITECTURE "ev6" +#define ARCHCONFIG "-DEV6 " \ + "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \ + "-DL2_SIZE=4194304 -DL2_LINESIZE=64 " \ + "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=8192 " +#define LIBNAME "ev6" +#define CORENAME "EV6" +#endif + #ifdef FORCE_C910V #define FORCE #define ARCHITECTURE "RISCV64" @@ -1777,7 +1831,7 @@ int main(int argc, char *argv[]){ #ifdef FORCE printf("CORE=%s\n", CORENAME); #else -#if defined(INTEL_AMD) || defined(POWER) || defined(__mips__) || defined(__arm__) || defined(__aarch64__) || defined(ZARCH) || defined(sparc) || defined(__loongarch__) || defined(__riscv) +#if defined(INTEL_AMD) || defined(POWER) || defined(__mips__) || defined(__arm__) || defined(__aarch64__) || defined(ZARCH) || defined(sparc) || defined(__loongarch__) || defined(__riscv) || defined(__alpha__) printf("CORE=%s\n", get_corename()); #endif #endif diff --git a/kernel/alpha/amax.S b/kernel/alpha/amax.S index e528adc07..88635e8ec 100644 --- a/kernel/alpha/amax.S +++ b/kernel/alpha/amax.S @@ -38,7 +38,7 @@ #define ASSEMBLER #include "common.h" -#include "version.h" + #define N $16 #define X $17 diff --git a/kernel/alpha/asum.S b/kernel/alpha/asum.S index b312d064b..54725b5cc 100644 --- a/kernel/alpha/asum.S +++ 
b/kernel/alpha/asum.S @@ -38,7 +38,7 @@ #define ASSEMBLER #include "common.h" -#include "version.h" + #define PREFETCHSIZE 88 diff --git a/kernel/alpha/axpy.S b/kernel/alpha/axpy.S index 1007b063b..403b89df1 100644 --- a/kernel/alpha/axpy.S +++ b/kernel/alpha/axpy.S @@ -38,7 +38,7 @@ #define ASSEMBLER #include "common.h" -#include "version.h" + #define PREFETCHSIZE 40 diff --git a/kernel/alpha/cabs.S b/kernel/alpha/cabs.S index 5fa27af53..79b92836b 100644 --- a/kernel/alpha/cabs.S +++ b/kernel/alpha/cabs.S @@ -38,7 +38,7 @@ #define ASSEMBLER #include "common.h" -#include "version.h" + .set noat .set noreorder diff --git a/kernel/alpha/cnrm2.S b/kernel/alpha/cnrm2.S index bd1ab8782..445eaa7ea 100644 --- a/kernel/alpha/cnrm2.S +++ b/kernel/alpha/cnrm2.S @@ -39,7 +39,7 @@ #define ASSEMBLER #include "common.h" -#include "version.h" + #define PREFETCH_SIZE 80 diff --git a/kernel/alpha/copy.S b/kernel/alpha/copy.S index 749039c9e..315a02b1e 100644 --- a/kernel/alpha/copy.S +++ b/kernel/alpha/copy.S @@ -38,7 +38,7 @@ #define ASSEMBLER #include "common.h" -#include "version.h" + #define N $16 #define X $17 diff --git a/kernel/alpha/cscal.S b/kernel/alpha/cscal.S index bba3137a9..a09306a1c 100644 --- a/kernel/alpha/cscal.S +++ b/kernel/alpha/cscal.S @@ -42,7 +42,7 @@ #define ASSEMBLER #include "common.h" -#include "version.h" + .globl NAME .ent NAME diff --git a/kernel/alpha/dnrm2.S b/kernel/alpha/dnrm2.S index 0dfb64924..c71a8e3c9 100644 --- a/kernel/alpha/dnrm2.S +++ b/kernel/alpha/dnrm2.S @@ -39,7 +39,7 @@ #define ASSEMBLER #include "common.h" -#include "version.h" + #define PREFETCH_SIZE 80 diff --git a/kernel/alpha/dot.S b/kernel/alpha/dot.S index 330196c78..fe84c719f 100644 --- a/kernel/alpha/dot.S +++ b/kernel/alpha/dot.S @@ -38,7 +38,7 @@ #define ASSEMBLER #include "common.h" -#include "version.h" + #define PREFETCHSIZE 88 diff --git a/kernel/alpha/gemm_beta.S b/kernel/alpha/gemm_beta.S index 44b2fada1..e234a3216 100644 --- a/kernel/alpha/gemm_beta.S +++ 
b/kernel/alpha/gemm_beta.S @@ -38,7 +38,7 @@ #define ASSEMBLER #include "common.h" -#include "version.h" + .set noat .set noreorder diff --git a/kernel/alpha/gemm_kernel_4x4.S b/kernel/alpha/gemm_kernel_4x4.S index c55d817df..8fda1ab5a 100644 --- a/kernel/alpha/gemm_kernel_4x4.S +++ b/kernel/alpha/gemm_kernel_4x4.S @@ -38,7 +38,7 @@ #define ASSEMBLER #include "common.h" -#include "version.h" + #if !defined(EV4) && !defined(EV5) && !defined(EV6) #error "Architecture is not specified." diff --git a/kernel/alpha/gemv_n.S b/kernel/alpha/gemv_n.S index 3e9d1d7fb..0fcd5b865 100644 --- a/kernel/alpha/gemv_n.S +++ b/kernel/alpha/gemv_n.S @@ -38,7 +38,7 @@ #define ASSEMBLER #include "common.h" -#include "version.h" + #define STACKSIZE 64 #define PREFETCHSIZE 32 diff --git a/kernel/alpha/gemv_t.S b/kernel/alpha/gemv_t.S index ea95546e8..f9432486f 100644 --- a/kernel/alpha/gemv_t.S +++ b/kernel/alpha/gemv_t.S @@ -38,7 +38,7 @@ #define ASSEMBLER #include "common.h" -#include "version.h" + #define STACKSIZE 64 #define PREFETCHSIZE 32 diff --git a/kernel/alpha/iamax.S b/kernel/alpha/iamax.S index 2be5d5d08..384df07e6 100644 --- a/kernel/alpha/iamax.S +++ b/kernel/alpha/iamax.S @@ -38,7 +38,7 @@ #define ASSEMBLER #include "common.h" -#include "version.h" + #define N $16 #define X $17 diff --git a/kernel/alpha/imax.S b/kernel/alpha/imax.S index d8958c86a..785751075 100644 --- a/kernel/alpha/imax.S +++ b/kernel/alpha/imax.S @@ -38,7 +38,7 @@ #define ASSEMBLER #include "common.h" -#include "version.h" + #define N $16 #define X $17 diff --git a/kernel/alpha/izamax.S b/kernel/alpha/izamax.S index c932581ae..d85b909e1 100644 --- a/kernel/alpha/izamax.S +++ b/kernel/alpha/izamax.S @@ -38,7 +38,7 @@ #define ASSEMBLER #include "common.h" -#include "version.h" + #define N $16 #define X $17 diff --git a/kernel/alpha/lsame.S b/kernel/alpha/lsame.S index 082f79082..b1a7d5b70 100644 --- a/kernel/alpha/lsame.S +++ b/kernel/alpha/lsame.S @@ -36,7 +36,7 @@ /* or implied, of The University of 
Texas at Austin. */ /*********************************************************************/ -#include "version.h" + .set noat .set noreorder diff --git a/kernel/alpha/max.S b/kernel/alpha/max.S index af1b8fb85..935f27718 100644 --- a/kernel/alpha/max.S +++ b/kernel/alpha/max.S @@ -38,7 +38,7 @@ #define ASSEMBLER #include "common.h" -#include "version.h" + #define N $16 #define X $17 diff --git a/kernel/alpha/rot.S b/kernel/alpha/rot.S index d1656d7e3..7a0991015 100644 --- a/kernel/alpha/rot.S +++ b/kernel/alpha/rot.S @@ -38,7 +38,7 @@ #define ASSEMBLER #include "common.h" -#include "version.h" + #define N $16 #define X $17 diff --git a/kernel/alpha/scal.S b/kernel/alpha/scal.S index 2d95801c8..db6959520 100644 --- a/kernel/alpha/scal.S +++ b/kernel/alpha/scal.S @@ -38,7 +38,7 @@ #define ASSEMBLER #include "common.h" -#include "version.h" + #define PREFETCHSIZE 88 diff --git a/kernel/alpha/snrm2.S b/kernel/alpha/snrm2.S index 0dfb64924..c71a8e3c9 100644 --- a/kernel/alpha/snrm2.S +++ b/kernel/alpha/snrm2.S @@ -39,7 +39,7 @@ #define ASSEMBLER #include "common.h" -#include "version.h" + #define PREFETCH_SIZE 80 diff --git a/kernel/alpha/sum.S b/kernel/alpha/sum.S index 3902817a7..adc4ca5a1 100644 --- a/kernel/alpha/sum.S +++ b/kernel/alpha/sum.S @@ -38,7 +38,7 @@ #define ASSEMBLER #include "common.h" -#include "version.h" + #define PREFETCHSIZE 88 diff --git a/kernel/alpha/swap.S b/kernel/alpha/swap.S index 9e21990c4..34e58a72a 100644 --- a/kernel/alpha/swap.S +++ b/kernel/alpha/swap.S @@ -38,7 +38,7 @@ #define ASSEMBLER #include "common.h" -#include "version.h" + PROLOGUE PROFCODE diff --git a/kernel/alpha/trsm_kernel_4x4_LN.S b/kernel/alpha/trsm_kernel_4x4_LN.S index 600b4e255..be5062244 100644 --- a/kernel/alpha/trsm_kernel_4x4_LN.S +++ b/kernel/alpha/trsm_kernel_4x4_LN.S @@ -38,7 +38,7 @@ #define ASSEMBLER #include "common.h" -#include "version.h" + #if !defined(EV4) && !defined(EV5) && !defined(EV6) #error "Architecture is not specified." 
diff --git a/kernel/alpha/trsm_kernel_4x4_LT.S b/kernel/alpha/trsm_kernel_4x4_LT.S index 81436d034..dfc7e98aa 100644 --- a/kernel/alpha/trsm_kernel_4x4_LT.S +++ b/kernel/alpha/trsm_kernel_4x4_LT.S @@ -38,7 +38,7 @@ #define ASSEMBLER #include "common.h" -#include "version.h" + #if !defined(EV4) && !defined(EV5) && !defined(EV6) #error "Architecture is not specified." diff --git a/kernel/alpha/trsm_kernel_4x4_RT.S b/kernel/alpha/trsm_kernel_4x4_RT.S index 71d6c43fa..d77ccc61b 100644 --- a/kernel/alpha/trsm_kernel_4x4_RT.S +++ b/kernel/alpha/trsm_kernel_4x4_RT.S @@ -38,7 +38,7 @@ #define ASSEMBLER #include "common.h" -#include "version.h" + #if !defined(EV4) && !defined(EV5) && !defined(EV6) #error "Architecture is not specified." diff --git a/kernel/alpha/zamax.S b/kernel/alpha/zamax.S index f1ea18d2d..96502a7a9 100644 --- a/kernel/alpha/zamax.S +++ b/kernel/alpha/zamax.S @@ -38,7 +38,7 @@ #define ASSEMBLER #include "common.h" -#include "version.h" + #define N $16 #define X $17 diff --git a/kernel/alpha/zasum.S b/kernel/alpha/zasum.S index 67ed78584..37a1c234a 100644 --- a/kernel/alpha/zasum.S +++ b/kernel/alpha/zasum.S @@ -38,7 +38,7 @@ #define ASSEMBLER #include "common.h" -#include "version.h" + #define PREFETCHSIZE 88 diff --git a/kernel/alpha/zaxpy.S b/kernel/alpha/zaxpy.S index 1416769a1..1494c7fc0 100644 --- a/kernel/alpha/zaxpy.S +++ b/kernel/alpha/zaxpy.S @@ -38,7 +38,7 @@ #define ASSEMBLER #include "common.h" -#include "version.h" + #define PREFETCHSIZE 40 diff --git a/kernel/alpha/zdot.S b/kernel/alpha/zdot.S index 78dcae668..724526407 100644 --- a/kernel/alpha/zdot.S +++ b/kernel/alpha/zdot.S @@ -38,7 +38,7 @@ #define ASSEMBLER #include "common.h" -#include "version.h" + #define PREFETCHSIZE 88 diff --git a/kernel/alpha/zgemm_beta.S b/kernel/alpha/zgemm_beta.S index f7ca347f1..fcabe48d0 100644 --- a/kernel/alpha/zgemm_beta.S +++ b/kernel/alpha/zgemm_beta.S @@ -38,7 +38,7 @@ #define ASSEMBLER #include "common.h" -#include "version.h" + .set noat .set 
noreorder diff --git a/kernel/alpha/zgemm_kernel_2x2.S b/kernel/alpha/zgemm_kernel_2x2.S index 67ba6d108..e56a3e10d 100644 --- a/kernel/alpha/zgemm_kernel_2x2.S +++ b/kernel/alpha/zgemm_kernel_2x2.S @@ -38,7 +38,7 @@ #define ASSEMBLER #include "common.h" -#include "version.h" + #if !defined(EV4) && !defined(EV5) && !defined(EV6) #error "Architecture is not specified." diff --git a/kernel/alpha/zgemv_n.S b/kernel/alpha/zgemv_n.S index fd602a3eb..2ebb918d5 100644 --- a/kernel/alpha/zgemv_n.S +++ b/kernel/alpha/zgemv_n.S @@ -38,7 +38,7 @@ #define ASSEMBLER #include "common.h" -#include "version.h" + #define STACKSIZE 64 #define PREFETCHSIZE 32 diff --git a/kernel/alpha/zgemv_t.S b/kernel/alpha/zgemv_t.S index bac56eb3f..96d8caa27 100644 --- a/kernel/alpha/zgemv_t.S +++ b/kernel/alpha/zgemv_t.S @@ -38,7 +38,7 @@ #define ASSEMBLER #include "common.h" -#include "version.h" + #define STACKSIZE 64 #define PREFETCHSIZE 32 diff --git a/kernel/alpha/znrm2.S b/kernel/alpha/znrm2.S index bd1ab8782..445eaa7ea 100644 --- a/kernel/alpha/znrm2.S +++ b/kernel/alpha/znrm2.S @@ -39,7 +39,7 @@ #define ASSEMBLER #include "common.h" -#include "version.h" + #define PREFETCH_SIZE 80 diff --git a/kernel/alpha/zrot.S b/kernel/alpha/zrot.S index afcdf12b4..61fe4f3d9 100644 --- a/kernel/alpha/zrot.S +++ b/kernel/alpha/zrot.S @@ -38,7 +38,7 @@ #define ASSEMBLER #include "common.h" -#include "version.h" + #define N $16 #define X $17 diff --git a/kernel/alpha/zscal.S b/kernel/alpha/zscal.S index 1a2ac10b3..bed3033f8 100644 --- a/kernel/alpha/zscal.S +++ b/kernel/alpha/zscal.S @@ -38,7 +38,7 @@ #define ASSEMBLER #include "common.h" -#include "version.h" + #define PREFETCHSIZE 88 diff --git a/kernel/alpha/zsum.S b/kernel/alpha/zsum.S index 1ad0eb137..5c51bbc6f 100644 --- a/kernel/alpha/zsum.S +++ b/kernel/alpha/zsum.S @@ -38,7 +38,7 @@ #define ASSEMBLER #include "common.h" -#include "version.h" + #define PREFETCHSIZE 88 diff --git a/kernel/alpha/zswap.S b/kernel/alpha/zswap.S index 
a12a2c7a7..02be94115 100644 --- a/kernel/alpha/zswap.S +++ b/kernel/alpha/zswap.S @@ -38,7 +38,7 @@ #define ASSEMBLER #include "common.h" -#include "version.h" + PROLOGUE PROFCODE diff --git a/kernel/alpha/ztrsm_kernel_2x2_LN.S b/kernel/alpha/ztrsm_kernel_2x2_LN.S index dcbe4e236..44d46daa7 100644 --- a/kernel/alpha/ztrsm_kernel_2x2_LN.S +++ b/kernel/alpha/ztrsm_kernel_2x2_LN.S @@ -38,7 +38,7 @@ #define ASSEMBLER #include "common.h" -#include "version.h" + #if !defined(EV4) && !defined(EV5) && !defined(EV6) #error "Architecture is not specified." diff --git a/kernel/alpha/ztrsm_kernel_2x2_LT.S b/kernel/alpha/ztrsm_kernel_2x2_LT.S index e0c82026e..f17987faf 100644 --- a/kernel/alpha/ztrsm_kernel_2x2_LT.S +++ b/kernel/alpha/ztrsm_kernel_2x2_LT.S @@ -38,7 +38,7 @@ #define ASSEMBLER #include "common.h" -#include "version.h" + #if !defined(EV4) && !defined(EV5) && !defined(EV6) #error "Architecture is not specified." diff --git a/kernel/alpha/ztrsm_kernel_2x2_RT.S b/kernel/alpha/ztrsm_kernel_2x2_RT.S index e890f599d..90b56c954 100644 --- a/kernel/alpha/ztrsm_kernel_2x2_RT.S +++ b/kernel/alpha/ztrsm_kernel_2x2_RT.S @@ -38,7 +38,7 @@ #define ASSEMBLER #include "common.h" -#include "version.h" + #if !defined(EV4) && !defined(EV5) && !defined(EV6) #error "Architecture is not specified." 
diff --git a/kernel/mips64/KERNEL b/kernel/mips64/KERNEL index 97ef3692c..54939a9ef 100644 --- a/kernel/mips64/KERNEL +++ b/kernel/mips64/KERNEL @@ -42,50 +42,58 @@ endif ifndef SGEMMKERNEL SGEMMKERNEL = gemm_kernel.S +ifneq ($(SGEMM_UNROLL_M), $(SGEMM_UNROLL_N)) SGEMMINCOPY = ../generic/gemm_ncopy_2.c SGEMMITCOPY = ../generic/gemm_tcopy_2.c +SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX) +SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX) +endif SGEMMONCOPY = ../generic/gemm_ncopy_8.c SGEMMOTCOPY = ../generic/gemm_tcopy_8.c -SGEMMINCOPYOBJ = sgemm_incopy.o -SGEMMITCOPYOBJ = sgemm_itcopy.o -SGEMMONCOPYOBJ = sgemm_oncopy.o -SGEMMOTCOPYOBJ = sgemm_otcopy.o +SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX) +SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX) endif ifndef DGEMMKERNEL DGEMMKERNEL = gemm_kernel.S +ifneq ($(DGEMM_UNROLL_M), $(DGEMM_UNROLL_N)) DGEMMINCOPY = ../generic/gemm_ncopy_2.c DGEMMITCOPY = ../generic/gemm_tcopy_2.c +DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX) +DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX) +endif DGEMMONCOPY = ../generic/gemm_ncopy_8.c DGEMMOTCOPY = ../generic/gemm_tcopy_8.c -DGEMMINCOPYOBJ = dgemm_incopy.o -DGEMMITCOPYOBJ = dgemm_itcopy.o -DGEMMONCOPYOBJ = dgemm_oncopy.o -DGEMMOTCOPYOBJ = dgemm_otcopy.o +DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX) +DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX) endif ifndef CGEMMKERNEL CGEMMKERNEL = zgemm_kernel.S +ifneq ($(CGEMM_UNROLL_M), $(CGEMM_UNROLL_N)) CGEMMINCOPY = ../generic/zgemm_ncopy_1.c CGEMMITCOPY = ../generic/zgemm_tcopy_1.c +CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX) +CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX) +endif CGEMMONCOPY = ../generic/zgemm_ncopy_4.c CGEMMOTCOPY = ../generic/zgemm_tcopy_4.c -CGEMMINCOPYOBJ = cgemm_incopy.o -CGEMMITCOPYOBJ = cgemm_itcopy.o -CGEMMONCOPYOBJ = cgemm_oncopy.o -CGEMMOTCOPYOBJ = cgemm_otcopy.o +CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX) +CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX) endif ifndef 
ZGEMMKERNEL ZGEMMKERNEL = zgemm_kernel.S +ifneq ($(ZGEMM_UNROLL_M), $(ZGEMM_UNROLL_N)) ZGEMMINCOPY = ../generic/zgemm_ncopy_1.c ZGEMMITCOPY = ../generic/zgemm_tcopy_1.c +ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX) +ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX) +endif ZGEMMONCOPY = ../generic/zgemm_ncopy_4.c ZGEMMOTCOPY = ../generic/zgemm_tcopy_4.c -ZGEMMINCOPYOBJ = zgemm_incopy.o -ZGEMMITCOPYOBJ = zgemm_itcopy.o -ZGEMMONCOPYOBJ = zgemm_oncopy.o -ZGEMMOTCOPYOBJ = zgemm_otcopy.o +ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX) +ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX) endif ifndef SGEMM_BETA diff --git a/lapack-netlib/SRC/iparam2stage.c b/lapack-netlib/SRC/iparam2stage.c index 1b6c4c8b8..7ba938dcd 100644 --- a/lapack-netlib/SRC/iparam2stage.c +++ b/lapack-netlib/SRC/iparam2stage.c @@ -717,11 +717,12 @@ integer iparam2stage_(integer *ispec, char *name__, char *opts, integer *ni, ret_val = -1; // s_copy(subnam, name__, (ftnlen)12, name_len); -strncpy(subnam,name__,13); -subnam[13]='\0'; -for (int i=0;i<13;i++) subnam[i]=toupper(subnam[i]); - //fprintf(stderr,"iparam2stage, name__ gelesen #%s#\n",name__); -//fprintf(stderr,"iparam2stage, subnam gelesen #%s#\n",subnam); + strncpy(subnam,name__,13); + subnam[13]='\0'; + { + int i; + for (i=0;i<13;i++) subnam[i]=toupper(subnam[i]); + } #if 0 diff --git a/lapack-netlib/TESTING/MATGEN/Makefile b/lapack-netlib/TESTING/MATGEN/Makefile index 62a215b58..822a1eee0 100644 --- a/lapack-netlib/TESTING/MATGEN/Makefile +++ b/lapack-netlib/TESTING/MATGEN/Makefile @@ -40,27 +40,40 @@ ifneq ($(C_LAPACK), 1) $(FC) $(FFLAGS) -c -o $@ $< endif +ifneq "$(or $(BUILD_SINGLE),$(BUILD_COMPLEX))" "" SCATGEN = slatm1.o slatm7.o slaran.o slarnd.o +endif +ifeq ($(BUILD_SINGLE),1) SMATGEN = slatms.o slatme.o slatmr.o slatmt.o \ slagge.o slagsy.o slakf2.o slarge.o slaror.o slarot.o slatm2.o \ slatm3.o slatm5.o slatm6.o slahilb.o +endif +ifeq ($(BUILD_COMPLEX),1) CMATGEN = clatms.o clatme.o clatmr.o clatmt.o \ clagge.o claghe.o 
clagsy.o clakf2.o clarge.o claror.o clarot.o \ clatm1.o clarnd.o clatm2.o clatm3.o clatm5.o clatm6.o clahilb.o +endif +ifneq "$(or $(BUILD_DOUBLE),$(BUILD_COMPLEX16))" "" DZATGEN = dlatm1.o dlatm7.o dlaran.o dlarnd.o +endif +ifeq ($(BUILD_DOUBLE),1) DMATGEN = dlatms.o dlatme.o dlatmr.o dlatmt.o \ dlagge.o dlagsy.o dlakf2.o dlarge.o dlaror.o dlarot.o dlatm2.o \ dlatm3.o dlatm5.o dlatm6.o dlahilb.o +endif +ifeq ($(BUILD_COMPLEX16),1) ZMATGEN = zlatms.o zlatme.o zlatmr.o zlatmt.o \ zlagge.o zlaghe.o zlagsy.o zlakf2.o zlarge.o zlaror.o zlarot.o \ zlatm1.o zlarnd.o zlatm2.o zlatm3.o zlatm5.o zlatm6.o zlahilb.o +endif .PHONY: all +.NOTPARALLEL: all: $(TMGLIB) ALLOBJ = $(SMATGEN) $(CMATGEN) $(SCATGEN) $(DMATGEN) $(ZMATGEN) \ @@ -107,9 +120,17 @@ cleanlib: rm -f $(TMGLIB) ifneq ($(C_LAPACK), 1) +ifneq ($(filter $(BUILD_SINGLE) $(BUILD_COMPLEX),1),) slaran.o: slaran.f ; $(FC) $(FFLAGS_NOOPT) -c -o $@ $< +endif +ifneq ($(filter $(BUILD_DOUBLE) $(BUILD_COMPLEX16),1),) dlaran.o: dlaran.f ; $(FC) $(FFLAGS_NOOPT) -c -o $@ $< +endif else +ifneq ($(filter $(BUILD_SINGLE) $(BUILD_COMPLEX),1),) slaran.o: slaran.c ; $(CC) $(CFLAGS) -O0 -c -o $@ $< +endif +ifneq ($(filter $(BUILD_DOUBLE) $(BUILD_COMPLEX16),1),) dlaran.o: dlaran.c ; $(CC) $(CFLAGS) -O0 -c -o $@ $< endif +endif diff --git a/param.h b/param.h index 62f56c172..5a6c32369 100644 --- a/param.h +++ b/param.h @@ -2951,7 +2951,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define SYMV_P 16 #endif -#if defined(P5600) || defined(MIPS1004K) || defined(MIPS24K) || defined(I6400) || defined(P6600) || defined(I6500) +#if defined(MIPS64_GENERIC) || defined(P5600) || defined(MIPS1004K) || defined(MIPS24K) || defined(I6400) || defined(P6600) || defined(I6500) #define SNUMOPT 2 #define DNUMOPT 2