Merge branch 'develop' into loongarch64_rename_targets
This commit is contained in:
commit
acd48edf99
|
@ -257,3 +257,53 @@ jobs:
|
|||
- name: Run tests
|
||||
timeout-minutes: 60
|
||||
run: cd build && ctest
|
||||
|
||||
cross_build:
|
||||
runs-on: ubuntu-22.04
|
||||
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
include:
|
||||
- target: mips64el
|
||||
triple: mips64el-linux-gnuabi64
|
||||
opts: DYNAMIC_ARCH=1
|
||||
- target: riscv64
|
||||
triple: riscv64-linux-gnu
|
||||
opts: TARGET=RISCV64_GENERIC
|
||||
- target: mipsel
|
||||
triple: mipsel-linux-gnu
|
||||
opts: TARGET=MIPS1004K
|
||||
- target: alpha
|
||||
triple: alpha-linux-gnu
|
||||
opts: TARGET=EV4
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v3
|
||||
|
||||
- name: Install Dependencies
|
||||
run: |
|
||||
sudo apt-get install -y ccache gcc-${{ matrix.triple }} gfortran-${{ matrix.triple }} libgomp1-${{ matrix.target }}-cross
|
||||
|
||||
- name: Compilation cache
|
||||
uses: actions/cache@v3
|
||||
with:
|
||||
path: ~/.ccache
|
||||
key: ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }}-${{ github.sha }}
|
||||
restore-keys: |
|
||||
ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }}
|
||||
ccache-${{ runner.os }}-${{ matrix.target }}
|
||||
|
||||
- name: Configure ccache
|
||||
run: |
|
||||
# Limit the maximum size and switch on compression to avoid exceeding the total disk or cache quota (5 GB).
|
||||
test -d ~/.ccache || mkdir -p ~/.ccache
|
||||
echo "max_size = 300M" > ~/.ccache/ccache.conf
|
||||
echo "compression = true" >> ~/.ccache/ccache.conf
|
||||
ccache -s
|
||||
|
||||
|
||||
- name: Build OpenBLAS
|
||||
run: |
|
||||
make -j$(nproc) HOSTCC="ccache gcc" CC="ccache ${{ matrix.triple }}-gcc" FC="ccache ${{ matrix.triple }}-gfortran" ARCH=${{ matrix.target }} ${{ matrix.opts }}
|
||||
|
|
|
@ -30,7 +30,7 @@ matrix:
|
|||
before_script: &common-before
|
||||
- COMMON_FLAGS="DYNAMIC_ARCH=1 TARGET=POWER8 NUM_THREADS=32"
|
||||
script:
|
||||
- travis_wait 20 make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE
|
||||
- travis_wait 40 make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE
|
||||
- make -C test $COMMON_FLAGS $BTYPE
|
||||
- make -C ctest $COMMON_FLAGS $BTYPE
|
||||
- make -C utest $COMMON_FLAGS $BTYPE
|
||||
|
@ -104,7 +104,7 @@ matrix:
|
|||
- sudo apt-get update
|
||||
- sudo apt-get install gcc-9 gfortran-9 -y
|
||||
script:
|
||||
- travis_wait 20 make QUIET_MAKE=1 BINARY=64 USE_OPENMP=1 CC=gcc-9 FC=gfortran-9
|
||||
- travis_wait 40 make QUIET_MAKE=1 BINARY=64 USE_OPENMP=1 CC=gcc-9 FC=gfortran-9
|
||||
- make -C test $COMMON_FLAGS $BTYPE
|
||||
- make -C ctest $COMMON_FLAGS $BTYPE
|
||||
- make -C utest $COMMON_FLAGS $BTYPE
|
||||
|
@ -121,7 +121,7 @@ matrix:
|
|||
- sudo apt-get update
|
||||
- sudo apt-get install gcc-9 gfortran-9 -y
|
||||
script:
|
||||
- travis_wait 20 make QUIET_MAKE=1 BUILD_BFLOAT16=1 BINARY=64 USE_OPENMP=1 CC=gcc-9 FC=gfortran-9
|
||||
- travis_wait 40 make QUIET_MAKE=1 BUILD_BFLOAT16=1 BINARY=64 USE_OPENMP=1 CC=gcc-9 FC=gfortran-9
|
||||
- make -C test $COMMON_FLAGS $BTYPE
|
||||
- make -C ctest $COMMON_FLAGS $BTYPE
|
||||
- make -C utest $COMMON_FLAGS $BTYPE
|
||||
|
|
|
@ -211,4 +211,5 @@ In chronological order:
|
|||
* PLCT Lab, Institute of Software Chinese Academy of Sciences
|
||||
* [2022-03] Support RISC-V Vector Intrinsic 1.0 version.
|
||||
|
||||
|
||||
* Pablo Romero <https://github.com/pablorcum>
|
||||
* [2022-08] Fix building from sources for QNX
|
7
Makefile
7
Makefile
|
@ -110,6 +110,10 @@ ifeq ($(OSNAME), Darwin)
|
|||
@echo "\"make PREFIX=/your_installation_path/ install\"."
|
||||
@echo
|
||||
@echo "(or set PREFIX in Makefile.rule and run make install."
|
||||
@echo
|
||||
@echo "Note that any flags passed to make during build should also be passed to make install"
|
||||
@echo "to circumvent any install errors."
|
||||
@echo
|
||||
@echo "If you want to move the .dylib to a new location later, make sure you change"
|
||||
@echo "the internal name of the dylib with:"
|
||||
@echo
|
||||
|
@ -118,6 +122,9 @@ endif
|
|||
@echo
|
||||
@echo "To install the library, you can run \"make PREFIX=/path/to/your/installation install\"."
|
||||
@echo
|
||||
@echo "Note that any flags passed to make during build should also be passed to make install"
|
||||
@echo "to circumvent any install errors."
|
||||
@echo
|
||||
|
||||
shared :
|
||||
ifneq ($(NO_SHARED), 1)
|
||||
|
|
|
@ -1,42 +1,24 @@
|
|||
CPP = $(CC) -E
|
||||
RANLIB = ranlib
|
||||
|
||||
ifeq ($(LIBSUBARCH), EV4)
|
||||
LIBNAME = $(LIBPREFIX)_ev4.a
|
||||
LIBNAME_P = $(LIBPREFIX)_ev4_p.a
|
||||
endif
|
||||
|
||||
ifeq ($(LIBSUBARCH), EV5)
|
||||
LIBNAME = $(LIBPREFIX)_ev5.a
|
||||
LIBNAME_P = $(LIBPREFIX)_ev5_p.a
|
||||
endif
|
||||
|
||||
ifeq ($(LIBSUBARCH), EV6)
|
||||
LIBNAME = $(LIBPREFIX)_ev6.a
|
||||
LIBNAME_P = $(LIBPREFIX)_ev6_p.a
|
||||
endif
|
||||
|
||||
ifneq ($(COMPILER), NATIVE)
|
||||
# GCC User
|
||||
ifeq ($(LIBSUBARCH), EV4)
|
||||
OPTION += -DEV4 -mcpu=ev4
|
||||
ifeq ($(CORE), EV4)
|
||||
CCOMMON_OPT += -mcpu=ev4
|
||||
endif
|
||||
ifeq ($(LIBSUBARCH), EV5)
|
||||
OPTION += -DEV5 -mcpu=ev5
|
||||
ifeq ($(CORE), EV5)
|
||||
CCOMMON_OPT += -mcpu=ev5
|
||||
endif
|
||||
ifeq ($(LIBSUBARCH), EV6)
|
||||
OPTION += -DEV6 -mcpu=ev6
|
||||
ifeq ($(CORE), EV6)
|
||||
CCOMMON_OPT += -mcpu=ev6
|
||||
endif
|
||||
else
|
||||
# Compaq Compiler User
|
||||
ifeq ($(LIBSUBARCH), EV4)
|
||||
OPTION += -DEV4 -tune ev4 -arch ev4
|
||||
ifeq ($(CORE), EV4)
|
||||
CCOMMON_OPT += -tune ev4 -arch ev4
|
||||
endif
|
||||
ifeq ($(LIBSUBARCH), EV5)
|
||||
OPTION += -DEV5 -tune ev5 -arch ev5
|
||||
ifeq ($(CORE), EV5)
|
||||
CCOMMON_OPT += -tune ev5 -arch ev5
|
||||
endif
|
||||
ifeq ($(LIBSUBARCH), EV6)
|
||||
OPTION += -DEV6 -tune ev6 -arch ev6
|
||||
ifeq ($(CORE), EV6)
|
||||
CCOMMON_OPT += -tune ev6 -arch ev6
|
||||
endif
|
||||
endif
|
||||
|
||||
|
|
|
@ -60,9 +60,9 @@ all: getarch_2nd
|
|||
./getarch_2nd 1 >> $(TARGET_CONF)
|
||||
|
||||
$(TARGET_CONF): c_check$(SCRIPTSUFFIX) f_check$(SCRIPTSUFFIX) getarch
|
||||
./c_check$(SCRIPTSUFFIX) $(TARGET_MAKE) $(TARGET_CONF) $(CC) $(TARGET_FLAGS) $(CFLAGS)
|
||||
./c_check$(SCRIPTSUFFIX) $(TARGET_MAKE) $(TARGET_CONF) "$(CC)" "$(TARGET_FLAGS) $(CFLAGS)"
|
||||
ifneq ($(ONLY_CBLAS), 1)
|
||||
./f_check$(SCRIPTSUFFIX) $(TARGET_MAKE) $(TARGET_CONF) $(FC) $(TARGET_FLAGS)
|
||||
./f_check$(SCRIPTSUFFIX) $(TARGET_MAKE) $(TARGET_CONF) "$(FC)" "$(TARGET_FLAGS)"
|
||||
else
|
||||
#When we only build CBLAS, we set NOFORTRAN=2
|
||||
echo "NOFORTRAN=2" >> $(TARGET_MAKE)
|
||||
|
@ -77,8 +77,8 @@ endif
|
|||
|
||||
|
||||
getarch : getarch.c cpuid.S dummy $(CPUIDEMU)
|
||||
avx512=$$(./c_check$(SCRIPTSUFFIX) - - $(CC) $(TARGET_FLAGS) $(CFLAGS) | grep NO_AVX512); \
|
||||
rv64gv=$$(./c_check$(SCRIPTSUFFIX) - - $(CC) $(TARGET_FLAGS) $(CFLAGS) | grep NO_RV64GV); \
|
||||
avx512=$$(./c_check$(SCRIPTSUFFIX) - - "$(CC)" "$(TARGET_FLAGS) $(CFLAGS)" | grep NO_AVX512); \
|
||||
rv64gv=$$(./c_check$(SCRIPTSUFFIX) - - "$(CC)" "$(TARGET_FLAGS) $(CFLAGS)" | grep NO_RV64GV); \
|
||||
$(HOSTCC) $(HOST_CFLAGS) $(EXFLAGS) $${avx512:+-D$${avx512}} $${rv64gv:+-D$${rv64gv}} -lm -o $(@F) getarch.c cpuid.S $(CPUIDEMU)
|
||||
|
||||
getarch_2nd : getarch_2nd.c $(TARGET_CONF) dummy
|
||||
|
|
|
@ -677,7 +677,12 @@ endif
|
|||
endif
|
||||
|
||||
ifeq ($(ARCH), mips64)
|
||||
DYNAMIC_CORE = LOONGSON3R3 LOONGSON3R4
|
||||
DYNAMIC_CORE = LOONGSON3R3 LOONGSON3R4 MIPS64_GENERIC
|
||||
ifdef DYNAMIC_LIST
|
||||
override DYNAMIC_CORE = MIPS64_GENERIC $(DYNAMIC_LIST)
|
||||
XCCOMMON_OPT = -DDYNAMIC_LIST -DDYN_MIPS64_GENERIC
|
||||
XCCOMMON_OPT += $(foreach dcore,$(DYNAMIC_LIST),-DDYN_$(dcore))
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(ARCH), loongarch64)
|
||||
|
@ -856,6 +861,11 @@ CCOMMON_OPT += -mabi=32
|
|||
BINARY_DEFINED = 1
|
||||
endif
|
||||
|
||||
ifneq (, $(filter $(CORE), MIPS64_GENERIC))
|
||||
CCOMMON_OPT += -DNO_MSA
|
||||
FCOMMON_OPT += -DNO_MSA
|
||||
endif
|
||||
|
||||
ifneq (, $(filter $(CORE),LOONGSON3R3 LOONGSON3R4))
|
||||
CCOMMON_OPT += -march=loongson3a
|
||||
FCOMMON_OPT += -march=loongson3a
|
||||
|
|
|
@ -65,6 +65,7 @@ MIPS1004K
|
|||
MIPS24K
|
||||
|
||||
4.MIPS64 CPU:
|
||||
MIPS64_GENERIC
|
||||
SICORTEX
|
||||
LOONGSON3A
|
||||
LOONGSON3B
|
||||
|
@ -128,3 +129,7 @@ LA264
|
|||
12. Elbrus E2000:
|
||||
E2K
|
||||
|
||||
13. Alpha
|
||||
EV4
|
||||
EV5
|
||||
EV6
|
||||
|
|
4
c_check
4
c_check
|
@ -31,8 +31,8 @@ flags="$*"
|
|||
|
||||
cross_suffix=""
|
||||
|
||||
if [ "`dirname $compiler_name`" != '.' ]; then
|
||||
cross_suffix="$cross_suffix`dirname $compiler_name`/"
|
||||
if [ "`dirname \"$compiler_name\"`" != '.' ]; then
|
||||
cross_suffix="$cross_suffix`dirname \"$compiler_name\"`/"
|
||||
fi
|
||||
|
||||
bn=`basename $compiler_name`
|
||||
|
|
4
common.h
4
common.h
|
@ -90,7 +90,7 @@ extern "C" {
|
|||
#endif
|
||||
#include <time.h>
|
||||
|
||||
#ifdef OS_LINUX
|
||||
#if defined(OS_LINUX) || defined(OS_QNX)
|
||||
#include <malloc.h>
|
||||
#include <sched.h>
|
||||
#endif
|
||||
|
@ -107,7 +107,7 @@ extern "C" {
|
|||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef OS_HAIKU
|
||||
#if defined(OS_HAIKU) || defined(OS_QNX)
|
||||
#define NO_SYSV_IPC
|
||||
#endif
|
||||
|
||||
|
|
|
@ -43,7 +43,7 @@
|
|||
|
||||
#define MB asm("mb")
|
||||
#define WMB asm("wmb")
|
||||
#define RMB asm("rmb")
|
||||
#define RMB asm("mb")
|
||||
|
||||
static void __inline blas_lock(unsigned long *address){
|
||||
#ifndef __DECC
|
||||
|
|
|
@ -2612,7 +2612,7 @@
|
|||
#ifndef ASSEMBLER
|
||||
#if !defined(DYNAMIC_ARCH) \
|
||||
&& (defined(ARCH_X86) || defined(ARCH_X86_64) || defined(ARCH_IA64) || defined(ARCH_MIPS64) || defined(ARCH_ARM64) \
|
||||
|| defined(ARCH_LOONGARCH64) || defined(ARCH_E2K))
|
||||
|| defined(ARCH_LOONGARCH64) || defined(ARCH_E2K) || defined(ARCH_ALPHA))
|
||||
extern BLASLONG gemm_offset_a;
|
||||
extern BLASLONG gemm_offset_b;
|
||||
extern BLASLONG sbgemm_p;
|
||||
|
|
|
@ -86,7 +86,9 @@ static inline unsigned int rpcc(void){
|
|||
//__asm__ __volatile__("dmfc0 %0, $25, 1": "=r"(tmp):: "memory");
|
||||
//ret=tmp;
|
||||
__asm__ __volatile__(".set push \n"
|
||||
#if !defined(__mips_isa_rev) || __mips_isa_rev < 2
|
||||
".set mips32r2\n"
|
||||
#endif
|
||||
"rdhwr %0, $2\n"
|
||||
".set pop": "=r"(ret):: "memory");
|
||||
|
||||
|
@ -99,7 +101,9 @@ static inline unsigned int rpcc(void){
|
|||
static inline int WhereAmI(void){
|
||||
int ret=0;
|
||||
__asm__ __volatile__(".set push \n"
|
||||
#if !defined(__mips_isa_rev) || __mips_isa_rev < 2
|
||||
".set mips32r2\n"
|
||||
#endif
|
||||
"rdhwr %0, $0\n"
|
||||
".set pop": "=r"(ret):: "memory");
|
||||
return ret;
|
||||
|
@ -197,9 +201,15 @@ static inline int blas_quickdivide(blasint x, blasint y){
|
|||
|
||||
#if defined(ASSEMBLER) && !defined(NEEDPARAM)
|
||||
|
||||
#if defined(__mips_isa_rev) && __mips_isa_rev >= 6
|
||||
#define ASSEMBLER_ARCH mips64r6
|
||||
#else
|
||||
#define ASSEMBLER_ARCH mips64
|
||||
#endif
|
||||
|
||||
#define PROLOGUE \
|
||||
.text ;\
|
||||
.set mips64 ;\
|
||||
.set ASSEMBLER_ARCH ;\
|
||||
.align 5 ;\
|
||||
.globl REALNAME ;\
|
||||
.ent REALNAME ;\
|
||||
|
|
|
@ -59,6 +59,11 @@ void get_subarchitecture(void){
|
|||
printf("ev%d", implver() + 4);
|
||||
}
|
||||
|
||||
|
||||
void get_corename(void){
|
||||
printf("EV%d", implver() + 4);
|
||||
}
|
||||
|
||||
void get_subdirname(void){
|
||||
printf("alpha");
|
||||
}
|
||||
|
|
|
@ -70,16 +70,18 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
/* or implied, of The University of Texas at Austin. */
|
||||
/*********************************************************************/
|
||||
|
||||
#define CPU_UNKNOWN 0
|
||||
#define CPU_SICORTEX 1
|
||||
#define CPU_LOONGSON3R3 2
|
||||
#define CPU_LOONGSON3R4 3
|
||||
#define CPU_I6400 4
|
||||
#define CPU_P6600 5
|
||||
#define CPU_I6500 6
|
||||
#define CPU_UNKNOWN 0
|
||||
#define CPU_MIPS64_GENERIC 1
|
||||
#define CPU_SICORTEX 2
|
||||
#define CPU_LOONGSON3R3 3
|
||||
#define CPU_LOONGSON3R4 4
|
||||
#define CPU_I6400 5
|
||||
#define CPU_P6600 6
|
||||
#define CPU_I6500 7
|
||||
|
||||
static char *cpuname[] = {
|
||||
"UNKNOWN",
|
||||
"MIPS64_GENERIC"
|
||||
"SICORTEX",
|
||||
"LOONGSON3R3",
|
||||
"LOONGSON3R4",
|
||||
|
@ -113,8 +115,11 @@ int detect(void){
|
|||
return CPU_SICORTEX;
|
||||
}
|
||||
}
|
||||
|
||||
return CPU_MIPS64_GENERIC;
|
||||
#else
|
||||
return CPU_UNKNOWN;
|
||||
#endif
|
||||
return CPU_UNKNOWN;
|
||||
}
|
||||
|
||||
char *get_corename(void){
|
||||
|
@ -136,8 +141,10 @@ void get_subarchitecture(void){
|
|||
printf("P6600");
|
||||
}else if(detect()==CPU_I6500){
|
||||
printf("I6500");
|
||||
}else{
|
||||
}else if(detect()==CPU_SICORTEX){
|
||||
printf("SICORTEX");
|
||||
}else{
|
||||
printf("MIPS64_GENERIC");
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -215,8 +222,8 @@ void get_libname(void){
|
|||
printf("p6600\n");
|
||||
}else if(detect()==CPU_I6500) {
|
||||
printf("i6500\n");
|
||||
}else{
|
||||
printf("mips64\n");
|
||||
}else {
|
||||
printf("mips64_generic\n");
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -40,7 +40,7 @@ else()
|
|||
c_${float_char}blas1.c)
|
||||
endif()
|
||||
target_link_libraries(x${float_char}cblat1 ${OpenBLAS_LIBNAME})
|
||||
if(${CMAKE_SYSTEM_NAME} MATCHES "Linux" OR ${CMAKE_SYSTEM_NAME} MATCHES "FreeBSD")
|
||||
if(${CMAKE_SYSTEM_NAME} MATCHES "Linux" OR ${CMAKE_SYSTEM_NAME} MATCHES "FreeBSD" OR ${CMAKE_SYSTEM_NAME} MATCHES "QNX")
|
||||
target_link_libraries(x${float_char}cblat1 m)
|
||||
endif()
|
||||
add_test(NAME "x${float_char}cblat1"
|
||||
|
@ -65,7 +65,7 @@ else()
|
|||
constant.c)
|
||||
endif()
|
||||
target_link_libraries(x${float_char}cblat2 ${OpenBLAS_LIBNAME})
|
||||
if(${CMAKE_SYSTEM_NAME} MATCHES "Linux" OR ${CMAKE_SYSTEM_NAME} MATCHES "FreeBSD")
|
||||
if(${CMAKE_SYSTEM_NAME} MATCHES "Linux" OR ${CMAKE_SYSTEM_NAME} MATCHES "FreeBSD" OR ${CMAKE_SYSTEM_NAME} MATCHES "QNX")
|
||||
target_link_libraries(x${float_char}cblat2 m)
|
||||
endif()
|
||||
add_test(NAME "x${float_char}cblat2"
|
||||
|
@ -90,7 +90,7 @@ else()
|
|||
constant.c)
|
||||
endif()
|
||||
target_link_libraries(x${float_char}cblat3 ${OpenBLAS_LIBNAME})
|
||||
if(${CMAKE_SYSTEM_NAME} MATCHES "Linux" OR ${CMAKE_SYSTEM_NAME} MATCHES "FreeBSD")
|
||||
if(${CMAKE_SYSTEM_NAME} MATCHES "Linux" OR ${CMAKE_SYSTEM_NAME} MATCHES "FreeBSD" OR ${CMAKE_SYSTEM_NAME} MATCHES "QNX")
|
||||
target_link_libraries(x${float_char}cblat3 m)
|
||||
endif()
|
||||
add_test(NAME "x${float_char}cblat3"
|
||||
|
|
|
@ -38,22 +38,48 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#include <sys/resource.h>
|
||||
#include "common.h"
|
||||
|
||||
#if (defined OS_LINUX || defined OS_ANDROID)
|
||||
#include <asm/hwcap.h>
|
||||
#include <sys/auxv.h>
|
||||
|
||||
#ifndef HWCAP_LOONGSON_CPUCFG
|
||||
#define HWCAP_LOONGSON_CPUCFG (1 << 14)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef DYNAMIC_LIST
|
||||
extern gotoblas_t gotoblas_MIPS64_GENERIC;
|
||||
#ifdef DYN_LOONGSON3R3
|
||||
extern gotoblas_t gotoblas_LOONGSON3R3;
|
||||
#else
|
||||
#define gotoblas_LOONGSON3R3 gotoblas_MIPS64_GENERIC
|
||||
#endif
|
||||
#ifdef DYN_LOONGSON3R4
|
||||
extern gotoblas_t gotoblas_LOONGSON3R4;
|
||||
#else
|
||||
#define gotoblas_LOONGSON3R4 gotoblas_MIPS64_GENERIC
|
||||
#endif
|
||||
#else
|
||||
extern gotoblas_t gotoblas_LOONGSON3R3;
|
||||
extern gotoblas_t gotoblas_LOONGSON3R4;
|
||||
extern gotoblas_t gotoblas_MIPS64_GENERIC;
|
||||
#endif
|
||||
|
||||
extern void openblas_warning(int verbose, const char * msg);
|
||||
|
||||
#define NUM_CORETYPES 2
|
||||
#define NUM_CORETYPES 3
|
||||
|
||||
/*
 * Printable names of the selectable cores. gotoblas_corename() returns
 * entries 0..2 for the three known cores and corename[NUM_CORETYPES] for
 * anything else, so the array must hold exactly NUM_CORETYPES + 1 strings.
 * The comma after "MIPS64_GENERIC" is required: without it the first two
 * literals are concatenated, the array shrinks to three entries and the
 * "UNKNOWN" lookup reads past the end.
 */
static char *corename[] = {
  "MIPS64_GENERIC",
  "loongson3r3",
  "loongson3r4",
  "UNKNOWN"
};
|
||||
|
||||
char *gotoblas_corename(void) {
|
||||
if (gotoblas == &gotoblas_LOONGSON3R3) return corename[0];
|
||||
if (gotoblas == &gotoblas_LOONGSON3R4) return corename[1];
|
||||
if (gotoblas == &gotoblas_MIPS64_GENERIC) return corename[0];
|
||||
if (gotoblas == &gotoblas_LOONGSON3R3) return corename[1];
|
||||
if (gotoblas == &gotoblas_LOONGSON3R4) return corename[2];
|
||||
return corename[NUM_CORETYPES];
|
||||
}
|
||||
|
||||
|
@ -73,77 +99,32 @@ static gotoblas_t *force_coretype(char *coretype) {
|
|||
|
||||
switch (found)
|
||||
{
|
||||
case 0: return (&gotoblas_LOONGSON3R3);
|
||||
case 1: return (&gotoblas_LOONGSON3R4);
|
||||
case 0: return (&gotoblas_MIPS64_GENERIC);
|
||||
case 1: return (&gotoblas_LOONGSON3R3);
|
||||
case 2: return (&gotoblas_LOONGSON3R4);
|
||||
}
|
||||
snprintf(message, 128, "Core not found: %s\n", coretype);
|
||||
openblas_warning(1, message);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
#if (defined OS_LINUX || defined OS_ANDROID)
|
||||
#define MMI_MASK 0x00000010
|
||||
#define MSA_MASK 0x00000020
|
||||
|
||||
int fd[2];
|
||||
int support_cpucfg;
|
||||
|
||||
static void handler(int signum)
|
||||
{
|
||||
close(fd[1]);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
/* Brief : Function to check if cpucfg supported on loongson
|
||||
* Return: 1 supported
|
||||
* 0 not supported
|
||||
*/
|
||||
static int cpucfg_test(void) {
|
||||
pid_t pid;
|
||||
int status = 0;
|
||||
|
||||
support_cpucfg = 0;
|
||||
pipe(fd);
|
||||
pid = fork();
|
||||
if (pid == 0) { /* Subprocess */
|
||||
struct sigaction act;
|
||||
close(fd[0]);
|
||||
/* Set signal action for SIGILL. */
|
||||
act.sa_handler = handler;
|
||||
sigaction(SIGILL,&act,NULL);
|
||||
|
||||
/* Execute cpucfg in subprocess. */
|
||||
__asm__ volatile(
|
||||
".insn \n\t"
|
||||
".word (0xc8080118) \n\t"
|
||||
:::
|
||||
);
|
||||
support_cpucfg = 1;
|
||||
write(fd[1],&support_cpucfg,sizeof(support_cpucfg));
|
||||
close(fd[1]);
|
||||
exit(0);
|
||||
} else if (pid > 0){ /* Parent process*/
|
||||
close(fd[1]);
|
||||
if ((waitpid(pid,&status,0) <= 0) ||
|
||||
(read(fd[0],&support_cpucfg,sizeof(support_cpucfg)) <= 0))
|
||||
support_cpucfg = 0;
|
||||
close(fd[0]);
|
||||
} else {
|
||||
support_cpucfg = 0;
|
||||
}
|
||||
|
||||
return support_cpucfg;
|
||||
}
|
||||
|
||||
static gotoblas_t *get_coretype_from_cpucfg(void) {
|
||||
int flag = 0;
|
||||
__asm__ volatile(
|
||||
".set push \n\t"
|
||||
".set noat \n\t"
|
||||
".insn \n\t"
|
||||
"dli $8, 0x01 \n\t"
|
||||
".word (0xc9084918) \n\t"
|
||||
"usw $9, 0x00(%0) \n\t"
|
||||
"dli $1, 0x01 \n\t"
|
||||
".word (0xc8080118) \n\t"
|
||||
"move %0, $1 \n\t"
|
||||
".set pop \n\t"
|
||||
: "=r"(flag)
|
||||
:
|
||||
:
|
||||
: "r"(&flag)
|
||||
: "memory"
|
||||
);
|
||||
if (flag & MSA_MASK)
|
||||
return (&gotoblas_LOONGSON3R4);
|
||||
|
@ -153,7 +134,7 @@ static gotoblas_t *get_coretype_from_cpucfg(void) {
|
|||
}
|
||||
|
||||
static gotoblas_t *get_coretype_from_cpuinfo(void) {
|
||||
#ifdef linux
|
||||
#ifdef __linux
|
||||
FILE *infile;
|
||||
char buffer[512], *p;
|
||||
|
||||
|
@ -176,17 +157,19 @@ static gotoblas_t *get_coretype_from_cpuinfo(void) {
|
|||
return NULL;
|
||||
}
|
||||
#endif
|
||||
return NULL;
|
||||
return NULL;
|
||||
}
|
||||
#endif
|
||||
|
||||
static gotoblas_t *get_coretype(void) {
|
||||
int ret = 0;
|
||||
|
||||
ret = cpucfg_test();
|
||||
if (ret == 1)
|
||||
return get_coretype_from_cpucfg();
|
||||
else
|
||||
return get_coretype_from_cpuinfo();
|
||||
#if (!defined OS_LINUX && !defined OS_ANDROID)
|
||||
return NULL;
|
||||
#else
|
||||
/* NOTE(review): the HWCAP_LOONGSON_CPUCFG bit is understood to mean the kernel
 * advertises CPUCFG as usable, so the CPUCFG probe should run only when the
 * bit is set. The previous negated test ran it exactly when the bit was
 * clear - confirm against the kernel hwcap definition and on hardware. */
if (getauxval(AT_HWCAP) & HWCAP_LOONGSON_CPUCFG)
|
||||
return get_coretype_from_cpucfg();
|
||||
else
|
||||
return get_coretype_from_cpuinfo();
|
||||
#endif
|
||||
}
|
||||
|
||||
void gotoblas_dynamic_init(void) {
|
||||
|
@ -208,9 +191,9 @@ void gotoblas_dynamic_init(void) {
|
|||
|
||||
if (gotoblas == NULL)
|
||||
{
|
||||
snprintf(coremsg, 128, "Falling back to loongson3r3 core\n");
|
||||
/* Warning text names the fallback core that is assigned right below. */
snprintf(coremsg, 128, "Falling back to MIPS64_GENERIC\n");
|
||||
openblas_warning(1, coremsg);
|
||||
gotoblas = &gotoblas_LOONGSON3R3;
|
||||
gotoblas = &gotoblas_MIPS64_GENERIC;
|
||||
}
|
||||
|
||||
if (gotoblas && gotoblas->init) {
|
||||
|
|
|
@ -4000,6 +4000,22 @@ case "$p1" in
|
|||
no_underscore_objs="$no_underscore_objs $misc_common_objs"
|
||||
|
||||
printf 'int main(void){\n'
|
||||
for obj in $underscore_objs; do
|
||||
[ "$obj" != "xerbla" ] && printf 'extern void %s%s%s%s();\n' \
|
||||
"$symbolprefix" "$obj" "$bu" "$symbolsuffix"
|
||||
done
|
||||
|
||||
for obj in $need_2underscore_objs; do
|
||||
printf 'extern void %s%s%s%s%s();\n' \
|
||||
"$symbolprefix" "$obj" "$bu" "$bu" "$symbolsuffix"
|
||||
done
|
||||
|
||||
for obj in $no_underscore_objs; do
|
||||
printf 'extern void %s%s%s();\n' \
|
||||
"$symbolprefix" "$obj" "$symbolsuffix"
|
||||
done
|
||||
|
||||
printf '\n'
|
||||
for obj in $underscore_objs; do
|
||||
[ "$obj" != "xerbla" ] && printf '%s%s%s%s();\n' \
|
||||
"$symbolprefix" "$obj" "$bu" "$symbolsuffix"
|
||||
|
|
|
@ -3955,6 +3955,18 @@ if ($ARGV[0] eq "linktest") {
|
|||
@no_underscore_objs = (@no_underscore_objs, @misc_common_objs);
|
||||
|
||||
print "int main(void){\n";
|
||||
foreach $objs (@underscore_objs) {
|
||||
print "extern void ", $symbolprefix, $objs, $bu, $symbolsuffix, "();\n" if $objs ne "xerbla";
|
||||
}
|
||||
|
||||
foreach $objs (@need_2underscore_objs) {
|
||||
print "extern void ", $symbolprefix, $objs, $bu, $bu, $symbolsuffix, "();\n";
|
||||
}
|
||||
|
||||
foreach $objs (@no_underscore_objs) {
|
||||
print "extern void ", $symbolprefix, $objs, $symbolsuffix, "();\n";
|
||||
}
|
||||
|
||||
foreach $objs (@underscore_objs) {
|
||||
print $symbolprefix, $objs, $bu, $symbolsuffix, "();\n" if $objs ne "xerbla";
|
||||
}
|
||||
|
|
56
getarch.c
56
getarch.c
|
@ -131,6 +131,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
/* #define FORCE_PPC440 */
|
||||
/* #define FORCE_PPC440FP2 */
|
||||
/* #define FORCE_CELL */
|
||||
/* #define FORCE_MIPS64_GENERIC */
|
||||
/* #define FORCE_SICORTEX */
|
||||
/* #define FORCE_LOONGSON3R3 */
|
||||
/* #define FORCE_LOONGSON3R4 */
|
||||
|
@ -146,6 +147,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
/* #define FORCE_SPARCV7 */
|
||||
/* #define FORCE_ZARCH_GENERIC */
|
||||
/* #define FORCE_Z13 */
|
||||
/* #define FORCE_EV4 */
|
||||
/* #define FORCE_EV5 */
|
||||
/* #define FORCE_EV6 */
|
||||
/* #define FORCE_GENERIC */
|
||||
|
||||
#ifdef FORCE_P2
|
||||
|
@ -915,6 +919,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#define CORENAME "CELL"
|
||||
#endif
|
||||
|
||||
#ifdef FORCE_MIPS64_GENERIC
|
||||
#define FORCE
|
||||
#define ARCHITECTURE "MIPS"
|
||||
#define SUBARCHITECTURE "MIPS64_GENERIC"
|
||||
#define SUBDIRNAME "mips64"
|
||||
#define ARCHCONFIG "-DMIPS64_GENERIC " \
|
||||
"-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \
|
||||
"-DL2_SIZE=1048576 -DL2_LINESIZE=32 " \
|
||||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 "
|
||||
#define LIBNAME "mips64_generic"
|
||||
#define CORENAME "MIPS64_GENERIC"
|
||||
#else
|
||||
#endif
|
||||
|
||||
#ifdef FORCE_SICORTEX
|
||||
#define FORCE
|
||||
#define ARCHITECTURE "MIPS"
|
||||
|
@ -1601,6 +1619,42 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#define CORENAME "Z14"
|
||||
#endif
|
||||
|
||||
#ifdef FORCE_EV4
|
||||
#define FORCE
|
||||
#define ARCHITECTURE "ALPHA"
|
||||
#define SUBARCHITECTURE "ev4"
|
||||
#define ARCHCONFIG "-DEV4 " \
|
||||
"-DL1_DATA_SIZE=16384 -DL1_DATA_LINESIZE=32 " \
|
||||
"-DL2_SIZE=2097152 -DL2_LINESIZE=32 " \
|
||||
"-DDTB_DEFAULT_ENTRIES=32 -DDTB_SIZE=8192 "
|
||||
#define LIBNAME "ev4"
|
||||
#define CORENAME "EV4"
|
||||
#endif
|
||||
|
||||
#ifdef FORCE_EV5
|
||||
#define FORCE
|
||||
#define ARCHITECTURE "ALPHA"
|
||||
#define SUBARCHITECTURE "ev5"
|
||||
#define ARCHCONFIG "-DEV5 " \
|
||||
"-DL1_DATA_SIZE=16384 -DL1_DATA_LINESIZE=32 " \
|
||||
"-DL2_SIZE=2097152 -DL2_LINESIZE=64 " \
|
||||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=8192 "
|
||||
#define LIBNAME "ev5"
|
||||
#define CORENAME "EV5"
|
||||
#endif
|
||||
|
||||
#ifdef FORCE_EV6
|
||||
#define FORCE
|
||||
#define ARCHITECTURE "ALPHA"
|
||||
#define SUBARCHITECTURE "ev6"
|
||||
#define ARCHCONFIG "-DEV6 " \
|
||||
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
|
||||
"-DL2_SIZE=4194304 -DL2_LINESIZE=64 " \
|
||||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=8192 "
|
||||
#define LIBNAME "ev6"
|
||||
#define CORENAME "EV6"
|
||||
#endif
|
||||
|
||||
#ifdef FORCE_C910V
|
||||
#define FORCE
|
||||
#define ARCHITECTURE "RISCV64"
|
||||
|
@ -1777,7 +1831,7 @@ int main(int argc, char *argv[]){
|
|||
#ifdef FORCE
|
||||
printf("CORE=%s\n", CORENAME);
|
||||
#else
|
||||
#if defined(INTEL_AMD) || defined(POWER) || defined(__mips__) || defined(__arm__) || defined(__aarch64__) || defined(ZARCH) || defined(sparc) || defined(__loongarch__) || defined(__riscv)
|
||||
#if defined(INTEL_AMD) || defined(POWER) || defined(__mips__) || defined(__arm__) || defined(__aarch64__) || defined(ZARCH) || defined(sparc) || defined(__loongarch__) || defined(__riscv) || defined(__alpha__)
|
||||
printf("CORE=%s\n", get_corename());
|
||||
#endif
|
||||
#endif
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
|
||||
#define ASSEMBLER
|
||||
#include "common.h"
|
||||
#include "version.h"
|
||||
|
||||
|
||||
#define N $16
|
||||
#define X $17
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
|
||||
#define ASSEMBLER
|
||||
#include "common.h"
|
||||
#include "version.h"
|
||||
|
||||
|
||||
#define PREFETCHSIZE 88
|
||||
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
|
||||
#define ASSEMBLER
|
||||
#include "common.h"
|
||||
#include "version.h"
|
||||
|
||||
|
||||
#define PREFETCHSIZE 40
|
||||
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
|
||||
#define ASSEMBLER
|
||||
#include "common.h"
|
||||
#include "version.h"
|
||||
|
||||
|
||||
.set noat
|
||||
.set noreorder
|
||||
|
|
|
@ -39,7 +39,7 @@
|
|||
#define ASSEMBLER
|
||||
|
||||
#include "common.h"
|
||||
#include "version.h"
|
||||
|
||||
|
||||
#define PREFETCH_SIZE 80
|
||||
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
|
||||
#define ASSEMBLER
|
||||
#include "common.h"
|
||||
#include "version.h"
|
||||
|
||||
|
||||
#define N $16
|
||||
#define X $17
|
||||
|
|
|
@ -42,7 +42,7 @@
|
|||
#define ASSEMBLER
|
||||
|
||||
#include "common.h"
|
||||
#include "version.h"
|
||||
|
||||
|
||||
.globl NAME
|
||||
.ent NAME
|
||||
|
|
|
@ -39,7 +39,7 @@
|
|||
#define ASSEMBLER
|
||||
|
||||
#include "common.h"
|
||||
#include "version.h"
|
||||
|
||||
|
||||
#define PREFETCH_SIZE 80
|
||||
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
|
||||
#define ASSEMBLER
|
||||
#include "common.h"
|
||||
#include "version.h"
|
||||
|
||||
|
||||
#define PREFETCHSIZE 88
|
||||
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
|
||||
#define ASSEMBLER
|
||||
#include "common.h"
|
||||
#include "version.h"
|
||||
|
||||
|
||||
.set noat
|
||||
.set noreorder
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
|
||||
#define ASSEMBLER
|
||||
#include "common.h"
|
||||
#include "version.h"
|
||||
|
||||
|
||||
#if !defined(EV4) && !defined(EV5) && !defined(EV6)
|
||||
#error "Architecture is not specified."
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
|
||||
#define ASSEMBLER
|
||||
#include "common.h"
|
||||
#include "version.h"
|
||||
|
||||
|
||||
#define STACKSIZE 64
|
||||
#define PREFETCHSIZE 32
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
|
||||
#define ASSEMBLER
|
||||
#include "common.h"
|
||||
#include "version.h"
|
||||
|
||||
|
||||
#define STACKSIZE 64
|
||||
#define PREFETCHSIZE 32
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
|
||||
#define ASSEMBLER
|
||||
#include "common.h"
|
||||
#include "version.h"
|
||||
|
||||
|
||||
#define N $16
|
||||
#define X $17
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
|
||||
#define ASSEMBLER
|
||||
#include "common.h"
|
||||
#include "version.h"
|
||||
|
||||
|
||||
#define N $16
|
||||
#define X $17
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
|
||||
#define ASSEMBLER
|
||||
#include "common.h"
|
||||
#include "version.h"
|
||||
|
||||
|
||||
#define N $16
|
||||
#define X $17
|
||||
|
|
|
@ -36,7 +36,7 @@
|
|||
/* or implied, of The University of Texas at Austin. */
|
||||
/*********************************************************************/
|
||||
|
||||
#include "version.h"
|
||||
|
||||
|
||||
.set noat
|
||||
.set noreorder
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
|
||||
#define ASSEMBLER
|
||||
#include "common.h"
|
||||
#include "version.h"
|
||||
|
||||
|
||||
#define N $16
|
||||
#define X $17
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
|
||||
#define ASSEMBLER
|
||||
#include "common.h"
|
||||
#include "version.h"
|
||||
|
||||
|
||||
#define N $16
|
||||
#define X $17
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
|
||||
#define ASSEMBLER
|
||||
#include "common.h"
|
||||
#include "version.h"
|
||||
|
||||
|
||||
#define PREFETCHSIZE 88
|
||||
|
||||
|
|
|
@ -39,7 +39,7 @@
|
|||
#define ASSEMBLER
|
||||
|
||||
#include "common.h"
|
||||
#include "version.h"
|
||||
|
||||
|
||||
#define PREFETCH_SIZE 80
|
||||
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
|
||||
#define ASSEMBLER
|
||||
#include "common.h"
|
||||
#include "version.h"
|
||||
|
||||
|
||||
#define PREFETCHSIZE 88
|
||||
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
|
||||
#define ASSEMBLER
|
||||
#include "common.h"
|
||||
#include "version.h"
|
||||
|
||||
|
||||
PROLOGUE
|
||||
PROFCODE
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
|
||||
#define ASSEMBLER
|
||||
#include "common.h"
|
||||
#include "version.h"
|
||||
|
||||
|
||||
#if !defined(EV4) && !defined(EV5) && !defined(EV6)
|
||||
#error "Architecture is not specified."
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
|
||||
#define ASSEMBLER
|
||||
#include "common.h"
|
||||
#include "version.h"
|
||||
|
||||
|
||||
#if !defined(EV4) && !defined(EV5) && !defined(EV6)
|
||||
#error "Architecture is not specified."
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
|
||||
#define ASSEMBLER
|
||||
#include "common.h"
|
||||
#include "version.h"
|
||||
|
||||
|
||||
#if !defined(EV4) && !defined(EV5) && !defined(EV6)
|
||||
#error "Architecture is not specified."
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
|
||||
#define ASSEMBLER
|
||||
#include "common.h"
|
||||
#include "version.h"
|
||||
|
||||
|
||||
#define N $16
|
||||
#define X $17
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
|
||||
#define ASSEMBLER
|
||||
#include "common.h"
|
||||
#include "version.h"
|
||||
|
||||
|
||||
#define PREFETCHSIZE 88
|
||||
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
|
||||
#define ASSEMBLER
|
||||
#include "common.h"
|
||||
#include "version.h"
|
||||
|
||||
|
||||
#define PREFETCHSIZE 40
|
||||
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
|
||||
#define ASSEMBLER
|
||||
#include "common.h"
|
||||
#include "version.h"
|
||||
|
||||
|
||||
#define PREFETCHSIZE 88
|
||||
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
|
||||
#define ASSEMBLER
|
||||
#include "common.h"
|
||||
#include "version.h"
|
||||
|
||||
|
||||
.set noat
|
||||
.set noreorder
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
|
||||
#define ASSEMBLER
|
||||
#include "common.h"
|
||||
#include "version.h"
|
||||
|
||||
|
||||
#if !defined(EV4) && !defined(EV5) && !defined(EV6)
|
||||
#error "Architecture is not specified."
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
|
||||
#define ASSEMBLER
|
||||
#include "common.h"
|
||||
#include "version.h"
|
||||
|
||||
|
||||
#define STACKSIZE 64
|
||||
#define PREFETCHSIZE 32
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
|
||||
#define ASSEMBLER
|
||||
#include "common.h"
|
||||
#include "version.h"
|
||||
|
||||
|
||||
#define STACKSIZE 64
|
||||
#define PREFETCHSIZE 32
|
||||
|
|
|
@ -39,7 +39,7 @@
|
|||
#define ASSEMBLER
|
||||
|
||||
#include "common.h"
|
||||
#include "version.h"
|
||||
|
||||
|
||||
#define PREFETCH_SIZE 80
|
||||
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
|
||||
#define ASSEMBLER
|
||||
#include "common.h"
|
||||
#include "version.h"
|
||||
|
||||
|
||||
#define N $16
|
||||
#define X $17
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
|
||||
#define ASSEMBLER
|
||||
#include "common.h"
|
||||
#include "version.h"
|
||||
|
||||
|
||||
#define PREFETCHSIZE 88
|
||||
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
|
||||
#define ASSEMBLER
|
||||
#include "common.h"
|
||||
#include "version.h"
|
||||
|
||||
|
||||
#define PREFETCHSIZE 88
|
||||
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
|
||||
#define ASSEMBLER
|
||||
#include "common.h"
|
||||
#include "version.h"
|
||||
|
||||
|
||||
PROLOGUE
|
||||
PROFCODE
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
|
||||
#define ASSEMBLER
|
||||
#include "common.h"
|
||||
#include "version.h"
|
||||
|
||||
|
||||
#if !defined(EV4) && !defined(EV5) && !defined(EV6)
|
||||
#error "Architecture is not specified."
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
|
||||
#define ASSEMBLER
|
||||
#include "common.h"
|
||||
#include "version.h"
|
||||
|
||||
|
||||
#if !defined(EV4) && !defined(EV5) && !defined(EV6)
|
||||
#error "Architecture is not specified."
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
|
||||
#define ASSEMBLER
|
||||
#include "common.h"
|
||||
#include "version.h"
|
||||
|
||||
|
||||
#if !defined(EV4) && !defined(EV5) && !defined(EV6)
|
||||
#error "Architecture is not specified."
|
||||
|
|
|
@ -42,50 +42,58 @@ endif
|
|||
|
||||
ifndef SGEMMKERNEL
|
||||
SGEMMKERNEL = gemm_kernel.S
|
||||
ifneq ($(SGEMM_UNROLL_M), $(SGEMM_UNROLL_N))
|
||||
SGEMMINCOPY = ../generic/gemm_ncopy_2.c
|
||||
SGEMMITCOPY = ../generic/gemm_tcopy_2.c
|
||||
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||
endif
|
||||
SGEMMONCOPY = ../generic/gemm_ncopy_8.c
|
||||
SGEMMOTCOPY = ../generic/gemm_tcopy_8.c
|
||||
SGEMMINCOPYOBJ = sgemm_incopy.o
|
||||
SGEMMITCOPYOBJ = sgemm_itcopy.o
|
||||
SGEMMONCOPYOBJ = sgemm_oncopy.o
|
||||
SGEMMOTCOPYOBJ = sgemm_otcopy.o
|
||||
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
endif
|
||||
|
||||
ifndef DGEMMKERNEL
|
||||
DGEMMKERNEL = gemm_kernel.S
|
||||
ifneq ($(DGEMM_UNROLL_M), $(DGEMM_UNROLL_N))
|
||||
DGEMMINCOPY = ../generic/gemm_ncopy_2.c
|
||||
DGEMMITCOPY = ../generic/gemm_tcopy_2.c
|
||||
DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||
endif
|
||||
DGEMMONCOPY = ../generic/gemm_ncopy_8.c
|
||||
DGEMMOTCOPY = ../generic/gemm_tcopy_8.c
|
||||
DGEMMINCOPYOBJ = dgemm_incopy.o
|
||||
DGEMMITCOPYOBJ = dgemm_itcopy.o
|
||||
DGEMMONCOPYOBJ = dgemm_oncopy.o
|
||||
DGEMMOTCOPYOBJ = dgemm_otcopy.o
|
||||
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
endif
|
||||
|
||||
ifndef CGEMMKERNEL
|
||||
CGEMMKERNEL = zgemm_kernel.S
|
||||
ifneq ($(CGEMM_UNROLL_M), $(CGEMM_UNROLL_N))
|
||||
CGEMMINCOPY = ../generic/zgemm_ncopy_1.c
|
||||
CGEMMITCOPY = ../generic/zgemm_tcopy_1.c
|
||||
CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||
CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||
endif
|
||||
CGEMMONCOPY = ../generic/zgemm_ncopy_4.c
|
||||
CGEMMOTCOPY = ../generic/zgemm_tcopy_4.c
|
||||
CGEMMINCOPYOBJ = cgemm_incopy.o
|
||||
CGEMMITCOPYOBJ = cgemm_itcopy.o
|
||||
CGEMMONCOPYOBJ = cgemm_oncopy.o
|
||||
CGEMMOTCOPYOBJ = cgemm_otcopy.o
|
||||
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
endif
|
||||
|
||||
ifndef ZGEMMKERNEL
|
||||
ZGEMMKERNEL = zgemm_kernel.S
|
||||
ifneq ($(ZGEMM_UNROLL_M), $(ZGEMM_UNROLL_N))
|
||||
ZGEMMINCOPY = ../generic/zgemm_ncopy_1.c
|
||||
ZGEMMITCOPY = ../generic/zgemm_tcopy_1.c
|
||||
ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||
ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||
endif
|
||||
ZGEMMONCOPY = ../generic/zgemm_ncopy_4.c
|
||||
ZGEMMOTCOPY = ../generic/zgemm_tcopy_4.c
|
||||
ZGEMMINCOPYOBJ = zgemm_incopy.o
|
||||
ZGEMMITCOPYOBJ = zgemm_itcopy.o
|
||||
ZGEMMONCOPYOBJ = zgemm_oncopy.o
|
||||
ZGEMMOTCOPYOBJ = zgemm_otcopy.o
|
||||
ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
endif
|
||||
|
||||
ifndef SGEMM_BETA
|
||||
|
|
|
@ -717,11 +717,12 @@ integer iparam2stage_(integer *ispec, char *name__, char *opts, integer *ni,
|
|||
ret_val = -1;
|
||||
|
||||
// s_copy(subnam, name__, (ftnlen)12, name_len);
|
||||
strncpy(subnam,name__,13);
|
||||
subnam[13]='\0';
|
||||
for (int i=0;i<13;i++) subnam[i]=toupper(subnam[i]);
|
||||
//fprintf(stderr,"iparam2stage, name__ gelesen #%s#\n",name__);
|
||||
//fprintf(stderr,"iparam2stage, subnam gelesen #%s#\n",subnam);
|
||||
strncpy(subnam,name__,13);
|
||||
subnam[13]='\0';
|
||||
{
|
||||
int i;
|
||||
for (i=0;i<13;i++) subnam[i]=toupper(subnam[i]);
|
||||
}
|
||||
|
||||
#if 0
|
||||
|
||||
|
|
|
@ -40,27 +40,40 @@ ifneq ($(C_LAPACK), 1)
|
|||
$(FC) $(FFLAGS) -c -o $@ $<
|
||||
endif
|
||||
|
||||
ifneq "$(or $(BUILD_SINGLE),$(BUILD_COMPLEX))" ""
|
||||
SCATGEN = slatm1.o slatm7.o slaran.o slarnd.o
|
||||
endif
|
||||
|
||||
ifeq ($(BUILD_SINGLE),1)
|
||||
SMATGEN = slatms.o slatme.o slatmr.o slatmt.o \
|
||||
slagge.o slagsy.o slakf2.o slarge.o slaror.o slarot.o slatm2.o \
|
||||
slatm3.o slatm5.o slatm6.o slahilb.o
|
||||
endif
|
||||
|
||||
ifeq ($(BUILD_COMPLEX),1)
|
||||
CMATGEN = clatms.o clatme.o clatmr.o clatmt.o \
|
||||
clagge.o claghe.o clagsy.o clakf2.o clarge.o claror.o clarot.o \
|
||||
clatm1.o clarnd.o clatm2.o clatm3.o clatm5.o clatm6.o clahilb.o
|
||||
endif
|
||||
|
||||
ifneq "$(or $(BUILD_DOUBLE),$(BUILD_COMPLEX16))" ""
|
||||
DZATGEN = dlatm1.o dlatm7.o dlaran.o dlarnd.o
|
||||
endif
|
||||
|
||||
ifeq ($(BUILD_DOUBLE),1)
|
||||
DMATGEN = dlatms.o dlatme.o dlatmr.o dlatmt.o \
|
||||
dlagge.o dlagsy.o dlakf2.o dlarge.o dlaror.o dlarot.o dlatm2.o \
|
||||
dlatm3.o dlatm5.o dlatm6.o dlahilb.o
|
||||
endif
|
||||
|
||||
ifeq ($(BUILD_COMPLEX16),1)
|
||||
ZMATGEN = zlatms.o zlatme.o zlatmr.o zlatmt.o \
|
||||
zlagge.o zlaghe.o zlagsy.o zlakf2.o zlarge.o zlaror.o zlarot.o \
|
||||
zlatm1.o zlarnd.o zlatm2.o zlatm3.o zlatm5.o zlatm6.o zlahilb.o
|
||||
endif
|
||||
|
||||
.PHONY: all
|
||||
.NOTPARALLEL:
|
||||
all: $(TMGLIB)
|
||||
|
||||
ALLOBJ = $(SMATGEN) $(CMATGEN) $(SCATGEN) $(DMATGEN) $(ZMATGEN) \
|
||||
|
@ -107,9 +120,17 @@ cleanlib:
|
|||
rm -f $(TMGLIB)
|
||||
|
||||
ifneq ($(C_LAPACK), 1)
|
||||
ifeq ($(filter $(BUILD_SINGLE) $(BUILD_COMPLEX),1),)
|
||||
slaran.o: slaran.f ; $(FC) $(FFLAGS_NOOPT) -c -o $@ $<
|
||||
endif
|
||||
ifeq ($(filter $(BUILD_DOUBLE) $(BUILD_COMPLEX16),1),)
|
||||
dlaran.o: dlaran.f ; $(FC) $(FFLAGS_NOOPT) -c -o $@ $<
|
||||
endif
|
||||
else
|
||||
ifeq ($(filter $(BUILD_SINGLE) $(BUILD_COMPLEX),1),)
|
||||
slaran.o: slaran.c ; $(CC) $(CFLAGS) -O0 -c -o $@ $<
|
||||
endif
|
||||
ifeq ($(filter $(BUILD_DOUBLE) $(BUILD_COMPLEX16),1),)
|
||||
dlaran.o: dlaran.c ; $(CC) $(CFLAGS) -O0 -c -o $@ $<
|
||||
endif
|
||||
endif
|
||||
|
|
2
param.h
2
param.h
|
@ -2951,7 +2951,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#define SYMV_P 16
|
||||
#endif
|
||||
|
||||
#if defined(P5600) || defined(MIPS1004K) || defined(MIPS24K) || defined(I6400) || defined(P6600) || defined(I6500)
|
||||
#if defined(MIPS64_GENERIC) || defined(P5600) || defined(MIPS1004K) || defined(MIPS24K) || defined(I6400) || defined(P6600) || defined(I6500)
|
||||
#define SNUMOPT 2
|
||||
#define DNUMOPT 2
|
||||
|
||||
|
|
Loading…
Reference in New Issue