Merge branch 'develop' into loongarch64_rename_targets
This commit is contained in:
commit
acd48edf99
|
@ -257,3 +257,53 @@ jobs:
|
||||||
- name: Run tests
|
- name: Run tests
|
||||||
timeout-minutes: 60
|
timeout-minutes: 60
|
||||||
run: cd build && ctest
|
run: cd build && ctest
|
||||||
|
|
||||||
|
cross_build:
|
||||||
|
runs-on: ubuntu-22.04
|
||||||
|
|
||||||
|
strategy:
|
||||||
|
fail-fast: false
|
||||||
|
matrix:
|
||||||
|
include:
|
||||||
|
- target: mips64el
|
||||||
|
triple: mips64el-linux-gnuabi64
|
||||||
|
opts: DYNAMIC_ARCH=1
|
||||||
|
- target: riscv64
|
||||||
|
triple: riscv64-linux-gnu
|
||||||
|
opts: TARGET=RISCV64_GENERIC
|
||||||
|
- target: mipsel
|
||||||
|
triple: mipsel-linux-gnu
|
||||||
|
opts: TARGET=MIPS1004K
|
||||||
|
- target: alpha
|
||||||
|
triple: alpha-linux-gnu
|
||||||
|
opts: TARGET=EV4
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- name: Checkout repository
|
||||||
|
uses: actions/checkout@v3
|
||||||
|
|
||||||
|
- name: Install Dependencies
|
||||||
|
run: |
|
||||||
|
sudo apt-get install -y ccache gcc-${{ matrix.triple }} gfortran-${{ matrix.triple }} libgomp1-${{ matrix.target }}-cross
|
||||||
|
|
||||||
|
- name: Compilation cache
|
||||||
|
uses: actions/cache@v3
|
||||||
|
with:
|
||||||
|
path: ~/.ccache
|
||||||
|
key: ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }}-${{ github.sha }}
|
||||||
|
restore-keys: |
|
||||||
|
ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }}
|
||||||
|
ccache-${{ runner.os }}-${{ matrix.target }}
|
||||||
|
|
||||||
|
- name: Configure ccache
|
||||||
|
run: |
|
||||||
|
# Limit the maximum size and switch on compression to avoid exceeding the total disk or cache quota (5 GB).
|
||||||
|
test -d ~/.ccache || mkdir -p ~/.ccache
|
||||||
|
echo "max_size = 300M" > ~/.ccache/ccache.conf
|
||||||
|
echo "compression = true" >> ~/.ccache/ccache.conf
|
||||||
|
ccache -s
|
||||||
|
|
||||||
|
|
||||||
|
- name: Build OpenBLAS
|
||||||
|
run: |
|
||||||
|
make -j$(nproc) HOSTCC="ccache gcc" CC="ccache ${{ matrix.triple }}-gcc" FC="ccache ${{ matrix.triple }}-gfortran" ARCH=${{ matrix.target }} ${{ matrix.opts }}
|
||||||
|
|
|
@ -30,7 +30,7 @@ matrix:
|
||||||
before_script: &common-before
|
before_script: &common-before
|
||||||
- COMMON_FLAGS="DYNAMIC_ARCH=1 TARGET=POWER8 NUM_THREADS=32"
|
- COMMON_FLAGS="DYNAMIC_ARCH=1 TARGET=POWER8 NUM_THREADS=32"
|
||||||
script:
|
script:
|
||||||
- travis_wait 20 make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE
|
- travis_wait 40 make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE
|
||||||
- make -C test $COMMON_FLAGS $BTYPE
|
- make -C test $COMMON_FLAGS $BTYPE
|
||||||
- make -C ctest $COMMON_FLAGS $BTYPE
|
- make -C ctest $COMMON_FLAGS $BTYPE
|
||||||
- make -C utest $COMMON_FLAGS $BTYPE
|
- make -C utest $COMMON_FLAGS $BTYPE
|
||||||
|
@ -104,7 +104,7 @@ matrix:
|
||||||
- sudo apt-get update
|
- sudo apt-get update
|
||||||
- sudo apt-get install gcc-9 gfortran-9 -y
|
- sudo apt-get install gcc-9 gfortran-9 -y
|
||||||
script:
|
script:
|
||||||
- travis_wait 20 make QUIET_MAKE=1 BINARY=64 USE_OPENMP=1 CC=gcc-9 FC=gfortran-9
|
- travis_wait 40 make QUIET_MAKE=1 BINARY=64 USE_OPENMP=1 CC=gcc-9 FC=gfortran-9
|
||||||
- make -C test $COMMON_FLAGS $BTYPE
|
- make -C test $COMMON_FLAGS $BTYPE
|
||||||
- make -C ctest $COMMON_FLAGS $BTYPE
|
- make -C ctest $COMMON_FLAGS $BTYPE
|
||||||
- make -C utest $COMMON_FLAGS $BTYPE
|
- make -C utest $COMMON_FLAGS $BTYPE
|
||||||
|
@ -121,7 +121,7 @@ matrix:
|
||||||
- sudo apt-get update
|
- sudo apt-get update
|
||||||
- sudo apt-get install gcc-9 gfortran-9 -y
|
- sudo apt-get install gcc-9 gfortran-9 -y
|
||||||
script:
|
script:
|
||||||
- travis_wait 20 make QUIET_MAKE=1 BUILD_BFLOAT16=1 BINARY=64 USE_OPENMP=1 CC=gcc-9 FC=gfortran-9
|
- travis_wait 40 make QUIET_MAKE=1 BUILD_BFLOAT16=1 BINARY=64 USE_OPENMP=1 CC=gcc-9 FC=gfortran-9
|
||||||
- make -C test $COMMON_FLAGS $BTYPE
|
- make -C test $COMMON_FLAGS $BTYPE
|
||||||
- make -C ctest $COMMON_FLAGS $BTYPE
|
- make -C ctest $COMMON_FLAGS $BTYPE
|
||||||
- make -C utest $COMMON_FLAGS $BTYPE
|
- make -C utest $COMMON_FLAGS $BTYPE
|
||||||
|
|
|
@ -211,4 +211,5 @@ In chronological order:
|
||||||
* PLCT Lab, Institute of Software Chinese Academy of Sciences
|
* PLCT Lab, Institute of Software Chinese Academy of Sciences
|
||||||
* [2022-03] Support RISC-V Vector Intrinsic 1.0 version.
|
* [2022-03] Support RISC-V Vector Intrinsic 1.0 version.
|
||||||
|
|
||||||
|
* Pablo Romero <https://github.com/pablorcum>
|
||||||
|
* [2022-08] Fix building from sources for QNX
|
7
Makefile
7
Makefile
|
@ -110,6 +110,10 @@ ifeq ($(OSNAME), Darwin)
|
||||||
@echo "\"make PREFIX=/your_installation_path/ install\"."
|
@echo "\"make PREFIX=/your_installation_path/ install\"."
|
||||||
@echo
|
@echo
|
||||||
@echo "(or set PREFIX in Makefile.rule and run make install."
|
@echo "(or set PREFIX in Makefile.rule and run make install."
|
||||||
|
@echo
|
||||||
|
@echo "Note that any flags passed to make during build should also be passed to make install"
|
||||||
|
@echo "to circumvent any install errors."
|
||||||
|
@echo
|
||||||
@echo "If you want to move the .dylib to a new location later, make sure you change"
|
@echo "If you want to move the .dylib to a new location later, make sure you change"
|
||||||
@echo "the internal name of the dylib with:"
|
@echo "the internal name of the dylib with:"
|
||||||
@echo
|
@echo
|
||||||
|
@ -118,6 +122,9 @@ endif
|
||||||
@echo
|
@echo
|
||||||
@echo "To install the library, you can run \"make PREFIX=/path/to/your/installation install\"."
|
@echo "To install the library, you can run \"make PREFIX=/path/to/your/installation install\"."
|
||||||
@echo
|
@echo
|
||||||
|
@echo "Note that any flags passed to make during build should also be passed to make install"
|
||||||
|
@echo "to circumvent any install errors."
|
||||||
|
@echo
|
||||||
|
|
||||||
shared :
|
shared :
|
||||||
ifneq ($(NO_SHARED), 1)
|
ifneq ($(NO_SHARED), 1)
|
||||||
|
|
|
@ -1,42 +1,24 @@
|
||||||
CPP = $(CC) -E
|
|
||||||
RANLIB = ranlib
|
|
||||||
|
|
||||||
ifeq ($(LIBSUBARCH), EV4)
|
|
||||||
LIBNAME = $(LIBPREFIX)_ev4.a
|
|
||||||
LIBNAME_P = $(LIBPREFIX)_ev4_p.a
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(LIBSUBARCH), EV5)
|
|
||||||
LIBNAME = $(LIBPREFIX)_ev5.a
|
|
||||||
LIBNAME_P = $(LIBPREFIX)_ev5_p.a
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(LIBSUBARCH), EV6)
|
|
||||||
LIBNAME = $(LIBPREFIX)_ev6.a
|
|
||||||
LIBNAME_P = $(LIBPREFIX)_ev6_p.a
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifneq ($(COMPILER), NATIVE)
|
ifneq ($(COMPILER), NATIVE)
|
||||||
# GCC User
|
# GCC User
|
||||||
ifeq ($(LIBSUBARCH), EV4)
|
ifeq ($(CORE), EV4)
|
||||||
OPTION += -DEV4 -mcpu=ev4
|
CCOMMON_OPT += -mcpu=ev4
|
||||||
endif
|
endif
|
||||||
ifeq ($(LIBSUBARCH), EV5)
|
ifeq ($(CORE), EV5)
|
||||||
OPTION += -DEV5 -mcpu=ev5
|
CCOMMON_OPT += -mcpu=ev5
|
||||||
endif
|
endif
|
||||||
ifeq ($(LIBSUBARCH), EV6)
|
ifeq ($(CORE), EV6)
|
||||||
OPTION += -DEV6 -mcpu=ev6
|
CCOMMON_OPT += -mcpu=ev6
|
||||||
endif
|
endif
|
||||||
else
|
else
|
||||||
# Compaq Compiler User
|
# Compaq Compiler User
|
||||||
ifeq ($(LIBSUBARCH), EV4)
|
ifeq ($(CORE), EV4)
|
||||||
OPTION += -DEV4 -tune ev4 -arch ev4
|
CCOMMON_OPT += -tune ev4 -arch ev4
|
||||||
endif
|
endif
|
||||||
ifeq ($(LIBSUBARCH), EV5)
|
ifeq ($(CORE), EV5)
|
||||||
OPTION += -DEV5 -tune ev5 -arch ev5
|
CCOMMON_OPT += -tune ev5 -arch ev5
|
||||||
endif
|
endif
|
||||||
ifeq ($(LIBSUBARCH), EV6)
|
ifeq ($(CORE), EV6)
|
||||||
OPTION += -DEV6 -tune ev6 -arch ev6
|
CCOMMON_OPT += -tune ev6 -arch ev6
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
|
|
@ -60,9 +60,9 @@ all: getarch_2nd
|
||||||
./getarch_2nd 1 >> $(TARGET_CONF)
|
./getarch_2nd 1 >> $(TARGET_CONF)
|
||||||
|
|
||||||
$(TARGET_CONF): c_check$(SCRIPTSUFFIX) f_check$(SCRIPTSUFFIX) getarch
|
$(TARGET_CONF): c_check$(SCRIPTSUFFIX) f_check$(SCRIPTSUFFIX) getarch
|
||||||
./c_check$(SCRIPTSUFFIX) $(TARGET_MAKE) $(TARGET_CONF) $(CC) $(TARGET_FLAGS) $(CFLAGS)
|
./c_check$(SCRIPTSUFFIX) $(TARGET_MAKE) $(TARGET_CONF) "$(CC)" "$(TARGET_FLAGS) $(CFLAGS)"
|
||||||
ifneq ($(ONLY_CBLAS), 1)
|
ifneq ($(ONLY_CBLAS), 1)
|
||||||
./f_check$(SCRIPTSUFFIX) $(TARGET_MAKE) $(TARGET_CONF) $(FC) $(TARGET_FLAGS)
|
./f_check$(SCRIPTSUFFIX) $(TARGET_MAKE) $(TARGET_CONF) "$(FC)" "$(TARGET_FLAGS)"
|
||||||
else
|
else
|
||||||
#When we only build CBLAS, we set NOFORTRAN=2
|
#When we only build CBLAS, we set NOFORTRAN=2
|
||||||
echo "NOFORTRAN=2" >> $(TARGET_MAKE)
|
echo "NOFORTRAN=2" >> $(TARGET_MAKE)
|
||||||
|
@ -77,8 +77,8 @@ endif
|
||||||
|
|
||||||
|
|
||||||
getarch : getarch.c cpuid.S dummy $(CPUIDEMU)
|
getarch : getarch.c cpuid.S dummy $(CPUIDEMU)
|
||||||
avx512=$$(./c_check$(SCRIPTSUFFIX) - - $(CC) $(TARGET_FLAGS) $(CFLAGS) | grep NO_AVX512); \
|
avx512=$$(./c_check$(SCRIPTSUFFIX) - - "$(CC)" "$(TARGET_FLAGS) $(CFLAGS)" | grep NO_AVX512); \
|
||||||
rv64gv=$$(./c_check$(SCRIPTSUFFIX) - - $(CC) $(TARGET_FLAGS) $(CFLAGS) | grep NO_RV64GV); \
|
rv64gv=$$(./c_check$(SCRIPTSUFFIX) - - "$(CC)" "$(TARGET_FLAGS) $(CFLAGS)" | grep NO_RV64GV); \
|
||||||
$(HOSTCC) $(HOST_CFLAGS) $(EXFLAGS) $${avx512:+-D$${avx512}} $${rv64gv:+-D$${rv64gv}} -lm -o $(@F) getarch.c cpuid.S $(CPUIDEMU)
|
$(HOSTCC) $(HOST_CFLAGS) $(EXFLAGS) $${avx512:+-D$${avx512}} $${rv64gv:+-D$${rv64gv}} -lm -o $(@F) getarch.c cpuid.S $(CPUIDEMU)
|
||||||
|
|
||||||
getarch_2nd : getarch_2nd.c $(TARGET_CONF) dummy
|
getarch_2nd : getarch_2nd.c $(TARGET_CONF) dummy
|
||||||
|
|
|
@ -677,7 +677,12 @@ endif
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifeq ($(ARCH), mips64)
|
ifeq ($(ARCH), mips64)
|
||||||
DYNAMIC_CORE = LOONGSON3R3 LOONGSON3R4
|
DYNAMIC_CORE = LOONGSON3R3 LOONGSON3R4 MIPS64_GENERIC
|
||||||
|
ifdef DYNAMIC_LIST
|
||||||
|
override DYNAMIC_CORE = MIPS64_GENERIC $(DYNAMIC_LIST)
|
||||||
|
XCCOMMON_OPT = -DDYNAMIC_LIST -DDYN_MIPS64_GENERIC
|
||||||
|
XCCOMMON_OPT += $(foreach dcore,$(DYNAMIC_LIST),-DDYN_$(dcore))
|
||||||
|
endif
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifeq ($(ARCH), loongarch64)
|
ifeq ($(ARCH), loongarch64)
|
||||||
|
@ -856,6 +861,11 @@ CCOMMON_OPT += -mabi=32
|
||||||
BINARY_DEFINED = 1
|
BINARY_DEFINED = 1
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
ifneq (, $(filter $(CORE), MIPS64_GENERIC))
|
||||||
|
CCOMMON_OPT += -DNO_MSA
|
||||||
|
FCOMMON_OPT += -DNO_MSA
|
||||||
|
endif
|
||||||
|
|
||||||
ifneq (, $(filter $(CORE),LOONGSON3R3 LOONGSON3R4))
|
ifneq (, $(filter $(CORE),LOONGSON3R3 LOONGSON3R4))
|
||||||
CCOMMON_OPT += -march=loongson3a
|
CCOMMON_OPT += -march=loongson3a
|
||||||
FCOMMON_OPT += -march=loongson3a
|
FCOMMON_OPT += -march=loongson3a
|
||||||
|
|
|
@ -65,6 +65,7 @@ MIPS1004K
|
||||||
MIPS24K
|
MIPS24K
|
||||||
|
|
||||||
4.MIPS64 CPU:
|
4.MIPS64 CPU:
|
||||||
|
MIPS64_GENERIC
|
||||||
SICORTEX
|
SICORTEX
|
||||||
LOONGSON3A
|
LOONGSON3A
|
||||||
LOONGSON3B
|
LOONGSON3B
|
||||||
|
@ -128,3 +129,7 @@ LA264
|
||||||
12. Elbrus E2000:
|
12. Elbrus E2000:
|
||||||
E2K
|
E2K
|
||||||
|
|
||||||
|
13. Alpha
|
||||||
|
EV4
|
||||||
|
EV5
|
||||||
|
EV6
|
||||||
|
|
4
c_check
4
c_check
|
@ -31,8 +31,8 @@ flags="$*"
|
||||||
|
|
||||||
cross_suffix=""
|
cross_suffix=""
|
||||||
|
|
||||||
if [ "`dirname $compiler_name`" != '.' ]; then
|
if [ "`dirname \"$compiler_name\"`" != '.' ]; then
|
||||||
cross_suffix="$cross_suffix`dirname $compiler_name`/"
|
cross_suffix="$cross_suffix`dirname \"$compiler_name\"`/"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
bn=`basename $compiler_name`
|
bn=`basename $compiler_name`
|
||||||
|
|
4
common.h
4
common.h
|
@ -90,7 +90,7 @@ extern "C" {
|
||||||
#endif
|
#endif
|
||||||
#include <time.h>
|
#include <time.h>
|
||||||
|
|
||||||
#ifdef OS_LINUX
|
#if defined(OS_LINUX) || defined(OS_QNX)
|
||||||
#include <malloc.h>
|
#include <malloc.h>
|
||||||
#include <sched.h>
|
#include <sched.h>
|
||||||
#endif
|
#endif
|
||||||
|
@ -107,7 +107,7 @@ extern "C" {
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef OS_HAIKU
|
#if defined(OS_HAIKU) || defined(OS_QNX)
|
||||||
#define NO_SYSV_IPC
|
#define NO_SYSV_IPC
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -43,7 +43,7 @@
|
||||||
|
|
||||||
#define MB asm("mb")
|
#define MB asm("mb")
|
||||||
#define WMB asm("wmb")
|
#define WMB asm("wmb")
|
||||||
#define RMB asm("rmb")
|
#define RMB asm("mb")
|
||||||
|
|
||||||
static void __inline blas_lock(unsigned long *address){
|
static void __inline blas_lock(unsigned long *address){
|
||||||
#ifndef __DECC
|
#ifndef __DECC
|
||||||
|
|
|
@ -2612,7 +2612,7 @@
|
||||||
#ifndef ASSEMBLER
|
#ifndef ASSEMBLER
|
||||||
#if !defined(DYNAMIC_ARCH) \
|
#if !defined(DYNAMIC_ARCH) \
|
||||||
&& (defined(ARCH_X86) || defined(ARCH_X86_64) || defined(ARCH_IA64) || defined(ARCH_MIPS64) || defined(ARCH_ARM64) \
|
&& (defined(ARCH_X86) || defined(ARCH_X86_64) || defined(ARCH_IA64) || defined(ARCH_MIPS64) || defined(ARCH_ARM64) \
|
||||||
|| defined(ARCH_LOONGARCH64) || defined(ARCH_E2K))
|
|| defined(ARCH_LOONGARCH64) || defined(ARCH_E2K) || defined(ARCH_ALPHA))
|
||||||
extern BLASLONG gemm_offset_a;
|
extern BLASLONG gemm_offset_a;
|
||||||
extern BLASLONG gemm_offset_b;
|
extern BLASLONG gemm_offset_b;
|
||||||
extern BLASLONG sbgemm_p;
|
extern BLASLONG sbgemm_p;
|
||||||
|
|
|
@ -86,7 +86,9 @@ static inline unsigned int rpcc(void){
|
||||||
//__asm__ __volatile__("dmfc0 %0, $25, 1": "=r"(tmp):: "memory");
|
//__asm__ __volatile__("dmfc0 %0, $25, 1": "=r"(tmp):: "memory");
|
||||||
//ret=tmp;
|
//ret=tmp;
|
||||||
__asm__ __volatile__(".set push \n"
|
__asm__ __volatile__(".set push \n"
|
||||||
|
#if !defined(__mips_isa_rev) || __mips_isa_rev < 2
|
||||||
".set mips32r2\n"
|
".set mips32r2\n"
|
||||||
|
#endif
|
||||||
"rdhwr %0, $2\n"
|
"rdhwr %0, $2\n"
|
||||||
".set pop": "=r"(ret):: "memory");
|
".set pop": "=r"(ret):: "memory");
|
||||||
|
|
||||||
|
@ -99,7 +101,9 @@ static inline unsigned int rpcc(void){
|
||||||
static inline int WhereAmI(void){
|
static inline int WhereAmI(void){
|
||||||
int ret=0;
|
int ret=0;
|
||||||
__asm__ __volatile__(".set push \n"
|
__asm__ __volatile__(".set push \n"
|
||||||
|
#if !defined(__mips_isa_rev) || __mips_isa_rev < 2
|
||||||
".set mips32r2\n"
|
".set mips32r2\n"
|
||||||
|
#endif
|
||||||
"rdhwr %0, $0\n"
|
"rdhwr %0, $0\n"
|
||||||
".set pop": "=r"(ret):: "memory");
|
".set pop": "=r"(ret):: "memory");
|
||||||
return ret;
|
return ret;
|
||||||
|
@ -197,9 +201,15 @@ static inline int blas_quickdivide(blasint x, blasint y){
|
||||||
|
|
||||||
#if defined(ASSEMBLER) && !defined(NEEDPARAM)
|
#if defined(ASSEMBLER) && !defined(NEEDPARAM)
|
||||||
|
|
||||||
|
#if defined(__mips_isa_rev) && __mips_isa_rev >= 6
|
||||||
|
#define ASSEMBLER_ARCH mips64r6
|
||||||
|
#else
|
||||||
|
#define ASSEMBLER_ARCH mips64
|
||||||
|
#endif
|
||||||
|
|
||||||
#define PROLOGUE \
|
#define PROLOGUE \
|
||||||
.text ;\
|
.text ;\
|
||||||
.set mips64 ;\
|
.set ASSEMBLER_ARCH ;\
|
||||||
.align 5 ;\
|
.align 5 ;\
|
||||||
.globl REALNAME ;\
|
.globl REALNAME ;\
|
||||||
.ent REALNAME ;\
|
.ent REALNAME ;\
|
||||||
|
|
|
@ -59,6 +59,11 @@ void get_subarchitecture(void){
|
||||||
printf("ev%d", implver() + 4);
|
printf("ev%d", implver() + 4);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void get_corename(void){
|
||||||
|
printf("EV%d", implver() + 4);
|
||||||
|
}
|
||||||
|
|
||||||
void get_subdirname(void){
|
void get_subdirname(void){
|
||||||
printf("alpha");
|
printf("alpha");
|
||||||
}
|
}
|
||||||
|
|
|
@ -70,16 +70,18 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
/* or implied, of The University of Texas at Austin. */
|
/* or implied, of The University of Texas at Austin. */
|
||||||
/*********************************************************************/
|
/*********************************************************************/
|
||||||
|
|
||||||
#define CPU_UNKNOWN 0
|
#define CPU_UNKNOWN 0
|
||||||
#define CPU_SICORTEX 1
|
#define CPU_MIPS64_GENERIC 1
|
||||||
#define CPU_LOONGSON3R3 2
|
#define CPU_SICORTEX 2
|
||||||
#define CPU_LOONGSON3R4 3
|
#define CPU_LOONGSON3R3 3
|
||||||
#define CPU_I6400 4
|
#define CPU_LOONGSON3R4 4
|
||||||
#define CPU_P6600 5
|
#define CPU_I6400 5
|
||||||
#define CPU_I6500 6
|
#define CPU_P6600 6
|
||||||
|
#define CPU_I6500 7
|
||||||
|
|
||||||
static char *cpuname[] = {
|
static char *cpuname[] = {
|
||||||
"UNKNOWN",
|
"UNKNOWN",
|
||||||
|
"MIPS64_GENERIC", /* trailing comma is required: without it this literal is concatenated with the next one ("SICORTEX") and every later entry shifts down by one slot */
|
||||||
"SICORTEX",
|
"SICORTEX",
|
||||||
"LOONGSON3R3",
|
"LOONGSON3R3",
|
||||||
"LOONGSON3R4",
|
"LOONGSON3R4",
|
||||||
|
@ -113,8 +115,11 @@ int detect(void){
|
||||||
return CPU_SICORTEX;
|
return CPU_SICORTEX;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return CPU_MIPS64_GENERIC;
|
||||||
|
#else
|
||||||
|
return CPU_UNKNOWN;
|
||||||
#endif
|
#endif
|
||||||
return CPU_UNKNOWN;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
char *get_corename(void){
|
char *get_corename(void){
|
||||||
|
@ -136,9 +141,11 @@ void get_subarchitecture(void){
|
||||||
printf("P6600");
|
printf("P6600");
|
||||||
}else if(detect()==CPU_I6500){
|
}else if(detect()==CPU_I6500){
|
||||||
printf("I6500");
|
printf("I6500");
|
||||||
}else{
|
}else if(detect()==CPU_SICORTEX){
|
||||||
printf("SICORTEX");
|
printf("SICORTEX");
|
||||||
}
|
}else{
|
||||||
|
printf("MIPS64_GENERIC");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void get_subdirname(void){
|
void get_subdirname(void){
|
||||||
|
@ -215,8 +222,8 @@ void get_libname(void){
|
||||||
printf("p6600\n");
|
printf("p6600\n");
|
||||||
}else if(detect()==CPU_I6500) {
|
}else if(detect()==CPU_I6500) {
|
||||||
printf("i6500\n");
|
printf("i6500\n");
|
||||||
}else{
|
}else {
|
||||||
printf("mips64\n");
|
printf("mips64_generic\n");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -40,7 +40,7 @@ else()
|
||||||
c_${float_char}blas1.c)
|
c_${float_char}blas1.c)
|
||||||
endif()
|
endif()
|
||||||
target_link_libraries(x${float_char}cblat1 ${OpenBLAS_LIBNAME})
|
target_link_libraries(x${float_char}cblat1 ${OpenBLAS_LIBNAME})
|
||||||
if(${CMAKE_SYSTEM_NAME} MATCHES "Linux" OR ${CMAKE_SYSTEM_NAME} MATCHES "FreeBSD")
|
if(${CMAKE_SYSTEM_NAME} MATCHES "Linux" OR ${CMAKE_SYSTEM_NAME} MATCHES "FreeBSD" OR ${CMAKE_SYSTEM_NAME} MATCHES "QNX")
|
||||||
target_link_libraries(x${float_char}cblat1 m)
|
target_link_libraries(x${float_char}cblat1 m)
|
||||||
endif()
|
endif()
|
||||||
add_test(NAME "x${float_char}cblat1"
|
add_test(NAME "x${float_char}cblat1"
|
||||||
|
@ -65,7 +65,7 @@ else()
|
||||||
constant.c)
|
constant.c)
|
||||||
endif()
|
endif()
|
||||||
target_link_libraries(x${float_char}cblat2 ${OpenBLAS_LIBNAME})
|
target_link_libraries(x${float_char}cblat2 ${OpenBLAS_LIBNAME})
|
||||||
if(${CMAKE_SYSTEM_NAME} MATCHES "Linux" OR ${CMAKE_SYSTEM_NAME} MATCHES "FreeBSD")
|
if(${CMAKE_SYSTEM_NAME} MATCHES "Linux" OR ${CMAKE_SYSTEM_NAME} MATCHES "FreeBSD" OR ${CMAKE_SYSTEM_NAME} MATCHES "QNX")
|
||||||
target_link_libraries(x${float_char}cblat2 m)
|
target_link_libraries(x${float_char}cblat2 m)
|
||||||
endif()
|
endif()
|
||||||
add_test(NAME "x${float_char}cblat2"
|
add_test(NAME "x${float_char}cblat2"
|
||||||
|
@ -90,7 +90,7 @@ else()
|
||||||
constant.c)
|
constant.c)
|
||||||
endif()
|
endif()
|
||||||
target_link_libraries(x${float_char}cblat3 ${OpenBLAS_LIBNAME})
|
target_link_libraries(x${float_char}cblat3 ${OpenBLAS_LIBNAME})
|
||||||
if(${CMAKE_SYSTEM_NAME} MATCHES "Linux" OR ${CMAKE_SYSTEM_NAME} MATCHES "FreeBSD")
|
if(${CMAKE_SYSTEM_NAME} MATCHES "Linux" OR ${CMAKE_SYSTEM_NAME} MATCHES "FreeBSD" OR ${CMAKE_SYSTEM_NAME} MATCHES "QNX")
|
||||||
target_link_libraries(x${float_char}cblat3 m)
|
target_link_libraries(x${float_char}cblat3 m)
|
||||||
endif()
|
endif()
|
||||||
add_test(NAME "x${float_char}cblat3"
|
add_test(NAME "x${float_char}cblat3"
|
||||||
|
|
|
@ -38,22 +38,48 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#include <sys/resource.h>
|
#include <sys/resource.h>
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
|
|
||||||
|
#if (defined OS_LINUX || defined OS_ANDROID)
|
||||||
|
#include <asm/hwcap.h>
|
||||||
|
#include <sys/auxv.h>
|
||||||
|
|
||||||
|
#ifndef HWCAP_LOONGSON_CPUCFG
|
||||||
|
#define HWCAP_LOONGSON_CPUCFG (1 << 14)
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef DYNAMIC_LIST
|
||||||
|
extern gotoblas_t gotoblas_MIPS64_GENERIC;
|
||||||
|
#ifdef DYN_LOONGSON3R3
|
||||||
|
extern gotoblas_t gotoblas_LOONGSON3R3;
|
||||||
|
#else
|
||||||
|
#define gotoblas_LOONGSON3R3 gotoblas_MIPS64_GENERIC
|
||||||
|
#endif
|
||||||
|
#ifdef DYN_LOONGSON3R4
|
||||||
|
extern gotoblas_t gotoblas_LOONGSON3R4;
|
||||||
|
#else
|
||||||
|
#define gotoblas_LOONGSON3R4 gotoblas_MIPS64_GENERIC
|
||||||
|
#endif
|
||||||
|
#else
|
||||||
extern gotoblas_t gotoblas_LOONGSON3R3;
|
extern gotoblas_t gotoblas_LOONGSON3R3;
|
||||||
extern gotoblas_t gotoblas_LOONGSON3R4;
|
extern gotoblas_t gotoblas_LOONGSON3R4;
|
||||||
|
extern gotoblas_t gotoblas_MIPS64_GENERIC;
|
||||||
|
#endif
|
||||||
|
|
||||||
extern void openblas_warning(int verbose, const char * msg);
|
extern void openblas_warning(int verbose, const char * msg);
|
||||||
|
|
||||||
#define NUM_CORETYPES 2
|
#define NUM_CORETYPES 3
|
||||||
|
|
||||||
static char *corename[] = {
|
static char *corename[] = {
|
||||||
|
"MIPS64_GENERIC", /* index 0; the comma is required, otherwise this literal merges with "loongson3r3", the table shrinks to three entries and corename[NUM_CORETYPES] reads past its end */
|
||||||
"loongson3r3",
|
"loongson3r3",
|
||||||
"loongson3r4",
|
"loongson3r4",
|
||||||
"UNKNOWN"
|
"UNKNOWN"
|
||||||
};
|
};
|
||||||
|
|
||||||
char *gotoblas_corename(void) {
|
char *gotoblas_corename(void) {
|
||||||
if (gotoblas == &gotoblas_LOONGSON3R3) return corename[0];
|
if (gotoblas == &gotoblas_MIPS64_GENERIC) return corename[0];
|
||||||
if (gotoblas == &gotoblas_LOONGSON3R4) return corename[1];
|
if (gotoblas == &gotoblas_LOONGSON3R3) return corename[1];
|
||||||
|
if (gotoblas == &gotoblas_LOONGSON3R4) return corename[2];
|
||||||
return corename[NUM_CORETYPES];
|
return corename[NUM_CORETYPES];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -73,77 +99,32 @@ static gotoblas_t *force_coretype(char *coretype) {
|
||||||
|
|
||||||
switch (found)
|
switch (found)
|
||||||
{
|
{
|
||||||
case 0: return (&gotoblas_LOONGSON3R3);
|
case 0: return (&gotoblas_MIPS64_GENERIC);
|
||||||
case 1: return (&gotoblas_LOONGSON3R4);
|
case 1: return (&gotoblas_LOONGSON3R3);
|
||||||
|
case 2: return (&gotoblas_LOONGSON3R4);
|
||||||
}
|
}
|
||||||
snprintf(message, 128, "Core not found: %s\n", coretype);
|
snprintf(message, 128, "Core not found: %s\n", coretype);
|
||||||
openblas_warning(1, message);
|
openblas_warning(1, message);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if (defined OS_LINUX || defined OS_ANDROID)
|
||||||
#define MMI_MASK 0x00000010
|
#define MMI_MASK 0x00000010
|
||||||
#define MSA_MASK 0x00000020
|
#define MSA_MASK 0x00000020
|
||||||
|
|
||||||
int fd[2];
|
|
||||||
int support_cpucfg;
|
|
||||||
|
|
||||||
static void handler(int signum)
|
|
||||||
{
|
|
||||||
close(fd[1]);
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Brief : Function to check if cpucfg supported on loongson
|
|
||||||
* Return: 1 supported
|
|
||||||
* 0 not supported
|
|
||||||
*/
|
|
||||||
static int cpucfg_test(void) {
|
|
||||||
pid_t pid;
|
|
||||||
int status = 0;
|
|
||||||
|
|
||||||
support_cpucfg = 0;
|
|
||||||
pipe(fd);
|
|
||||||
pid = fork();
|
|
||||||
if (pid == 0) { /* Subprocess */
|
|
||||||
struct sigaction act;
|
|
||||||
close(fd[0]);
|
|
||||||
/* Set signal action for SIGILL. */
|
|
||||||
act.sa_handler = handler;
|
|
||||||
sigaction(SIGILL,&act,NULL);
|
|
||||||
|
|
||||||
/* Execute cpucfg in subprocess. */
|
|
||||||
__asm__ volatile(
|
|
||||||
".insn \n\t"
|
|
||||||
".word (0xc8080118) \n\t"
|
|
||||||
:::
|
|
||||||
);
|
|
||||||
support_cpucfg = 1;
|
|
||||||
write(fd[1],&support_cpucfg,sizeof(support_cpucfg));
|
|
||||||
close(fd[1]);
|
|
||||||
exit(0);
|
|
||||||
} else if (pid > 0){ /* Parent process*/
|
|
||||||
close(fd[1]);
|
|
||||||
if ((waitpid(pid,&status,0) <= 0) ||
|
|
||||||
(read(fd[0],&support_cpucfg,sizeof(support_cpucfg)) <= 0))
|
|
||||||
support_cpucfg = 0;
|
|
||||||
close(fd[0]);
|
|
||||||
} else {
|
|
||||||
support_cpucfg = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
return support_cpucfg;
|
|
||||||
}
|
|
||||||
|
|
||||||
static gotoblas_t *get_coretype_from_cpucfg(void) {
|
static gotoblas_t *get_coretype_from_cpucfg(void) {
|
||||||
int flag = 0;
|
int flag = 0;
|
||||||
__asm__ volatile(
|
__asm__ volatile(
|
||||||
|
".set push \n\t"
|
||||||
|
".set noat \n\t"
|
||||||
".insn \n\t"
|
".insn \n\t"
|
||||||
"dli $8, 0x01 \n\t"
|
"dli $1, 0x01 \n\t"
|
||||||
".word (0xc9084918) \n\t"
|
".word (0xc8080118) \n\t"
|
||||||
"usw $9, 0x00(%0) \n\t"
|
"move %0, $1 \n\t"
|
||||||
|
".set pop \n\t"
|
||||||
|
: "=r"(flag)
|
||||||
|
:
|
||||||
:
|
:
|
||||||
: "r"(&flag)
|
|
||||||
: "memory"
|
|
||||||
);
|
);
|
||||||
if (flag & MSA_MASK)
|
if (flag & MSA_MASK)
|
||||||
return (&gotoblas_LOONGSON3R4);
|
return (&gotoblas_LOONGSON3R4);
|
||||||
|
@ -153,7 +134,7 @@ static gotoblas_t *get_coretype_from_cpucfg(void) {
|
||||||
}
|
}
|
||||||
|
|
||||||
static gotoblas_t *get_coretype_from_cpuinfo(void) {
|
static gotoblas_t *get_coretype_from_cpuinfo(void) {
|
||||||
#ifdef linux
|
#ifdef __linux
|
||||||
FILE *infile;
|
FILE *infile;
|
||||||
char buffer[512], *p;
|
char buffer[512], *p;
|
||||||
|
|
||||||
|
@ -176,17 +157,19 @@ static gotoblas_t *get_coretype_from_cpuinfo(void) {
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
static gotoblas_t *get_coretype(void) {
|
static gotoblas_t *get_coretype(void) {
|
||||||
int ret = 0;
|
#if (!defined OS_LINUX && !defined OS_ANDROID)
|
||||||
|
return NULL;
|
||||||
ret = cpucfg_test();
|
#else
|
||||||
if (ret == 1)
|
if (getauxval(AT_HWCAP) & HWCAP_LOONGSON_CPUCFG) /* probe with the CPUCFG instruction only when the HWCAP bit is set; otherwise fall back to parsing cpuinfo */
|
||||||
return get_coretype_from_cpucfg();
|
return get_coretype_from_cpucfg();
|
||||||
else
|
else
|
||||||
return get_coretype_from_cpuinfo();
|
return get_coretype_from_cpuinfo();
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
void gotoblas_dynamic_init(void) {
|
void gotoblas_dynamic_init(void) {
|
||||||
|
@ -208,9 +191,9 @@ void gotoblas_dynamic_init(void) {
|
||||||
|
|
||||||
if (gotoblas == NULL)
|
if (gotoblas == NULL)
|
||||||
{
|
{
|
||||||
snprintf(coremsg, 128, "Falling back to loongson3r3 core\n");
|
snprintf(coremsg, 128, "Falling back to MIPS64_GENERIC\n"); /* message names the core that is assigned to gotoblas just below */
|
||||||
openblas_warning(1, coremsg);
|
openblas_warning(1, coremsg);
|
||||||
gotoblas = &gotoblas_LOONGSON3R3;
|
gotoblas = &gotoblas_MIPS64_GENERIC;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (gotoblas && gotoblas->init) {
|
if (gotoblas && gotoblas->init) {
|
||||||
|
|
|
@ -4000,6 +4000,22 @@ case "$p1" in
|
||||||
no_underscore_objs="$no_underscore_objs $misc_common_objs"
|
no_underscore_objs="$no_underscore_objs $misc_common_objs"
|
||||||
|
|
||||||
printf 'int main(void){\n'
|
printf 'int main(void){\n'
|
||||||
|
for obj in $underscore_objs; do
|
||||||
|
[ "$obj" != "xerbla" ] && printf 'extern void %s%s%s%s();\n' \
|
||||||
|
"$symbolprefix" "$obj" "$bu" "$symbolsuffix"
|
||||||
|
done
|
||||||
|
|
||||||
|
for obj in $need_2underscore_objs; do
|
||||||
|
printf 'extern void %s%s%s%s%s();\n' \
|
||||||
|
"$symbolprefix" "$obj" "$bu" "$bu" "$symbolsuffix"
|
||||||
|
done
|
||||||
|
|
||||||
|
for obj in $no_underscore_objs; do
|
||||||
|
printf 'extern void %s%s%s();\n' \
|
||||||
|
"$symbolprefix" "$obj" "$symbolsuffix"
|
||||||
|
done
|
||||||
|
|
||||||
|
printf '\n'
|
||||||
for obj in $underscore_objs; do
|
for obj in $underscore_objs; do
|
||||||
[ "$obj" != "xerbla" ] && printf '%s%s%s%s();\n' \
|
[ "$obj" != "xerbla" ] && printf '%s%s%s%s();\n' \
|
||||||
"$symbolprefix" "$obj" "$bu" "$symbolsuffix"
|
"$symbolprefix" "$obj" "$bu" "$symbolsuffix"
|
||||||
|
|
|
@ -3955,6 +3955,18 @@ if ($ARGV[0] eq "linktest") {
|
||||||
@no_underscore_objs = (@no_underscore_objs, @misc_common_objs);
|
@no_underscore_objs = (@no_underscore_objs, @misc_common_objs);
|
||||||
|
|
||||||
print "int main(void){\n";
|
print "int main(void){\n";
|
||||||
|
foreach $objs (@underscore_objs) {
|
||||||
|
print "extern void ", $symbolprefix, $objs, $bu, $symbolsuffix, "();\n" if $objs ne "xerbla";
|
||||||
|
}
|
||||||
|
|
||||||
|
foreach $objs (@need_2underscore_objs) {
|
||||||
|
print "extern void ", $symbolprefix, $objs, $bu, $bu, $symbolsuffix, "();\n";
|
||||||
|
}
|
||||||
|
|
||||||
|
foreach $objs (@no_underscore_objs) {
|
||||||
|
print "extern void ", $symbolprefix, $objs, $symbolsuffix, "();\n";
|
||||||
|
}
|
||||||
|
|
||||||
foreach $objs (@underscore_objs) {
|
foreach $objs (@underscore_objs) {
|
||||||
print $symbolprefix, $objs, $bu, $symbolsuffix, "();\n" if $objs ne "xerbla";
|
print $symbolprefix, $objs, $bu, $symbolsuffix, "();\n" if $objs ne "xerbla";
|
||||||
}
|
}
|
||||||
|
|
56
getarch.c
56
getarch.c
|
@ -131,6 +131,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
/* #define FORCE_PPC440 */
|
/* #define FORCE_PPC440 */
|
||||||
/* #define FORCE_PPC440FP2 */
|
/* #define FORCE_PPC440FP2 */
|
||||||
/* #define FORCE_CELL */
|
/* #define FORCE_CELL */
|
||||||
|
/* #define FORCE_MIPS64_GENERIC */
|
||||||
/* #define FORCE_SICORTEX */
|
/* #define FORCE_SICORTEX */
|
||||||
/* #define FORCE_LOONGSON3R3 */
|
/* #define FORCE_LOONGSON3R3 */
|
||||||
/* #define FORCE_LOONGSON3R4 */
|
/* #define FORCE_LOONGSON3R4 */
|
||||||
|
@ -146,6 +147,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
/* #define FORCE_SPARCV7 */
|
/* #define FORCE_SPARCV7 */
|
||||||
/* #define FORCE_ZARCH_GENERIC */
|
/* #define FORCE_ZARCH_GENERIC */
|
||||||
/* #define FORCE_Z13 */
|
/* #define FORCE_Z13 */
|
||||||
|
/* #define FORCE_EV4 */
|
||||||
|
/* #define FORCE_EV5 */
|
||||||
|
/* #define FORCE_EV6 */
|
||||||
/* #define FORCE_GENERIC */
|
/* #define FORCE_GENERIC */
|
||||||
|
|
||||||
#ifdef FORCE_P2
|
#ifdef FORCE_P2
|
||||||
|
@ -915,6 +919,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#define CORENAME "CELL"
|
#define CORENAME "CELL"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef FORCE_MIPS64_GENERIC
|
||||||
|
#define FORCE
|
||||||
|
#define ARCHITECTURE "MIPS"
|
||||||
|
#define SUBARCHITECTURE "MIPS64_GENERIC"
|
||||||
|
#define SUBDIRNAME "mips64"
|
||||||
|
#define ARCHCONFIG "-DMIPS64_GENERIC " \
|
||||||
|
"-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \
|
||||||
|
"-DL2_SIZE=1048576 -DL2_LINESIZE=32 " \
|
||||||
|
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 "
|
||||||
|
#define LIBNAME "mips64_generic"
|
||||||
|
#define CORENAME "MIPS64_GENERIC"
|
||||||
|
#else
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef FORCE_SICORTEX
|
#ifdef FORCE_SICORTEX
|
||||||
#define FORCE
|
#define FORCE
|
||||||
#define ARCHITECTURE "MIPS"
|
#define ARCHITECTURE "MIPS"
|
||||||
|
@ -1601,6 +1619,42 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#define CORENAME "Z14"
|
#define CORENAME "Z14"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef FORCE_EV4
|
||||||
|
#define FORCE
|
||||||
|
#define ARCHITECTURE "ALPHA"
|
||||||
|
#define SUBARCHITECTURE "ev4"
|
||||||
|
#define ARCHCONFIG "-DEV4 " \
|
||||||
|
"-DL1_DATA_SIZE=16384 -DL1_DATA_LINESIZE=32 " \
|
||||||
|
"-DL2_SIZE=2097152 -DL2_LINESIZE=32 " \
|
||||||
|
"-DDTB_DEFAULT_ENTRIES=32 -DDTB_SIZE=8192 "
|
||||||
|
#define LIBNAME "ev4"
|
||||||
|
#define CORENAME "EV4"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef FORCE_EV5
|
||||||
|
#define FORCE
|
||||||
|
#define ARCHITECTURE "ALPHA"
|
||||||
|
#define SUBARCHITECTURE "ev5"
|
||||||
|
#define ARCHCONFIG "-DEV5 " \
|
||||||
|
"-DL1_DATA_SIZE=16384 -DL1_DATA_LINESIZE=32 " \
|
||||||
|
"-DL2_SIZE=2097152 -DL2_LINESIZE=64 " \
|
||||||
|
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=8192 "
|
||||||
|
#define LIBNAME "ev5"
|
||||||
|
#define CORENAME "EV5"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef FORCE_EV6
|
||||||
|
#define FORCE
|
||||||
|
#define ARCHITECTURE "ALPHA"
|
||||||
|
#define SUBARCHITECTURE "ev6"
|
||||||
|
#define ARCHCONFIG "-DEV6 " \
|
||||||
|
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
|
||||||
|
"-DL2_SIZE=4194304 -DL2_LINESIZE=64 " \
|
||||||
|
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=8192 "
|
||||||
|
#define LIBNAME "ev6"
|
||||||
|
#define CORENAME "EV6"
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef FORCE_C910V
|
#ifdef FORCE_C910V
|
||||||
#define FORCE
|
#define FORCE
|
||||||
#define ARCHITECTURE "RISCV64"
|
#define ARCHITECTURE "RISCV64"
|
||||||
|
@ -1777,7 +1831,7 @@ int main(int argc, char *argv[]){
|
||||||
#ifdef FORCE
|
#ifdef FORCE
|
||||||
printf("CORE=%s\n", CORENAME);
|
printf("CORE=%s\n", CORENAME);
|
||||||
#else
|
#else
|
||||||
#if defined(INTEL_AMD) || defined(POWER) || defined(__mips__) || defined(__arm__) || defined(__aarch64__) || defined(ZARCH) || defined(sparc) || defined(__loongarch__) || defined(__riscv)
|
#if defined(INTEL_AMD) || defined(POWER) || defined(__mips__) || defined(__arm__) || defined(__aarch64__) || defined(ZARCH) || defined(sparc) || defined(__loongarch__) || defined(__riscv) || defined(__alpha__)
|
||||||
printf("CORE=%s\n", get_corename());
|
printf("CORE=%s\n", get_corename());
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -38,7 +38,7 @@
|
||||||
|
|
||||||
#define ASSEMBLER
|
#define ASSEMBLER
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
#include "version.h"
|
|
||||||
|
|
||||||
#define N $16
|
#define N $16
|
||||||
#define X $17
|
#define X $17
|
||||||
|
|
|
@ -38,7 +38,7 @@
|
||||||
|
|
||||||
#define ASSEMBLER
|
#define ASSEMBLER
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
#include "version.h"
|
|
||||||
|
|
||||||
#define PREFETCHSIZE 88
|
#define PREFETCHSIZE 88
|
||||||
|
|
||||||
|
|
|
@ -38,7 +38,7 @@
|
||||||
|
|
||||||
#define ASSEMBLER
|
#define ASSEMBLER
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
#include "version.h"
|
|
||||||
|
|
||||||
#define PREFETCHSIZE 40
|
#define PREFETCHSIZE 40
|
||||||
|
|
||||||
|
|
|
@ -38,7 +38,7 @@
|
||||||
|
|
||||||
#define ASSEMBLER
|
#define ASSEMBLER
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
#include "version.h"
|
|
||||||
|
|
||||||
.set noat
|
.set noat
|
||||||
.set noreorder
|
.set noreorder
|
||||||
|
|
|
@ -39,7 +39,7 @@
|
||||||
#define ASSEMBLER
|
#define ASSEMBLER
|
||||||
|
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
#include "version.h"
|
|
||||||
|
|
||||||
#define PREFETCH_SIZE 80
|
#define PREFETCH_SIZE 80
|
||||||
|
|
||||||
|
|
|
@ -38,7 +38,7 @@
|
||||||
|
|
||||||
#define ASSEMBLER
|
#define ASSEMBLER
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
#include "version.h"
|
|
||||||
|
|
||||||
#define N $16
|
#define N $16
|
||||||
#define X $17
|
#define X $17
|
||||||
|
|
|
@ -42,7 +42,7 @@
|
||||||
#define ASSEMBLER
|
#define ASSEMBLER
|
||||||
|
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
#include "version.h"
|
|
||||||
|
|
||||||
.globl NAME
|
.globl NAME
|
||||||
.ent NAME
|
.ent NAME
|
||||||
|
|
|
@ -39,7 +39,7 @@
|
||||||
#define ASSEMBLER
|
#define ASSEMBLER
|
||||||
|
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
#include "version.h"
|
|
||||||
|
|
||||||
#define PREFETCH_SIZE 80
|
#define PREFETCH_SIZE 80
|
||||||
|
|
||||||
|
|
|
@ -38,7 +38,7 @@
|
||||||
|
|
||||||
#define ASSEMBLER
|
#define ASSEMBLER
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
#include "version.h"
|
|
||||||
|
|
||||||
#define PREFETCHSIZE 88
|
#define PREFETCHSIZE 88
|
||||||
|
|
||||||
|
|
|
@ -38,7 +38,7 @@
|
||||||
|
|
||||||
#define ASSEMBLER
|
#define ASSEMBLER
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
#include "version.h"
|
|
||||||
|
|
||||||
.set noat
|
.set noat
|
||||||
.set noreorder
|
.set noreorder
|
||||||
|
|
|
@ -38,7 +38,7 @@
|
||||||
|
|
||||||
#define ASSEMBLER
|
#define ASSEMBLER
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
#include "version.h"
|
|
||||||
|
|
||||||
#if !defined(EV4) && !defined(EV5) && !defined(EV6)
|
#if !defined(EV4) && !defined(EV5) && !defined(EV6)
|
||||||
#error "Architecture is not specified."
|
#error "Architecture is not specified."
|
||||||
|
|
|
@ -38,7 +38,7 @@
|
||||||
|
|
||||||
#define ASSEMBLER
|
#define ASSEMBLER
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
#include "version.h"
|
|
||||||
|
|
||||||
#define STACKSIZE 64
|
#define STACKSIZE 64
|
||||||
#define PREFETCHSIZE 32
|
#define PREFETCHSIZE 32
|
||||||
|
|
|
@ -38,7 +38,7 @@
|
||||||
|
|
||||||
#define ASSEMBLER
|
#define ASSEMBLER
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
#include "version.h"
|
|
||||||
|
|
||||||
#define STACKSIZE 64
|
#define STACKSIZE 64
|
||||||
#define PREFETCHSIZE 32
|
#define PREFETCHSIZE 32
|
||||||
|
|
|
@ -38,7 +38,7 @@
|
||||||
|
|
||||||
#define ASSEMBLER
|
#define ASSEMBLER
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
#include "version.h"
|
|
||||||
|
|
||||||
#define N $16
|
#define N $16
|
||||||
#define X $17
|
#define X $17
|
||||||
|
|
|
@ -38,7 +38,7 @@
|
||||||
|
|
||||||
#define ASSEMBLER
|
#define ASSEMBLER
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
#include "version.h"
|
|
||||||
|
|
||||||
#define N $16
|
#define N $16
|
||||||
#define X $17
|
#define X $17
|
||||||
|
|
|
@ -38,7 +38,7 @@
|
||||||
|
|
||||||
#define ASSEMBLER
|
#define ASSEMBLER
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
#include "version.h"
|
|
||||||
|
|
||||||
#define N $16
|
#define N $16
|
||||||
#define X $17
|
#define X $17
|
||||||
|
|
|
@ -36,7 +36,7 @@
|
||||||
/* or implied, of The University of Texas at Austin. */
|
/* or implied, of The University of Texas at Austin. */
|
||||||
/*********************************************************************/
|
/*********************************************************************/
|
||||||
|
|
||||||
#include "version.h"
|
|
||||||
|
|
||||||
.set noat
|
.set noat
|
||||||
.set noreorder
|
.set noreorder
|
||||||
|
|
|
@ -38,7 +38,7 @@
|
||||||
|
|
||||||
#define ASSEMBLER
|
#define ASSEMBLER
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
#include "version.h"
|
|
||||||
|
|
||||||
#define N $16
|
#define N $16
|
||||||
#define X $17
|
#define X $17
|
||||||
|
|
|
@ -38,7 +38,7 @@
|
||||||
|
|
||||||
#define ASSEMBLER
|
#define ASSEMBLER
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
#include "version.h"
|
|
||||||
|
|
||||||
#define N $16
|
#define N $16
|
||||||
#define X $17
|
#define X $17
|
||||||
|
|
|
@ -38,7 +38,7 @@
|
||||||
|
|
||||||
#define ASSEMBLER
|
#define ASSEMBLER
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
#include "version.h"
|
|
||||||
|
|
||||||
#define PREFETCHSIZE 88
|
#define PREFETCHSIZE 88
|
||||||
|
|
||||||
|
|
|
@ -39,7 +39,7 @@
|
||||||
#define ASSEMBLER
|
#define ASSEMBLER
|
||||||
|
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
#include "version.h"
|
|
||||||
|
|
||||||
#define PREFETCH_SIZE 80
|
#define PREFETCH_SIZE 80
|
||||||
|
|
||||||
|
|
|
@ -38,7 +38,7 @@
|
||||||
|
|
||||||
#define ASSEMBLER
|
#define ASSEMBLER
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
#include "version.h"
|
|
||||||
|
|
||||||
#define PREFETCHSIZE 88
|
#define PREFETCHSIZE 88
|
||||||
|
|
||||||
|
|
|
@ -38,7 +38,7 @@
|
||||||
|
|
||||||
#define ASSEMBLER
|
#define ASSEMBLER
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
#include "version.h"
|
|
||||||
|
|
||||||
PROLOGUE
|
PROLOGUE
|
||||||
PROFCODE
|
PROFCODE
|
||||||
|
|
|
@ -38,7 +38,7 @@
|
||||||
|
|
||||||
#define ASSEMBLER
|
#define ASSEMBLER
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
#include "version.h"
|
|
||||||
|
|
||||||
#if !defined(EV4) && !defined(EV5) && !defined(EV6)
|
#if !defined(EV4) && !defined(EV5) && !defined(EV6)
|
||||||
#error "Architecture is not specified."
|
#error "Architecture is not specified."
|
||||||
|
|
|
@ -38,7 +38,7 @@
|
||||||
|
|
||||||
#define ASSEMBLER
|
#define ASSEMBLER
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
#include "version.h"
|
|
||||||
|
|
||||||
#if !defined(EV4) && !defined(EV5) && !defined(EV6)
|
#if !defined(EV4) && !defined(EV5) && !defined(EV6)
|
||||||
#error "Architecture is not specified."
|
#error "Architecture is not specified."
|
||||||
|
|
|
@ -38,7 +38,7 @@
|
||||||
|
|
||||||
#define ASSEMBLER
|
#define ASSEMBLER
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
#include "version.h"
|
|
||||||
|
|
||||||
#if !defined(EV4) && !defined(EV5) && !defined(EV6)
|
#if !defined(EV4) && !defined(EV5) && !defined(EV6)
|
||||||
#error "Architecture is not specified."
|
#error "Architecture is not specified."
|
||||||
|
|
|
@ -38,7 +38,7 @@
|
||||||
|
|
||||||
#define ASSEMBLER
|
#define ASSEMBLER
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
#include "version.h"
|
|
||||||
|
|
||||||
#define N $16
|
#define N $16
|
||||||
#define X $17
|
#define X $17
|
||||||
|
|
|
@ -38,7 +38,7 @@
|
||||||
|
|
||||||
#define ASSEMBLER
|
#define ASSEMBLER
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
#include "version.h"
|
|
||||||
|
|
||||||
#define PREFETCHSIZE 88
|
#define PREFETCHSIZE 88
|
||||||
|
|
||||||
|
|
|
@ -38,7 +38,7 @@
|
||||||
|
|
||||||
#define ASSEMBLER
|
#define ASSEMBLER
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
#include "version.h"
|
|
||||||
|
|
||||||
#define PREFETCHSIZE 40
|
#define PREFETCHSIZE 40
|
||||||
|
|
||||||
|
|
|
@ -38,7 +38,7 @@
|
||||||
|
|
||||||
#define ASSEMBLER
|
#define ASSEMBLER
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
#include "version.h"
|
|
||||||
|
|
||||||
#define PREFETCHSIZE 88
|
#define PREFETCHSIZE 88
|
||||||
|
|
||||||
|
|
|
@ -38,7 +38,7 @@
|
||||||
|
|
||||||
#define ASSEMBLER
|
#define ASSEMBLER
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
#include "version.h"
|
|
||||||
|
|
||||||
.set noat
|
.set noat
|
||||||
.set noreorder
|
.set noreorder
|
||||||
|
|
|
@ -38,7 +38,7 @@
|
||||||
|
|
||||||
#define ASSEMBLER
|
#define ASSEMBLER
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
#include "version.h"
|
|
||||||
|
|
||||||
#if !defined(EV4) && !defined(EV5) && !defined(EV6)
|
#if !defined(EV4) && !defined(EV5) && !defined(EV6)
|
||||||
#error "Architecture is not specified."
|
#error "Architecture is not specified."
|
||||||
|
|
|
@ -38,7 +38,7 @@
|
||||||
|
|
||||||
#define ASSEMBLER
|
#define ASSEMBLER
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
#include "version.h"
|
|
||||||
|
|
||||||
#define STACKSIZE 64
|
#define STACKSIZE 64
|
||||||
#define PREFETCHSIZE 32
|
#define PREFETCHSIZE 32
|
||||||
|
|
|
@ -38,7 +38,7 @@
|
||||||
|
|
||||||
#define ASSEMBLER
|
#define ASSEMBLER
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
#include "version.h"
|
|
||||||
|
|
||||||
#define STACKSIZE 64
|
#define STACKSIZE 64
|
||||||
#define PREFETCHSIZE 32
|
#define PREFETCHSIZE 32
|
||||||
|
|
|
@ -39,7 +39,7 @@
|
||||||
#define ASSEMBLER
|
#define ASSEMBLER
|
||||||
|
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
#include "version.h"
|
|
||||||
|
|
||||||
#define PREFETCH_SIZE 80
|
#define PREFETCH_SIZE 80
|
||||||
|
|
||||||
|
|
|
@ -38,7 +38,7 @@
|
||||||
|
|
||||||
#define ASSEMBLER
|
#define ASSEMBLER
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
#include "version.h"
|
|
||||||
|
|
||||||
#define N $16
|
#define N $16
|
||||||
#define X $17
|
#define X $17
|
||||||
|
|
|
@ -38,7 +38,7 @@
|
||||||
|
|
||||||
#define ASSEMBLER
|
#define ASSEMBLER
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
#include "version.h"
|
|
||||||
|
|
||||||
#define PREFETCHSIZE 88
|
#define PREFETCHSIZE 88
|
||||||
|
|
||||||
|
|
|
@ -38,7 +38,7 @@
|
||||||
|
|
||||||
#define ASSEMBLER
|
#define ASSEMBLER
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
#include "version.h"
|
|
||||||
|
|
||||||
#define PREFETCHSIZE 88
|
#define PREFETCHSIZE 88
|
||||||
|
|
||||||
|
|
|
@ -38,7 +38,7 @@
|
||||||
|
|
||||||
#define ASSEMBLER
|
#define ASSEMBLER
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
#include "version.h"
|
|
||||||
|
|
||||||
PROLOGUE
|
PROLOGUE
|
||||||
PROFCODE
|
PROFCODE
|
||||||
|
|
|
@ -38,7 +38,7 @@
|
||||||
|
|
||||||
#define ASSEMBLER
|
#define ASSEMBLER
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
#include "version.h"
|
|
||||||
|
|
||||||
#if !defined(EV4) && !defined(EV5) && !defined(EV6)
|
#if !defined(EV4) && !defined(EV5) && !defined(EV6)
|
||||||
#error "Architecture is not specified."
|
#error "Architecture is not specified."
|
||||||
|
|
|
@ -38,7 +38,7 @@
|
||||||
|
|
||||||
#define ASSEMBLER
|
#define ASSEMBLER
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
#include "version.h"
|
|
||||||
|
|
||||||
#if !defined(EV4) && !defined(EV5) && !defined(EV6)
|
#if !defined(EV4) && !defined(EV5) && !defined(EV6)
|
||||||
#error "Architecture is not specified."
|
#error "Architecture is not specified."
|
||||||
|
|
|
@ -38,7 +38,7 @@
|
||||||
|
|
||||||
#define ASSEMBLER
|
#define ASSEMBLER
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
#include "version.h"
|
|
||||||
|
|
||||||
#if !defined(EV4) && !defined(EV5) && !defined(EV6)
|
#if !defined(EV4) && !defined(EV5) && !defined(EV6)
|
||||||
#error "Architecture is not specified."
|
#error "Architecture is not specified."
|
||||||
|
|
|
@ -42,50 +42,58 @@ endif
|
||||||
|
|
||||||
ifndef SGEMMKERNEL
|
ifndef SGEMMKERNEL
|
||||||
SGEMMKERNEL = gemm_kernel.S
|
SGEMMKERNEL = gemm_kernel.S
|
||||||
|
ifneq ($(SGEMM_UNROLL_M), $(SGEMM_UNROLL_N))
|
||||||
SGEMMINCOPY = ../generic/gemm_ncopy_2.c
|
SGEMMINCOPY = ../generic/gemm_ncopy_2.c
|
||||||
SGEMMITCOPY = ../generic/gemm_tcopy_2.c
|
SGEMMITCOPY = ../generic/gemm_tcopy_2.c
|
||||||
|
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
endif
|
||||||
SGEMMONCOPY = ../generic/gemm_ncopy_8.c
|
SGEMMONCOPY = ../generic/gemm_ncopy_8.c
|
||||||
SGEMMOTCOPY = ../generic/gemm_tcopy_8.c
|
SGEMMOTCOPY = ../generic/gemm_tcopy_8.c
|
||||||
SGEMMINCOPYOBJ = sgemm_incopy.o
|
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||||
SGEMMITCOPYOBJ = sgemm_itcopy.o
|
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||||
SGEMMONCOPYOBJ = sgemm_oncopy.o
|
|
||||||
SGEMMOTCOPYOBJ = sgemm_otcopy.o
|
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifndef DGEMMKERNEL
|
ifndef DGEMMKERNEL
|
||||||
DGEMMKERNEL = gemm_kernel.S
|
DGEMMKERNEL = gemm_kernel.S
|
||||||
|
ifneq ($(DGEMM_UNROLL_M), $(DGEMM_UNROLL_N))
|
||||||
DGEMMINCOPY = ../generic/gemm_ncopy_2.c
|
DGEMMINCOPY = ../generic/gemm_ncopy_2.c
|
||||||
DGEMMITCOPY = ../generic/gemm_tcopy_2.c
|
DGEMMITCOPY = ../generic/gemm_tcopy_2.c
|
||||||
|
DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
endif
|
||||||
DGEMMONCOPY = ../generic/gemm_ncopy_8.c
|
DGEMMONCOPY = ../generic/gemm_ncopy_8.c
|
||||||
DGEMMOTCOPY = ../generic/gemm_tcopy_8.c
|
DGEMMOTCOPY = ../generic/gemm_tcopy_8.c
|
||||||
DGEMMINCOPYOBJ = dgemm_incopy.o
|
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||||
DGEMMITCOPYOBJ = dgemm_itcopy.o
|
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||||
DGEMMONCOPYOBJ = dgemm_oncopy.o
|
|
||||||
DGEMMOTCOPYOBJ = dgemm_otcopy.o
|
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifndef CGEMMKERNEL
|
ifndef CGEMMKERNEL
|
||||||
CGEMMKERNEL = zgemm_kernel.S
|
CGEMMKERNEL = zgemm_kernel.S
|
||||||
|
ifneq ($(CGEMM_UNROLL_M), $(CGEMM_UNROLL_N))
|
||||||
CGEMMINCOPY = ../generic/zgemm_ncopy_1.c
|
CGEMMINCOPY = ../generic/zgemm_ncopy_1.c
|
||||||
CGEMMITCOPY = ../generic/zgemm_tcopy_1.c
|
CGEMMITCOPY = ../generic/zgemm_tcopy_1.c
|
||||||
|
CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
endif
|
||||||
CGEMMONCOPY = ../generic/zgemm_ncopy_4.c
|
CGEMMONCOPY = ../generic/zgemm_ncopy_4.c
|
||||||
CGEMMOTCOPY = ../generic/zgemm_tcopy_4.c
|
CGEMMOTCOPY = ../generic/zgemm_tcopy_4.c
|
||||||
CGEMMINCOPYOBJ = cgemm_incopy.o
|
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||||
CGEMMITCOPYOBJ = cgemm_itcopy.o
|
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||||
CGEMMONCOPYOBJ = cgemm_oncopy.o
|
|
||||||
CGEMMOTCOPYOBJ = cgemm_otcopy.o
|
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifndef ZGEMMKERNEL
|
ifndef ZGEMMKERNEL
|
||||||
ZGEMMKERNEL = zgemm_kernel.S
|
ZGEMMKERNEL = zgemm_kernel.S
|
||||||
|
ifneq ($(ZGEMM_UNROLL_M), $(ZGEMM_UNROLL_N))
|
||||||
ZGEMMINCOPY = ../generic/zgemm_ncopy_1.c
|
ZGEMMINCOPY = ../generic/zgemm_ncopy_1.c
|
||||||
ZGEMMITCOPY = ../generic/zgemm_tcopy_1.c
|
ZGEMMITCOPY = ../generic/zgemm_tcopy_1.c
|
||||||
|
ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
endif
|
||||||
ZGEMMONCOPY = ../generic/zgemm_ncopy_4.c
|
ZGEMMONCOPY = ../generic/zgemm_ncopy_4.c
|
||||||
ZGEMMOTCOPY = ../generic/zgemm_tcopy_4.c
|
ZGEMMOTCOPY = ../generic/zgemm_tcopy_4.c
|
||||||
ZGEMMINCOPYOBJ = zgemm_incopy.o
|
ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||||
ZGEMMITCOPYOBJ = zgemm_itcopy.o
|
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||||
ZGEMMONCOPYOBJ = zgemm_oncopy.o
|
|
||||||
ZGEMMOTCOPYOBJ = zgemm_otcopy.o
|
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifndef SGEMM_BETA
|
ifndef SGEMM_BETA
|
||||||
|
|
|
@ -717,11 +717,12 @@ integer iparam2stage_(integer *ispec, char *name__, char *opts, integer *ni,
|
||||||
ret_val = -1;
|
ret_val = -1;
|
||||||
|
|
||||||
// s_copy(subnam, name__, (ftnlen)12, name_len);
|
// s_copy(subnam, name__, (ftnlen)12, name_len);
|
||||||
strncpy(subnam,name__,13);
|
strncpy(subnam,name__,13);
|
||||||
subnam[13]='\0';
|
subnam[13]='\0';
|
||||||
for (int i=0;i<13;i++) subnam[i]=toupper(subnam[i]);
|
{
|
||||||
//fprintf(stderr,"iparam2stage, name__ gelesen #%s#\n",name__);
|
int i;
|
||||||
//fprintf(stderr,"iparam2stage, subnam gelesen #%s#\n",subnam);
|
for (i=0;i<13;i++) subnam[i]=toupper(subnam[i]);
|
||||||
|
}
|
||||||
|
|
||||||
#if 0
|
#if 0
|
||||||
|
|
||||||
|
|
|
@ -40,27 +40,40 @@ ifneq ($(C_LAPACK), 1)
|
||||||
$(FC) $(FFLAGS) -c -o $@ $<
|
$(FC) $(FFLAGS) -c -o $@ $<
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
ifneq "$(or $(BUILD_SINGLE),$(BUILD_COMPLEX))" ""
|
||||||
SCATGEN = slatm1.o slatm7.o slaran.o slarnd.o
|
SCATGEN = slatm1.o slatm7.o slaran.o slarnd.o
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifeq ($(BUILD_SINGLE),1)
|
||||||
SMATGEN = slatms.o slatme.o slatmr.o slatmt.o \
|
SMATGEN = slatms.o slatme.o slatmr.o slatmt.o \
|
||||||
slagge.o slagsy.o slakf2.o slarge.o slaror.o slarot.o slatm2.o \
|
slagge.o slagsy.o slakf2.o slarge.o slaror.o slarot.o slatm2.o \
|
||||||
slatm3.o slatm5.o slatm6.o slahilb.o
|
slatm3.o slatm5.o slatm6.o slahilb.o
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifeq ($(BUILD_COMPLEX),1)
|
||||||
CMATGEN = clatms.o clatme.o clatmr.o clatmt.o \
|
CMATGEN = clatms.o clatme.o clatmr.o clatmt.o \
|
||||||
clagge.o claghe.o clagsy.o clakf2.o clarge.o claror.o clarot.o \
|
clagge.o claghe.o clagsy.o clakf2.o clarge.o claror.o clarot.o \
|
||||||
clatm1.o clarnd.o clatm2.o clatm3.o clatm5.o clatm6.o clahilb.o
|
clatm1.o clarnd.o clatm2.o clatm3.o clatm5.o clatm6.o clahilb.o
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifneq "$(or $(BUILD_DOUBLE),$(BUILD_COMPLEX16))" ""
|
||||||
DZATGEN = dlatm1.o dlatm7.o dlaran.o dlarnd.o
|
DZATGEN = dlatm1.o dlatm7.o dlaran.o dlarnd.o
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifeq ($(BUILD_DOUBLE),1)
|
||||||
DMATGEN = dlatms.o dlatme.o dlatmr.o dlatmt.o \
|
DMATGEN = dlatms.o dlatme.o dlatmr.o dlatmt.o \
|
||||||
dlagge.o dlagsy.o dlakf2.o dlarge.o dlaror.o dlarot.o dlatm2.o \
|
dlagge.o dlagsy.o dlakf2.o dlarge.o dlaror.o dlarot.o dlatm2.o \
|
||||||
dlatm3.o dlatm5.o dlatm6.o dlahilb.o
|
dlatm3.o dlatm5.o dlatm6.o dlahilb.o
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifeq ($(BUILD_COMPLEX16),1)
|
||||||
ZMATGEN = zlatms.o zlatme.o zlatmr.o zlatmt.o \
|
ZMATGEN = zlatms.o zlatme.o zlatmr.o zlatmt.o \
|
||||||
zlagge.o zlaghe.o zlagsy.o zlakf2.o zlarge.o zlaror.o zlarot.o \
|
zlagge.o zlaghe.o zlagsy.o zlakf2.o zlarge.o zlaror.o zlarot.o \
|
||||||
zlatm1.o zlarnd.o zlatm2.o zlatm3.o zlatm5.o zlatm6.o zlahilb.o
|
zlatm1.o zlarnd.o zlatm2.o zlatm3.o zlatm5.o zlatm6.o zlahilb.o
|
||||||
|
endif
|
||||||
|
|
||||||
.PHONY: all
|
.PHONY: all
|
||||||
|
.NOTPARALLEL:
|
||||||
all: $(TMGLIB)
|
all: $(TMGLIB)
|
||||||
|
|
||||||
ALLOBJ = $(SMATGEN) $(CMATGEN) $(SCATGEN) $(DMATGEN) $(ZMATGEN) \
|
ALLOBJ = $(SMATGEN) $(CMATGEN) $(SCATGEN) $(DMATGEN) $(ZMATGEN) \
|
||||||
|
@ -107,9 +120,17 @@ cleanlib:
|
||||||
rm -f $(TMGLIB)
|
rm -f $(TMGLIB)
|
||||||
|
|
||||||
ifneq ($(C_LAPACK), 1)
|
ifneq ($(C_LAPACK), 1)
|
||||||
|
ifeq ($(filter $(BUILD_SINGLE) $(BUILD_COMPLEX),1),)
|
||||||
slaran.o: slaran.f ; $(FC) $(FFLAGS_NOOPT) -c -o $@ $<
|
slaran.o: slaran.f ; $(FC) $(FFLAGS_NOOPT) -c -o $@ $<
|
||||||
|
endif
|
||||||
|
ifeq ($(filter $(BUILD_DOUBLE) $(BUILD_COMPLEX16),1),)
|
||||||
dlaran.o: dlaran.f ; $(FC) $(FFLAGS_NOOPT) -c -o $@ $<
|
dlaran.o: dlaran.f ; $(FC) $(FFLAGS_NOOPT) -c -o $@ $<
|
||||||
|
endif
|
||||||
else
|
else
|
||||||
|
ifeq ($(filter $(BUILD_SINGLE) $(BUILD_COMPLEX),1),)
|
||||||
slaran.o: slaran.c ; $(CC) $(CFLAGS) -O0 -c -o $@ $<
|
slaran.o: slaran.c ; $(CC) $(CFLAGS) -O0 -c -o $@ $<
|
||||||
|
endif
|
||||||
|
ifeq ($(filter $(BUILD_DOUBLE) $(BUILD_COMPLEX16),1),)
|
||||||
dlaran.o: dlaran.c ; $(CC) $(CFLAGS) -O0 -c -o $@ $<
|
dlaran.o: dlaran.c ; $(CC) $(CFLAGS) -O0 -c -o $@ $<
|
||||||
endif
|
endif
|
||||||
|
endif
|
||||||
|
|
2
param.h
2
param.h
|
@ -2951,7 +2951,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#define SYMV_P 16
|
#define SYMV_P 16
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(P5600) || defined(MIPS1004K) || defined(MIPS24K) || defined(I6400) || defined(P6600) || defined(I6500)
|
#if defined(MIPS64_GENERIC) || defined(P5600) || defined(MIPS1004K) || defined(MIPS24K) || defined(I6400) || defined(P6600) || defined(I6500)
|
||||||
#define SNUMOPT 2
|
#define SNUMOPT 2
|
||||||
#define DNUMOPT 2
|
#define DNUMOPT 2
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue