diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md index 493747052..419f90dab 100644 --- a/CONTRIBUTORS.md +++ b/CONTRIBUTORS.md @@ -218,4 +218,7 @@ In chronological order: * [2022-08] Fix building from sources for QNX * Mark Seminatore - * [2023-11-09] Improve Windows threading performance scaling \ No newline at end of file + * [2023-11-09] Improve Windows threading performance scaling + +* Dirreke + * [2024-01-16] Add basic support for the CSKY architecture diff --git a/Makefile.csky b/Makefile.csky new file mode 100644 index 000000000..36162af2a --- /dev/null +++ b/Makefile.csky @@ -0,0 +1,4 @@ +ifeq ($(CORE), CK860FV) +CCOMMON_OPT += -march=ck860v -mcpu=ck860fv -mfdivdu -mhard-float +FCOMMON_OPT += -march=ck860v -mcpu=ck860fv -mfdivdu -mhard-float -static +endif diff --git a/Makefile.prebuild b/Makefile.prebuild index 0be4f1274..83da8e2ce 100644 --- a/Makefile.prebuild +++ b/Makefile.prebuild @@ -55,6 +55,10 @@ ifeq ($(TARGET), C910V) TARGET_FLAGS = -march=rv64gcv0p7_zfh_xtheadc -mabi=lp64d endif +ifeq ($(TARGET), CK860FV) +TARGET_FLAGS = -march=ck860v -mcpu=ck860fv -mfdivdu -mhard-float +endif + all: getarch_2nd ./getarch_2nd 0 >> $(TARGET_MAKE) ./getarch_2nd 1 >> $(TARGET_CONF) diff --git a/Makefile.system b/Makefile.system index e602eaf05..0088eaff5 100644 --- a/Makefile.system +++ b/Makefile.system @@ -873,6 +873,11 @@ endif endif endif +ifeq ($(ARCH), csky) +NO_BINARY_MODE = 1 +BINARY_DEFINED = 1 +endif + # # C Compiler dependent settings # diff --git a/TargetList.txt b/TargetList.txt index deef75819..c11b94fa5 100644 --- a/TargetList.txt +++ b/TargetList.txt @@ -133,3 +133,7 @@ E2K EV4 EV5 EV6 + +14.CSKY +CSKY +CK860FV diff --git a/c_check b/c_check index 3e507be81..59ab9bb13 100755 --- a/c_check +++ b/c_check @@ -91,6 +91,7 @@ case "$data" in *ARCH_ZARCH*) architecture=zarch ;; *ARCH_RISCV64*) architecture=riscv64 ;; *ARCH_LOONGARCH64*) architecture=loongarch64 ;; + *ARCH_CSKY*) architecture=csky ;; esac defined=0 @@ -236,6 +237,7 @@ case "$data" in *ARCH_ARM*) architecture=arm ;; *ARCH_ZARCH*) architecture=zarch ;; *ARCH_LOONGARCH64*) architecture=loongarch64 ;; + *ARCH_CSKY*) architecture=csky ;; esac binformat='bin32' diff --git a/c_check.pl b/c_check.pl index d9c36793c..6b89f06eb 100644 --- a/c_check.pl +++ b/c_check.pl @@ -97,6 +97,7 @@ $architecture = arm64 if ($data =~ /ARCH_ARM64/); $architecture = zarch if ($data =~ /ARCH_ZARCH/); $architecture = riscv64 if ($data =~ /ARCH_RISCV64/); $architecture = loongarch64 if ($data =~ /ARCH_LOONGARCH64/); +$architecture = csky if ($data =~ /ARCH_CSKY/); $defined = 0; @@ -156,6 +157,11 @@ if ($architecture eq "loongarch64") { $binary = 64; } +if ($architecture eq "csky") { + $defined = 1; + $binary = 32; +} + if ($compiler eq "PGI") { $compiler_name .= " -tp p7" if ($binary eq "32"); $compiler_name .= " -tp p7-64" if ($binary eq "64"); @@ -284,6 +290,7 @@ $architecture = arm if ($data =~ /ARCH_ARM/); $architecture = arm64 if ($data =~ /ARCH_ARM64/); $architecture = zarch if ($data =~ /ARCH_ZARCH/); $architecture = loongarch64 if ($data =~ /ARCH_LOONGARCH64/); +$architecture = csky if ($data =~ /ARCH_CSKY/); $binformat = bin32; $binformat = bin64 if ($data =~ /BINARY_64/); diff --git a/common.h b/common.h index 462c1d428..1f0b9e533 100644 --- a/common.h +++ b/common.h @@ -482,6 +482,10 @@ please https://github.com/xianyi/OpenBLAS/issues/246 #include "common_e2k.h" #endif +#ifdef ARCH_CSKY +#include "common_csky.h" +#endif + #ifndef ASSEMBLER #ifdef OS_WINDOWSSTORE typedef char env_var_t[MAX_PATH]; diff --git a/common_csky.h b/common_csky.h new file mode 100644 index 000000000..3095dc781 --- /dev/null +++ b/common_csky.h @@ -0,0 +1,56 @@ +/***************************************************************************** +Copyright (c) 2011-2015, The OpenBLAS Project +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + 3. Neither the name of the OpenBLAS project nor the names of + its contributors may be used to endorse or promote products + derived from this software without specific prior written + permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************************/ + +#ifndef COMMON_CSKY +#define COMMON_CSKY + +#define MB __sync_synchronize() +#define WMB __sync_synchronize() +#define RMB __sync_synchronize() + +#define INLINE inline + +#ifndef ASSEMBLER + + +static inline int blas_quickdivide(blasint x, blasint y){ + return x / y; +} + +#endif + + + +#define BUFFER_SIZE ( 32 << 20) +#define SEEK_ADDRESS + +#endif \ No newline at end of file diff --git a/ctest.c b/ctest.c index 2ccae8dcc..cbc15326f 100644 --- a/ctest.c +++ b/ctest.c @@ -173,6 +173,10 @@ HAVE_C11 ARCH_E2K #endif +#if defined(__csky__) +ARCH_CSKY +#endif + #if defined(__EMSCRIPTEN__) ARCH_RISCV64 OS_WINDOWS diff --git a/getarch.c b/getarch.c index 87384c084..2d26da079 100644 --- a/getarch.c +++ b/getarch.c @@ -150,6 +150,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. /* #define FORCE_EV4 */ /* #define FORCE_EV5 */ /* #define FORCE_EV6 */ +/* #define FORCE_CSKY */ +/* #define FORCE_CK860FV */ /* #define FORCE_GENERIC */ #ifdef FORCE_P2 @@ -1692,6 +1694,33 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define CORENAME "generic" #endif +#ifdef FORCE_CSKY +#define FORCE +#define ARCHITECTURE "CSKY" +#define SUBARCHITECTURE "CSKY" +#define SUBDIRNAME "csky" +#define ARCHCONFIG "-DCSKY" \ + "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \ + "-DL2_SIZE=524288 -DL2_LINESIZE=32 " \ + "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 " +#define LIBNAME "csky" +#define CORENAME "CSKY" +#endif + +#ifdef FORCE_CK860FV +#define FORCE +#define ARCHITECTURE "CSKY" +#define SUBARCHITECTURE "CK860V" +#define SUBDIRNAME "csky" +#define ARCHCONFIG "-DCK860FV " \ + "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \ + "-DL2_SIZE=524288 -DL2_LINESIZE=32 " \ + "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 " +#define LIBNAME "ck860fv" +#define CORENAME "CK860FV" +#endif + + #ifndef FORCE #ifdef USER_TARGET @@ -1766,7 +1795,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define OPENBLAS_SUPPORTED #endif - #ifndef OPENBLAS_SUPPORTED #error "This arch/CPU is not supported by OpenBLAS." #endif @@ -1831,7 +1859,7 @@ int main(int argc, char *argv[]){ #ifdef FORCE printf("CORE=%s\n", CORENAME); #else -#if defined(INTEL_AMD) || defined(POWER) || defined(__mips__) || defined(__arm__) || defined(__aarch64__) || defined(ZARCH) || defined(sparc) || defined(__loongarch__) || defined(__riscv) || defined(__alpha__) +#if defined(INTEL_AMD) || defined(POWER) || defined(__mips__) || defined(__arm__) || defined(__aarch64__) || defined(ZARCH) || defined(sparc) || defined(__loongarch__) || defined(__riscv) || defined(__alpha__) || defined(__csky__) printf("CORE=%s\n", get_corename()); #endif #endif @@ -1979,7 +2007,7 @@ printf("ELF_VERSION=2\n"); #ifdef FORCE printf("#define CHAR_CORENAME \"%s\"\n", CORENAME); #else -#if defined(INTEL_AMD) || defined(POWER) || defined(__mips__) || defined(__arm__) || defined(__aarch64__) || defined(ZARCH) || defined(sparc) || defined(__loongarch__) || defined(__riscv) +#if defined(INTEL_AMD) || defined(POWER) || defined(__mips__) || defined(__arm__) || defined(__aarch64__) || defined(ZARCH) || defined(sparc) || defined(__loongarch__) || defined(__riscv) || defined(__csky__) printf("#define CHAR_CORENAME \"%s\"\n", get_corename()); #endif #endif diff --git a/kernel/csky/KERNEL b/kernel/csky/KERNEL new file mode 100644 index 000000000..afa8a0881 --- /dev/null +++ b/kernel/csky/KERNEL @@ -0,0 +1,149 @@ +SAMAXKERNEL = ../arm/amax.c +DAMAXKERNEL = ../arm/amax.c +CAMAXKERNEL = ../arm/zamax.c +ZAMAXKERNEL = ../arm/zamax.c + +SAMINKERNEL = ../arm/amin.c +DAMINKERNEL = ../arm/amin.c +CAMINKERNEL = ../arm/zamin.c +ZAMINKERNEL = ../arm/zamin.c + +SMAXKERNEL = ../arm/max.c +DMAXKERNEL = ../arm/max.c + +SMINKERNEL = ../arm/min.c +DMINKERNEL = ../arm/min.c + +ISAMAXKERNEL = ../arm/iamax.c +IDAMAXKERNEL = ../arm/iamax.c +ICAMAXKERNEL = ../arm/izamax.c +IZAMAXKERNEL = ../arm/izamax.c + +ISAMINKERNEL = ../arm/iamin.c +IDAMINKERNEL = ../arm/iamin.c +ICAMINKERNEL = ../arm/izamin.c +IZAMINKERNEL = ../arm/izamin.c + +ISMAXKERNEL = ../arm/imax.c +IDMAXKERNEL = ../arm/imax.c + +ISMINKERNEL = ../arm/imin.c +IDMINKERNEL = ../arm/imin.c + +SASUMKERNEL = ../arm/asum.c +DASUMKERNEL = ../arm/asum.c +CASUMKERNEL = ../arm/zasum.c +ZASUMKERNEL = ../arm/zasum.c + +SSUMKERNEL = ../arm/sum.c +DSUMKERNEL = ../arm/sum.c +CSUMKERNEL = ../arm/zsum.c +ZSUMKERNEL = ../arm/zsum.c + +SAXPYKERNEL = ../arm/axpy.c +DAXPYKERNEL = ../arm/axpy.c +CAXPYKERNEL = ../arm/zaxpy.c +ZAXPYKERNEL = ../arm/zaxpy.c + +SCOPYKERNEL = ../arm/copy.c +DCOPYKERNEL = ../arm/copy.c +CCOPYKERNEL = ../arm/zcopy.c +ZCOPYKERNEL = ../arm/zcopy.c + +SDOTKERNEL = ../arm/dot.c +DDOTKERNEL = ../arm/dot.c +CDOTKERNEL = ../arm/zdot.c +ZDOTKERNEL = ../arm/zdot.c +DSDOTKERNEL = ../generic/dot.c + +SNRM2KERNEL = ../arm/nrm2.c +DNRM2KERNEL = ../arm/nrm2.c +CNRM2KERNEL = ../arm/znrm2.c +ZNRM2KERNEL = ../arm/znrm2.c + +SROTKERNEL = ../arm/rot.c +DROTKERNEL = ../arm/rot.c +CROTKERNEL = ../arm/zrot.c +ZROTKERNEL = ../arm/zrot.c + +SSCALKERNEL = ../arm/scal.c +DSCALKERNEL = ../arm/scal.c +CSCALKERNEL = ../arm/zscal.c +ZSCALKERNEL = ../arm/zscal.c + +SSWAPKERNEL = ../arm/swap.c +DSWAPKERNEL = ../arm/swap.c +CSWAPKERNEL = ../arm/zswap.c +ZSWAPKERNEL = ../arm/zswap.c + +SGEMVNKERNEL = ../arm/gemv_n.c +DGEMVNKERNEL = ../arm/gemv_n.c +CGEMVNKERNEL = ../arm/zgemv_n.c +ZGEMVNKERNEL = ../arm/zgemv_n.c + +SGEMVTKERNEL = ../arm/gemv_t.c +DGEMVTKERNEL = ../arm/gemv_t.c +CGEMVTKERNEL = ../arm/zgemv_t.c +ZGEMVTKERNEL = ../arm/zgemv_t.c + +STRMMKERNEL = ../generic/trmmkernel_2x2.c +DTRMMKERNEL = ../generic/trmmkernel_2x2.c +CTRMMKERNEL = ../generic/ztrmmkernel_2x2.c +ZTRMMKERNEL = ../generic/ztrmmkernel_2x2.c + +SGEMMKERNEL = ../generic/gemmkernel_2x2.c +SGEMMONCOPY = ../generic/gemm_ncopy_2.c +SGEMMOTCOPY = ../generic/gemm_tcopy_2.c +SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX) +SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX) + +DGEMMKERNEL = ../generic/gemmkernel_2x2.c +DGEMMONCOPY = ../generic/gemm_ncopy_2.c +DGEMMOTCOPY = ../generic/gemm_tcopy_2.c +DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX) +DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX) + +CGEMMKERNEL = ../generic/zgemmkernel_2x2.c +CGEMMONCOPY = ../generic/zgemm_ncopy_2.c +CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c +CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX) +CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX) + +ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c +ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c +ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c +ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX) +ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX) + +STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c +STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c +STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c +STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c + +DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c +DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c +DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c +DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c + +CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c +CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c +CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c +CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c + +ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c +ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c +ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c +ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c + + +SCABS_KERNEL = ../generic/cabs.c +DCABS_KERNEL = ../generic/cabs.c +QCABS_KERNEL = ../generic/cabs.c +LSAME_KERNEL = ../generic/lsame.c + +SGEMM_BETA = ../generic/gemm_beta.c +DGEMM_BETA = ../generic/gemm_beta.c +CGEMM_BETA = ../generic/zgemm_beta.c +ZGEMM_BETA = ../generic/zgemm_beta.c + + diff --git a/kernel/csky/Makefile b/kernel/csky/Makefile new file mode 100644 index 000000000..520349bd6 --- /dev/null +++ b/kernel/csky/Makefile @@ -0,0 +1 @@ +clean :: diff --git a/lapack/laswp/csky/Makefile b/lapack/laswp/csky/Makefile new file mode 100644 index 000000000..75411deb5 --- /dev/null +++ b/lapack/laswp/csky/Makefile @@ -0,0 +1,13 @@ +TOPDIR = ../../.. +include ../../../Makefile.system + +ifndef LASWP +LASWP = ../generic/laswp_k.c +endif + +ifndef ZLASWP +ZLASWP = ../generic/zlaswp_k.c +endif + +include ../generic/Makefile + diff --git a/param.h b/param.h index 469c38ce3..e048dabe7 100644 --- a/param.h +++ b/param.h @@ -3807,7 +3807,44 @@ Until then, just keep it different than DGEMM_DEFAULT_UNROLL_N to keep copy rout #define SYMV_P 16 #endif +#if defined(CSKY) || defined(CK860FV) +#define GEMM_DEFAULT_OFFSET_A 0 +#define GEMM_DEFAULT_OFFSET_B 0 +#define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL +#define SGEMM_DEFAULT_UNROLL_M 2 +#define SGEMM_DEFAULT_UNROLL_N 2 + +#define DGEMM_DEFAULT_UNROLL_M 2 +#define DGEMM_DEFAULT_UNROLL_N 2 + +#define CGEMM_DEFAULT_UNROLL_M 2 +#define CGEMM_DEFAULT_UNROLL_N 2 + +#define ZGEMM_DEFAULT_UNROLL_M 2 +#define ZGEMM_DEFAULT_UNROLL_N 2 + +#define SGEMM_DEFAULT_P 128 +#define DGEMM_DEFAULT_P 128 +#define CGEMM_DEFAULT_P 96 +#define ZGEMM_DEFAULT_P 64 + +#define SGEMM_DEFAULT_Q 240 +#define DGEMM_DEFAULT_Q 120 +#define CGEMM_DEFAULT_Q 120 +#define ZGEMM_DEFAULT_Q 120 + +#define SGEMM_DEFAULT_R 12288 +#define DGEMM_DEFAULT_R 8192 +#define CGEMM_DEFAULT_R 4096 +#define ZGEMM_DEFAULT_R 4096 + +#define SYMV_P 16 + +#define GEMM_DEFAULT_OFFSET_A 0 +#define GEMM_DEFAULT_OFFSET_B 0 + +#endif #ifdef GENERIC