Merge branch 'develop' into arm64_cmake_small_matrix_opt
This commit is contained in:
commit
b4495a8fb8
|
@ -23,6 +23,15 @@ jobs:
|
|||
- target: LOONGSON2K1000
|
||||
triple: loongarch64-unknown-linux-gnu
|
||||
opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=LOONGSON2K1000
|
||||
- target: LA64_GENERIC
|
||||
triple: loongarch64-unknown-linux-gnu
|
||||
opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=LA64_GENERIC
|
||||
- target: LA464
|
||||
triple: loongarch64-unknown-linux-gnu
|
||||
opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=LA464
|
||||
- target: LA264
|
||||
triple: loongarch64-unknown-linux-gnu
|
||||
opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=LA264
|
||||
- target: DYNAMIC_ARCH
|
||||
triple: loongarch64-unknown-linux-gnu
|
||||
opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=GENERIC
|
||||
|
|
|
@ -20,6 +20,12 @@ jobs:
|
|||
opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=LOONGSON3R5
|
||||
- target: LOONGSON2K1000
|
||||
opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=LOONGSON2K1000
|
||||
- target: LA64_GENERIC
|
||||
opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=LA64_GENERIC
|
||||
- target: LA464
|
||||
opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=LA464
|
||||
- target: LA264
|
||||
opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=LA264
|
||||
- target: DYNAMIC_ARCH
|
||||
opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=GENERIC
|
||||
|
||||
|
|
|
@ -14,6 +14,9 @@ endif
|
|||
ifeq ($(INTERFACE64),1)
|
||||
USE_64BITINT=1
|
||||
endif
|
||||
ifeq ($(USE_OPENMP),1)
|
||||
FOMP_OPT:= -fopenmp
|
||||
endif
|
||||
|
||||
PREFIX ?= /opt/OpenBLAS
|
||||
|
||||
|
@ -178,6 +181,7 @@ endif
|
|||
@echo 'libnamesuffix='$(LIBNAMESUFFIX) >> "$(PKGFILE)"
|
||||
@echo 'libsuffix='$(SYMBOLSUFFIX) >> "$(PKGFILE)"
|
||||
@echo 'includedir='$(OPENBLAS_INCLUDE_DIR) >> "$(PKGFILE)"
|
||||
@echo 'omp_opt='$(FOMP_OPT) >> "$(PKGFILE)"
|
||||
@echo 'openblas_config= USE_64BITINT='$(INTERFACE64) 'DYNAMIC_ARCH='$(DYNAMIC_ARCH) 'DYNAMIC_OLDER='$(DYNAMIC_OLDER) 'NO_CBLAS='$(NO_CBLAS) 'NO_LAPACK='$(NO_LAPACK) 'NO_LAPACKE='$(NO_LAPACKE) 'NO_AFFINITY='$(NO_AFFINITY) 'USE_OPENMP='$(USE_OPENMP) $(TARGET) 'MAX_THREADS='$(NUM_THREADS)>> "$(PKGFILE)"
|
||||
@echo 'version='$(VERSION) >> "$(PKGFILE)"
|
||||
@echo 'extralib='$(PKG_EXTRALIB) >> "$(PKGFILE)"
|
||||
|
|
|
@ -727,7 +727,7 @@ endif
|
|||
endif
|
||||
|
||||
ifeq ($(ARCH), loongarch64)
|
||||
DYNAMIC_CORE = LOONGSON3R5 LOONGSON2K1000 LOONGSONGENERIC
|
||||
DYNAMIC_CORE = LA64_GENERIC LA264 LA464
|
||||
endif
|
||||
|
||||
ifeq ($(ARCH), riscv64)
|
||||
|
@ -1720,8 +1720,8 @@ LAPACK_FFLAGS := $(filter-out -msse3 -mssse3 -msse4.1 -mavx -mavx2 -mskylake-avx
|
|||
override FFLAGS := $(filter-out -msse3 -mssse3 -msse4.1 -mavx -mavx2 -mskylake-avx512 ,$(FFLAGS))
|
||||
endif
|
||||
ifeq ($(F_COMPILER),FLANGNEW)
|
||||
LAPACK_FFLAGS := $(filter-out -m32 -m64 -msse3 -mssse3 -msse4.1 -mavx -mavx2 -mskylake-avx512 ,$(FFLAGS))
|
||||
override FFLAGS := $(filter-out -m32 -m64 -msse3 -mssse3 -msse4.1 -mavx -mavx2 -mskylake-avx512 ,$(FFLAGS))
|
||||
LAPACK_FFLAGS := $(filter-out -m32 -m64 -msse3 -mssse3 -msse4.1 -mavx -mavx2 -mskylake-avx512 -mtune=% -mabi=% ,$(FFLAGS))
|
||||
override FFLAGS := $(filter-out -m32 -m64 -msse3 -mssse3 -msse4.1 -mavx -mavx2 -mskylake-avx512 -mtune=% -mabi=% ,$(FFLAGS))
|
||||
endif
|
||||
|
||||
LAPACK_CFLAGS = $(CFLAGS)
|
||||
|
|
|
@ -126,9 +126,17 @@ x280
|
|||
RISCV64_ZVL256B
|
||||
|
||||
11.LOONGARCH64:
|
||||
// LOONGSONGENERIC/LOONGSON2K1000/LOONGSON3R5 are legacy names,
|
||||
// and it is recommended to use the more standardized naming conventions
|
||||
// LA64_GENERIC/LA264/LA464. You can still specify TARGET as
|
||||
// LOONGSONGENERIC/LOONGSON2K1000/LOONGSON3R5 during compilation or runtime,
|
||||
// and they will be internally relocated to LA64_GENERIC/LA264/LA464.
|
||||
LOONGSONGENERIC
|
||||
LOONGSON3R5
|
||||
LOONGSON2K1000
|
||||
LOONGSON3R5
|
||||
LA64_GENERIC
|
||||
LA264
|
||||
LA464
|
||||
|
||||
12. Elbrus E2000:
|
||||
E2K
|
||||
|
|
8
cblas.h
8
cblas.h
|
@ -407,13 +407,13 @@ void cblas_cimatcopy(OPENBLAS_CONST enum CBLAS_ORDER CORDER, OPENBLAS_CONST enum
|
|||
void cblas_zimatcopy(OPENBLAS_CONST enum CBLAS_ORDER CORDER, OPENBLAS_CONST enum CBLAS_TRANSPOSE CTRANS, OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST double* calpha, double* a,
|
||||
OPENBLAS_CONST blasint clda, OPENBLAS_CONST blasint cldb);
|
||||
|
||||
void cblas_sgeadd(OPENBLAS_CONST enum CBLAS_ORDER CORDER,OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST float calpha, float *a, OPENBLAS_CONST blasint clda, OPENBLAS_CONST float cbeta,
|
||||
void cblas_sgeadd(OPENBLAS_CONST enum CBLAS_ORDER CORDER,OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST float calpha, OPENBLAS_CONST float *a, OPENBLAS_CONST blasint clda, OPENBLAS_CONST float cbeta,
|
||||
float *c, OPENBLAS_CONST blasint cldc);
|
||||
void cblas_dgeadd(OPENBLAS_CONST enum CBLAS_ORDER CORDER,OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST double calpha, double *a, OPENBLAS_CONST blasint clda, OPENBLAS_CONST double cbeta,
|
||||
void cblas_dgeadd(OPENBLAS_CONST enum CBLAS_ORDER CORDER,OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST double calpha, OPENBLAS_CONST double *a, OPENBLAS_CONST blasint clda, OPENBLAS_CONST double cbeta,
|
||||
double *c, OPENBLAS_CONST blasint cldc);
|
||||
void cblas_cgeadd(OPENBLAS_CONST enum CBLAS_ORDER CORDER,OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST float *calpha, float *a, OPENBLAS_CONST blasint clda, OPENBLAS_CONST float *cbeta,
|
||||
void cblas_cgeadd(OPENBLAS_CONST enum CBLAS_ORDER CORDER,OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST float *calpha, OPENBLAS_CONST float *a, OPENBLAS_CONST blasint clda, OPENBLAS_CONST float *cbeta,
|
||||
float *c, OPENBLAS_CONST blasint cldc);
|
||||
void cblas_zgeadd(OPENBLAS_CONST enum CBLAS_ORDER CORDER,OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST double *calpha, double *a, OPENBLAS_CONST blasint clda, OPENBLAS_CONST double *cbeta,
|
||||
void cblas_zgeadd(OPENBLAS_CONST enum CBLAS_ORDER CORDER,OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST double *calpha, OPENBLAS_CONST double *a, OPENBLAS_CONST blasint clda, OPENBLAS_CONST double *cbeta,
|
||||
double *c, OPENBLAS_CONST blasint cldc);
|
||||
|
||||
void cblas_sgemm_batch(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE * TransA_array, OPENBLAS_CONST enum CBLAS_TRANSPOSE * TransB_array, OPENBLAS_CONST blasint * M_array, OPENBLAS_CONST blasint * N_array, OPENBLAS_CONST blasint * K_array,
|
||||
|
|
|
@ -94,6 +94,10 @@ if (DYNAMIC_ARCH)
|
|||
endif ()
|
||||
endif ()
|
||||
|
||||
if (LOONGARCH64)
|
||||
set(DYNAMIC_CORE LOONGSONGENERIC LOONGSON2K1000 LOONGSON3R5)
|
||||
endif ()
|
||||
|
||||
if (EXISTS ${PROJECT_SOURCE_DIR}/config_kernel.h)
|
||||
message (FATAL_ERROR "Your build directory contains a file config_kernel.h, probably from a previous compilation with make. This will conflict with the cmake compilation and cause strange compiler errors - please remove the file before trying again")
|
||||
endif ()
|
||||
|
|
|
@ -61,21 +61,25 @@ if (${F_COMPILER} STREQUAL "GFORTRAN" OR ${F_COMPILER} STREQUAL "F95" OR CMAKE_F
|
|||
endif ()
|
||||
if (LOONGARCH64)
|
||||
if (BINARY64)
|
||||
CHECK_C_COMPILER_FLAG("-mabi=lp64d" COMPILER_SUPPORT_LP64D_ABI)
|
||||
if(COMPILER_SUPPORT_LP64D_ABI)
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -mabi=lp64d")
|
||||
else()
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -mabi=lp64")
|
||||
endif ()
|
||||
if (NOT CMAKE_Fortran_COMPILER_ID MATCHES "LLVMFlang.*")
|
||||
CHECK_C_COMPILER_FLAG("-mabi=lp64d" COMPILER_SUPPORT_LP64D_ABI)
|
||||
if(COMPILER_SUPPORT_LP64D_ABI)
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -mabi=lp64d")
|
||||
else()
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -mabi=lp64")
|
||||
endif ()
|
||||
endif ()
|
||||
if (INTERFACE64)
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -fdefault-integer-8")
|
||||
endif ()
|
||||
else ()
|
||||
CHECK_C_COMPILER_FLAG("-mabi=ilp32d" COMPILER_SUPPORT_ILP32D_ABI)
|
||||
if(COMPILER_SUPPORT_ILP32D_ABI)
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -mabi=ilp32d")
|
||||
else()
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -mabi=lp32")
|
||||
if (NOT CMAKE_Fortran_COMPILER_ID MATCHES "LLVMFlang.*")
|
||||
CHECK_C_COMPILER_FLAG("-mabi=ilp32d" COMPILER_SUPPORT_ILP32D_ABI)
|
||||
if(COMPILER_SUPPORT_ILP32D_ABI)
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -mabi=ilp32d")
|
||||
else()
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -mabi=lp32")
|
||||
endif ()
|
||||
endif ()
|
||||
endif ()
|
||||
endif ()
|
||||
|
|
|
@ -9,5 +9,5 @@ Name: OpenBLAS
|
|||
Description: OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version
|
||||
Version: @OpenBLAS_VERSION@
|
||||
URL: https://github.com/OpenMathLib/OpenBLAS
|
||||
Libs: @OpenMP_C_FLAGS@ -L${libdir} -l${libnameprefix}openblas${libnamesuffix}${libsuffix}
|
||||
Cflags: -I${includedir}
|
||||
Libs: -L${libdir} -l${libnameprefix}openblas${libnamesuffix}${libsuffix}
|
||||
Cflags: -I${includedir} @OpenMP_C_FLAGS@
|
||||
|
|
|
@ -1349,6 +1349,54 @@ endif ()
|
|||
"#define DTB_DEFAULT_ENTRIES 128\n"
|
||||
"#define DTB_SIZE 4096\n"
|
||||
"#define L2_ASSOCIATIVE 4\n")
|
||||
elseif ("${TCORE}" STREQUAL "LOONGSONGENERIC")
|
||||
file(APPEND ${TARGET_CONF_TEMP}
|
||||
"#define DTB_DEFAULT_ENTRIES 64\n")
|
||||
set(SGEMM_UNROLL_M 2)
|
||||
set(SGEMM_UNROLL_N 8)
|
||||
set(DGEMM_UNROLL_M 2)
|
||||
set(DGEMM_UNROLL_N 8)
|
||||
set(CGEMM_UNROLL_M 1)
|
||||
set(CGEMM_UNROLL_N 4)
|
||||
set(ZGEMM_UNROLL_M 1)
|
||||
set(ZGEMM_UNROLL_N 4)
|
||||
set(CGEMM3M_UNROLL_M 2)
|
||||
set(CGEMM3M_UNROLL_N 8)
|
||||
set(ZGEMM3M_UNROLL_M 2)
|
||||
set(ZGEMM3M_UNROLL_N 8)
|
||||
elseif ("${TCORE}" STREQUAL "LOONGSON2K1000")
|
||||
file(APPEND ${TARGET_CONF_TEMP}
|
||||
"#define DTB_DEFAULT_ENTRIES 64\n")
|
||||
set(HAVE_LSX 1)
|
||||
set(SGEMM_UNROLL_M 2)
|
||||
set(SGEMM_UNROLL_N 8)
|
||||
set(DGEMM_UNROLL_M 8)
|
||||
set(DGEMM_UNROLL_N 4)
|
||||
set(CGEMM_UNROLL_M 8)
|
||||
set(CGEMM_UNROLL_N 4)
|
||||
set(ZGEMM_UNROLL_M 4)
|
||||
set(ZGEMM_UNROLL_N 4)
|
||||
set(CGEMM3M_UNROLL_M 2)
|
||||
set(CGEMM3M_UNROLL_N 8)
|
||||
set(ZGEMM3M_UNROLL_M 8)
|
||||
set(ZGEMM3M_UNROLL_N 4)
|
||||
elseif ("${TCORE}" STREQUAL "LOONGSON3R5")
|
||||
file(APPEND ${TARGET_CONF_TEMP}
|
||||
"#define DTB_DEFAULT_ENTRIES 64\n")
|
||||
set(HAVE_LASX 1)
|
||||
set(HAVE_LSX 1)
|
||||
set(SGEMM_UNROLL_M 16)
|
||||
set(SGEMM_UNROLL_N 8)
|
||||
set(DGEMM_UNROLL_M 16)
|
||||
set(DGEMM_UNROLL_N 6)
|
||||
set(CGEMM_UNROLL_M 16)
|
||||
set(CGEMM_UNROLL_N 4)
|
||||
set(ZGEMM_UNROLL_M 8)
|
||||
set(ZGEMM_UNROLL_N 4)
|
||||
set(CGEMM3M_UNROLL_M 16)
|
||||
set(CGEMM3M_UNROLL_N 8)
|
||||
set(ZGEMM3M_UNROLL_M 16)
|
||||
set(ZGEMM3M_UNROLL_N 6)
|
||||
endif()
|
||||
set(SBGEMM_UNROLL_M 8)
|
||||
set(SBGEMM_UNROLL_N 4)
|
||||
|
|
|
@ -388,7 +388,7 @@ if (NEED_PIC)
|
|||
endif()
|
||||
endif ()
|
||||
|
||||
if (X86_64 OR ${CORE} STREQUAL POWER10 OR ARM64)
|
||||
if (X86_64 OR ${CORE} STREQUAL POWER10 OR ARM64 OR LOONGARCH64)
|
||||
set(SMALL_MATRIX_OPT TRUE)
|
||||
endif ()
|
||||
if (ARM64)
|
||||
|
@ -403,7 +403,7 @@ if (SMALL_MATRIX_OPT)
|
|||
endif ()
|
||||
|
||||
if (DYNAMIC_ARCH)
|
||||
if (X86 OR X86_64 OR ARM64 OR POWER OR RISCV64)
|
||||
if (X86 OR X86_64 OR ARM64 OR POWER OR RISCV64 OR LOONGARCH64)
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DDYNAMIC_ARCH")
|
||||
if (DYNAMIC_OLDER)
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DDYNAMIC_OLDER")
|
||||
|
|
|
@ -104,6 +104,8 @@ elseif(ARM)
|
|||
set(ARCH "arm")
|
||||
elseif(ARM64)
|
||||
set(ARCH "arm64")
|
||||
elseif(LOONGARCH64)
|
||||
set(ARCH "loongarch64")
|
||||
else()
|
||||
set(ARCH ${CMAKE_SYSTEM_PROCESSOR} CACHE STRING "Target Architecture")
|
||||
endif ()
|
||||
|
|
|
@ -281,9 +281,13 @@ REALNAME: ;\
|
|||
#define GNUSTACK
|
||||
#endif /* defined(__linux__) && defined(__ELF__) */
|
||||
|
||||
#ifdef __clang__
|
||||
#define EPILOGUE .end
|
||||
#else
|
||||
#define EPILOGUE \
|
||||
.end REALNAME ;\
|
||||
GNUSTACK
|
||||
#endif
|
||||
|
||||
#define PROFCODE
|
||||
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*****************************************************************************
|
||||
Copyright (c) 2011-2020, The OpenBLAS Project
|
||||
Copyright (c) 2011-2024, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
|
@ -32,53 +32,299 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
**********************************************************************************/
|
||||
|
||||
#include <stdint.h>
|
||||
#include <sys/auxv.h>
|
||||
#include <stdio.h>
|
||||
#include <math.h>
|
||||
#include <string.h>
|
||||
#include <sys/auxv.h>
|
||||
|
||||
/* If LASX extension instructions supported,
|
||||
* using core LOONGSON3R5
|
||||
* If only LSX extension instructions supported,
|
||||
* using core LOONGSON2K1000
|
||||
* If neither LASX nor LSX extension instructions supported,
|
||||
* using core LOONGSONGENERIC (As far as I know, there is no such
|
||||
* CPU yet)
|
||||
*/
|
||||
#define CPU_LA64_GENERIC 0
|
||||
#define CPU_LA264 1
|
||||
#define CPU_LA364 2
|
||||
#define CPU_LA464 3
|
||||
#define CPU_LA664 4
|
||||
|
||||
#define CPU_GENERIC 0
|
||||
#define CPU_LOONGSON3R5 1
|
||||
#define CPU_LOONGSON2K1000 2
|
||||
#define CORE_LA64_GENERIC 0
|
||||
#define CORE_LA264 1
|
||||
#define CORE_LA464 2
|
||||
|
||||
#define LA_HWCAP_LSX (1U << 4)
|
||||
#define LA_HWCAP_LASX (1U << 5)
|
||||
|
||||
#define LOONGARCH_CFG0 0x00
|
||||
#define LOONGARCH_CFG2 0x02
|
||||
#define LOONGARCH_CFG10 0x10
|
||||
#define LOONGARCH_CFG11 0x11
|
||||
#define LOONGARCH_CFG12 0x12
|
||||
#define LOONGARCH_CFG13 0x13
|
||||
#define LOONGARCH_CFG14 0x14
|
||||
/* Single-bit masks (bit 7 and bit 6). Parenthesized so each macro expands
 * as one operand inside a larger expression instead of letting neighbouring
 * operators bind to the shift count.
 * NOTE(review): no use of these masks is visible in this chunk; presumably
 * they select the LASX/LSX bits of a CPUCFG word - confirm. */
#define LASX_MASK                (1 << 7)
#define LSX_MASK                 (1 << 6)
|
||||
#define PRID_SERIES_MASK 0xf000
|
||||
#define PRID_SERIES_LA264 0xa000
|
||||
#define PRID_SERIES_LA364 0xb000
|
||||
#define PRID_SERIES_LA464 0xc000
|
||||
#define PRID_SERIES_LA664 0xd000
|
||||
|
||||
#define CACHE_INFO_L1_IU 0
|
||||
#define CACHE_INFO_L1_D 1
|
||||
#define CACHE_INFO_L2_IU 2
|
||||
#define CACHE_INFO_L2_D 3
|
||||
#define CACHE_INFO_L3_IU 4
|
||||
#define CACHE_INFO_L3_D 5
|
||||
#define L1_IU_PRESENT_MASK 0x0001
|
||||
#define L1_IU_UNITY_MASK 0x0002
|
||||
#define L1_D_PRESENT_MASK 0x0004
|
||||
#define L2_IU_PRESENT_MASK 0x0008
|
||||
#define L2_IU_UNITY_MASK 0x0010
|
||||
#define L2_D_PRESENT_MASK 0x0080
|
||||
#define L3_IU_PRESENT_MASK 0x0400
|
||||
#define L3_IU_UNITY_MASK 0x0800
|
||||
#define L3_D_PRESENT_MASK 0x4000
|
||||
#define CACHE_WAY_MINUS_1_MASK 0x0000ffff
|
||||
#define CACHE_INDEX_LOG2_MASK 0x00ff0000
|
||||
#define CACHE_LINESIZE_LOG2_MASK 0x7f000000
|
||||
|
||||
typedef struct {
|
||||
int size;
|
||||
int associative;
|
||||
int linesize;
|
||||
int unify;
|
||||
int present;
|
||||
} cache_info_t;
|
||||
|
||||
/* Using microarchitecture representation */
|
||||
static char *cpuname[] = {
|
||||
"LOONGSONGENERIC",
|
||||
"LOONGSON3R5",
|
||||
"LOONGSON2K1000"
|
||||
"LA64_GENERIC",
|
||||
"LA264", /* Loongson 64bit, 2-issue, Like 2K1000LA */
|
||||
"LA364", /* Loongson 64bit, 3-issue, Like 2K2000 */
|
||||
"LA464", /* Loongson 64bit, 4-issue, Like 3A5000, 3C5000L, 3C5000 and 3D5000 */
|
||||
"LA664" /* Loongson 64bit, 6-issue, Like 3A6000, 3C6000 and 3D6000 */
|
||||
};
|
||||
|
||||
static char *cpuname_lower[] = {
|
||||
"loongsongeneric",
|
||||
"loongson3r5",
|
||||
"loongson2k1000"
|
||||
"la64_generic",
|
||||
"la264",
|
||||
"la364",
|
||||
"la464",
|
||||
"la664"
|
||||
};
|
||||
|
||||
int detect(void) {
|
||||
#ifdef __linux
|
||||
static char *corename[] = {
|
||||
"LA64_GENERIC", /* Implies using scalar instructions for optimization */
|
||||
"LA264", /* Implies using LSX instructions for optimization */
|
||||
"LA464", /* Implies using LASX instructions for optimization */
|
||||
};
|
||||
|
||||
static char *corename_lower[] = {
|
||||
"la64_generic",
|
||||
"la264",
|
||||
"la464",
|
||||
};
|
||||
|
||||
/*
|
||||
* Obtain cache and processor identification
|
||||
* through the cpucfg command.
|
||||
*/
|
||||
static void get_cacheinfo(int type, cache_info_t *cacheinfo) {
|
||||
cache_info_t cache_info;
|
||||
memset(&cache_info, 0, sizeof(cache_info));
|
||||
uint32_t reg_10 = 0;
|
||||
__asm__ volatile (
|
||||
"cpucfg %0, %1 \n\t"
|
||||
: "+&r"(reg_10)
|
||||
: "r"(LOONGARCH_CFG10)
|
||||
);
|
||||
|
||||
switch (type) {
|
||||
case CACHE_INFO_L1_IU:
|
||||
if (reg_10 & L1_IU_PRESENT_MASK) {
|
||||
uint32_t reg_11 = 0;
|
||||
cache_info.present = reg_10 & L1_IU_PRESENT_MASK;
|
||||
cache_info.unify = reg_10 & L1_IU_UNITY_MASK;
|
||||
__asm__ volatile (
|
||||
"cpucfg %0, %1 \n\t"
|
||||
: "+&r"(reg_11)
|
||||
: "r"(LOONGARCH_CFG11)
|
||||
);
|
||||
cache_info.associative = (reg_11 & CACHE_WAY_MINUS_1_MASK) + 1;
|
||||
cache_info.linesize = 1 << ((reg_11 & CACHE_LINESIZE_LOG2_MASK) >> 24);
|
||||
cache_info.size = cache_info.associative * cache_info.linesize *
|
||||
(1 << ((reg_11 & CACHE_INDEX_LOG2_MASK) >> 16));
|
||||
}
|
||||
break;
|
||||
|
||||
case CACHE_INFO_L1_D:
|
||||
if (reg_10 & L1_D_PRESENT_MASK) {
|
||||
uint32_t reg_12 = 0;
|
||||
cache_info.present = reg_10 & L1_D_PRESENT_MASK;
|
||||
__asm__ volatile (
|
||||
"cpucfg %0, %1 \n\t"
|
||||
: "+&r"(reg_12)
|
||||
: "r"(LOONGARCH_CFG12)
|
||||
);
|
||||
cache_info.associative = (reg_12 & CACHE_WAY_MINUS_1_MASK) + 1;
|
||||
cache_info.linesize = 1 << ((reg_12 & CACHE_LINESIZE_LOG2_MASK) >> 24);
|
||||
cache_info.size = cache_info.associative * cache_info.linesize *
|
||||
(1 << ((reg_12 & CACHE_INDEX_LOG2_MASK) >> 16));
|
||||
}
|
||||
break;
|
||||
|
||||
case CACHE_INFO_L2_IU:
|
||||
if (reg_10 & L2_IU_PRESENT_MASK) {
|
||||
uint32_t reg_13 = 0;
|
||||
cache_info.present = reg_10 & L2_IU_PRESENT_MASK;
|
||||
cache_info.unify = reg_10 & L2_IU_UNITY_MASK;
|
||||
__asm__ volatile (
|
||||
"cpucfg %0, %1 \n\t"
|
||||
: "+&r"(reg_13)
|
||||
: "r"(LOONGARCH_CFG13)
|
||||
);
|
||||
cache_info.associative = (reg_13 & CACHE_WAY_MINUS_1_MASK) + 1;
|
||||
cache_info.linesize = 1 << ((reg_13 & CACHE_LINESIZE_LOG2_MASK) >> 24);
|
||||
cache_info.size = cache_info.associative * cache_info.linesize *
|
||||
(1 << ((reg_13 & CACHE_INDEX_LOG2_MASK) >> 16));
|
||||
}
|
||||
break;
|
||||
|
||||
case CACHE_INFO_L2_D:
|
||||
if (reg_10 & L2_D_PRESENT_MASK) {
|
||||
cache_info.present = reg_10 & L2_D_PRESENT_MASK;
|
||||
// No data fetch
|
||||
}
|
||||
break;
|
||||
|
||||
case CACHE_INFO_L3_IU:
|
||||
if (reg_10 & L3_IU_PRESENT_MASK) {
|
||||
uint32_t reg_14 = 0;
|
||||
cache_info.present = reg_10 & L3_IU_PRESENT_MASK;
|
||||
cache_info.unify = reg_10 & L3_IU_UNITY_MASK;
|
||||
__asm__ volatile (
|
||||
"cpucfg %0, %1 \n\t"
|
||||
: "+&r"(reg_14)
|
||||
: "r"(LOONGARCH_CFG14)
|
||||
);
|
||||
cache_info.associative = (reg_14 & CACHE_WAY_MINUS_1_MASK) + 1;
|
||||
cache_info.linesize = 1 << ((reg_14 & CACHE_LINESIZE_LOG2_MASK) >> 24);
|
||||
cache_info.size = cache_info.associative * cache_info.linesize *
|
||||
(1 << ((reg_14 & CACHE_INDEX_LOG2_MASK) >> 16));
|
||||
}
|
||||
break;
|
||||
|
||||
case CACHE_INFO_L3_D:
|
||||
if (reg_10 & L3_D_PRESENT_MASK) {
|
||||
cache_info.present = reg_10 & L3_D_PRESENT_MASK;
|
||||
// No data fetch
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
*cacheinfo = cache_info;
|
||||
}
|
||||
|
||||
static uint32_t get_prid() {
|
||||
uint32_t reg = 0;
|
||||
__asm__ volatile (
|
||||
"cpucfg %0, %1 \n\t"
|
||||
: "+&r"(reg)
|
||||
: "r"(LOONGARCH_CFG0)
|
||||
);
|
||||
return reg;
|
||||
}
|
||||
|
||||
/*
 * Count the lines of /proc/cpuinfo that begin with "processor".
 *
 * count: output; receives the number of matching lines. It is set to 0
 *        when /proc/cpuinfo cannot be opened, so the caller always gets a
 *        defined value instead of whatever it happened to pass in.
 */
static void get_cpucount(uint32_t *count) {
  uint32_t num = 0;
  char buf[200];
  FILE *f = fopen("/proc/cpuinfo", "r");

  if (!f) {
    /* Report "unknown" explicitly rather than leaving *count untouched. */
    *count = 0;
    return;
  }

  while (fgets(buf, sizeof(buf), f)) {
    /* 9 == strlen("processor"): match the key at the start of the line. */
    if (!strncmp("processor", buf, 9))
      num++;
  }
  fclose(f);
  *count = num;
}
|
||||
|
||||
/* Detect whether the OS supports the LASX instruction set */
|
||||
static int os_support_lasx() {
|
||||
int hwcap = (int)getauxval(AT_HWCAP);
|
||||
|
||||
if (hwcap & LA_HWCAP_LASX)
|
||||
return CPU_LOONGSON3R5;
|
||||
else if (hwcap & LA_HWCAP_LSX)
|
||||
return CPU_LOONGSON2K1000;
|
||||
return 1;
|
||||
else
|
||||
return CPU_GENERIC;
|
||||
#endif
|
||||
return CPU_GENERIC;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Detect whether the OS supports the LSX instruction set */
|
||||
static int os_support_lsx() {
|
||||
int hwcap = (int)getauxval(AT_HWCAP);
|
||||
|
||||
if (hwcap & LA_HWCAP_LSX)
|
||||
return 1;
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
int get_coretype(void) {
|
||||
uint32_t prid = get_prid();
|
||||
switch (prid & PRID_SERIES_MASK) {
|
||||
case (PRID_SERIES_LA464):
|
||||
case (PRID_SERIES_LA664):
|
||||
if (os_support_lasx())
|
||||
return CORE_LA464;
|
||||
else if (os_support_lsx())
|
||||
return CORE_LA264;
|
||||
else
|
||||
return CORE_LA64_GENERIC;
|
||||
break;
|
||||
|
||||
case (PRID_SERIES_LA264):
|
||||
case (PRID_SERIES_LA364):
|
||||
if (os_support_lsx())
|
||||
return CORE_LA264;
|
||||
else
|
||||
return CORE_LA64_GENERIC;
|
||||
break;
|
||||
|
||||
default:
|
||||
return CORE_LA64_GENERIC;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
int get_cputype(void) {
|
||||
uint32_t prid = get_prid();
|
||||
switch (prid & PRID_SERIES_MASK) {
|
||||
case (PRID_SERIES_LA264):
|
||||
return CPU_LA264;
|
||||
break;
|
||||
|
||||
case (PRID_SERIES_LA364):
|
||||
return CPU_LA364;
|
||||
break;
|
||||
|
||||
case (PRID_SERIES_LA464):
|
||||
return CPU_LA464;
|
||||
break;
|
||||
|
||||
case (PRID_SERIES_LA664):
|
||||
return CPU_LA664;
|
||||
break;
|
||||
|
||||
default:
|
||||
return CPU_LA64_GENERIC;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
char *get_corename(void) {
|
||||
return cpuname[detect()];
|
||||
return corename[get_coretype()];
|
||||
}
|
||||
|
||||
void get_libname(void){
|
||||
printf("%s", corename_lower[get_coretype()]);
|
||||
}
|
||||
|
||||
void get_architecture(void) {
|
||||
|
@ -86,8 +332,7 @@ void get_architecture(void) {
|
|||
}
|
||||
|
||||
void get_subarchitecture(void) {
|
||||
int d = detect();
|
||||
printf("%s", cpuname[d]);
|
||||
printf("%s", cpuname[get_cputype()]);
|
||||
}
|
||||
|
||||
void get_subdirname(void) {
|
||||
|
@ -95,50 +340,69 @@ void get_subdirname(void) {
|
|||
}
|
||||
|
||||
void get_cpuconfig(void) {
|
||||
uint32_t hwcaps = 0;
|
||||
int d = detect();
|
||||
cache_info_t info;
|
||||
uint32_t num_cores = 0;
|
||||
|
||||
switch (d) {
|
||||
case CPU_LOONGSON3R5:
|
||||
printf("#define LOONGSON3R5\n");
|
||||
printf("#define L1_DATA_SIZE 65536\n");
|
||||
printf("#define L1_DATA_LINESIZE 64\n");
|
||||
printf("#define L2_SIZE 1048576\n");
|
||||
printf("#define L2_LINESIZE 64\n");
|
||||
printf("#define DTB_DEFAULT_ENTRIES 64\n");
|
||||
printf("#define DTB_SIZE 4096\n");
|
||||
printf("#define L2_ASSOCIATIVE 16\n");
|
||||
break;
|
||||
printf("#define %s\n", corename[get_coretype()]); // Core name
|
||||
|
||||
case CPU_LOONGSON2K1000:
|
||||
printf("#define LOONGSON2K1000\n");
|
||||
printf("#define L1_DATA_SIZE 65536\n");
|
||||
printf("#define L1_DATA_LINESIZE 64\n");
|
||||
printf("#define L2_SIZE 262144\n");
|
||||
printf("#define L2_LINESIZE 64\n");
|
||||
printf("#define DTB_DEFAULT_ENTRIES 64\n");
|
||||
printf("#define DTB_SIZE 4096\n");
|
||||
printf("#define L2_ASSOCIATIVE 16\n");
|
||||
break;
|
||||
printf("#define CPU_NAME %s\n", cpuname[get_cputype()]); // Cpu microarchitecture name
|
||||
|
||||
default:
|
||||
printf("#define LOONGSONGENERIC\n");
|
||||
printf("#define L1_DATA_SIZE 65536\n");
|
||||
printf("#define L1_DATA_LINESIZE 64\n");
|
||||
printf("#define L2_SIZE 262144\n");
|
||||
printf("#define L2_LINESIZE 64\n");
|
||||
printf("#define DTB_DEFAULT_ENTRIES 64\n");
|
||||
printf("#define DTB_SIZE 4096\n");
|
||||
printf("#define L2_ASSOCIATIVE 16\n");
|
||||
break;
|
||||
get_cacheinfo(CACHE_INFO_L1_IU, &info);
|
||||
if (info.present) {
|
||||
if (info.unify) { // Unified cache, without distinguishing between instructions and data
|
||||
printf("#define L1_SIZE %d\n", info.size);
|
||||
printf("#define L1_ASSOCIATIVE %d\n", info.associative);
|
||||
printf("#define L1_LINESIZE %d\n", info.linesize);
|
||||
} else {
|
||||
printf("#define L1_CODE_SIZE %d\n", info.size);
|
||||
printf("#define L1_CODE_ASSOCIATIVE %d\n", info.associative);
|
||||
printf("#define L1_CODE_LINESIZE %d\n", info.linesize);
|
||||
}
|
||||
}
|
||||
|
||||
hwcaps = (uint32_t)getauxval( AT_HWCAP );
|
||||
if (hwcaps & LA_HWCAP_LSX) printf("#define HAVE_LSX\n");
|
||||
if (hwcaps & LA_HWCAP_LASX) printf("#define HAVE_LASX\n");
|
||||
}
|
||||
if (!info.unify) {
|
||||
get_cacheinfo(CACHE_INFO_L1_D, &info);
|
||||
if (info.present) {
|
||||
printf("#define L1_DATA_SIZE %d\n", info.size);
|
||||
printf("#define L1_DATA_ASSOCIATIVE %d\n", info.associative);
|
||||
printf("#define L1_DATA_LINESIZE %d\n", info.linesize);
|
||||
}
|
||||
}
|
||||
|
||||
void get_libname(void){
|
||||
int d = detect();
|
||||
printf("%s", cpuname_lower[d]);
|
||||
get_cacheinfo(CACHE_INFO_L2_IU, &info);
|
||||
if (info.present > 0) {
|
||||
if (info.unify) {
|
||||
printf("#define L2_SIZE %d\n", info.size);
|
||||
printf("#define L2_ASSOCIATIVE %d\n", info.associative);
|
||||
printf("#define L2_LINESIZE %d\n", info.linesize);
|
||||
} else {
|
||||
printf("#define L2_CODE_SIZE %d\n", info.size);
|
||||
printf("#define L2_CODE_ASSOCIATIVE %d\n", info.associative);
|
||||
printf("#define L2_CODE_LINESIZE %d\n", info.linesize);
|
||||
}
|
||||
}
|
||||
|
||||
get_cacheinfo(CACHE_INFO_L3_IU, &info);
|
||||
if (info.present > 0) {
|
||||
if (info.unify) {
|
||||
printf("#define L3_SIZE %d\n", info.size);
|
||||
printf("#define L3_ASSOCIATIVE %d\n", info.associative);
|
||||
printf("#define L3_LINESIZE %d\n", info.linesize);
|
||||
} else {
|
||||
printf("#define L3_CODE_SIZE %d\n", info.size);
|
||||
printf("#define L3_CODE_ASSOCIATIVE %d\n", info.associative);
|
||||
printf("#define L3_CODE_LINESIZE %d\n", info.linesize);
|
||||
}
|
||||
}
|
||||
|
||||
/* Call the probes. A bare function name decays to a non-null pointer, so
 * testing it without "()" is always true and would emit both defines
 * unconditionally, regardless of what the OS reports in AT_HWCAP. */
if (os_support_lsx()) printf("#define HAVE_LSX\n");
if (os_support_lasx()) printf("#define HAVE_LASX\n");
|
||||
|
||||
get_cpucount(&num_cores);
|
||||
if (num_cores)
|
||||
printf("#define NUM_CORES %d\n", num_cores);
|
||||
|
||||
//TODO: It's unclear what this entry represents, but it is indeed necessary.
|
||||
//It has been set based on reference to other platforms.
|
||||
printf("#define DTB_DEFAULT_ENTRIES 64\n");
|
||||
}
|
||||
|
|
|
@ -54,6 +54,8 @@ if (DYNAMIC_ARCH)
|
|||
list(APPEND COMMON_SOURCES dynamic_power.c)
|
||||
elseif (RISCV64)
|
||||
list(APPEND COMMON_SOURCES dynamic_riscv64.c detect_riscv64.c)
|
||||
elseif (LOONGARCH64)
|
||||
list(APPEND COMMON_SOURCES dynamic_loongarch64.c)
|
||||
else ()
|
||||
list(APPEND COMMON_SOURCES dynamic.c)
|
||||
endif ()
|
||||
|
|
|
@ -1082,7 +1082,7 @@ if (buffer == NULL) {
|
|||
}
|
||||
|
||||
|
||||
//For target LOONGSON3R5, applying an offset to the buffer is essential
|
||||
//For LOONGARCH64, applying an offset to the buffer is essential
|
||||
//for minimizing cache conflicts and optimizing performance.
|
||||
#if defined(ARCH_LOONGARCH64) && !defined(NO_AFFINITY)
|
||||
if (sa == NULL) sa = (void *)((BLASLONG)buffer + (WhereAmI() & 0xf) * GEMM_OFFSET_A);
|
||||
|
|
|
@ -28,25 +28,36 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#include <sys/auxv.h>
|
||||
#include "common.h"
|
||||
|
||||
extern gotoblas_t gotoblas_LOONGSON3R5;
|
||||
extern gotoblas_t gotoblas_LOONGSON2K1000;
|
||||
extern gotoblas_t gotoblas_LOONGSONGENERIC;
|
||||
#define NUM_CORETYPES 6
|
||||
#define LOONGARCH_CFG0 0x00
|
||||
#define LA_HWCAP_LSX (1U << 4)
|
||||
#define LA_HWCAP_LASX (1U << 5)
|
||||
#define PRID_SERIES_MASK 0xf000
|
||||
#define PRID_SERIES_LA264 0xa000
|
||||
#define PRID_SERIES_LA364 0xb000
|
||||
#define PRID_SERIES_LA464 0xc000
|
||||
#define PRID_SERIES_LA664 0xd000
|
||||
|
||||
extern gotoblas_t gotoblas_LA64_GENERIC;
|
||||
extern gotoblas_t gotoblas_LA264;
|
||||
extern gotoblas_t gotoblas_LA464;
|
||||
|
||||
extern void openblas_warning(int verbose, const char * msg);
|
||||
|
||||
#define NUM_CORETYPES 3
|
||||
|
||||
static char *corename[] = {
|
||||
"loongson3r5",
|
||||
"loongson2k1000",
|
||||
"la64_generic",
|
||||
"la264",
|
||||
"la464",
|
||||
"loongsongeneric",
|
||||
"loongson2k1000",
|
||||
"loongson3r5",
|
||||
"unknown"
|
||||
};
|
||||
|
||||
char *gotoblas_corename(void) {
|
||||
if (gotoblas == &gotoblas_LOONGSON3R5) return corename[0];
|
||||
if (gotoblas == &gotoblas_LOONGSON2K1000) return corename[1];
|
||||
if (gotoblas == &gotoblas_LOONGSONGENERIC) return corename[2];
|
||||
if (gotoblas == &gotoblas_LA64_GENERIC) return corename[0];
|
||||
if (gotoblas == &gotoblas_LA264) return corename[1];
|
||||
if (gotoblas == &gotoblas_LA464) return corename[2];
|
||||
return corename[NUM_CORETYPES];
|
||||
}
|
||||
|
||||
|
@ -66,27 +77,78 @@ static gotoblas_t *force_coretype(char *coretype) {
|
|||
|
||||
switch (found)
|
||||
{
|
||||
case 0: return (&gotoblas_LOONGSON3R5);
|
||||
case 1: return (&gotoblas_LOONGSON2K1000);
|
||||
case 2: return (&gotoblas_LOONGSONGENERIC);
|
||||
case 0: return (&gotoblas_LA64_GENERIC);
|
||||
case 1: return (&gotoblas_LA264);
|
||||
case 2: return (&gotoblas_LA464);
|
||||
case 3: return (&gotoblas_LA64_GENERIC);
|
||||
case 4: return (&gotoblas_LA264);
|
||||
case 5: return (&gotoblas_LA464);
|
||||
}
|
||||
snprintf(message, 128, "Core not found: %s\n", coretype);
|
||||
openblas_warning(1, message);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
#define LA_HWCAP_LSX (1U << 4)
|
||||
#define LA_HWCAP_LASX (1U << 5)
|
||||
|
||||
static gotoblas_t *get_coretype(void) {
|
||||
int hwcap = (int)getauxval(AT_HWCAP);
|
||||
/* Detect whether the OS supports the LASX instruction set */
|
||||
static int os_support_lasx() {
|
||||
int hwcap = (int)getauxval(AT_HWCAP);
|
||||
|
||||
if (hwcap & LA_HWCAP_LASX)
|
||||
return &gotoblas_LOONGSON3R5;
|
||||
else if (hwcap & LA_HWCAP_LSX)
|
||||
return &gotoblas_LOONGSON2K1000;
|
||||
return 1;
|
||||
else
|
||||
return &gotoblas_LOONGSONGENERIC;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Detect whether the OS supports the LSX instruction set */
|
||||
static int os_support_lsx() {
|
||||
int hwcap = (int)getauxval(AT_HWCAP);
|
||||
|
||||
if (hwcap & LA_HWCAP_LSX)
|
||||
return 1;
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
static uint32_t get_prid() {
|
||||
uint32_t reg = 0;
|
||||
__asm__ volatile (
|
||||
"cpucfg %0, %1 \n\t"
|
||||
: "+&r"(reg)
|
||||
: "r"(LOONGARCH_CFG0)
|
||||
);
|
||||
return reg;
|
||||
}
|
||||
|
||||
/* Select core at runtime based on the
|
||||
* cpu name and SIMD instructions supported
|
||||
* by the system
|
||||
*/
|
||||
static gotoblas_t *get_coretype(void) {
|
||||
uint32_t prid = get_prid();
|
||||
switch (prid & PRID_SERIES_MASK) {
|
||||
case (PRID_SERIES_LA464):
|
||||
case (PRID_SERIES_LA664):
|
||||
if (os_support_lasx())
|
||||
return &gotoblas_LA464;
|
||||
else if (os_support_lsx())
|
||||
return &gotoblas_LA264;
|
||||
else
|
||||
return &gotoblas_LA64_GENERIC;
|
||||
break;
|
||||
|
||||
case (PRID_SERIES_LA264):
|
||||
case (PRID_SERIES_LA364):
|
||||
if (os_support_lsx())
|
||||
return &gotoblas_LA264;
|
||||
else
|
||||
return &gotoblas_LA64_GENERIC;
|
||||
break;
|
||||
|
||||
default:
|
||||
return &gotoblas_LA64_GENERIC;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void gotoblas_dynamic_init(void) {
|
||||
|
|
|
@ -752,7 +752,7 @@ int get_L3_size() {
|
|||
}
|
||||
|
||||
void blas_set_parameter(void){
|
||||
#if defined(LOONGSON3R5)
|
||||
#if defined(LA464)
|
||||
int L3_size = get_L3_size();
|
||||
#ifdef SMP
|
||||
if(blas_num_threads == 1){
|
||||
|
|
99
getarch.c
99
getarch.c
|
@ -135,11 +135,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
/* #define FORCE_CELL */
|
||||
/* #define FORCE_MIPS64_GENERIC */
|
||||
/* #define FORCE_SICORTEX */
|
||||
/* #define FORCE_LOONGSON3R3 */
|
||||
/* #define FORCE_LOONGSON3R4 */
|
||||
/* #define FORCE_LOONGSON3R3 */
|
||||
/* #define FORCE_LOONGSON3R4 */
|
||||
/* #define FORCE_LOONGSON3R5 */
|
||||
/* #define FORCE_LOONGSON2K1000 */
|
||||
/* #define FORCE_LOONGSONGENERIC */
|
||||
/* #define FORCE_LA64_GENERIC */
|
||||
/* #define FORCE_LA264 */
|
||||
/* #define FORCE_LA464 */
|
||||
/* #define FORCE_I6400 */
|
||||
/* #define FORCE_P6600 */
|
||||
/* #define FORCE_P5600 */
|
||||
|
@ -153,7 +156,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
/* #define FORCE_EV5 */
|
||||
/* #define FORCE_EV6 */
|
||||
/* #define FORCE_CSKY */
|
||||
/* #define FORCE_CK860FV */
|
||||
/* #define FORCE_CK860FV */
|
||||
/* #define FORCE_GENERIC */
|
||||
|
||||
#ifdef FORCE_P2
|
||||
|
@ -979,46 +982,76 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#else
|
||||
#endif
|
||||
|
||||
#ifdef FORCE_LOONGSON3R5
|
||||
#if defined(FORCE_LA464) || defined(FORCE_LOONGSON3R5)
|
||||
#define FORCE
|
||||
#define ARCHITECTURE "LOONGARCH"
|
||||
#define SUBARCHITECTURE "LOONGSON3R5"
|
||||
#ifdef NO_LASX
|
||||
#ifdef NO_LSX
|
||||
#define SUBARCHITECTURE "LA64_GENERIC"
|
||||
#define SUBDIRNAME "loongarch64"
|
||||
#define ARCHCONFIG "-DLOONGSON3R5 " \
|
||||
"-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 " \
|
||||
"-DL2_SIZE=1048576 -DL2_LINESIZE=64 " \
|
||||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=16 -DHAVE_MSA"
|
||||
#define LIBNAME "loongson3r5"
|
||||
#define CORENAME "LOONGSON3R5"
|
||||
#else
|
||||
#endif
|
||||
|
||||
#ifdef FORCE_LOONGSON2K1000
|
||||
#define FORCE
|
||||
#define ARCHITECTURE "LOONGARCH"
|
||||
#define SUBARCHITECTURE "LOONGSON2K1000"
|
||||
#define SUBDIRNAME "loongarch64"
|
||||
#define ARCHCONFIG "-DLOONGSON2K1000 " \
|
||||
#define ARCHCONFIG "-DLA64_GENERIC " \
|
||||
"-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 " \
|
||||
"-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
|
||||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=16 -DHAVE_MSA"
|
||||
#define LIBNAME "loongson2k1000"
|
||||
#define CORENAME "LOONGSON2K1000"
|
||||
"-DDTB_DEFAULT_ENTRIES=64 "
|
||||
#define LIBNAME "la64_generic"
|
||||
#define CORENAME "LA64_GENERIC"
|
||||
#else
|
||||
#endif
|
||||
|
||||
#ifdef FORCE_LOONGSONGENERIC
|
||||
#define FORCE
|
||||
#define ARCHITECTURE "LOONGARCH"
|
||||
#define SUBARCHITECTURE "LOONGSONGENERIC"
|
||||
#define SUBARCHITECTURE "LA264"
|
||||
#define SUBDIRNAME "loongarch64"
|
||||
#define ARCHCONFIG "-DLOONGSONGENERIC " \
|
||||
#define ARCHCONFIG "-DLA264 " \
|
||||
"-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 " \
|
||||
"-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
|
||||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=16 -DHAVE_MSA"
|
||||
#define LIBNAME "loongsongeneric"
|
||||
#define CORENAME "LOONGSONGENERIC"
|
||||
"-DDTB_DEFAULT_ENTRIES=64 "
|
||||
#define LIBNAME "la264"
|
||||
#define CORENAME "LA264"
|
||||
#endif
|
||||
#else
|
||||
#define SUBARCHITECTURE "LA464"
|
||||
#define SUBDIRNAME "loongarch64"
|
||||
#define ARCHCONFIG "-DLA464 " \
|
||||
"-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 " \
|
||||
"-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
|
||||
"-DDTB_DEFAULT_ENTRIES=64 "
|
||||
#define LIBNAME "la464"
|
||||
#define CORENAME "LA464"
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(FORCE_LA264) || defined(FORCE_LOONGSON2K1000)
|
||||
#define FORCE
|
||||
#define ARCHITECTURE "LOONGARCH"
|
||||
#ifdef NO_LSX
|
||||
#define SUBARCHITECTURE "LA64_GENERIC"
|
||||
#define SUBDIRNAME "loongarch64"
|
||||
#define ARCHCONFIG "-DLA64_GENERIC " \
|
||||
"-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 " \
|
||||
"-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
|
||||
"-DDTB_DEFAULT_ENTRIES=64 "
|
||||
#define LIBNAME "la64_generic"
|
||||
#define CORENAME "LA64_GENERIC"
|
||||
#else
|
||||
#define SUBARCHITECTURE "LA264"
|
||||
#define SUBDIRNAME "loongarch64"
|
||||
#define ARCHCONFIG "-DLA264 " \
|
||||
"-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 " \
|
||||
"-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
|
||||
"-DDTB_DEFAULT_ENTRIES=64 "
|
||||
#define LIBNAME "la264"
|
||||
#define CORENAME "LA264"
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(FORCE_LA64_GENERIC) || defined(FORCE_LOONGSONGENERIC)
|
||||
#define FORCE
|
||||
#define ARCHITECTURE "LOONGARCH"
|
||||
#define SUBARCHITECTURE "LA64_GENERIC"
|
||||
#define SUBDIRNAME "loongarch64"
|
||||
#define ARCHCONFIG "-DLA64_GENERIC " \
|
||||
"-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 " \
|
||||
"-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
|
||||
"-DDTB_DEFAULT_ENTRIES=64 "
|
||||
#define LIBNAME "la64_generic"
|
||||
#define CORENAME "LA64_GENERIC"
|
||||
#endif
|
||||
|
||||
#ifdef FORCE_I6400
|
||||
|
|
|
@ -572,7 +572,7 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANS
|
|||
|
||||
buffer = (XFLOAT *)blas_memory_alloc(0);
|
||||
|
||||
//For target LOONGSON3R5, applying an offset to the buffer is essential
|
||||
//For LOONGARCH64, applying an offset to the buffer is essential
|
||||
//for minimizing cache conflicts and optimizing performance.
|
||||
#if defined(ARCH_LOONGARCH64) && !defined(NO_AFFINITY)
|
||||
sa = (XFLOAT *)((BLASLONG)buffer + (WhereAmI() & 0xf) * GEMM_OFFSET_A);
|
||||
|
|
|
@ -0,0 +1,6 @@
|
|||
include $(KERNELDIR)/KERNEL
|
||||
|
||||
STRMMKERNEL = gemm_kernel.S
|
||||
DTRMMKERNEL = gemm_kernel.S
|
||||
CTRMMKERNEL = zgemm_kernel.S
|
||||
ZTRMMKERNEL = zgemm_kernel.S
|
|
@ -1086,7 +1086,7 @@ static void init_parameter(void) {
|
|||
TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
|
||||
#endif
|
||||
|
||||
#if defined(LOONGSON3R5)
|
||||
#if defined(LA464)
|
||||
int L3_size = get_L3_size();
|
||||
#ifdef SMP
|
||||
if(blas_num_threads == 1){
|
||||
|
|
|
@ -4,4 +4,4 @@ Version: ${version}
|
|||
URL: https://github.com/xianyi/OpenBLAS
|
||||
Libs: -L${libdir} -l${libprefix}openblas${libnamesuffix}
|
||||
Libs.private: ${extralib}
|
||||
Cflags: -I${includedir}
|
||||
Cflags: -I${includedir} ${omp_opt}
|
||||
|
|
6
param.h
6
param.h
|
@ -2838,7 +2838,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#define SYMV_P 16
|
||||
#endif
|
||||
|
||||
#if defined (LOONGSON3R5)
|
||||
#if defined (LA464)
|
||||
#define SNUMOPT 2
|
||||
#define DNUMOPT 2
|
||||
|
||||
|
@ -2891,7 +2891,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#define SYMV_P 16
|
||||
#endif
|
||||
|
||||
#ifdef LOONGSON2K1000
|
||||
#ifdef LA264
|
||||
#define GEMM_DEFAULT_OFFSET_A 0
|
||||
#define GEMM_DEFAULT_OFFSET_B 0
|
||||
#define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
|
||||
|
@ -2926,7 +2926,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#define SYMV_P 16
|
||||
#endif
|
||||
|
||||
#ifdef LOONGSONGENERIC
|
||||
#ifdef LA64_GENERIC
|
||||
#define GEMM_DEFAULT_OFFSET_A 0
|
||||
#define GEMM_DEFAULT_OFFSET_B 0
|
||||
#define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
|
||||
|
|
Loading…
Reference in New Issue