Merge branch 'xianyi:develop' into cirrusjobs
This commit is contained in:
commit
d6a7809504
1
c_check
1
c_check
|
@ -40,6 +40,7 @@ bn=`basename \"$compiler_name\"`
|
||||||
case "$bn" in
|
case "$bn" in
|
||||||
*-*) if [ "$bn" != '-' ]; then
|
*-*) if [ "$bn" != '-' ]; then
|
||||||
cross_suffix="$cross_suffix${bn%-*}-"
|
cross_suffix="$cross_suffix${bn%-*}-"
|
||||||
|
cross_suffix=`echo $cross_suffix|sed -e 's/ -$//'`
|
||||||
fi
|
fi
|
||||||
esac
|
esac
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
/*********************************************************************/
|
/*********************************************************************/
|
||||||
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
||||||
|
/* Copyright 2023 The OpenBLAS Project. */
|
||||||
/* All rights reserved. */
|
/* All rights reserved. */
|
||||||
/* */
|
/* */
|
||||||
/* Redistribution and use in source and binary forms, with or */
|
/* Redistribution and use in source and binary forms, with or */
|
||||||
|
@ -45,6 +46,7 @@
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
int dtb_entries;
|
int dtb_entries;
|
||||||
|
int switch_ratio;
|
||||||
int offsetA, offsetB, align;
|
int offsetA, offsetB, align;
|
||||||
|
|
||||||
#if BUILD_BFLOAT16 == 1
|
#if BUILD_BFLOAT16 == 1
|
||||||
|
|
|
@ -267,9 +267,9 @@ int detect(void)
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
#ifdef __APPLE__
|
#ifdef __APPLE__
|
||||||
sysctlbyname("hw.cpufamily",&value,&length,NULL,0);
|
sysctlbyname("hw.cpufamily",&value64,&length64,NULL,0);
|
||||||
if (value ==131287967|| value == 458787763 ) return CPU_VORTEX; //A12/M1
|
if (value64 ==131287967|| value64 == 458787763 ) return CPU_VORTEX; //A12/M1
|
||||||
if (value == 3660830781) return CPU_VORTEX; //A15/M2
|
if (value64 == 3660830781) return CPU_VORTEX; //A15/M2
|
||||||
#endif
|
#endif
|
||||||
return CPU_ARMV8;
|
return CPU_ARMV8;
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
/*********************************************************************/
|
/*********************************************************************/
|
||||||
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
||||||
|
/* Copyright 2023 The OpenBLAS Project. */
|
||||||
/* All rights reserved. */
|
/* All rights reserved. */
|
||||||
/* */
|
/* */
|
||||||
/* Redistribution and use in source and binary forms, with or */
|
/* Redistribution and use in source and binary forms, with or */
|
||||||
|
@ -44,10 +45,6 @@
|
||||||
#define DIVIDE_RATE 2
|
#define DIVIDE_RATE 2
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifndef SWITCH_RATIO
|
|
||||||
#define SWITCH_RATIO 2
|
|
||||||
#endif
|
|
||||||
|
|
||||||
//The array of job_t may overflow the stack.
|
//The array of job_t may overflow the stack.
|
||||||
//Instead, use malloc to alloc job_t.
|
//Instead, use malloc to alloc job_t.
|
||||||
#if MAX_CPU_NUMBER > BLAS3_MEM_ALLOC_THRESHOLD
|
#if MAX_CPU_NUMBER > BLAS3_MEM_ALLOC_THRESHOLD
|
||||||
|
@ -1015,6 +1012,12 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
|
||||||
BLASLONG divN, divT;
|
BLASLONG divN, divT;
|
||||||
int mode;
|
int mode;
|
||||||
|
|
||||||
|
#if defined(DYNAMIC_ARCH)
|
||||||
|
int switch_ratio = gotoblas->switch_ratio;
|
||||||
|
#else
|
||||||
|
int switch_ratio = SWITCH_RATIO;
|
||||||
|
#endif
|
||||||
|
|
||||||
if (range_m) {
|
if (range_m) {
|
||||||
BLASLONG m_from = *(((BLASLONG *)range_m) + 0);
|
BLASLONG m_from = *(((BLASLONG *)range_m) + 0);
|
||||||
BLASLONG m_to = *(((BLASLONG *)range_m) + 1);
|
BLASLONG m_to = *(((BLASLONG *)range_m) + 1);
|
||||||
|
@ -1030,7 +1033,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
|
||||||
}
|
}
|
||||||
*/
|
*/
|
||||||
|
|
||||||
if ((args -> m < nthreads * SWITCH_RATIO) || (args -> n < nthreads * SWITCH_RATIO)) {
|
if ((args -> m < nthreads * switch_ratio) || (args -> n < nthreads * switch_ratio)) {
|
||||||
GEMM3M_LOCAL(args, range_m, range_n, sa, sb, 0);
|
GEMM3M_LOCAL(args, range_m, range_n, sa, sb, 0);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -1038,7 +1041,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
|
||||||
divT = nthreads;
|
divT = nthreads;
|
||||||
divN = 1;
|
divN = 1;
|
||||||
|
|
||||||
while ((GEMM3M_P * divT > m * SWITCH_RATIO) && (divT > 1)) {
|
while ((GEMM3M_P * divT > m * switch_ratio) && (divT > 1)) {
|
||||||
do {
|
do {
|
||||||
divT --;
|
divT --;
|
||||||
divN = 1;
|
divN = 1;
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
/*********************************************************************/
|
/*********************************************************************/
|
||||||
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
||||||
|
/* Copyright 2023 The OpenBLAS Project. */
|
||||||
/* All rights reserved. */
|
/* All rights reserved. */
|
||||||
/* */
|
/* */
|
||||||
/* Redistribution and use in source and binary forms, with or */
|
/* Redistribution and use in source and binary forms, with or */
|
||||||
|
@ -44,10 +45,6 @@
|
||||||
#define DIVIDE_RATE 2
|
#define DIVIDE_RATE 2
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifndef SWITCH_RATIO
|
|
||||||
#define SWITCH_RATIO 2
|
|
||||||
#endif
|
|
||||||
|
|
||||||
//The array of job_t may overflow the stack.
|
//The array of job_t may overflow the stack.
|
||||||
//Instead, use malloc to alloc job_t.
|
//Instead, use malloc to alloc job_t.
|
||||||
#if MAX_CPU_NUMBER > BLAS3_MEM_ALLOC_THRESHOLD
|
#if MAX_CPU_NUMBER > BLAS3_MEM_ALLOC_THRESHOLD
|
||||||
|
@ -528,7 +525,13 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
|
||||||
int mode, mask;
|
int mode, mask;
|
||||||
double dnum, di, dinum;
|
double dnum, di, dinum;
|
||||||
|
|
||||||
if ((nthreads == 1) || (args -> n < nthreads * SWITCH_RATIO)) {
|
#if defined(DYNAMIC_ARCH)
|
||||||
|
int switch_ratio = gotoblas->switch_ratio;
|
||||||
|
#else
|
||||||
|
int switch_ratio = SWITCH_RATIO;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
if ((nthreads == 1) || (args->n < nthreads * switch_ratio)) {
|
||||||
SYRK_LOCAL(args, range_m, range_n, sa, sb, 0);
|
SYRK_LOCAL(args, range_m, range_n, sa, sb, 0);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
/*********************************************************************/
|
/*********************************************************************/
|
||||||
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
||||||
|
/* Copyright 2023 The OpenBLAS Project. */
|
||||||
/* All rights reserved. */
|
/* All rights reserved. */
|
||||||
/* */
|
/* */
|
||||||
/* Redistribution and use in source and binary forms, with or */
|
/* Redistribution and use in source and binary forms, with or */
|
||||||
|
@ -44,10 +45,6 @@
|
||||||
#define DIVIDE_RATE 2
|
#define DIVIDE_RATE 2
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifndef SWITCH_RATIO
|
|
||||||
#define SWITCH_RATIO 2
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifndef GEMM_PREFERED_SIZE
|
#ifndef GEMM_PREFERED_SIZE
|
||||||
#define GEMM_PREFERED_SIZE 1
|
#define GEMM_PREFERED_SIZE 1
|
||||||
#endif
|
#endif
|
||||||
|
@ -577,6 +574,11 @@ InitializeCriticalSection((PCRITICAL_SECTION)&level3_lock);
|
||||||
BLASLONG width, i, j, k, js;
|
BLASLONG width, i, j, k, js;
|
||||||
BLASLONG m, n, n_from, n_to;
|
BLASLONG m, n, n_from, n_to;
|
||||||
int mode;
|
int mode;
|
||||||
|
#if defined(DYNAMIC_ARCH)
|
||||||
|
int switch_ratio = gotoblas->switch_ratio;
|
||||||
|
#else
|
||||||
|
int switch_ratio = SWITCH_RATIO;
|
||||||
|
#endif
|
||||||
|
|
||||||
/* Get execution mode */
|
/* Get execution mode */
|
||||||
#ifndef COMPLEX
|
#ifndef COMPLEX
|
||||||
|
@ -698,8 +700,8 @@ EnterCriticalSection((PCRITICAL_SECTION)&level3_lock);
|
||||||
num_parts = 0;
|
num_parts = 0;
|
||||||
while (n > 0){
|
while (n > 0){
|
||||||
width = blas_quickdivide(n + nthreads - num_parts - 1, nthreads - num_parts);
|
width = blas_quickdivide(n + nthreads - num_parts - 1, nthreads - num_parts);
|
||||||
if (width < SWITCH_RATIO) {
|
if (width < switch_ratio) {
|
||||||
width = SWITCH_RATIO;
|
width = switch_ratio;
|
||||||
}
|
}
|
||||||
width = round_up(n, width, GEMM_PREFERED_SIZE);
|
width = round_up(n, width, GEMM_PREFERED_SIZE);
|
||||||
|
|
||||||
|
@ -746,6 +748,11 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, IFLOAT *sa, IF
|
||||||
BLASLONG m = args -> m;
|
BLASLONG m = args -> m;
|
||||||
BLASLONG n = args -> n;
|
BLASLONG n = args -> n;
|
||||||
BLASLONG nthreads_m, nthreads_n;
|
BLASLONG nthreads_m, nthreads_n;
|
||||||
|
#if defined(DYNAMIC_ARCH)
|
||||||
|
int switch_ratio = gotoblas->switch_ratio;
|
||||||
|
#else
|
||||||
|
int switch_ratio = SWITCH_RATIO;
|
||||||
|
#endif
|
||||||
|
|
||||||
/* Get dimensions from index ranges if available */
|
/* Get dimensions from index ranges if available */
|
||||||
if (range_m) {
|
if (range_m) {
|
||||||
|
@ -755,21 +762,21 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, IFLOAT *sa, IF
|
||||||
n = range_n[1] - range_n[0];
|
n = range_n[1] - range_n[0];
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Partitions in m should have at least SWITCH_RATIO rows */
|
/* Partitions in m should have at least switch_ratio rows */
|
||||||
if (m < 2 * SWITCH_RATIO) {
|
if (m < 2 * switch_ratio) {
|
||||||
nthreads_m = 1;
|
nthreads_m = 1;
|
||||||
} else {
|
} else {
|
||||||
nthreads_m = args -> nthreads;
|
nthreads_m = args -> nthreads;
|
||||||
while (m < nthreads_m * SWITCH_RATIO) {
|
while (m < nthreads_m * switch_ratio) {
|
||||||
nthreads_m = nthreads_m / 2;
|
nthreads_m = nthreads_m / 2;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Partitions in n should have at most SWITCH_RATIO * nthreads_m columns */
|
/* Partitions in n should have at most switch_ratio * nthreads_m columns */
|
||||||
if (n < SWITCH_RATIO * nthreads_m) {
|
if (n < switch_ratio * nthreads_m) {
|
||||||
nthreads_n = 1;
|
nthreads_n = 1;
|
||||||
} else {
|
} else {
|
||||||
nthreads_n = (n + SWITCH_RATIO * nthreads_m - 1) / (SWITCH_RATIO * nthreads_m);
|
nthreads_n = (n + switch_ratio * nthreads_m - 1) / (switch_ratio * nthreads_m);
|
||||||
if (nthreads_m * nthreads_n > args -> nthreads) {
|
if (nthreads_m * nthreads_n > args -> nthreads) {
|
||||||
nthreads_n = blas_quickdivide(args -> nthreads, nthreads_m);
|
nthreads_n = blas_quickdivide(args -> nthreads, nthreads_m);
|
||||||
}
|
}
|
||||||
|
|
|
@ -49,7 +49,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#define pCRow3 x15
|
#define pCRow3 x15
|
||||||
#define pA x16
|
#define pA x16
|
||||||
#define alphaR w17
|
#define alphaR w17
|
||||||
#define alphaI w18
|
#define alphaI w19
|
||||||
|
|
||||||
#define alpha0_R s10
|
#define alpha0_R s10
|
||||||
#define alphaV0_R v10.s[0]
|
#define alphaV0_R v10.s[0]
|
||||||
|
|
|
@ -49,7 +49,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#define pCRow3 x15
|
#define pCRow3 x15
|
||||||
#define pA x16
|
#define pA x16
|
||||||
#define alphaR w17
|
#define alphaR w17
|
||||||
#define alphaI w18
|
#define alphaI w19
|
||||||
|
|
||||||
#define alpha0_R s10
|
#define alpha0_R s10
|
||||||
#define alphaV0_R v10.s[0]
|
#define alphaV0_R v10.s[0]
|
||||||
|
|
|
@ -49,10 +49,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#define pCRow3 x15
|
#define pCRow3 x15
|
||||||
#define pA x16
|
#define pA x16
|
||||||
#define alphaR w17
|
#define alphaR w17
|
||||||
#define alphaI w18
|
#define alphaI w19
|
||||||
#define temp x19
|
#define temp x20
|
||||||
#define tempOffset x20
|
#define tempOffset x21
|
||||||
#define tempK x21
|
#define tempK x22
|
||||||
|
|
||||||
#define alpha0_R s10
|
#define alpha0_R s10
|
||||||
#define alphaV0_R v10.s[0]
|
#define alphaV0_R v10.s[0]
|
||||||
|
|
|
@ -27,7 +27,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
|
#include <float.h>
|
||||||
#include <arm_neon.h>
|
#include <arm_neon.h>
|
||||||
|
|
||||||
#if defined(SMP)
|
#if defined(SMP)
|
||||||
|
@ -404,7 +404,8 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
|
||||||
#else
|
#else
|
||||||
nrm2_compute(n, x, inc_x, &ssq, &scale);
|
nrm2_compute(n, x, inc_x, &ssq, &scale);
|
||||||
#endif
|
#endif
|
||||||
if (fabs(scale) <1.e-300) return 0.;
|
volatile FLOAT sca = fabs(scale);
|
||||||
|
if (sca < DBL_MIN) return 0.;
|
||||||
ssq = sqrt(ssq) * scale;
|
ssq = sqrt(ssq) * scale;
|
||||||
|
|
||||||
return ssq;
|
return ssq;
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
/*********************************************************************/
|
/*********************************************************************/
|
||||||
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
||||||
|
/* Copyright 2023 The OpenBLAS Project. */
|
||||||
/* All rights reserved. */
|
/* All rights reserved. */
|
||||||
/* */
|
/* */
|
||||||
/* Redistribution and use in source and binary forms, with or */
|
/* Redistribution and use in source and binary forms, with or */
|
||||||
|
@ -51,6 +52,8 @@ static void init_parameter(void);
|
||||||
gotoblas_t TABLE_NAME = {
|
gotoblas_t TABLE_NAME = {
|
||||||
DTB_DEFAULT_ENTRIES,
|
DTB_DEFAULT_ENTRIES,
|
||||||
|
|
||||||
|
SWITCH_RATIO,
|
||||||
|
|
||||||
GEMM_DEFAULT_OFFSET_A, GEMM_DEFAULT_OFFSET_B, GEMM_DEFAULT_ALIGN,
|
GEMM_DEFAULT_OFFSET_A, GEMM_DEFAULT_OFFSET_B, GEMM_DEFAULT_ALIGN,
|
||||||
|
|
||||||
#ifdef BUILD_BFLOAT16
|
#ifdef BUILD_BFLOAT16
|
||||||
|
|
|
@ -27,7 +27,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
|
|
||||||
#if (defined(OS_DARWIN) || defined(OS_WINDOWS)) && (defined(__GNUC__) && __GNUC__ > 11)
|
#if (defined(__GNUC__) && __GNUC__ > 11)
|
||||||
#pragma GCC optimize("no-tree-vectorize")
|
#pragma GCC optimize("no-tree-vectorize")
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -27,7 +27,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
|
|
||||||
#if (defined(OS_DARWIN) || defined(OS_WINDOWS)) && (defined(__GNUC__) && __GNUC__ > 11)
|
#if (defined(__GNUC__) && __GNUC__ > 11)
|
||||||
#pragma GCC optimize("no-tree-vectorize")
|
#pragma GCC optimize("no-tree-vectorize")
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -27,7 +27,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
|
|
||||||
#if (defined(OS_DARWIN) || defined(OS_WINDOWS)) && (defined(__GNUC__) && __GNUC__ > 11)
|
#if (defined(__GNUC__) && __GNUC__ > 11)
|
||||||
#pragma GCC optimize("no-tree-vectorize")
|
#pragma GCC optimize("no-tree-vectorize")
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -27,7 +27,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
|
|
||||||
#if (defined(OS_DARWIN) || defined(OS_WINDOWS)) && (defined(__GNUC__) && __GNUC__ > 11)
|
#if (defined(__GNUC__) && __GNUC__ > 11)
|
||||||
#pragma GCC optimize("no-tree-vectorize")
|
#pragma GCC optimize("no-tree-vectorize")
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -27,7 +27,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
|
|
||||||
#if (defined(OS_DARWIN) || defined(OS_WINDOWS)) && (defined(__GNUC__) && __GNUC__ > 11)
|
#if (defined(__GNUC__) && __GNUC__ > 11)
|
||||||
#pragma GCC optimize("no-tree-vectorize")
|
#pragma GCC optimize("no-tree-vectorize")
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -27,7 +27,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
|
|
||||||
#if (defined(OS_DARWIN) || defined(OS_WINDOWS)) && (defined(__GNUC__) && __GNUC__ > 11)
|
#if (defined(__GNUC__) && __GNUC__ > 11)
|
||||||
#pragma GCC optimize("no-tree-vectorize")
|
#pragma GCC optimize("no-tree-vectorize")
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -27,7 +27,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
|
|
||||||
#if (defined(OS_DARWIN) || defined(OS_WINDOWS)) && (defined(__GNUC__) && __GNUC__ > 11)
|
#if (defined(__GNUC__) && __GNUC__ > 11)
|
||||||
#pragma GCC optimize("no-tree-vectorize")
|
#pragma GCC optimize("no-tree-vectorize")
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -80,10 +80,6 @@ static FLOAT dm1 = -1.;
|
||||||
#define DIVIDE_RATE 2
|
#define DIVIDE_RATE 2
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifndef SWITCH_RATIO
|
|
||||||
#define SWITCH_RATIO 2
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifndef LOWER
|
#ifndef LOWER
|
||||||
#define TRANS
|
#define TRANS
|
||||||
#endif
|
#endif
|
||||||
|
|
22
param.h
22
param.h
|
@ -1,5 +1,5 @@
|
||||||
/*****************************************************************************
|
/*****************************************************************************
|
||||||
Copyright (c) 2011-2014, The OpenBLAS Project
|
Copyright (c) 2011-2023, The OpenBLAS Project
|
||||||
All rights reserved.
|
All rights reserved.
|
||||||
|
|
||||||
Redistribution and use in source and binary forms, with or without
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
@ -3338,6 +3338,12 @@ is a big desktop or server with abundant cache rather than a phone or embedded d
|
||||||
|
|
||||||
#elif defined(NEOVERSEN1)
|
#elif defined(NEOVERSEN1)
|
||||||
|
|
||||||
|
#if defined(XDOUBLE) || defined(DOUBLE)
|
||||||
|
#define SWITCH_RATIO 8
|
||||||
|
#else
|
||||||
|
#define SWITCH_RATIO 16
|
||||||
|
#endif
|
||||||
|
|
||||||
#define SGEMM_DEFAULT_UNROLL_M 16
|
#define SGEMM_DEFAULT_UNROLL_M 16
|
||||||
#define SGEMM_DEFAULT_UNROLL_N 4
|
#define SGEMM_DEFAULT_UNROLL_N 4
|
||||||
|
|
||||||
|
@ -3367,7 +3373,11 @@ is a big desktop or server with abundant cache rather than a phone or embedded d
|
||||||
|
|
||||||
#elif defined(NEOVERSEV1)
|
#elif defined(NEOVERSEV1)
|
||||||
|
|
||||||
|
#if defined(XDOUBLE) || defined(DOUBLE)
|
||||||
|
#define SWITCH_RATIO 8
|
||||||
|
#else
|
||||||
#define SWITCH_RATIO 16
|
#define SWITCH_RATIO 16
|
||||||
|
#endif
|
||||||
|
|
||||||
#define SGEMM_DEFAULT_UNROLL_M 16
|
#define SGEMM_DEFAULT_UNROLL_M 16
|
||||||
#define SGEMM_DEFAULT_UNROLL_N 4
|
#define SGEMM_DEFAULT_UNROLL_N 4
|
||||||
|
@ -3398,6 +3408,12 @@ is a big desktop or server with abundant cache rather than a phone or embedded d
|
||||||
|
|
||||||
#elif defined(NEOVERSEN2)
|
#elif defined(NEOVERSEN2)
|
||||||
|
|
||||||
|
#if defined(XDOUBLE) || defined(DOUBLE)
|
||||||
|
#define SWITCH_RATIO 8
|
||||||
|
#else
|
||||||
|
#define SWITCH_RATIO 16
|
||||||
|
#endif
|
||||||
|
|
||||||
#undef SBGEMM_ALIGN_K
|
#undef SBGEMM_ALIGN_K
|
||||||
#define SBGEMM_ALIGN_K 4
|
#define SBGEMM_ALIGN_K 4
|
||||||
|
|
||||||
|
@ -3838,6 +3854,10 @@ Until then, just keep it different than DGEMM_DEFAULT_UNROLL_N to keep copy rout
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifndef SWITCH_RATIO
|
||||||
|
#define SWITCH_RATIO 2
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifndef QGEMM_DEFAULT_UNROLL_M
|
#ifndef QGEMM_DEFAULT_UNROLL_M
|
||||||
#define QGEMM_DEFAULT_UNROLL_M 2
|
#define QGEMM_DEFAULT_UNROLL_M 2
|
||||||
#endif
|
#endif
|
||||||
|
|
Loading…
Reference in New Issue