Merge pull request #3843 from Mousius/switch-ratio
Propagate SWITCH_RATIO to DYNAMIC_ARCH builds
This commit is contained in:
commit
437c0bf2b4
|
@ -1,5 +1,6 @@
|
|||
/*********************************************************************/
|
||||
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
||||
/* Copyright 2023 The OpenBLAS Project. */
|
||||
/* All rights reserved. */
|
||||
/* */
|
||||
/* Redistribution and use in source and binary forms, with or */
|
||||
|
@ -45,6 +46,7 @@
|
|||
|
||||
typedef struct {
|
||||
int dtb_entries;
|
||||
int switch_ratio;
|
||||
int offsetA, offsetB, align;
|
||||
|
||||
#if BUILD_BFLOAT16 == 1
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
/*********************************************************************/
|
||||
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
||||
/* Copyright 2023 The OpenBLAS Project. */
|
||||
/* All rights reserved. */
|
||||
/* */
|
||||
/* Redistribution and use in source and binary forms, with or */
|
||||
|
@ -44,10 +45,6 @@
|
|||
#define DIVIDE_RATE 2
|
||||
#endif
|
||||
|
||||
#ifndef SWITCH_RATIO
|
||||
#define SWITCH_RATIO 2
|
||||
#endif
|
||||
|
||||
//The array of job_t may overflow the stack.
|
||||
//Instead, use malloc to alloc job_t.
|
||||
#if MAX_CPU_NUMBER > BLAS3_MEM_ALLOC_THRESHOLD
|
||||
|
@ -1015,6 +1012,12 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
|
|||
BLASLONG divN, divT;
|
||||
int mode;
|
||||
|
||||
#if defined(DYNAMIC_ARCH)
|
||||
int switch_ratio = gotoblas->switch_ratio;
|
||||
#else
|
||||
int switch_ratio = SWITCH_RATIO;
|
||||
#endif
|
||||
|
||||
if (range_m) {
|
||||
BLASLONG m_from = *(((BLASLONG *)range_m) + 0);
|
||||
BLASLONG m_to = *(((BLASLONG *)range_m) + 1);
|
||||
|
@ -1030,7 +1033,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
|
|||
}
|
||||
*/
|
||||
|
||||
if ((args -> m < nthreads * SWITCH_RATIO) || (args -> n < nthreads * SWITCH_RATIO)) {
|
||||
if ((args -> m < nthreads * switch_ratio) || (args -> n < nthreads * switch_ratio)) {
|
||||
GEMM3M_LOCAL(args, range_m, range_n, sa, sb, 0);
|
||||
return 0;
|
||||
}
|
||||
|
@ -1038,7 +1041,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
|
|||
divT = nthreads;
|
||||
divN = 1;
|
||||
|
||||
while ((GEMM3M_P * divT > m * SWITCH_RATIO) && (divT > 1)) {
|
||||
while ((GEMM3M_P * divT > m * switch_ratio) && (divT > 1)) {
|
||||
do {
|
||||
divT --;
|
||||
divN = 1;
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
/*********************************************************************/
|
||||
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
||||
/* Copyright 2023 The OpenBLAS Project. */
|
||||
/* All rights reserved. */
|
||||
/* */
|
||||
/* Redistribution and use in source and binary forms, with or */
|
||||
|
@ -44,10 +45,6 @@
|
|||
#define DIVIDE_RATE 2
|
||||
#endif
|
||||
|
||||
#ifndef SWITCH_RATIO
|
||||
#define SWITCH_RATIO 2
|
||||
#endif
|
||||
|
||||
//The array of job_t may overflow the stack.
|
||||
//Instead, use malloc to alloc job_t.
|
||||
#if MAX_CPU_NUMBER > BLAS3_MEM_ALLOC_THRESHOLD
|
||||
|
@ -528,7 +525,13 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
|
|||
int mode, mask;
|
||||
double dnum, di, dinum;
|
||||
|
||||
if ((nthreads == 1) || (args -> n < nthreads * SWITCH_RATIO)) {
|
||||
#if defined(DYNAMIC_ARCH)
|
||||
int switch_ratio = gotoblas->switch_ratio;
|
||||
#else
|
||||
int switch_ratio = SWITCH_RATIO;
|
||||
#endif
|
||||
|
||||
if ((nthreads == 1) || (args->n < nthreads * switch_ratio)) {
|
||||
SYRK_LOCAL(args, range_m, range_n, sa, sb, 0);
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
/*********************************************************************/
|
||||
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
||||
/* Copyright 2023 The OpenBLAS Project. */
|
||||
/* All rights reserved. */
|
||||
/* */
|
||||
/* Redistribution and use in source and binary forms, with or */
|
||||
|
@ -44,10 +45,6 @@
|
|||
#define DIVIDE_RATE 2
|
||||
#endif
|
||||
|
||||
#ifndef SWITCH_RATIO
|
||||
#define SWITCH_RATIO 2
|
||||
#endif
|
||||
|
||||
#ifndef GEMM_PREFERED_SIZE
|
||||
#define GEMM_PREFERED_SIZE 1
|
||||
#endif
|
||||
|
@ -577,6 +574,11 @@ InitializeCriticalSection((PCRITICAL_SECTION)&level3_lock);
|
|||
BLASLONG width, i, j, k, js;
|
||||
BLASLONG m, n, n_from, n_to;
|
||||
int mode;
|
||||
#if defined(DYNAMIC_ARCH)
|
||||
int switch_ratio = gotoblas->switch_ratio;
|
||||
#else
|
||||
int switch_ratio = SWITCH_RATIO;
|
||||
#endif
|
||||
|
||||
/* Get execution mode */
|
||||
#ifndef COMPLEX
|
||||
|
@ -698,8 +700,8 @@ EnterCriticalSection((PCRITICAL_SECTION)&level3_lock);
|
|||
num_parts = 0;
|
||||
while (n > 0){
|
||||
width = blas_quickdivide(n + nthreads - num_parts - 1, nthreads - num_parts);
|
||||
if (width < SWITCH_RATIO) {
|
||||
width = SWITCH_RATIO;
|
||||
if (width < switch_ratio) {
|
||||
width = switch_ratio;
|
||||
}
|
||||
width = round_up(n, width, GEMM_PREFERED_SIZE);
|
||||
|
||||
|
@ -746,6 +748,11 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, IFLOAT *sa, IF
|
|||
BLASLONG m = args -> m;
|
||||
BLASLONG n = args -> n;
|
||||
BLASLONG nthreads_m, nthreads_n;
|
||||
#if defined(DYNAMIC_ARCH)
|
||||
int switch_ratio = gotoblas->switch_ratio;
|
||||
#else
|
||||
int switch_ratio = SWITCH_RATIO;
|
||||
#endif
|
||||
|
||||
/* Get dimensions from index ranges if available */
|
||||
if (range_m) {
|
||||
|
@ -755,21 +762,21 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, IFLOAT *sa, IF
|
|||
n = range_n[1] - range_n[0];
|
||||
}
|
||||
|
||||
/* Partitions in m should have at least SWITCH_RATIO rows */
|
||||
if (m < 2 * SWITCH_RATIO) {
|
||||
/* Partitions in m should have at least switch_ratio rows */
|
||||
if (m < 2 * switch_ratio) {
|
||||
nthreads_m = 1;
|
||||
} else {
|
||||
nthreads_m = args -> nthreads;
|
||||
while (m < nthreads_m * SWITCH_RATIO) {
|
||||
while (m < nthreads_m * switch_ratio) {
|
||||
nthreads_m = nthreads_m / 2;
|
||||
}
|
||||
}
|
||||
|
||||
/* Partitions in n should have at most SWITCH_RATIO * nthreads_m columns */
|
||||
if (n < SWITCH_RATIO * nthreads_m) {
|
||||
/* Partitions in n should have at most switch_ratio * nthreads_m columns */
|
||||
if (n < switch_ratio * nthreads_m) {
|
||||
nthreads_n = 1;
|
||||
} else {
|
||||
nthreads_n = (n + SWITCH_RATIO * nthreads_m - 1) / (SWITCH_RATIO * nthreads_m);
|
||||
nthreads_n = (n + switch_ratio * nthreads_m - 1) / (switch_ratio * nthreads_m);
|
||||
if (nthreads_m * nthreads_n > args -> nthreads) {
|
||||
nthreads_n = blas_quickdivide(args -> nthreads, nthreads_m);
|
||||
}
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
/*********************************************************************/
|
||||
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
||||
/* Copyright 2023 The OpenBLAS Project. */
|
||||
/* All rights reserved. */
|
||||
/* */
|
||||
/* Redistribution and use in source and binary forms, with or */
|
||||
|
@ -49,7 +50,9 @@
|
|||
static void init_parameter(void);
|
||||
|
||||
gotoblas_t TABLE_NAME = {
|
||||
DTB_DEFAULT_ENTRIES ,
|
||||
DTB_DEFAULT_ENTRIES,
|
||||
|
||||
SWITCH_RATIO,
|
||||
|
||||
GEMM_DEFAULT_OFFSET_A, GEMM_DEFAULT_OFFSET_B, GEMM_DEFAULT_ALIGN,
|
||||
|
||||
|
|
|
@ -80,10 +80,6 @@ static FLOAT dm1 = -1.;
|
|||
#define DIVIDE_RATE 2
|
||||
#endif
|
||||
|
||||
#ifndef SWITCH_RATIO
|
||||
#define SWITCH_RATIO 2
|
||||
#endif
|
||||
|
||||
#ifndef LOWER
|
||||
#define TRANS
|
||||
#endif
|
||||
|
|
Loading…
Reference in New Issue