From 5b165420b5962b2b73319f55b747be4f6c697860 Mon Sep 17 00:00:00 2001 From: Chris Sidebottom Date: Mon, 5 Dec 2022 15:17:52 +0000 Subject: [PATCH] SWITCH_RATIO for Arm(R) Neoverse(TM) architecture These values seem to give a good balance for reasonably sized matrices. With `SWITCH_RATIO=16`, DGEMM scales better to bigger sizes, but the better solution there would be some kind of thread throttling, so I've gone with `SWITCH_RATIO=8` for double precision and `SWITCH_RATIO=16` otherwise. --- param.h | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/param.h b/param.h index f1f5cbdad..ae391dd3f 100644 --- a/param.h +++ b/param.h @@ -1,5 +1,5 @@ /***************************************************************************** -Copyright (c) 2011-2014, The OpenBLAS Project +Copyright (c) 2011-2023, The OpenBLAS Project All rights reserved. Redistribution and use in source and binary forms, with or without @@ -3338,6 +3338,12 @@ is a big desktop or server with abundant cache rather than a phone or embedded d #elif defined(NEOVERSEN1) +#if defined(XDOUBLE) || defined(DOUBLE) +#define SWITCH_RATIO 8 +#else +#define SWITCH_RATIO 16 +#endif + #define SGEMM_DEFAULT_UNROLL_M 16 #define SGEMM_DEFAULT_UNROLL_N 4 @@ -3367,7 +3373,11 @@ is a big desktop or server with abundant cache rather than a phone or embedded d #elif defined(NEOVERSEV1) -#define SWITCH_RATIO 16 +#if defined(XDOUBLE) || defined(DOUBLE) +#define SWITCH_RATIO 8 +#else +#define SWITCH_RATIO 16 +#endif #define SGEMM_DEFAULT_UNROLL_M 16 #define SGEMM_DEFAULT_UNROLL_N 4 @@ -3398,6 +3408,12 @@ is a big desktop or server with abundant cache rather than a phone or embedded d #elif defined(NEOVERSEN2) +#if defined(XDOUBLE) || defined(DOUBLE) +#define SWITCH_RATIO 8 +#else +#define SWITCH_RATIO 16 +#endif + #undef SBGEMM_ALIGN_K #define SBGEMM_ALIGN_K 4