Merge pull request #4003 from martin-frbg/issue3995

Fix instabilities in CGEMM/CTRMM/DNRM2 on Apple M1/M2 under OSX
This commit is contained in:
Martin Kroeker 2023-04-18 14:55:23 +02:00 committed by GitHub
commit efcf71255a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 12 additions and 11 deletions

View File

@ -267,9 +267,9 @@ int detect(void)
}
#else
#ifdef __APPLE__
sysctlbyname("hw.cpufamily",&value,&length,NULL,0);
if (value ==131287967|| value == 458787763 ) return CPU_VORTEX; //A12/M1
if (value == 3660830781) return CPU_VORTEX; //A15/M2
sysctlbyname("hw.cpufamily",&value64,&length64,NULL,0);
if (value64 ==131287967|| value64 == 458787763 ) return CPU_VORTEX; //A12/M1
if (value64 == 3660830781) return CPU_VORTEX; //A15/M2
#endif
return CPU_ARMV8;
#endif

View File

@ -49,7 +49,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define pCRow3 x15
#define pA x16
#define alphaR w17
#define alphaI w18
#define alphaI w19
#define alpha0_R s10
#define alphaV0_R v10.s[0]

View File

@ -49,7 +49,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define pCRow3 x15
#define pA x16
#define alphaR w17
#define alphaI w18
#define alphaI w19
#define alpha0_R s10
#define alphaV0_R v10.s[0]

View File

@ -49,10 +49,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define pCRow3 x15
#define pA x16
#define alphaR w17
#define alphaI w18
#define temp x19
#define tempOffset x20
#define tempK x21
#define alphaI w19
#define temp x20
#define tempOffset x21
#define tempK x22
#define alpha0_R s10
#define alphaV0_R v10.s[0]

View File

@ -27,7 +27,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "common.h"
#include <float.h>
#include <arm_neon.h>
#if defined(SMP)
@ -404,7 +404,8 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
#else
nrm2_compute(n, x, inc_x, &ssq, &scale);
#endif
if (fabs(scale) <1.e-300) return 0.;
volatile FLOAT sca = fabs(scale);
if (sca < DBL_MIN) return 0.;
ssq = sqrt(ssq) * scale;
return ssq;