Merge pull request #4003 from martin-frbg/issue3995

Fix instabilities in CGEMM/CTRMM/DNRM2 on Apple M1/M2 under macOS
This commit is contained in:
Martin Kroeker 2023-04-18 14:55:23 +02:00 committed by GitHub
commit efcf71255a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 12 additions and 11 deletions

View File

@ -267,9 +267,9 @@ int detect(void)
} }
#else #else
#ifdef __APPLE__ #ifdef __APPLE__
sysctlbyname("hw.cpufamily",&value,&length,NULL,0); sysctlbyname("hw.cpufamily",&value64,&length64,NULL,0);
if (value ==131287967|| value == 458787763 ) return CPU_VORTEX; //A12/M1 if (value64 ==131287967|| value64 == 458787763 ) return CPU_VORTEX; //A12/M1
if (value == 3660830781) return CPU_VORTEX; //A15/M2 if (value64 == 3660830781) return CPU_VORTEX; //A15/M2
#endif #endif
return CPU_ARMV8; return CPU_ARMV8;
#endif #endif

View File

@ -49,7 +49,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define pCRow3 x15 #define pCRow3 x15
#define pA x16 #define pA x16
#define alphaR w17 #define alphaR w17
#define alphaI w18 #define alphaI w19
#define alpha0_R s10 #define alpha0_R s10
#define alphaV0_R v10.s[0] #define alphaV0_R v10.s[0]

View File

@ -49,7 +49,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define pCRow3 x15 #define pCRow3 x15
#define pA x16 #define pA x16
#define alphaR w17 #define alphaR w17
#define alphaI w18 #define alphaI w19
#define alpha0_R s10 #define alpha0_R s10
#define alphaV0_R v10.s[0] #define alphaV0_R v10.s[0]

View File

@ -49,10 +49,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define pCRow3 x15 #define pCRow3 x15
#define pA x16 #define pA x16
#define alphaR w17 #define alphaR w17
#define alphaI w18 #define alphaI w19
#define temp x19 #define temp x20
#define tempOffset x20 #define tempOffset x21
#define tempK x21 #define tempK x22
#define alpha0_R s10 #define alpha0_R s10
#define alphaV0_R v10.s[0] #define alphaV0_R v10.s[0]

View File

@ -27,7 +27,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "common.h" #include "common.h"
#include <float.h>
#include <arm_neon.h> #include <arm_neon.h>
#if defined(SMP) #if defined(SMP)
@ -404,7 +404,8 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
#else #else
nrm2_compute(n, x, inc_x, &ssq, &scale); nrm2_compute(n, x, inc_x, &ssq, &scale);
#endif #endif
if (fabs(scale) <1.e-300) return 0.; volatile FLOAT sca = fabs(scale);
if (sca < DBL_MIN) return 0.;
ssq = sqrt(ssq) * scale; ssq = sqrt(ssq) * scale;
return ssq; return ssq;