Merge pull request #4003 from martin-frbg/issue3995
Fix instabilities in CGEMM/CTRMM/DNRM2 on Apple M1/M2 under OSX
This commit is contained in:
commit
efcf71255a
|
@@ -267,9 +267,9 @@ int detect(void)
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
#ifdef __APPLE__
|
#ifdef __APPLE__
|
||||||
sysctlbyname("hw.cpufamily",&value,&length,NULL,0);
|
sysctlbyname("hw.cpufamily",&value64,&length64,NULL,0);
|
||||||
if (value ==131287967|| value == 458787763 ) return CPU_VORTEX; //A12/M1
|
if (value64 ==131287967|| value64 == 458787763 ) return CPU_VORTEX; //A12/M1
|
||||||
if (value == 3660830781) return CPU_VORTEX; //A15/M2
|
if (value64 == 3660830781) return CPU_VORTEX; //A15/M2
|
||||||
#endif
|
#endif
|
||||||
return CPU_ARMV8;
|
return CPU_ARMV8;
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@@ -49,7 +49,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#define pCRow3 x15
|
#define pCRow3 x15
|
||||||
#define pA x16
|
#define pA x16
|
||||||
#define alphaR w17
|
#define alphaR w17
|
||||||
#define alphaI w18
|
#define alphaI w19
|
||||||
|
|
||||||
#define alpha0_R s10
|
#define alpha0_R s10
|
||||||
#define alphaV0_R v10.s[0]
|
#define alphaV0_R v10.s[0]
|
||||||
|
|
|
@@ -49,7 +49,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#define pCRow3 x15
|
#define pCRow3 x15
|
||||||
#define pA x16
|
#define pA x16
|
||||||
#define alphaR w17
|
#define alphaR w17
|
||||||
#define alphaI w18
|
#define alphaI w19
|
||||||
|
|
||||||
#define alpha0_R s10
|
#define alpha0_R s10
|
||||||
#define alphaV0_R v10.s[0]
|
#define alphaV0_R v10.s[0]
|
||||||
|
|
|
@@ -49,10 +49,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#define pCRow3 x15
|
#define pCRow3 x15
|
||||||
#define pA x16
|
#define pA x16
|
||||||
#define alphaR w17
|
#define alphaR w17
|
||||||
#define alphaI w18
|
#define alphaI w19
|
||||||
#define temp x19
|
#define temp x20
|
||||||
#define tempOffset x20
|
#define tempOffset x21
|
||||||
#define tempK x21
|
#define tempK x22
|
||||||
|
|
||||||
#define alpha0_R s10
|
#define alpha0_R s10
|
||||||
#define alphaV0_R v10.s[0]
|
#define alphaV0_R v10.s[0]
|
||||||
|
|
|
@@ -27,7 +27,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
|
#include <float.h>
|
||||||
#include <arm_neon.h>
|
#include <arm_neon.h>
|
||||||
|
|
||||||
#if defined(SMP)
|
#if defined(SMP)
|
||||||
|
@@ -404,7 +404,8 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
|
||||||
#else
|
#else
|
||||||
nrm2_compute(n, x, inc_x, &ssq, &scale);
|
nrm2_compute(n, x, inc_x, &ssq, &scale);
|
||||||
#endif
|
#endif
|
||||||
if (fabs(scale) <1.e-300) return 0.;
|
volatile FLOAT sca = fabs(scale);
|
||||||
|
if (sca < DBL_MIN) return 0.;
|
||||||
ssq = sqrt(ssq) * scale;
|
ssq = sqrt(ssq) * scale;
|
||||||
|
|
||||||
return ssq;
|
return ssq;
|
||||||
|
|
Loading…
Reference in New Issue