Merge pull request #4003 from martin-frbg/issue3995
Fix instabilities in CGEMM/CTRMM/DNRM2 on Apple M1/M2 under OSX
This commit is contained in:
commit
efcf71255a
|
@ -267,9 +267,9 @@ int detect(void)
|
|||
}
|
||||
#else
|
||||
#ifdef __APPLE__
|
||||
sysctlbyname("hw.cpufamily",&value,&length,NULL,0);
|
||||
if (value ==131287967|| value == 458787763 ) return CPU_VORTEX; //A12/M1
|
||||
if (value == 3660830781) return CPU_VORTEX; //A15/M2
|
||||
sysctlbyname("hw.cpufamily",&value64,&length64,NULL,0);
|
||||
if (value64 ==131287967|| value64 == 458787763 ) return CPU_VORTEX; //A12/M1
|
||||
if (value64 == 3660830781) return CPU_VORTEX; //A15/M2
|
||||
#endif
|
||||
return CPU_ARMV8;
|
||||
#endif
|
||||
|
|
|
@ -49,7 +49,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#define pCRow3 x15
|
||||
#define pA x16
|
||||
#define alphaR w17
|
||||
#define alphaI w18
|
||||
#define alphaI w19
|
||||
|
||||
#define alpha0_R s10
|
||||
#define alphaV0_R v10.s[0]
|
||||
|
|
|
@ -49,7 +49,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#define pCRow3 x15
|
||||
#define pA x16
|
||||
#define alphaR w17
|
||||
#define alphaI w18
|
||||
#define alphaI w19
|
||||
|
||||
#define alpha0_R s10
|
||||
#define alphaV0_R v10.s[0]
|
||||
|
|
|
@ -49,10 +49,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#define pCRow3 x15
|
||||
#define pA x16
|
||||
#define alphaR w17
|
||||
#define alphaI w18
|
||||
#define temp x19
|
||||
#define tempOffset x20
|
||||
#define tempK x21
|
||||
#define alphaI w19
|
||||
#define temp x20
|
||||
#define tempOffset x21
|
||||
#define tempK x22
|
||||
|
||||
#define alpha0_R s10
|
||||
#define alphaV0_R v10.s[0]
|
||||
|
|
|
@ -27,7 +27,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
|
||||
#include "common.h"
|
||||
|
||||
#include <float.h>
|
||||
#include <arm_neon.h>
|
||||
|
||||
#if defined(SMP)
|
||||
|
@ -404,7 +404,8 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
|
|||
#else
|
||||
nrm2_compute(n, x, inc_x, &ssq, &scale);
|
||||
#endif
|
||||
if (fabs(scale) <1.e-300) return 0.;
|
||||
volatile FLOAT sca = fabs(scale);
|
||||
if (sca < DBL_MIN) return 0.;
|
||||
ssq = sqrt(ssq) * scale;
|
||||
|
||||
return ssq;
|
||||
|
|
Loading…
Reference in New Issue