POWER10: Change store instructions for Level 1 functions

This patch replaces each 32-byte store with two 16-byte stores
to fix a recent degradation caused by the 32-byte stores.
This commit is contained in:
Rajalakshmi Srinivasaraghavan
2022-05-12 11:17:33 -05:00
parent ce814e84dc
commit b62173c5a0
16 changed files with 541 additions and 274 deletions

View File

@@ -120,7 +120,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3, FLOAT *x,
#if defined(POWER10)
if ( n >= 64 )
{
BLASLONG align = ((32 - ((uintptr_t)y & (uintptr_t)0x1F)) >> 2) & 0x7;
BLASLONG align = ((32 - ((uintptr_t)x & (uintptr_t)0x1F)) >> 2) & 0x7;
for (i = 0; i < align; i++) {
temp = y[i];
y[i] = x[i];