POWER10: Changing store instructions for Level1 functions
This patch replaces each 32-byte store with two 16-byte stores to fix a recent degradation caused by the 32-byte stores.
This commit is contained in:
@@ -120,7 +120,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3, FLOAT *x,
|
||||
#if defined(POWER10)
|
||||
if ( n >= 32 )
|
||||
{
|
||||
BLASLONG align = ((32 - ((uintptr_t)y & (uintptr_t)0x1F)) >> 3) & 0x3;
|
||||
BLASLONG align = ((32 - ((uintptr_t)x & (uintptr_t)0x1F)) >> 3) & 0x3;
|
||||
for (i = 0; i < align; i++) {
|
||||
temp = y[i];
|
||||
y[i] = x[i];
|
||||
|
||||
Reference in New Issue
Block a user