Use the "old" COMPUTE(24) macro with clang due to register limitations

This commit is contained in:
Martin Kroeker 2021-04-06 19:58:32 +02:00 committed by GitHub
parent 0492f0f3f9
commit 2dfb24730d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 4 additions and 0 deletions

View File

@@ -501,7 +501,11 @@ CNAME(BLASLONG m, BLASLONG n, BLASLONG k, float alpha, float * __restrict__ A, f
int32_t permil[16] = {0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3};
BLASLONG n_count = n;
float *a_pointer = A,*b_pointer = B,*c_pointer = C,*ctemp = C,*next_b = B;
#if defined(__clang__)
for(;n_count>23;n_count-=24) COMPUTE(24)
#else
for(;n_count>23;n_count-=24) COMPUTE_n24
#endif
for(;n_count>19;n_count-=20) COMPUTE(20)
for(;n_count>15;n_count-=16) COMPUTE(16)
for(;n_count>11;n_count-=12) COMPUTE(12)