Support CONSISTENT_FPCSR on AARCH64

This commit is contained in:
Kai T. Ohlhus 2022-09-22 00:20:40 +09:00
parent 667d0e0b48
commit 84453b924f
No known key found for this signature in database
GPG Key ID: 993D6445BD46BBCE
2 changed files with 16 additions and 0 deletions

View File

@@ -470,9 +470,13 @@ blas_queue_t *tscq;
#endif
#ifdef CONSISTENT_FPCSR
#ifdef __aarch64__
__asm__ __volatile__ ("msr fpcr, %0" : : "r" (queue -> sse_mode));
#else
__asm__ __volatile__ ("ldmxcsr %0" : : "m" (queue -> sse_mode));
__asm__ __volatile__ ("fldcw %0" : : "m" (queue -> x87_mode));
#endif
#endif
#ifdef MONITOR
main_status[cpu] = MAIN_RUNNING1;
@@ -746,9 +750,13 @@ int exec_blas_async(BLASLONG pos, blas_queue_t *queue){
queue -> position = pos;
#ifdef CONSISTENT_FPCSR
#ifdef __aarch64__
__asm__ __volatile__ ("mrs %0, fpcr" : "=r" (queue -> sse_mode));
#else
__asm__ __volatile__ ("fnstcw %0" : "=m" (queue -> x87_mode));
__asm__ __volatile__ ("stmxcsr %0" : "=m" (queue -> sse_mode));
#endif
#endif
#if defined(OS_LINUX) && !defined(NO_AFFINITY) && !defined(PARAMTEST)

View File

@@ -284,8 +284,12 @@ static void exec_threads(blas_queue_t *queue, int buf_index){
sb = queue -> sb;
#ifdef CONSISTENT_FPCSR
#ifdef __aarch64__
__asm__ __volatile__ ("msr fpcr, %0" : : "r" (queue -> sse_mode));
#else
__asm__ __volatile__ ("ldmxcsr %0" : : "m" (queue -> sse_mode));
__asm__ __volatile__ ("fldcw %0" : : "m" (queue -> x87_mode));
#endif
#endif
if ((sa == NULL) && (sb == NULL) && ((queue -> mode & BLAS_PTHREAD) == 0)) {
@@ -383,8 +387,12 @@ int exec_blas(BLASLONG num, blas_queue_t *queue){
#ifdef CONSISTENT_FPCSR
for (i = 0; i < num; i ++) {
#ifdef __aarch64__
__asm__ __volatile__ ("mrs %0, fpcr" : "=r" (queue[i].sse_mode));
#else
__asm__ __volatile__ ("fnstcw %0" : "=m" (queue[i].x87_mode));
__asm__ __volatile__ ("stmxcsr %0" : "=m" (queue[i].sse_mode));
#endif
}
#endif