From 6bd7c54af5ecc2004b8a6df0157fe72d55530927 Mon Sep 17 00:00:00 2001 From: Mark Seminatore Date: Mon, 11 Dec 2023 15:13:04 -0800 Subject: [PATCH 1/5] introduce MT_TRACE to clean up SMP_DEBUG code --- driver/others/blas_server_win32.c | 60 ++++++++++++------------------- 1 file changed, 23 insertions(+), 37 deletions(-) diff --git a/driver/others/blas_server_win32.c b/driver/others/blas_server_win32.c index 40ff85abc..5820a55f4 100644 --- a/driver/others/blas_server_win32.c +++ b/driver/others/blas_server_win32.c @@ -48,6 +48,12 @@ #endif #endif +#ifdef SMP_DEBUG +# define MT_TRACE(...) fprintf(stderr, __VA_ARGS__) +#else +# define MT_TRACE(...) +#endif + /* This is a thread implementation for Win32 lazy implementation */ /* Thread server common information */ @@ -213,29 +219,24 @@ static DWORD WINAPI blas_thread_server(void *arg){ /* Each server needs each buffer */ buffer = blas_memory_alloc(2); -#ifdef SMP_DEBUG - fprintf(STDERR, "Server[%2ld] Thread is started!\n", cpu); -#endif + MT_TRACE("Server[%2ld] Thread is started!\n", cpu); while (1){ /* Waiting for Queue */ -#ifdef SMP_DEBUG - fprintf(STDERR, "Server[%2ld] Waiting for Queue.\n", cpu); -#endif + MT_TRACE("Server[%2ld] Waiting for Queue.\n", cpu); + // event raised when work is added to the queue WaitForSingleObject(kickoff_event, INFINITE); if (cpu > thread_target - 2) { - //printf("thread [%d] exiting.\n", cpu); + //MT_TRACE("thread [%d] exiting.\n", cpu); break; // excess thread, so worker thread exits } -#ifdef SMP_DEBUG - fprintf(STDERR, "Server[%2ld] Got it.\n", cpu); -#endif + MT_TRACE("Server[%2ld] Got it.\n", cpu); #if 1 EnterCriticalSection(&queue_lock); @@ -270,10 +271,8 @@ static DWORD WINAPI blas_thread_server(void *arg){ __asm__ __volatile__ ("fldcw %0" : : "m" (queue -> x87_mode)); #endif -#ifdef SMP_DEBUG - fprintf(STDERR, "Server[%2ld] Started. Mode = 0x%03x M = %3ld N=%3ld K=%3ld\n", + MT_TRACE("Server[%2ld] Started. 
Mode = 0x%03x M = %3ld N=%3ld K=%3ld\n", cpu, queue->mode, queue-> args ->m, queue->args->n, queue->args->k); -#endif // fprintf(stderr, "queue start[%ld]!!!\n", cpu); @@ -342,19 +341,14 @@ static DWORD WINAPI blas_thread_server(void *arg){ continue; //if queue == NULL } -#ifdef SMP_DEBUG - fprintf(STDERR, "Server[%2ld] Finished!\n", cpu); -#endif + MT_TRACE("Server[%2ld] Finished!\n", cpu); queue->finished = 1; - } /* Shutdown procedure */ -#ifdef SMP_DEBUG - fprintf(STDERR, "Server[%2ld] Shutdown!\n", cpu); -#endif + MT_TRACE("Server[%2ld] Shutdown!\n", cpu); blas_memory_free(buffer); @@ -369,10 +363,7 @@ int blas_thread_init(void){ LOCK_COMMAND(&server_lock); -#ifdef SMP_DEBUG - fprintf(STDERR, "Initializing Thread(Num. threads = %d)\n", - blas_cpu_number); -#endif + MT_TRACE("Initializing Thread(Num. threads = %d)\n", blas_cpu_number); if (!blas_server_avail){ // create the kickoff Event @@ -383,7 +374,7 @@ int blas_thread_init(void){ InitializeCriticalSection(&queue_lock); for(i = 0; i < blas_cpu_number - 1; i++){ - //printf("thread_init: creating thread [%d]\n", i); + //MT_TRACE("thread_init: creating thread [%d]\n", i); blas_threads[i] = CreateThread(NULL, 0, blas_thread_server, (void *)i, @@ -458,14 +449,10 @@ int exec_blas_async(BLASLONG pos, blas_queue_t *queue){ int exec_blas_async_wait(BLASLONG num, blas_queue_t *queue){ -#ifdef SMP_DEBUG - fprintf(STDERR, "Synchronization Waiting.\n"); -#endif + MT_TRACE("Synchronization Waiting.\n"); while (num){ -#ifdef SMP_DEBUG - fprintf(STDERR, "Waiting Queue ..\n"); -#endif + MT_TRACE("Waiting Queue ..\n"); while (!queue->finished) YIELDING; @@ -473,9 +460,8 @@ int exec_blas_async_wait(BLASLONG num, blas_queue_t *queue){ num--; } -#ifdef SMP_DEBUG - fprintf(STDERR, "Completely Done.\n\n"); -#endif + MT_TRACE("Completely Done.\n\n"); + // if work was added to the queue after this batch we can't sleep the worker threads // by resetting the event EnterCriticalSection(&queue_lock); @@ -577,11 +563,11 @@ void 
goto_set_num_threads(int num_threads) SetEvent(kickoff_event); for (i = num_threads - 1; i < blas_num_threads - 1; i++) { - //printf("set_num_threads: waiting on thread [%d] to quit.\n", i); + //MT_TRACE("set_num_threads: waiting on thread [%d] to quit.\n", i); WaitForSingleObject(blas_threads[i], INFINITE); - //printf("set_num_threads: thread [%d] has quit.\n", i); + //MT_TRACE("set_num_threads: thread [%d] has quit.\n", i); CloseHandle(blas_threads[i]); } @@ -610,7 +596,7 @@ void goto_set_num_threads(int num_threads) } for(i = (blas_num_threads > 0) ? blas_num_threads - 1 : 0; i < num_threads - 1; i++){ - //printf("set_num_threads: creating thread [%d]\n", i); + //MT_TRACE("set_num_threads: creating thread [%d]\n", i); blas_threads[i] = CreateThread(NULL, 0, blas_thread_server, (void *)i, From 0d7fe5ea610d46afaed9f5164f6a11729e2429de Mon Sep 17 00:00:00 2001 From: Mark Seminatore Date: Mon, 29 Jan 2024 22:33:47 -0800 Subject: [PATCH 2/5] clean up whitespace --- driver/others/blas_server_win32.c | 192 +++++++++++++----------------- 1 file changed, 85 insertions(+), 107 deletions(-) diff --git a/driver/others/blas_server_win32.c b/driver/others/blas_server_win32.c index 5820a55f4..68dde584b 100644 --- a/driver/others/blas_server_win32.c +++ b/driver/others/blas_server_win32.c @@ -72,19 +72,9 @@ static HANDLE blas_threads [MAX_CPU_NUMBER]; static DWORD blas_threads_id[MAX_CPU_NUMBER]; static volatile int thread_target; // target num of live threads, volatile for cross-thread reads -#if defined (__GNUC__) && (__GNUC__ < 6) - #define WIN_CAS(dest, exch, comp) __sync_val_compare_and_swap(dest, comp, exch) -#else - #if defined(_WIN64) - #define WIN_CAS(dest, exch, comp) InterlockedCompareExchange64(dest, exch, comp) - #else - #define WIN_CAS(dest, exch, comp) InterlockedCompareExchange(dest, exch, comp) - #endif -#endif - static void legacy_exec(void *func, int mode, blas_arg_t *args, void *sb){ - if (!(mode & BLAS_COMPLEX)){ + if (!(mode & BLAS_COMPLEX)) { #ifdef 
EXPRECISION if ((mode & BLAS_PREC) == BLAS_XDOUBLE){ /* REAL / Extended Double */ @@ -99,7 +89,7 @@ static void legacy_exec(void *func, int mode, blas_arg_t *args, void *sb){ args -> c, args -> ldc, sb); } else #endif - if ((mode & BLAS_PREC) == BLAS_DOUBLE){ + if ((mode & BLAS_PREC) == BLAS_DOUBLE) { /* REAL / Double */ void (*afunc)(BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, @@ -110,7 +100,7 @@ static void legacy_exec(void *func, int mode, blas_arg_t *args, void *sb){ args -> a, args -> lda, args -> b, args -> ldb, args -> c, args -> ldc, sb); - } else if ((mode & BLAS_PREC) == BLAS_SINGLE){ + } else if ((mode & BLAS_PREC) == BLAS_SINGLE) { /* REAL / Single */ void (*afunc)(BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, @@ -122,7 +112,7 @@ static void legacy_exec(void *func, int mode, blas_arg_t *args, void *sb){ args -> b, args -> ldb, args -> c, args -> ldc, sb); #ifdef BUILD_BFLOAT16 - } else if ((mode & BLAS_PREC) == BLAS_BFLOAT16){ + } else if ((mode & BLAS_PREC) == BLAS_BFLOAT16) { /* REAL / BFLOAT16 */ void (*afunc)(BLASLONG, BLASLONG, BLASLONG, bfloat16, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG, @@ -133,7 +123,7 @@ static void legacy_exec(void *func, int mode, blas_arg_t *args, void *sb){ args -> a, args -> lda, args -> b, args -> ldb, args -> c, args -> ldc, sb); - } else if ((mode & BLAS_PREC) == BLAS_STOBF16){ + } else if ((mode & BLAS_PREC) == BLAS_STOBF16) { /* REAL / BLAS_STOBF16 */ void (*afunc)(BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, bfloat16 *, BLASLONG, @@ -144,7 +134,7 @@ static void legacy_exec(void *func, int mode, blas_arg_t *args, void *sb){ args -> a, args -> lda, args -> b, args -> ldb, args -> c, args -> ldc, sb); - } else if ((mode & BLAS_PREC) == BLAS_DTOBF16){ + } else if ((mode & BLAS_PREC) == BLAS_DTOBF16) { /* REAL / BLAS_DTOBF16 */ void (*afunc)(BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, bfloat16 *, BLASLONG, @@ -161,7 +151,7 @@ static 
void legacy_exec(void *func, int mode, blas_arg_t *args, void *sb){ } } else { #ifdef EXPRECISION - if ((mode & BLAS_PREC) == BLAS_XDOUBLE){ + if ((mode & BLAS_PREC) == BLAS_XDOUBLE) { /* COMPLEX / Extended Double */ void (*afunc)(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, xdouble *, BLASLONG, xdouble *, BLASLONG, @@ -175,7 +165,7 @@ static void legacy_exec(void *func, int mode, blas_arg_t *args, void *sb){ args -> c, args -> ldc, sb); } else #endif - if ((mode & BLAS_PREC) == BLAS_DOUBLE){ + if ((mode & BLAS_PREC) == BLAS_DOUBLE) { /* COMPLEX / Double */ void (*afunc)(BLASLONG, BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG, @@ -205,10 +195,9 @@ static void legacy_exec(void *func, int mode, blas_arg_t *args, void *sb){ } } -/* This is a main routine of threads. Each thread waits until job is */ -/* queued. */ - -static DWORD WINAPI blas_thread_server(void *arg){ +// This is a main routine of threads. Each thread waits until job is +// queued. +static DWORD WINAPI blas_thread_server(void *arg) { /* Thread identifier */ BLASLONG cpu = (BLASLONG)arg; @@ -221,24 +210,22 @@ static DWORD WINAPI blas_thread_server(void *arg){ MT_TRACE("Server[%2ld] Thread is started!\n", cpu); - while (1){ + while (1) { /* Waiting for Queue */ MT_TRACE("Server[%2ld] Waiting for Queue.\n", cpu); - // event raised when work is added to the queue - WaitForSingleObject(kickoff_event, INFINITE); + // event raised when work is added to the queue + WaitForSingleObject(kickoff_event, INFINITE); - if (cpu > thread_target - 2) - { - //MT_TRACE("thread [%d] exiting.\n", cpu); - break; // excess thread, so worker thread exits - } + if (cpu > thread_target - 2) { + //MT_TRACE("thread [%d] exiting.\n", cpu); + break; // excess thread, so worker thread exits + } MT_TRACE("Server[%2ld] Got it.\n", cpu); -#if 1 EnterCriticalSection(&queue_lock); queue = work_queue; @@ -246,51 +233,39 @@ static DWORD WINAPI blas_thread_server(void *arg){ work_queue = work_queue->next; 
LeaveCriticalSection(&queue_lock); -#else - volatile blas_queue_t* queue_next; - INT_PTR prev_value; - do { - queue = (volatile blas_queue_t*)work_queue; - if (!queue) - break; - - queue_next = (volatile blas_queue_t*)queue->next; - prev_value = WIN_CAS((INT_PTR*)&work_queue, (INT_PTR)queue_next, (INT_PTR)queue); - } while (prev_value != queue); -#endif - - if (queue) { + if (queue) { int (*routine)(blas_arg_t *, void *, void *, void *, void *, BLASLONG) = queue -> routine; sa = queue -> sa; sb = queue -> sb; -#ifdef CONSISTENT_FPCSR - __asm__ __volatile__ ("ldmxcsr %0" : : "m" (queue -> sse_mode)); - __asm__ __volatile__ ("fldcw %0" : : "m" (queue -> x87_mode)); -#endif + #ifdef CONSISTENT_FPCSR + __asm__ __volatile__ ("ldmxcsr %0" : : "m" (queue -> sse_mode)); + __asm__ __volatile__ ("fldcw %0" : : "m" (queue -> x87_mode)); + #endif MT_TRACE("Server[%2ld] Started. Mode = 0x%03x M = %3ld N=%3ld K=%3ld\n", cpu, queue->mode, queue-> args ->m, queue->args->n, queue->args->k); // fprintf(stderr, "queue start[%ld]!!!\n", cpu); -#ifdef MONITOR - main_status[cpu] = MAIN_RUNNING1; -#endif + #ifdef MONITOR + main_status[cpu] = MAIN_RUNNING1; + #endif - if (sa == NULL) sa = (void *)((BLASLONG)buffer + GEMM_OFFSET_A); + if (sa == NULL) + sa = (void *)((BLASLONG)buffer + GEMM_OFFSET_A); if (sb == NULL) { - if (!(queue -> mode & BLAS_COMPLEX)){ + if (!(queue -> mode & BLAS_COMPLEX)) { #ifdef EXPRECISION - if ((queue -> mode & BLAS_PREC) == BLAS_XDOUBLE){ + if ((queue -> mode & BLAS_PREC) == BLAS_XDOUBLE) { sb = (void *)(((BLASLONG)sa + ((XGEMM_P * XGEMM_Q * sizeof(xdouble) + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B); } else #endif - if ((queue -> mode & BLAS_PREC) == BLAS_DOUBLE){ + if ((queue -> mode & BLAS_PREC) == BLAS_DOUBLE) { #ifdef BUILD_DOUBLE sb = (void *)(((BLASLONG)sa + ((DGEMM_P * DGEMM_Q * sizeof(double) + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B); @@ -324,26 +299,25 @@ static DWORD WINAPI blas_thread_server(void *arg){ /* Other types in future */ } } - 
queue->sb=sb; + queue->sb=sb; } -#ifdef MONITOR - main_status[cpu] = MAIN_RUNNING2; -#endif + #ifdef MONITOR + main_status[cpu] = MAIN_RUNNING2; + #endif if (!(queue -> mode & BLAS_LEGACY)) { - - (routine)(queue -> args, queue -> range_m, queue -> range_n, sa, sb, queue -> position); + (routine)(queue -> args, queue -> range_m, queue -> range_n, sa, sb, queue -> position); } else { - legacy_exec(routine, queue -> mode, queue -> args, sb); + legacy_exec(routine, queue -> mode, queue -> args, sb); } - }else{ - continue; //if queue == NULL - } + } else { + continue; //if queue == NULL + } MT_TRACE("Server[%2ld] Finished!\n", cpu); - queue->finished = 1; + queue->finished = 1; } /* Shutdown procedure */ @@ -353,10 +327,12 @@ static DWORD WINAPI blas_thread_server(void *arg){ blas_memory_free(buffer); return 0; - } +} -/* Initializing routine */ -int blas_thread_init(void){ +// +// Initializing routine +// +int blas_thread_init(void) { BLASLONG i; if (blas_server_avail || (blas_cpu_number <= 1)) return 0; @@ -365,16 +341,16 @@ int blas_thread_init(void){ MT_TRACE("Initializing Thread(Num. threads = %d)\n", blas_cpu_number); - if (!blas_server_avail){ - // create the kickoff Event - kickoff_event = CreateEvent(NULL, TRUE, FALSE, NULL); + if (!blas_server_avail) { + // create the kickoff Event + kickoff_event = CreateEvent(NULL, TRUE, FALSE, NULL); - thread_target = blas_cpu_number; + thread_target = blas_cpu_number; InitializeCriticalSection(&queue_lock); - for(i = 0; i < blas_cpu_number - 1; i++){ - //MT_TRACE("thread_init: creating thread [%d]\n", i); + for(i = 0; i < blas_cpu_number - 1; i++) { + //MT_TRACE("thread_init: creating thread [%d]\n", i); blas_threads[i] = CreateThread(NULL, 0, blas_thread_server, (void *)i, @@ -391,13 +367,10 @@ int blas_thread_init(void){ /* User can call one of two routines. - exec_blas_async ... immediately returns after jobs are queued. - exec_blas ... returns after jobs are finished. 
*/ - -int exec_blas_async(BLASLONG pos, blas_queue_t *queue){ +int exec_blas_async(BLASLONG pos, blas_queue_t *queue) { #if defined(SMP_SERVER) // Handle lazy re-init of the thread-pool after a POSIX fork @@ -417,7 +390,7 @@ int exec_blas_async(BLASLONG pos, blas_queue_t *queue){ __asm__ __volatile__ ("stmxcsr %0" : "=m" (current -> sse_mode)); #endif - current->finished = 0; + current->finished = 0; current = current -> next; pos ++; } @@ -426,18 +399,18 @@ int exec_blas_async(BLASLONG pos, blas_queue_t *queue){ if (!work_queue) { - work_queue = queue; + work_queue = queue; } else { blas_queue_t *next_item = work_queue; - // find the end of the work queue - while (next_item) - next_item = next_item->next; + // find the end of the work queue + while (next_item) + next_item = next_item->next; - // add new work to the end - next_item = queue; + // add new work to the end + next_item = queue; } LeaveCriticalSection(&queue_lock); @@ -447,20 +420,24 @@ int exec_blas_async(BLASLONG pos, blas_queue_t *queue){ return 0; } -int exec_blas_async_wait(BLASLONG num, blas_queue_t *queue){ +// +// Join. 
Wait for all queued tasks to complete +// +int exec_blas_async_wait(BLASLONG num, blas_queue_t *queue) { - MT_TRACE("Synchronization Waiting.\n"); + MT_TRACE("Synchronization Waiting.\n"); - while (num){ - MT_TRACE("Waiting Queue ..\n"); - while (!queue->finished) - YIELDING; + while (num) { + MT_TRACE("Waiting Queue ..\n"); - queue = queue->next; - num--; - } + while (!queue->finished) + YIELDING; - MT_TRACE("Completely Done.\n\n"); + queue = queue->next; + num--; + } + + MT_TRACE("Completely Done.\n\n"); // if work was added to the queue after this batch we can't sleep the worker threads // by resetting the event @@ -474,8 +451,10 @@ int exec_blas_async_wait(BLASLONG num, blas_queue_t *queue){ return 0; } -/* Execute Threads */ -int exec_blas(BLASLONG num, blas_queue_t *queue){ +// +// Execute Threads +// +int exec_blas(BLASLONG num, blas_queue_t *queue) { #if defined(SMP_SERVER) && defined(OS_CYGWIN_NT) // Handle lazy re-init of the thread-pool after a POSIX fork @@ -507,9 +486,8 @@ int exec_blas(BLASLONG num, blas_queue_t *queue){ return 0; } -/* Shutdown procedure, but user don't have to call this routine. The */ -/* kernel automatically kill threads. */ - +// Shutdown procedure, but user don't have to call this routine. The +// kernel automatically kill threads. 
int BLASFUNC(blas_thread_shutdown)(void){ int i; @@ -518,9 +496,9 @@ int BLASFUNC(blas_thread_shutdown)(void){ LOCK_COMMAND(&server_lock); - if (blas_server_avail){ + if (blas_server_avail) { - for(i = 0; i < blas_num_threads - 1; i++){ + for(i = 0; i < blas_num_threads - 1; i++) { // Could also just use WaitForMultipleObjects DWORD wait_thread_value = WaitForSingleObject(blas_threads[i], 50); @@ -555,7 +533,7 @@ void goto_set_num_threads(int num_threads) if (num_threads > MAX_CPU_NUMBER) num_threads = MAX_CPU_NUMBER; - if (blas_server_avail && num_threads < blas_num_threads) { + if (blas_server_avail && num_threads < blas_num_threads) { LOCK_COMMAND(&server_lock); thread_target = num_threads; @@ -586,7 +564,7 @@ void goto_set_num_threads(int num_threads) thread_target = num_threads; //increased_threads = 1; - if (!blas_server_avail){ + if (!blas_server_avail) { // create the kickoff Event kickoff_event = CreateEvent(NULL, TRUE, FALSE, NULL); @@ -595,7 +573,7 @@ void goto_set_num_threads(int num_threads) blas_server_avail = 1; } - for(i = (blas_num_threads > 0) ? blas_num_threads - 1 : 0; i < num_threads - 1; i++){ + for(i = (blas_num_threads > 0) ? 
blas_num_threads - 1 : 0; i < num_threads - 1; i++) { //MT_TRACE("set_num_threads: creating thread [%d]\n", i); blas_threads[i] = CreateThread(NULL, 0, From 42cb567f0f9f6c8ef27558e5b61251b0805aae6d Mon Sep 17 00:00:00 2001 From: Mark Seminatore Date: Wed, 31 Jan 2024 13:24:28 -0800 Subject: [PATCH 3/5] more cleanup --- driver/others/blas_server_win32.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/driver/others/blas_server_win32.c b/driver/others/blas_server_win32.c index 68dde584b..ee6d08f8c 100644 --- a/driver/others/blas_server_win32.c +++ b/driver/others/blas_server_win32.c @@ -72,7 +72,10 @@ static HANDLE blas_threads [MAX_CPU_NUMBER]; static DWORD blas_threads_id[MAX_CPU_NUMBER]; static volatile int thread_target; // target num of live threads, volatile for cross-thread reads -static void legacy_exec(void *func, int mode, blas_arg_t *args, void *sb){ +// +// +// +static void legacy_exec(void *func, int mode, blas_arg_t *args, void *sb) { if (!(mode & BLAS_COMPLEX)) { #ifdef EXPRECISION @@ -195,8 +198,9 @@ static void legacy_exec(void *func, int mode, blas_arg_t *args, void *sb){ } } -// This is a main routine of threads. Each thread waits until job is -// queued. +// +// This is a main routine of threads. Each thread waits until job is queued. +// static DWORD WINAPI blas_thread_server(void *arg) { /* Thread identifier */ @@ -488,7 +492,7 @@ int exec_blas(BLASLONG num, blas_queue_t *queue) { // Shutdown procedure, but user don't have to call this routine. The // kernel automatically kill threads. 
-int BLASFUNC(blas_thread_shutdown)(void){ +int BLASFUNC(blas_thread_shutdown)(void) { int i; @@ -563,7 +567,7 @@ void goto_set_num_threads(int num_threads) thread_target = num_threads; - //increased_threads = 1; + //increased_threads = 1; if (!blas_server_avail) { // create the kickoff Event kickoff_event = CreateEvent(NULL, TRUE, FALSE, NULL); From 98c56a7314dbc0032152b7658c73c203124963f9 Mon Sep 17 00:00:00 2001 From: Mark Seminatore Date: Thu, 8 Feb 2024 13:50:15 -0800 Subject: [PATCH 4/5] more cleanup --- driver/others/blas_server_win32.c | 35 ++++++++++++++++++++----------- 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/driver/others/blas_server_win32.c b/driver/others/blas_server_win32.c index ee6d08f8c..89ce9e656 100644 --- a/driver/others/blas_server_win32.c +++ b/driver/others/blas_server_win32.c @@ -73,7 +73,7 @@ static DWORD blas_threads_id[MAX_CPU_NUMBER]; static volatile int thread_target; // target num of live threads, volatile for cross-thread reads // -// +// Legacy code path // static void legacy_exec(void *func, int mode, blas_arg_t *args, void *sb) { @@ -369,11 +369,11 @@ int blas_thread_init(void) { return 0; } -/* - User can call one of two routines. - exec_blas_async ... immediately returns after jobs are queued. - exec_blas ... returns after jobs are finished. -*/ +// +// User can call one of two routines. +// exec_blas_async ... immediately returns after jobs are queued. +// exec_blas ... returns after jobs are finished. 
+// +int exec_blas_async(BLASLONG pos, blas_queue_t *queue) { #if defined(SMP_SERVER) @@ -471,27 +471,32 @@ int exec_blas(BLASLONG num, blas_queue_t *queue) { if ((num <= 0) || (queue == NULL)) return 0; - if ((num > 1) && queue -> next) exec_blas_async(1, queue -> next); + if ((num > 1) && queue -> next) + exec_blas_async(1, queue -> next); routine = queue -> routine; if (queue -> mode & BLAS_LEGACY) { legacy_exec(routine, queue -> mode, queue -> args, queue -> sb); - } else + } else { if (queue -> mode & BLAS_PTHREAD) { void (*pthreadcompat)(void *) = queue -> routine; (pthreadcompat)(queue -> args); } else (routine)(queue -> args, queue -> range_m, queue -> range_n, - queue -> sa, queue -> sb, 0); + queue -> sa, queue -> sb, 0); + } - if ((num > 1) && queue -> next) exec_blas_async_wait(num - 1, queue -> next); + if ((num > 1) && queue -> next) + exec_blas_async_wait(num - 1, queue -> next); return 0; } +// // Shutdown procedure, but user don't have to call this routine. The // kernel automatically kill threads. +// int BLASFUNC(blas_thread_shutdown)(void) { int i; @@ -502,7 +507,7 @@ int BLASFUNC(blas_thread_shutdown)(void) { if (blas_server_avail) { - for(i = 0; i < blas_num_threads - 1; i++) { + for (i = 0; i < blas_num_threads - 1; i++) { // Could also just use WaitForMultipleObjects DWORD wait_thread_value = WaitForSingleObject(blas_threads[i], 50); @@ -524,6 +529,9 @@ int BLASFUNC(blas_thread_shutdown)(void) { return 0; } +// +// Legacy function to set number of threads +// void goto_set_num_threads(int num_threads) { long i; @@ -577,7 +585,7 @@ void goto_set_num_threads(int num_threads) blas_server_avail = 1; } - for(i = (blas_num_threads > 0) ? blas_num_threads - 1 : 0; i < num_threads - 1; i++) { + for (i = (blas_num_threads > 0) ? 
blas_num_threads - 1 : 0; i < num_threads - 1; i++) { //MT_TRACE("set_num_threads: creating thread [%d]\n", i); blas_threads[i] = CreateThread(NULL, 0, @@ -593,6 +601,9 @@ void goto_set_num_threads(int num_threads) blas_cpu_number = num_threads; } +// +// Openblas function to set thread count +// void openblas_set_num_threads(int num) { goto_set_num_threads(num); From 10548a0460d0b6abd160e69cd7ca727d41681584 Mon Sep 17 00:00:00 2001 From: Mark Seminatore Date: Mon, 12 Feb 2024 10:22:12 -0800 Subject: [PATCH 5/5] update contributors --- CONTRIBUTORS.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md index 493747052..8f7abc5f8 100644 --- a/CONTRIBUTORS.md +++ b/CONTRIBUTORS.md @@ -218,4 +218,6 @@ In chronological order: * [2022-08] Fix building from sources for QNX * Mark Seminatore - * [2023-11-09] Improve Windows threading performance scaling \ No newline at end of file + * [2023-11-09] Improve Windows threading performance scaling + * [2024-02-09] Introduce MT_TRACE facility and improve code consistency + \ No newline at end of file