diff --git a/driver/others/memory.c b/driver/others/memory.c index b7a681312..6f7a7db82 100644 --- a/driver/others/memory.c +++ b/driver/others/memory.c @@ -87,28 +87,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #endif #endif -/* Memory buffer must fit two matrix subblocks of maximal size */ -#if BUFFER_SIZE < (SGEMM_DEFAULT_P * SGEMM_DEFAULT_Q * 4 * 2) || \ - BUFFER_SIZE < (SGEMM_DEFAULT_P * SGEMM_DEFAULT_R * 4 * 2) || \ - BUFFER_SIZE < (SGEMM_DEFAULT_R * SGEMM_DEFAULT_Q * 4 * 2) -#error BUFFER_SIZE is too small for P, Q, and R of SGEMM -#endif -#if BUFFER_SIZE < (DGEMM_DEFAULT_P * DGEMM_DEFAULT_Q * 8 * 2) || \ - BUFFER_SIZE < (DGEMM_DEFAULT_P * DGEMM_DEFAULT_R * 8 * 2) || \ - BUFFER_SIZE < (DGEMM_DEFAULT_R * DGEMM_DEFAULT_Q * 8 * 2) -#error BUFFER_SIZE is too small for P, Q, and R of DGEMM -#endif -#if BUFFER_SIZE < (CGEMM_DEFAULT_P * CGEMM_DEFAULT_Q * 8 * 2) || \ - BUFFER_SIZE < (CGEMM_DEFAULT_P * CGEMM_DEFAULT_R * 8 * 2) || \ - BUFFER_SIZE < (CGEMM_DEFAULT_R * CGEMM_DEFAULT_Q * 8 * 2) -#error BUFFER_SIZE is too small for P, Q, and R of CGEMM -#endif -#if BUFFER_SIZE < (ZGEMM_DEFAULT_P * ZGEMM_DEFAULT_Q * 16 * 2) || \ - BUFFER_SIZE < (ZGEMM_DEFAULT_P * ZGEMM_DEFAULT_R * 16 * 2) || \ - BUFFER_SIZE < (ZGEMM_DEFAULT_R * ZGEMM_DEFAULT_Q * 16 * 2) -#error BUFFER_SIZE is too small for P, Q, and R of ZGEMM -#endif - #if defined(COMPILE_TLS) #include @@ -151,7 +129,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #endif -#if defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) +#if defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) #include #include #endif @@ -214,74 +192,51 @@ void goto_set_num_threads(int num_threads) {}; #else -#if defined(OS_LINUX) || defined(OS_SUNOS) +#if defined(OS_LINUX) || defined(OS_SUNOS) || defined(OS_NETBSD) #ifndef NO_AFFINITY int get_num_procs(void); #else int get_num_procs(void) { static int nums = 0; - cpu_set_t cpuset,*cpusetp; - size_t size; - int ret; - -#if defined(__GLIBC_PREREQ) -#if !__GLIBC_PREREQ(2, 7) - int i; -#if !__GLIBC_PREREQ(2, 6) - int n; -#endif -#endif -#endif +cpu_set_t *cpusetp; +size_t size; +int ret; +int i,n; if (!nums) nums = sysconf(_SC_NPROCESSORS_CONF); #if !defined(OS_LINUX) - return nums; + return nums; #endif #if !defined(__GLIBC_PREREQ) - return nums; + return nums; #else #if !__GLIBC_PREREQ(2, 3) - return nums; + return nums; #endif #if !__GLIBC_PREREQ(2, 7) - ret = sched_getaffinity(0,sizeof(cpuset), &cpuset); + ret = sched_getaffinity(0,sizeof(cpu_set_t), cpusetp); if (ret!=0) return nums; n=0; #if !__GLIBC_PREREQ(2, 6) for (i=0;i= CPU_SETSIZE) { - cpusetp = CPU_ALLOC(nums); - if (cpusetp == NULL) { - return nums; - } - size = CPU_ALLOC_SIZE(nums); - ret = sched_getaffinity(0,size,cpusetp); - if (ret!=0) { - CPU_FREE(cpusetp); - return nums; - } - ret = CPU_COUNT_S(size,cpusetp); - if (ret > 0 && ret < nums) nums = ret; - CPU_FREE(cpusetp); - return nums; - } else { - ret = sched_getaffinity(0,sizeof(cpuset),&cpuset); - if (ret!=0) { - return nums; - } - ret = CPU_COUNT(&cpuset); - if (ret > 0 && ret < nums) nums = ret; - return nums; - } + cpusetp = CPU_ALLOC(nums); + if (cpusetp == NULL) return nums; + size = CPU_ALLOC_SIZE(nums); + ret = sched_getaffinity(0,size,cpusetp); + if (ret!=0) return nums; + ret = CPU_COUNT_S(size,cpusetp); + if (ret > 0 && ret < nums) nums = ret; + CPU_FREE(cpusetp); + return nums; #endif #endif } @@ -334,7 +289,7 @@ int get_num_procs(void) { #endif -#if defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_DRAGONFLY) +#if defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_DRAGONFLY) int get_num_procs(void) { @@ -426,7 +381,7 @@ extern int openblas_goto_num_threads_env(); extern int openblas_omp_num_threads_env(); int blas_get_cpu_number(void){ -#if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) || defined(OS_ANDROID) +#if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) || defined(OS_ANDROID) int max_num; #endif int blas_goto_num = 0; @@ -434,7 +389,7 @@ int blas_get_cpu_number(void){ if (blas_num_threads) return blas_num_threads; -#if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) || defined(OS_ANDROID) +#if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) || defined(OS_ANDROID) max_num = get_num_procs(); #endif @@ -458,7 +413,7 @@ int blas_get_cpu_number(void){ else if (blas_omp_num > 0) blas_num_threads = blas_omp_num; else blas_num_threads = MAX_CPU_NUMBER; -#if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) || defined(OS_ANDROID) +#if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) || defined(OS_ANDROID) if (blas_num_threads > max_num) blas_num_threads = max_num; #endif @@ -844,7 +799,7 @@ static void *alloc_qalloc(void *address){ static void alloc_windows_free(struct alloc_t *alloc_info){ - VirtualFree(alloc_info, 0, MEM_RELEASE); + VirtualFree(alloc_info, allocation_block_size, MEM_DECOMMIT); } @@ -957,7 +912,7 @@ static void alloc_hugetlb_free(struct alloc_t *alloc_info){ #ifdef OS_WINDOWS - VirtualFree(alloc_info, 0, MEM_LARGE_PAGES | MEM_RELEASE); + VirtualFree(alloc_info, allocation_block_size, MEM_LARGE_PAGES | MEM_DECOMMIT); #endif @@ -1118,6 +1073,11 @@ static volatile int memory_initialized = 0; } free(table); } +#if defined(OS_WINDOWS) + TlsFree(local_storage_key); +#else + pthread_key_delete(local_storage_key); +#endif } static void blas_memory_init(){ @@ -1335,13 +1295,6 @@ void blas_memory_free_nolock(void * map_address) { free(map_address); } -#ifdef SMP -void blas_thread_memory_cleanup(void) { - blas_memory_cleanup((void*)get_memory_table()); -} -#endif - - void blas_shutdown(void){ #ifdef SMP BLASFUNC(blas_thread_shutdown)(); @@ -1351,7 +1304,7 @@ void blas_shutdown(void){ /* Only cleanupIf we were built for threading and TLS was initialized */ if (local_storage_key) #endif - blas_thread_memory_cleanup(); + blas_memory_cleanup((void*)get_memory_table()); #ifdef SEEK_ADDRESS base_address = 0UL; @@ -1538,14 +1491,6 @@ void DESTRUCTOR gotoblas_quit(void) { blas_shutdown(); -#if defined(SMP) -#if defined(OS_WINDOWS) - TlsFree(local_storage_key); -#else - pthread_key_delete(local_storage_key); -#endif -#endif - #ifdef PROFILE moncontrol (0); #endif @@ -1581,7 +1526,7 @@ BOOL APIENTRY DllMain(HMODULE hModule, DWORD ul_reason_for_call, LPVOID lpReser break; case DLL_THREAD_DETACH: #if defined(SMP) - blas_thread_memory_cleanup(); + blas_memory_cleanup((void*)get_memory_table()); #endif break; case DLL_PROCESS_DETACH: @@ -1644,7 +1589,6 @@ void gotoblas_dummy_for_PGI(void) { gotoblas_init(); gotoblas_quit(); -#if __PGIC__ < 19 #if 0 asm ("\t.section\t.ctors,\"aw\",@progbits; .align 8; .quad gotoblas_init; .section .text"); asm ("\t.section\t.dtors,\"aw\",@progbits; .align 8; .quad gotoblas_quit; .section .text"); @@ -1652,16 +1596,13 @@ void gotoblas_dummy_for_PGI(void) { asm (".section .init,\"ax\"; call gotoblas_init@PLT; .section .text"); asm (".section .fini,\"ax\"; call gotoblas_quit@PLT; .section .text"); #endif -#endif } #endif #else -/* USE_TLS / COMPILE_TLS not set */ - #include -#if defined(OS_WINDOWS) && !defined(OS_CYGWIN_NT) +#ifdef OS_WINDOWS #define ALLOC_WINDOWS #ifndef MEM_LARGE_PAGES #define MEM_LARGE_PAGES 0x20000000 @@ -1675,7 +1616,7 @@ void gotoblas_dummy_for_PGI(void) { #include #include -#if !defined(OS_WINDOWS) || defined(OS_CYGWIN_NT) +#ifndef OS_WINDOWS #include #ifndef NO_SYSV_IPC #include @@ -1695,7 +1636,7 @@ void gotoblas_dummy_for_PGI(void) { #include #endif -#if defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) +#if defined(OS_FREEBSD) || defined(OS_DARWIN) #include #include #endif @@ -1734,12 +1675,9 @@ void gotoblas_dummy_for_PGI(void) { #elif (defined(OS_DARWIN) || defined(OS_SUNOS)) && defined(C_GCC) #define CONSTRUCTOR __attribute__ ((constructor)) #define DESTRUCTOR __attribute__ ((destructor)) -#elif __GNUC__ && INIT_PRIORITY && ((GCC_VERSION >= 40300) || (CLANG_VERSION >= 20900)) +#else #define CONSTRUCTOR __attribute__ ((constructor(101))) #define DESTRUCTOR __attribute__ ((destructor(101))) -#else -#define CONSTRUCTOR __attribute__ ((constructor)) -#define DESTRUCTOR __attribute__ ((destructor)) #endif #ifdef DYNAMIC_ARCH @@ -1758,75 +1696,50 @@ void goto_set_num_threads(int num_threads) {}; #else -#if defined(OS_LINUX) || defined(OS_SUNOS) +#if defined(OS_LINUX) || defined(OS_SUNOS) || defined(OS_NETBSD) #ifndef NO_AFFINITY int get_num_procs(void); #else int get_num_procs(void) { - static int nums = 0; - cpu_set_t cpuset,*cpusetp; - size_t size; - int ret; - -#if defined(__GLIBC_PREREQ) -#if !__GLIBC_PREREQ(2, 7) - int i; -#if !__GLIBC_PREREQ(2, 6) - int n; -#endif -#endif -#endif +cpu_set_t *cpusetp; +size_t size; +int ret; +int i,n; if (!nums) nums = sysconf(_SC_NPROCESSORS_CONF); #if !defined(OS_LINUX) - return nums; + return nums; #endif #if !defined(__GLIBC_PREREQ) - return nums; + return nums; #else #if !__GLIBC_PREREQ(2, 3) - return nums; + return nums; #endif #if !__GLIBC_PREREQ(2, 7) - ret = sched_getaffinity(0,sizeof(cpuset), &cpuset); + ret = sched_getaffinity(0,sizeof(cpu_set_t), cpusetp); if (ret!=0) return nums; n=0; #if !__GLIBC_PREREQ(2, 6) for (i=0;i= CPU_SETSIZE) { - cpusetp = CPU_ALLOC(nums); - if (cpusetp == NULL) { - return nums; - } - size = CPU_ALLOC_SIZE(nums); - ret = sched_getaffinity(0,size,cpusetp); - if (ret!=0) { - CPU_FREE(cpusetp); - return nums; - } - ret = CPU_COUNT_S(size,cpusetp); - if (ret > 0 && ret < nums) nums = ret; - CPU_FREE(cpusetp); - return nums; - } else { - ret = sched_getaffinity(0,sizeof(cpuset),&cpuset); - if (ret!=0) { - return nums; - } - ret = CPU_COUNT(&cpuset); - if (ret > 0 && ret < nums) nums = ret; - return nums; - } + cpusetp = CPU_ALLOC(nums); + if (cpusetp == NULL) return nums; + size = CPU_ALLOC_SIZE(nums); + ret = sched_getaffinity(0,size,cpusetp); + if (ret!=0) return nums; + nums = CPU_COUNT_S(size,cpusetp); + CPU_FREE(cpusetp); + return nums; #endif #endif } @@ -1840,7 +1753,7 @@ int get_num_procs(void) { return nums; } #endif - + #ifdef OS_HAIKU int get_num_procs(void) { static int nums = 0; @@ -1877,7 +1790,7 @@ int get_num_procs(void) { #endif -#if defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_DRAGONFLY) +#if defined(OS_FREEBSD) int get_num_procs(void) { @@ -1954,7 +1867,7 @@ void openblas_fork_handler() // http://gcc.gnu.org/bugzilla/show_bug.cgi?id=60035 // In the mean time build with USE_OPENMP=0 or link against another // implementation of OpenMP. -#if !((defined(OS_WINDOWS) && !defined(OS_CYGWIN_NT)) || defined(OS_ANDROID)) && defined(SMP_SERVER) +#if !(defined(OS_WINDOWS) || defined(OS_ANDROID)) && defined(SMP_SERVER) int err; err = pthread_atfork ((void (*)(void)) BLASFUNC(blas_thread_shutdown), NULL, NULL); if(err != 0) @@ -1967,7 +1880,7 @@ extern int openblas_goto_num_threads_env(); extern int openblas_omp_num_threads_env(); int blas_get_cpu_number(void){ -#if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) || defined(OS_ANDROID) +#if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_DARWIN) || defined(OS_ANDROID) int max_num; #endif int blas_goto_num = 0; @@ -1975,11 +1888,11 @@ int blas_get_cpu_number(void){ if (blas_num_threads) return blas_num_threads; -#if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) || defined(OS_ANDROID) +#if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_DARWIN) || defined(OS_ANDROID) max_num = get_num_procs(); #endif - // blas_goto_num = 0; + blas_goto_num = 0; #ifndef USE_OPENMP blas_goto_num=openblas_num_threads_env(); if (blas_goto_num < 0) blas_goto_num = 0; @@ -1991,7 +1904,7 @@ int blas_get_cpu_number(void){ #endif - // blas_omp_num = 0; + blas_omp_num = 0; blas_omp_num=openblas_omp_num_threads_env(); if (blas_omp_num < 0) blas_omp_num = 0; @@ -1999,7 +1912,7 @@ int blas_get_cpu_number(void){ else if (blas_omp_num > 0) blas_num_threads = blas_omp_num; else blas_num_threads = MAX_CPU_NUMBER; -#if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) || defined(OS_ANDROID) +#if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_DARWIN) || defined(OS_ANDROID) if (blas_num_threads > max_num) blas_num_threads = max_num; #endif @@ -2063,12 +1976,8 @@ static BLASULONG alloc_lock = 0UL; static void alloc_mmap_free(struct release_t *release){ -if (!release->address) return; - if (munmap(release -> address, BUFFER_SIZE)) { - int errsv=errno; - perror("OpenBLAS : munmap failed:"); - printf("error code=%d,\trelease->address=%lx\n",errsv,release->address); + printf("OpenBLAS : munmap failed\n"); } } @@ -2090,21 +1999,11 @@ static void *alloc_mmap(void *address){ } if (map_address != (void *)-1) { -#if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP) LOCK_COMMAND(&alloc_lock); -#endif release_info[release_pos].address = map_address; release_info[release_pos].func = alloc_mmap_free; release_pos ++; -#if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP) UNLOCK_COMMAND(&alloc_lock); -#endif - } else { -#ifdef DEBUG - int errsv=errno; - perror("OpenBLAS : mmap failed:"); - printf("error code=%d,\tmap_address=%lx\n",errsv,map_address); -#endif } #ifdef OS_LINUX @@ -2246,18 +2145,14 @@ static void *alloc_mmap(void *address){ #if defined(OS_LINUX) && !defined(NO_WARMUP) } #endif + LOCK_COMMAND(&alloc_lock); if (map_address != (void *)-1) { -#if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP) - LOCK_COMMAND(&alloc_lock); -#endif release_info[release_pos].address = map_address; release_info[release_pos].func = alloc_mmap_free; release_pos ++; -#if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP) - UNLOCK_COMMAND(&alloc_lock); -#endif } + UNLOCK_COMMAND(&alloc_lock); return map_address; } @@ -2332,7 +2227,7 @@ static void *alloc_qalloc(void *address){ static void alloc_windows_free(struct release_t *release){ - VirtualFree(release -> address, 0, MEM_RELEASE); + VirtualFree(release -> address, BUFFER_SIZE, MEM_DECOMMIT); } @@ -2454,7 +2349,7 @@ static void alloc_hugetlb_free(struct release_t *release){ #ifdef OS_WINDOWS - VirtualFree(release -> address, 0, MEM_LARGE_PAGES | MEM_RELEASE); + VirtualFree(release -> address, BUFFER_SIZE, MEM_LARGE_PAGES | MEM_DECOMMIT); #endif @@ -2625,7 +2520,7 @@ void *blas_memory_alloc(int procpos){ int position; #if defined(WHEREAMI) && !defined(USE_OPENMP) - int mypos = 0; + int mypos; #endif void *map_address; @@ -2656,11 +2551,6 @@ void *blas_memory_alloc(int procpos){ NULL, }; void *(**func)(void *address); - -#if defined(USE_OPENMP) - if (!memory_initialized) { -#endif - LOCK_COMMAND(&alloc_lock); if (!memory_initialized) { @@ -2696,9 +2586,6 @@ void *blas_memory_alloc(int procpos){ } UNLOCK_COMMAND(&alloc_lock); -#if defined(USE_OPENMP) - } -#endif #ifdef DEBUG printf("Alloc Start ...\n"); @@ -2713,17 +2600,13 @@ void *blas_memory_alloc(int procpos){ do { if (!memory[position].used && (memory[position].pos == mypos)) { -#if defined(SMP) && !defined(USE_OPENMP) LOCK_COMMAND(&alloc_lock); -#else - blas_lock(&memory[position].lock); -#endif +// blas_lock(&memory[position].lock); + if (!memory[position].used) goto allocation; -#if defined(SMP) && !defined(USE_OPENMP) + UNLOCK_COMMAND(&alloc_lock); -#else - blas_unlock(&memory[position].lock); -#endif +// blas_unlock(&memory[position].lock); } position ++; @@ -2735,26 +2618,21 @@ void *blas_memory_alloc(int procpos){ position = 0; -#if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP) LOCK_COMMAND(&alloc_lock); -#endif do { -#if defined(USE_OPENMP) - if (!memory[position].used) { - blas_lock(&memory[position].lock); -#endif +/* if (!memory[position].used) { */ +/* blas_lock(&memory[position].lock);*/ + if (!memory[position].used) goto allocation; -#if defined(USE_OPENMP) - blas_unlock(&memory[position].lock); - } -#endif +/* blas_unlock(&memory[position].lock);*/ +/* } */ + position ++; } while (position < NUM_BUFFERS); -#if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP) - UNLOCK_COMMAND(&alloc_lock); -#endif + UNLOCK_COMMAND(&alloc_lock); + goto error; allocation : @@ -2764,11 +2642,10 @@ void *blas_memory_alloc(int procpos){ #endif memory[position].used = 1; -#if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP) + UNLOCK_COMMAND(&alloc_lock); -#else - blas_unlock(&memory[position].lock); -#endif +/* blas_unlock(&memory[position].lock);*/ + if (!memory[position].addr) { do { #ifdef DEBUG @@ -2785,7 +2662,7 @@ void *blas_memory_alloc(int procpos){ #ifdef ALLOC_DEVICEDRIVER if ((*func == alloc_devicedirver) && (map_address == (void *)-1)) { - fprintf(stderr, "OpenBLAS Warning ... Physically contiguous allocation was failed.\n"); + fprintf(stderr, "OpenBLAS Warning ... Physically contigous allocation was failed.\n"); } #endif @@ -2813,13 +2690,9 @@ void *blas_memory_alloc(int procpos){ } while ((BLASLONG)map_address == -1); -#if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP) LOCK_COMMAND(&alloc_lock); -#endif memory[position].addr = map_address; -#if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP) UNLOCK_COMMAND(&alloc_lock); -#endif #ifdef DEBUG printf(" Mapping Succeeded. %p(%d)\n", (void *)memory[position].addr, position); @@ -2873,9 +2746,8 @@ void blas_memory_free(void *free_area){ #endif position = 0; -#if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP) LOCK_COMMAND(&alloc_lock); -#endif + while ((position < NUM_BUFFERS) && (memory[position].addr != free_area)) position++; @@ -2889,9 +2761,7 @@ void blas_memory_free(void *free_area){ WMB; memory[position].used = 0; -#if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP) UNLOCK_COMMAND(&alloc_lock); -#endif #ifdef DEBUG printf("Unmap Succeeded.\n\n"); @@ -2906,9 +2776,8 @@ void blas_memory_free(void *free_area){ for (position = 0; position < NUM_BUFFERS; position++) printf("%4ld %p : %d\n", position, memory[position].addr, memory[position].used); #endif -#if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP) UNLOCK_COMMAND(&alloc_lock); -#endif + return; } @@ -2958,7 +2827,7 @@ void blas_shutdown(void){ #if defined(OS_LINUX) && !defined(NO_WARMUP) -#if defined(SMP) || defined(USE_LOCKING) +#ifdef SMP #if defined(USE_PTHREAD_LOCK) static pthread_mutex_t init_lock = PTHREAD_MUTEX_INITIALIZER; #elif defined(USE_PTHREAD_SPINLOCK) @@ -2983,7 +2852,7 @@ static void _touch_memory(blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, if (hot_alloc != 2) { #endif -#if defined(SMP) || defined(USE_LOCKING) +#ifdef SMP LOCK_COMMAND(&init_lock); #endif @@ -2993,7 +2862,7 @@ static void _touch_memory(blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, size -= PAGESIZE; } -#if defined(SMP) || defined(USE_LOCKING) +#ifdef SMP UNLOCK_COMMAND(&init_lock); #endif @@ -3226,7 +3095,7 @@ void gotoblas_dummy_for_PGI(void) { gotoblas_init(); gotoblas_quit(); -#if __PGIC__ < 19 + #if 0 asm ("\t.section\t.ctors,\"aw\",@progbits; .align 8; .quad gotoblas_init; .section .text"); asm ("\t.section\t.dtors,\"aw\",@progbits; .align 8; .quad gotoblas_quit; .section .text"); @@ -3234,7 +3103,6 @@ void gotoblas_dummy_for_PGI(void) { asm (".section .init,\"ax\"; call gotoblas_init@PLT; .section .text"); asm (".section .fini,\"ax\"; call gotoblas_quit@PLT; .section .text"); #endif -#endif } #endif