print the current values when buffer_size is too small

This commit is contained in:
Martin Kroeker 2020-04-02 23:27:10 +02:00 committed by GitHub
parent f03b667dd2
commit 07d59c0455
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 93 additions and 225 deletions

View File

@ -87,28 +87,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif #endif
#endif #endif
/* Memory buffer must fit two matrix subblocks of maximal size */
#if BUFFER_SIZE < (SGEMM_DEFAULT_P * SGEMM_DEFAULT_Q * 4 * 2) || \
BUFFER_SIZE < (SGEMM_DEFAULT_P * SGEMM_DEFAULT_R * 4 * 2) || \
BUFFER_SIZE < (SGEMM_DEFAULT_R * SGEMM_DEFAULT_Q * 4 * 2)
#error BUFFER_SIZE is too small for P, Q, and R of SGEMM
#endif
#if BUFFER_SIZE < (DGEMM_DEFAULT_P * DGEMM_DEFAULT_Q * 8 * 2) || \
BUFFER_SIZE < (DGEMM_DEFAULT_P * DGEMM_DEFAULT_R * 8 * 2) || \
BUFFER_SIZE < (DGEMM_DEFAULT_R * DGEMM_DEFAULT_Q * 8 * 2)
#error BUFFER_SIZE is too small for P, Q, and R of DGEMM
#endif
#if BUFFER_SIZE < (CGEMM_DEFAULT_P * CGEMM_DEFAULT_Q * 8 * 2) || \
BUFFER_SIZE < (CGEMM_DEFAULT_P * CGEMM_DEFAULT_R * 8 * 2) || \
BUFFER_SIZE < (CGEMM_DEFAULT_R * CGEMM_DEFAULT_Q * 8 * 2)
#error BUFFER_SIZE is too small for P, Q, and R of CGEMM
#endif
#if BUFFER_SIZE < (ZGEMM_DEFAULT_P * ZGEMM_DEFAULT_Q * 16 * 2) || \
BUFFER_SIZE < (ZGEMM_DEFAULT_P * ZGEMM_DEFAULT_R * 16 * 2) || \
BUFFER_SIZE < (ZGEMM_DEFAULT_R * ZGEMM_DEFAULT_Q * 16 * 2)
#error BUFFER_SIZE is too small for P, Q, and R of ZGEMM
#endif
#if defined(COMPILE_TLS) #if defined(COMPILE_TLS)
#include <errno.h> #include <errno.h>
@ -151,7 +129,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <unistd.h> #include <unistd.h>
#endif #endif
#if defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) #if defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN)
#include <sys/sysctl.h> #include <sys/sysctl.h>
#include <sys/resource.h> #include <sys/resource.h>
#endif #endif
@ -214,74 +192,51 @@ void goto_set_num_threads(int num_threads) {};
#else #else
#if defined(OS_LINUX) || defined(OS_SUNOS) #if defined(OS_LINUX) || defined(OS_SUNOS) || defined(OS_NETBSD)
#ifndef NO_AFFINITY #ifndef NO_AFFINITY
int get_num_procs(void); int get_num_procs(void);
#else #else
int get_num_procs(void) { int get_num_procs(void) {
static int nums = 0; static int nums = 0;
cpu_set_t cpuset,*cpusetp; cpu_set_t *cpusetp;
size_t size; size_t size;
int ret; int ret;
int i,n;
#if defined(__GLIBC_PREREQ)
#if !__GLIBC_PREREQ(2, 7)
int i;
#if !__GLIBC_PREREQ(2, 6)
int n;
#endif
#endif
#endif
if (!nums) nums = sysconf(_SC_NPROCESSORS_CONF); if (!nums) nums = sysconf(_SC_NPROCESSORS_CONF);
#if !defined(OS_LINUX) #if !defined(OS_LINUX)
return nums; return nums;
#endif #endif
#if !defined(__GLIBC_PREREQ) #if !defined(__GLIBC_PREREQ)
return nums; return nums;
#else #else
#if !__GLIBC_PREREQ(2, 3) #if !__GLIBC_PREREQ(2, 3)
return nums; return nums;
#endif #endif
#if !__GLIBC_PREREQ(2, 7) #if !__GLIBC_PREREQ(2, 7)
ret = sched_getaffinity(0,sizeof(cpuset), &cpuset); ret = sched_getaffinity(0,sizeof(cpu_set_t), cpusetp);
if (ret!=0) return nums; if (ret!=0) return nums;
n=0; n=0;
#if !__GLIBC_PREREQ(2, 6) #if !__GLIBC_PREREQ(2, 6)
for (i=0;i<nums;i++) for (i=0;i<nums;i++)
if (CPU_ISSET(i,&cpuset)) n++; if (CPU_ISSET(i,cpusetp)) n++;
nums=n; nums=n;
#else #else
nums = CPU_COUNT(sizeof(cpuset),&cpuset); nums = CPU_COUNT(sizeof(cpu_set_t),cpusetp);
#endif #endif
return nums; return nums;
#else #else
if (nums >= CPU_SETSIZE) { cpusetp = CPU_ALLOC(nums);
cpusetp = CPU_ALLOC(nums); if (cpusetp == NULL) return nums;
if (cpusetp == NULL) { size = CPU_ALLOC_SIZE(nums);
return nums; ret = sched_getaffinity(0,size,cpusetp);
} if (ret!=0) return nums;
size = CPU_ALLOC_SIZE(nums); ret = CPU_COUNT_S(size,cpusetp);
ret = sched_getaffinity(0,size,cpusetp); if (ret > 0 && ret < nums) nums = ret;
if (ret!=0) { CPU_FREE(cpusetp);
CPU_FREE(cpusetp); return nums;
return nums;
}
ret = CPU_COUNT_S(size,cpusetp);
if (ret > 0 && ret < nums) nums = ret;
CPU_FREE(cpusetp);
return nums;
} else {
ret = sched_getaffinity(0,sizeof(cpuset),&cpuset);
if (ret!=0) {
return nums;
}
ret = CPU_COUNT(&cpuset);
if (ret > 0 && ret < nums) nums = ret;
return nums;
}
#endif #endif
#endif #endif
} }
@ -334,7 +289,7 @@ int get_num_procs(void) {
#endif #endif
#if defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_DRAGONFLY) #if defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_DRAGONFLY)
int get_num_procs(void) { int get_num_procs(void) {
@ -426,7 +381,7 @@ extern int openblas_goto_num_threads_env();
extern int openblas_omp_num_threads_env(); extern int openblas_omp_num_threads_env();
int blas_get_cpu_number(void){ int blas_get_cpu_number(void){
#if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) || defined(OS_ANDROID) #if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) || defined(OS_ANDROID)
int max_num; int max_num;
#endif #endif
int blas_goto_num = 0; int blas_goto_num = 0;
@ -434,7 +389,7 @@ int blas_get_cpu_number(void){
if (blas_num_threads) return blas_num_threads; if (blas_num_threads) return blas_num_threads;
#if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) || defined(OS_ANDROID) #if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) || defined(OS_ANDROID)
max_num = get_num_procs(); max_num = get_num_procs();
#endif #endif
@ -458,7 +413,7 @@ int blas_get_cpu_number(void){
else if (blas_omp_num > 0) blas_num_threads = blas_omp_num; else if (blas_omp_num > 0) blas_num_threads = blas_omp_num;
else blas_num_threads = MAX_CPU_NUMBER; else blas_num_threads = MAX_CPU_NUMBER;
#if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) || defined(OS_ANDROID) #if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) || defined(OS_ANDROID)
if (blas_num_threads > max_num) blas_num_threads = max_num; if (blas_num_threads > max_num) blas_num_threads = max_num;
#endif #endif
@ -844,7 +799,7 @@ static void *alloc_qalloc(void *address){
static void alloc_windows_free(struct alloc_t *alloc_info){ static void alloc_windows_free(struct alloc_t *alloc_info){
VirtualFree(alloc_info, 0, MEM_RELEASE); VirtualFree(alloc_info, allocation_block_size, MEM_DECOMMIT);
} }
@ -957,7 +912,7 @@ static void alloc_hugetlb_free(struct alloc_t *alloc_info){
#ifdef OS_WINDOWS #ifdef OS_WINDOWS
VirtualFree(alloc_info, 0, MEM_LARGE_PAGES | MEM_RELEASE); VirtualFree(alloc_info, allocation_block_size, MEM_LARGE_PAGES | MEM_DECOMMIT);
#endif #endif
@ -1118,6 +1073,11 @@ static volatile int memory_initialized = 0;
} }
free(table); free(table);
} }
#if defined(OS_WINDOWS)
TlsFree(local_storage_key);
#else
pthread_key_delete(local_storage_key);
#endif
} }
static void blas_memory_init(){ static void blas_memory_init(){
@ -1335,13 +1295,6 @@ void blas_memory_free_nolock(void * map_address) {
free(map_address); free(map_address);
} }
#ifdef SMP
void blas_thread_memory_cleanup(void) {
blas_memory_cleanup((void*)get_memory_table());
}
#endif
void blas_shutdown(void){ void blas_shutdown(void){
#ifdef SMP #ifdef SMP
BLASFUNC(blas_thread_shutdown)(); BLASFUNC(blas_thread_shutdown)();
@ -1351,7 +1304,7 @@ void blas_shutdown(void){
/* Only cleanupIf we were built for threading and TLS was initialized */ /* Only cleanupIf we were built for threading and TLS was initialized */
if (local_storage_key) if (local_storage_key)
#endif #endif
blas_thread_memory_cleanup(); blas_memory_cleanup((void*)get_memory_table());
#ifdef SEEK_ADDRESS #ifdef SEEK_ADDRESS
base_address = 0UL; base_address = 0UL;
@ -1538,14 +1491,6 @@ void DESTRUCTOR gotoblas_quit(void) {
blas_shutdown(); blas_shutdown();
#if defined(SMP)
#if defined(OS_WINDOWS)
TlsFree(local_storage_key);
#else
pthread_key_delete(local_storage_key);
#endif
#endif
#ifdef PROFILE #ifdef PROFILE
moncontrol (0); moncontrol (0);
#endif #endif
@ -1581,7 +1526,7 @@ BOOL APIENTRY DllMain(HMODULE hModule, DWORD ul_reason_for_call, LPVOID lpReser
break; break;
case DLL_THREAD_DETACH: case DLL_THREAD_DETACH:
#if defined(SMP) #if defined(SMP)
blas_thread_memory_cleanup(); blas_memory_cleanup((void*)get_memory_table());
#endif #endif
break; break;
case DLL_PROCESS_DETACH: case DLL_PROCESS_DETACH:
@ -1644,7 +1589,6 @@ void gotoblas_dummy_for_PGI(void) {
gotoblas_init(); gotoblas_init();
gotoblas_quit(); gotoblas_quit();
#if __PGIC__ < 19
#if 0 #if 0
asm ("\t.section\t.ctors,\"aw\",@progbits; .align 8; .quad gotoblas_init; .section .text"); asm ("\t.section\t.ctors,\"aw\",@progbits; .align 8; .quad gotoblas_init; .section .text");
asm ("\t.section\t.dtors,\"aw\",@progbits; .align 8; .quad gotoblas_quit; .section .text"); asm ("\t.section\t.dtors,\"aw\",@progbits; .align 8; .quad gotoblas_quit; .section .text");
@ -1652,16 +1596,13 @@ void gotoblas_dummy_for_PGI(void) {
asm (".section .init,\"ax\"; call gotoblas_init@PLT; .section .text"); asm (".section .init,\"ax\"; call gotoblas_init@PLT; .section .text");
asm (".section .fini,\"ax\"; call gotoblas_quit@PLT; .section .text"); asm (".section .fini,\"ax\"; call gotoblas_quit@PLT; .section .text");
#endif #endif
#endif
} }
#endif #endif
#else #else
/* USE_TLS / COMPILE_TLS not set */
#include <errno.h> #include <errno.h>
#if defined(OS_WINDOWS) && !defined(OS_CYGWIN_NT) #ifdef OS_WINDOWS
#define ALLOC_WINDOWS #define ALLOC_WINDOWS
#ifndef MEM_LARGE_PAGES #ifndef MEM_LARGE_PAGES
#define MEM_LARGE_PAGES 0x20000000 #define MEM_LARGE_PAGES 0x20000000
@ -1675,7 +1616,7 @@ void gotoblas_dummy_for_PGI(void) {
#include <stdio.h> #include <stdio.h>
#include <fcntl.h> #include <fcntl.h>
#if !defined(OS_WINDOWS) || defined(OS_CYGWIN_NT) #ifndef OS_WINDOWS
#include <sys/mman.h> #include <sys/mman.h>
#ifndef NO_SYSV_IPC #ifndef NO_SYSV_IPC
#include <sys/shm.h> #include <sys/shm.h>
@ -1695,7 +1636,7 @@ void gotoblas_dummy_for_PGI(void) {
#include <sys/resource.h> #include <sys/resource.h>
#endif #endif
#if defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) #if defined(OS_FREEBSD) || defined(OS_DARWIN)
#include <sys/sysctl.h> #include <sys/sysctl.h>
#include <sys/resource.h> #include <sys/resource.h>
#endif #endif
@ -1734,12 +1675,9 @@ void gotoblas_dummy_for_PGI(void) {
#elif (defined(OS_DARWIN) || defined(OS_SUNOS)) && defined(C_GCC) #elif (defined(OS_DARWIN) || defined(OS_SUNOS)) && defined(C_GCC)
#define CONSTRUCTOR __attribute__ ((constructor)) #define CONSTRUCTOR __attribute__ ((constructor))
#define DESTRUCTOR __attribute__ ((destructor)) #define DESTRUCTOR __attribute__ ((destructor))
#elif __GNUC__ && INIT_PRIORITY && ((GCC_VERSION >= 40300) || (CLANG_VERSION >= 20900)) #else
#define CONSTRUCTOR __attribute__ ((constructor(101))) #define CONSTRUCTOR __attribute__ ((constructor(101)))
#define DESTRUCTOR __attribute__ ((destructor(101))) #define DESTRUCTOR __attribute__ ((destructor(101)))
#else
#define CONSTRUCTOR __attribute__ ((constructor))
#define DESTRUCTOR __attribute__ ((destructor))
#endif #endif
#ifdef DYNAMIC_ARCH #ifdef DYNAMIC_ARCH
@ -1758,75 +1696,50 @@ void goto_set_num_threads(int num_threads) {};
#else #else
#if defined(OS_LINUX) || defined(OS_SUNOS) #if defined(OS_LINUX) || defined(OS_SUNOS) || defined(OS_NETBSD)
#ifndef NO_AFFINITY #ifndef NO_AFFINITY
int get_num_procs(void); int get_num_procs(void);
#else #else
int get_num_procs(void) { int get_num_procs(void) {
static int nums = 0; static int nums = 0;
cpu_set_t cpuset,*cpusetp; cpu_set_t *cpusetp;
size_t size; size_t size;
int ret; int ret;
int i,n;
#if defined(__GLIBC_PREREQ)
#if !__GLIBC_PREREQ(2, 7)
int i;
#if !__GLIBC_PREREQ(2, 6)
int n;
#endif
#endif
#endif
if (!nums) nums = sysconf(_SC_NPROCESSORS_CONF); if (!nums) nums = sysconf(_SC_NPROCESSORS_CONF);
#if !defined(OS_LINUX) #if !defined(OS_LINUX)
return nums; return nums;
#endif #endif
#if !defined(__GLIBC_PREREQ) #if !defined(__GLIBC_PREREQ)
return nums; return nums;
#else #else
#if !__GLIBC_PREREQ(2, 3) #if !__GLIBC_PREREQ(2, 3)
return nums; return nums;
#endif #endif
#if !__GLIBC_PREREQ(2, 7) #if !__GLIBC_PREREQ(2, 7)
ret = sched_getaffinity(0,sizeof(cpuset), &cpuset); ret = sched_getaffinity(0,sizeof(cpu_set_t), cpusetp);
if (ret!=0) return nums; if (ret!=0) return nums;
n=0; n=0;
#if !__GLIBC_PREREQ(2, 6) #if !__GLIBC_PREREQ(2, 6)
for (i=0;i<nums;i++) for (i=0;i<nums;i++)
if (CPU_ISSET(i,&cpuset)) n++; if (CPU_ISSET(i,cpusetp)) n++;
nums=n; nums=n;
#else #else
nums = CPU_COUNT(sizeof(cpuset),&cpuset); nums = CPU_COUNT(sizeof(cpu_set_t),cpusetp);
#endif #endif
return nums; return nums;
#else #else
if (nums >= CPU_SETSIZE) { cpusetp = CPU_ALLOC(nums);
cpusetp = CPU_ALLOC(nums); if (cpusetp == NULL) return nums;
if (cpusetp == NULL) { size = CPU_ALLOC_SIZE(nums);
return nums; ret = sched_getaffinity(0,size,cpusetp);
} if (ret!=0) return nums;
size = CPU_ALLOC_SIZE(nums); nums = CPU_COUNT_S(size,cpusetp);
ret = sched_getaffinity(0,size,cpusetp); CPU_FREE(cpusetp);
if (ret!=0) { return nums;
CPU_FREE(cpusetp);
return nums;
}
ret = CPU_COUNT_S(size,cpusetp);
if (ret > 0 && ret < nums) nums = ret;
CPU_FREE(cpusetp);
return nums;
} else {
ret = sched_getaffinity(0,sizeof(cpuset),&cpuset);
if (ret!=0) {
return nums;
}
ret = CPU_COUNT(&cpuset);
if (ret > 0 && ret < nums) nums = ret;
return nums;
}
#endif #endif
#endif #endif
} }
@ -1877,7 +1790,7 @@ int get_num_procs(void) {
#endif #endif
#if defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_DRAGONFLY) #if defined(OS_FREEBSD)
int get_num_procs(void) { int get_num_procs(void) {
@ -1954,7 +1867,7 @@ void openblas_fork_handler()
// http://gcc.gnu.org/bugzilla/show_bug.cgi?id=60035 // http://gcc.gnu.org/bugzilla/show_bug.cgi?id=60035
// In the mean time build with USE_OPENMP=0 or link against another // In the mean time build with USE_OPENMP=0 or link against another
// implementation of OpenMP. // implementation of OpenMP.
#if !((defined(OS_WINDOWS) && !defined(OS_CYGWIN_NT)) || defined(OS_ANDROID)) && defined(SMP_SERVER) #if !(defined(OS_WINDOWS) || defined(OS_ANDROID)) && defined(SMP_SERVER)
int err; int err;
err = pthread_atfork ((void (*)(void)) BLASFUNC(blas_thread_shutdown), NULL, NULL); err = pthread_atfork ((void (*)(void)) BLASFUNC(blas_thread_shutdown), NULL, NULL);
if(err != 0) if(err != 0)
@ -1967,7 +1880,7 @@ extern int openblas_goto_num_threads_env();
extern int openblas_omp_num_threads_env(); extern int openblas_omp_num_threads_env();
int blas_get_cpu_number(void){ int blas_get_cpu_number(void){
#if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) || defined(OS_ANDROID) #if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_DARWIN) || defined(OS_ANDROID)
int max_num; int max_num;
#endif #endif
int blas_goto_num = 0; int blas_goto_num = 0;
@ -1975,11 +1888,11 @@ int blas_get_cpu_number(void){
if (blas_num_threads) return blas_num_threads; if (blas_num_threads) return blas_num_threads;
#if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) || defined(OS_ANDROID) #if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_DARWIN) || defined(OS_ANDROID)
max_num = get_num_procs(); max_num = get_num_procs();
#endif #endif
// blas_goto_num = 0; blas_goto_num = 0;
#ifndef USE_OPENMP #ifndef USE_OPENMP
blas_goto_num=openblas_num_threads_env(); blas_goto_num=openblas_num_threads_env();
if (blas_goto_num < 0) blas_goto_num = 0; if (blas_goto_num < 0) blas_goto_num = 0;
@ -1991,7 +1904,7 @@ int blas_get_cpu_number(void){
#endif #endif
// blas_omp_num = 0; blas_omp_num = 0;
blas_omp_num=openblas_omp_num_threads_env(); blas_omp_num=openblas_omp_num_threads_env();
if (blas_omp_num < 0) blas_omp_num = 0; if (blas_omp_num < 0) blas_omp_num = 0;
@ -1999,7 +1912,7 @@ int blas_get_cpu_number(void){
else if (blas_omp_num > 0) blas_num_threads = blas_omp_num; else if (blas_omp_num > 0) blas_num_threads = blas_omp_num;
else blas_num_threads = MAX_CPU_NUMBER; else blas_num_threads = MAX_CPU_NUMBER;
#if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) || defined(OS_ANDROID) #if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_DARWIN) || defined(OS_ANDROID)
if (blas_num_threads > max_num) blas_num_threads = max_num; if (blas_num_threads > max_num) blas_num_threads = max_num;
#endif #endif
@ -2063,12 +1976,8 @@ static BLASULONG alloc_lock = 0UL;
static void alloc_mmap_free(struct release_t *release){ static void alloc_mmap_free(struct release_t *release){
if (!release->address) return;
if (munmap(release -> address, BUFFER_SIZE)) { if (munmap(release -> address, BUFFER_SIZE)) {
int errsv=errno; printf("OpenBLAS : munmap failed\n");
perror("OpenBLAS : munmap failed:");
printf("error code=%d,\trelease->address=%lx\n",errsv,release->address);
} }
} }
@ -2090,21 +1999,11 @@ static void *alloc_mmap(void *address){
} }
if (map_address != (void *)-1) { if (map_address != (void *)-1) {
#if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP)
LOCK_COMMAND(&alloc_lock); LOCK_COMMAND(&alloc_lock);
#endif
release_info[release_pos].address = map_address; release_info[release_pos].address = map_address;
release_info[release_pos].func = alloc_mmap_free; release_info[release_pos].func = alloc_mmap_free;
release_pos ++; release_pos ++;
#if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP)
UNLOCK_COMMAND(&alloc_lock); UNLOCK_COMMAND(&alloc_lock);
#endif
} else {
#ifdef DEBUG
int errsv=errno;
perror("OpenBLAS : mmap failed:");
printf("error code=%d,\tmap_address=%lx\n",errsv,map_address);
#endif
} }
#ifdef OS_LINUX #ifdef OS_LINUX
@ -2246,18 +2145,14 @@ static void *alloc_mmap(void *address){
#if defined(OS_LINUX) && !defined(NO_WARMUP) #if defined(OS_LINUX) && !defined(NO_WARMUP)
} }
#endif #endif
LOCK_COMMAND(&alloc_lock);
if (map_address != (void *)-1) { if (map_address != (void *)-1) {
#if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP)
LOCK_COMMAND(&alloc_lock);
#endif
release_info[release_pos].address = map_address; release_info[release_pos].address = map_address;
release_info[release_pos].func = alloc_mmap_free; release_info[release_pos].func = alloc_mmap_free;
release_pos ++; release_pos ++;
#if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP)
UNLOCK_COMMAND(&alloc_lock);
#endif
} }
UNLOCK_COMMAND(&alloc_lock);
return map_address; return map_address;
} }
@ -2332,7 +2227,7 @@ static void *alloc_qalloc(void *address){
static void alloc_windows_free(struct release_t *release){ static void alloc_windows_free(struct release_t *release){
VirtualFree(release -> address, 0, MEM_RELEASE); VirtualFree(release -> address, BUFFER_SIZE, MEM_DECOMMIT);
} }
@ -2454,7 +2349,7 @@ static void alloc_hugetlb_free(struct release_t *release){
#ifdef OS_WINDOWS #ifdef OS_WINDOWS
VirtualFree(release -> address, 0, MEM_LARGE_PAGES | MEM_RELEASE); VirtualFree(release -> address, BUFFER_SIZE, MEM_LARGE_PAGES | MEM_DECOMMIT);
#endif #endif
@ -2625,7 +2520,7 @@ void *blas_memory_alloc(int procpos){
int position; int position;
#if defined(WHEREAMI) && !defined(USE_OPENMP) #if defined(WHEREAMI) && !defined(USE_OPENMP)
int mypos = 0; int mypos;
#endif #endif
void *map_address; void *map_address;
@ -2656,11 +2551,6 @@ void *blas_memory_alloc(int procpos){
NULL, NULL,
}; };
void *(**func)(void *address); void *(**func)(void *address);
#if defined(USE_OPENMP)
if (!memory_initialized) {
#endif
LOCK_COMMAND(&alloc_lock); LOCK_COMMAND(&alloc_lock);
if (!memory_initialized) { if (!memory_initialized) {
@ -2696,9 +2586,6 @@ void *blas_memory_alloc(int procpos){
} }
UNLOCK_COMMAND(&alloc_lock); UNLOCK_COMMAND(&alloc_lock);
#if defined(USE_OPENMP)
}
#endif
#ifdef DEBUG #ifdef DEBUG
printf("Alloc Start ...\n"); printf("Alloc Start ...\n");
@ -2713,17 +2600,13 @@ void *blas_memory_alloc(int procpos){
do { do {
if (!memory[position].used && (memory[position].pos == mypos)) { if (!memory[position].used && (memory[position].pos == mypos)) {
#if defined(SMP) && !defined(USE_OPENMP)
LOCK_COMMAND(&alloc_lock); LOCK_COMMAND(&alloc_lock);
#else // blas_lock(&memory[position].lock);
blas_lock(&memory[position].lock);
#endif
if (!memory[position].used) goto allocation; if (!memory[position].used) goto allocation;
#if defined(SMP) && !defined(USE_OPENMP)
UNLOCK_COMMAND(&alloc_lock); UNLOCK_COMMAND(&alloc_lock);
#else // blas_unlock(&memory[position].lock);
blas_unlock(&memory[position].lock);
#endif
} }
position ++; position ++;
@ -2735,26 +2618,21 @@ void *blas_memory_alloc(int procpos){
position = 0; position = 0;
#if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP)
LOCK_COMMAND(&alloc_lock); LOCK_COMMAND(&alloc_lock);
#endif
do { do {
#if defined(USE_OPENMP) /* if (!memory[position].used) { */
if (!memory[position].used) { /* blas_lock(&memory[position].lock);*/
blas_lock(&memory[position].lock);
#endif
if (!memory[position].used) goto allocation; if (!memory[position].used) goto allocation;
#if defined(USE_OPENMP) /* blas_unlock(&memory[position].lock);*/
blas_unlock(&memory[position].lock); /* } */
}
#endif
position ++; position ++;
} while (position < NUM_BUFFERS); } while (position < NUM_BUFFERS);
#if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP)
UNLOCK_COMMAND(&alloc_lock); UNLOCK_COMMAND(&alloc_lock);
#endif
goto error; goto error;
allocation : allocation :
@ -2764,11 +2642,10 @@ void *blas_memory_alloc(int procpos){
#endif #endif
memory[position].used = 1; memory[position].used = 1;
#if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP)
UNLOCK_COMMAND(&alloc_lock); UNLOCK_COMMAND(&alloc_lock);
#else /* blas_unlock(&memory[position].lock);*/
blas_unlock(&memory[position].lock);
#endif
if (!memory[position].addr) { if (!memory[position].addr) {
do { do {
#ifdef DEBUG #ifdef DEBUG
@ -2785,7 +2662,7 @@ void *blas_memory_alloc(int procpos){
#ifdef ALLOC_DEVICEDRIVER #ifdef ALLOC_DEVICEDRIVER
if ((*func == alloc_devicedirver) && (map_address == (void *)-1)) { if ((*func == alloc_devicedirver) && (map_address == (void *)-1)) {
fprintf(stderr, "OpenBLAS Warning ... Physically contiguous allocation was failed.\n"); fprintf(stderr, "OpenBLAS Warning ... Physically contigous allocation was failed.\n");
} }
#endif #endif
@ -2813,13 +2690,9 @@ void *blas_memory_alloc(int procpos){
} while ((BLASLONG)map_address == -1); } while ((BLASLONG)map_address == -1);
#if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP)
LOCK_COMMAND(&alloc_lock); LOCK_COMMAND(&alloc_lock);
#endif
memory[position].addr = map_address; memory[position].addr = map_address;
#if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP)
UNLOCK_COMMAND(&alloc_lock); UNLOCK_COMMAND(&alloc_lock);
#endif
#ifdef DEBUG #ifdef DEBUG
printf(" Mapping Succeeded. %p(%d)\n", (void *)memory[position].addr, position); printf(" Mapping Succeeded. %p(%d)\n", (void *)memory[position].addr, position);
@ -2873,9 +2746,8 @@ void blas_memory_free(void *free_area){
#endif #endif
position = 0; position = 0;
#if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP)
LOCK_COMMAND(&alloc_lock); LOCK_COMMAND(&alloc_lock);
#endif
while ((position < NUM_BUFFERS) && (memory[position].addr != free_area)) while ((position < NUM_BUFFERS) && (memory[position].addr != free_area))
position++; position++;
@ -2889,9 +2761,7 @@ void blas_memory_free(void *free_area){
WMB; WMB;
memory[position].used = 0; memory[position].used = 0;
#if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP)
UNLOCK_COMMAND(&alloc_lock); UNLOCK_COMMAND(&alloc_lock);
#endif
#ifdef DEBUG #ifdef DEBUG
printf("Unmap Succeeded.\n\n"); printf("Unmap Succeeded.\n\n");
@ -2906,9 +2776,8 @@ void blas_memory_free(void *free_area){
for (position = 0; position < NUM_BUFFERS; position++) for (position = 0; position < NUM_BUFFERS; position++)
printf("%4ld %p : %d\n", position, memory[position].addr, memory[position].used); printf("%4ld %p : %d\n", position, memory[position].addr, memory[position].used);
#endif #endif
#if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP)
UNLOCK_COMMAND(&alloc_lock); UNLOCK_COMMAND(&alloc_lock);
#endif
return; return;
} }
@ -2958,7 +2827,7 @@ void blas_shutdown(void){
#if defined(OS_LINUX) && !defined(NO_WARMUP) #if defined(OS_LINUX) && !defined(NO_WARMUP)
#if defined(SMP) || defined(USE_LOCKING) #ifdef SMP
#if defined(USE_PTHREAD_LOCK) #if defined(USE_PTHREAD_LOCK)
static pthread_mutex_t init_lock = PTHREAD_MUTEX_INITIALIZER; static pthread_mutex_t init_lock = PTHREAD_MUTEX_INITIALIZER;
#elif defined(USE_PTHREAD_SPINLOCK) #elif defined(USE_PTHREAD_SPINLOCK)
@ -2983,7 +2852,7 @@ static void _touch_memory(blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n,
if (hot_alloc != 2) { if (hot_alloc != 2) {
#endif #endif
#if defined(SMP) || defined(USE_LOCKING) #ifdef SMP
LOCK_COMMAND(&init_lock); LOCK_COMMAND(&init_lock);
#endif #endif
@ -2993,7 +2862,7 @@ static void _touch_memory(blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n,
size -= PAGESIZE; size -= PAGESIZE;
} }
#if defined(SMP) || defined(USE_LOCKING) #ifdef SMP
UNLOCK_COMMAND(&init_lock); UNLOCK_COMMAND(&init_lock);
#endif #endif
@ -3226,7 +3095,7 @@ void gotoblas_dummy_for_PGI(void) {
gotoblas_init(); gotoblas_init();
gotoblas_quit(); gotoblas_quit();
#if __PGIC__ < 19
#if 0 #if 0
asm ("\t.section\t.ctors,\"aw\",@progbits; .align 8; .quad gotoblas_init; .section .text"); asm ("\t.section\t.ctors,\"aw\",@progbits; .align 8; .quad gotoblas_init; .section .text");
asm ("\t.section\t.dtors,\"aw\",@progbits; .align 8; .quad gotoblas_quit; .section .text"); asm ("\t.section\t.dtors,\"aw\",@progbits; .align 8; .quad gotoblas_quit; .section .text");
@ -3234,7 +3103,6 @@ void gotoblas_dummy_for_PGI(void) {
asm (".section .init,\"ax\"; call gotoblas_init@PLT; .section .text"); asm (".section .init,\"ax\"; call gotoblas_init@PLT; .section .text");
asm (".section .fini,\"ax\"; call gotoblas_quit@PLT; .section .text"); asm (".section .fini,\"ax\"; call gotoblas_quit@PLT; .section .text");
#endif #endif
#endif
} }
#endif #endif