This commit is contained in:
oon3m0oo 2018-08-22 12:56:56 +00:00 committed by GitHub
commit 4d3f572f69
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 90 additions and 121 deletions

View File

@ -18,7 +18,7 @@ jobs:
- COMMON_FLAGS="DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32" - COMMON_FLAGS="DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32"
script: script:
- set -e - set -e
- travis_wait 45 make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE - make -s $COMMON_FLAGS $BTYPE
- make -C test $COMMON_FLAGS $BTYPE - make -C test $COMMON_FLAGS $BTYPE
- make -C ctest $COMMON_FLAGS $BTYPE - make -C ctest $COMMON_FLAGS $BTYPE
- make -C utest $COMMON_FLAGS $BTYPE - make -C utest $COMMON_FLAGS $BTYPE
@ -69,7 +69,7 @@ jobs:
- gfortran-mingw-w64-x86-64 - gfortran-mingw-w64-x86-64
before_script: *common-before before_script: *common-before
script: script:
- travis_wait 45 make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE - make -s $COMMON_FLAGS $BTYPE
env: env:
- TARGET_BOX=WIN64 - TARGET_BOX=WIN64
- BTYPE="BINARY=64 HOSTCC=gcc CC=x86_64-w64-mingw32-gcc FC=x86_64-w64-mingw32-gfortran" - BTYPE="BINARY=64 HOSTCC=gcc CC=x86_64-w64-mingw32-gcc FC=x86_64-w64-mingw32-gfortran"
@ -94,7 +94,7 @@ jobs:
script: script:
- set -e - set -e
# XXX: Disable some warnings for now to avoid exceeding Travis limit for log size. # XXX: Disable some warnings for now to avoid exceeding Travis limit for log size.
- alpine make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE - alpine make -s $COMMON_FLAGS $BTYPE
CFLAGS="-Wno-misleading-indentation -Wno-sign-conversion -Wno-incompatible-pointer-types" CFLAGS="-Wno-misleading-indentation -Wno-sign-conversion -Wno-incompatible-pointer-types"
- alpine make -C test $COMMON_FLAGS $BTYPE - alpine make -C test $COMMON_FLAGS $BTYPE
- alpine make -C ctest $COMMON_FLAGS $BTYPE - alpine make -C ctest $COMMON_FLAGS $BTYPE
@ -160,7 +160,7 @@ jobs:
- brew update - brew update
- brew install gcc # for gfortran - brew install gcc # for gfortran
script: script:
- travis_wait 45 make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE - make -s $COMMON_FLAGS $BTYPE
env: env:
- BTYPE="BINARY=64 INTERFACE64=1" - BTYPE="BINARY=64 INTERFACE64=1"

View File

@ -143,14 +143,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define FIXED_PAGESIZE 4096 #define FIXED_PAGESIZE 4096
#endif #endif
#ifndef BUFFERS_PER_THREAD
#ifdef USE_OPENMP_UNUSED
#define BUFFERS_PER_THREAD (MAX_CPU_NUMBER * 2 * MAX_PARALLEL_NUMBER)
#else
#define BUFFERS_PER_THREAD NUM_BUFFERS
#endif
#endif
#define BITMASK(a, b, c) ((((a) >> (b)) & (c))) #define BITMASK(a, b, c) ((((a) >> (b)) & (c)))
#if defined(_MSC_VER) && !defined(__clang__) #if defined(_MSC_VER) && !defined(__clang__)
@ -338,7 +330,8 @@ int goto_get_num_procs (void) {
return blas_cpu_number; return blas_cpu_number;
} }
static void blas_memory_init(); static void blas_memory_init_from_fork();
static void blas_tls_init();
void openblas_fork_handler() void openblas_fork_handler()
{ {
@ -351,7 +344,7 @@ void openblas_fork_handler()
// implementation of OpenMP. // implementation of OpenMP.
#if !((defined(OS_WINDOWS) && !defined(OS_CYGWIN_NT)) || defined(OS_ANDROID)) && defined(SMP_SERVER) #if !((defined(OS_WINDOWS) && !defined(OS_CYGWIN_NT)) || defined(OS_ANDROID)) && defined(SMP_SERVER)
int err; int err;
err = pthread_atfork ((void (*)(void)) BLASFUNC(blas_thread_shutdown), NULL, blas_memory_init); err = pthread_atfork ((void (*)(void)) BLASFUNC(blas_thread_shutdown), NULL, blas_memory_init_from_fork);
if(err != 0) if(err != 0)
openblas_warning(0, "OpenBLAS Warning ... cannot install fork handler. You may meet hang after fork.\n"); openblas_warning(0, "OpenBLAS Warning ... cannot install fork handler. You may meet hang after fork.\n");
#endif #endif
@ -432,10 +425,8 @@ int openblas_get_num_threads(void) {
int hugetlb_allocated = 0; int hugetlb_allocated = 0;
#if defined(OS_WINDOWS) #if defined(OS_WINDOWS)
#define THREAD_LOCAL __declspec(thread)
#define LIKELY_ONE(x) (x) #define LIKELY_ONE(x) (x)
#else #else
#define THREAD_LOCAL __thread
#define LIKELY_ONE(x) (__builtin_expect(x, 1)) #define LIKELY_ONE(x) (__builtin_expect(x, 1))
#endif #endif
@ -471,105 +462,65 @@ struct alloc_t {
for an auxiliary tracking structure. */ for an auxiliary tracking structure. */
static const int allocation_block_size = BUFFER_SIZE + sizeof(struct alloc_t); static const int allocation_block_size = BUFFER_SIZE + sizeof(struct alloc_t);
/* Clang supports TLS from version 2.8 */ #if defined(SMP)
#if defined(__clang__) && __clang_major__ > 2 || \ # if defined(OS_WINDOWS)
(__clang_minor__ == 2 || __clang_minor__ == 8) static DWORD local_storage_key = 0;
#define HAS_COMPILER_TLS
#endif
/* GCC supports TLS from version 4.1 */
#if !defined(__clang__) && defined(__GNUC__) && \
(__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1))
#define HAS_COMPILER_TLS
#endif
/* MSVC supports TLS from version 2005 */
#if defined(_MSC_VER) && _MSC_VER >= 1400
#define HAS_COMPILER_TLS
#endif
/* Versions of XCode before 8 did not properly support TLS */
#if defined(__apple_build_version__) && __apple_build_version__ < 8000042
#undef HAS_COMPILER_TLS
#endif
/* Android NDK's before version 12b did not support TLS */
#if defined(__ANDROID__) && defined(__clang__)
#if __has_include(<android/ndk-version.h>)
#include <android/ndk-version.h>
#endif
#if defined(__ANDROID__) && defined(__clang__) && defined(__NDK_MAJOR__) && \
defined(__NDK_MINOR__) && \
((__NDK_MAJOR__ < 12) || ((__NDK_MAJOR__ == 12) && (__NDK_MINOR__ < 1)))
#undef HAS_COMPILER_TLS
#endif
#endif
/* Holds pointers to allocated memory */
#if defined(SMP) && !defined(USE_OPENMP_UNUSED)
/* This is the number of threads than can be spawned by the server, which is the
server plus the number of threads in the thread pool */
# define MAX_ALLOCATING_THREADS MAX_CPU_NUMBER * 2 * MAX_PARALLEL_NUMBER * 2
static int next_memory_table_pos = 0;
# if defined(HAS_COMPILER_TLS)
/* Use compiler generated thread-local-storage */
static int THREAD_LOCAL local_memory_table_pos = 0;
# else # else
/* Use system-dependent thread-local-storage */ static pthread_key_t local_storage_key = 0;
# if defined(OS_WINDOWS) # endif /* defined(OS_WINDOWS) */
static DWORD local_storage_key; #endif /* defined(SMP) */
# else
static pthread_key_t local_storage_key;
# endif /* defined(OS_WINDOWS) */
# endif /* defined(HAS_COMPILER_TLS) */
#else
/* There is only one allocating thread when in single-threaded mode and when using OpenMP */
# define MAX_ALLOCATING_THREADS 1
#endif /* defined(SMP) && !defined(USE_OPENMP) */
static struct alloc_t * local_memory_table[MAX_ALLOCATING_THREADS][BUFFERS_PER_THREAD];
#if defined(OS_LINUX) && !defined(NO_WARMUP) #if defined(OS_LINUX) && !defined(NO_WARMUP)
static int hot_alloc = 0; static int hot_alloc = 0;
#endif #endif
/* Global lock for memory allocation */ /* Global locks for memory allocation */
#if defined(USE_PTHREAD_LOCK) #if defined(USE_PTHREAD_LOCK)
static pthread_mutex_t alloc_lock = PTHREAD_MUTEX_INITIALIZER; static pthread_mutex_t alloc_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t tls_lock = PTHREAD_MUTEX_INITIALIZER;
#elif defined(USE_PTHREAD_SPINLOCK) #elif defined(USE_PTHREAD_SPINLOCK)
static pthread_spinlock_t alloc_lock = 0; static pthread_spinlock_t alloc_lock = 0;
static pthread_spinlock_t tls_lock = 0;
#else #else
static BLASULONG alloc_lock = 0UL; static BLASULONG alloc_lock = 0UL;
static BLASULONG tls_lock = 0UL;
#endif #endif
/* Returns a pointer to the start of the per-thread memory allocation data */ /* Returns a pointer to the start of the per-thread memory allocation data */
static __inline struct alloc_t ** get_memory_table() { static __inline struct alloc_t ** get_memory_table() {
#if defined(SMP) && !defined(USE_OPENMP_UNUSED) #if defined(SMP)
# if !defined(HAS_COMPILER_TLS) static int tls_initialized = 0;
# if defined(OS_WINDOWS) if (!LIKELY_ONE(tls_initialized)) {
int local_memory_table_pos = (int)::TlsGetValue(local_storage_key); LOCK_COMMAND(&tls_lock);
# else /* Only one thread can get here at a time, so we are guaranteed to only do this initialization once */
int local_memory_table_pos = (int)pthread_getspecific(local_storage_key); if (!tls_initialized) {
# endif /* defined(OS_WINDOWS) */ blas_tls_init();
# endif /* !defined(HAS_COMPILER_TLS) */ /* Now any new thread entering the outer block will either do the TLS init, or nothing */
if (!local_memory_table_pos) { tls_initialized = 1;
LOCK_COMMAND(&alloc_lock); }
local_memory_table_pos = next_memory_table_pos++; UNLOCK_COMMAND(&tls_lock);
if (next_memory_table_pos > MAX_ALLOCATING_THREADS)
printf("OpenBLAS : Program will terminate because you tried to start too many threads.\n");
UNLOCK_COMMAND(&alloc_lock);
# if !defined(HAS_COMPILER_TLS)
# if defined(OS_WINDOWS)
::TlsSetValue(local_storage_key, (void*)local_memory_table_pos);
# else
pthread_setspecific(local_storage_key, (void*)local_memory_table_pos);
# endif /* defined(OS_WINDOWS) */
# endif /* !defined(HAS_COMPILER_TLS) */
} }
return local_memory_table[local_memory_table_pos]; # if defined(OS_WINDOWS)
struct alloc_t ** local_memory_table = (struct alloc_t **)TlsGetValue(local_storage_key);
# else
struct alloc_t ** local_memory_table = (struct alloc_t **)pthread_getspecific(local_storage_key);
# endif /* defined(OS_WINDOWS) */
#else #else
return local_memory_table[0]; static struct alloc_t ** local_memory_table = NULL;
#endif /* defined(SMP) && !defined(USE_OPENMP) */ #endif /* defined(SMP) */
if (local_storage_key && !local_memory_table) {
local_memory_table = (struct alloc_t **)malloc(sizeof(struct alloc_t *) * NUM_BUFFERS);
memset(local_memory_table, 0, sizeof(struct alloc_t *) * NUM_BUFFERS);
#if defined(SMP)
# if defined(OS_WINDOWS)
TlsSetValue(local_storage_key, (void*)local_memory_table);
# else
pthread_setspecific(local_storage_key, (void*)local_memory_table);
# endif /* defined(OS_WINDOWS) */
#endif /* defined(SMP) */
}
return local_memory_table;
} }
#ifdef ALLOC_MMAP #ifdef ALLOC_MMAP
@ -1069,18 +1020,40 @@ static volatile int memory_initialized = 0;
/* 1 : Level 2 functions */ /* 1 : Level 2 functions */
/* 2 : Thread */ /* 2 : Thread */
static void blas_memory_cleanup(void* ptr){
if (ptr) {
struct alloc_t ** table = (struct alloc_t **)ptr;
int pos;
for (pos = 0; pos < NUM_BUFFERS; pos ++){
struct alloc_t *alloc_info = table[pos];
if (alloc_info) {
alloc_info->release_func(alloc_info);
table[pos] = (void *)0;
}
}
free(table);
}
}
static void blas_tls_init(){
#if defined(SMP)
# if defined(OS_WINDOWS)
local_storage_key = TlsAlloc();
TlsSetValue(local_storage_key, (void*)0);
# else
pthread_key_create(&local_storage_key, blas_memory_cleanup);
pthread_setspecific(local_storage_key, (void*)0);
# endif /* defined(OS_WINDOWS) */
#endif /* defined(SMP) */
}
static void blas_memory_init(){ static void blas_memory_init(){
#if defined(SMP) && !defined(USE_OPENMP_UNUSED) memset(get_memory_table(), 0, sizeof(struct alloc_t *) * NUM_BUFFERS);
next_memory_table_pos = 0; }
# if !defined(HAS_COMPILER_TLS)
# if defined(OS_WINDOWS) static void blas_memory_init_from_fork(){
local_storage_key = ::TlsAlloc(); blas_tls_init();
# else blas_memory_init();
pthread_key_create(&local_storage_key, NULL);
# endif /* defined(OS_WINDOWS) */
# endif /* defined(HAS_COMPILER_TLS) */
#endif /* defined(SMP) && !defined(USE_OPENMP) */
memset(local_memory_table, 0, sizeof(local_memory_table));
} }
void *blas_memory_alloc(int procpos){ void *blas_memory_alloc(int procpos){
@ -1162,7 +1135,7 @@ void *blas_memory_alloc(int procpos){
if (!alloc_table[position] || !alloc_table[position]->used) goto allocation; if (!alloc_table[position] || !alloc_table[position]->used) goto allocation;
position ++; position ++;
} while (position < BUFFERS_PER_THREAD); } while (position < NUM_BUFFERS);
goto error; goto error;
@ -1260,7 +1233,7 @@ void blas_memory_free(void *buffer){
#ifdef DEBUG #ifdef DEBUG
alloc_table = get_memory_table(); alloc_table = get_memory_table();
for (position = 0; position < BUFFERS_PER_THREAD; position++){ for (position = 0; position < NUM_BUFFERS; position++){
if (alloc_table[position]) { if (alloc_table[position]) {
printf("%4ld %p : %d\n", position, alloc_table[position], alloc_table[position]->used); printf("%4ld %p : %d\n", position, alloc_table[position], alloc_table[position]->used);
} }
@ -1280,22 +1253,15 @@ void blas_memory_free_nolock(void * map_address) {
} }
void blas_shutdown(void){ void blas_shutdown(void){
int pos, thread;
#ifdef SMP #ifdef SMP
BLASFUNC(blas_thread_shutdown)(); BLASFUNC(blas_thread_shutdown)();
#endif #endif
for (thread = 0; thread < MAX_ALLOCATING_THREADS; thread ++){ #ifdef SMP
for (pos = 0; pos < BUFFERS_PER_THREAD; pos ++){ /* Only cleanupIf we were built for threading and TLS was initialized */
struct alloc_t *alloc_info = local_memory_table[thread][pos]; if (local_storage_key)
if (alloc_info) { #endif
alloc_info->release_func(alloc_info); blas_memory_cleanup((void*)get_memory_table());
local_memory_table[thread][pos] = (void *)0;
}
}
}
#ifdef SEEK_ADDRESS #ifdef SEEK_ADDRESS
base_address = 0UL; base_address = 0UL;
@ -1516,6 +1482,9 @@ BOOL APIENTRY DllMain(HMODULE hModule, DWORD ul_reason_for_call, LPVOID lpReser
case DLL_THREAD_ATTACH: case DLL_THREAD_ATTACH:
break; break;
case DLL_THREAD_DETACH: case DLL_THREAD_DETACH:
#if defined(SMP)
blas_memory_cleanup((void*)get_memory_table());
#endif
break; break;
case DLL_PROCESS_DETACH: case DLL_PROCESS_DETACH:
gotoblas_quit(); gotoblas_quit();