Merge 2c47e6d152
into 52d3f7af50
This commit is contained in:
commit
4d3f572f69
|
@ -18,7 +18,7 @@ jobs:
|
|||
- COMMON_FLAGS="DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32"
|
||||
script:
|
||||
- set -e
|
||||
- travis_wait 45 make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE
|
||||
- make -s $COMMON_FLAGS $BTYPE
|
||||
- make -C test $COMMON_FLAGS $BTYPE
|
||||
- make -C ctest $COMMON_FLAGS $BTYPE
|
||||
- make -C utest $COMMON_FLAGS $BTYPE
|
||||
|
@ -69,7 +69,7 @@ jobs:
|
|||
- gfortran-mingw-w64-x86-64
|
||||
before_script: *common-before
|
||||
script:
|
||||
- travis_wait 45 make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE
|
||||
- make -s $COMMON_FLAGS $BTYPE
|
||||
env:
|
||||
- TARGET_BOX=WIN64
|
||||
- BTYPE="BINARY=64 HOSTCC=gcc CC=x86_64-w64-mingw32-gcc FC=x86_64-w64-mingw32-gfortran"
|
||||
|
@ -94,7 +94,7 @@ jobs:
|
|||
script:
|
||||
- set -e
|
||||
# XXX: Disable some warnings for now to avoid exceeding Travis limit for log size.
|
||||
- alpine make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE
|
||||
- alpine make -s $COMMON_FLAGS $BTYPE
|
||||
CFLAGS="-Wno-misleading-indentation -Wno-sign-conversion -Wno-incompatible-pointer-types"
|
||||
- alpine make -C test $COMMON_FLAGS $BTYPE
|
||||
- alpine make -C ctest $COMMON_FLAGS $BTYPE
|
||||
|
@ -160,7 +160,7 @@ jobs:
|
|||
- brew update
|
||||
- brew install gcc # for gfortran
|
||||
script:
|
||||
- travis_wait 45 make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE
|
||||
- make -s $COMMON_FLAGS $BTYPE
|
||||
env:
|
||||
- BTYPE="BINARY=64 INTERFACE64=1"
|
||||
|
||||
|
|
|
@ -143,14 +143,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#define FIXED_PAGESIZE 4096
|
||||
#endif
|
||||
|
||||
#ifndef BUFFERS_PER_THREAD
|
||||
#ifdef USE_OPENMP_UNUSED
|
||||
#define BUFFERS_PER_THREAD (MAX_CPU_NUMBER * 2 * MAX_PARALLEL_NUMBER)
|
||||
#else
|
||||
#define BUFFERS_PER_THREAD NUM_BUFFERS
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#define BITMASK(a, b, c) ((((a) >> (b)) & (c)))
|
||||
|
||||
#if defined(_MSC_VER) && !defined(__clang__)
|
||||
|
@ -338,7 +330,8 @@ int goto_get_num_procs (void) {
|
|||
return blas_cpu_number;
|
||||
}
|
||||
|
||||
static void blas_memory_init();
|
||||
static void blas_memory_init_from_fork();
|
||||
static void blas_tls_init();
|
||||
|
||||
void openblas_fork_handler()
|
||||
{
|
||||
|
@ -351,7 +344,7 @@ void openblas_fork_handler()
|
|||
// implementation of OpenMP.
|
||||
#if !((defined(OS_WINDOWS) && !defined(OS_CYGWIN_NT)) || defined(OS_ANDROID)) && defined(SMP_SERVER)
|
||||
int err;
|
||||
err = pthread_atfork ((void (*)(void)) BLASFUNC(blas_thread_shutdown), NULL, blas_memory_init);
|
||||
err = pthread_atfork ((void (*)(void)) BLASFUNC(blas_thread_shutdown), NULL, blas_memory_init_from_fork);
|
||||
if(err != 0)
|
||||
openblas_warning(0, "OpenBLAS Warning ... cannot install fork handler. You may meet hang after fork.\n");
|
||||
#endif
|
||||
|
@ -432,10 +425,8 @@ int openblas_get_num_threads(void) {
|
|||
int hugetlb_allocated = 0;
|
||||
|
||||
#if defined(OS_WINDOWS)
|
||||
#define THREAD_LOCAL __declspec(thread)
|
||||
#define LIKELY_ONE(x) (x)
|
||||
#else
|
||||
#define THREAD_LOCAL __thread
|
||||
#define LIKELY_ONE(x) (__builtin_expect(x, 1))
|
||||
#endif
|
||||
|
||||
|
@ -471,105 +462,65 @@ struct alloc_t {
|
|||
for an auxiliary tracking structure. */
|
||||
static const int allocation_block_size = BUFFER_SIZE + sizeof(struct alloc_t);
|
||||
|
||||
/* Clang supports TLS from version 2.8 */
|
||||
#if defined(__clang__) && __clang_major__ > 2 || \
|
||||
(__clang_minor__ == 2 || __clang_minor__ == 8)
|
||||
#define HAS_COMPILER_TLS
|
||||
#endif
|
||||
|
||||
/* GCC supports TLS from version 4.1 */
|
||||
#if !defined(__clang__) && defined(__GNUC__) && \
|
||||
(__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1))
|
||||
#define HAS_COMPILER_TLS
|
||||
#endif
|
||||
|
||||
/* MSVC supports TLS from version 2005 */
|
||||
#if defined(_MSC_VER) && _MSC_VER >= 1400
|
||||
#define HAS_COMPILER_TLS
|
||||
#endif
|
||||
|
||||
/* Versions of XCode before 8 did not properly support TLS */
|
||||
#if defined(__apple_build_version__) && __apple_build_version__ < 8000042
|
||||
#undef HAS_COMPILER_TLS
|
||||
#endif
|
||||
|
||||
/* Android NDK's before version 12b did not support TLS */
|
||||
#if defined(__ANDROID__) && defined(__clang__)
|
||||
#if __has_include(<android/ndk-version.h>)
|
||||
#include <android/ndk-version.h>
|
||||
#endif
|
||||
#if defined(__ANDROID__) && defined(__clang__) && defined(__NDK_MAJOR__) && \
|
||||
defined(__NDK_MINOR__) && \
|
||||
((__NDK_MAJOR__ < 12) || ((__NDK_MAJOR__ == 12) && (__NDK_MINOR__ < 1)))
|
||||
#undef HAS_COMPILER_TLS
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* Holds pointers to allocated memory */
|
||||
#if defined(SMP) && !defined(USE_OPENMP_UNUSED)
|
||||
/* This is the number of threads than can be spawned by the server, which is the
|
||||
server plus the number of threads in the thread pool */
|
||||
# define MAX_ALLOCATING_THREADS MAX_CPU_NUMBER * 2 * MAX_PARALLEL_NUMBER * 2
|
||||
static int next_memory_table_pos = 0;
|
||||
# if defined(HAS_COMPILER_TLS)
|
||||
/* Use compiler generated thread-local-storage */
|
||||
static int THREAD_LOCAL local_memory_table_pos = 0;
|
||||
#if defined(SMP)
|
||||
# if defined(OS_WINDOWS)
|
||||
static DWORD local_storage_key = 0;
|
||||
# else
|
||||
/* Use system-dependent thread-local-storage */
|
||||
# if defined(OS_WINDOWS)
|
||||
static DWORD local_storage_key;
|
||||
# else
|
||||
static pthread_key_t local_storage_key;
|
||||
# endif /* defined(OS_WINDOWS) */
|
||||
# endif /* defined(HAS_COMPILER_TLS) */
|
||||
#else
|
||||
/* There is only one allocating thread when in single-threaded mode and when using OpenMP */
|
||||
# define MAX_ALLOCATING_THREADS 1
|
||||
#endif /* defined(SMP) && !defined(USE_OPENMP) */
|
||||
static struct alloc_t * local_memory_table[MAX_ALLOCATING_THREADS][BUFFERS_PER_THREAD];
|
||||
static pthread_key_t local_storage_key = 0;
|
||||
# endif /* defined(OS_WINDOWS) */
|
||||
#endif /* defined(SMP) */
|
||||
|
||||
#if defined(OS_LINUX) && !defined(NO_WARMUP)
|
||||
static int hot_alloc = 0;
|
||||
#endif
|
||||
|
||||
/* Global lock for memory allocation */
|
||||
/* Global locks for memory allocation */
|
||||
|
||||
#if defined(USE_PTHREAD_LOCK)
|
||||
static pthread_mutex_t alloc_lock = PTHREAD_MUTEX_INITIALIZER;
|
||||
static pthread_mutex_t tls_lock = PTHREAD_MUTEX_INITIALIZER;
|
||||
#elif defined(USE_PTHREAD_SPINLOCK)
|
||||
static pthread_spinlock_t alloc_lock = 0;
|
||||
static pthread_spinlock_t tls_lock = 0;
|
||||
#else
|
||||
static BLASULONG alloc_lock = 0UL;
|
||||
static BLASULONG tls_lock = 0UL;
|
||||
#endif
|
||||
|
||||
/* Returns a pointer to the start of the per-thread memory allocation data */
|
||||
static __inline struct alloc_t ** get_memory_table() {
|
||||
#if defined(SMP) && !defined(USE_OPENMP_UNUSED)
|
||||
# if !defined(HAS_COMPILER_TLS)
|
||||
# if defined(OS_WINDOWS)
|
||||
int local_memory_table_pos = (int)::TlsGetValue(local_storage_key);
|
||||
# else
|
||||
int local_memory_table_pos = (int)pthread_getspecific(local_storage_key);
|
||||
# endif /* defined(OS_WINDOWS) */
|
||||
# endif /* !defined(HAS_COMPILER_TLS) */
|
||||
if (!local_memory_table_pos) {
|
||||
LOCK_COMMAND(&alloc_lock);
|
||||
local_memory_table_pos = next_memory_table_pos++;
|
||||
if (next_memory_table_pos > MAX_ALLOCATING_THREADS)
|
||||
printf("OpenBLAS : Program will terminate because you tried to start too many threads.\n");
|
||||
UNLOCK_COMMAND(&alloc_lock);
|
||||
# if !defined(HAS_COMPILER_TLS)
|
||||
# if defined(OS_WINDOWS)
|
||||
::TlsSetValue(local_storage_key, (void*)local_memory_table_pos);
|
||||
# else
|
||||
pthread_setspecific(local_storage_key, (void*)local_memory_table_pos);
|
||||
# endif /* defined(OS_WINDOWS) */
|
||||
# endif /* !defined(HAS_COMPILER_TLS) */
|
||||
#if defined(SMP)
|
||||
static int tls_initialized = 0;
|
||||
if (!LIKELY_ONE(tls_initialized)) {
|
||||
LOCK_COMMAND(&tls_lock);
|
||||
/* Only one thread can get here at a time, so we are guaranteed to only do this initialization once */
|
||||
if (!tls_initialized) {
|
||||
blas_tls_init();
|
||||
/* Now any new thread entering the outer block will either do the TLS init, or nothing */
|
||||
tls_initialized = 1;
|
||||
}
|
||||
UNLOCK_COMMAND(&tls_lock);
|
||||
}
|
||||
return local_memory_table[local_memory_table_pos];
|
||||
# if defined(OS_WINDOWS)
|
||||
struct alloc_t ** local_memory_table = (struct alloc_t **)TlsGetValue(local_storage_key);
|
||||
# else
|
||||
struct alloc_t ** local_memory_table = (struct alloc_t **)pthread_getspecific(local_storage_key);
|
||||
# endif /* defined(OS_WINDOWS) */
|
||||
#else
|
||||
return local_memory_table[0];
|
||||
#endif /* defined(SMP) && !defined(USE_OPENMP) */
|
||||
static struct alloc_t ** local_memory_table = NULL;
|
||||
#endif /* defined(SMP) */
|
||||
if (local_storage_key && !local_memory_table) {
|
||||
local_memory_table = (struct alloc_t **)malloc(sizeof(struct alloc_t *) * NUM_BUFFERS);
|
||||
memset(local_memory_table, 0, sizeof(struct alloc_t *) * NUM_BUFFERS);
|
||||
#if defined(SMP)
|
||||
# if defined(OS_WINDOWS)
|
||||
TlsSetValue(local_storage_key, (void*)local_memory_table);
|
||||
# else
|
||||
pthread_setspecific(local_storage_key, (void*)local_memory_table);
|
||||
# endif /* defined(OS_WINDOWS) */
|
||||
#endif /* defined(SMP) */
|
||||
}
|
||||
return local_memory_table;
|
||||
}
|
||||
|
||||
#ifdef ALLOC_MMAP
|
||||
|
@ -1069,18 +1020,40 @@ static volatile int memory_initialized = 0;
|
|||
/* 1 : Level 2 functions */
|
||||
/* 2 : Thread */
|
||||
|
||||
static void blas_memory_cleanup(void* ptr){
|
||||
if (ptr) {
|
||||
struct alloc_t ** table = (struct alloc_t **)ptr;
|
||||
int pos;
|
||||
for (pos = 0; pos < NUM_BUFFERS; pos ++){
|
||||
struct alloc_t *alloc_info = table[pos];
|
||||
if (alloc_info) {
|
||||
alloc_info->release_func(alloc_info);
|
||||
table[pos] = (void *)0;
|
||||
}
|
||||
}
|
||||
free(table);
|
||||
}
|
||||
}
|
||||
|
||||
static void blas_tls_init(){
|
||||
#if defined(SMP)
|
||||
# if defined(OS_WINDOWS)
|
||||
local_storage_key = TlsAlloc();
|
||||
TlsSetValue(local_storage_key, (void*)0);
|
||||
# else
|
||||
pthread_key_create(&local_storage_key, blas_memory_cleanup);
|
||||
pthread_setspecific(local_storage_key, (void*)0);
|
||||
# endif /* defined(OS_WINDOWS) */
|
||||
#endif /* defined(SMP) */
|
||||
}
|
||||
|
||||
static void blas_memory_init(){
|
||||
#if defined(SMP) && !defined(USE_OPENMP_UNUSED)
|
||||
next_memory_table_pos = 0;
|
||||
# if !defined(HAS_COMPILER_TLS)
|
||||
# if defined(OS_WINDOWS)
|
||||
local_storage_key = ::TlsAlloc();
|
||||
# else
|
||||
pthread_key_create(&local_storage_key, NULL);
|
||||
# endif /* defined(OS_WINDOWS) */
|
||||
# endif /* defined(HAS_COMPILER_TLS) */
|
||||
#endif /* defined(SMP) && !defined(USE_OPENMP) */
|
||||
memset(local_memory_table, 0, sizeof(local_memory_table));
|
||||
memset(get_memory_table(), 0, sizeof(struct alloc_t *) * NUM_BUFFERS);
|
||||
}
|
||||
|
||||
static void blas_memory_init_from_fork(){
|
||||
blas_tls_init();
|
||||
blas_memory_init();
|
||||
}
|
||||
|
||||
void *blas_memory_alloc(int procpos){
|
||||
|
@ -1162,7 +1135,7 @@ void *blas_memory_alloc(int procpos){
|
|||
if (!alloc_table[position] || !alloc_table[position]->used) goto allocation;
|
||||
position ++;
|
||||
|
||||
} while (position < BUFFERS_PER_THREAD);
|
||||
} while (position < NUM_BUFFERS);
|
||||
|
||||
goto error;
|
||||
|
||||
|
@ -1260,7 +1233,7 @@ void blas_memory_free(void *buffer){
|
|||
|
||||
#ifdef DEBUG
|
||||
alloc_table = get_memory_table();
|
||||
for (position = 0; position < BUFFERS_PER_THREAD; position++){
|
||||
for (position = 0; position < NUM_BUFFERS; position++){
|
||||
if (alloc_table[position]) {
|
||||
printf("%4ld %p : %d\n", position, alloc_table[position], alloc_table[position]->used);
|
||||
}
|
||||
|
@ -1280,22 +1253,15 @@ void blas_memory_free_nolock(void * map_address) {
|
|||
}
|
||||
|
||||
void blas_shutdown(void){
|
||||
|
||||
int pos, thread;
|
||||
|
||||
#ifdef SMP
|
||||
BLASFUNC(blas_thread_shutdown)();
|
||||
#endif
|
||||
|
||||
for (thread = 0; thread < MAX_ALLOCATING_THREADS; thread ++){
|
||||
for (pos = 0; pos < BUFFERS_PER_THREAD; pos ++){
|
||||
struct alloc_t *alloc_info = local_memory_table[thread][pos];
|
||||
if (alloc_info) {
|
||||
alloc_info->release_func(alloc_info);
|
||||
local_memory_table[thread][pos] = (void *)0;
|
||||
}
|
||||
}
|
||||
}
|
||||
#ifdef SMP
|
||||
/* Only cleanupIf we were built for threading and TLS was initialized */
|
||||
if (local_storage_key)
|
||||
#endif
|
||||
blas_memory_cleanup((void*)get_memory_table());
|
||||
|
||||
#ifdef SEEK_ADDRESS
|
||||
base_address = 0UL;
|
||||
|
@ -1516,6 +1482,9 @@ BOOL APIENTRY DllMain(HMODULE hModule, DWORD ul_reason_for_call, LPVOID lpReser
|
|||
case DLL_THREAD_ATTACH:
|
||||
break;
|
||||
case DLL_THREAD_DETACH:
|
||||
#if defined(SMP)
|
||||
blas_memory_cleanup((void*)get_memory_table());
|
||||
#endif
|
||||
break;
|
||||
case DLL_PROCESS_DETACH:
|
||||
gotoblas_quit();
|
||||
|
|
Loading…
Reference in New Issue