Merge branch 'develop' of https://github.com/TimothyGu/OpenBLAS into TimothyGu-develop
Conflicts: driver/others/memory.c
This commit is contained in:
@@ -13,19 +13,19 @@ met:
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the ISCAS nor the names of its contributors may
|
||||
be used to endorse or promote products derived from this software
|
||||
3. Neither the name of the ISCAS nor the names of its contributors may
|
||||
be used to endorse or promote products derived from this software
|
||||
without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
**********************************************************************************/
|
||||
@@ -136,8 +136,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
#define BITMASK(a, b, c) ((((a) >> (b)) & (c)))
|
||||
|
||||
#define CONSTRUCTOR __attribute__ ((constructor))
|
||||
#define DESTRUCTOR __attribute__ ((destructor))
|
||||
#define CONSTRUCTOR __attribute__ ((constructor))
|
||||
#define DESTRUCTOR __attribute__ ((destructor))
|
||||
|
||||
#ifdef DYNAMIC_ARCH
|
||||
gotoblas_t *gotoblas = NULL;
|
||||
@@ -171,32 +171,32 @@ int get_num_procs(void) {
|
||||
#ifdef OS_WINDOWS
|
||||
|
||||
int get_num_procs(void) {
|
||||
|
||||
|
||||
static int nums = 0;
|
||||
|
||||
if (nums == 0) {
|
||||
|
||||
SYSTEM_INFO sysinfo;
|
||||
|
||||
|
||||
GetSystemInfo(&sysinfo);
|
||||
|
||||
nums = sysinfo.dwNumberOfProcessors;
|
||||
}
|
||||
|
||||
|
||||
return nums;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(OS_FREEBSD)
|
||||
#if defined(OS_FREEBSD)
|
||||
|
||||
int get_num_procs(void) {
|
||||
|
||||
|
||||
static int nums = 0;
|
||||
|
||||
int m[2];
|
||||
size_t len;
|
||||
|
||||
|
||||
if (nums == 0) {
|
||||
m[0] = CTL_HW;
|
||||
m[1] = HW_NCPU;
|
||||
@@ -232,7 +232,7 @@ void set_stack_limit(int limitMB){
|
||||
rl.rlim_cur=StackSize;
|
||||
result=setrlimit(RLIMIT_STACK, &rl);
|
||||
if(result !=0){
|
||||
fprintf(stderr, "OpenBLAS: set stack limit error =%d\n", result);
|
||||
fprintf(stderr, "OpenBLAS: set stack limit error =%d\n", result);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -241,12 +241,12 @@ void set_stack_limit(int limitMB){
|
||||
#endif
|
||||
|
||||
/*
|
||||
OpenBLAS uses the number of CPU cores for multithreading.
|
||||
OpenBLAS uses the number of CPU cores for multithreading.
|
||||
It can be set by openblas_set_num_threads(int num_threads);
|
||||
*/
|
||||
int blas_cpu_number = 0;
|
||||
/*
|
||||
The number of threads in the thread pool.
|
||||
The number of threads in the thread pool.
|
||||
This value is equal to or larger than blas_cpu_number. This means some threads are asleep.
|
||||
*/
|
||||
int blas_num_threads = 0;
|
||||
@@ -295,7 +295,7 @@ int blas_get_cpu_number(void){
|
||||
if (readenv(p,"GOTO_NUM_THREADS")) blas_goto_num = atoi(p);
|
||||
if (blas_goto_num < 0) blas_goto_num = 0;
|
||||
}
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
blas_omp_num = 0;
|
||||
@@ -315,8 +315,8 @@ int blas_get_cpu_number(void){
|
||||
#ifdef DEBUG
|
||||
printf( "Adjusted number of threads : %3d\n", blas_num_threads);
|
||||
#endif
|
||||
|
||||
blas_cpu_number = blas_num_threads;
|
||||
|
||||
blas_cpu_number = blas_num_threads;
|
||||
|
||||
return blas_num_threads;
|
||||
}
|
||||
@@ -352,12 +352,12 @@ static void *alloc_mmap(void *address){
|
||||
void *map_address;
|
||||
|
||||
if (address){
|
||||
map_address = mmap(address,
|
||||
BUFFER_SIZE,
|
||||
map_address = mmap(address,
|
||||
BUFFER_SIZE,
|
||||
MMAP_ACCESS, MMAP_POLICY | MAP_FIXED, -1, 0);
|
||||
} else {
|
||||
map_address = mmap(address,
|
||||
BUFFER_SIZE,
|
||||
map_address = mmap(address,
|
||||
BUFFER_SIZE,
|
||||
MMAP_ACCESS, MMAP_POLICY, -1, 0);
|
||||
}
|
||||
|
||||
@@ -384,7 +384,7 @@ static inline BLASULONG run_bench(BLASULONG address, BLASULONG size) {
|
||||
BLASULONG original, *p;
|
||||
BLASULONG start, stop, min;
|
||||
int iter, i, count;
|
||||
|
||||
|
||||
min = (BLASULONG)-1;
|
||||
|
||||
original = *(BLASULONG *)(address + size - PAGESIZE);
|
||||
@@ -394,20 +394,20 @@ static inline BLASULONG run_bench(BLASULONG address, BLASULONG size) {
|
||||
for (iter = 0; iter < BENCH_ITERATION; iter ++ ) {
|
||||
|
||||
p = (BLASULONG *)address;
|
||||
|
||||
|
||||
count = size / PAGESIZE;
|
||||
|
||||
|
||||
start = rpcc();
|
||||
|
||||
|
||||
for (i = 0; i < count; i ++) {
|
||||
p = (BLASULONG *)(*p);
|
||||
}
|
||||
|
||||
|
||||
stop = rpcc();
|
||||
|
||||
|
||||
if (min > stop - start) min = stop - start;
|
||||
}
|
||||
|
||||
|
||||
*(BLASULONG *)(address + size - PAGESIZE + 0) = original;
|
||||
*(BLASULONG *)(address + size - PAGESIZE + 8) = (BLASULONG)p;
|
||||
|
||||
@@ -439,11 +439,11 @@ static void *alloc_mmap(void *address){
|
||||
} else {
|
||||
#endif
|
||||
|
||||
map_address = mmap(NULL, BUFFER_SIZE * SCALING,
|
||||
map_address = mmap(NULL, BUFFER_SIZE * SCALING,
|
||||
MMAP_ACCESS, MMAP_POLICY, -1, 0);
|
||||
|
||||
|
||||
if (map_address != (void *)-1) {
|
||||
|
||||
|
||||
#ifdef OS_LINUX
|
||||
#ifdef DEBUG
|
||||
int ret=0;
|
||||
@@ -459,45 +459,45 @@ static void *alloc_mmap(void *address){
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
allocsize = DGEMM_P * DGEMM_Q * sizeof(double);
|
||||
|
||||
|
||||
start = (BLASULONG)map_address;
|
||||
current = (SCALING - 1) * BUFFER_SIZE;
|
||||
|
||||
|
||||
while(current > 0) {
|
||||
*(BLASLONG *)start = (BLASLONG)start + PAGESIZE;
|
||||
start += PAGESIZE;
|
||||
current -= PAGESIZE;
|
||||
}
|
||||
|
||||
|
||||
*(BLASLONG *)(start - PAGESIZE) = (BLASULONG)map_address;
|
||||
|
||||
|
||||
start = (BLASULONG)map_address;
|
||||
|
||||
|
||||
best = (BLASULONG)-1;
|
||||
best_address = map_address;
|
||||
|
||||
|
||||
while ((start + allocsize < (BLASULONG)map_address + (SCALING - 1) * BUFFER_SIZE)) {
|
||||
|
||||
|
||||
current = run_bench(start, allocsize);
|
||||
|
||||
|
||||
if (best > current) {
|
||||
best = current;
|
||||
best_address = (void *)start;
|
||||
}
|
||||
|
||||
|
||||
start += PAGESIZE;
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
if ((BLASULONG)best_address > (BLASULONG)map_address)
|
||||
munmap(map_address, (BLASULONG)best_address - (BLASULONG)map_address);
|
||||
|
||||
|
||||
munmap((void *)((BLASULONG)best_address + BUFFER_SIZE), (SCALING - 1) * BUFFER_SIZE + (BLASULONG)map_address - (BLASULONG)best_address);
|
||||
|
||||
|
||||
map_address = best_address;
|
||||
|
||||
|
||||
#if defined(OS_LINUX) && !defined(NO_WARMUP)
|
||||
hot_alloc = 2;
|
||||
#endif
|
||||
@@ -629,7 +629,7 @@ static void alloc_devicedirver_free(struct release_t *release){
|
||||
}
|
||||
|
||||
static void *alloc_devicedirver(void *address){
|
||||
|
||||
|
||||
int fd;
|
||||
void *map_address;
|
||||
|
||||
@@ -643,7 +643,7 @@ static void *alloc_devicedirver(void *address){
|
||||
PROT_READ | PROT_WRITE,
|
||||
MAP_FILE | MAP_SHARED,
|
||||
fd, 0);
|
||||
|
||||
|
||||
if (map_address != (void *)-1) {
|
||||
release_info[release_pos].address = map_address;
|
||||
release_info[release_pos].attr = fd;
|
||||
@@ -668,9 +668,9 @@ static void alloc_shm_free(struct release_t *release){
|
||||
static void *alloc_shm(void *address){
|
||||
void *map_address;
|
||||
int shmid;
|
||||
|
||||
|
||||
shmid = shmget(IPC_PRIVATE, BUFFER_SIZE,IPC_CREAT | 0600);
|
||||
|
||||
|
||||
map_address = (void *)shmat(shmid, address, 0);
|
||||
|
||||
if (map_address != (void *)-1){
|
||||
@@ -722,7 +722,7 @@ static void *alloc_hugetlb(void *address){
|
||||
|
||||
#if defined(OS_LINUX) || defined(OS_AIX)
|
||||
int shmid;
|
||||
|
||||
|
||||
shmid = shmget(IPC_PRIVATE, BUFFER_SIZE,
|
||||
#ifdef OS_LINUX
|
||||
SHM_HUGETLB |
|
||||
@@ -731,10 +731,10 @@ static void *alloc_hugetlb(void *address){
|
||||
SHM_LGPAGE | SHM_PIN |
|
||||
#endif
|
||||
IPC_CREAT | SHM_R | SHM_W);
|
||||
|
||||
|
||||
if (shmid != -1) {
|
||||
map_address = (void *)shmat(shmid, address, SHM_RND);
|
||||
|
||||
|
||||
#ifdef OS_LINUX
|
||||
my_mbind(map_address, BUFFER_SIZE, MPOL_PREFERRED, NULL, 0, 0);
|
||||
#endif
|
||||
@@ -747,7 +747,7 @@ static void *alloc_hugetlb(void *address){
|
||||
|
||||
#ifdef __sun__
|
||||
struct memcntl_mha mha;
|
||||
|
||||
|
||||
mha.mha_cmd = MHA_MAPSIZE_BSSBRK;
|
||||
mha.mha_flags = 0;
|
||||
mha.mha_pagesize = HUGE_PAGESIZE;
|
||||
@@ -785,7 +785,7 @@ static void *alloc_hugetlb(void *address){
|
||||
AdjustTokenPrivileges(hToken, FALSE, &tp, 0, NULL, NULL);
|
||||
|
||||
if (map_address == (void *)NULL) map_address = (void *)-1;
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
if (map_address != (void *)-1){
|
||||
@@ -833,7 +833,7 @@ static void *alloc_hugetlbfile(void *address){
|
||||
PROT_READ | PROT_WRITE,
|
||||
MAP_SHARED,
|
||||
fd, 0);
|
||||
|
||||
|
||||
if (map_address != (void *)-1) {
|
||||
release_info[release_pos].address = map_address;
|
||||
release_info[release_pos].attr = fd;
|
||||
@@ -886,7 +886,7 @@ static void gotoblas_memory_init(void);
|
||||
/* 2 : Thread */
|
||||
|
||||
void *blas_memory_alloc(int procpos){
|
||||
|
||||
|
||||
int position;
|
||||
#if defined(WHEREAMI) && !defined(USE_OPENMP)
|
||||
int mypos;
|
||||
@@ -921,11 +921,11 @@ void *blas_memory_alloc(int procpos){
|
||||
void *(**func)(void *address);
|
||||
|
||||
if (!memory_initialized) {
|
||||
|
||||
|
||||
LOCK_COMMAND(&alloc_lock);
|
||||
|
||||
|
||||
if (!memory_initialized) {
|
||||
|
||||
|
||||
#if defined(WHEREAMI) && !defined(USE_OPENMP)
|
||||
for (position = 0; position < NUM_BUFFERS; position ++){
|
||||
memory[position].addr = (void *)0;
|
||||
@@ -934,7 +934,7 @@ void *blas_memory_alloc(int procpos){
|
||||
memory[position].lock = 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef DYNAMIC_ARCH
|
||||
gotoblas_dynamic_init();
|
||||
#endif
|
||||
@@ -942,11 +942,11 @@ void *blas_memory_alloc(int procpos){
|
||||
#if defined(SMP) && defined(OS_LINUX) && !defined(NO_AFFINITY)
|
||||
gotoblas_affinity_init();
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef SMP
|
||||
if (!blas_num_threads) blas_cpu_number = blas_get_cpu_number();
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(ARCH_X86) || defined(ARCH_X86_64) || defined(ARCH_IA64) || defined(ARCH_MIPS64)
|
||||
#ifndef DYNAMIC_ARCH
|
||||
blas_set_parameter();
|
||||
@@ -972,16 +972,16 @@ void *blas_memory_alloc(int procpos){
|
||||
|
||||
do {
|
||||
if (!memory[position].used && (memory[position].pos == mypos)) {
|
||||
|
||||
|
||||
blas_lock(&memory[position].lock);
|
||||
|
||||
|
||||
if (!memory[position].used) goto allocation;
|
||||
|
||||
|
||||
blas_unlock(&memory[position].lock);
|
||||
}
|
||||
|
||||
|
||||
position ++;
|
||||
|
||||
|
||||
} while (position < NUM_BUFFERS);
|
||||
|
||||
|
||||
@@ -991,18 +991,18 @@ void *blas_memory_alloc(int procpos){
|
||||
|
||||
do {
|
||||
if (!memory[position].used) {
|
||||
|
||||
|
||||
blas_lock(&memory[position].lock);
|
||||
|
||||
if (!memory[position].used) goto allocation;
|
||||
|
||||
|
||||
blas_unlock(&memory[position].lock);
|
||||
}
|
||||
|
||||
|
||||
position ++;
|
||||
|
||||
|
||||
} while (position < NUM_BUFFERS);
|
||||
|
||||
|
||||
goto error;
|
||||
|
||||
allocation :
|
||||
@@ -1059,13 +1059,13 @@ void *blas_memory_alloc(int procpos){
|
||||
|
||||
} while ((BLASLONG)map_address == -1);
|
||||
|
||||
memory[position].addr = map_address;
|
||||
memory[position].addr = map_address;
|
||||
|
||||
#ifdef DEBUG
|
||||
printf(" Mapping Succeeded. %p(%d)\n", (void *)memory[position].addr, position);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
#if defined(WHEREAMI) && !defined(USE_OPENMP)
|
||||
|
||||
if (memory[position].pos == -1) memory[position].pos = mypos;
|
||||
@@ -1075,18 +1075,18 @@ void *blas_memory_alloc(int procpos){
|
||||
#ifdef DYNAMIC_ARCH
|
||||
|
||||
if (memory_initialized == 1) {
|
||||
|
||||
|
||||
LOCK_COMMAND(&alloc_lock);
|
||||
|
||||
|
||||
if (memory_initialized == 1) {
|
||||
|
||||
|
||||
if (!gotoblas) gotoblas_dynamic_init();
|
||||
|
||||
|
||||
memory_initialized = 2;
|
||||
}
|
||||
|
||||
|
||||
UNLOCK_COMMAND(&alloc_lock);
|
||||
|
||||
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -1094,8 +1094,8 @@ void *blas_memory_alloc(int procpos){
|
||||
#ifdef DEBUG
|
||||
printf("Mapped : %p %3d\n\n",
|
||||
(void *)memory[position].addr, position);
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
return (void *)memory[position].addr;
|
||||
|
||||
error:
|
||||
@@ -1110,8 +1110,8 @@ void blas_memory_free(void *free_area){
|
||||
|
||||
#ifdef DEBUG
|
||||
printf("Unmapped Start : %p ...\n", free_area);
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
position = 0;
|
||||
|
||||
while ((memory[position].addr != free_area)
|
||||
@@ -1121,21 +1121,21 @@ void blas_memory_free(void *free_area){
|
||||
|
||||
#ifdef DEBUG
|
||||
printf(" Position : %d\n", position);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
memory[position].used = 0;
|
||||
|
||||
#ifdef DEBUG
|
||||
printf("Unmap Succeeded.\n\n");
|
||||
#endif
|
||||
#endif
|
||||
|
||||
return;
|
||||
|
||||
|
||||
error:
|
||||
printf("BLAS : Bad memory unallocation! : %4d %p\n", position, free_area);
|
||||
|
||||
#ifdef DEBUG
|
||||
for (position = 0; position < NUM_BUFFERS; position++)
|
||||
for (position = 0; position < NUM_BUFFERS; position++)
|
||||
printf("%4ld %p : %d\n", position, memory[position].addr, memory[position].used);
|
||||
#endif
|
||||
|
||||
@@ -1155,7 +1155,7 @@ void blas_shutdown(void){
|
||||
for (pos = 0; pos < release_pos; pos ++) {
|
||||
release_info[pos].func(&release_info[pos]);
|
||||
}
|
||||
|
||||
|
||||
#ifdef SEEK_ADDRESS
|
||||
base_address = 0UL;
|
||||
#else
|
||||
@@ -1177,7 +1177,7 @@ void blas_shutdown(void){
|
||||
}
|
||||
|
||||
#if defined(OS_LINUX) && !defined(NO_WARMUP)
|
||||
|
||||
|
||||
#ifdef SMP
|
||||
#if defined(USE_PTHREAD_LOCK)
|
||||
static pthread_mutex_t init_lock = PTHREAD_MUTEX_INITIALIZER;
|
||||
@@ -1188,7 +1188,7 @@ static BLASULONG init_lock = 0UL;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
static void _touch_memory(blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n,
|
||||
static void _touch_memory(blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n,
|
||||
void *sa, void *sb, BLASLONG pos) {
|
||||
|
||||
#if !defined(ARCH_POWER) && !defined(ARCH_SPARC)
|
||||
@@ -1251,7 +1251,7 @@ static void _init_thread_memory(void *buffer) {
|
||||
|
||||
queue[num_cpu - 1].next = NULL;
|
||||
queue[0].sa = buffer;
|
||||
|
||||
|
||||
exec_blas(num_cpu, queue);
|
||||
|
||||
}
|
||||
@@ -1270,15 +1270,15 @@ static void gotoblas_memory_init(void) {
|
||||
#ifdef SMP_SERVER
|
||||
if (blas_server_avail == 0) blas_thread_init();
|
||||
#endif
|
||||
|
||||
|
||||
_init_thread_memory((void *)((BLASULONG)buffer + GEMM_OFFSET_A));
|
||||
|
||||
|
||||
#else
|
||||
|
||||
|
||||
_touch_memory(NULL, NULL, NULL, (void *)((BLASULONG)buffer + GEMM_OFFSET_A), NULL, 0);
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
blas_memory_free(buffer);
|
||||
}
|
||||
#endif
|
||||
|
||||
Reference in New Issue
Block a user