Merge branch 'develop' into bulldozer
Conflicts: kernel/x86_64/KERNEL.BULLDOZER
This commit is contained in:
@@ -1,7 +1,7 @@
|
||||
TOPDIR = ../..
|
||||
include ../../Makefile.system
|
||||
|
||||
COMMONOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) c_abs.$(SUFFIX) z_abs.$(SUFFIX) openblas_set_num_threads.$(SUFFIX) openblas_get_config.$(SUFFIX)
|
||||
COMMONOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) c_abs.$(SUFFIX) z_abs.$(SUFFIX) openblas_set_num_threads.$(SUFFIX) openblas_get_config.$(SUFFIX) openblas_get_parallel.$(SUFFIX)
|
||||
|
||||
COMMONOBJS += slamch.$(SUFFIX) slamc3.$(SUFFIX) dlamch.$(SUFFIX) dlamc3.$(SUFFIX)
|
||||
|
||||
@@ -106,6 +106,9 @@ openblas_set_num_threads.$(SUFFIX) : openblas_set_num_threads.c
|
||||
openblas_get_config.$(SUFFIX) : openblas_get_config.c
|
||||
$(CC) $(CFLAGS) -c $< -o $(@F)
|
||||
|
||||
openblas_get_parallel.$(SUFFIX) : openblas_get_parallel.c
|
||||
$(CC) $(CFLAGS) -c $< -o $(@F)
|
||||
|
||||
blasL1thread.$(SUFFIX) : blas_l1_thread.c ../../common.h ../../common_thread.h
|
||||
$(CC) $(CFLAGS) -c $< -o $(@F)
|
||||
|
||||
|
||||
@@ -385,6 +385,7 @@ static int blas_thread_server(void *arg){
|
||||
+ GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
|
||||
}
|
||||
}
|
||||
queue->sb=sb;
|
||||
}
|
||||
|
||||
#ifdef MONITOR
|
||||
|
||||
@@ -49,8 +49,12 @@
|
||||
|
||||
int blas_server_avail = 0;
|
||||
|
||||
static void * blas_thread_buffer[MAX_CPU_NUMBER];
|
||||
|
||||
void goto_set_num_threads(int num_threads) {
|
||||
|
||||
int i=0;
|
||||
|
||||
if (num_threads < 1) num_threads = blas_num_threads;
|
||||
|
||||
if (num_threads > MAX_CPU_NUMBER) num_threads = MAX_CPU_NUMBER;
|
||||
@@ -62,7 +66,19 @@ void goto_set_num_threads(int num_threads) {
|
||||
blas_cpu_number = num_threads;
|
||||
|
||||
omp_set_num_threads(blas_cpu_number);
|
||||
|
||||
|
||||
//adjust buffer for each thread
|
||||
for(i=0; i<blas_cpu_number; i++){
|
||||
if(blas_thread_buffer[i]==NULL){
|
||||
blas_thread_buffer[i]=blas_memory_alloc(2);
|
||||
}
|
||||
}
|
||||
for(; i<MAX_CPU_NUMBER; i++){
|
||||
if(blas_thread_buffer[i]!=NULL){
|
||||
blas_memory_free(blas_thread_buffer[i]);
|
||||
blas_thread_buffer[i]=NULL;
|
||||
}
|
||||
}
|
||||
#if defined(ARCH_MIPS64)
|
||||
//set parameters for different number of threads.
|
||||
blas_set_parameter();
|
||||
@@ -76,17 +92,33 @@ void openblas_set_num_threads(int num_threads) {
|
||||
|
||||
int blas_thread_init(void){
|
||||
|
||||
int i=0;
|
||||
|
||||
blas_get_cpu_number();
|
||||
|
||||
blas_server_avail = 1;
|
||||
|
||||
for(i=0; i<blas_num_threads; i++){
|
||||
blas_thread_buffer[i]=blas_memory_alloc(2);
|
||||
}
|
||||
for(; i<MAX_CPU_NUMBER; i++){
|
||||
blas_thread_buffer[i]=NULL;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int BLASFUNC(blas_thread_shutdown)(void){
|
||||
|
||||
int i=0;
|
||||
blas_server_avail = 0;
|
||||
|
||||
for(i=0; i<MAX_CPU_NUMBER; i++){
|
||||
if(blas_thread_buffer[i]!=NULL){
|
||||
blas_memory_free(blas_thread_buffer[i]);
|
||||
blas_thread_buffer[i]=NULL;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -177,7 +209,8 @@ static void legacy_exec(void *func, int mode, blas_arg_t *args, void *sb){
|
||||
static void exec_threads(blas_queue_t *queue){
|
||||
|
||||
void *buffer, *sa, *sb;
|
||||
|
||||
int pos=0, release_flag=0;
|
||||
|
||||
buffer = NULL;
|
||||
sa = queue -> sa;
|
||||
sb = queue -> sb;
|
||||
@@ -189,9 +222,19 @@ static void exec_threads(blas_queue_t *queue){
|
||||
|
||||
if ((sa == NULL) && (sb == NULL) && ((queue -> mode & BLAS_PTHREAD) == 0)) {
|
||||
|
||||
buffer = blas_memory_alloc(2);
|
||||
pos = omp_get_thread_num();
|
||||
buffer = blas_thread_buffer[pos];
|
||||
|
||||
if (sa == NULL) sa = (void *)((BLASLONG)buffer + GEMM_OFFSET_A);
|
||||
//fallback
|
||||
if(buffer==NULL) {
|
||||
buffer = blas_memory_alloc(2);
|
||||
release_flag=1;
|
||||
}
|
||||
|
||||
if (sa == NULL) {
|
||||
sa = (void *)((BLASLONG)buffer + GEMM_OFFSET_A);
|
||||
queue->sa=sa;
|
||||
}
|
||||
|
||||
if (sb == NULL) {
|
||||
if (!(queue -> mode & BLAS_COMPLEX)){
|
||||
@@ -224,6 +267,7 @@ static void exec_threads(blas_queue_t *queue){
|
||||
+ GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
|
||||
}
|
||||
}
|
||||
queue->sb=sb;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -241,7 +285,7 @@ static void exec_threads(blas_queue_t *queue){
|
||||
|
||||
}
|
||||
|
||||
if (buffer != NULL) blas_memory_free(buffer);
|
||||
if (release_flag) blas_memory_free(buffer);
|
||||
|
||||
}
|
||||
|
||||
|
||||
@@ -253,6 +253,7 @@ static DWORD WINAPI blas_thread_server(void *arg){
|
||||
+ GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
|
||||
}
|
||||
}
|
||||
queue->sb=sb;
|
||||
}
|
||||
|
||||
#ifdef MONITOR
|
||||
@@ -495,4 +496,4 @@ void goto_set_num_threads(int num_threads)
|
||||
void openblas_set_num_threads(int num)
|
||||
{
|
||||
goto_set_num_threads(num);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -64,12 +64,15 @@ extern gotoblas_t gotoblas_BOBCAT;
|
||||
#ifndef NO_AVX
|
||||
extern gotoblas_t gotoblas_SANDYBRIDGE;
|
||||
extern gotoblas_t gotoblas_BULLDOZER;
|
||||
extern gotoblas_t gotoblas_PILEDRIVER;
|
||||
#else
|
||||
//Use NEHALEM kernels for sandy bridge
|
||||
#define gotoblas_SANDYBRIDGE gotoblas_NEHALEM
|
||||
#define gotoblas_BULLDOZER gotoblas_BARCELONA
|
||||
#define gotoblas_PILEDRIVER gotoblas_BARCELONA
|
||||
#endif
|
||||
|
||||
//Use sandy bridge kernels for haswell.
|
||||
#define gotoblas_HASWELL gotoblas_SANDYBRIDGE
|
||||
|
||||
#define VENDOR_INTEL 1
|
||||
#define VENDOR_AMD 2
|
||||
@@ -92,7 +95,7 @@ int support_avx(){
|
||||
int ret=0;
|
||||
|
||||
cpuid(1, &eax, &ebx, &ecx, &edx);
|
||||
if ((ecx & (1 << 28)) != 0 && (ecx & (1 << 27)) != 0){
|
||||
if ((ecx & (1 << 28)) != 0 && (ecx & (1 << 27)) != 0 && (ecx & (1 << 26)) != 0){
|
||||
xgetbv(0, &eax, &edx);
|
||||
if((eax & 6) == 6){
|
||||
ret=1; //OS support AVX
|
||||
@@ -175,7 +178,7 @@ static gotoblas_t *get_coretype(void){
|
||||
if(support_avx())
|
||||
return &gotoblas_SANDYBRIDGE;
|
||||
else{
|
||||
fprintf(stderr, "OpenBLAS : Your OS doesn't support AVX. Use Nehalem kernels.\n");
|
||||
fprintf(stderr, "OpenBLAS : Your OS does not support AVX instructions. OpenBLAS is using Nehalem kernels as a fallback, which may give poorer performance.\n");
|
||||
return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
|
||||
}
|
||||
}
|
||||
@@ -186,7 +189,27 @@ static gotoblas_t *get_coretype(void){
|
||||
if(support_avx())
|
||||
return &gotoblas_SANDYBRIDGE;
|
||||
else{
|
||||
fprintf(stderr, "OpenBLAS : Your OS doesn't support AVX. Use Nehalem kernels.\n");
|
||||
fprintf(stderr, "OpenBLAS : Your OS does not support AVX instructions. OpenBLAS is using Nehalem kernels as a fallback, which may give poorer performance.\n");
|
||||
return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
|
||||
}
|
||||
}
|
||||
//Intel Haswell
|
||||
if (model == 12) {
|
||||
if(support_avx())
|
||||
return &gotoblas_HASWELL;
|
||||
else{
|
||||
fprintf(stderr, "OpenBLAS : Your OS does not support AVX instructions. OpenBLAS is using Nehalem kernels as a fallback, which may give poorer performance.\n");
|
||||
return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
case 4:
|
||||
//Intel Haswell
|
||||
if (model == 5) {
|
||||
if(support_avx())
|
||||
return &gotoblas_HASWELL;
|
||||
else{
|
||||
fprintf(stderr, "OpenBLAS : Your OS does not support AVX instructions. OpenBLAS is using Nehalem kernels as a fallback, which may give poorer performance.\n");
|
||||
return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
|
||||
}
|
||||
}
|
||||
@@ -207,13 +230,23 @@ static gotoblas_t *get_coretype(void){
|
||||
} else if (exfamily == 5) {
|
||||
return &gotoblas_BOBCAT;
|
||||
} else if (exfamily == 6) {
|
||||
//AMD Bulldozer Opteron 6200 / Opteron 4200 / AMD FX-Series
|
||||
if(model == 1){
|
||||
//AMD Bulldozer Opteron 6200 / Opteron 4200 / AMD FX-Series
|
||||
if(support_avx())
|
||||
return &gotoblas_BULLDOZER;
|
||||
else{
|
||||
fprintf(stderr, "OpenBLAS : Your OS doesn't support AVX. Use Barcelona kernels.\n");
|
||||
fprintf(stderr, "OpenBLAS : Your OS does not support AVX instructions. OpenBLAS is using Barcelona kernels as a fallback, which may give poorer performance.\n");
|
||||
return &gotoblas_BARCELONA; //OS doesn't support AVX. Use old kernels.
|
||||
}
|
||||
}
|
||||
}else if(model == 2){
|
||||
//AMD Bulldozer Opteron 6300 / Opteron 4300 / Opteron 3300
|
||||
if(support_avx())
|
||||
return &gotoblas_PILEDRIVER;
|
||||
else{
|
||||
fprintf(stderr, "OpenBLAS : Your OS does not support AVX instructions. OpenBLAS is using Barcelona kernels as a fallback, which may give poorer performance.\n");
|
||||
return &gotoblas_BARCELONA; //OS doesn't support AVX. Use old kernels.
|
||||
}
|
||||
}
|
||||
} else {
|
||||
return &gotoblas_BARCELONA;
|
||||
}
|
||||
@@ -251,6 +284,7 @@ static char *corename[] = {
|
||||
"Sandybridge",
|
||||
"Bobcat",
|
||||
"Bulldozer",
|
||||
"Piledriver",
|
||||
};
|
||||
|
||||
char *gotoblas_corename(void) {
|
||||
@@ -273,6 +307,7 @@ char *gotoblas_corename(void) {
|
||||
if (gotoblas == &gotoblas_SANDYBRIDGE) return corename[16];
|
||||
if (gotoblas == &gotoblas_BOBCAT) return corename[17];
|
||||
if (gotoblas == &gotoblas_BULLDOZER) return corename[18];
|
||||
if (gotoblas == &gotoblas_PILEDRIVER) return corename[19];
|
||||
|
||||
return corename[0];
|
||||
}
|
||||
|
||||
@@ -82,6 +82,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#include <sched.h>
|
||||
#include <dirent.h>
|
||||
#include <dlfcn.h>
|
||||
#include <unistd.h>
|
||||
#include <string.h>
|
||||
|
||||
#define MAX_NODES 16
|
||||
#define MAX_CPUS 256
|
||||
@@ -314,7 +316,7 @@ static int numa_check(void) {
|
||||
}
|
||||
|
||||
while ((dir = readdir(dp)) != NULL) {
|
||||
if (*(unsigned int *) dir -> d_name == 0x065646f6eU) {
|
||||
if (strncmp(dir->d_name, "node", 4)==0) {
|
||||
|
||||
node = atoi(&dir -> d_name[4]);
|
||||
|
||||
@@ -735,7 +737,8 @@ void gotoblas_affinity_init(void) {
|
||||
fprintf(stderr, "Shared Memory Initialization.\n");
|
||||
#endif
|
||||
|
||||
common -> num_procs = get_nprocs();
|
||||
//returns the number of processors which are currently online
|
||||
common -> num_procs = sysconf(_SC_NPROCESSORS_ONLN);;
|
||||
|
||||
if(common -> num_procs > MAX_CPUS) {
|
||||
fprintf(stderr, "\nOpenBLAS Warining : The number of CPU/Cores(%d) is beyond the limit(%d). Terminated.\n", common->num_procs, MAX_CPUS);
|
||||
|
||||
@@ -105,6 +105,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
#if defined(OS_FREEBSD) || defined(OS_DARWIN)
|
||||
#include <sys/sysctl.h>
|
||||
#include <sys/resource.h>
|
||||
#endif
|
||||
|
||||
#if defined(OS_WINDOWS) && (defined(__MINGW32__) || defined(__MINGW64__))
|
||||
@@ -125,7 +126,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#define NO_WARMUP
|
||||
#endif
|
||||
|
||||
#ifdef ALLOC_HUGETLB
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
@@ -216,6 +217,25 @@ int get_num_procs(void) {
|
||||
}
|
||||
return nums;
|
||||
}
|
||||
/*
|
||||
void set_stack_limit(int limitMB){
|
||||
int result=0;
|
||||
struct rlimit rl;
|
||||
rlim_t StackSize;
|
||||
|
||||
StackSize=limitMB*1024*1024;
|
||||
result=getrlimit(RLIMIT_STACK, &rl);
|
||||
if(result==0){
|
||||
if(rl.rlim_cur < StackSize){
|
||||
rl.rlim_cur=StackSize;
|
||||
result=setrlimit(RLIMIT_STACK, &rl);
|
||||
if(result !=0){
|
||||
fprintf(stderr, "OpenBLAS: set stack limit error =%d\n", result);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
*/
|
||||
#endif
|
||||
|
||||
/*
|
||||
@@ -1248,6 +1268,7 @@ void CONSTRUCTOR gotoblas_init(void) {
|
||||
|
||||
if (gotoblas_initialized) return;
|
||||
|
||||
|
||||
#ifdef PROFILE
|
||||
moncontrol (0);
|
||||
#endif
|
||||
|
||||
52
driver/others/openblas_get_parallel.c
Normal file
52
driver/others/openblas_get_parallel.c
Normal file
@@ -0,0 +1,52 @@
|
||||
/*****************************************************************************
|
||||
Copyright (c) 2013 Martin Koehler, grisuthedragon@users.github.com
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the ISCAS nor the names of its contributors may
|
||||
be used to endorse or promote products derived from this software
|
||||
without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
**********************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
|
||||
#if defined(USE_OPENMP)
|
||||
static int parallel = 2 ;
|
||||
#elif defined(SMP_SERVER)
|
||||
static int parallel = 1;
|
||||
#else
|
||||
static int parallel = 0;
|
||||
#endif
|
||||
|
||||
int CNAME() {
|
||||
return parallel;
|
||||
}
|
||||
|
||||
int NAME() {
|
||||
return parallel;
|
||||
}
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user