Merge pull request #93 from xianyi/develop

rebase
This commit is contained in:
Martin Kroeker 2020-10-04 22:57:11 +02:00 committed by GitHub
commit cccd1438da
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
62 changed files with 267 additions and 246 deletions

View File

@ -174,18 +174,18 @@ Please read `GotoBLAS_01Readme.txt` for older CPU models already supported by th
### Support for multiple targets in a single library
OpenBLAS can be built for multiple targets with runtime detection of the target cpu by specifying DYNAMIC_ARCH=1 in Makefile.rule, on the gmake command line or as -DDYNAMIC_ARCH=TRUE in cmake.
OpenBLAS can be built for multiple targets with runtime detection of the target cpu by specifying `DYNAMIC_ARCH=1` in Makefile.rule, on the gmake command line or as `-DDYNAMIC_ARCH=TRUE` in cmake.
For **x86_64**, the list of targets this activates contains Prescott, Core2, Nehalem, Barcelona, Sandybridge, Bulldozer, Piledriver, Steamroller, Excavator, Haswell, Zen, SkylakeX. For cpu generations not included in this list, the corresponding older model is used. If you also specify DYNAMIC_OLDER=1, specific support for Penryn, Dunnington, Opteron, Opteron/SSE3, Bobcat, Atom and Nano is added. Finally there is an option DYNAMIC_LIST that allows specifying an individual list of targets to include instead of the default.
For **x86_64**, the list of targets this activates contains Prescott, Core2, Nehalem, Barcelona, Sandybridge, Bulldozer, Piledriver, Steamroller, Excavator, Haswell, Zen, SkylakeX. For cpu generations not included in this list, the corresponding older model is used. If you also specify `DYNAMIC_OLDER=1`, specific support for Penryn, Dunnington, Opteron, Opteron/SSE3, Bobcat, Atom and Nano is added. Finally there is an option `DYNAMIC_LIST` that allows specifying an individual list of targets to include instead of the default.
DYNAMIC_ARCH is also supported on **x86**, where it translates to Katmai, Coppermine, Northwood, Prescott, Banias,
`DYNAMIC_ARCH` is also supported on **x86**, where it translates to Katmai, Coppermine, Northwood, Prescott, Banias,
Core2, Penryn, Dunnington, Nehalem, Athlon, Opteron, Opteron_SSE3, Barcelona, Bobcat, Atom and Nano.
On **ARMV8**, it enables support for CortexA53, CortexA57, CortexA72, CortexA73, Falkor, ThunderX, ThunderX2T99, TSV110 as well as generic ARMV8 cpus.
For **POWER**, the list encompasses POWER6, POWER8 and POWER9, on **ZARCH** it comprises Z13 and Z14.
The TARGET option can be used in conjunction with DYNAMIC_ARCH=1 to specify which cpu model should be assumed for all the
The `TARGET` option can be used in conjunction with `DYNAMIC_ARCH=1` to specify which cpu model should be assumed for all the
common code in the library; usually you will want to set this to the oldest model you expect to encounter.
Please note that it is not possible to combine support for different architectures, so no combined 32 and 64 bit or x86_64 and arm64 in the same library.

View File

@ -146,7 +146,7 @@ int main(int argc, char *argv[]){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
#ifdef linux
#ifdef __linux
srandom(getpid());
#endif

View File

@ -145,7 +145,7 @@ int main(int argc, char *argv[]){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
#ifdef linux
#ifdef __linux
srandom(getpid());
#endif

View File

@ -152,7 +152,7 @@ int main(int argc, char *argv[]){
}
#ifdef linux
#ifdef __linux
srandom(getpid());
#endif

View File

@ -152,7 +152,7 @@ int main(int argc, char *argv[]){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
#ifdef linux
#ifdef __linux
srandom(getpid());
#endif

View File

@ -151,7 +151,7 @@ int main(int argc, char *argv[]){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
#ifdef linux
#ifdef __linux
srandom(getpid());
#endif

View File

@ -154,7 +154,7 @@ int main(int argc, char *argv[]){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
#ifdef linux
#ifdef __linux
srandom(getpid());
#endif

View File

@ -145,7 +145,7 @@ int main(int argc, char *argv[]){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
#ifdef linux
#ifdef __linux
srandom(getpid());
#endif

View File

@ -214,7 +214,7 @@ int main(int argc, char *argv[]){
}
#ifdef linux
#ifdef __linux
srandom(getpid());
#endif

View File

@ -197,7 +197,7 @@ int main(int argc, char *argv[]){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
#ifdef linux
#ifdef __linux
srandom(getpid());
#endif

View File

@ -163,7 +163,7 @@ int main(int argc, char *argv[]){
loops = atoi(p);
#ifdef linux
#ifdef __linux
srandom(getpid());
#endif

View File

@ -181,7 +181,7 @@ int main(int argc, char *argv[]){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
#ifdef linux
#ifdef __linux
srandom(getpid());
#endif

View File

@ -165,7 +165,7 @@ int main(int argc, char *argv[]){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
#ifdef linux
#ifdef __linux
srandom(getpid());
#endif

View File

@ -165,7 +165,7 @@ int main(int argc, char *argv[]){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
#ifdef linux
#ifdef __linux
srandom(getpid());
#endif

View File

@ -188,7 +188,7 @@ int main(int argc, char *argv[]){
}
#ifdef linux
#ifdef __linux
srandom(getpid());
#endif

View File

@ -158,7 +158,7 @@ int main(int argc, char *argv[]){
exit(1);
}
#ifdef linux
#ifdef __linux
srandom(getpid());
#endif

View File

@ -151,7 +151,7 @@ int main(int argc, char *argv[]){
#ifdef linux
#ifdef __linux
srandom(getpid());
#endif

View File

@ -152,7 +152,7 @@ int main(int argc, char *argv[]){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
#ifdef linux
#ifdef __linux
srandom(getpid());
#endif

View File

@ -149,7 +149,7 @@ int main(int argc, char *argv[]){
#ifdef linux
#ifdef __linux
srandom(getpid());
#endif

View File

@ -151,7 +151,7 @@ int main(int argc, char *argv[]){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
#ifdef linux
#ifdef __linux
srandom(getpid());
#endif

View File

@ -150,7 +150,7 @@ int main(int argc, char *argv[]){
#ifdef linux
#ifdef __linux
srandom(getpid());
#endif

View File

@ -149,7 +149,7 @@ int main(int argc, char *argv[]){
#ifdef linux
#ifdef __linux
srandom(getpid());
#endif

View File

@ -155,7 +155,7 @@ int main(int argc, char *argv[]){
exit(1);
}
#ifdef linux
#ifdef __linux
srandom(getpid());
#endif

View File

@ -145,7 +145,7 @@ int main(int argc, char *argv[]){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
#ifdef linux
#ifdef __linux
srandom(getpid());
#endif

View File

@ -145,7 +145,7 @@ int main(int argc, char *argv[]){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
#ifdef linux
#ifdef __linux
srandom(getpid());
#endif

View File

@ -139,7 +139,7 @@ int main(int argc, char *argv[]){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
#ifdef linux
#ifdef __linux
srandom(getpid());
#endif

View File

@ -139,7 +139,7 @@ int main(int argc, char *argv[]){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
#ifdef linux
#ifdef __linux
srandom(getpid());
#endif

View File

@ -174,7 +174,7 @@ int main(int argc, char *argv[]){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
#ifdef linux
#ifdef __linux
srandom(getpid());
#endif

View File

@ -139,7 +139,7 @@ int main(int argc, char *argv[]){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
#ifdef linux
#ifdef __linux
srandom(getpid());
#endif

View File

@ -139,7 +139,7 @@ int main(int argc, char *argv[]){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
#ifdef linux
#ifdef __linux
srandom(getpid());
#endif

View File

@ -145,7 +145,7 @@ int main(int argc, char *argv[]){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
#ifdef linux
#ifdef __linux
srandom(getpid());
#endif

View File

@ -156,7 +156,7 @@ int main(int argc, char *argv[]){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
#ifdef linux
#ifdef __linux
srandom(getpid());
#endif

View File

@ -168,7 +168,7 @@ int main(int argc, char *argv[])
exit(1);
}
#ifdef linux
#ifdef __linux
srandom(getpid());
#endif

View File

@ -150,7 +150,7 @@ int main(int argc, char *argv[]){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
#ifdef linux
#ifdef __linux
srandom(getpid());
#endif

View File

@ -163,7 +163,7 @@ int main(int argc, char *argv[]){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
#ifdef linux
#ifdef __linux
srandom(getpid());
#endif

View File

@ -149,7 +149,7 @@ int main(int argc, char *argv[]){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
#ifdef linux
#ifdef __linux
srandom(getpid());
#endif

View File

@ -153,7 +153,7 @@ int main(int argc, char *argv[]){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
#ifdef linux
#ifdef __linux
srandom(getpid());
#endif

View File

@ -151,7 +151,7 @@ int main(int argc, char *argv[]){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
#ifdef linux
#ifdef __linux
srandom(getpid());
#endif

View File

@ -162,7 +162,7 @@ int main(int argc, char *argv[]){
#ifdef linux
#ifdef __linux
srandom(getpid());
#endif

View File

@ -162,7 +162,7 @@ int main(int argc, char *argv[]){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
#ifdef linux
#ifdef __linux
srandom(getpid());
#endif

View File

@ -144,7 +144,7 @@ int main(int argc, char *argv[]){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
#ifdef linux
#ifdef __linux
srandom(getpid());
#endif

View File

@ -150,7 +150,7 @@ int main(int argc, char *argv[]){
#ifdef linux
#ifdef __linux
srandom(getpid());
#endif

View File

@ -162,7 +162,7 @@ int main(int argc, char *argv[]){
#ifdef linux
#ifdef __linux
srandom(getpid());
#endif

View File

@ -159,7 +159,7 @@ int main(int argc, char *argv[]){
#ifdef linux
#ifdef __linux
srandom(getpid());
#endif

View File

@ -132,7 +132,7 @@ int main(int argc, char *argv[])
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
#ifdef linux
#ifdef __linux
srandom(getpid());
#endif

View File

@ -132,7 +132,7 @@ int main(int argc, char *argv[])
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
#ifdef linux
#ifdef __linux
srandom(getpid());
#endif

View File

@ -162,7 +162,7 @@ int main(int argc, char *argv[]){
#ifdef linux
#ifdef __linux
srandom(getpid());
#endif

View File

@ -132,7 +132,7 @@ int main(int argc, char *argv[])
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
#ifdef linux
#ifdef __linux
srandom(getpid());
#endif

View File

@ -172,7 +172,7 @@ int main(int argc, char *argv[]){
#ifdef linux
#ifdef __linux
srandom(getpid());
#endif

View File

@ -159,7 +159,7 @@ int main(int argc, char *argv[]){
uplo,diag,loops);
#ifdef linux
#ifdef __linux
srandom(getpid());
#endif

View File

@ -146,7 +146,7 @@ int main(int argc, char *argv[]){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
#ifdef linux
#ifdef __linux
srandom(getpid());
#endif

View File

@ -145,7 +145,7 @@ int main(int argc, char *argv[]){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
#ifdef linux
#ifdef __linux
srandom(getpid());
#endif

View File

@ -352,7 +352,7 @@ typedef int blasint;
#endif
#if defined(ARMV7) || defined(ARMV6) || defined(ARMV8) || defined(ARMV5)
#define YIELDING asm volatile ("nop;nop;nop;nop;nop;nop;nop;nop; \n");
#define YIELDING __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop; \n");
#endif
#ifdef BULLDOZER

View File

@ -54,7 +54,7 @@ static char *cpuname_lower[] = {
int get_feature(char *search)
{
#ifdef linux
#ifdef __linux
FILE *infile;
char buffer[2048], *p,*t;
p = (char *) NULL ;
@ -90,7 +90,7 @@ int get_feature(char *search)
int detect(void)
{
#ifdef linux
#ifdef __linux
FILE *infile;
char buffer[512], *p;
@ -289,7 +289,7 @@ void get_libname(void)
void get_features(void)
{
#ifdef linux
#ifdef __linux
FILE *infile;
char buffer[2048], *p,*t;
p = (char *) NULL ;

View File

@ -90,7 +90,7 @@ static char *cpuname_lower[] = {
int get_feature(char *search)
{
#ifdef linux
#ifdef __linux
FILE *infile;
char buffer[2048], *p,*t;
p = (char *) NULL ;
@ -126,7 +126,7 @@ int get_feature(char *search)
int detect(void)
{
#ifdef linux
#ifdef __linux
FILE *infile;
char buffer[512], *p, *cpu_part = NULL, *cpu_implementer = NULL;
@ -242,7 +242,7 @@ void get_cpucount(void)
{
int n=0;
#ifdef linux
#ifdef __linux
FILE *infile;
char buffer[2048], *p,*t;
p = (char *) NULL ;
@ -441,7 +441,7 @@ void get_libname(void)
void get_features(void)
{
#ifdef linux
#ifdef __linux
FILE *infile;
char buffer[2048], *p,*t;
p = (char *) NULL ;

View File

@ -84,7 +84,7 @@ static char *cpuname[] = {
int detect(void){
#ifdef linux
#ifdef __linux
FILE *infile;
char buffer[512], *p;

View File

@ -90,7 +90,7 @@ static char *cpuname[] = {
int detect(void){
#ifdef linux
#ifdef __linux
FILE *infile;
char buffer[512], *p;

View File

@ -104,7 +104,7 @@ char *corename[] = {
int detect(void){
#ifdef linux
#ifdef __linux
FILE *infile;
char buffer[512], *p;
@ -214,6 +214,8 @@ switch ( id >> 16 ) {
return CPUTYPE_UNKNOWN;
}
#endif
return CPUTYPE_UNKNOWN;
}
void get_architecture(void){

View File

@ -48,6 +48,21 @@
#else
#ifndef likely
#ifdef __GNUC__
#define likely(x) __builtin_expect(!!(x), 1)
#else
#define likely(x) (x)
#endif
#endif
#ifndef unlikely
#ifdef __GNUC__
#define unlikely(x) __builtin_expect(!!(x), 0)
#else
#define unlikely(x) (x)
#endif
#endif
#ifndef OMP_SCHED
#define OMP_SCHED static
#endif
@ -350,6 +365,9 @@ static void exec_threads(blas_queue_t *queue, int buf_index){
int exec_blas(BLASLONG num, blas_queue_t *queue){
// Handle lazy re-init of the thread-pool after a POSIX fork
if (unlikely(blas_server_avail == 0)) blas_thread_init();
BLASLONG i, buf_index;
if ((num <= 0) || (queue == NULL)) return 0;

View File

@ -68,7 +68,7 @@ extern void openblas_warning(int verbose, const char * msg);
#endif
#define get_cpu_ftr(id, var) ({ \
asm("mrs %0, "#id : "=r" (var)); \
__asm__("mrs %0, "#id : "=r" (var)); \
})
static char *corename[] = {

View File

@ -80,7 +80,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#undef COMPILE_TLS
#endif
#if defined(__GLIBC_PREREQ)
#if defined(__GLIBC_PREREQ)
#if !__GLIBC_PREREQ(2,20)
#undef COMPILE_TLS
#endif
@ -161,7 +161,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#if defined(OS_WINDOWS) && (defined(__MINGW32__) || defined(__MINGW64__))
#include <conio.h>
#undef printf
#define printf _cprintf
#define printf _cprintf
#endif
#ifdef OS_LINUX
@ -190,14 +190,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define CONSTRUCTOR __cdecl
#define DESTRUCTOR __cdecl
#elif (defined(OS_DARWIN) || defined(OS_SUNOS)) && defined(C_GCC)
#define CONSTRUCTOR __attribute__ ((constructor))
#define DESTRUCTOR __attribute__ ((destructor))
#define CONSTRUCTOR __attribute__ ((constructor))
#define DESTRUCTOR __attribute__ ((destructor))
#elif __GNUC__ && INIT_PRIORITY && ((GCC_VERSION >= 40300) || (CLANG_VERSION >= 20900))
#define CONSTRUCTOR __attribute__ ((constructor(101)))
#define DESTRUCTOR __attribute__ ((destructor(101)))
#define CONSTRUCTOR __attribute__ ((constructor(101)))
#define DESTRUCTOR __attribute__ ((destructor(101)))
#else
#define CONSTRUCTOR __attribute__ ((constructor))
#define DESTRUCTOR __attribute__ ((destructor))
#define CONSTRUCTOR __attribute__ ((constructor))
#define DESTRUCTOR __attribute__ ((destructor))
#endif
#ifdef DYNAMIC_ARCH
@ -272,7 +272,7 @@ int get_num_procs(void) {
return nums;
}
ret = CPU_COUNT_S(size,cpusetp);
if (ret > 0 && ret < nums) nums = ret;
if (ret > 0 && ret < nums) nums = ret;
CPU_FREE(cpusetp);
return nums;
} else {
@ -281,7 +281,7 @@ int get_num_procs(void) {
return nums;
}
ret = CPU_COUNT(&cpuset);
if (ret > 0 && ret < nums) nums = ret;
if (ret > 0 && ret < nums) nums = ret;
return nums;
}
#endif
@ -628,12 +628,12 @@ static void *alloc_mmap(void *address){
if (address){
map_address = mmap(address,
allocation_block_size,
MMAP_ACCESS, MMAP_POLICY | MAP_FIXED, -1, 0);
allocation_block_size,
MMAP_ACCESS, MMAP_POLICY | MAP_FIXED, -1, 0);
} else {
map_address = mmap(address,
allocation_block_size,
MMAP_ACCESS, MMAP_POLICY, -1, 0);
allocation_block_size,
MMAP_ACCESS, MMAP_POLICY, -1, 0);
}
STORE_RELEASE_FUNC(map_address, alloc_mmap_free);
@ -648,7 +648,7 @@ static void *alloc_mmap(void *address){
#else
#define BENCH_ITERATION 4
#define SCALING 2
#define SCALING 2
static inline BLASULONG run_bench(BLASULONG address, BLASULONG size) {
@ -711,60 +711,60 @@ static void *alloc_mmap(void *address){
#endif
map_address = mmap(NULL, allocation_block_size * SCALING,
MMAP_ACCESS, MMAP_POLICY, -1, 0);
MMAP_ACCESS, MMAP_POLICY, -1, 0);
if (map_address != (void *)-1) {
#ifdef OS_LINUX
#ifdef DEBUG
int ret=0;
ret=my_mbind(map_address, allocation_block_size * SCALING, MPOL_PREFERRED, NULL, 0, 0);
if(ret==-1){
int errsv=errno;
perror("OpenBLAS alloc_mmap:");
printf("error code=%d,\tmap_address=%lx\n",errsv,map_address);
}
int ret=0;
ret=my_mbind(map_address, allocation_block_size * SCALING, MPOL_PREFERRED, NULL, 0, 0);
if(ret==-1){
int errsv=errno;
perror("OpenBLAS alloc_mmap:");
printf("error code=%d,\tmap_address=%lx\n",errsv,map_address);
}
#else
my_mbind(map_address, allocation_block_size * SCALING, MPOL_PREFERRED, NULL, 0, 0);
my_mbind(map_address, allocation_block_size * SCALING, MPOL_PREFERRED, NULL, 0, 0);
#endif
#endif
allocsize = DGEMM_P * DGEMM_Q * sizeof(double);
allocsize = DGEMM_P * DGEMM_Q * sizeof(double);
start = (BLASULONG)map_address;
current = (SCALING - 1) * allocation_block_size;
original = current;
start = (BLASULONG)map_address;
current = (SCALING - 1) * allocation_block_size;
original = current;
while(current > 0 && current <= original) {
*(BLASLONG *)start = (BLASLONG)start + PAGESIZE;
start += PAGESIZE;
current -= PAGESIZE;
}
while(current > 0 && current <= original) {
*(BLASLONG *)start = (BLASLONG)start + PAGESIZE;
start += PAGESIZE;
current -= PAGESIZE;
}
*(BLASLONG *)(start - PAGESIZE) = (BLASULONG)map_address;
*(BLASLONG *)(start - PAGESIZE) = (BLASULONG)map_address;
start = (BLASULONG)map_address;
start = (BLASULONG)map_address;
best = (BLASULONG)-1;
best_address = map_address;
best = (BLASULONG)-1;
best_address = map_address;
while ((start + allocsize < (BLASULONG)map_address + (SCALING - 1) * allocation_block_size)) {
while ((start + allocsize < (BLASULONG)map_address + (SCALING - 1) * allocation_block_size)) {
current = run_bench(start, allocsize);
current = run_bench(start, allocsize);
if (best > current) {
best = current;
best_address = (void *)start;
}
if (best > current) {
best = current;
best_address = (void *)start;
}
start += PAGESIZE;
start += PAGESIZE;
}
}
if ((BLASULONG)best_address > (BLASULONG)map_address)
munmap(map_address, (BLASULONG)best_address - (BLASULONG)map_address);
munmap(map_address, (BLASULONG)best_address - (BLASULONG)map_address);
munmap((void *)((BLASULONG)best_address + allocation_block_size), (SCALING - 1) * allocation_block_size + (BLASULONG)map_address - (BLASULONG)best_address);
@ -854,9 +854,9 @@ static void *alloc_windows(void *address){
void *map_address;
map_address = VirtualAlloc(address,
allocation_block_size,
MEM_RESERVE | MEM_COMMIT,
PAGE_READWRITE);
allocation_block_size,
MEM_RESERVE | MEM_COMMIT,
PAGE_READWRITE);
if (map_address == (void *)NULL) map_address = (void *)-1;
@ -897,9 +897,9 @@ static void *alloc_devicedirver(void *address){
}
map_address = mmap(address, allocation_block_size,
PROT_READ | PROT_WRITE,
MAP_FILE | MAP_SHARED,
fd, 0);
PROT_READ | PROT_WRITE,
MAP_FILE | MAP_SHARED,
fd, 0);
STORE_RELEASE_FUNC_WITH_ATTR(map_address, alloc_devicedirver_free, fd);
@ -974,12 +974,12 @@ static void *alloc_hugetlb(void *address){
shmid = shmget(IPC_PRIVATE, allocation_block_size,
#ifdef OS_LINUX
SHM_HUGETLB |
SHM_HUGETLB |
#endif
#ifdef OS_AIX
SHM_LGPAGE | SHM_PIN |
SHM_LGPAGE | SHM_PIN |
#endif
IPC_CREAT | SHM_R | SHM_W);
IPC_CREAT | SHM_R | SHM_W);
if (shmid != -1) {
map_address = (void *)shmat(shmid, address, SHM_RND);
@ -1026,9 +1026,9 @@ static void *alloc_hugetlb(void *address){
}
map_address = (void *)VirtualAlloc(address,
allocation_block_size,
MEM_LARGE_PAGES | MEM_RESERVE | MEM_COMMIT,
PAGE_READWRITE);
allocation_block_size,
MEM_LARGE_PAGES | MEM_RESERVE | MEM_COMMIT,
PAGE_READWRITE);
tp.Privileges[0].Attributes = 0;
AdjustTokenPrivileges(hToken, FALSE, &tp, 0, NULL, NULL);
@ -1078,9 +1078,9 @@ static void *alloc_hugetlbfile(void *address){
unlink(filename);
map_address = mmap(address, allocation_block_size,
PROT_READ | PROT_WRITE,
MAP_SHARED,
fd, 0);
PROT_READ | PROT_WRITE,
MAP_SHARED,
fd, 0);
STORE_RELEASE_FUNC_WITH_ATTR(map_address, alloc_hugetlbfile_free, fd);
@ -1107,7 +1107,7 @@ static volatile int memory_initialized = 0;
/* 1 : Level 2 functions */
/* 2 : Thread */
static void blas_memory_cleanup(void* ptr){
static void blas_memory_cleanup(void* ptr){
if (ptr) {
struct alloc_t ** table = (struct alloc_t **)ptr;
int pos;
@ -1243,27 +1243,27 @@ UNLOCK_COMMAND(&alloc_lock);
while ((func != NULL) && (map_address == (void *) -1)) {
map_address = (*func)((void *)base_address);
map_address = (*func)((void *)base_address);
#ifdef ALLOC_DEVICEDRIVER
if ((*func == alloc_devicedirver) && (map_address == (void *)-1)) {
fprintf(stderr, "OpenBLAS Warning ... Physically contiguous allocation failed.\n");
}
if ((*func == alloc_devicedirver) && (map_address == (void *)-1)) {
fprintf(stderr, "OpenBLAS Warning ... Physically contiguous allocation failed.\n");
}
#endif
#ifdef ALLOC_HUGETLBFILE
if ((*func == alloc_hugetlbfile) && (map_address == (void *)-1)) {
if ((*func == alloc_hugetlbfile) && (map_address == (void *)-1)) {
#ifndef OS_WINDOWS
fprintf(stderr, "OpenBLAS Warning ... HugeTLB(File) allocation failed.\n");
fprintf(stderr, "OpenBLAS Warning ... HugeTLB(File) allocation failed.\n");
#endif
}
}
#endif
#if (defined ALLOC_SHM) && (defined OS_LINUX || defined OS_AIX || defined __sun__ || defined OS_WINDOWS)
if ((*func == alloc_hugetlb) && (map_address != (void *)-1)) hugetlb_allocated = 1;
if ((*func == alloc_hugetlb) && (map_address != (void *)-1)) hugetlb_allocated = 1;
#endif
func ++;
func ++;
}
#ifdef DEBUG
@ -1377,7 +1377,7 @@ static BLASULONG init_lock = 0UL;
#endif
static void _touch_memory(blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n,
void *sa, void *sb, BLASLONG pos) {
void *sa, void *sb, BLASLONG pos) {
#if !defined(ARCH_POWER) && !defined(ARCH_SPARC)
@ -1507,11 +1507,11 @@ void CONSTRUCTOR gotoblas_init(void) {
struct rlimit curlimit;
if ( getrlimit(RLIMIT_STACK, &curlimit ) == 0 )
{
if ( curlimit.rlim_cur != curlimit.rlim_max )
{
curlimit.rlim_cur = curlimit.rlim_max;
setrlimit(RLIMIT_STACK, &curlimit);
}
if ( curlimit.rlim_cur != curlimit.rlim_max )
{
curlimit.rlim_cur = curlimit.rlim_max;
setrlimit(RLIMIT_STACK, &curlimit);
}
}
#endif
@ -1545,7 +1545,7 @@ void DESTRUCTOR gotoblas_quit(void) {
TlsFree(local_storage_key);
#else
pthread_key_delete(local_storage_key);
#endif
#endif
#endif
#ifdef PROFILE
@ -1605,8 +1605,8 @@ BOOL APIENTRY DllMain(HMODULE hModule, DWORD ul_reason_for_call, LPVOID lpReser
*/
static int on_process_term(void)
{
gotoblas_quit();
return 0;
gotoblas_quit();
return 0;
}
#ifdef _WIN64
#pragma comment(linker, "/INCLUDE:_tls_used")
@ -1705,7 +1705,7 @@ void gotoblas_dummy_for_PGI(void) {
#if defined(OS_WINDOWS) && (defined(__MINGW32__) || defined(__MINGW64__))
#include <conio.h>
#undef printf
#define printf _cprintf
#define printf _cprintf
#endif
#ifdef OS_LINUX
@ -1734,14 +1734,14 @@ void gotoblas_dummy_for_PGI(void) {
#define CONSTRUCTOR __cdecl
#define DESTRUCTOR __cdecl
#elif (defined(OS_DARWIN) || defined(OS_SUNOS)) && defined(C_GCC)
#define CONSTRUCTOR __attribute__ ((constructor))
#define DESTRUCTOR __attribute__ ((destructor))
#define CONSTRUCTOR __attribute__ ((constructor))
#define DESTRUCTOR __attribute__ ((destructor))
#elif __GNUC__ && INIT_PRIORITY && ((GCC_VERSION >= 40300) || (CLANG_VERSION >= 20900))
#define CONSTRUCTOR __attribute__ ((constructor(101)))
#define DESTRUCTOR __attribute__ ((destructor(101)))
#define CONSTRUCTOR __attribute__ ((constructor(101)))
#define DESTRUCTOR __attribute__ ((destructor(101)))
#else
#define CONSTRUCTOR __attribute__ ((constructor))
#define DESTRUCTOR __attribute__ ((destructor))
#define CONSTRUCTOR __attribute__ ((constructor))
#define DESTRUCTOR __attribute__ ((destructor))
#endif
#ifdef DYNAMIC_ARCH
@ -1817,7 +1817,7 @@ int get_num_procs(void) {
return nums;
}
ret = CPU_COUNT_S(size,cpusetp);
if (ret > 0 && ret < nums) nums = ret;
if (ret > 0 && ret < nums) nums = ret;
CPU_FREE(cpusetp);
return nums;
} else {
@ -1826,7 +1826,7 @@ int get_num_procs(void) {
return nums;
}
ret = CPU_COUNT(&cpuset);
if (ret > 0 && ret < nums) nums = ret;
if (ret > 0 && ret < nums) nums = ret;
return nums;
}
#endif
@ -2083,26 +2083,26 @@ static void *alloc_mmap(void *address){
if (address){
map_address = mmap(address,
BUFFER_SIZE,
MMAP_ACCESS, MMAP_POLICY | MAP_FIXED, -1, 0);
BUFFER_SIZE,
MMAP_ACCESS, MMAP_POLICY | MAP_FIXED, -1, 0);
} else {
map_address = mmap(address,
BUFFER_SIZE,
MMAP_ACCESS, MMAP_POLICY, -1, 0);
BUFFER_SIZE,
MMAP_ACCESS, MMAP_POLICY, -1, 0);
}
if (map_address != (void *)-1) {
#if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP)
LOCK_COMMAND(&alloc_lock);
#endif
#endif
release_info[release_pos].address = map_address;
release_info[release_pos].func = alloc_mmap_free;
release_pos ++;
#if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP)
UNLOCK_COMMAND(&alloc_lock);
#endif
#endif
} else {
#ifdef DEBUG
#ifdef DEBUG
int errsv=errno;
perror("OpenBLAS : mmap failed:");
printf("error code=%d,\tmap_address=%lx\n",errsv,map_address);
@ -2119,7 +2119,7 @@ static void *alloc_mmap(void *address){
#else
#define BENCH_ITERATION 4
#define SCALING 2
#define SCALING 2
static inline BLASULONG run_bench(BLASULONG address, BLASULONG size) {
@ -2182,59 +2182,59 @@ static void *alloc_mmap(void *address){
#endif
map_address = mmap(NULL, BUFFER_SIZE * SCALING,
MMAP_ACCESS, MMAP_POLICY, -1, 0);
MMAP_ACCESS, MMAP_POLICY, -1, 0);
if (map_address != (void *)-1) {
#ifdef OS_LINUX
#ifdef DEBUG
int ret=0;
ret=my_mbind(map_address, BUFFER_SIZE * SCALING, MPOL_PREFERRED, NULL, 0, 0);
if(ret==-1){
int errsv=errno;
perror("OpenBLAS alloc_mmap:");
printf("error code=%d,\tmap_address=%lx\n",errsv,map_address);
}
int ret=0;
ret=my_mbind(map_address, BUFFER_SIZE * SCALING, MPOL_PREFERRED, NULL, 0, 0);
if(ret==-1){
int errsv=errno;
perror("OpenBLAS alloc_mmap:");
printf("error code=%d,\tmap_address=%lx\n",errsv,map_address);
}
#else
my_mbind(map_address, BUFFER_SIZE * SCALING, MPOL_PREFERRED, NULL, 0, 0);
my_mbind(map_address, BUFFER_SIZE * SCALING, MPOL_PREFERRED, NULL, 0, 0);
#endif
#endif
allocsize = DGEMM_P * DGEMM_Q * sizeof(double);
allocsize = DGEMM_P * DGEMM_Q * sizeof(double);
start = (BLASULONG)map_address;
current = (SCALING - 1) * BUFFER_SIZE;
start = (BLASULONG)map_address;
current = (SCALING - 1) * BUFFER_SIZE;
while(current > 0) {
*(BLASLONG *)start = (BLASLONG)start + PAGESIZE;
start += PAGESIZE;
current -= PAGESIZE;
}
while(current > 0) {
*(BLASLONG *)start = (BLASLONG)start + PAGESIZE;
start += PAGESIZE;
current -= PAGESIZE;
}
*(BLASLONG *)(start - PAGESIZE) = (BLASULONG)map_address;
*(BLASLONG *)(start - PAGESIZE) = (BLASULONG)map_address;
start = (BLASULONG)map_address;
start = (BLASULONG)map_address;
best = (BLASULONG)-1;
best_address = map_address;
best = (BLASULONG)-1;
best_address = map_address;
while ((start + allocsize < (BLASULONG)map_address + (SCALING - 1) * BUFFER_SIZE)) {
while ((start + allocsize < (BLASULONG)map_address + (SCALING - 1) * BUFFER_SIZE)) {
current = run_bench(start, allocsize);
current = run_bench(start, allocsize);
if (best > current) {
best = current;
best_address = (void *)start;
}
if (best > current) {
best = current;
best_address = (void *)start;
}
start += PAGESIZE;
start += PAGESIZE;
}
}
if ((BLASULONG)best_address > (BLASULONG)map_address)
munmap(map_address, (BLASULONG)best_address - (BLASULONG)map_address);
munmap(map_address, (BLASULONG)best_address - (BLASULONG)map_address);
munmap((void *)((BLASULONG)best_address + BUFFER_SIZE), (SCALING - 1) * BUFFER_SIZE + (BLASULONG)map_address - (BLASULONG)best_address);
@ -2342,9 +2342,9 @@ static void *alloc_windows(void *address){
void *map_address;
map_address = VirtualAlloc(address,
BUFFER_SIZE,
MEM_RESERVE | MEM_COMMIT,
PAGE_READWRITE);
BUFFER_SIZE,
MEM_RESERVE | MEM_COMMIT,
PAGE_READWRITE);
if (map_address == (void *)NULL) map_address = (void *)-1;
@ -2388,9 +2388,9 @@ static void *alloc_devicedirver(void *address){
}
map_address = mmap(address, BUFFER_SIZE,
PROT_READ | PROT_WRITE,
MAP_FILE | MAP_SHARED,
fd, 0);
PROT_READ | PROT_WRITE,
MAP_FILE | MAP_SHARED,
fd, 0);
if (map_address != (void *)-1) {
release_info[release_pos].address = map_address;
@ -2471,12 +2471,12 @@ static void *alloc_hugetlb(void *address){
shmid = shmget(IPC_PRIVATE, BUFFER_SIZE,
#ifdef OS_LINUX
SHM_HUGETLB |
SHM_HUGETLB |
#endif
#ifdef OS_AIX
SHM_LGPAGE | SHM_PIN |
SHM_LGPAGE | SHM_PIN |
#endif
IPC_CREAT | SHM_R | SHM_W);
IPC_CREAT | SHM_R | SHM_W);
if (shmid != -1) {
map_address = (void *)shmat(shmid, address, SHM_RND);
@ -2511,7 +2511,7 @@ static void *alloc_hugetlb(void *address){
tp.PrivilegeCount = 1;
tp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED;
if (LookupPrivilegeValue(NULL, SE_LOCK_MEMORY_NAME, &tp.Privileges[0].Luid) != TRUE) {
CloseHandle(hToken);
return (void*)-1;
@ -2523,9 +2523,9 @@ static void *alloc_hugetlb(void *address){
}
map_address = (void *)VirtualAlloc(address,
BUFFER_SIZE,
MEM_LARGE_PAGES | MEM_RESERVE | MEM_COMMIT,
PAGE_READWRITE);
BUFFER_SIZE,
MEM_LARGE_PAGES | MEM_RESERVE | MEM_COMMIT,
PAGE_READWRITE);
tp.Privileges[0].Attributes = 0;
AdjustTokenPrivileges(hToken, FALSE, &tp, 0, NULL, NULL);
@ -2578,9 +2578,9 @@ static void *alloc_hugetlbfile(void *address){
unlink(filename);
map_address = mmap(address, BUFFER_SIZE,
PROT_READ | PROT_WRITE,
MAP_SHARED,
fd, 0);
PROT_READ | PROT_WRITE,
MAP_SHARED,
fd, 0);
if (map_address != (void *)-1) {
release_info[release_pos].address = map_address;
@ -2717,7 +2717,7 @@ void *blas_memory_alloc(int procpos){
if (!memory[position].used && (memory[position].pos == mypos)) {
#if defined(SMP) && !defined(USE_OPENMP)
LOCK_COMMAND(&alloc_lock);
#else
#else
blas_lock(&memory[position].lock);
#endif
if (!memory[position].used) goto allocation;
@ -2725,7 +2725,7 @@ void *blas_memory_alloc(int procpos){
UNLOCK_COMMAND(&alloc_lock);
#else
blas_unlock(&memory[position].lock);
#endif
#endif
}
position ++;
@ -2741,22 +2741,22 @@ void *blas_memory_alloc(int procpos){
LOCK_COMMAND(&alloc_lock);
#endif
do {
RMB;
#if defined(USE_OPENMP)
if (!memory[position].used) {
RMB;
#if defined(USE_OPENMP)
if (!memory[position].used) {
blas_lock(&memory[position].lock);
#endif
if (!memory[position].used) goto allocation;
#if defined(USE_OPENMP)
blas_unlock(&memory[position].lock);
blas_unlock(&memory[position].lock);
}
#endif
position ++;
} while (position < NUM_BUFFERS);
#if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP)
UNLOCK_COMMAND(&alloc_lock);
UNLOCK_COMMAND(&alloc_lock);
#endif
goto error;
@ -2770,7 +2770,7 @@ void *blas_memory_alloc(int procpos){
#if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP)
UNLOCK_COMMAND(&alloc_lock);
#else
blas_unlock(&memory[position].lock);
blas_unlock(&memory[position].lock);
#endif
if (!memory[position].addr) {
do {
@ -2784,27 +2784,27 @@ void *blas_memory_alloc(int procpos){
while ((func != NULL) && (map_address == (void *) -1)) {
map_address = (*func)((void *)base_address);
map_address = (*func)((void *)base_address);
#ifdef ALLOC_DEVICEDRIVER
if ((*func == alloc_devicedirver) && (map_address == (void *)-1)) {
fprintf(stderr, "OpenBLAS Warning ... Physically contiguous allocation was failed.\n");
}
if ((*func == alloc_devicedirver) && (map_address == (void *)-1)) {
fprintf(stderr, "OpenBLAS Warning ... Physically contiguous allocation was failed.\n");
}
#endif
#ifdef ALLOC_HUGETLBFILE
if ((*func == alloc_hugetlbfile) && (map_address == (void *)-1)) {
if ((*func == alloc_hugetlbfile) && (map_address == (void *)-1)) {
#ifndef OS_WINDOWS
fprintf(stderr, "OpenBLAS Warning ... HugeTLB(File) allocation was failed.\n");
fprintf(stderr, "OpenBLAS Warning ... HugeTLB(File) allocation was failed.\n");
#endif
}
}
#endif
#if (defined ALLOC_SHM) && (defined OS_LINUX || defined OS_AIX || defined __sun__ || defined OS_WINDOWS)
if ((*func == alloc_hugetlb) && (map_address != (void *)-1)) hugetlb_allocated = 1;
if ((*func == alloc_hugetlb) && (map_address != (void *)-1)) hugetlb_allocated = 1;
#endif
func ++;
func ++;
}
#ifdef DEBUG
@ -2818,7 +2818,7 @@ void *blas_memory_alloc(int procpos){
#if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP)
LOCK_COMMAND(&alloc_lock);
#endif
#endif
memory[position].addr = map_address;
#if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP)
UNLOCK_COMMAND(&alloc_lock);
@ -2856,7 +2856,7 @@ void *blas_memory_alloc(int procpos){
#ifdef DEBUG
printf("Mapped : %p %3d\n\n",
(void *)memory[position].addr, position);
(void *)memory[position].addr, position);
#endif
return (void *)memory[position].addr;
@ -2882,9 +2882,10 @@ void blas_memory_free(void *free_area){
while ((position < NUM_BUFFERS) && (memory[position].addr != free_area))
position++;
if (memory[position].addr != free_area) goto error;
if (position >= NUM_BUFFERS) goto error;
#ifdef DEBUG
if (memory[position].addr != free_area) goto error;
printf(" Position : %d\n", position);
#endif
@ -2972,7 +2973,7 @@ static BLASULONG init_lock = 0UL;
#endif
static void _touch_memory(blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n,
void *sa, void *sb, BLASLONG pos) {
void *sa, void *sb, BLASLONG pos) {
#if !defined(ARCH_POWER) && !defined(ARCH_SPARC)
@ -3099,15 +3100,15 @@ void CONSTRUCTOR gotoblas_init(void) {
//#if defined(OS_LINUX)
#if 0
struct rlimit curlimit;
if ( getrlimit(RLIMIT_STACK, &curlimit ) == 0 )
{
if ( curlimit.rlim_cur != curlimit.rlim_max )
{
curlimit.rlim_cur = curlimit.rlim_max;
setrlimit(RLIMIT_STACK, &curlimit);
}
}
struct rlimit curlimit;
if ( getrlimit(RLIMIT_STACK, &curlimit ) == 0 )
{
if ( curlimit.rlim_cur != curlimit.rlim_max )
{
curlimit.rlim_cur = curlimit.rlim_max;
setrlimit(RLIMIT_STACK, &curlimit);
}
}
#endif
#ifdef SMP
@ -3189,8 +3190,8 @@ BOOL APIENTRY DllMain(HMODULE hModule, DWORD ul_reason_for_call, LPVOID lpReser
*/
static int on_process_term(void)
{
gotoblas_quit();
return 0;
gotoblas_quit();
return 0;
}
#ifdef _WIN64
#pragma comment(linker, "/INCLUDE:_tls_used")
@ -3237,7 +3238,7 @@ void gotoblas_dummy_for_PGI(void) {
asm (".section .init,\"ax\"; call gotoblas_init@PLT; .section .text");
asm (".section .fini,\"ax\"; call gotoblas_quit@PLT; .section .text");
#endif
#endif
#endif
}
#endif

View File

@ -62,7 +62,7 @@ static void daxpy_kernel_8(BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha)
y5 = a * x[5] + y[5];
y6 = a * x[6] + y[6];
y7 = a * x[7] + y[7];
asm("":"+w"(y0),"+w"(y1),"+w"(y2),"+w"(y3),"+w"(y4),"+w"(y5),"+w"(y6),"+w"(y7));
__asm__("":"+w"(y0),"+w"(y1),"+w"(y2),"+w"(y3),"+w"(y4),"+w"(y5),"+w"(y6),"+w"(y7));
y[0] = y0;
y[1] = y1;
y[2] = y2;
@ -74,7 +74,7 @@ static void daxpy_kernel_8(BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha)
xx = (x + 4*128/sizeof(*x));
yy = (y + 4*128/sizeof(*y));
asm("":"+r"(yy)::"memory");
__asm__("":"+r"(yy)::"memory");
prefetch(xx);
prefetch(yy);