Refs #283. Fixed incorrect use of the `long` data type on 64-bit Windows, where `long` is only 32 bits wide and cannot hold a pointer or a 64-bit index.
This commit is contained in:
parent
a2942456ef
commit
5048a80032
|
@ -229,6 +229,11 @@ endif
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
# ifeq logical or
|
||||||
|
ifeq ($(OSNAME), $(filter $(OSNAME),WINNT CYGWIN_NT Interix))
|
||||||
|
OS_WINDOWS=1
|
||||||
|
endif
|
||||||
|
|
||||||
ifdef QUAD_PRECISION
|
ifdef QUAD_PRECISION
|
||||||
CCOMMON_OPT += -DQUAD_PRECISION
|
CCOMMON_OPT += -DQUAD_PRECISION
|
||||||
NO_EXPRECISION = 1
|
NO_EXPRECISION = 1
|
||||||
|
@ -849,6 +854,9 @@ LAPACK_CFLAGS += -DHAVE_LAPACK_CONFIG_H
|
||||||
ifdef INTERFACE64
|
ifdef INTERFACE64
|
||||||
LAPACK_CFLAGS += -DLAPACK_ILP64
|
LAPACK_CFLAGS += -DLAPACK_ILP64
|
||||||
endif
|
endif
|
||||||
|
ifdef OS_WINDOWS
|
||||||
|
LAPACK_CFLAGS += -DOPENBLAS_OS_WINDOWS
|
||||||
|
endif
|
||||||
ifeq ($(C_COMPILER), LSB)
|
ifeq ($(C_COMPILER), LSB)
|
||||||
LAPACK_CFLAGS += -DLAPACK_COMPLEX_STRUCTURE
|
LAPACK_CFLAGS += -DLAPACK_COMPLEX_STRUCTURE
|
||||||
endif
|
endif
|
||||||
|
|
|
@ -363,7 +363,7 @@ static void *alloc_mmap(void *address){
|
||||||
#define BENCH_ITERATION 4
|
#define BENCH_ITERATION 4
|
||||||
#define SCALING 2
|
#define SCALING 2
|
||||||
|
|
||||||
static inline BLASULONG run_bench(BLASULONG address, long size) {
|
static inline BLASULONG run_bench(BLASULONG address, BLASULONG size) {
|
||||||
|
|
||||||
BLASULONG original, *p;
|
BLASULONG original, *p;
|
||||||
BLASULONG start, stop, min;
|
BLASULONG start, stop, min;
|
||||||
|
@ -450,12 +450,12 @@ static void *alloc_mmap(void *address){
|
||||||
current = (SCALING - 1) * BUFFER_SIZE;
|
current = (SCALING - 1) * BUFFER_SIZE;
|
||||||
|
|
||||||
while(current > 0) {
|
while(current > 0) {
|
||||||
*(long *)start = (long)start + PAGESIZE;
|
*(BLASLONG *)start = (BLASLONG)start + PAGESIZE;
|
||||||
start += PAGESIZE;
|
start += PAGESIZE;
|
||||||
current -= PAGESIZE;
|
current -= PAGESIZE;
|
||||||
}
|
}
|
||||||
|
|
||||||
*(long *)(start - PAGESIZE) = (BLASULONG)map_address;
|
*(BLASLONG *)(start - PAGESIZE) = (BLASULONG)map_address;
|
||||||
|
|
||||||
start = (BLASULONG)map_address;
|
start = (BLASULONG)map_address;
|
||||||
|
|
||||||
|
@ -1170,7 +1170,7 @@ static void _touch_memory(blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n,
|
||||||
|
|
||||||
#if !defined(ARCH_POWER) && !defined(ARCH_SPARC)
|
#if !defined(ARCH_POWER) && !defined(ARCH_SPARC)
|
||||||
|
|
||||||
long size;
|
size_t size;
|
||||||
BLASULONG buffer;
|
BLASULONG buffer;
|
||||||
|
|
||||||
size = BUFFER_SIZE - PAGESIZE;
|
size = BUFFER_SIZE - PAGESIZE;
|
||||||
|
|
|
@ -45,7 +45,11 @@ extern "C" {
|
||||||
|
|
||||||
#ifndef lapack_int
|
#ifndef lapack_int
|
||||||
#if defined(LAPACK_ILP64)
|
#if defined(LAPACK_ILP64)
|
||||||
|
#if defined(OPENBLAS_OS_WINDOWS)
|
||||||
|
#define lapack_int long long
|
||||||
|
#else
|
||||||
#define lapack_int long
|
#define lapack_int long
|
||||||
|
#endif
|
||||||
#else
|
#else
|
||||||
#define lapack_int int
|
#define lapack_int int
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -67,14 +67,14 @@ double sqrt(double);
|
||||||
#undef GETRF_FACTOR
|
#undef GETRF_FACTOR
|
||||||
#define GETRF_FACTOR 1.00
|
#define GETRF_FACTOR 1.00
|
||||||
|
|
||||||
static inline long FORMULA1(long M, long N, long IS, long BK, long T) {
|
static inline BLASLONG FORMULA1(BLASLONG M, BLASLONG N, BLASLONG IS, BLASLONG BK, BLASLONG T) {
|
||||||
|
|
||||||
double m = (double)(M - IS - BK);
|
double m = (double)(M - IS - BK);
|
||||||
double n = (double)(N - IS - BK);
|
double n = (double)(N - IS - BK);
|
||||||
double b = (double)BK;
|
double b = (double)BK;
|
||||||
double a = (double)T;
|
double a = (double)T;
|
||||||
|
|
||||||
return (long)((n + GETRF_FACTOR * m * b * (1. - a) / (b + m)) / a);
|
return (BLASLONG)((n + GETRF_FACTOR * m * b * (1. - a) / (b + m)) / a);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -111,7 +111,7 @@ static void inner_basic_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *ra
|
||||||
|
|
||||||
if (args -> a == NULL) {
|
if (args -> a == NULL) {
|
||||||
TRSM_ILTCOPY(k, k, (FLOAT *)args -> b, lda, 0, sb);
|
TRSM_ILTCOPY(k, k, (FLOAT *)args -> b, lda, 0, sb);
|
||||||
sbb = (FLOAT *)((((long)(sb + k * k * COMPSIZE) + GEMM_ALIGN) & ~GEMM_ALIGN) + GEMM_OFFSET_B);
|
sbb = (FLOAT *)((((BLASULONG)(sb + k * k * COMPSIZE) + GEMM_ALIGN) & ~GEMM_ALIGN) + GEMM_OFFSET_B);
|
||||||
} else {
|
} else {
|
||||||
sb = (FLOAT *)args -> a;
|
sb = (FLOAT *)args -> a;
|
||||||
}
|
}
|
||||||
|
@ -221,7 +221,7 @@ static int inner_advanced_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *
|
||||||
|
|
||||||
if (args -> a == NULL) {
|
if (args -> a == NULL) {
|
||||||
TRSM_ILTCOPY(k, k, (FLOAT *)args -> b, lda, 0, sb);
|
TRSM_ILTCOPY(k, k, (FLOAT *)args -> b, lda, 0, sb);
|
||||||
sbb = (FLOAT *)((((long)(sb + k * k * COMPSIZE) + GEMM_ALIGN) & ~GEMM_ALIGN) + GEMM_OFFSET_B);
|
sbb = (FLOAT *)((((BLASULONG)(sb + k * k * COMPSIZE) + GEMM_ALIGN) & ~GEMM_ALIGN) + GEMM_OFFSET_B);
|
||||||
} else {
|
} else {
|
||||||
sb = (FLOAT *)args -> a;
|
sb = (FLOAT *)args -> a;
|
||||||
}
|
}
|
||||||
|
@ -448,7 +448,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
|
||||||
|
|
||||||
TRSM_ILTCOPY(bk, bk, a, lda, 0, sb);
|
TRSM_ILTCOPY(bk, bk, a, lda, 0, sb);
|
||||||
|
|
||||||
sbb = (FLOAT *)((((long)(sb + bk * bk * COMPSIZE) + GEMM_ALIGN) & ~GEMM_ALIGN) + GEMM_OFFSET_B);
|
sbb = (FLOAT *)((((BLASULONG)(sb + bk * bk * COMPSIZE) + GEMM_ALIGN) & ~GEMM_ALIGN) + GEMM_OFFSET_B);
|
||||||
|
|
||||||
is = 0;
|
is = 0;
|
||||||
num_cpu = 0;
|
num_cpu = 0;
|
||||||
|
@ -685,7 +685,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
|
||||||
if (width > n - init_bk) width = n - init_bk;
|
if (width > n - init_bk) width = n - init_bk;
|
||||||
|
|
||||||
if (width < init_bk) {
|
if (width < init_bk) {
|
||||||
long temp;
|
BLASLONG temp;
|
||||||
|
|
||||||
temp = FORMULA2(m, n, 0, init_bk, args -> nthreads);
|
temp = FORMULA2(m, n, 0, init_bk, args -> nthreads);
|
||||||
temp = (temp + GEMM_UNROLL_N - 1) & ~(GEMM_UNROLL_N - 1);
|
temp = (temp + GEMM_UNROLL_N - 1) & ~(GEMM_UNROLL_N - 1);
|
||||||
|
@ -708,7 +708,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
|
||||||
is = 0;
|
is = 0;
|
||||||
num_cpu = 0;
|
num_cpu = 0;
|
||||||
|
|
||||||
sbb = (FLOAT *)((((long)(sb + GEMM_PQ * GEMM_PQ * COMPSIZE) + GEMM_ALIGN) & ~GEMM_ALIGN) + GEMM_OFFSET_B);
|
sbb = (FLOAT *)((((BLASULONG)(sb + GEMM_PQ * GEMM_PQ * COMPSIZE) + GEMM_ALIGN) & ~GEMM_ALIGN) + GEMM_OFFSET_B);
|
||||||
|
|
||||||
while (is < mn) {
|
while (is < mn) {
|
||||||
|
|
||||||
|
|
|
@ -178,7 +178,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
|
||||||
return info;
|
return info;
|
||||||
}
|
}
|
||||||
|
|
||||||
sbb = (FLOAT *)((((long)(sb + blocking * blocking * COMPSIZE) + GEMM_ALIGN) & ~GEMM_ALIGN) + GEMM_OFFSET_B);
|
sbb = (FLOAT *)((((BLASULONG)(sb + blocking * blocking * COMPSIZE) + GEMM_ALIGN) & ~GEMM_ALIGN) + GEMM_OFFSET_B);
|
||||||
|
|
||||||
info = 0;
|
info = 0;
|
||||||
|
|
||||||
|
|
|
@ -82,7 +82,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
|
||||||
return info;
|
return info;
|
||||||
}
|
}
|
||||||
|
|
||||||
sbb = (FLOAT *)((((long)(sb + blocking * blocking * COMPSIZE) + GEMM_ALIGN) & ~GEMM_ALIGN) + GEMM_OFFSET_B);
|
sbb = (FLOAT *)((((BLASULONG)(sb + blocking * blocking * COMPSIZE) + GEMM_ALIGN) & ~GEMM_ALIGN) + GEMM_OFFSET_B);
|
||||||
|
|
||||||
info = 0;
|
info = 0;
|
||||||
|
|
||||||
|
|
|
@ -185,7 +185,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
|
||||||
|
|
||||||
div_n = ((m_to - m_from + DIVIDE_RATE - 1) / DIVIDE_RATE + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1);
|
div_n = ((m_to - m_from + DIVIDE_RATE - 1) / DIVIDE_RATE + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1);
|
||||||
|
|
||||||
buffer[0] = (FLOAT *)((((long)(sb + k * k * COMPSIZE) + GEMM_ALIGN) & ~GEMM_ALIGN) + GEMM_OFFSET_B);
|
buffer[0] = (FLOAT *)((((BLASULONG)(sb + k * k * COMPSIZE) + GEMM_ALIGN) & ~GEMM_ALIGN) + GEMM_OFFSET_B);
|
||||||
for (i = 1; i < DIVIDE_RATE; i++) {
|
for (i = 1; i < DIVIDE_RATE; i++) {
|
||||||
buffer[i] = buffer[i - 1] + GEMM_Q * div_n * COMPSIZE;
|
buffer[i] = buffer[i - 1] + GEMM_Q * div_n * COMPSIZE;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue