commit
4f43668eec
|
@ -48,6 +48,7 @@ ifndef NO_CBLAS
|
||||||
@sed 's/common/openblas_config/g' cblas.h > "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/cblas.h"
|
@sed 's/common/openblas_config/g' cblas.h > "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/cblas.h"
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
ifneq ($(OSNAME), AIX)
|
||||||
ifndef NO_LAPACKE
|
ifndef NO_LAPACKE
|
||||||
@echo Copying LAPACKE header files to $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)
|
@echo Copying LAPACKE header files to $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)
|
||||||
@-install -pm644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke.h"
|
@-install -pm644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke.h"
|
||||||
|
@ -72,6 +73,7 @@ ifeq ($(OSNAME), $(filter $(OSNAME),Linux SunOS Android Haiku))
|
||||||
ln -fs $(LIBSONAME) $(LIBPREFIX).so ; \
|
ln -fs $(LIBSONAME) $(LIBPREFIX).so ; \
|
||||||
ln -fs $(LIBSONAME) $(LIBPREFIX).so.$(MAJOR_VERSION)
|
ln -fs $(LIBSONAME) $(LIBPREFIX).so.$(MAJOR_VERSION)
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifeq ($(OSNAME), $(filter $(OSNAME),FreeBSD OpenBSD NetBSD DragonFly))
|
ifeq ($(OSNAME), $(filter $(OSNAME),FreeBSD OpenBSD NetBSD DragonFly))
|
||||||
@cp $(LIBSONAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
|
@cp $(LIBSONAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
|
||||||
@cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \
|
@cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \
|
||||||
|
@ -93,6 +95,33 @@ ifeq ($(OSNAME), CYGWIN_NT)
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
else
|
||||||
|
#install on AIX has different options syntax
|
||||||
|
ifndef NO_LAPACKE
|
||||||
|
@echo Copying LAPACKE header files to $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)
|
||||||
|
@-installbsd -c -m 644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke.h"
|
||||||
|
@-installbsd -c -m 644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke_config.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_config.h"
|
||||||
|
@-installbsd -c -m 644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke_mangling_with_flags.h.in "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_mangling.h"
|
||||||
|
@-installbsd -c -m 644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke_utils.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_utils.h"
|
||||||
|
endif
|
||||||
|
|
||||||
|
#for install static library
|
||||||
|
ifndef NO_STATIC
|
||||||
|
@echo Copying the static library to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
|
||||||
|
@installbsd -c -m 644 $(LIBNAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
|
||||||
|
@cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \
|
||||||
|
ln -fs $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX)
|
||||||
|
endif
|
||||||
|
#for install shared library
|
||||||
|
ifndef NO_SHARED
|
||||||
|
@echo Copying the shared library to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
|
||||||
|
@installbsd -c -m 755 $(LIBSONAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
|
||||||
|
@cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \
|
||||||
|
ln -fs $(LIBSONAME) $(LIBPREFIX).so ; \
|
||||||
|
ln -fs $(LIBSONAME) $(LIBPREFIX).so.$(MAJOR_VERSION)
|
||||||
|
endif
|
||||||
|
|
||||||
|
endif
|
||||||
|
|
||||||
#Generating openblas.pc
|
#Generating openblas.pc
|
||||||
@echo Generating openblas.pc in "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)"
|
@echo Generating openblas.pc in "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)"
|
||||||
|
|
|
@ -192,8 +192,8 @@ NO_AFFINITY = 1
|
||||||
# Flags for POWER8 are defined in Makefile.power. Don't modify COMMON_OPT
|
# Flags for POWER8 are defined in Makefile.power. Don't modify COMMON_OPT
|
||||||
# COMMON_OPT = -O2
|
# COMMON_OPT = -O2
|
||||||
|
|
||||||
# gfortran option for LAPACK
|
# gfortran option for LAPACK to improve thread-safety
|
||||||
# enable this flag only on 64bit Linux and if you need a thread safe lapack library
|
# It is enabled by default in Makefile.system for gfortran
|
||||||
# Flags for POWER8 are defined in Makefile.power. Don't modify FCOMMON_OPT
|
# Flags for POWER8 are defined in Makefile.power. Don't modify FCOMMON_OPT
|
||||||
# FCOMMON_OPT = -frecursive
|
# FCOMMON_OPT = -frecursive
|
||||||
|
|
||||||
|
|
|
@ -725,6 +725,8 @@ endif
|
||||||
ifeq ($(F_COMPILER), GFORTRAN)
|
ifeq ($(F_COMPILER), GFORTRAN)
|
||||||
CCOMMON_OPT += -DF_INTERFACE_GFORT
|
CCOMMON_OPT += -DF_INTERFACE_GFORT
|
||||||
FCOMMON_OPT += -Wall
|
FCOMMON_OPT += -Wall
|
||||||
|
# make single-threaded LAPACK calls thread-safe #1847
|
||||||
|
FCOMMON_OPT += -frecursive
|
||||||
#Don't include -lgfortran, when NO_LAPACK=1 or lsbcc
|
#Don't include -lgfortran, when NO_LAPACK=1 or lsbcc
|
||||||
ifneq ($(NO_LAPACK), 1)
|
ifneq ($(NO_LAPACK), 1)
|
||||||
EXTRALIB += -lgfortran
|
EXTRALIB += -lgfortran
|
||||||
|
@ -1211,7 +1213,11 @@ endif
|
||||||
|
|
||||||
LIBDLLNAME = $(LIBPREFIX).dll
|
LIBDLLNAME = $(LIBPREFIX).dll
|
||||||
IMPLIBNAME = lib$(LIBNAMEBASE).dll.a
|
IMPLIBNAME = lib$(LIBNAMEBASE).dll.a
|
||||||
|
ifneq ($(OSNAME), AIX)
|
||||||
LIBSONAME = $(LIBNAME:.$(LIBSUFFIX)=.so)
|
LIBSONAME = $(LIBNAME:.$(LIBSUFFIX)=.so)
|
||||||
|
else
|
||||||
|
LIBSONAME = $(LIBNAME:.$(LIBSUFFIX)=.a)
|
||||||
|
endif
|
||||||
LIBDYNNAME = $(LIBNAME:.$(LIBSUFFIX)=.dylib)
|
LIBDYNNAME = $(LIBNAME:.$(LIBSUFFIX)=.dylib)
|
||||||
LIBDEFNAME = $(LIBNAME:.$(LIBSUFFIX)=.def)
|
LIBDEFNAME = $(LIBNAME:.$(LIBSUFFIX)=.def)
|
||||||
LIBEXPNAME = $(LIBNAME:.$(LIBSUFFIX)=.exp)
|
LIBEXPNAME = $(LIBNAME:.$(LIBSUFFIX)=.exp)
|
||||||
|
|
|
@ -44,7 +44,7 @@ endif ()
|
||||||
|
|
||||||
if (${F_COMPILER} STREQUAL "GFORTRAN")
|
if (${F_COMPILER} STREQUAL "GFORTRAN")
|
||||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_GFORT")
|
set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_GFORT")
|
||||||
set(FCOMMON_OPT "${FCOMMON_OPT} -Wall")
|
set(FCOMMON_OPT "${FCOMMON_OPT} -Wall -frecursive")
|
||||||
#Don't include -lgfortran, when NO_LAPACK=1 or lsbcc
|
#Don't include -lgfortran, when NO_LAPACK=1 or lsbcc
|
||||||
if (NOT NO_LAPACK)
|
if (NOT NO_LAPACK)
|
||||||
set(EXTRALIB "{EXTRALIB} -lgfortran")
|
set(EXTRALIB "{EXTRALIB} -lgfortran")
|
||||||
|
|
2
common.h
2
common.h
|
@ -183,7 +183,7 @@ extern "C" {
|
||||||
|
|
||||||
#define ALLOCA_ALIGN 63UL
|
#define ALLOCA_ALIGN 63UL
|
||||||
|
|
||||||
#define NUM_BUFFERS (MAX_CPU_NUMBER * 2 * MAX_PARALLEL_NUMBER)
|
#define NUM_BUFFERS MAX(50,(MAX_CPU_NUMBER * 2 * MAX_PARALLEL_NUMBER))
|
||||||
|
|
||||||
#ifdef NEEDBUNDERSCORE
|
#ifdef NEEDBUNDERSCORE
|
||||||
#define BLASFUNC(FUNC) FUNC##_
|
#define BLASFUNC(FUNC) FUNC##_
|
||||||
|
|
|
@ -115,8 +115,8 @@ int detect(void)
|
||||||
fclose(infile);
|
fclose(infile);
|
||||||
if(cpu_part != NULL && cpu_implementer != NULL) {
|
if(cpu_part != NULL && cpu_implementer != NULL) {
|
||||||
if (strstr(cpu_implementer, "0x41") &&
|
if (strstr(cpu_implementer, "0x41") &&
|
||||||
(strstr(cpu_part, "0xd07") || strstr(cpu_part,"0xd08") || strstr(cpu_part,"0xd03") ))
|
(strstr(cpu_part, "0xd07") || strstr(cpu_part,"0xd08")))
|
||||||
return CPU_CORTEXA57; //or compatible A53, A72
|
return CPU_CORTEXA57; //or compatible, ex. A72
|
||||||
else if (strstr(cpu_part, "0x516") && strstr(cpu_implementer, "0x42"))
|
else if (strstr(cpu_part, "0x516") && strstr(cpu_implementer, "0x42"))
|
||||||
return CPU_VULCAN;
|
return CPU_VULCAN;
|
||||||
else if (strstr(cpu_part, "0x0a1") && strstr(cpu_implementer, "0x43"))
|
else if (strstr(cpu_part, "0x0a1") && strstr(cpu_implementer, "0x43"))
|
||||||
|
|
|
@ -2009,6 +2009,8 @@ int get_coretype(void){
|
||||||
switch (model) {
|
switch (model) {
|
||||||
case 1:
|
case 1:
|
||||||
// AMD Ryzen
|
// AMD Ryzen
|
||||||
|
case 8:
|
||||||
|
// Ryzen 2
|
||||||
if(support_avx())
|
if(support_avx())
|
||||||
#ifndef NO_AVX2
|
#ifndef NO_AVX2
|
||||||
return CORE_ZEN;
|
return CORE_ZEN;
|
||||||
|
|
|
@ -48,6 +48,10 @@
|
||||||
#define SWITCH_RATIO 2
|
#define SWITCH_RATIO 2
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifndef GEMM_PREFERED_SIZE
|
||||||
|
#define GEMM_PREFERED_SIZE 1
|
||||||
|
#endif
|
||||||
|
|
||||||
//The array of job_t may overflow the stack.
|
//The array of job_t may overflow the stack.
|
||||||
//Instead, use malloc to alloc job_t.
|
//Instead, use malloc to alloc job_t.
|
||||||
#if MAX_CPU_NUMBER > BLAS3_MEM_ALLOC_THRESHOLD
|
#if MAX_CPU_NUMBER > BLAS3_MEM_ALLOC_THRESHOLD
|
||||||
|
@ -510,6 +514,16 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int round_up(int remainder, int width, int multiple)
|
||||||
|
{
|
||||||
|
if (multiple > remainder || width <= multiple)
|
||||||
|
return width;
|
||||||
|
width = (width + multiple - 1) / multiple;
|
||||||
|
width = width * multiple;
|
||||||
|
return width;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
static int gemm_driver(blas_arg_t *args, BLASLONG *range_m, BLASLONG
|
static int gemm_driver(blas_arg_t *args, BLASLONG *range_m, BLASLONG
|
||||||
*range_n, FLOAT *sa, FLOAT *sb,
|
*range_n, FLOAT *sa, FLOAT *sb,
|
||||||
BLASLONG nthreads_m, BLASLONG nthreads_n) {
|
BLASLONG nthreads_m, BLASLONG nthreads_n) {
|
||||||
|
@ -601,9 +615,14 @@ static int gemm_driver(blas_arg_t *args, BLASLONG *range_m, BLASLONG
|
||||||
num_parts = 0;
|
num_parts = 0;
|
||||||
while (m > 0){
|
while (m > 0){
|
||||||
width = blas_quickdivide(m + nthreads_m - num_parts - 1, nthreads_m - num_parts);
|
width = blas_quickdivide(m + nthreads_m - num_parts - 1, nthreads_m - num_parts);
|
||||||
|
|
||||||
|
width = round_up(m, width, GEMM_PREFERED_SIZE);
|
||||||
|
|
||||||
m -= width;
|
m -= width;
|
||||||
|
|
||||||
if (m < 0) width = width + m;
|
if (m < 0) width = width + m;
|
||||||
range_M[num_parts + 1] = range_M[num_parts] + width;
|
range_M[num_parts + 1] = range_M[num_parts] + width;
|
||||||
|
|
||||||
num_parts ++;
|
num_parts ++;
|
||||||
}
|
}
|
||||||
for (i = num_parts; i < MAX_CPU_NUMBER; i++) {
|
for (i = num_parts; i < MAX_CPU_NUMBER; i++) {
|
||||||
|
@ -645,9 +664,12 @@ static int gemm_driver(blas_arg_t *args, BLASLONG *range_m, BLASLONG
|
||||||
if (width < SWITCH_RATIO) {
|
if (width < SWITCH_RATIO) {
|
||||||
width = SWITCH_RATIO;
|
width = SWITCH_RATIO;
|
||||||
}
|
}
|
||||||
|
width = round_up(n, width, GEMM_PREFERED_SIZE);
|
||||||
|
|
||||||
n -= width;
|
n -= width;
|
||||||
if (n < 0) width = width + n;
|
if (n < 0) width = width + n;
|
||||||
range_N[num_parts + 1] = range_N[num_parts] + width;
|
range_N[num_parts + 1] = range_N[num_parts] + width;
|
||||||
|
|
||||||
num_parts ++;
|
num_parts ++;
|
||||||
}
|
}
|
||||||
for (j = num_parts; j < MAX_CPU_NUMBER; j++) {
|
for (j = num_parts; j < MAX_CPU_NUMBER; j++) {
|
||||||
|
|
|
@ -850,6 +850,11 @@ void goto_set_num_threads(int num_threads) {
|
||||||
|
|
||||||
long i;
|
long i;
|
||||||
|
|
||||||
|
#ifdef SMP_SERVER
|
||||||
|
// Handle lazy re-init of the thread-pool after a POSIX fork
|
||||||
|
if (unlikely(blas_server_avail == 0)) blas_thread_init();
|
||||||
|
#endif
|
||||||
|
|
||||||
if (num_threads < 1) num_threads = blas_num_threads;
|
if (num_threads < 1) num_threads = blas_num_threads;
|
||||||
|
|
||||||
#ifndef NO_AFFINITY
|
#ifndef NO_AFFINITY
|
||||||
|
|
|
@ -478,7 +478,12 @@ int BLASFUNC(blas_thread_shutdown)(void){
|
||||||
|
|
||||||
void goto_set_num_threads(int num_threads)
|
void goto_set_num_threads(int num_threads)
|
||||||
{
|
{
|
||||||
long i;
|
long i;
|
||||||
|
|
||||||
|
#if defined(SMP_SERVER) && defined(OS_CYGWIN_NT)
|
||||||
|
// Handle lazy re-init of the thread-pool after a POSIX fork
|
||||||
|
if (unlikely(blas_server_avail == 0)) blas_thread_init();
|
||||||
|
#endif
|
||||||
|
|
||||||
if (num_threads < 1) num_threads = blas_cpu_number;
|
if (num_threads < 1) num_threads = blas_cpu_number;
|
||||||
|
|
||||||
|
|
|
@ -259,6 +259,16 @@ int get_num_procs(void) {
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef OS_AIX
|
||||||
|
int get_num_procs(void) {
|
||||||
|
static int nums = 0;
|
||||||
|
if (!nums) nums = sysconf(_SC_NPROCESSORS_CONF);
|
||||||
|
return nums;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#ifdef OS_WINDOWS
|
#ifdef OS_WINDOWS
|
||||||
|
|
||||||
int get_num_procs(void) {
|
int get_num_procs(void) {
|
||||||
|
@ -1738,6 +1748,22 @@ int get_num_procs(void) {
|
||||||
return nums;
|
return nums;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef OS_HAIKU
|
||||||
|
int get_num_procs(void) {
|
||||||
|
static int nums = 0;
|
||||||
|
if (!nums) nums = sysconf(_SC_NPROCESSORS_CONF);
|
||||||
|
return nums;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef OS_AIX
|
||||||
|
int get_num_procs(void) {
|
||||||
|
static int nums = 0;
|
||||||
|
if (!nums) nums = sysconf(_SC_NPROCESSORS_CONF);
|
||||||
|
return nums;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef OS_WINDOWS
|
#ifdef OS_WINDOWS
|
||||||
|
|
||||||
|
|
|
@ -42,6 +42,14 @@
|
||||||
#include "functable.h"
|
#include "functable.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if defined(THUNDERX2T99) || defined(VULCAN) || defined(ARMV8)
|
||||||
|
// Multithreaded swap gives performance benefits in ThunderX2T99
|
||||||
|
#else
|
||||||
|
// Disable multi-threading as it does not show any performance
|
||||||
|
// benefits. Keep the multi-threading code for the record.
|
||||||
|
#undef SMP
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifndef CBLAS
|
#ifndef CBLAS
|
||||||
|
|
||||||
void NAME(blasint *N, FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY){
|
void NAME(blasint *N, FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY){
|
||||||
|
@ -81,7 +89,7 @@ FLOAT *y = (FLOAT*)vy;
|
||||||
#ifdef SMP
|
#ifdef SMP
|
||||||
//disable multi-thread when incx==0 or incy==0
|
//disable multi-thread when incx==0 or incy==0
|
||||||
//In that case, the threads would be dependent.
|
//In that case, the threads would be dependent.
|
||||||
if (incx == 0 || incy == 0)
|
if (incx == 0 || incy == 0 || n < 1048576 * GEMM_MULTITHREAD_THRESHOLD / sizeof(FLOAT))
|
||||||
nthreads = 1;
|
nthreads = 1;
|
||||||
else
|
else
|
||||||
nthreads = num_cpu_avail(1);
|
nthreads = num_cpu_avail(1);
|
||||||
|
|
|
@ -55,6 +55,8 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT beta,
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (m == 0 || n == 0)
|
||||||
|
return 0;
|
||||||
|
|
||||||
c_offset = c;
|
c_offset = c;
|
||||||
|
|
||||||
|
@ -69,7 +71,7 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT beta,
|
||||||
|
|
||||||
i = m;
|
i = m;
|
||||||
|
|
||||||
while (i > 32) {
|
while (i >= 32) {
|
||||||
_mm512_storeu_pd(c_offset1, z_zero);
|
_mm512_storeu_pd(c_offset1, z_zero);
|
||||||
_mm512_storeu_pd(c_offset1 + 8, z_zero);
|
_mm512_storeu_pd(c_offset1 + 8, z_zero);
|
||||||
_mm512_storeu_pd(c_offset1 + 16, z_zero);
|
_mm512_storeu_pd(c_offset1 + 16, z_zero);
|
||||||
|
@ -77,7 +79,7 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT beta,
|
||||||
c_offset1 += 32;
|
c_offset1 += 32;
|
||||||
i -= 32;
|
i -= 32;
|
||||||
}
|
}
|
||||||
while (i > 8) {
|
while (i >= 8) {
|
||||||
_mm512_storeu_pd(c_offset1, z_zero);
|
_mm512_storeu_pd(c_offset1, z_zero);
|
||||||
c_offset1 += 8;
|
c_offset1 += 8;
|
||||||
i -= 8;
|
i -= 8;
|
||||||
|
|
|
@ -55,6 +55,8 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT beta,
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (n == 0 || m == 0)
|
||||||
|
return;
|
||||||
|
|
||||||
c_offset = c;
|
c_offset = c;
|
||||||
|
|
||||||
|
@ -71,13 +73,13 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT beta,
|
||||||
|
|
||||||
i = m;
|
i = m;
|
||||||
|
|
||||||
while (i > 32) {
|
while (i >= 32) {
|
||||||
_mm512_storeu_ps(c_offset1, z_zero);
|
_mm512_storeu_ps(c_offset1, z_zero);
|
||||||
_mm512_storeu_ps(c_offset1 + 16, z_zero);
|
_mm512_storeu_ps(c_offset1 + 16, z_zero);
|
||||||
c_offset1 += 32;
|
c_offset1 += 32;
|
||||||
i -= 32;
|
i -= 32;
|
||||||
}
|
}
|
||||||
while (i > 8) {
|
while (i >= 8) {
|
||||||
_mm256_storeu_ps(c_offset1, y_zero);
|
_mm256_storeu_ps(c_offset1, y_zero);
|
||||||
c_offset1 += 8;
|
c_offset1 += 8;
|
||||||
i -= 8;
|
i -= 8;
|
||||||
|
|
|
@ -34,6 +34,13 @@
|
||||||
#ifndef _LAPACKE_CONFIG_H_
|
#ifndef _LAPACKE_CONFIG_H_
|
||||||
#define _LAPACKE_CONFIG_H_
|
#define _LAPACKE_CONFIG_H_
|
||||||
|
|
||||||
|
// For Android prior to API 21 (no <complex> include)
|
||||||
|
#if defined(__ANDROID__)
|
||||||
|
#if __ANDROID_API__ < 21
|
||||||
|
#define LAPACK_COMPLEX_STRUCTURE
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
#if defined(LAPACK_COMPLEX_CPP)
|
#if defined(LAPACK_COMPLEX_CPP)
|
||||||
#include <complex>
|
#include <complex>
|
||||||
|
|
Loading…
Reference in New Issue