Compare commits

...

38 Commits

Author SHA1 Message Date
Martin Kroeker
e32f3b1447 Restore -march flag for Android builds
fixes #2419 - renewed discussion in #2112 suggests removal of the option was primarily aimed at non-Android builds
2020-02-16 17:32:13 +01:00
Martin Kroeker
5e94aa4877 Merge pull request #2417 from marxin/make-ctest-verbose-for-drone
Make ctest verbose for drone
2020-02-15 21:57:41 +01:00
Martin Kroeker
93f3e27574 Merge pull request #2415 from marxin/add-cmake-to-gitignore
Add CMake related files to .gitignore.
2020-02-15 21:57:03 +01:00
Martin Kroeker
785c389b0e Merge pull request #2420 from martin-frbg/issue2396
Correct generation of GETRF files by the CMAKE build
2020-02-15 21:56:16 +01:00
Martin Kroeker
c222b25b81 Correct generation of GETRF files by the CMAKE build
fixes #2396
2020-02-15 19:29:14 +01:00
Martin Kroeker
221da8bf05 Merge pull request #2411 from martin-frbg/fix2254-038
Fix pre-processed POWER8 codes and wrong conditionals in the POWER8,PPC440 and PPC970 KERNEL files
2020-02-14 23:07:43 +01:00
Martin Liska
eb285b4d20 Make ctest verbose for drone builder. 2020-02-14 10:46:37 +01:00
Martin Kroeker
cafdd999b8 Update caxpy_power8.S 2020-02-13 22:44:09 +01:00
Martin Kroeker
92ca92a46c Update caxpy_power8.S 2020-02-13 21:24:54 +01:00
Martin Kroeker
486c35c5dc Update icamin_power8.S 2020-02-13 18:38:43 +01:00
Martin Liska
0e05ea9bac Add CMake related files to .gitignore. 2020-02-13 14:51:55 +01:00
Martin Kroeker
5ba3699f41 Update isamin_power8.S 2020-02-13 00:00:32 +01:00
Martin Kroeker
8eefa530cd Update isamax_power8.S 2020-02-12 23:59:50 +01:00
Martin Kroeker
de40d47edf Update isamin_power8.S 2020-02-12 23:57:48 +01:00
Martin Kroeker
7c162b8a21 Update isamax_power8.S 2020-02-12 23:56:57 +01:00
Martin Kroeker
0544cbc806 Fix syntax of endianness conditional 2020-02-12 20:00:29 +01:00
Martin Kroeker
120d20731f Fix syntax of endianness conditional 2020-02-12 19:58:42 +01:00
Martin Kroeker
dc345d84df Fix syntax of endianness conditional and add gcc version check for workaround 2020-02-12 19:56:52 +01:00
Martin Kroeker
616921fd91 Merge pull request #27 from xianyi/develop
rebase
2020-02-12 19:16:14 +01:00
Martin Kroeker
8a9e9a82a1 Merge pull request #2410 from bartoldeman/fix-dscal-inline-asm
Fix inline asm in dscal: mark x, x1 as clobbered. Fixes #2408
2020-02-12 15:38:37 +01:00
Bart Oldeman
7ea5e07d1c Fix inline asm in dscal: mark x, x1 as clobbered. Fixes #2408
The leaq instructions in dscal_kernel_inc_8 modify x and x1 so they
must be declared as input/output constraints, otherwise the compiler
may assume the corresponding registers are not modified.
2020-02-12 14:11:44 +00:00
Martin Kroeker
cb6ef49857 Merge pull request #2407 from susilehtola/patch-2
Patch out instances of Z15 in dynamic_zarch.c
2020-02-11 13:04:44 +01:00
Martin Kroeker
63994e1cdb Merge pull request #2405 from susilehtola/patch-1
Fix typo in dynamic_zarch.c
2020-02-11 13:03:35 +01:00
Martin Kroeker
496e3019bc Merge pull request #2404 from martin-frbg/issue2395
Fix spurious application of USE_TRMM in cmake builds
2020-02-11 13:00:36 +01:00
Martin Kroeker
169be3f097 Merge pull request #2403 from martin-frbg/issue2400
Fix coretype identification of Intel Cannon Lake, Ice Lake and Goldmont
2020-02-11 13:00:16 +01:00
Martin Kroeker
6ccbb089c2 Merge pull request #2402 from gxw-loongson/develop
Avoid printing the following information on mips and mips64 when check msa
2020-02-11 12:59:53 +01:00
Martin Kroeker
59ebe3636a Merge pull request #2399 from martin-frbg/buffersize
Make BUFFER_SIZE configurable at build time
2020-02-11 12:56:56 +01:00
Susi Lehtola
5a6bba3061 Patch out instances of Z15 in dynamic_zarch.c
There does not appear to be a Z15 kernel yet, causing link errors from the code. This patch fixes the issue.
2020-02-11 15:07:33 +13:00
Susi Lehtola
dff173e50e Fix typo in dynamic_zarch.c 2020-02-11 14:46:30 +13:00
Martin Kroeker
7e5cbb6f35 Fix bad conditional syntax that caused spurious application of USE_TRMM 2020-02-10 21:17:39 +01:00
Martin Kroeker
303bdb673b Fix coretype detection for Intel extended models 6 and 7
affecting Goldmont, Cannon Lake, Ice Lake autodetection
2020-02-10 19:17:32 +01:00
gxw
754433f420 Avoid printing the following information on mips and mips64 when check msa:
"unrecognized command line option ‘-mmsa’"
2020-02-10 19:11:45 +08:00
Martin Kroeker
7f0d523b42 Make BUFFER_SIZE configurable 2020-02-09 23:32:57 +01:00
Martin Kroeker
c353d8b106 Make BUFFER_SIZE configurable 2020-02-09 23:30:22 +01:00
Martin Kroeker
579be3aa9d Add configuration option for BUFFER_SIZE 2020-02-09 23:28:04 +01:00
Martin Kroeker
449e8ea443 Merge pull request #26 from xianyi/develop
rebase
2020-02-09 23:23:55 +01:00
Martin Kroeker
3bec250cf9 Increment version to 0.3.9.dev 2020-02-09 23:18:44 +01:00
Martin Kroeker
f03dd23e90 Increment version to 0.3.9.dev 2020-02-09 23:18:07 +01:00
20 changed files with 133 additions and 36 deletions

View File

@@ -92,7 +92,7 @@ steps:
- mkdir build && cd build
- cmake $CMAKE_FLAGS ..
- make -j
- ctest
- ctest -V
---
kind: pipeline
@@ -116,7 +116,7 @@ steps:
- mkdir build && cd build
- cmake $CMAKE_FLAGS ..
- make -j
- ctest
- ctest -V
---
kind: pipeline
@@ -140,4 +140,4 @@ steps:
- mkdir build && cd build
- cmake $CMAKE_FLAGS ..
- make -j
- ctest
- ctest -V

3
.gitignore vendored
View File

@@ -87,4 +87,5 @@ build.*
*.swp
benchmark/*.goto
benchmark/smallscaling
CMakeCache.txt
CMakeFiles/*

View File

@@ -6,7 +6,7 @@ cmake_minimum_required(VERSION 2.8.5)
project(OpenBLAS C ASM)
set(OpenBLAS_MAJOR_VERSION 0)
set(OpenBLAS_MINOR_VERSION 3)
set(OpenBLAS_PATCH_VERSION 8)
set(OpenBLAS_PATCH_VERSION 9.dev)
set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}")
# Adhere to GNU filesystem layout conventions

View File

@@ -1,7 +1,7 @@
ifeq ($(CORE), $(filter $(CORE),ARMV7 CORTEXA9 CORTEXA15))
ifeq ($(OSNAME), Android)
CCOMMON_OPT += -mfpu=neon
FCOMMON_OPT += -mfpu=neon
CCOMMON_OPT += -mfpu=neon -march=armv7-a
FCOMMON_OPT += -mfpu=neon -march=armv7-a
else
CCOMMON_OPT += -mfpu=vfpv3 -march=armv7-a
FCOMMON_OPT += -mfpu=vfpv3 -march=armv7-a

View File

@@ -3,7 +3,7 @@
#
# This library's version
VERSION = 0.3.8
VERSION = 0.3.9.dev
# If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a
# and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library
@@ -97,6 +97,15 @@ VERSION = 0.3.8
# they need to wait for the preceding API calls to finish or risk data corruption.
# NUM_PARALLEL = 2
# When multithreading, OpenBLAS needs to use a memory buffer for communicating
# and collating results for individual subranges of the original matrix. Since
# the original GotoBLAS of the early 2000s, the default size of this buffer has
# been set at a value of 32<<20 (which is 32MB) on x86_64 , twice that on PPC.
# If you expect to handle large problem sizes (beyond about 30000x30000) uncomment
# this line and adjust the (32<<n) factor if necessary. Usually an insufficient value
# manifests itself as a crash in the relevant scal kernel (sscal_k, dscal_k etc)
# BUFFERSIZE = 25
# If you don't need to install the static library, please comment this in.
# NO_STATIC = 1

View File

@@ -195,7 +195,7 @@ if (($architecture eq "mips") || ($architecture eq "mips64")) {
print $tmpf "void main(void){ __asm__ volatile($code); }\n";
$args = "$msa_flags -o $tmpf.o $tmpf";
my @cmd = ("$compiler_name $args");
my @cmd = ("$compiler_name $args >/dev/null 2>/dev/null");
system(@cmd) == 0;
if ($? != 0) {
$have_msa = 0;

View File

@@ -289,6 +289,10 @@ set(CCOMMON_OPT "${CCOMMON_OPT} -DMAX_CPU_NUMBER=${NUM_THREADS}")
set(CCOMMON_OPT "${CCOMMON_OPT} -DMAX_PARALLEL_NUMBER=${NUM_PARALLEL}")
if (BUFFERSIZE)
set(CCOMMON_OPT "${CCOMMON_OPT} -DBUFFERSIZE=${BUFFERSIZE}")
endif ()
if (USE_SIMPLE_THREADED_LEVEL3)
set(CCOMMON_OPT "${CCOMMON_OPT} -DUSE_SIMPLE_THREADED_LEVEL3")
endif ()

View File

@@ -225,7 +225,11 @@ static __inline int blas_quickdivide(unsigned int x, unsigned int y){
#endif
#define HUGE_PAGESIZE ( 2 << 20)
#ifndef BUFFERSIZE
#define BUFFER_SIZE (32 << 20)
#else
#define BUFFER_SIZE (32 << BUFFERSIZE)
#endif
#define SEEK_ADDRESS

View File

@@ -2006,6 +2006,38 @@ int get_coretype(void){
return CORE_NEHALEM;
}
break;
case 6:
if (model == 6)
#ifndef NO_AVX512
return CORE_SKYLAKEX;
#else
if(support_avx())
#ifndef NO_AVX2
return CORE_HASWELL;
#else
return CORE_SANDYBRIDGE;
#endif
else
return CORE_NEHALEM;
#endif
break;
case 7:
if (model == 10)
return CORE_NEHALEM;
if (model == 14)
#ifndef NO_AVX512
return CORE_SKYLAKEX;
#else
if(support_avx())
#ifndef NO_AVX2
return CORE_HASWELL;
#else
return CORE_SANDYBRIDGE;
#endif
else
return CORE_NEHALEM;
#endif
break;
case 9:
case 8:
if (model == 14) { // Kaby Lake

View File

@@ -3,12 +3,12 @@
extern gotoblas_t gotoblas_Z13;
extern gotoblas_t gotoblas_Z14;
extern gotoblas_t gotoblas_Z15;
//extern gotoblas_t gotoblas_Z15;
//#if (!defined C_GCC) || (GCC_VERSION >= 60000)
//extern gotoblas_t gotoblas_Z14;
//#endif
#define NUM_CORETYPES 5
#define NUM_CORETYPES 4
extern void openblas_warning(int verbose, const char* msg);
@@ -16,14 +16,14 @@ static char* corename[] = {
"unknown",
"Z13",
"Z14",
"Z15",
// "Z15",
"ZARCH_GENERIC",
};
char* gotoblas_corename(void) {
if (gotoblas == &gotoblas_Z13) return corename[1];
if (gotoblas == &gotoblas_Z14) return corename[2];
if (gotoblas == &gotoblas_Z15) return corename[3];
// if (gotoblas == &gotoblas_Z15) return corename[3];
//#if (!defined C_GCC) || (GCC_VERSION >= 60000)
// if (gotoblas == &gotoblas_POWER9) return corename[3];
//#endif
@@ -31,7 +31,7 @@ char* gotoblas_corename(void) {
}
// __builtin_cpu_is is not supported by zarch
static gotolabs_t* get_coretype(void) {
static gotoblas_t* get_coretype(void) {
FILE* infile;
char buffer[512], * p;
@@ -78,7 +78,7 @@ static gotoblas_t* force_coretype(char* coretype) {
{
case 1: return (&gotoblas_Z13);
case 2: return (&gotoblas_Z14);
case 3: return (&gotoblas_Z15);
// case 3: return (&gotoblas_Z15);
//#if (!defined C_GCC) || (GCC_VERSION >= 60000)
// case 3: return (&gotoblas_POWER9);
//#endif

View File

@@ -121,8 +121,10 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS)
# Makefile.L3
set(USE_TRMM false)
if (ARM OR ARM64 OR "${TARGET_CORE}" STREQUAL "LONGSOON3B" OR "${TARGET_CORE}" STREQUAL "GENERIC" OR "${CORE}" STREQUAL "generic" OR "${TARGET_CORE}" STREQUAL "HASWELL" OR "${CORE}" STREQUAL "haswell" OR "${CORE}" STREQUAL "zen" OR "${TARGET_CORE}" STREQUAL "SKYLAKEX" OR "${CORE}" STREQUAL "skylakex")
if (ARM OR ARM64 OR (TARGET_CORE MATCHES LONGSOON3B) OR (TARGET_CORE MATCHES GENERIC) OR (TARGET_CORE MATCHES HASWELL) OR (TARGET_CORE MATCHES ZEN) OR (TARGET_CORE MATCHES SKYLAKEX) )
set(USE_TRMM true)
endif ()
if (ZARCH OR (TARGET_CORE MATCHES POWER8) OR (TARGET_CORE MATCHES POWER9))
set(USE_TRMM true)
endif ()

View File

@@ -89,30 +89,52 @@ ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
#SMINKERNEL = ../arm/min.c
#DMINKERNEL = ../arm/min.c
#
ifneq ($(__BYTE_ORDER__),$(__ORDER_BIG_ENDIAN__))
ifneq ($(__BYTE_ORDER__),"__ORDER_BIG_ENDIAN__")
ifneq ($(GCCVERSIONGTEQ9),1)
ISAMAXKERNEL = isamax_power8.S
else
ISAMAXKERNEL = isamax.c
endif
else
ISAMAXKERNEL = isamax.c
endif
#
IDAMAXKERNEL = idamax.c
ifneq ($(__BYTE_ORDER__),$(__ORDER_BIG_ENDIAN__))
#
ifneq ($(__BYTE_ORDER__),"__ORDER_BIG_ENDIAN__")
ifneq ($(GCCVERSIONGTEQ9),1)
ICAMAXKERNEL = icamax_power8.S
else
ICAMAXKERNEL = icamax.c
endif
else
ICAMAXKERNEL = icamax.c
endif
#
IZAMAXKERNEL = izamax.c
#
ifneq ($(__BYTE_ORDER__),$(__ORDER_BIG_ENDIAN__))
ifneq ($(__BYTE_ORDER__),"__ORDER_BIG_ENDIAN__")
ifneq ($(GCCVERSIONGTEQ9),1)
ISAMINKERNEL = isamin_power8.S
else
ISAMINKERNEL = isamin.c
endif
else
ISAMINKERNEL = isamin.c
endif
#
IDAMINKERNEL = idamin.c
ifneq ($(__BYTE_ORDER__),$(__ORDER_BIG_ENDIAN__))
#
ifneq ($(__BYTE_ORDER__),"__ORDER_BIG_ENDIAN__")
ifneq ($(GCCVERSIONGTEQ9),1)
ICAMINKERNEL = icamin_power8.S
else
ICAMINKERNEL = icamin.c
endif
else
ICAMINKERNEL = icamin.c
endif
#
IZAMINKERNEL = izamin.c
#
#ISMAXKERNEL = ../arm/imax.c
@@ -128,11 +150,16 @@ ZASUMKERNEL = zasum.c
#
SAXPYKERNEL = saxpy.c
DAXPYKERNEL = daxpy.c
ifneq ($(__BYTE_ORDER__),$(__ORDER_BIG_ENDIAN__))
ifneq ($(__BYTE_ORDER__),"__ORDER_BIG_ENDIAN__")
ifneq ($(GCCVERSIONGTEQ9),1)
CAXPYKERNEL = caxpy_power8.S
else
CAXPYKERNEL = caxpy.c
endif
else
CAXPYKERNEL = caxpy.c
endif
#
ZAXPYKERNEL = zaxpy.c
#
SCOPYKERNEL = scopy.c

View File

@@ -15,7 +15,7 @@ ZASUMKERNEL = zasum_ppc440.S
SAXPYKERNEL = axpy_ppc440.S
DAXPYKERNEL = axpy_ppc440.S
ifneq ($(__BYTE_ORDER__),$(__ORDER_BIG_ENDIAN__))
ifneq ($(__BYTE_ORDER__),"__ORDER_BIG_ENDIAN__")
CAXPYKERNEL = ../arm/zaxpy.c
ZAXPYKERNEL = ../arm/zaxpy.c
else
@@ -25,7 +25,7 @@ endif
SDOTKERNEL = dot_ppc440.S
DDOTKERNEL = dot_ppc440.S
ifneq ($(__BYTE_ORDER__),$(__ORDER_BIG_ENDIAN__))
ifneq ($(__BYTE_ORDER__),"__ORDER_BIG_ENDIAN__")
CDOTKERNEL = zdot_ppc440.S
ZDOTKERNEL = zdot_ppc440.S
else
@@ -62,7 +62,7 @@ ZNRM2KERNEL = znrm2_ppc440.S
SROTKERNEL = rot_ppc440.S
DROTKERNEL = rot_ppc440.S
ifneq ($(__BYTE_ORDER__),$(__ORDER_BIG_ENDIAN__))
ifneq ($(__BYTE_ORDER__),"__ORDER_BIG_ENDIAN__")
CROTKERNEL = zrot_ppc440.S
ZROTKERNEL = zrot_ppc440.S
else
@@ -132,7 +132,7 @@ ZTRSMKERNEL_LT = ztrsm_kernel_ppc440_LT.S
ZTRSMKERNEL_RN = ztrsm_kernel_ppc440_LT.S
ZTRSMKERNEL_RT = ztrsm_kernel_ppc440_RT.S
ifeq ($(__BYTE_ORDER__),$(__ORDER_BIG_ENDIAN__))
ifeq ($(__BYTE_ORDER__),"__ORDER_BIG_ENDIAN__")
SGEMVNKERNEL = ../arm/gemv_n.c
DGEMVNKERNEL = ../arm/gemv_n.c
SGEMVTKERNEL = ../arm/gemv_t.c

View File

@@ -1,4 +1,4 @@
ifeq ($(__BYTE_ORDER__),$(__ORDER_BIG_ENDIAN__))
ifeq ($(__BYTE_ORDER__),"__ORDER_BIG_ENDIAN__")
SGEMMKERNEL = gemm_kernel.S
SGEMMINCOPY =
SGEMMITCOPY =
@@ -30,7 +30,7 @@ DGEMMITCOPYOBJ =
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
ifeq ($(__BYTE_ORDER__),$(__ORDER_BIG_ENDIAN__))
ifeq ($(__BYTE_ORDER__),"__ORDER_BIG_ENDIAN__")
CGEMMKERNEL = zgemm_kernel.S
CGEMMINCOPY =
CGEMMITCOPY =
@@ -72,7 +72,7 @@ ZTRSMKERNEL_LT = ztrsm_kernel_LT.S
ZTRSMKERNEL_RN = ztrsm_kernel_LT.S
ZTRSMKERNEL_RT = ztrsm_kernel_RT.S
ifeq ($(__BYTE_ORDER__),$(__ORDER_BIG_ENDIAN__))
ifeq ($(__BYTE_ORDER__),"__ORDER_BIG_ENDIAN__")
STRSMKERNEL_LN = trsm_kernel_LN.S
STRSMKERNEL_LT = trsm_kernel_LT.S
STRSMKERNEL_RN = trsm_kernel_LT.S

View File

@@ -12,6 +12,13 @@
PROLOGUE
#if _CALL_ELF ==2
#ifdef CONJ
caxpyc_k:
#else
caxpy_k:
#endif
#endif
.LCF0:
0: addis 2,12,.TOC.-.LCF0@ha
addi 2,2,.TOC.-.LCF0@l

View File

@@ -10,7 +10,9 @@
#include "common.h"
PROLOGUE
#if _CALL_ELF ==2
icamin_k:
#endif
.LCF0:
0: addis 2,12,.TOC.-.LCF0@ha
addi 2,2,.TOC.-.LCF0@l

View File

@@ -11,7 +11,10 @@
#include "common.h"
PROLOGUE
#if _CALL_ELF == 2
isamax_k:
#endif
.LCF0:
0: addis 2,12,.TOC.-.LCF0@ha
addi 2,2,.TOC.-.LCF0@l

View File

@@ -11,6 +11,9 @@
PROLOGUE
#if _CALL_ELF ==2
isamin_k:
#endif
.LCF0:
0: addis 2,12,.TOC.-.LCF0@ha
addi 2,2,.TOC.-.LCF0@l

View File

@@ -136,10 +136,10 @@ static void dscal_kernel_inc_8(BLASLONG n, FLOAT *alpha, FLOAT *x, BLASLONG inc_
"jnz 1b \n\t"
:
"+r" (n) // 0
"+r" (n), // 0
"+r" (x), // 1
"+r" (x1) // 2
:
"r" (x), // 1
"r" (x1), // 2
"r" (alpha), // 3
"r" (inc_x), // 4
"r" (inc_x3) // 5

View File

@@ -4,7 +4,6 @@ include_directories(${PROJECT_BINARY_DIR})
set(LAPACK_SOURCES
getrf/getrf_single.c
potrf/potrf_U_single.c
potrf/potrf_L_single.c
lauum/lauum_U_single.c
@@ -45,6 +44,10 @@ GenerateNamedObjects("${LAPACK_MANGLED_SOURCES}" "" "" false "" "" false 3)
GenerateNamedObjects("laswp/generic/laswp_k_4.c" "" "laswp_plus" false "" "" false 3)
GenerateNamedObjects("laswp/generic/laswp_k_4.c" "MINUS" "laswp_minus" false "" "" false 3)
foreach (float_type ${FLOAT_TYPES})
GenerateNamedObjects("getrf/getrf_single.c" "UNIT" "getrf_single" false "" "" false ${float_type})
endforeach ()
# dynamic_arch laswp needs arch specific code ?
#foreach(TARGET_CORE ${DYNAMIC_CORE})
# set(TSUFFIX "_${TARGET_CORE}")
@@ -81,7 +84,7 @@ if (USE_THREAD)
)
foreach (float_type ${FLOAT_TYPES})
GenerateNamedObjects("${GETRF_SRC}" "" "getrf_parallel" false "" "" false ${float_type})
GenerateNamedObjects("${GETRF_SRC}" "UNIT" "getrf_parallel" false "" "" false ${float_type})
endforeach()
GenerateNamedObjects("${PARALLEL_SOURCES}")