Merge pull request #3987 from xianyi/develop
Merge from develop branch for 0.3.23
This commit is contained in:
commit
af3606d9fb
|
@ -8,7 +8,7 @@ project(OpenBLAS C ASM)
|
|||
|
||||
set(OpenBLAS_MAJOR_VERSION 0)
|
||||
set(OpenBLAS_MINOR_VERSION 3)
|
||||
set(OpenBLAS_PATCH_VERSION 22)
|
||||
set(OpenBLAS_PATCH_VERSION 22.dev)
|
||||
|
||||
set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}")
|
||||
|
||||
|
|
|
@ -1,4 +1,21 @@
|
|||
OpenBLAS ChangeLog
|
||||
====================================================================
|
||||
Version 0.3.23
|
||||
01-Apr-2023
|
||||
|
||||
general:
|
||||
- fixed a serious regression in GETRF/GETF2 and ZGETRF/ZGETF2 where
|
||||
subnormal but nonzero data elements triggered the singularity flag
|
||||
- fixed a long-standing bug in CSPR/ZSPR in single-threaded operation
|
||||
for cases where elements of the X vector are real numbers (or
|
||||
purely imaginary, i.e. complex with a zero real part)
|
||||
- fixed gmake builds with the option NO_LAPACK
|
||||
- fixed a few instances in the gmake Makefiles where expressly
|
||||
setting NO_LAPACK=0 or NO_LAPACKE=0 would have the opposite effect
|
||||
|
||||
x86_64:
|
||||
- added further CPUID values for Intel Raptor Lake
|
||||
|
||||
====================================================================
|
||||
Version 0.3.22
|
||||
26-Mar-2023
|
||||
|
|
|
@ -77,7 +77,7 @@ endif
|
|||
endif
|
||||
|
||||
ifneq ($(OSNAME), AIX)
|
||||
ifndef NO_LAPACKE
|
||||
ifneq ($(NO_LAPACKE), 1)
|
||||
@echo Copying LAPACKE header files to $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)
|
||||
@-install -m644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapack.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapack.h"
|
||||
@-install -m644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke.h"
|
||||
|
@ -127,7 +127,7 @@ endif
|
|||
|
||||
else
|
||||
#install on AIX has different options syntax
|
||||
ifndef NO_LAPACKE
|
||||
ifneq ($(NO_LAPACKE), 1)
|
||||
@echo Copying LAPACKE header files to $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)
|
||||
@-installbsd -c -m 644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapack.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapack.h"
|
||||
@-installbsd -c -m 644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke.h"
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
#
|
||||
|
||||
# This library's version
|
||||
VERSION = 0.3.22
|
||||
VERSION = 0.3.22.dev
|
||||
|
||||
# If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a
|
||||
# and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library
|
||||
|
|
|
@ -1547,6 +1547,8 @@ int get_cpuname(void){
|
|||
case 11: //family 6 exmodel 11
|
||||
switch (model) {
|
||||
case 7: // Raptor Lake
|
||||
case 10:
|
||||
case 15:
|
||||
if(support_avx2())
|
||||
return CPUTYPE_HASWELL;
|
||||
if(support_avx())
|
||||
|
@ -2348,6 +2350,8 @@ int get_coretype(void){
|
|||
case 11:
|
||||
switch (model) {
|
||||
case 7: // Raptor Lake
|
||||
case 10:
|
||||
case 15:
|
||||
#ifndef NO_AVX2
|
||||
if(support_avx2())
|
||||
return CORE_HASWELL;
|
||||
|
|
|
@ -92,7 +92,7 @@ CBLASOBJS += \
|
|||
ctrsv_RUU.$(SUFFIX) ctrsv_RUN.$(SUFFIX) ctrsv_RLU.$(SUFFIX) ctrsv_RLN.$(SUFFIX) \
|
||||
ctrsv_CUU.$(SUFFIX) ctrsv_CUN.$(SUFFIX) ctrsv_CLU.$(SUFFIX) ctrsv_CLN.$(SUFFIX)
|
||||
|
||||
ifndef NO_LAPACK
|
||||
ifneq ($(NO_LAPACK), 1)
|
||||
CBLASOBJS += \
|
||||
cspmv_U.$(SUFFIX) cspmv_L.$(SUFFIX) \
|
||||
cspr_U.$(SUFFIX) cspr_L.$(SUFFIX) \
|
||||
|
|
|
@ -53,7 +53,7 @@ int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT alpha_i,
|
|||
|
||||
for (i = 0; i < m; i++){
|
||||
#ifndef LOWER
|
||||
if ((X[i * 2 + 0] != ZERO) && (X[i * 2 + 1] != ZERO)) {
|
||||
if ((X[i * 2 + 0] != ZERO) || (X[i * 2 + 1] != ZERO)) {
|
||||
AXPYU_K(i + 1, 0, 0,
|
||||
alpha_r * X[i * 2 + 0] - alpha_i * X[i * 2 + 1],
|
||||
alpha_i * X[i * 2 + 0] + alpha_r * X[i * 2 + 1],
|
||||
|
@ -61,7 +61,7 @@ int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT alpha_i,
|
|||
}
|
||||
a += (i + 1) * 2;
|
||||
#else
|
||||
if ((X[i * 2 + 0] != ZERO) && (X[i * 2 + 1] != ZERO)) {
|
||||
if ((X[i * 2 + 0] != ZERO) || (X[i * 2 + 1] != ZERO)) {
|
||||
AXPYU_K(m - i, 0, 0,
|
||||
alpha_r * X[i * 2 + 0] - alpha_i * X[i * 2 + 1],
|
||||
alpha_i * X[i * 2 + 0] + alpha_r * X[i * 2 + 1],
|
||||
|
|
|
@ -92,9 +92,8 @@ CBLAS2OBJS = \
|
|||
cgemv.$(SUFFIX) cgeru.$(SUFFIX) cgerc.$(SUFFIX) \
|
||||
ctrsv.$(SUFFIX) ctrmv.$(SUFFIX) \
|
||||
csyr2.$(SUFFIX) cgbmv.$(SUFFIX) \
|
||||
csbmv.$(SUFFIX) cspmv.$(SUFFIX) \
|
||||
cspr.$(SUFFIX) cspr2.$(SUFFIX) \
|
||||
csymv.$(SUFFIX) csyr.$(SUFFIX) \
|
||||
csbmv.$(SUFFIX) \
|
||||
cspr2.$(SUFFIX) \
|
||||
ctbsv.$(SUFFIX) ctbmv.$(SUFFIX) \
|
||||
ctpsv.$(SUFFIX) ctpmv.$(SUFFIX) \
|
||||
chemv.$(SUFFIX) chbmv.$(SUFFIX) \
|
||||
|
@ -122,9 +121,8 @@ ZBLAS2OBJS = \
|
|||
zgemv.$(SUFFIX) zgeru.$(SUFFIX) zgerc.$(SUFFIX) \
|
||||
ztrsv.$(SUFFIX) ztrmv.$(SUFFIX) \
|
||||
zsyr2.$(SUFFIX) zgbmv.$(SUFFIX) \
|
||||
zsbmv.$(SUFFIX) zspmv.$(SUFFIX) \
|
||||
zspr.$(SUFFIX) zspr2.$(SUFFIX) \
|
||||
zsymv.$(SUFFIX) zsyr.$(SUFFIX) \
|
||||
zsbmv.$(SUFFIX) \
|
||||
zspr2.$(SUFFIX) \
|
||||
ztbsv.$(SUFFIX) ztbmv.$(SUFFIX) \
|
||||
ztpsv.$(SUFFIX) ztpmv.$(SUFFIX) \
|
||||
zhemv.$(SUFFIX) zhbmv.$(SUFFIX) \
|
||||
|
@ -447,7 +445,8 @@ QLAPACKOBJS = \
|
|||
CLAPACKOBJS = \
|
||||
cgetrf.$(SUFFIX) cgetrs.$(SUFFIX) cpotrf.$(SUFFIX) cgetf2.$(SUFFIX) \
|
||||
cpotf2.$(SUFFIX) claswp.$(SUFFIX) cgesv.$(SUFFIX) clauu2.$(SUFFIX) \
|
||||
clauum.$(SUFFIX) ctrti2.$(SUFFIX) ctrtri.$(SUFFIX) ctrtrs.$(SUFFIX)
|
||||
clauum.$(SUFFIX) ctrti2.$(SUFFIX) ctrtri.$(SUFFIX) ctrtrs.$(SUFFIX) \
|
||||
cspr.$(SUFFIX) cspmv.$(SUFFIX) csymv.$(SUFFIX) csyr.$(SUFFIX)
|
||||
|
||||
#ZLAPACKOBJS = \
|
||||
# zgetrf.$(SUFFIX) zgetrs.$(SUFFIX) zpotrf.$(SUFFIX) zgetf2.$(SUFFIX) \
|
||||
|
@ -458,8 +457,8 @@ CLAPACKOBJS = \
|
|||
ZLAPACKOBJS = \
|
||||
zgetrf.$(SUFFIX) zgetrs.$(SUFFIX) zpotrf.$(SUFFIX) zgetf2.$(SUFFIX) \
|
||||
zpotf2.$(SUFFIX) zlaswp.$(SUFFIX) zgesv.$(SUFFIX) zlauu2.$(SUFFIX) \
|
||||
zlauum.$(SUFFIX) ztrti2.$(SUFFIX) ztrtri.$(SUFFIX) ztrtrs.$(SUFFIX)
|
||||
|
||||
zlauum.$(SUFFIX) ztrti2.$(SUFFIX) ztrtri.$(SUFFIX) ztrtrs.$(SUFFIX) \
|
||||
zspr.$(SUFFIX) zspmv.$(SUFFIX) zsymv.$(SUFFIX) zsyr.$(SUFFIX)
|
||||
|
||||
XLAPACKOBJS = \
|
||||
xgetf2.$(SUFFIX) xgetrf.$(SUFFIX) xlauu2.$(SUFFIX) xlauum.$(SUFFIX) \
|
||||
|
@ -1021,7 +1020,7 @@ dsymv.$(SUFFIX) dsymv.$(PSUFFIX) : symv.c
|
|||
qsymv.$(SUFFIX) qsymv.$(PSUFFIX) : symv.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
ifndef NO_LAPACK
|
||||
ifneq ($(NO_LAPACK), 1)
|
||||
csymv.$(SUFFIX) csymv.$(PSUFFIX) : zsymv.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
|
@ -1041,7 +1040,7 @@ dsyr.$(SUFFIX) dsyr.$(PSUFFIX) : syr.c
|
|||
qsyr.$(SUFFIX) qsyr.$(PSUFFIX) : syr.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
ifndef NO_LAPACK
|
||||
ifneq ($(NO_LAPACK), 1)
|
||||
csyr.$(SUFFIX) csyr.$(PSUFFIX) : zsyr.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
|
@ -1115,7 +1114,7 @@ dspmv.$(SUFFIX) dspmv.$(PSUFFIX) : spmv.c
|
|||
qspmv.$(SUFFIX) qspmv.$(PSUFFIX) : spmv.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
ifndef NO_LAPACK
|
||||
ifneq ($(NO_LAPACK), 1)
|
||||
cspmv.$(SUFFIX) cspmv.$(PSUFFIX) : zspmv.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
|
@ -1135,7 +1134,7 @@ dspr.$(SUFFIX) dspr.$(PSUFFIX) : spr.c
|
|||
qspr.$(SUFFIX) qspr.$(PSUFFIX) : spr.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
ifndef NO_LAPACK
|
||||
ifneq ($(NO_LAPACK), 1)
|
||||
cspr.$(SUFFIX) cspr.$(PSUFFIX) : zspr.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
|
|
|
@ -100,16 +100,21 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
|
|||
jp--;
|
||||
temp1 = *(b + jp);
|
||||
|
||||
//if (temp1 != ZERO) {
|
||||
if (temp1 != ZERO) {
|
||||
#if defined(DOUBLE)
|
||||
if (fabs(temp1) >= DBL_MIN ) {
|
||||
temp1 = dp1 / temp1;
|
||||
#else
|
||||
if (fabs(temp1) >= FLT_MIN ) {
|
||||
#endif
|
||||
temp1 = dp1 / temp1;
|
||||
|
||||
if (jp != j) {
|
||||
SWAP_K(j + 1, 0, 0, ZERO, a + j, lda, a + jp, lda, NULL, 0);
|
||||
}
|
||||
if (j + 1 < m) {
|
||||
SCAL_K(m - j - 1, 0, 0, temp1, b + j + 1, 1, NULL, 0, NULL, 0);
|
||||
}
|
||||
if (jp != j) {
|
||||
SWAP_K(j + 1, 0, 0, ZERO, a + j, lda, a + jp, lda, NULL, 0);
|
||||
}
|
||||
if (j + 1 < m) {
|
||||
SCAL_K(m - j - 1, 0, 0, temp1, b + j + 1, 1, NULL, 0, NULL, 0);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (!info) info = j + 1;
|
||||
}
|
||||
|
|
|
@ -106,30 +106,34 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
|
|||
temp1 = *(b + jp * 2 + 0);
|
||||
temp2 = *(b + jp * 2 + 1);
|
||||
|
||||
// if ((temp1 != ZERO) || (temp2 != ZERO)) {
|
||||
if ((fabs(temp1) >= DBL_MIN) && (fabs(temp2) >= DBL_MIN)) {
|
||||
|
||||
if (jp != j) {
|
||||
SWAP_K(j + 1, 0, 0, ZERO, ZERO, a + j * 2, lda,
|
||||
if ((temp1 != ZERO) || (temp2 != ZERO)) {
|
||||
#if defined(DOUBLE)
|
||||
if ((fabs(temp1) >= DBL_MIN) || (fabs(temp2) >= DBL_MIN)) {
|
||||
#else
|
||||
if ((fabs(temp1) >= FLT_MIN) || (fabs(temp2) >= FLT_MIN)) {
|
||||
#endif
|
||||
if (jp != j) {
|
||||
SWAP_K(j + 1, 0, 0, ZERO, ZERO, a + j * 2, lda,
|
||||
a + jp * 2, lda, NULL, 0);
|
||||
}
|
||||
}
|
||||
|
||||
if (fabs(temp1) >= fabs(temp2)){
|
||||
ratio = temp2 / temp1;
|
||||
den = dp1 /(temp1 * ( 1 + ratio * ratio));
|
||||
temp3 = den;
|
||||
temp4 = -ratio * den;
|
||||
} else {
|
||||
ratio = temp1 / temp2;
|
||||
den = dp1 /(temp2 * ( 1 + ratio * ratio));
|
||||
temp3 = ratio * den;
|
||||
temp4 = -den;
|
||||
}
|
||||
if (fabs(temp1) >= fabs(temp2)){
|
||||
ratio = temp2 / temp1;
|
||||
den = dp1 /(temp1 * ( 1 + ratio * ratio));
|
||||
temp3 = den;
|
||||
temp4 = -ratio * den;
|
||||
} else {
|
||||
ratio = temp1 / temp2;
|
||||
den = dp1 /(temp2 * ( 1 + ratio * ratio));
|
||||
temp3 = ratio * den;
|
||||
temp4 = -den;
|
||||
}
|
||||
|
||||
if (j + 1 < m) {
|
||||
SCAL_K(m - j - 1, 0, 0, temp3, temp4,
|
||||
b + (j + 1) * 2, 1, NULL, 0, NULL, 0);
|
||||
}
|
||||
if (j + 1 < m) {
|
||||
SCAL_K(m - j - 1, 0, 0, temp3, temp4,
|
||||
b + (j + 1) * 2, 1, NULL, 0, NULL, 0);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (!info) info = j + 1;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue