Merge pull request #411 from wernsaar/develop

Lapack-test on x86 32bit now runs without errors.
This commit is contained in:
Zhang Xianyi 2014-07-10 22:38:15 +08:00
commit 7961404a40
3 changed files with 58 additions and 297 deletions

View File

@ -356,25 +356,25 @@ ZBLASOBJS = $(ZBLAS1OBJS) $(ZBLAS2OBJS) $(ZBLAS3OBJS)
XBLASOBJS = $(XBLAS1OBJS) $(XBLAS2OBJS) $(XBLAS3OBJS)
# SLAPACKOBJS: the s-prefixed LAPACK object names, each built with $(SUFFIX).
# The commented-out lists below are earlier versions of the same variable.
# NOTE(review): this text looks like a diff rendering whose +/- markers were
# stripped; the two trailing "slauum ..." lines differ only in spotri.$(SUFFIX).
# Confirm against the real Makefile which of the two is current.
#SLAPACKOBJS = \
# sgetf2.$(SUFFIX) sgetrf.$(SUFFIX) slauu2.$(SUFFIX) slauum.$(SUFFIX) \
# spotf2.$(SUFFIX) spotrf.$(SUFFIX) strti2.$(SUFFIX) strtri.$(SUFFIX) \
# slaswp.$(SUFFIX) sgetrs.$(SUFFIX) sgesv.$(SUFFIX) spotri.$(SUFFIX) \
# sgetrf.$(SUFFIX) sgetrs.$(SUFFIX) spotrf.$(SUFFIX) sgetf2.$(SUFFIX) \
# spotf2.$(SUFFIX) slaswp.$(SUFFIX) sgesv.$(SUFFIX) slauu2.$(SUFFIX) \
# slauum.$(SUFFIX) strti2.$(SUFFIX) strtri.$(SUFFIX) spotri.$(SUFFIX)
SLAPACKOBJS = \
sgetrf.$(SUFFIX) sgetrs.$(SUFFIX) spotrf.$(SUFFIX) sgetf2.$(SUFFIX) \
spotf2.$(SUFFIX) slaswp.$(SUFFIX) sgesv.$(SUFFIX) slauu2.$(SUFFIX) \
slauum.$(SUFFIX) strti2.$(SUFFIX) strtri.$(SUFFIX) spotri.$(SUFFIX)
slauum.$(SUFFIX) strti2.$(SUFFIX) strtri.$(SUFFIX)
# DLAPACKOBJS: the d-prefixed LAPACK object names, each built with $(SUFFIX).
# The commented-out lists below are earlier versions of the same variable.
# NOTE(review): this text looks like a diff rendering whose +/- markers were
# stripped; the two trailing "dlauum ..." lines differ only in dpotri.$(SUFFIX).
# Confirm against the real Makefile which of the two is current.
#DLAPACKOBJS = \
# dgetf2.$(SUFFIX) dgetrf.$(SUFFIX) dlauu2.$(SUFFIX) dlauum.$(SUFFIX) \
# dpotf2.$(SUFFIX) dpotrf.$(SUFFIX) dtrti2.$(SUFFIX) dtrtri.$(SUFFIX) \
# dlaswp.$(SUFFIX) dgetrs.$(SUFFIX) dgesv.$(SUFFIX) dpotri.$(SUFFIX) \
# dgetrf.$(SUFFIX) dgetrs.$(SUFFIX) dpotrf.$(SUFFIX) dgetf2.$(SUFFIX) \
# dpotf2.$(SUFFIX) dlaswp.$(SUFFIX) dgesv.$(SUFFIX) dlauu2.$(SUFFIX) \
# dlauum.$(SUFFIX) dtrti2.$(SUFFIX) dtrtri.$(SUFFIX) dpotri.$(SUFFIX)
DLAPACKOBJS = \
dgetrf.$(SUFFIX) dgetrs.$(SUFFIX) dpotrf.$(SUFFIX) dgetf2.$(SUFFIX) \
dpotf2.$(SUFFIX) dlaswp.$(SUFFIX) dgesv.$(SUFFIX) dlauu2.$(SUFFIX) \
dlauum.$(SUFFIX) dtrti2.$(SUFFIX) dtrtri.$(SUFFIX) dpotri.$(SUFFIX)
dlauum.$(SUFFIX) dtrti2.$(SUFFIX) dtrtri.$(SUFFIX)
QLAPACKOBJS = \
@ -382,28 +382,29 @@ QLAPACKOBJS = \
qpotf2.$(SUFFIX) qpotrf.$(SUFFIX) qtrti2.$(SUFFIX) qtrtri.$(SUFFIX) \
qlaswp.$(SUFFIX) qgetrs.$(SUFFIX) qgesv.$(SUFFIX) qpotri.$(SUFFIX) \
# CLAPACKOBJS: the c-prefixed LAPACK object names, each built with $(SUFFIX).
# The commented-out lists below are earlier versions of the same variable.
# NOTE(review): this text looks like a diff rendering whose +/- markers were
# stripped; the two trailing "clauum ..." lines differ only in cpotri.$(SUFFIX).
# Confirm against the real Makefile which of the two is current.
#CLAPACKOBJS = \
# cgetf2.$(SUFFIX) cgetrf.$(SUFFIX) clauu2.$(SUFFIX) clauum.$(SUFFIX) \
# cpotf2.$(SUFFIX) cpotrf.$(SUFFIX) ctrti2.$(SUFFIX) ctrtri.$(SUFFIX) \
# claswp.$(SUFFIX) cgetrs.$(SUFFIX) cgesv.$(SUFFIX) cpotri.$(SUFFIX) \
# cgetrf.$(SUFFIX) cgetrs.$(SUFFIX) cpotrf.$(SUFFIX) cgetf2.$(SUFFIX) \
# cpotf2.$(SUFFIX) claswp.$(SUFFIX) cgesv.$(SUFFIX) clauu2.$(SUFFIX) \
# clauum.$(SUFFIX) ctrti2.$(SUFFIX) ctrtri.$(SUFFIX) cpotri.$(SUFFIX)
CLAPACKOBJS = \
cgetrf.$(SUFFIX) cgetrs.$(SUFFIX) cpotrf.$(SUFFIX) cgetf2.$(SUFFIX) \
cpotf2.$(SUFFIX) claswp.$(SUFFIX) cgesv.$(SUFFIX) clauu2.$(SUFFIX) \
clauum.$(SUFFIX) ctrti2.$(SUFFIX) ctrtri.$(SUFFIX) cpotri.$(SUFFIX)
clauum.$(SUFFIX) ctrti2.$(SUFFIX) ctrtri.$(SUFFIX)
# ZLAPACKOBJS: the z-prefixed LAPACK object names, each built with $(SUFFIX).
# The commented-out lists below are earlier versions of the same variable.
# NOTE(review): this text looks like a diff rendering whose +/- markers were
# stripped; the two trailing "zlauum ..." lines differ only in zpotri.$(SUFFIX).
# Confirm against the real Makefile which of the two is current.
#ZLAPACKOBJS = \
# zgetf2.$(SUFFIX) zgetrf.$(SUFFIX) zlauu2.$(SUFFIX) zlauum.$(SUFFIX) \
# zpotf2.$(SUFFIX) zpotrf.$(SUFFIX) ztrti2.$(SUFFIX) ztrtri.$(SUFFIX) \
# zlaswp.$(SUFFIX) zgetrs.$(SUFFIX) zgesv.$(SUFFIX) zpotri.$(SUFFIX) \
# zgetrf.$(SUFFIX) zgetrs.$(SUFFIX) zpotrf.$(SUFFIX) zgetf2.$(SUFFIX) \
# zpotf2.$(SUFFIX) zlaswp.$(SUFFIX) zgesv.$(SUFFIX) zlauu2.$(SUFFIX) \
# zlauum.$(SUFFIX) ztrti2.$(SUFFIX) ztrtri.$(SUFFIX) zpotri.$(SUFFIX)
ZLAPACKOBJS = \
zgetrf.$(SUFFIX) zgetrs.$(SUFFIX) zpotrf.$(SUFFIX) zgetf2.$(SUFFIX) \
zpotf2.$(SUFFIX) zlaswp.$(SUFFIX) zgesv.$(SUFFIX) zlauu2.$(SUFFIX) \
zlauum.$(SUFFIX) ztrti2.$(SUFFIX) ztrtri.$(SUFFIX) zpotri.$(SUFFIX)
zlauum.$(SUFFIX) ztrti2.$(SUFFIX) ztrtri.$(SUFFIX)
XLAPACKOBJS = \

View File

@ -1,263 +1,3 @@
GEMVDEP = ../l2param.h
ifdef HAVE_SSE
# Default kernel sources selected inside the enclosing "ifdef HAVE_SSE" section.
# Each assignment is wrapped in ifndef, so it only takes effect when the
# variable does not already hold a non-empty value.

# amax/amin: the *AMIN* variables are pointed at the same source files as the
# matching *AMAX* variables.
ifndef SAMAXKERNEL
SAMAXKERNEL = amax_sse.S
endif
ifndef CAMAXKERNEL
CAMAXKERNEL = zamax_sse.S
endif
ifndef SAMINKERNEL
SAMINKERNEL = amax_sse.S
endif
ifndef CAMINKERNEL
CAMINKERNEL = zamax_sse.S
endif

# Index variants (I* prefix); min and max again share one source file each.
ifndef ISAMAXKERNEL
ISAMAXKERNEL = iamax_sse.S
endif
ifndef ICAMAXKERNEL
ICAMAXKERNEL = izamax_sse.S
endif
ifndef ISAMINKERNEL
ISAMINKERNEL = iamax_sse.S
endif
ifndef ICAMINKERNEL
ICAMINKERNEL = izamax_sse.S
endif
ifndef ISMAXKERNEL
ISMAXKERNEL = iamax_sse.S
endif
ifndef ISMINKERNEL
ISMINKERNEL = iamax_sse.S
endif

# max/min without the "A": same amax_sse.S source file as above.
ifndef SMAXKERNEL
SMAXKERNEL = amax_sse.S
endif
ifndef SMINKERNEL
SMINKERNEL = amax_sse.S
endif

# asum
ifndef SASUMKERNEL
SASUMKERNEL = asum_sse.S
endif
ifndef CASUMKERNEL
CASUMKERNEL = zasum_sse.S
endif

# dot: SDOTKERNEL points at the C source under ../arm rather than an _sse.S
# file; CDOTKERNEL uses the SSE assembly source.
ifndef SDOTKERNEL
SDOTKERNEL = ../arm/dot.c
endif
ifndef CDOTKERNEL
CDOTKERNEL = zdot_sse.S
endif

# copy
ifndef SCOPYKERNEL
SCOPYKERNEL = copy_sse.S
endif
ifndef CCOPYKERNEL
CCOPYKERNEL = zcopy_sse.S
endif
# Default SSE scal kernels, applied only when no scal kernel has been chosen
# yet. The guard has to name the variable that is being assigned
# (SSCALKERNEL / CSCALKERNEL): a guard on any other, never-defined name is
# always true and would silently overwrite an earlier selection.
ifndef SSCALKERNEL
SSCALKERNEL = scal_sse.S
endif
ifndef CSCALKERNEL
CSCALKERNEL = zscal_sse.S
endif
# Further defaults inside the enclosing "ifdef HAVE_SSE" section. Each ifndef
# guard leaves a variable untouched when it already has a non-empty value.

# axpy
ifndef SAXPYKERNEL
SAXPYKERNEL = axpy_sse.S
endif
ifndef CAXPYKERNEL
CAXPYKERNEL = zaxpy_sse.S
endif

# rot
ifndef SROTKERNEL
SROTKERNEL = rot_sse.S
endif
ifndef CROTKERNEL
CROTKERNEL = zrot_sse.S
endif

# swap
ifndef SSWAPKERNEL
SSWAPKERNEL = swap_sse.S
endif
ifndef CSWAPKERNEL
CSWAPKERNEL = zswap_sse.S
endif

# gemv: the S variants point at C sources under ../arm; the C variants use
# the SSE assembly sources.
ifndef SGEMVNKERNEL
SGEMVNKERNEL = ../arm/gemv_n.c
endif
ifndef SGEMVTKERNEL
SGEMVTKERNEL = ../arm/gemv_t.c
endif
ifndef CGEMVNKERNEL
CGEMVNKERNEL = zgemv_n_sse.S
endif
ifndef CGEMVTKERNEL
CGEMVTKERNEL = zgemv_t_sse.S
endif
endif
ifdef HAVE_SSE2
# Default kernel sources selected inside the enclosing "ifdef HAVE_SSE2"
# section. Each assignment is wrapped in ifndef, so it only takes effect when
# the variable does not already hold a non-empty value.

# amax/amin: the *AMIN* variables are pointed at the same source files as the
# matching *AMAX* variables.
ifndef DAMAXKERNEL
DAMAXKERNEL = amax_sse2.S
endif
ifndef ZAMAXKERNEL
ZAMAXKERNEL = zamax_sse2.S
endif
ifndef DAMINKERNEL
DAMINKERNEL = amax_sse2.S
endif
ifndef ZAMINKERNEL
ZAMINKERNEL = zamax_sse2.S
endif

# Index variants (I* prefix); min and max again share one source file each.
ifndef IDAMAXKERNEL
IDAMAXKERNEL = iamax_sse2.S
endif
ifndef IZAMAXKERNEL
IZAMAXKERNEL = izamax_sse2.S
endif
ifndef IDAMINKERNEL
IDAMINKERNEL = iamax_sse2.S
endif
ifndef IZAMINKERNEL
IZAMINKERNEL = izamax_sse2.S
endif
ifndef IDMAXKERNEL
IDMAXKERNEL = iamax_sse2.S
endif
ifndef IDMINKERNEL
IDMINKERNEL = iamax_sse2.S
endif

# max/min without the "A": same amax_sse2.S source file as above.
ifndef DMAXKERNEL
DMAXKERNEL = amax_sse2.S
endif
ifndef DMINKERNEL
DMINKERNEL = amax_sse2.S
endif

# dot
# NOTE(review): further down, this text carries a "bug in zdot assembler
# kernel" remark with an ifndef-guarded fallback to ../arm/zdot.c. If both
# sections end up in one file in this order, the default set here wins
# whenever HAVE_SSE2 is defined -- confirm that is intended.
ifndef DDOTKERNEL
DDOTKERNEL = dot_sse2.S
endif
ifndef ZDOTKERNEL
ZDOTKERNEL = zdot_sse2.S
endif

# copy: the DCOPYKERNEL assignment is commented out, so its guard currently
# sets nothing and DCOPYKERNEL is left as it was.
ifndef DCOPYKERNEL
# DCOPYKERNEL = copy_sse2.S
endif
ifndef ZCOPYKERNEL
ZCOPYKERNEL = zcopy_sse2.S
endif
# Default SSE2 scal kernels, applied only when no scal kernel has been chosen
# yet. The guard has to name the variable that is being assigned
# (DSCALKERNEL / ZSCALKERNEL): a guard on any other, never-defined name is
# always true and would silently overwrite an earlier selection.
ifndef DSCALKERNEL
DSCALKERNEL = scal_sse2.S
endif
ifndef ZSCALKERNEL
ZSCALKERNEL = zscal_sse2.S
endif
# Further defaults inside the enclosing "ifdef HAVE_SSE2" section. Each ifndef
# guard leaves a variable untouched when it already has a non-empty value.

# asum
ifndef DASUMKERNEL
DASUMKERNEL = asum_sse2.S
endif
ifndef ZASUMKERNEL
ZASUMKERNEL = zasum_sse2.S
endif

# axpy
ifndef DAXPYKERNEL
DAXPYKERNEL = axpy_sse2.S
endif
ifndef ZAXPYKERNEL
ZAXPYKERNEL = zaxpy_sse2.S
endif

# nrm2: these are the S/C variables and *_sse.S sources, yet they sit in the
# HAVE_SSE2 section.
# NOTE(review): presumably deliberate (the sources may need SSE2); confirm.
ifndef SNRM2KERNEL
SNRM2KERNEL = nrm2_sse.S
endif
ifndef CNRM2KERNEL
CNRM2KERNEL = znrm2_sse.S
endif

# rot
ifndef DROTKERNEL
DROTKERNEL = rot_sse2.S
endif
ifndef ZROTKERNEL
ZROTKERNEL = zrot_sse2.S
endif

# swap
ifndef DSWAPKERNEL
DSWAPKERNEL = swap_sse2.S
endif
ifndef ZSWAPKERNEL
ZSWAPKERNEL = zswap_sse2.S
endif

# gemv
# NOTE(review): further down, this text carries a "Bug in zgemv_t assembler
# kernel" remark with an ifndef-guarded fallback to ../arm/zgemv_t.c. If both
# sections end up in one file in this order, the ZGEMVTKERNEL default set here
# wins whenever HAVE_SSE2 is defined -- confirm that is intended.
ifndef DGEMVNKERNEL
DGEMVNKERNEL = gemv_n_sse2.S
endif
ifndef DGEMVTKERNEL
DGEMVTKERNEL = gemv_t_sse2.S
endif
ifndef ZGEMVNKERNEL
ZGEMVNKERNEL = zgemv_n_sse2.S
endif
ifndef ZGEMVTKERNEL
ZGEMVTKERNEL = zgemv_t_sse2.S
endif
endif
# Fallback used when SAMINKERNEL still has no non-empty value at this point:
# the plain amax.S source.
ifndef SAMINKERNEL
SAMINKERNEL = amax.S
endif
@ -394,21 +134,41 @@ XGEMMITCOPYOBJ =
# Copy-routine object names for the X variant, built from $(TSUFFIX) and $(SUFFIX).
XGEMMONCOPYOBJ = xgemm_oncopy$(TSUFFIX).$(SUFFIX)
XGEMMOTCOPYOBJ = xgemm_otcopy$(TSUFFIX).$(SUFFIX)
# GEMM beta sources: S/D/C/Z use assembly files here, Q/X use ../generic C files.
# NOTE(review): all six *GEMM_BETA variables are assigned again further down
# (to ../generic/*.c); with plain "=" the last assignment is the one that
# sticks, so these values only matter if those later lines are absent.
SGEMM_BETA = gemm_beta.S
DGEMM_BETA = gemm_beta.S
QGEMM_BETA = ../generic/gemm_beta.c
CGEMM_BETA = zgemm_beta.S
ZGEMM_BETA = zgemm_beta.S
XGEMM_BETA = ../generic/zgemm_beta.c
# TRSM kernel sources. QTRSMKERNEL_RN is given the LT source file, and all
# four XTRSMKERNEL_* variables are given the LT_1x1 source file.
# NOTE(review): each group of four appears twice with the same values; this
# looks like an artifact of a diff rendered without +/- markers (possibly a
# whitespace-only change) -- confirm against the real file.
QTRSMKERNEL_LN = qtrsm_kernel_LN_2x2.S
QTRSMKERNEL_LT = qtrsm_kernel_LT_2x2.S
QTRSMKERNEL_RN = qtrsm_kernel_LT_2x2.S
QTRSMKERNEL_RT = qtrsm_kernel_RT_2x2.S
QTRSMKERNEL_LN = qtrsm_kernel_LN_2x2.S
QTRSMKERNEL_LT = qtrsm_kernel_LT_2x2.S
QTRSMKERNEL_RN = qtrsm_kernel_LT_2x2.S
QTRSMKERNEL_RT = qtrsm_kernel_RT_2x2.S
XTRSMKERNEL_LN = xtrsm_kernel_LT_1x1.S
XTRSMKERNEL_LT = xtrsm_kernel_LT_1x1.S
XTRSMKERNEL_RN = xtrsm_kernel_LT_1x1.S
XTRSMKERNEL_RT = xtrsm_kernel_LT_1x1.S
XTRSMKERNEL_LN = xtrsm_kernel_LT_1x1.S
XTRSMKERNEL_LT = xtrsm_kernel_LT_1x1.S
XTRSMKERNEL_RN = xtrsm_kernel_LT_1x1.S
XTRSMKERNEL_RT = xtrsm_kernel_LT_1x1.S
XGEMM3MKERNEL = xgemm3m_kernel_2x2.S
# Bug in the zdot assembler kernel: use the C implementation under ../arm
# unless ZDOTKERNEL already has a non-empty value.
ifndef ZDOTKERNEL
ZDOTKERNEL = ../arm/zdot.c
endif
# DSDOT is always built from the C dot implementation (unconditional assignment).
DSDOTKERNEL = ../arm/dot.c
# Bug in the znrm2 assembler kernel: use the C implementation under ../arm
# unless ZNRM2KERNEL already has a non-empty value.
ifndef ZNRM2KERNEL
ZNRM2KERNEL = ../arm/znrm2.c
endif
# Bug in the zgemv_t assembler kernel: use the C implementation under ../arm
# unless ZGEMVTKERNEL already has a non-empty value.
ifndef ZGEMVTKERNEL
ZGEMVTKERNEL = ../arm/zgemv_t.c
endif
# GEMM beta routines: every precision uses the generic C sources. These are
# unconditional assignments, so they replace any value set earlier.
SGEMM_BETA = ../generic/gemm_beta.c
DGEMM_BETA = ../generic/gemm_beta.c
CGEMM_BETA = ../generic/zgemm_beta.c
ZGEMM_BETA = ../generic/zgemm_beta.c
QGEMM_BETA = ../generic/gemm_beta.c
XGEMM_BETA = ../generic/zgemm_beta.c

View File

@ -155,7 +155,7 @@ SLASRC = \
sbbcsd.o slapmr.o sorbdb.o sorbdb1.o sorbdb2.o sorbdb3.o sorbdb4.o \
sorbdb5.o sorbdb6.o sorcsd.o sorcsd2by1.o \
sgeqrt.o sgeqrt2.o sgeqrt3.o sgemqrt.o \
stpqrt.o stpqrt2.o stpmqrt.o stprfb.o
stpqrt.o stpqrt2.o stpmqrt.o stprfb.o spotri.o
DSLASRC = spotrs.o
@ -236,7 +236,7 @@ CLASRC = \
cbbcsd.o clapmr.o cunbdb.o cunbdb1.o cunbdb2.o cunbdb3.o cunbdb4.o \
cunbdb5.o cunbdb6.o cuncsd.o cuncsd2by1.o \
cgeqrt.o cgeqrt2.o cgeqrt3.o cgemqrt.o \
ctpqrt.o ctpqrt2.o ctpmqrt.o ctprfb.o
ctpqrt.o ctpqrt2.o ctpmqrt.o ctprfb.o cpotri.o
ifdef USEXBLAS
CXLASRC = cgesvxx.o cgerfsx.o cla_gerfsx_extended.o cla_geamv.o \
@ -316,7 +316,7 @@ DLASRC = \
dbbcsd.o dlapmr.o dorbdb.o dorbdb1.o dorbdb2.o dorbdb3.o dorbdb4.o \
dorbdb5.o dorbdb6.o dorcsd.o dorcsd2by1.o \
dgeqrt.o dgeqrt2.o dgeqrt3.o dgemqrt.o \
dtpqrt.o dtpqrt2.o dtpmqrt.o dtprfb.o
dtpqrt.o dtpqrt2.o dtpmqrt.o dtprfb.o dpotri.o
ifdef USEXBLAS
DXLASRC = dgesvxx.o dgerfsx.o dla_gerfsx_extended.o dla_geamv.o \
@ -400,7 +400,7 @@ ZLASRC = \
zbbcsd.o zlapmr.o zunbdb.o zunbdb1.o zunbdb2.o zunbdb3.o zunbdb4.o \
zunbdb5.o zunbdb6.o zuncsd.o zuncsd2by1.o \
zgeqrt.o zgeqrt2.o zgeqrt3.o zgemqrt.o \
ztpqrt.o ztpqrt2.o ztpmqrt.o ztprfb.o
ztpqrt.o ztpqrt2.o ztpmqrt.o ztprfb.o zpotri.o
ifdef USEXBLAS
ZXLASRC = zgesvxx.o zgerfsx.o zla_gerfsx_extended.o zla_geamv.o \