Merge pull request #404 from wernsaar/develop
A lot of fixes for v0.2.10-rc2
This commit is contained in:
		
						commit
						94d3cfaa10
					
				
							
								
								
									
										8
									
								
								Makefile
								
								
								
								
							
							
						
						
									
										8
									
								
								Makefile
								
								
								
								
							|  | @ -247,10 +247,16 @@ ifndef NOFORTRAN | |||
| 	-@echo "SUFFIX      = $(SUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc | ||||
| 	-@echo "PSUFFIX     = $(PSUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc | ||||
| 	-@echo "CEXTRALIB   = $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc | ||||
| ifeq ($(F_COMPILER), GFORTRAN) | ||||
| ifeq ($(FC), GFORTRAN) | ||||
| 	-@echo "TIMER       = INT_ETIME" >> $(NETLIB_LAPACK_DIR)/make.inc | ||||
| ifdef SMP | ||||
| 	-@echo "LOADER      = $(FC) -pthread" >> $(NETLIB_LAPACK_DIR)/make.inc | ||||
| else | ||||
| 	-@echo "LOADER      = $(FC)" >> $(NETLIB_LAPACK_DIR)/make.inc | ||||
| endif | ||||
| else | ||||
| 	-@echo "TIMER       = NONE" >> $(NETLIB_LAPACK_DIR)/make.inc | ||||
| 	-@echo "LOADER      = $(FC)" >> $(NETLIB_LAPACK_DIR)/make.inc | ||||
| endif | ||||
| 	-@cat  make.inc >> $(NETLIB_LAPACK_DIR)/make.inc | ||||
| endif | ||||
|  |  | |||
|  | @ -1,12 +1,14 @@ | |||
| TOPDIR	= ../.. | ||||
| include ../../Makefile.system | ||||
| 
 | ||||
| USE_GEMM3M = 0 | ||||
| 
 | ||||
| ifeq ($(ARCH), x86) | ||||
| USE_GEMM3M = 1 | ||||
| USE_GEMM3M = 0 | ||||
| endif | ||||
| 
 | ||||
| ifeq ($(ARCH), x86_64) | ||||
| USE_GEMM3M = 1 | ||||
| USE_GEMM3M = 0 | ||||
| endif | ||||
| 
 | ||||
| ifeq ($(ARCH), ia64) | ||||
|  | @ -168,7 +170,7 @@ XBLASOBJS	+= \ | |||
| 	xher2k_kernel_UN.$(SUFFIX) xher2k_kernel_UC.$(SUFFIX) \
 | ||||
| 	xher2k_kernel_LN.$(SUFFIX) xher2k_kernel_LC.$(SUFFIX) | ||||
| 
 | ||||
| ifdef USE_GEMM3M | ||||
| ifeq ($(USE_GEMM3M), 1) | ||||
| 
 | ||||
| CBLASOBJS   +=  \
 | ||||
| 	cgemm3m_nn.$(SUFFIX) cgemm3m_cn.$(SUFFIX) cgemm3m_tn.$(SUFFIX) cgemm3m_nc.$(SUFFIX) \
 | ||||
|  | @ -239,7 +241,7 @@ CBLASOBJS    += cherk_thread_UN.$(SUFFIX) cherk_thread_UC.$(SUFFIX) cherk_thread | |||
| ZBLASOBJS    += zherk_thread_UN.$(SUFFIX) zherk_thread_UC.$(SUFFIX) zherk_thread_LN.$(SUFFIX) zherk_thread_LC.$(SUFFIX) | ||||
| XBLASOBJS    += xherk_thread_UN.$(SUFFIX) xherk_thread_UC.$(SUFFIX) xherk_thread_LN.$(SUFFIX) xherk_thread_LC.$(SUFFIX) | ||||
| 
 | ||||
| ifdef USE_GEMM3M | ||||
| ifeq ($(USE_GEMM3M), 1) | ||||
| 
 | ||||
| CBLASOBJS    += cgemm3m_thread_nn.$(SUFFIX) cgemm3m_thread_nt.$(SUFFIX) cgemm3m_thread_nr.$(SUFFIX) cgemm3m_thread_nc.$(SUFFIX) | ||||
| CBLASOBJS    += cgemm3m_thread_tn.$(SUFFIX) cgemm3m_thread_tt.$(SUFFIX) cgemm3m_thread_tr.$(SUFFIX) cgemm3m_thread_tc.$(SUFFIX) | ||||
|  |  | |||
|  | @ -165,7 +165,8 @@ int get_L2_size(void){ | |||
| 
 | ||||
| #if defined(ATHLON) || defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) || \ | ||||
|     defined(CORE_PRESCOTT) || defined(CORE_CORE2) || defined(PENRYN) || defined(DUNNINGTON) || \ | ||||
|   defined(CORE_NEHALEM) || defined(CORE_SANDYBRIDGE) || defined(ATOM) || defined(GENERIC) | ||||
|     defined(CORE_NEHALEM) || defined(CORE_SANDYBRIDGE) || defined(ATOM) || defined(GENERIC) || \ | ||||
|     defined(PILEDRIVER) || defined(HASWELL) | ||||
| 
 | ||||
|   cpuid(0x80000006, &eax, &ebx, &ecx, &edx); | ||||
| 
 | ||||
|  |  | |||
|  | @ -73,7 +73,7 @@ | |||
| 	       ); | ||||
| 
 | ||||
| @gemm3mobjs = ( | ||||
| 	       zgemm3m, cgemm3m, zsymm3m, csymm3m, zhemm3m, chemm3m, | ||||
| 	        | ||||
| 	       ); | ||||
| 
 | ||||
| 
 | ||||
|  |  | |||
|  | @ -1,6 +1,8 @@ | |||
| TOPDIR	= .. | ||||
| include $(TOPDIR)/Makefile.system | ||||
| 
 | ||||
| SUPPORT_GEMM3M = 0 | ||||
| 
 | ||||
| ifeq ($(ARCH), x86) | ||||
| SUPPORT_GEMM3M = 0 | ||||
| endif | ||||
|  | @ -124,7 +126,7 @@ ZBLAS3OBJS    = \ | |||
| 	       	zhemm.$(SUFFIX) zherk.$(SUFFIX) zher2k.$(SUFFIX) \
 | ||||
| 		zomatcopy.$(SUFFIX) zimatcopy.$(SUFFIX) | ||||
| 
 | ||||
| ifdef SUPPORT_GEMM3M | ||||
| ifeq ($(SUPPORT_GEMM3M), 1) | ||||
| 
 | ||||
| CBLAS3OBJS   +=  cgemm3m.$(SUFFIX) csymm3m.$(SUFFIX) chemm3m.$(SUFFIX) | ||||
| 
 | ||||
|  | @ -182,7 +184,7 @@ XBLAS3OBJS    = \ | |||
| 		xtrsm.$(SUFFIX) xsyrk.$(SUFFIX) xsyr2k.$(SUFFIX) \
 | ||||
| 		xhemm.$(SUFFIX) xherk.$(SUFFIX) xher2k.$(SUFFIX) | ||||
| 
 | ||||
| ifdef SUPPORT_GEMM3M | ||||
| ifeq ($(SUPPORT_GEMM3M), 1) | ||||
| 
 | ||||
| XBLAS3OBJS   +=  xgemm3m.$(SUFFIX) xsymm3m.$(SUFFIX) xhemm3m.$(SUFFIX) | ||||
| 
 | ||||
|  | @ -238,7 +240,7 @@ XBLAS3OBJS    = \ | |||
| 		xtrsm.$(SUFFIX) xsyrk.$(SUFFIX) xsyr2k.$(SUFFIX) \
 | ||||
| 		xhemm.$(SUFFIX) xherk.$(SUFFIX) xher2k.$(SUFFIX) | ||||
| 
 | ||||
| ifdef SUPPORT_GEMM3M | ||||
| ifeq ($(SUPPORT_GEMM3M), 1) | ||||
| 
 | ||||
| XBLAS3OBJS   +=  xgemm3m.$(SUFFIX) xsymm3m.$(SUFFIX) xhemm3m.$(SUFFIX) | ||||
| 
 | ||||
|  |  | |||
|  | @ -42,6 +42,12 @@ | |||
| #include "functable.h" | ||||
| #endif | ||||
| 
 | ||||
| #ifdef SMP | ||||
| #ifdef __64BIT__ | ||||
| #define SMPTEST 1 | ||||
| #endif | ||||
| #endif | ||||
| 
 | ||||
| #ifdef XDOUBLE | ||||
| #define ERROR_NAME "QGER  " | ||||
| #elif defined DOUBLE | ||||
|  | @ -75,7 +81,7 @@ void NAME(blasint *M, blasint *N, FLOAT *Alpha, | |||
|   blasint    incy  = *INCY; | ||||
|   blasint    lda   = *LDA; | ||||
|   FLOAT *buffer; | ||||
| #ifdef SMPBUG | ||||
| #ifdef SMPTEST | ||||
|   int nthreads; | ||||
| #endif | ||||
| 
 | ||||
|  | @ -107,7 +113,7 @@ void CNAME(enum CBLAS_ORDER order, | |||
| 
 | ||||
|   FLOAT *buffer; | ||||
|   blasint info, t; | ||||
| #ifdef SMPBUG | ||||
| #ifdef SMPTEST | ||||
|   int nthreads; | ||||
| #endif | ||||
| 
 | ||||
|  | @ -167,7 +173,7 @@ void CNAME(enum CBLAS_ORDER order, | |||
| 
 | ||||
|   buffer = (FLOAT *)blas_memory_alloc(1); | ||||
| 
 | ||||
| #ifdef SMPBUG | ||||
| #ifdef SMPTEST | ||||
|   nthreads = num_cpu_avail(2); | ||||
| 
 | ||||
| 
 | ||||
|  | @ -176,7 +182,7 @@ void CNAME(enum CBLAS_ORDER order, | |||
| 
 | ||||
|     GER(m, n, 0, alpha, x, incx, y, incy, a, lda, buffer); | ||||
| 
 | ||||
| #ifdef SMPBUG | ||||
| #ifdef SMPTEST | ||||
|   } else { | ||||
| 
 | ||||
|     GER_THREAD(m, n, alpha, x, incx, y, incy, a, lda, buffer, nthreads); | ||||
|  |  | |||
|  | @ -43,6 +43,14 @@ | |||
| #include "functable.h" | ||||
| #endif | ||||
| 
 | ||||
| /*
 | ||||
| #ifdef SMP | ||||
| #ifdef __64BIT__ | ||||
| #define SMPTEST 1 | ||||
| #endif | ||||
| #endif | ||||
| */ | ||||
| 
 | ||||
| #ifdef XDOUBLE | ||||
| #define ERROR_NAME "QSBMV " | ||||
| #elif defined(DOUBLE) | ||||
|  | @ -61,7 +69,7 @@ static  int (*sbmv[])(BLASLONG, BLASLONG, FLOAT, FLOAT *, BLASLONG, FLOAT *, BLA | |||
| #endif | ||||
| }; | ||||
| 
 | ||||
| #ifdef SMPBUG | ||||
| #ifdef SMPTEST | ||||
| static  int (*sbmv_thread[])(BLASLONG, BLASLONG, FLOAT, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, int) = { | ||||
| #ifdef XDOUBLE | ||||
|   qsbmv_thread_U, qsbmv_thread_L, | ||||
|  | @ -90,7 +98,7 @@ void NAME(char *UPLO, blasint *N, blasint *K, FLOAT  *ALPHA, FLOAT *a, blasint * | |||
|   blasint info; | ||||
|   int uplo; | ||||
|   FLOAT *buffer; | ||||
| #ifdef SMPBUG | ||||
| #ifdef SMPTEST | ||||
|   int nthreads; | ||||
| #endif | ||||
| 
 | ||||
|  | @ -130,7 +138,7 @@ void CNAME(enum CBLAS_ORDER order, | |||
|   FLOAT *buffer; | ||||
|   int uplo; | ||||
|   blasint info; | ||||
| #ifdef SMPBUG | ||||
| #ifdef SMPTEST | ||||
|   int nthreads; | ||||
| #endif | ||||
| 
 | ||||
|  | @ -189,7 +197,7 @@ void CNAME(enum CBLAS_ORDER order, | |||
| 
 | ||||
|   buffer = (FLOAT *)blas_memory_alloc(1); | ||||
| 
 | ||||
| #ifdef SMPBUG | ||||
| #ifdef SMPTEST | ||||
|   nthreads = num_cpu_avail(2); | ||||
| 
 | ||||
|   if (nthreads == 1) { | ||||
|  | @ -197,7 +205,7 @@ void CNAME(enum CBLAS_ORDER order, | |||
| 
 | ||||
|   (sbmv[uplo])(n, k, alpha, a, lda, x, incx, y, incy, buffer); | ||||
| 
 | ||||
| #ifdef SMPBUG | ||||
| #ifdef SMPTEST | ||||
|   } else { | ||||
| 
 | ||||
|     (sbmv_thread[uplo])(n, k, alpha, a, lda, x, incx, y, incy, buffer, nthreads); | ||||
|  |  | |||
|  | @ -42,6 +42,13 @@ | |||
| #include "functable.h" | ||||
| #endif | ||||
| 
 | ||||
| #ifdef SMP | ||||
| #ifdef __64BIT__ | ||||
| #define SMPTEST 1 | ||||
| #endif | ||||
| #endif | ||||
| 
 | ||||
| 
 | ||||
| #ifdef XDOUBLE | ||||
| #ifndef CONJ | ||||
| #define ERROR_NAME "XGERU  " | ||||
|  | @ -109,7 +116,7 @@ void NAME(blasint *M, blasint *N, FLOAT *Alpha, | |||
|   blasint    incy  = *INCY; | ||||
|   blasint    lda   = *LDA; | ||||
|   FLOAT *buffer; | ||||
| #ifdef SMPBUG | ||||
| #ifdef SMPTEST | ||||
|   int nthreads; | ||||
| #endif | ||||
| 
 | ||||
|  | @ -144,7 +151,7 @@ void CNAME(enum CBLAS_ORDER order, | |||
| 
 | ||||
|   FLOAT *buffer; | ||||
|   blasint info, t; | ||||
| #ifdef SMPBUG | ||||
| #ifdef SMPTEST | ||||
|   int nthreads; | ||||
| #endif | ||||
| 
 | ||||
|  | @ -205,7 +212,7 @@ void CNAME(enum CBLAS_ORDER order, | |||
| 
 | ||||
|   buffer = (FLOAT *)blas_memory_alloc(1); | ||||
| 
 | ||||
| #ifdef SMPBUG | ||||
| #ifdef SMPTEST | ||||
|   nthreads = num_cpu_avail(2); | ||||
| 
 | ||||
|   if (nthreads == 1) { | ||||
|  | @ -221,7 +228,7 @@ void CNAME(enum CBLAS_ORDER order, | |||
|   } | ||||
| #endif | ||||
| 
 | ||||
| #ifdef SMPBUG | ||||
| #ifdef SMPTEST | ||||
| 
 | ||||
|   } else { | ||||
| 
 | ||||
|  |  | |||
|  | @ -43,6 +43,14 @@ | |||
| #include "functable.h" | ||||
| #endif | ||||
| 
 | ||||
| /*
 | ||||
| #ifdef SMP | ||||
| #ifdef __64BIT__ | ||||
| #define SMPTEST 1 | ||||
| #endif | ||||
| #endif | ||||
| */ | ||||
| 
 | ||||
| #ifdef XDOUBLE | ||||
| #define ERROR_NAME "XSBMV " | ||||
| #elif defined(DOUBLE) | ||||
|  | @ -61,7 +69,7 @@ static  int (*sbmv[])(BLASLONG, BLASLONG, FLOAT, FLOAT, FLOAT *, BLASLONG, FLOAT | |||
| #endif | ||||
| }; | ||||
| 
 | ||||
| #ifdef SMPBUG | ||||
| #ifdef SMPTEST | ||||
| static  int (*sbmv_thread[])(BLASLONG, BLASLONG, FLOAT *, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, int) = { | ||||
| #ifdef XDOUBLE | ||||
|   xsbmv_thread_U, xsbmv_thread_L, | ||||
|  | @ -90,7 +98,7 @@ void NAME(char *UPLO, blasint *N, blasint *K, FLOAT  *ALPHA, FLOAT *a, blasint * | |||
|   blasint info; | ||||
|   int uplo; | ||||
|   FLOAT *buffer; | ||||
| #ifdef SMPBUG | ||||
| #ifdef SMPTEST | ||||
|   int nthreads; | ||||
| #endif | ||||
| 
 | ||||
|  | @ -131,7 +139,7 @@ void NAME(char *UPLO, blasint *N, blasint *K, FLOAT  *ALPHA, FLOAT *a, blasint * | |||
| 
 | ||||
|   buffer = (FLOAT *)blas_memory_alloc(1); | ||||
| 
 | ||||
| #ifdef SMPBUG | ||||
| #ifdef SMPTEST | ||||
|   nthreads = num_cpu_avail(2); | ||||
| 
 | ||||
|   if (nthreads == 1) { | ||||
|  | @ -139,7 +147,7 @@ void NAME(char *UPLO, blasint *N, blasint *K, FLOAT  *ALPHA, FLOAT *a, blasint * | |||
| 
 | ||||
|   (sbmv[uplo])(n, k, alpha_r, alpha_i, a, lda, b, incx, c, incy, buffer); | ||||
| 
 | ||||
| #ifdef SMPBUG | ||||
| #ifdef SMPTEST | ||||
|   } else { | ||||
| 
 | ||||
|     (sbmv_thread[uplo])(n, k, ALPHA, a, lda, b, incx, c, incy, buffer, nthreads); | ||||
|  |  | |||
|  | @ -1,3 +1,5 @@ | |||
| USE_GEMM3M = 0 | ||||
| 
 | ||||
| ifeq ($(ARCH), x86) | ||||
| USE_GEMM3M = 1 | ||||
| endif | ||||
|  | @ -122,7 +124,7 @@ XBLASOBJS	+= \ | |||
| 	xtrsm_kernel_RN$(TSUFFIX).$(SUFFIX) xtrsm_kernel_RT$(TSUFFIX).$(SUFFIX) \
 | ||||
| 	xtrsm_kernel_RR$(TSUFFIX).$(SUFFIX) xtrsm_kernel_RC$(TSUFFIX).$(SUFFIX) \
 | ||||
| 
 | ||||
| ifdef USE_GEMM3M | ||||
| ifeq ($(USE_GEMM3M), 1) | ||||
| 
 | ||||
| CBLASOBJS    +=  cgemm3m_kernel$(TSUFFIX).$(SUFFIX) | ||||
| ZBLASOBJS    +=  zgemm3m_kernel$(TSUFFIX).$(SUFFIX) | ||||
|  | @ -256,7 +258,7 @@ XBLASOBJS += \ | |||
| 	xhemm_iutcopy$(TSUFFIX).$(SUFFIX) xhemm_iltcopy$(TSUFFIX).$(SUFFIX) \
 | ||||
| 	xhemm_outcopy$(TSUFFIX).$(SUFFIX) xhemm_oltcopy$(TSUFFIX).$(SUFFIX) | ||||
| 
 | ||||
| ifdef USE_GEMM3M | ||||
| ifeq ($(USE_GEMM3M), 1) | ||||
| 
 | ||||
| CBLASOBJS += \
 | ||||
| 	cgemm3m_incopyb$(TSUFFIX).$(SUFFIX) cgemm3m_itcopyb$(TSUFFIX).$(SUFFIX) \
 | ||||
|  |  | |||
|  | @ -0,0 +1,104 @@ | |||
| /***************************************************************************
 | ||||
| Copyright (c) 2014, The OpenBLAS Project | ||||
| All rights reserved. | ||||
| Redistribution and use in source and binary forms, with or without | ||||
| modification, are permitted provided that the following conditions are | ||||
| met: | ||||
| 1. Redistributions of source code must retain the above copyright | ||||
| notice, this list of conditions and the following disclaimer. | ||||
| 2. Redistributions in binary form must reproduce the above copyright | ||||
| notice, this list of conditions and the following disclaimer in | ||||
| the documentation and/or other materials provided with the | ||||
| distribution. | ||||
| 3. Neither the name of the OpenBLAS project nor the names of | ||||
| its contributors may be used to endorse or promote products | ||||
| derived from this software without specific prior written permission. | ||||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | ||||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | ||||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | ||||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | ||||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | ||||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | ||||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | ||||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | ||||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||||
| *****************************************************************************/ | ||||
| 
 | ||||
| 
 | ||||
| #include "common.h" | ||||
| 
 | ||||
| #if defined(DSDOT) | ||||
| double CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) | ||||
| #else | ||||
| FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) | ||||
| #endif | ||||
| { | ||||
| 	BLASLONG i=0; | ||||
| 	BLASLONG ix=0,iy=0; | ||||
| 
 | ||||
| #if defined(DSDOT) | ||||
| 	double dot = 0.0 ; | ||||
| #else | ||||
| 	FLOAT  dot = 0.0 ; | ||||
| #endif | ||||
| 
 | ||||
| 	if ( n < 0 )  return(dot); | ||||
| 
 | ||||
| 	if ( (inc_x == 1) && (inc_y == 1) ) | ||||
| 	{ | ||||
| 
 | ||||
| 		int n1 = n & -4; | ||||
| 
 | ||||
| 		while(i < n1) | ||||
| 		{ | ||||
| 
 | ||||
| #if defined(DSDOT) | ||||
| 			dot += (double) y[i] * (double) x[i] | ||||
| 			    + (double) y[i+1] * (double) x[i+1] | ||||
| 			    + (double) y[i+2] * (double) x[i+2] | ||||
| 			    + (double) y[i+3] * (double) x[i+3] ; | ||||
| #else | ||||
| 			dot += y[i] * x[i] | ||||
| 			    + y[i+1] * x[i+1] | ||||
| 			    + y[i+2] * x[i+2] | ||||
| 			    + y[i+3] * x[i+3] ; | ||||
| #endif | ||||
| 			i+=4 ; | ||||
| 
 | ||||
| 		} | ||||
| 
 | ||||
| 		while(i < n) | ||||
| 		{ | ||||
| 
 | ||||
| #if defined(DSDOT) | ||||
| 			dot += (double) y[i] * (double) x[i] ; | ||||
| #else | ||||
| 			dot += y[i] * x[i] ; | ||||
| #endif | ||||
| 			i++ ; | ||||
| 
 | ||||
| 		} | ||||
| 		return(dot); | ||||
| 
 | ||||
| 
 | ||||
| 	} | ||||
| 
 | ||||
| 	while(i < n) | ||||
| 	{ | ||||
| 
 | ||||
| #if defined(DSDOT) | ||||
| 		dot += (double) y[iy] * (double) x[ix] ; | ||||
| #else | ||||
| 		dot += y[iy] * x[ix] ; | ||||
| #endif | ||||
| 		ix  += inc_x ; | ||||
| 		iy  += inc_y ; | ||||
| 		i++ ; | ||||
| 
 | ||||
| 	} | ||||
| 	return(dot); | ||||
| 
 | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
|  | @ -714,13 +714,13 @@ static void init_parameter(void) { | |||
|   fprintf(stderr, "Core2\n"); | ||||
| #endif | ||||
| 
 | ||||
|   TABLE_NAME.sgemm_p =  92 * (l2 >> 9); | ||||
|   TABLE_NAME.dgemm_p =  46 * (l2 >> 9); | ||||
|   TABLE_NAME.cgemm_p =  46 * (l2 >> 9); | ||||
|   TABLE_NAME.zgemm_p =  23 * (l2 >> 9); | ||||
|   TABLE_NAME.sgemm_p =  92 * (l2 >> 9) + 8; | ||||
|   TABLE_NAME.dgemm_p =  46 * (l2 >> 9) + 8; | ||||
|   TABLE_NAME.cgemm_p =  46 * (l2 >> 9) + 4; | ||||
|   TABLE_NAME.zgemm_p =  23 * (l2 >> 9) + 4; | ||||
| #ifdef EXPRECISION | ||||
|   TABLE_NAME.qgemm_p =  92 * (l2 >> 9); | ||||
|   TABLE_NAME.xgemm_p =  46 * (l2 >> 9); | ||||
|   TABLE_NAME.qgemm_p =  92 * (l2 >> 9) + 8; | ||||
|   TABLE_NAME.xgemm_p =  46 * (l2 >> 9) + 4; | ||||
| #endif | ||||
| #endif | ||||
| 
 | ||||
|  | @ -740,6 +740,23 @@ static void init_parameter(void) { | |||
| #endif | ||||
| #endif | ||||
| 
 | ||||
| #ifdef DUNNINGTON | ||||
| 
 | ||||
| #ifdef DEBUG | ||||
|   fprintf(stderr, "Dunnington\n"); | ||||
| #endif | ||||
| 
 | ||||
|   TABLE_NAME.sgemm_p =  42 * (l2 >> 9) + 8; | ||||
|   TABLE_NAME.dgemm_p =  42 * (l2 >> 9) + 8; | ||||
|   TABLE_NAME.cgemm_p =  21 * (l2 >> 9) + 4; | ||||
|   TABLE_NAME.zgemm_p =  21 * (l2 >> 9) + 4; | ||||
| #ifdef EXPRECISION | ||||
|   TABLE_NAME.qgemm_p =  42 * (l2 >> 9) + 8; | ||||
|   TABLE_NAME.xgemm_p =  21 * (l2 >> 9) + 4; | ||||
| #endif | ||||
| #endif | ||||
| 
 | ||||
| 
 | ||||
| #ifdef NEHALEM | ||||
| 
 | ||||
| #ifdef DEBUG | ||||
|  |  | |||
|  | @ -119,15 +119,13 @@ XCOPYKERNEL = zcopy.S | |||
| endif | ||||
| 
 | ||||
| ifndef SDOTKERNEL | ||||
| SDOTKERNEL = dot_sse.S | ||||
| SDOTKERNEL = ../generic/dot.c  | ||||
| endif | ||||
| 
 | ||||
| 
 | ||||
| ifndef DSDOTKERNEL | ||||
| DSDOTKERNEL = ../arm/dot.c  | ||||
| DSDOTKERNEL = ../generic/dot.c  | ||||
| endif | ||||
| 
 | ||||
| 
 | ||||
| ifndef DDOTKERNEL | ||||
| DDOTKERNEL =  dot_sse2.S | ||||
| endif | ||||
|  |  | |||
|  | @ -6,7 +6,6 @@ ZGEMVTKERNEL = zgemv_t.S | |||
| 
 | ||||
| DGEMVNKERNEL = dgemv_n_bulldozer.S | ||||
| DGEMVTKERNEL = dgemv_t_bulldozer.S | ||||
| DAXPYKERNEL  = daxpy_bulldozer.S | ||||
| DDOTKERNEL   = ddot_bulldozer.S | ||||
| DCOPYKERNEL  = dcopy_bulldozer.S | ||||
| 
 | ||||
|  |  | |||
|  | @ -6,7 +6,6 @@ ZGEMVTKERNEL = zgemv_t.S | |||
| 
 | ||||
| DGEMVNKERNEL = dgemv_n_bulldozer.S | ||||
| DGEMVTKERNEL = dgemv_t_bulldozer.S | ||||
| DAXPYKERNEL  = daxpy_bulldozer.S | ||||
| DDOTKERNEL   = ddot_bulldozer.S | ||||
| DCOPYKERNEL  = dcopy_bulldozer.S | ||||
| 
 | ||||
|  |  | |||
|  | @ -19,7 +19,7 @@ DGEMMINCOPYOBJ = | |||
| DGEMMITCOPYOBJ = | ||||
| DGEMMONCOPYOBJ =  dgemm_oncopy$(TSUFFIX).$(SUFFIX) | ||||
| DGEMMOTCOPYOBJ =  dgemm_otcopy$(TSUFFIX).$(SUFFIX) | ||||
| CGEMMKERNEL    =  zgemm_kernel_4x2_sse3.S | ||||
| CGEMMKERNEL    =  zgemm_kernel_4x2_sse.S | ||||
| CGEMMINCOPY    =  ../generic/zgemm_ncopy_4.c | ||||
| CGEMMITCOPY    =  ../generic/zgemm_tcopy_4.c | ||||
| CGEMMONCOPY    =  zgemm_ncopy_2.S | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue