Merge branch 'develop'
This commit is contained in:
commit
f9991fd5f6
8
Makefile
8
Makefile
|
@ -247,10 +247,16 @@ ifndef NOFORTRAN
|
|||
-@echo "SUFFIX = $(SUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "PSUFFIX = $(PSUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "CEXTRALIB = $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
ifeq ($(F_COMPILER), GFORTRAN)
|
||||
ifeq ($(FC), GFORTRAN)
|
||||
-@echo "TIMER = INT_ETIME" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
ifdef SMP
|
||||
-@echo "LOADER = $(FC) -pthread" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
else
|
||||
-@echo "LOADER = $(FC)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
endif
|
||||
else
|
||||
-@echo "TIMER = NONE" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "LOADER = $(FC)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
endif
|
||||
-@cat make.inc >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
endif
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
#
|
||||
|
||||
# This library's version
|
||||
VERSION = 0.2.10.rc1
|
||||
VERSION = 0.2.10.rc2
|
||||
|
||||
# If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a
|
||||
# and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library
|
||||
|
|
3
cblas.h
3
cblas.h
|
@ -16,6 +16,9 @@ void goto_set_num_threads(int num_threads);
|
|||
/* Get the build configuration at runtime. */
|
||||
char* openblas_get_config(void);
|
||||
|
||||
/* Get the CPU core name at runtime. */
|
||||
char* openblas_get_corename(void);
|
||||
|
||||
/* Get the parallelization type which is used by OpenBLAS */
|
||||
int openblas_get_parallel(void);
|
||||
/* OpenBLAS is compiled for sequential use */
|
||||
|
|
|
@ -1,12 +1,14 @@
|
|||
TOPDIR = ../..
|
||||
include ../../Makefile.system
|
||||
|
||||
USE_GEMM3M = 0
|
||||
|
||||
ifeq ($(ARCH), x86)
|
||||
USE_GEMM3M = 1
|
||||
USE_GEMM3M = 0
|
||||
endif
|
||||
|
||||
ifeq ($(ARCH), x86_64)
|
||||
USE_GEMM3M = 1
|
||||
USE_GEMM3M = 0
|
||||
endif
|
||||
|
||||
ifeq ($(ARCH), ia64)
|
||||
|
@ -168,7 +170,7 @@ XBLASOBJS += \
|
|||
xher2k_kernel_UN.$(SUFFIX) xher2k_kernel_UC.$(SUFFIX) \
|
||||
xher2k_kernel_LN.$(SUFFIX) xher2k_kernel_LC.$(SUFFIX)
|
||||
|
||||
ifdef USE_GEMM3M
|
||||
ifeq ($(USE_GEMM3M), 1)
|
||||
|
||||
CBLASOBJS += \
|
||||
cgemm3m_nn.$(SUFFIX) cgemm3m_cn.$(SUFFIX) cgemm3m_tn.$(SUFFIX) cgemm3m_nc.$(SUFFIX) \
|
||||
|
@ -239,7 +241,7 @@ CBLASOBJS += cherk_thread_UN.$(SUFFIX) cherk_thread_UC.$(SUFFIX) cherk_thread
|
|||
ZBLASOBJS += zherk_thread_UN.$(SUFFIX) zherk_thread_UC.$(SUFFIX) zherk_thread_LN.$(SUFFIX) zherk_thread_LC.$(SUFFIX)
|
||||
XBLASOBJS += xherk_thread_UN.$(SUFFIX) xherk_thread_UC.$(SUFFIX) xherk_thread_LN.$(SUFFIX) xherk_thread_LC.$(SUFFIX)
|
||||
|
||||
ifdef USE_GEMM3M
|
||||
ifeq ($(USE_GEMM3M), 1)
|
||||
|
||||
CBLASOBJS += cgemm3m_thread_nn.$(SUFFIX) cgemm3m_thread_nt.$(SUFFIX) cgemm3m_thread_nr.$(SUFFIX) cgemm3m_thread_nc.$(SUFFIX)
|
||||
CBLASOBJS += cgemm3m_thread_tn.$(SUFFIX) cgemm3m_thread_tt.$(SUFFIX) cgemm3m_thread_tr.$(SUFFIX) cgemm3m_thread_tc.$(SUFFIX)
|
||||
|
|
|
@ -32,6 +32,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#include "common.h"
|
||||
|
||||
#include <string.h>
|
||||
|
||||
static char* openblas_config_str=""
|
||||
#ifdef USE64BITINT
|
||||
"USE64BITINT "
|
||||
|
@ -50,10 +52,33 @@ static char* openblas_config_str=""
|
|||
#endif
|
||||
#ifdef NO_AFFINITY
|
||||
"NO_AFFINITY "
|
||||
#endif
|
||||
#ifndef DYNAMIC_ARCH
|
||||
CHAR_CORENAME
|
||||
#endif
|
||||
;
|
||||
|
||||
#ifdef DYNAMIC_ARCH
|
||||
char *gotoblas_corename();
|
||||
static char tmp_config_str[256];
|
||||
#endif
|
||||
|
||||
|
||||
char* CNAME() {
|
||||
#ifndef DYNAMIC_ARCH
|
||||
return openblas_config_str;
|
||||
#else
|
||||
strcpy(tmp_config_str, openblas_config_str);
|
||||
strcat(tmp_config_str, gotoblas_corename());
|
||||
return tmp_config_str;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
char* openblas_get_corename() {
|
||||
#ifndef DYNAMIC_ARCH
|
||||
return CHAR_CORENAME;
|
||||
#else
|
||||
return gotoblas_corename();
|
||||
#endif
|
||||
}
|
||||
|
|
|
@ -165,7 +165,8 @@ int get_L2_size(void){
|
|||
|
||||
#if defined(ATHLON) || defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) || \
|
||||
defined(CORE_PRESCOTT) || defined(CORE_CORE2) || defined(PENRYN) || defined(DUNNINGTON) || \
|
||||
defined(CORE_NEHALEM) || defined(CORE_SANDYBRIDGE) || defined(ATOM) || defined(GENERIC)
|
||||
defined(CORE_NEHALEM) || defined(CORE_SANDYBRIDGE) || defined(ATOM) || defined(GENERIC) || \
|
||||
defined(PILEDRIVER) || defined(HASWELL)
|
||||
|
||||
cpuid(0x80000006, &eax, &ebx, &ecx, &edx);
|
||||
|
||||
|
|
|
@ -73,7 +73,7 @@
|
|||
);
|
||||
|
||||
@gemm3mobjs = (
|
||||
zgemm3m, cgemm3m, zsymm3m, csymm3m, zhemm3m, chemm3m,
|
||||
|
||||
);
|
||||
|
||||
|
||||
|
@ -85,6 +85,7 @@
|
|||
@misc_no_underscore_objs = (
|
||||
goto_set_num_threads,
|
||||
openblas_get_config,
|
||||
openblas_get_corename,
|
||||
);
|
||||
|
||||
@misc_underscore_objs = (
|
||||
|
|
|
@ -952,6 +952,15 @@ int main(int argc, char *argv[]){
|
|||
#else
|
||||
get_cpuconfig();
|
||||
#endif
|
||||
|
||||
#ifdef FORCE
|
||||
printf("#define CHAR_CORENAME \"%s\"\n", CORENAME);
|
||||
#else
|
||||
#if defined(__i386__) || defined(__x86_64__) || defined(POWER) || defined(__mips__) || defined(__arm__)
|
||||
printf("#define CHAR_CORENAME \"%s\"\n", get_corename());
|
||||
#endif
|
||||
#endif
|
||||
|
||||
break;
|
||||
|
||||
case '2' : /* SMP */
|
||||
|
|
|
@ -1,6 +1,8 @@
|
|||
TOPDIR = ..
|
||||
include $(TOPDIR)/Makefile.system
|
||||
|
||||
SUPPORT_GEMM3M = 0
|
||||
|
||||
ifeq ($(ARCH), x86)
|
||||
SUPPORT_GEMM3M = 0
|
||||
endif
|
||||
|
@ -124,7 +126,7 @@ ZBLAS3OBJS = \
|
|||
zhemm.$(SUFFIX) zherk.$(SUFFIX) zher2k.$(SUFFIX) \
|
||||
zomatcopy.$(SUFFIX) zimatcopy.$(SUFFIX)
|
||||
|
||||
ifdef SUPPORT_GEMM3M
|
||||
ifeq ($(SUPPORT_GEMM3M), 1)
|
||||
|
||||
CBLAS3OBJS += cgemm3m.$(SUFFIX) csymm3m.$(SUFFIX) chemm3m.$(SUFFIX)
|
||||
|
||||
|
@ -182,7 +184,7 @@ XBLAS3OBJS = \
|
|||
xtrsm.$(SUFFIX) xsyrk.$(SUFFIX) xsyr2k.$(SUFFIX) \
|
||||
xhemm.$(SUFFIX) xherk.$(SUFFIX) xher2k.$(SUFFIX)
|
||||
|
||||
ifdef SUPPORT_GEMM3M
|
||||
ifeq ($(SUPPORT_GEMM3M), 1)
|
||||
|
||||
XBLAS3OBJS += xgemm3m.$(SUFFIX) xsymm3m.$(SUFFIX) xhemm3m.$(SUFFIX)
|
||||
|
||||
|
@ -238,7 +240,7 @@ XBLAS3OBJS = \
|
|||
xtrsm.$(SUFFIX) xsyrk.$(SUFFIX) xsyr2k.$(SUFFIX) \
|
||||
xhemm.$(SUFFIX) xherk.$(SUFFIX) xher2k.$(SUFFIX)
|
||||
|
||||
ifdef SUPPORT_GEMM3M
|
||||
ifeq ($(SUPPORT_GEMM3M), 1)
|
||||
|
||||
XBLAS3OBJS += xgemm3m.$(SUFFIX) xsymm3m.$(SUFFIX) xhemm3m.$(SUFFIX)
|
||||
|
||||
|
|
|
@ -42,6 +42,12 @@
|
|||
#include "functable.h"
|
||||
#endif
|
||||
|
||||
#ifdef SMP
|
||||
#ifdef __64BIT__
|
||||
#define SMPTEST 1
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef XDOUBLE
|
||||
#define ERROR_NAME "QGER "
|
||||
#elif defined DOUBLE
|
||||
|
@ -75,7 +81,7 @@ void NAME(blasint *M, blasint *N, FLOAT *Alpha,
|
|||
blasint incy = *INCY;
|
||||
blasint lda = *LDA;
|
||||
FLOAT *buffer;
|
||||
#ifdef SMPBUG
|
||||
#ifdef SMPTEST
|
||||
int nthreads;
|
||||
#endif
|
||||
|
||||
|
@ -107,7 +113,7 @@ void CNAME(enum CBLAS_ORDER order,
|
|||
|
||||
FLOAT *buffer;
|
||||
blasint info, t;
|
||||
#ifdef SMPBUG
|
||||
#ifdef SMPTEST
|
||||
int nthreads;
|
||||
#endif
|
||||
|
||||
|
@ -167,7 +173,7 @@ void CNAME(enum CBLAS_ORDER order,
|
|||
|
||||
buffer = (FLOAT *)blas_memory_alloc(1);
|
||||
|
||||
#ifdef SMPBUG
|
||||
#ifdef SMPTEST
|
||||
nthreads = num_cpu_avail(2);
|
||||
|
||||
|
||||
|
@ -176,7 +182,7 @@ void CNAME(enum CBLAS_ORDER order,
|
|||
|
||||
GER(m, n, 0, alpha, x, incx, y, incy, a, lda, buffer);
|
||||
|
||||
#ifdef SMPBUG
|
||||
#ifdef SMPTEST
|
||||
} else {
|
||||
|
||||
GER_THREAD(m, n, alpha, x, incx, y, incy, a, lda, buffer, nthreads);
|
||||
|
|
|
@ -43,6 +43,14 @@
|
|||
#include "functable.h"
|
||||
#endif
|
||||
|
||||
/*
|
||||
#ifdef SMP
|
||||
#ifdef __64BIT__
|
||||
#define SMPTEST 1
|
||||
#endif
|
||||
#endif
|
||||
*/
|
||||
|
||||
#ifdef XDOUBLE
|
||||
#define ERROR_NAME "QSBMV "
|
||||
#elif defined(DOUBLE)
|
||||
|
@ -61,7 +69,7 @@ static int (*sbmv[])(BLASLONG, BLASLONG, FLOAT, FLOAT *, BLASLONG, FLOAT *, BLA
|
|||
#endif
|
||||
};
|
||||
|
||||
#ifdef SMPBUG
|
||||
#ifdef SMPTEST
|
||||
static int (*sbmv_thread[])(BLASLONG, BLASLONG, FLOAT, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, int) = {
|
||||
#ifdef XDOUBLE
|
||||
qsbmv_thread_U, qsbmv_thread_L,
|
||||
|
@ -90,7 +98,7 @@ void NAME(char *UPLO, blasint *N, blasint *K, FLOAT *ALPHA, FLOAT *a, blasint *
|
|||
blasint info;
|
||||
int uplo;
|
||||
FLOAT *buffer;
|
||||
#ifdef SMPBUG
|
||||
#ifdef SMPTEST
|
||||
int nthreads;
|
||||
#endif
|
||||
|
||||
|
@ -130,7 +138,7 @@ void CNAME(enum CBLAS_ORDER order,
|
|||
FLOAT *buffer;
|
||||
int uplo;
|
||||
blasint info;
|
||||
#ifdef SMPBUG
|
||||
#ifdef SMPTEST
|
||||
int nthreads;
|
||||
#endif
|
||||
|
||||
|
@ -189,7 +197,7 @@ void CNAME(enum CBLAS_ORDER order,
|
|||
|
||||
buffer = (FLOAT *)blas_memory_alloc(1);
|
||||
|
||||
#ifdef SMPBUG
|
||||
#ifdef SMPTEST
|
||||
nthreads = num_cpu_avail(2);
|
||||
|
||||
if (nthreads == 1) {
|
||||
|
@ -197,7 +205,7 @@ void CNAME(enum CBLAS_ORDER order,
|
|||
|
||||
(sbmv[uplo])(n, k, alpha, a, lda, x, incx, y, incy, buffer);
|
||||
|
||||
#ifdef SMPBUG
|
||||
#ifdef SMPTEST
|
||||
} else {
|
||||
|
||||
(sbmv_thread[uplo])(n, k, alpha, a, lda, x, incx, y, incy, buffer, nthreads);
|
||||
|
|
|
@ -42,6 +42,13 @@
|
|||
#include "functable.h"
|
||||
#endif
|
||||
|
||||
#ifdef SMP
|
||||
#ifdef __64BIT__
|
||||
#define SMPTEST 1
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef XDOUBLE
|
||||
#ifndef CONJ
|
||||
#define ERROR_NAME "XGERU "
|
||||
|
@ -109,7 +116,7 @@ void NAME(blasint *M, blasint *N, FLOAT *Alpha,
|
|||
blasint incy = *INCY;
|
||||
blasint lda = *LDA;
|
||||
FLOAT *buffer;
|
||||
#ifdef SMPBUG
|
||||
#ifdef SMPTEST
|
||||
int nthreads;
|
||||
#endif
|
||||
|
||||
|
@ -144,7 +151,7 @@ void CNAME(enum CBLAS_ORDER order,
|
|||
|
||||
FLOAT *buffer;
|
||||
blasint info, t;
|
||||
#ifdef SMPBUG
|
||||
#ifdef SMPTEST
|
||||
int nthreads;
|
||||
#endif
|
||||
|
||||
|
@ -205,7 +212,7 @@ void CNAME(enum CBLAS_ORDER order,
|
|||
|
||||
buffer = (FLOAT *)blas_memory_alloc(1);
|
||||
|
||||
#ifdef SMPBUG
|
||||
#ifdef SMPTEST
|
||||
nthreads = num_cpu_avail(2);
|
||||
|
||||
if (nthreads == 1) {
|
||||
|
@ -221,7 +228,7 @@ void CNAME(enum CBLAS_ORDER order,
|
|||
}
|
||||
#endif
|
||||
|
||||
#ifdef SMPBUG
|
||||
#ifdef SMPTEST
|
||||
|
||||
} else {
|
||||
|
||||
|
|
|
@ -43,6 +43,14 @@
|
|||
#include "functable.h"
|
||||
#endif
|
||||
|
||||
/*
|
||||
#ifdef SMP
|
||||
#ifdef __64BIT__
|
||||
#define SMPTEST 1
|
||||
#endif
|
||||
#endif
|
||||
*/
|
||||
|
||||
#ifdef XDOUBLE
|
||||
#define ERROR_NAME "XSBMV "
|
||||
#elif defined(DOUBLE)
|
||||
|
@ -61,7 +69,7 @@ static int (*sbmv[])(BLASLONG, BLASLONG, FLOAT, FLOAT, FLOAT *, BLASLONG, FLOAT
|
|||
#endif
|
||||
};
|
||||
|
||||
#ifdef SMPBUG
|
||||
#ifdef SMPTEST
|
||||
static int (*sbmv_thread[])(BLASLONG, BLASLONG, FLOAT *, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, int) = {
|
||||
#ifdef XDOUBLE
|
||||
xsbmv_thread_U, xsbmv_thread_L,
|
||||
|
@ -90,7 +98,7 @@ void NAME(char *UPLO, blasint *N, blasint *K, FLOAT *ALPHA, FLOAT *a, blasint *
|
|||
blasint info;
|
||||
int uplo;
|
||||
FLOAT *buffer;
|
||||
#ifdef SMPBUG
|
||||
#ifdef SMPTEST
|
||||
int nthreads;
|
||||
#endif
|
||||
|
||||
|
@ -131,7 +139,7 @@ void NAME(char *UPLO, blasint *N, blasint *K, FLOAT *ALPHA, FLOAT *a, blasint *
|
|||
|
||||
buffer = (FLOAT *)blas_memory_alloc(1);
|
||||
|
||||
#ifdef SMPBUG
|
||||
#ifdef SMPTEST
|
||||
nthreads = num_cpu_avail(2);
|
||||
|
||||
if (nthreads == 1) {
|
||||
|
@ -139,7 +147,7 @@ void NAME(char *UPLO, blasint *N, blasint *K, FLOAT *ALPHA, FLOAT *a, blasint *
|
|||
|
||||
(sbmv[uplo])(n, k, alpha_r, alpha_i, a, lda, b, incx, c, incy, buffer);
|
||||
|
||||
#ifdef SMPBUG
|
||||
#ifdef SMPTEST
|
||||
} else {
|
||||
|
||||
(sbmv_thread[uplo])(n, k, ALPHA, a, lda, b, incx, c, incy, buffer, nthreads);
|
||||
|
|
|
@ -1,3 +1,5 @@
|
|||
USE_GEMM3M = 0
|
||||
|
||||
ifeq ($(ARCH), x86)
|
||||
USE_GEMM3M = 1
|
||||
endif
|
||||
|
@ -122,7 +124,7 @@ XBLASOBJS += \
|
|||
xtrsm_kernel_RN$(TSUFFIX).$(SUFFIX) xtrsm_kernel_RT$(TSUFFIX).$(SUFFIX) \
|
||||
xtrsm_kernel_RR$(TSUFFIX).$(SUFFIX) xtrsm_kernel_RC$(TSUFFIX).$(SUFFIX) \
|
||||
|
||||
ifdef USE_GEMM3M
|
||||
ifeq ($(USE_GEMM3M), 1)
|
||||
|
||||
CBLASOBJS += cgemm3m_kernel$(TSUFFIX).$(SUFFIX)
|
||||
ZBLASOBJS += zgemm3m_kernel$(TSUFFIX).$(SUFFIX)
|
||||
|
@ -256,7 +258,7 @@ XBLASOBJS += \
|
|||
xhemm_iutcopy$(TSUFFIX).$(SUFFIX) xhemm_iltcopy$(TSUFFIX).$(SUFFIX) \
|
||||
xhemm_outcopy$(TSUFFIX).$(SUFFIX) xhemm_oltcopy$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
ifdef USE_GEMM3M
|
||||
ifeq ($(USE_GEMM3M), 1)
|
||||
|
||||
CBLASOBJS += \
|
||||
cgemm3m_incopyb$(TSUFFIX).$(SUFFIX) cgemm3m_itcopyb$(TSUFFIX).$(SUFFIX) \
|
||||
|
|
|
@ -0,0 +1,104 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
|
||||
#include "common.h"
|
||||
|
||||
#if defined(DSDOT)
|
||||
double CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
|
||||
#else
|
||||
FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
|
||||
#endif
|
||||
{
|
||||
BLASLONG i=0;
|
||||
BLASLONG ix=0,iy=0;
|
||||
|
||||
#if defined(DSDOT)
|
||||
double dot = 0.0 ;
|
||||
#else
|
||||
FLOAT dot = 0.0 ;
|
||||
#endif
|
||||
|
||||
if ( n < 0 ) return(dot);
|
||||
|
||||
if ( (inc_x == 1) && (inc_y == 1) )
|
||||
{
|
||||
|
||||
int n1 = n & -4;
|
||||
|
||||
while(i < n1)
|
||||
{
|
||||
|
||||
#if defined(DSDOT)
|
||||
dot += (double) y[i] * (double) x[i]
|
||||
+ (double) y[i+1] * (double) x[i+1]
|
||||
+ (double) y[i+2] * (double) x[i+2]
|
||||
+ (double) y[i+3] * (double) x[i+3] ;
|
||||
#else
|
||||
dot += y[i] * x[i]
|
||||
+ y[i+1] * x[i+1]
|
||||
+ y[i+2] * x[i+2]
|
||||
+ y[i+3] * x[i+3] ;
|
||||
#endif
|
||||
i+=4 ;
|
||||
|
||||
}
|
||||
|
||||
while(i < n)
|
||||
{
|
||||
|
||||
#if defined(DSDOT)
|
||||
dot += (double) y[i] * (double) x[i] ;
|
||||
#else
|
||||
dot += y[i] * x[i] ;
|
||||
#endif
|
||||
i++ ;
|
||||
|
||||
}
|
||||
return(dot);
|
||||
|
||||
|
||||
}
|
||||
|
||||
while(i < n)
|
||||
{
|
||||
|
||||
#if defined(DSDOT)
|
||||
dot += (double) y[iy] * (double) x[ix] ;
|
||||
#else
|
||||
dot += y[iy] * x[ix] ;
|
||||
#endif
|
||||
ix += inc_x ;
|
||||
iy += inc_y ;
|
||||
i++ ;
|
||||
|
||||
}
|
||||
return(dot);
|
||||
|
||||
}
|
||||
|
||||
|
|
@ -714,13 +714,13 @@ static void init_parameter(void) {
|
|||
fprintf(stderr, "Core2\n");
|
||||
#endif
|
||||
|
||||
TABLE_NAME.sgemm_p = 92 * (l2 >> 9);
|
||||
TABLE_NAME.dgemm_p = 46 * (l2 >> 9);
|
||||
TABLE_NAME.cgemm_p = 46 * (l2 >> 9);
|
||||
TABLE_NAME.zgemm_p = 23 * (l2 >> 9);
|
||||
TABLE_NAME.sgemm_p = 92 * (l2 >> 9) + 8;
|
||||
TABLE_NAME.dgemm_p = 46 * (l2 >> 9) + 8;
|
||||
TABLE_NAME.cgemm_p = 46 * (l2 >> 9) + 4;
|
||||
TABLE_NAME.zgemm_p = 23 * (l2 >> 9) + 4;
|
||||
#ifdef EXPRECISION
|
||||
TABLE_NAME.qgemm_p = 92 * (l2 >> 9);
|
||||
TABLE_NAME.xgemm_p = 46 * (l2 >> 9);
|
||||
TABLE_NAME.qgemm_p = 92 * (l2 >> 9) + 8;
|
||||
TABLE_NAME.xgemm_p = 46 * (l2 >> 9) + 4;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
@ -740,6 +740,23 @@ static void init_parameter(void) {
|
|||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef DUNNINGTON
|
||||
|
||||
#ifdef DEBUG
|
||||
fprintf(stderr, "Dunnington\n");
|
||||
#endif
|
||||
|
||||
TABLE_NAME.sgemm_p = 42 * (l2 >> 9) + 8;
|
||||
TABLE_NAME.dgemm_p = 42 * (l2 >> 9) + 8;
|
||||
TABLE_NAME.cgemm_p = 21 * (l2 >> 9) + 4;
|
||||
TABLE_NAME.zgemm_p = 21 * (l2 >> 9) + 4;
|
||||
#ifdef EXPRECISION
|
||||
TABLE_NAME.qgemm_p = 42 * (l2 >> 9) + 8;
|
||||
TABLE_NAME.xgemm_p = 21 * (l2 >> 9) + 4;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef NEHALEM
|
||||
|
||||
#ifdef DEBUG
|
||||
|
|
|
@ -119,15 +119,13 @@ XCOPYKERNEL = zcopy.S
|
|||
endif
|
||||
|
||||
ifndef SDOTKERNEL
|
||||
SDOTKERNEL = dot_sse.S
|
||||
SDOTKERNEL = ../generic/dot.c
|
||||
endif
|
||||
|
||||
|
||||
ifndef DSDOTKERNEL
|
||||
DSDOTKERNEL = ../arm/dot.c
|
||||
DSDOTKERNEL = ../generic/dot.c
|
||||
endif
|
||||
|
||||
|
||||
ifndef DDOTKERNEL
|
||||
DDOTKERNEL = dot_sse2.S
|
||||
endif
|
||||
|
|
|
@ -6,7 +6,6 @@ ZGEMVTKERNEL = zgemv_t.S
|
|||
|
||||
DGEMVNKERNEL = dgemv_n_bulldozer.S
|
||||
DGEMVTKERNEL = dgemv_t_bulldozer.S
|
||||
DAXPYKERNEL = daxpy_bulldozer.S
|
||||
DDOTKERNEL = ddot_bulldozer.S
|
||||
DCOPYKERNEL = dcopy_bulldozer.S
|
||||
|
||||
|
|
|
@ -6,7 +6,6 @@ ZGEMVTKERNEL = zgemv_t.S
|
|||
|
||||
DGEMVNKERNEL = dgemv_n_bulldozer.S
|
||||
DGEMVTKERNEL = dgemv_t_bulldozer.S
|
||||
DAXPYKERNEL = daxpy_bulldozer.S
|
||||
DDOTKERNEL = ddot_bulldozer.S
|
||||
DCOPYKERNEL = dcopy_bulldozer.S
|
||||
|
||||
|
|
|
@ -19,7 +19,7 @@ DGEMMINCOPYOBJ =
|
|||
DGEMMITCOPYOBJ =
|
||||
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
CGEMMKERNEL = zgemm_kernel_4x2_sse3.S
|
||||
CGEMMKERNEL = zgemm_kernel_4x2_sse.S
|
||||
CGEMMINCOPY = ../generic/zgemm_ncopy_4.c
|
||||
CGEMMITCOPY = ../generic/zgemm_tcopy_4.c
|
||||
CGEMMONCOPY = zgemm_ncopy_2.S
|
||||
|
|
1
make.inc
1
make.inc
|
@ -1,7 +1,6 @@
|
|||
SHELL = /bin/sh
|
||||
PLAT = _LINUX
|
||||
DRVOPTS = $(OPTS)
|
||||
LOADER = $(FORTRAN)
|
||||
ARCHFLAGS= -ru
|
||||
#RANLIB = ranlib
|
||||
|
||||
|
|
|
@ -1,15 +1,19 @@
|
|||
UTEST_CHECK = 1
|
||||
TOPDIR = ..
|
||||
include $(TOPDIR)/Makefile.system
|
||||
|
||||
TARGET=openblas_utest
|
||||
|
||||
.PHONY : all
|
||||
.NOTPARALLEL : all run_test $(TARGET)
|
||||
|
||||
CUNIT_URL=http://downloads.sourceforge.net/project/cunit/CUnit/2.1-2/CUnit-2.1-2-src.tar.bz2
|
||||
CUNIT_DIR=$(CURDIR)/CUnit-2.1-2
|
||||
|
||||
CUNIT_LIB=$(CUNIT_DIR)/lib/libcunit.a
|
||||
|
||||
CFLAGS+=-I$(CUNIT_DIR)/include
|
||||
CFLAGS +=-I$(CUNIT_DIR)/include
|
||||
|
||||
include $(TOPDIR)/Makefile.system
|
||||
|
||||
OBJS=main.o test_rot.o test_swap.o test_axpy.o test_dotu.o test_rotmg.o test_dsdot.o test_amax.o test_fork.o
|
||||
|
||||
|
|
Loading…
Reference in New Issue