From 9fc6764fa762f896fcfaf9f40d3eb39b53a81c00 Mon Sep 17 00:00:00 2001 From: traits Date: Mon, 5 Sep 2011 17:37:07 +0800 Subject: [PATCH 01/17] refs #55. Added DTB_ENTRIES into dynamic arch setting parameters. Now, it can read DTB_ENTRIES on runtime. --- common_param.h | 16 ++++++---- cpuid_alpha.c | 6 ++-- cpuid_ia64.c | 2 +- cpuid_mips.c | 4 +-- cpuid_power.c | 2 +- cpuid_sparc.c | 2 +- cpuid_x86.c | 4 +-- getarch.c | 70 +++++++++++++++++++++---------------------- kernel/setparam-ref.c | 2 ++ 9 files changed, 57 insertions(+), 51 deletions(-) diff --git a/common_param.h b/common_param.h index c4580cc22..e978193d4 100644 --- a/common_param.h +++ b/common_param.h @@ -44,6 +44,7 @@ #ifdef DYNAMIC_ARCH typedef struct { + int dtb_entries; int offsetA, offsetB, align; int sgemm_p, sgemm_q, sgemm_r; @@ -813,6 +814,7 @@ BLASLONG (*ixamin_k)(BLASLONG, xdouble *, BLASLONG); extern gotoblas_t *gotoblas; +#define DTB_ENTRIES gotoblas -> dtb_entries #define GEMM_OFFSET_A gotoblas -> offsetA #define GEMM_OFFSET_B gotoblas -> offsetB #define GEMM_ALIGN gotoblas -> align @@ -863,6 +865,8 @@ extern gotoblas_t *gotoblas; #else +#define DTB_ENTRIES DTB_DEFAULT_ENTRIES + #define GEMM_OFFSET_A GEMM_DEFAULT_OFFSET_A #define GEMM_OFFSET_B GEMM_DEFAULT_OFFSET_B #define GEMM_ALIGN GEMM_DEFAULT_ALIGN @@ -997,14 +1001,14 @@ extern gotoblas_t *gotoblas; #endif #ifdef XDOUBLE -#define GEMM3M_UNROLL_M QGEMM_DEFAULT_UNROLL_M -#define GEMM3M_UNROLL_N QGEMM_DEFAULT_UNROLL_N +#define GEMM3M_UNROLL_M QGEMM_UNROLL_M +#define GEMM3M_UNROLL_N QGEMM_UNROLL_N #elif defined(DOUBLE) -#define GEMM3M_UNROLL_M DGEMM_DEFAULT_UNROLL_M -#define GEMM3M_UNROLL_N DGEMM_DEFAULT_UNROLL_N +#define GEMM3M_UNROLL_M DGEMM_UNROLL_M +#define GEMM3M_UNROLL_N DGEMM_UNROLL_N #else -#define GEMM3M_UNROLL_M SGEMM_DEFAULT_UNROLL_M -#define GEMM3M_UNROLL_N SGEMM_DEFAULT_UNROLL_N +#define GEMM3M_UNROLL_M SGEMM_UNROLL_M +#define GEMM3M_UNROLL_N SGEMM_UNROLL_N #endif diff --git a/cpuid_alpha.c b/cpuid_alpha.c index 
ca786d550..adcc314c3 100644 --- a/cpuid_alpha.c +++ b/cpuid_alpha.c @@ -72,7 +72,7 @@ void get_cpuconfig(void){ printf("#define L1_DATA_LINESIZE 32\n"); printf("#define L2_SIZE 2097152\n"); printf("#define L2_LINESIZE 32\n"); - printf("#define DTB_ENTRIES 32\n"); + printf("#define DTB_DEFAULT_ENTRIES 32\n"); printf("#define DTB_SIZE 8192\n"); break; @@ -81,7 +81,7 @@ void get_cpuconfig(void){ printf("#define L1_DATA_LINESIZE 32\n"); printf("#define L2_SIZE 2097152\n"); printf("#define L2_LINESIZE 64\n"); - printf("#define DTB_ENTRIES 64\n"); + printf("#define DTB_DEFAULT_ENTRIES 64\n"); printf("#define DTB_SIZE 8192\n"); break; @@ -90,7 +90,7 @@ void get_cpuconfig(void){ printf("#define L1_DATA_LINESIZE 64\n"); printf("#define L2_SIZE 4194304\n"); printf("#define L2_LINESIZE 64\n"); - printf("#define DTB_ENTRIES 64\n"); + printf("#define DTB_DEFAULT_ENTRIES 64\n"); printf("#define DTB_SIZE 8192\n"); break; } diff --git a/cpuid_ia64.c b/cpuid_ia64.c index 7f0fa6d2f..d372182a7 100644 --- a/cpuid_ia64.c +++ b/cpuid_ia64.c @@ -133,6 +133,6 @@ void get_cpuconfig(void){ printf("#define L2_SIZE 1572864\n"); printf("#define L2_LINESIZE 128\n"); printf("#define DTB_SIZE 16384\n"); - printf("#define DTB_ENTRIES 128\n"); + printf("#define DTB_DEFAULT_ENTRIES 128\n"); } diff --git a/cpuid_mips.c b/cpuid_mips.c index 7d0b5cc62..f50a4ec3e 100644 --- a/cpuid_mips.c +++ b/cpuid_mips.c @@ -146,7 +146,7 @@ void get_cpuconfig(void){ printf("#define L1_DATA_LINESIZE 32\n"); printf("#define L2_SIZE 512488\n"); printf("#define L2_LINESIZE 32\n"); - printf("#define DTB_ENTRIES 64\n"); + printf("#define DTB_DEFAULT_ENTRIES 64\n"); printf("#define DTB_SIZE 4096\n"); printf("#define L2_ASSOCIATIVE 4\n"); }else{ @@ -155,7 +155,7 @@ void get_cpuconfig(void){ printf("#define L1_DATA_LINESIZE 32\n"); printf("#define L2_SIZE 512488\n"); printf("#define L2_LINESIZE 32\n"); - printf("#define DTB_ENTRIES 32\n"); + printf("#define DTB_DEFAULT_ENTRIES 32\n"); printf("#define DTB_SIZE 4096\n"); 
printf("#define L2_ASSOCIATIVE 8\n"); } diff --git a/cpuid_power.c b/cpuid_power.c index 46ff30a3a..24ff78f05 100644 --- a/cpuid_power.c +++ b/cpuid_power.c @@ -165,7 +165,7 @@ void get_cpuconfig(void){ printf("#define L1_DATA_LINESIZE 128\n"); printf("#define L2_SIZE 524288\n"); printf("#define L2_LINESIZE 128 \n"); - printf("#define DTB_ENTRIES 128\n"); + printf("#define DTB_DEFAULT_ENTRIES 128\n"); printf("#define DTB_SIZE 4096\n"); printf("#define L2_ASSOCIATIVE 8\n"); diff --git a/cpuid_sparc.c b/cpuid_sparc.c index b65c69de4..b1e212b2f 100644 --- a/cpuid_sparc.c +++ b/cpuid_sparc.c @@ -50,7 +50,7 @@ void get_subdirname(void){ void get_cpuconfig(void){ printf("#define V9\n"); - printf("#define DTB_ENTRIES 32\n"); + printf("#define DTB_DEFAULT_ENTRIES 32\n"); } void get_libname(void){ diff --git a/cpuid_x86.c b/cpuid_x86.c index 6e3e74f82..b68f53573 100644 --- a/cpuid_x86.c +++ b/cpuid_x86.c @@ -1393,7 +1393,7 @@ void get_cpuconfig(void){ if (info.size > 0) { printf("#define DTB_SIZE %d\n", info.size * 1024); printf("#define DTB_ASSOCIATIVE %d\n", info.associative); - printf("#define DTB_ENTRIES %d\n", info.linesize); + printf("#define DTB_DEFAULT_ENTRIES %d\n", info.linesize); } features = get_cputype(GET_FEATURE); @@ -1422,7 +1422,7 @@ void get_cpuconfig(void){ features = get_coretype(); if (features > 0) printf("#define CORE_%s\n", corename[features]); } else { - printf("#define DTB_ENTRIES 16\n"); + printf("#define DTB_DEFAULT_ENTRIES 16\n"); printf("#define L1_CODE_SIZE 8192\n"); printf("#define L1_DATA_SIZE 8192\n"); printf("#define L2_SIZE 0\n"); diff --git a/getarch.c b/getarch.c index 8864753b7..df052df8a 100644 --- a/getarch.c +++ b/getarch.c @@ -130,7 +130,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define ARCHCONFIG "-DPENTIUM2 " \ "-DL1_DATA_SIZE=16384 -DL1_DATA_LINESIZE=32 " \ "-DL2_SIZE=512488 -DL2_LINESIZE=32 " \ - "-DDTB_ENTRIES=64 -DDTB_SIZE=4096 " \ + "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \ "-DHAVE_CMOV -DHAVE_MMX" #define LIBNAME "p2" #define CORENAME "P5" @@ -144,7 +144,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define ARCHCONFIG "-DPENTIUM3 " \ "-DL1_DATA_SIZE=16384 -DL1_DATA_LINESIZE=32 " \ "-DL2_SIZE=262144 -DL2_LINESIZE=32 " \ - "-DDTB_ENTRIES=64 -DDTB_SIZE=4096 " \ + "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \ "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE " #define LIBNAME "coppermine" #define CORENAME "COPPERMINE" @@ -158,7 +158,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define ARCHCONFIG "-DPENTIUM3 " \ "-DL1_DATA_SIZE=16384 -DL1_DATA_LINESIZE=32 " \ "-DL2_SIZE=524288 -DL2_LINESIZE=32 " \ - "-DDTB_ENTRIES=64 -DDTB_SIZE=4096 " \ + "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \ "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE " #define LIBNAME "katmai" #define CORENAME "KATMAI" @@ -172,7 +172,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define ARCHCONFIG "-DPENTIUM4 " \ "-DL1_DATA_SIZE=8192 -DL1_DATA_LINESIZE=64 " \ "-DL2_SIZE=524288 -DL2_LINESIZE=64 " \ - "-DDTB_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 " \ + "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 " \ "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 " #define LIBNAME "northwood" #define CORENAME "NORTHWOOD" @@ -186,7 +186,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define ARCHCONFIG "-DPENTIUM4 " \ "-DL1_DATA_SIZE=16384 -DL1_DATA_LINESIZE=64 " \ "-DL2_SIZE=1048576 -DL2_LINESIZE=64 " \ - "-DDTB_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 " \ + "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 " \ "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3" #define LIBNAME "prescott" #define CORENAME "PRESCOTT" @@ -200,7 +200,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define ARCHCONFIG "-DPENTIUMM " \ "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \ "-DL2_SIZE=1048576 -DL2_LINESIZE=64 " \ - "-DDTB_ENTRIES=64 -DDTB_SIZE=4096 " \ + "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \ "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 " #define LIBNAME "banias" #define CORENAME "BANIAS" @@ -214,7 +214,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define ARCHCONFIG "-DPENTIUMM " \ "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \ "-DL2_SIZE=1048576 -DL2_LINESIZE=64 " \ - "-DDTB_ENTRIES=64 -DDTB_SIZE=4096 " \ + "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \ "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 " #define LIBNAME "yonah" #define CORENAME "YONAH" @@ -228,7 +228,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define ARCHCONFIG "-DCORE2 " \ "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \ "-DL2_SIZE=1048576 -DL2_LINESIZE=64 " \ - "-DDTB_ENTRIES=256 -DDTB_SIZE=4096 " \ + "-DDTB_DEFAULT_ENTRIES=256 -DDTB_SIZE=4096 " \ "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3" #define LIBNAME "core2" #define CORENAME "CORE2" @@ -242,7 +242,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define ARCHCONFIG "-DPENRYN " \ "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \ "-DL2_SIZE=1048576 -DL2_LINESIZE=64 " \ - "-DDTB_ENTRIES=256 -DDTB_SIZE=4096 " \ + "-DDTB_DEFAULT_ENTRIES=256 -DDTB_SIZE=4096 " \ "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3 -DHAVE_SSE4_1" #define LIBNAME "penryn" #define CORENAME "PENRYN" @@ -257,7 +257,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \ "-DL2_SIZE=1048576 -DL2_LINESIZE=64 " \ "-DL3_SIZE=16777216 -DL3_LINESIZE=64 " \ - "-DDTB_ENTRIES=256 -DDTB_SIZE=4096 " \ + "-DDTB_DEFAULT_ENTRIES=256 -DDTB_SIZE=4096 " \ "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3 -DHAVE_SSE4_1" #define LIBNAME "dunnington" #define CORENAME "DUNNINGTON" @@ -271,7 +271,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define ARCHCONFIG "-DNEHALEM " \ "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \ "-DL2_SIZE=262144 -DL2_LINESIZE=64 " \ - "-DDTB_ENTRIES=64 -DDTB_SIZE=4096 " \ + "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \ "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2" #define LIBNAME "nehalem" #define CORENAME "NEHALEM" @@ -285,7 +285,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define ARCHCONFIG "-DATOM " \ "-DL1_DATA_SIZE=24576 -DL1_DATA_LINESIZE=64 " \ "-DL2_SIZE=524288 -DL2_LINESIZE=64 " \ - "-DDTB_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " \ + "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " \ "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3" #define LIBNAME "atom" #define CORENAME "ATOM" @@ -299,7 +299,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define ARCHCONFIG "-DATHLON " \ "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 " \ "-DL2_SIZE=1048576 -DL2_LINESIZE=64 " \ - "-DDTB_ENTRIES=32 -DDTB_SIZE=4096 -DHAVE_3DNOW " \ + "-DDTB_DEFAULT_ENTRIES=32 -DDTB_SIZE=4096 -DHAVE_3DNOW " \ "-DHAVE_3DNOWEX -DHAVE_MMX -DHAVE_SSE " #define LIBNAME "athlon" #define CORENAME "ATHLON" @@ -313,7 +313,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define ARCHCONFIG "-DOPTERON " \ "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 " \ "-DL2_SIZE=1048576 -DL2_LINESIZE=64 " \ - "-DDTB_ENTRIES=32 -DDTB_SIZE=4096 -DHAVE_3DNOW " \ + "-DDTB_DEFAULT_ENTRIES=32 -DDTB_SIZE=4096 -DHAVE_3DNOW " \ "-DHAVE_3DNOWEX -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 " #define LIBNAME "opteron" #define CORENAME "OPTERON" @@ -327,7 +327,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define ARCHCONFIG "-DOPTERON " \ "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 " \ "-DL2_SIZE=1048576 -DL2_LINESIZE=64 " \ - "-DDTB_ENTRIES=32 -DDTB_SIZE=4096 -DHAVE_3DNOW " \ + "-DDTB_DEFAULT_ENTRIES=32 -DDTB_SIZE=4096 -DHAVE_3DNOW " \ "-DHAVE_3DNOWEX -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3" #define LIBNAME "opteron" #define CORENAME "OPTERON" @@ -341,7 +341,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define ARCHCONFIG "-DBARCELONA " \ "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 " \ "-DL2_SIZE=524288 -DL2_LINESIZE=64 -DL3_SIZE=2097152 " \ - "-DDTB_ENTRIES=48 -DDTB_SIZE=4096 -DHAVE_3DNOW " \ + "-DDTB_DEFAULT_ENTRIES=48 -DDTB_SIZE=4096 -DHAVE_3DNOW " \ "-DHAVE_3DNOWEX -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 " \ "-DHAVE_SSE4A -DHAVE_MISALIGNSSE -DHAVE_128BITFPU -DHAVE_FASTMOVU" #define LIBNAME "barcelona" @@ -356,7 +356,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define ARCHCONFIG "-DGENERIC " \ "-DL1_DATA_SIZE=16384 -DL1_DATA_LINESIZE=64 " \ "-DL2_SIZE=524288 -DL2_LINESIZE=64 " \ - "-DDTB_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 " \ + "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 " \ "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2" #define LIBNAME "generic" #define CORENAME "GENERIC" @@ -370,7 +370,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define ARCHCONFIG "-DVIAC3 " \ "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \ "-DL2_SIZE=65536 -DL2_LINESIZE=32 " \ - "-DDTB_ENTRIES=128 -DDTB_SIZE=4096 " \ + "-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 " \ "-DHAVE_MMX -DHAVE_SSE " #define LIBNAME "viac3" #define CORENAME "VIAC3" @@ -384,7 +384,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define ARCHCONFIG "-DNANO " \ "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 " \ "-DL2_SIZE=1048576 -DL2_LINESIZE=64 " \ - "-DDTB_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 " \ + "-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 " \ "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3" #define LIBNAME "nano" #define CORENAME "NANO" @@ -398,7 +398,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define ARCHCONFIG "-DPOWER3 " \ "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=128 " \ "-DL2_SIZE=2097152 -DL2_LINESIZE=128 " \ - "-DDTB_ENTRIES=256 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 " + "-DDTB_DEFAULT_ENTRIES=256 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 " #define LIBNAME "power3" #define CORENAME "POWER3" #endif @@ -411,7 +411,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define ARCHCONFIG "-DPOWER4 " \ "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=128 " \ "-DL2_SIZE=1509949 -DL2_LINESIZE=128 " \ - "-DDTB_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=6 " + "-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=6 " #define LIBNAME "power4" #define CORENAME "POWER4" #endif @@ -424,7 +424,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define ARCHCONFIG "-DPOWER5 " \ "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=128 " \ "-DL2_SIZE=1509949 -DL2_LINESIZE=128 " \ - "-DDTB_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=6 " + "-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=6 " #define LIBNAME "power5" #define CORENAME "POWER5" #endif @@ -437,7 +437,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define ARCHCONFIG "-DPOWER6 " \ "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=128 " \ "-DL2_SIZE=4194304 -DL2_LINESIZE=128 " \ - "-DDTB_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 " + "-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 " #define LIBNAME "power6" #define CORENAME "POWER6" #endif @@ -450,7 +450,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define ARCHCONFIG "-DPPCG4 " \ "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=32 " \ "-DL2_SIZE=262144 -DL2_LINESIZE=32 " \ - "-DDTB_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 " + "-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 " #define LIBNAME "ppcg4" #define CORENAME "PPCG4" #endif @@ -463,7 +463,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define ARCHCONFIG "-DPPC970 " \ "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=128 " \ "-DL2_SIZE=512488 -DL2_LINESIZE=128 " \ - "-DDTB_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 " + "-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 " #define LIBNAME "ppc970" #define CORENAME "PPC970" #endif @@ -476,7 +476,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define ARCHCONFIG "-DPPC970 " \ "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=128 " \ "-DL2_SIZE=1024976 -DL2_LINESIZE=128 " \ - "-DDTB_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 " + "-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 " #define LIBNAME "ppc970mp" #define CORENAME "PPC970" #endif @@ -489,7 +489,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define ARCHCONFIG "-DPPC440 " \ "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=32 " \ "-DL2_SIZE=16384 -DL2_LINESIZE=128 " \ - "-DDTB_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=16 " + "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=16 " #define LIBNAME "ppc440" #define CORENAME "PPC440" #endif @@ -502,7 +502,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define ARCHCONFIG "-DPPC440FP2 " \ "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=32 " \ "-DL2_SIZE=16384 -DL2_LINESIZE=128 " \ - "-DDTB_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=16 " + "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=16 " #define LIBNAME "ppc440FP2" #define CORENAME "PPC440FP2" #endif @@ -515,7 +515,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define ARCHCONFIG "-DCELL " \ "-DL1_DATA_SIZE=262144 -DL1_DATA_LINESIZE=128 " \ "-DL2_SIZE=512488 -DL2_LINESIZE=128 " \ - "-DDTB_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 " + "-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 " #define LIBNAME "cell" #define CORENAME "CELL" #endif @@ -528,7 +528,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define ARCHCONFIG "-DSICORTEX " \ "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=32 " \ "-DL2_SIZE=512488 -DL2_LINESIZE=32 " \ - "-DDTB_ENTRIES=32 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 " + "-DDTB_DEFAULT_ENTRIES=32 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 " #define LIBNAME "mips" #define CORENAME "sicortex" #endif @@ -542,7 +542,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define ARCHCONFIG "-DLOONGSON3A " \ "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \ "-DL2_SIZE=512488 -DL2_LINESIZE=32 " \ - "-DDTB_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " + "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " #define LIBNAME "loongson3a" #define CORENAME "LOONGSON3A" #else @@ -555,7 +555,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define SUBDIRNAME "ia64" #define ARCHCONFIG "-DITANIUM2 " \ "-DL1_DATA_SIZE=262144 -DL1_DATA_LINESIZE=128 " \ - "-DL2_SIZE=1572864 -DL2_LINESIZE=128 -DDTB_SIZE=16384 -DDTB_ENTRIES=128 " + "-DL2_SIZE=1572864 -DL2_LINESIZE=128 -DDTB_SIZE=16384 -DDTB_DEFAULT_ENTRIES=128 " #define LIBNAME "itanium2" #define CORENAME "itanium2" #endif @@ -567,7 +567,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define SUBDIRNAME "sparc" #define ARCHCONFIG "-DSPARC -DV9 " \ "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 " \ - "-DL2_SIZE=1572864 -DL2_LINESIZE=64 -DDTB_SIZE=8192 -DDTB_ENTRIES=64 " + "-DL2_SIZE=1572864 -DL2_LINESIZE=64 -DDTB_SIZE=8192 -DDTB_DEFAULT_ENTRIES=64 " #define LIBNAME "sparc" #define CORENAME "sparc" #endif @@ -579,7 +579,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define SUBDIRNAME "sparc" #define ARCHCONFIG "-DSPARC -DV7 " \ "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 " \ - "-DL2_SIZE=1572864 -DL2_LINESIZE=64 -DDTB_SIZE=8192 -DDTB_ENTRIES=64 " + "-DL2_SIZE=1572864 -DL2_LINESIZE=64 -DDTB_SIZE=8192 -DDTB_DEFAULT_ENTRIES=64 " #define LIBNAME "sparcv7" #define CORENAME "sparcv7" #endif @@ -592,7 +592,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define ARCHCONFIG "-DGENERIC " \ "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=128 " \ "-DL2_SIZE=512488 -DL2_LINESIZE=128 " \ - "-DDTB_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 " + "-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 " #define LIBNAME "generic" #define CORENAME "generic" #endif diff --git a/kernel/setparam-ref.c b/kernel/setparam-ref.c index d3734bbd9..aa45d47f8 100644 --- a/kernel/setparam-ref.c +++ b/kernel/setparam-ref.c @@ -49,6 +49,8 @@ static void init_parameter(void); gotoblas_t TABLE_NAME = { + DTB_DEFAULT_ENTRIES , + GEMM_DEFAULT_OFFSET_A, GEMM_DEFAULT_OFFSET_B, GEMM_DEFAULT_ALIGN, 0, 0, 0, From 90481ce7428bac7cb680220316101635d10d565f Mon Sep 17 00:00:00 2001 From: traits Date: Mon, 5 Sep 2011 17:40:55 +0800 Subject: [PATCH 02/17] Updated the doc about 0.1alpha2.3. --- Changelog.txt | 8 ++++++++ Makefile.rule | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/Changelog.txt b/Changelog.txt index b431c9723..7d8a06edb 100644 --- a/Changelog.txt +++ b/Changelog.txt @@ -1,4 +1,12 @@ OpenBLAS ChangeLog +==================================================================== +Version 0.1 alpha2.3 +5-Sep-2011 + +x86/x86_64: + * Added DTB_ENTRIES into dynamic arch setting parameters. Now, + it can read DTB_ENTRIES on runtime. (Refs issue #55 on github) + ==================================================================== Version 0.1 alpha2.2 14-Jul-2011 diff --git a/Makefile.rule b/Makefile.rule index 8d3d73ac9..3b32ded84 100644 --- a/Makefile.rule +++ b/Makefile.rule @@ -3,7 +3,7 @@ # # This library's version -VERSION = 0.1alpha2.2 +VERSION = 0.1alpha2.3 # You can specify the target architecture, otherwise it's # automatically detected. From b1fe26c45a3055b1cdf107330d66521425e05d20 Mon Sep 17 00:00:00 2001 From: traits Date: Tue, 6 Sep 2011 14:14:07 +0800 Subject: [PATCH 03/17] refs #55. Changed DTB_ENTRIES to DTB_DEFAULT_ENTRIES in x86 gemv_n kernel codes. 
--- kernel/x86/gemv_n.S | 2 +- kernel/x86/qgemv_n.S | 2 +- kernel/x86/xgemv_n.S | 4 ++-- kernel/x86/zgemv_n.S | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/kernel/x86/gemv_n.S b/kernel/x86/gemv_n.S index 13fd1ed67..652c0bb0f 100644 --- a/kernel/x86/gemv_n.S +++ b/kernel/x86/gemv_n.S @@ -48,7 +48,7 @@ #endif #ifndef P -#define P DTB_ENTRIES +#define P DTB_DEFAULT_ENTRIES #endif #define STACK 16 diff --git a/kernel/x86/qgemv_n.S b/kernel/x86/qgemv_n.S index 842423260..e33bce2df 100644 --- a/kernel/x86/qgemv_n.S +++ b/kernel/x86/qgemv_n.S @@ -48,7 +48,7 @@ #endif #ifndef P -#define P DTB_ENTRIES +#define P DTB_DEFAULT_ENTRIES #endif #define STACK 16 diff --git a/kernel/x86/xgemv_n.S b/kernel/x86/xgemv_n.S index 0bf44455b..32447ba7e 100644 --- a/kernel/x86/xgemv_n.S +++ b/kernel/x86/xgemv_n.S @@ -44,11 +44,11 @@ #endif #if defined(PENTIUM4) || defined(ATHLON) -#define P (DTB_ENTRIES / 2) +#define P (DTB_DEFAULT_ENTRIES / 2) #endif #ifndef P -#define P DTB_ENTRIES +#define P DTB_DEFAULT_ENTRIES #endif #define STACK 16 diff --git a/kernel/x86/zgemv_n.S b/kernel/x86/zgemv_n.S index 8e2b2b842..a3c9174e6 100644 --- a/kernel/x86/zgemv_n.S +++ b/kernel/x86/zgemv_n.S @@ -44,11 +44,11 @@ #endif #if defined(PENTIUM4) || defined(ATHLON) -#define P ((DTB_ENTRIES) >> 1) +#define P ((DTB_DEFAULT_ENTRIES) >> 1) #endif #ifndef P -#define P DTB_ENTRIES +#define P DTB_DEFAULT_ENTRIES #endif #define STACK 16 From 7b410b7f0e94edde2a606593086694ae6bb17be8 Mon Sep 17 00:00:00 2001 From: Zhang Xiianyi Date: Wed, 14 Sep 2011 23:52:51 +0800 Subject: [PATCH 04/17] Fixed #58 zdot SEGFAULT bug with GCC-4.6. Thanks to Mr. John for this patch. In the i386 calling convention, the caller passes the address of zdot's return value as the first hidden parameter. Thus, the callee should remove this address from the stack before returning. Actually, I had already fixed the same bug in x86/zdot_sse2.S (issue #32). However, that was not a good implementation because it used 3 instructions. Mr.
John told me to use "ret $0x4" to skip the first hidden address (4 bytes). --- kernel/x86/xdot.S | 10 +++++++++- kernel/x86/zdot.S | 10 +++++++++- kernel/x86/zdot_sse2.S | 4 +--- 3 files changed, 19 insertions(+), 5 deletions(-) diff --git a/kernel/x86/xdot.S b/kernel/x86/xdot.S index 4a5af4642..929763271 100644 --- a/kernel/x86/xdot.S +++ b/kernel/x86/xdot.S @@ -307,7 +307,11 @@ popl %ebx popl %esi popl %edi +#if defined(F_INTERFACE) && defined(RETURN_BY_STACK) + ret $0x4 +#else ret +#endif ALIGN_3 .L88: @@ -326,6 +330,10 @@ popl %ebx popl %esi popl %edi - ret +#if defined(F_INTERFACE) && defined(RETURN_BY_STACK) + ret $0x4 +#else + ret +#endif EPILOGUE diff --git a/kernel/x86/zdot.S b/kernel/x86/zdot.S index aa4481f97..9d8866ad0 100644 --- a/kernel/x86/zdot.S +++ b/kernel/x86/zdot.S @@ -283,7 +283,11 @@ popl %ebx popl %esi popl %edi +#if defined(DOUBLE) || defined(XDOUBLE) + ret $0x4 +#else ret +#endif ALIGN_3 .L88: @@ -305,6 +309,10 @@ popl %ebx popl %esi popl %edi - ret +#if defined(DOUBLE) || defined(XDOUBLE) + ret $0x4 +#else + ret +#endif EPILOGUE diff --git a/kernel/x86/zdot_sse2.S b/kernel/x86/zdot_sse2.S index 2a174fb5d..efebe637b 100644 --- a/kernel/x86/zdot_sse2.S +++ b/kernel/x86/zdot_sse2.S @@ -1542,7 +1542,5 @@ popl %esi popl %edi /*remove the hidden return value address from the stack.*/ - popl %ecx - xchgl %ecx, 0(%esp) - ret + ret $0x4 EPILOGUE From d0152ec8caa77c5f122572302c8c9589b3eb2909 Mon Sep 17 00:00:00 2001 From: Xianyi Date: Sat, 17 Sep 2011 02:27:56 +0800 Subject: [PATCH 05/17] Fixed #61 a building bug about setting TARGET and DYNAMIC_ARCH at the same time. 
--- Makefile.system | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/Makefile.system b/Makefile.system index f0487ac50..84f41a78f 100644 --- a/Makefile.system +++ b/Makefile.system @@ -27,7 +27,13 @@ HOSTCC = $(CC) endif ifdef TARGET -GETARCH_FLAGS += -DFORCE_$(TARGET) +GETARCH_FLAGS := -DFORCE_$(TARGET) +endif + +#TARGET_CORE will override TARGET which is used in DYNAMIC_ARCH=1. +# +ifdef TARGET_CORE +GETARCH_FLAGS := -DFORCE_$(TARGET_CORE) endif ifdef INTERFACE64 From 68cae521dff0eb87a9de52685b8dee0f7b7e7418 Mon Sep 17 00:00:00 2001 From: Xianyi Date: Sat, 17 Sep 2011 02:58:01 +0800 Subject: [PATCH 06/17] Refs #57. The bug about absolute path of shared library on Mac OSX. OSX can't use a relative path in a shared library. Thanks to Mr. Kane for this patch. The details are at this link (https://github.com/xianyi/OpenBLAS/issues/57). --- Makefile | 5 +++++ Makefile.install | 1 + exports/Makefile | 2 +- 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 6789272a3..90a3cda09 100644 --- a/Makefile +++ b/Makefile @@ -56,6 +56,11 @@ ifndef SMP else @echo " (Multi threaded; Max num-threads is $(NUM_THREADS))" endif + +ifeq ($(OSNAME), Darwin) + @echo "Because absolute path issue, $(LIBDYNNAME) may not work under this directory." + @echo "Thus, you need run \"make PREFIX=/your_installation_path/ install\"." 
+endif @echo shared : diff --git a/Makefile.install b/Makefile.install index 80dafc9c6..1fbb2c430 100644 --- a/Makefile.install +++ b/Makefile.install @@ -50,6 +50,7 @@ ifeq ($(OSNAME), NetBSD) endif ifeq ($(OSNAME), Darwin) -cp $(LIBDYNNAME) $(PREFIX) + -install_name_tool -add_rpath $(PREFIX)/$(LIBDYNNAME) $(PREFIX)/$(LIBDYNNAME) -ln -fs $(PREFIX)/$(LIBDYNNAME) $(PREFIX)/libopenblas.dylib endif ifeq ($(OSNAME), WINNT) diff --git a/exports/Makefile b/exports/Makefile index f4c9314f9..0a2f3184f 100644 --- a/exports/Makefile +++ b/exports/Makefile @@ -85,7 +85,7 @@ libgoto_hpl.def : gensymbol perl ./gensymbol win2khpl $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) > $(@F) $(LIBDYNNAME) : ../$(LIBNAME) osx.def - $(PREFIX)gcc $(CFLAGS) -all_load -dynamiclib -o ../$(LIBDYNNAME) $< -Wl,-exported_symbols_list,osx.def $(FEXTRALIB) + $(CC) $(CFLAGS) -all_load -headerpad_max_install_names -install_name `pwd`/../$(LIBDYNNAME) -dynamiclib -o ../$(LIBDYNNAME) $< -Wl,-exported_symbols_list,osx.def $(FEXTRALIB) symbol.$(SUFFIX) : symbol.S $(CC) $(CFLAGS) -c -o $(@F) $^ From 864c68ffc5a4882c2a88e0c9f8f32ead96271fde Mon Sep 17 00:00:00 2001 From: Xianyi Date: Sat, 17 Sep 2011 03:05:26 +0800 Subject: [PATCH 07/17] Bump the version number. --- Makefile.rule | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile.rule b/Makefile.rule index 3b32ded84..a7ba203fc 100644 --- a/Makefile.rule +++ b/Makefile.rule @@ -3,7 +3,7 @@ # # This library's version -VERSION = 0.1alpha2.3 +VERSION = 0.1alpha2.4 # You can specify the target architecture, otherwise it's # automatically detected. From 756477bfe3791f13ed270d1d75ad1dfccbf83bc1 Mon Sep 17 00:00:00 2001 From: Xianyi Date: Sat, 17 Sep 2011 07:21:11 +0800 Subject: [PATCH 08/17] Output the installation tip after building complete. 
--- Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Makefile b/Makefile index 90a3cda09..0c82d4a95 100644 --- a/Makefile +++ b/Makefile @@ -62,6 +62,8 @@ ifeq ($(OSNAME), Darwin) @echo "Thus, you need run \"make PREFIX=/your_installation_path/ install\"." endif @echo + @echo "To install the library, you can run \"make PREFIX=/path/to/your/installation install\"." + @echo shared : ifeq ($(OSNAME), Linux) From 821cbb29958b15bff260d72553b5880bbddf704f Mon Sep 17 00:00:00 2001 From: Xianyi Date: Sat, 17 Sep 2011 07:55:59 +0800 Subject: [PATCH 09/17] Updated the document for 0.1 alpha 2.4. --- Changelog.txt | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/Changelog.txt b/Changelog.txt index 7d8a06edb..cd67f333d 100644 --- a/Changelog.txt +++ b/Changelog.txt @@ -1,4 +1,19 @@ OpenBLAS ChangeLog +==================================================================== +Version 0.1 alpha2.4 +16-Sep-2011 +common: + * Fixed a bug about installation. The header file "fblas77.h" + works fine now. + * Fixed #61 a building bug about setting TARGET and DYNAMIC_ARCH. + * Try to handle absolute path of shared library in OSX. (#57) + Thank Mr.Kane. + +x86/x86_64: + * Fixed #58 zdot/xdot SEGFAULT bug with GCC-4.6 on x86. According + to i386 calling convention, The callee should remove the first + hidden parameter.Thank Mr. John for this patch. + ==================================================================== Version 0.1 alpha2.3 5-Sep-2011 From bcc795621607b59c31fb7b59fc30d3eba15729b9 Mon Sep 17 00:00:00 2001 From: Xianyi Date: Sun, 18 Sep 2011 01:35:12 +0800 Subject: [PATCH 10/17] Refs #57. Continue to fix absolute path issue about shared library on Mac OSX. Used $(CURDIR) instead of pwd in generating shared library on Mac OSX. Add more tips about this issue. Thank Dr Kane O'Donnell. 
--- Makefile | 11 +++++++++-- Makefile.install | 4 ++-- exports/Makefile | 2 +- 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/Makefile b/Makefile index 573c44c33..56d491077 100644 --- a/Makefile +++ b/Makefile @@ -58,8 +58,15 @@ else endif ifeq ($(OSNAME), Darwin) - @echo "Because absolute path issue, $(LIBDYNNAME) may not work under this directory." - @echo "Thus, you need run \"make PREFIX=/your_installation_path/ install\"." + @echo "WARNING: If you plan to use the dynamic library $(LIBDYNNAME), you must run:" + @echo + @echo "\"make PREFIX=/your_installation_path/ install\"." + @echo + @echo "(or set PREFIX in Makefile.rule and run make install." + @echo "If you want to move the .dylib to a new location later, make sure you change" + @echo "the internal name of the dylib with:" + @echo + @echo "install_name_tool -id /new/absolute/path/to/$(LIBDYNNAME) $(LIBDYNNAME)" endif @echo @echo "To install the library, you can run \"make PREFIX=/path/to/your/installation install\"." 
diff --git a/Makefile.install b/Makefile.install index 1fbb2c430..5b5895c1c 100644 --- a/Makefile.install +++ b/Makefile.install @@ -48,9 +48,9 @@ ifeq ($(OSNAME), NetBSD) -cp $(LIBSONAME) $(PREFIX) -ln -fs $(PREFIX)/$(LIBSONAME) $(PREFIX)/libopenblas.so endif -ifeq ($(OSNAME), Darwin) +ifeq ($(OSNAME), Darwin) -cp $(LIBDYNNAME) $(PREFIX) - -install_name_tool -add_rpath $(PREFIX)/$(LIBDYNNAME) $(PREFIX)/$(LIBDYNNAME) + -install_name_tool -id $(PREFIX)/$(LIBDYNNAME) $(PREFIX)/$(LIBDYNNAME) -ln -fs $(PREFIX)/$(LIBDYNNAME) $(PREFIX)/libopenblas.dylib endif ifeq ($(OSNAME), WINNT) diff --git a/exports/Makefile b/exports/Makefile index 0a2f3184f..08f496501 100644 --- a/exports/Makefile +++ b/exports/Makefile @@ -85,7 +85,7 @@ libgoto_hpl.def : gensymbol perl ./gensymbol win2khpl $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) > $(@F) $(LIBDYNNAME) : ../$(LIBNAME) osx.def - $(CC) $(CFLAGS) -all_load -headerpad_max_install_names -install_name `pwd`/../$(LIBDYNNAME) -dynamiclib -o ../$(LIBDYNNAME) $< -Wl,-exported_symbols_list,osx.def $(FEXTRALIB) + $(CC) $(CFLAGS) -all_load -headerpad_max_install_names -install_name $(CURDIR)/../$(LIBDYNNAME) -dynamiclib -o ../$(LIBDYNNAME) $< -Wl,-exported_symbols_list,osx.def $(FEXTRALIB) symbol.$(SUFFIX) : symbol.S $(CC) $(CFLAGS) -c -o $(@F) $^ From d40e5621e95f5633efbbe94b07c3309713ba6432 Mon Sep 17 00:00:00 2001 From: Xianyi Date: Sun, 18 Sep 2011 05:07:00 +0800 Subject: [PATCH 11/17] Change the installation folder into /include and /lib. 
--- Makefile.install | 69 ++++++++++++++++++++++++++---------------------- 1 file changed, 37 insertions(+), 32 deletions(-) diff --git a/Makefile.install b/Makefile.install index 5b5895c1c..2778a491f 100644 --- a/Makefile.install +++ b/Makefile.install @@ -3,6 +3,9 @@ export GOTOBLAS_MAKEFILE = 1 -include $(TOPDIR)/Makefile.conf_last include ./Makefile.system +OPENBLAS_INCLUDE_DIR:=$(PREFIX)/include +OPENBLAS_LIBRARY_DIR:=$(PREFIX)/lib + .PHONY : install .NOTPARALLEL : install @@ -11,55 +14,57 @@ lib.grd : install : lib.grd @-mkdir -p $(PREFIX) - @echo Generating openblas_config.h in $(PREFIX) + @-mkdir -p $(OPENBLAS_INCLUDE_DIR) + @-mkdir -p $(OPENBLAS_LIBRARY_DIR) + @echo Generating openblas_config.h in $(OPENBLAS_INCLUDE_DIR) #for inc - @echo \#ifndef OPENBLAS_CONFIG_H > $(PREFIX)/openblas_config.h - @echo \#define OPENBLAS_CONFIG_H >> $(PREFIX)/openblas_config.h - @cat config_last.h >> $(PREFIX)/openblas_config.h - @echo \#define VERSION \" OpenBLAS $(VERSION) \" >> $(PREFIX)/openblas_config.h - @cat openblas_config_template.h >> $(PREFIX)/openblas_config.h - @echo \#endif >> $(PREFIX)/openblas_config.h + @echo \#ifndef OPENBLAS_CONFIG_H > $(OPENBLAS_INCLUDE_DIR)/openblas_config.h + @echo \#define OPENBLAS_CONFIG_H >> $(OPENBLAS_INCLUDE_DIR)/openblas_config.h + @cat config_last.h >> $(OPENBLAS_INCLUDE_DIR)/openblas_config.h + @echo \#define VERSION \" OpenBLAS $(VERSION) \" >> $(OPENBLAS_INCLUDE_DIR)/openblas_config.h + @cat openblas_config_template.h >> $(OPENBLAS_INCLUDE_DIR)/openblas_config.h + @echo \#endif >> $(OPENBLAS_INCLUDE_DIR)/openblas_config.h - @echo Generating f77blas.h in $(PREFIX) - @echo \#ifndef OPENBLAS_F77BLAS_H > $(PREFIX)/f77blas.h - @echo \#define OPENBLAS_F77BLAS_H >> $(PREFIX)/f77blas.h - @echo \#include \"openblas_config.h\" >> $(PREFIX)/f77blas.h - @cat common_interface.h >> $(PREFIX)/f77blas.h - @echo \#endif >> $(PREFIX)/f77blas.h + @echo Generating f77blas.h in $(OPENBLAS_INCLUDE_DIR) + @echo \#ifndef OPENBLAS_F77BLAS_H > 
$(OPENBLAS_INCLUDE_DIR)/f77blas.h + @echo \#define OPENBLAS_F77BLAS_H >> $(OPENBLAS_INCLUDE_DIR)/f77blas.h + @echo \#include \"openblas_config.h\" >> $(OPENBLAS_INCLUDE_DIR)/f77blas.h + @cat common_interface.h >> $(OPENBLAS_INCLUDE_DIR)/f77blas.h + @echo \#endif >> $(OPENBLAS_INCLUDE_DIR)/f77blas.h - @echo Generating cblas.h in $(PREFIX) - @sed 's/common/openblas_config/g' cblas.h > $(PREFIX)/cblas.h + @echo Generating cblas.h in $(OPENBLAS_INCLUDE_DIR) + @sed 's/common/openblas_config/g' cblas.h > $(OPENBLAS_INCLUDE_DIR)/cblas.h #for install static library - @echo Copy the static library to $(PREFIX) - @cp $(LIBNAME) $(PREFIX) - @-ln -fs $(PREFIX)/$(LIBNAME) $(PREFIX)/libopenblas.$(LIBSUFFIX) + @echo Copy the static library to $(OPENBLAS_LIBRARY_DIR) + @cp $(LIBNAME) $(OPENBLAS_LIBRARY_DIR) + @-ln -fs $(OPENBLAS_LIBRARY_DIR)/$(LIBNAME) $(OPENBLAS_LIBRARY_DIR)/libopenblas.$(LIBSUFFIX) #for install shared library - @echo Copy the shared library to $(PREFIX) + @echo Copy the shared library to $(OPENBLAS_LIBRARY_DIR) ifeq ($(OSNAME), Linux) - -cp $(LIBSONAME) $(PREFIX) - -ln -fs $(PREFIX)/$(LIBSONAME) $(PREFIX)/libopenblas.so + -cp $(LIBSONAME) $(OPENBLAS_LIBRARY_DIR) + -ln -fs $(OPENBLAS_LIBRARY_DIR)/$(LIBSONAME) $(OPENBLAS_LIBRARY_DIR)/libopenblas.so endif ifeq ($(OSNAME), FreeBSD) - -cp $(LIBSONAME) $(PREFIX) - -ln -fs $(PREFIX)/$(LIBSONAME) $(PREFIX)/libopenblas.so + -cp $(LIBSONAME) $(OPENBLAS_LIBRARY_DIR) + -ln -fs $(OPENBLAS_LIBRARY_DIR)/$(LIBSONAME) $(OPENBLAS_LIBRARY_DIR)/libopenblas.so endif ifeq ($(OSNAME), NetBSD) - -cp $(LIBSONAME) $(PREFIX) - -ln -fs $(PREFIX)/$(LIBSONAME) $(PREFIX)/libopenblas.so + -cp $(LIBSONAME) $(OPENBLAS_LIBRARY_DIR) + -ln -fs $(OPENBLAS_LIBRARY_DIR)/$(LIBSONAME) $(OPENBLAS_LIBRARY_DIR)/libopenblas.so endif ifeq ($(OSNAME), Darwin) - -cp $(LIBDYNNAME) $(PREFIX) - -install_name_tool -id $(PREFIX)/$(LIBDYNNAME) $(PREFIX)/$(LIBDYNNAME) - -ln -fs $(PREFIX)/$(LIBDYNNAME) $(PREFIX)/libopenblas.dylib + -cp $(LIBDYNNAME) 
$(OPENBLAS_LIBRARY_DIR) + -install_name_tool -id $(OPENBLAS_LIBRARY_DIR)/$(LIBDYNNAME) $(OPENBLAS_LIBRARY_DIR)/$(LIBDYNNAME) + -ln -fs $(OPENBLAS_LIBRARY_DIR)/$(LIBDYNNAME) $(OPENBLAS_LIBRARY_DIR)/libopenblas.dylib endif ifeq ($(OSNAME), WINNT) - -cp $(LIBDLLNAME) $(PREFIX) - -ln -fs $(PREFIX)/$(LIBDLLNAME) $(PREFIX)/libopenblas.dll + -cp $(LIBDLLNAME) $(OPENBLAS_LIBRARY_DIR) + -ln -fs $(OPENBLAS_LIBRARY_DIR)/$(LIBDLLNAME) $(OPENBLAS_LIBRARY_DIR)/libopenblas.dll endif ifeq ($(OSNAME), CYGWIN_NT) - -cp $(LIBDLLNAME) $(PREFIX) - -ln -fs $(PREFIX)/$(LIBDLLNAME) $(PREFIX)/libopenblas.dll + -cp $(LIBDLLNAME) $(OPENBLAS_LIBRARY_DIR) + -ln -fs $(OPENBLAS_LIBRARY_DIR)/$(LIBDLLNAME) $(OPENBLAS_LIBRARY_DIR)/libopenblas.dll endif @echo Install OK! From 1d31c79dc938183203be5e16cc6851a0bfa1a5e6 Mon Sep 17 00:00:00 2001 From: Xianyi Date: Sun, 18 Sep 2011 05:46:08 +0800 Subject: [PATCH 12/17] Prepared the document for 0.1 alpha 2.4 version. --- Changelog.txt | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Changelog.txt b/Changelog.txt index cd67f333d..48c5a727d 100644 --- a/Changelog.txt +++ b/Changelog.txt @@ -1,13 +1,15 @@ OpenBLAS ChangeLog ==================================================================== Version 0.1 alpha2.4 -16-Sep-2011 +18-Sep-2011 common: * Fixed a bug about installation. The header file "fblas77.h" works fine now. * Fixed #61 a building bug about setting TARGET and DYNAMIC_ARCH. * Try to handle absolute path of shared library in OSX. (#57) - Thank Mr.Kane. + Thank Dr Kane O'Donnell. + * Changed the installation folder layout to $(PREFIX)/include and + $(PREFIX)/lib x86/x86_64: * Fixed #58 zdot/xdot SEGFAULT bug with GCC-4.6 on x86. According From 57658a8c14b0121aa53f1b6a3ad980f858a170ea Mon Sep 17 00:00:00 2001 From: Xianyi Zhang Date: Sun, 9 Oct 2011 15:14:48 +0800 Subject: [PATCH 13/17] ref #62. Added the user friendly message with USE_OPENMP=1. The users should use OMP_NUM_THREADS. 
When OpenBLAS is compiled with USE_OPENMP=1, it ignores the OPENBLAS_NUM_THREADS and GOTO_NUM_THREADS flags. Therefore, you should use OMP_NUM_THREADS. Without setting OMP_NUM_THREADS, each process will use the maximum number of threads on a computing node. Thus, if there are 2 processes on the computing node, their threads will contend with each other for CPU cores. As a result, the application will hang. --- Makefile | 7 +++++++ README | 2 +- ctest/Makefile | 21 ++++++++++++++++++++ test/Makefile | 53 ++++++++++++++++++++++++++++++++++++++------------ 4 files changed, 70 insertions(+), 13 deletions(-) diff --git a/Makefile b/Makefile index 56d491077..af8b793c8 100644 --- a/Makefile +++ b/Makefile @@ -57,6 +57,13 @@ else @echo " (Multi threaded; Max num-threads is $(NUM_THREADS))" endif +ifeq ($(USE_OPENMP), 1) + @echo + @echo " Use OpenMP in the multithreading. Becasue of ignoring OPENBLAS_NUM_THREADS and GOTO_NUM_THREADS flags, " + @echo " you should use OMP_NUM_THREADS environment variable to control the number of threads." + @echo +endif + ifeq ($(OSNAME), Darwin) @echo "WARNING: If you plan to use the dynamic library $(LIBDYNNAME), you must run:" @echo diff --git a/README b/README index 248741544..79ab48d8b 100644 --- a/README +++ b/README @@ -48,7 +48,7 @@ export OMP_NUM_THREADS=4 The priorities are OPENBLAS_NUM_THREADS > GOTO_NUM_THREADS > OMP_NUM_THREADS. -If you compile this lib with USE_OPENMP=1, you should only set OMP_NUM_THREADS environment variable. +If you compile this lib with USE_OPENMP=1, you should set OMP_NUM_THREADS environment variable. OpenBLAS ignores OPENBLAS_NUM_THREADS and GOTO_NUM_THREADS with USE_OPENMP=1. 4.2 Set the number of threads with calling functions.
for example, void goto_set_num_threads(int num_threads); diff --git a/ctest/Makefile b/ctest/Makefile index c15a68109..1e07bd154 100644 --- a/ctest/Makefile +++ b/ctest/Makefile @@ -36,22 +36,43 @@ ztestl3o = c_zblas3.o c_z3chke.o auxiliary.o c_xerbla.o constant.o all :: all1 all2 all3 all1: xscblat1 xdcblat1 xccblat1 xzcblat1 +ifeq ($(USE_OPENMP), 1) + OMP_NUM_THREADS=2 ./xscblat1 + OMP_NUM_THREADS=2 ./xdcblat1 + OMP_NUM_THREADS=2 ./xccblat1 + OMP_NUM_THREADS=2 ./xzcblat1 +else OPENBLAS_NUM_THREADS=2 ./xscblat1 OPENBLAS_NUM_THREADS=2 ./xdcblat1 OPENBLAS_NUM_THREADS=2 ./xccblat1 OPENBLAS_NUM_THREADS=2 ./xzcblat1 +endif all2: xscblat2 xdcblat2 xccblat2 xzcblat2 +ifeq ($(USE_OPENMP), 1) + OMP_NUM_THREADS=2 ./xscblat2 < sin2 + OMP_NUM_THREADS=2 ./xdcblat2 < din2 + OMP_NUM_THREADS=2 ./xccblat2 < cin2 + OMP_NUM_THREADS=2 ./xzcblat2 < zin2 +else OPENBLAS_NUM_THREADS=2 ./xscblat2 < sin2 OPENBLAS_NUM_THREADS=2 ./xdcblat2 < din2 OPENBLAS_NUM_THREADS=2 ./xccblat2 < cin2 OPENBLAS_NUM_THREADS=2 ./xzcblat2 < zin2 +endif all3: xscblat3 xdcblat3 xccblat3 xzcblat3 +ifeq ($(USE_OPENMP), 1) + OMP_NUM_THREADS=2 ./xscblat3 < sin3 + OMP_NUM_THREADS=2 ./xdcblat3 < din3 + OMP_NUM_THREADS=2 ./xccblat3 < cin3 + OMP_NUM_THREADS=2 ./xzcblat3 < zin3 +else OPENBLAS_NUM_THREADS=2 ./xscblat3 < sin3 OPENBLAS_NUM_THREADS=2 ./xdcblat3 < din3 OPENBLAS_NUM_THREADS=2 ./xccblat3 < cin3 OPENBLAS_NUM_THREADS=2 ./xzcblat3 < zin3 +endif clean :: rm -f x* diff --git a/test/Makefile b/test/Makefile index a5308e446..2df499b11 100644 --- a/test/Makefile +++ b/test/Makefile @@ -4,29 +4,46 @@ include ../Makefile.system all :: level1 level2 level3 level1 : sblat1 dblat1 cblat1 zblat1 - OPENBLAS_NUM_THREADS=1 ./sblat1 - OPENBLAS_NUM_THREADS=1 ./dblat1 - OPENBLAS_NUM_THREADS=1 ./cblat1 - OPENBLAS_NUM_THREADS=1 ./zblat1 + OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./sblat1 + OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./dblat1 + OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./cblat1 + OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 
./zblat1 ifdef SMP +ifeq ($(USE_OPENMP), 1) + OMP_NUM_THREADS=2 ./sblat1 + OMP_NUM_THREADS=2 ./dblat1 + OMP_NUM_THREADS=2 ./cblat1 + OMP_NUM_THREADS=2 ./zblat1 +else OPENBLAS_NUM_THREADS=2 ./sblat1 OPENBLAS_NUM_THREADS=2 ./dblat1 OPENBLAS_NUM_THREADS=2 ./cblat1 OPENBLAS_NUM_THREADS=2 ./zblat1 endif +endif level2 : sblat2 dblat2 cblat2 zblat2 rm -f ?BLAT2.SUMM - OPENBLAS_NUM_THREADS=1 ./sblat2 < ./sblat2.dat + OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./sblat2 < ./sblat2.dat @$(GREP) -q FATAL SBLAT2.SUMM && cat SBLAT2.SUMM || exit 0 - OPENBLAS_NUM_THREADS=1 ./dblat2 < ./dblat2.dat + OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./dblat2 < ./dblat2.dat @$(GREP) -q FATAL DBLAT2.SUMM && cat DBLAT2.SUMM || exit 0 - OPENBLAS_NUM_THREADS=1 ./cblat2 < ./cblat2.dat + OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./cblat2 < ./cblat2.dat @$(GREP) -q FATAL CBLAT2.SUMM && cat CBLAT2.SUMM || exit 0 - OPENBLAS_NUM_THREADS=1 ./zblat2 < ./zblat2.dat + OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./zblat2 < ./zblat2.dat @$(GREP) -q FATAL ZBLAT2.SUMM && cat ZBLAT2.SUMM || exit 0 ifdef SMP rm -f ?BLAT2.SUMM +ifeq ($(USE_OPENMP), 1) + OMP_NUM_THREADS=2 ./sblat2 < ./sblat2.dat + @$(GREP) -q FATAL SBLAT2.SUMM && cat SBLAT2.SUMM || exit 0 + OMP_NUM_THREADS=2 ./dblat2 < ./dblat2.dat + @$(GREP) -q FATAL DBLAT2.SUMM && cat DBLAT2.SUMM || exit 0 + OMP_NUM_THREADS=2 ./cblat2 < ./cblat2.dat + @$(GREP) -q FATAL CBLAT2.SUMM && cat CBLAT2.SUMM || exit 0 + OMP_NUM_THREADS=2 ./zblat2 < ./zblat2.dat + @$(GREP) -q FATAL ZBLAT2.SUMM && cat ZBLAT2.SUMM || exit 0 +else OPENBLAS_NUM_THREADS=2 ./sblat2 < ./sblat2.dat @$(GREP) -q FATAL SBLAT2.SUMM && cat SBLAT2.SUMM || exit 0 OPENBLAS_NUM_THREADS=2 ./dblat2 < ./dblat2.dat @@ -36,19 +53,30 @@ ifdef SMP OPENBLAS_NUM_THREADS=2 ./zblat2 < ./zblat2.dat @$(GREP) -q FATAL ZBLAT2.SUMM && cat ZBLAT2.SUMM || exit 0 endif +endif level3 : sblat3 dblat3 cblat3 zblat3 rm -f ?BLAT3.SUMM - OPENBLAS_NUM_THREADS=1 ./sblat3 < ./sblat3.dat + OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./sblat3 
< ./sblat3.dat @$(GREP) -q FATAL SBLAT3.SUMM && cat SBLAT3.SUMM || exit 0 - OPENBLAS_NUM_THREADS=1 ./dblat3 < ./dblat3.dat + OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./dblat3 < ./dblat3.dat @$(GREP) -q FATAL DBLAT3.SUMM && cat DBLAT3.SUMM || exit 0 - OPENBLAS_NUM_THREADS=1 ./cblat3 < ./cblat3.dat + OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./cblat3 < ./cblat3.dat @$(GREP) -q FATAL CBLAT3.SUMM && cat CBLAT3.SUMM || exit 0 - OPENBLAS_NUM_THREADS=1 ./zblat3 < ./zblat3.dat + OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./zblat3 < ./zblat3.dat @$(GREP) -q FATAL ZBLAT3.SUMM && cat ZBLAT3.SUMM || exit 0 ifdef SMP rm -f ?BLAT3.SUMM +ifeq ($(USE_OPENMP), 1) + OMP_NUM_THREADS=2 ./sblat3 < ./sblat3.dat + @$(GREP) -q FATAL SBLAT3.SUMM && cat SBLAT3.SUMM || exit 0 + OMP_NUM_THREADS=2 ./dblat3 < ./dblat3.dat + @$(GREP) -q FATAL DBLAT3.SUMM && cat DBLAT3.SUMM || exit 0 + OMP_NUM_THREADS=2 ./cblat3 < ./cblat3.dat + @$(GREP) -q FATAL CBLAT3.SUMM && cat CBLAT3.SUMM || exit 0 + OMP_NUM_THREADS=2 ./zblat3 < ./zblat3.dat + @$(GREP) -q FATAL ZBLAT3.SUMM && cat ZBLAT3.SUMM || exit 0 +else OPENBLAS_NUM_THREADS=2 ./sblat3 < ./sblat3.dat @$(GREP) -q FATAL SBLAT3.SUMM && cat SBLAT3.SUMM || exit 0 OPENBLAS_NUM_THREADS=2 ./dblat3 < ./dblat3.dat @@ -58,6 +86,7 @@ ifdef SMP OPENBLAS_NUM_THREADS=2 ./zblat3 < ./zblat3.dat @$(GREP) -q FATAL ZBLAT3.SUMM && cat ZBLAT3.SUMM || exit 0 endif +endif FLDFLAGS = $(FFLAGS:-fPIC=) CEXTRALIB = From 66a3c6df4e4bcdb11031588a5d24409c0746d0b5 Mon Sep 17 00:00:00 2001 From: traits Date: Sun, 9 Oct 2011 17:25:44 +0800 Subject: [PATCH 14/17] Ref #63. Fixed generating DLL bug on ming-w64. 
--- exports/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exports/Makefile b/exports/Makefile index 08f496501..69050989c 100644 --- a/exports/Makefile +++ b/exports/Makefile @@ -66,7 +66,7 @@ ifeq ($(BINARY32), 1) -lib /machine:i386 /def:libgoto2.def else $(DLLWRAP) -o ../$(LIBDLLNAME) --def libgoto2.def \ - --entry _dllinit -s dllinit.$(SUFFIX) --dllname $(@F) ../$(LIBNAME) $(FEXTRALIB) + --entry $(FU)dllinit -s dllinit.$(SUFFIX) --dllname $(@F) ../$(LIBNAME) $(FEXTRALIB) -lib /machine:X64 /def:libgoto2.def endif From ba31b19c00550673410250f6b33aa70f7a4fa400 Mon Sep 17 00:00:00 2001 From: Xianyi Zhang Date: Sun, 16 Oct 2011 22:56:19 +0800 Subject: [PATCH 15/17] Ref #62. In the OpenMP implementation, check the return value of omp_get_max_threads(). It makes sure the return value is the same as blas_cpu_number, which is an internal global variable that stores the number of threads in OpenBLAS. --- common_thread.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/common_thread.h b/common_thread.h index d74af3287..dc963a635 100644 --- a/common_thread.h +++ b/common_thread.h @@ -39,6 +39,11 @@ #ifndef COMMON_THREAD #define COMMON_THREAD +#ifdef USE_OPENMP +#include <omp.h> +extern void goto_set_num_threads(int nthreads); +#endif + /* Basic Thread Debugging */ #undef SMP_DEBUG @@ -126,6 +131,10 @@ extern int blas_server_avail; static __inline int num_cpu_avail(int level) { +#ifdef USE_OPENMP + int openmp_nthreads=0; +#endif + if ((blas_cpu_number == 1) #ifdef USE_OPENMP @@ -133,6 +142,13 @@ static __inline int num_cpu_avail(int level) { #endif ) return 1; +#ifdef USE_OPENMP + openmp_nthreads=omp_get_max_threads(); + if (blas_cpu_number != openmp_nthreads) { + goto_set_num_threads(openmp_nthreads); + } +#endif + return blas_cpu_number; } From c852ce398109e12f9b18871102295f29eecb1f1b Mon Sep 17 00:00:00 2001 From: traits Date: Tue, 18 Oct 2011 10:23:17 +0800 Subject: [PATCH 16/17] Ref #65.
Fixed 64-bit Windows calling convention bug in cdot and zdot. According to 64-bit Windows calling convention, the return value is in %rax instead of %xmm0 in cdot kernel. In zdot, the caller allocates a memory space for return value and sets this memory address to the first hidden parameter. Thus, the callee (zdot) should assign the result to this memory space and return the memory address in %rax. --- kernel/x86_64/zdot_sse.S | 4 ++++ kernel/x86_64/zdot_sse2.S | 18 +++++++++++++++--- 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/kernel/x86_64/zdot_sse.S b/kernel/x86_64/zdot_sse.S index 3302b9088..13804e0f8 100644 --- a/kernel/x86_64/zdot_sse.S +++ b/kernel/x86_64/zdot_sse.S @@ -3483,6 +3483,10 @@ subss %xmm3, %xmm1 #endif unpcklps %xmm1, %xmm0 + +#ifdef WINDOWS_ABI + movq %xmm0, %rax +#endif RESTOREREGISTERS diff --git a/kernel/x86_64/zdot_sse2.S b/kernel/x86_64/zdot_sse2.S index 77fa8e378..63acecc08 100644 --- a/kernel/x86_64/zdot_sse2.S +++ b/kernel/x86_64/zdot_sse2.S @@ -39,14 +39,19 @@ #define ASSEMBLER #include "common.h" +#ifndef WINDOWS_ABI #define N ARG1 /* rdi */ #define X ARG2 /* rsi */ #define INCX ARG3 /* rdx */ #define Y ARG4 /* rcx */ -#ifndef WINDOWS_ABI #define INCY ARG5 /* r8 */ #else -#define INCY %r10 +#define RESULT_ADDRESS ARG1 /*rcx*/ +#define N ARG2 /* rdx */ +#define X ARG3 /* r8 */ +#define INCX ARG4 /* r9*/ +#define Y %r10 +#define INCY %r11 #endif #include "l1param.h" @@ -64,7 +69,8 @@ PROFCODE #ifdef WINDOWS_ABI - movq 40(%rsp), INCY + movq 40(%rsp), Y + movq 48(%rsp), INCY #endif SAVEREGISTERS @@ -1544,6 +1550,12 @@ subsd %xmm3, %xmm1 #endif +#ifdef WINDOWS_ABI + movq RESULT_ADDRESS, %rax + movsd %xmm0, (%rax) + movsd %xmm1, 8(%rax) +#endif + RESTOREREGISTERS ret From 19f5b5c13226ca469b0e0bc29a62d532d3770d36 Mon Sep 17 00:00:00 2001 From: traits Date: Tue, 18 Oct 2011 18:44:23 +0800 Subject: [PATCH 17/17] Fixed #66 the bug in zgemv kernel with transpose matrix on 64-bit MingW (Windows). 
--- kernel/x86_64/zgemv_t.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/x86_64/zgemv_t.S b/kernel/x86_64/zgemv_t.S index d7f9d49fe..14abc8bfa 100644 --- a/kernel/x86_64/zgemv_t.S +++ b/kernel/x86_64/zgemv_t.S @@ -131,8 +131,8 @@ movq OLD_LDA, LDA movq OLD_X, X - movaps %xmm3, %xmm0 - movss OLD_ALPHA_I, %xmm1 + movapd %xmm3, %xmm0 + movsd OLD_ALPHA_I, %xmm1 #endif movq OLD_INCX, INCX