diff --git a/Changelog.txt b/Changelog.txt index b431c9723..48c5a727d 100644 --- a/Changelog.txt +++ b/Changelog.txt @@ -1,4 +1,29 @@ OpenBLAS ChangeLog +==================================================================== +Version 0.1 alpha2.4 +18-Sep-2011 +common: + * Fixed a bug about installation. The header file "f77blas.h" + works fine now. + * Fixed #61 a building bug about setting TARGET and DYNAMIC_ARCH. + * Try to handle absolute path of shared library in OSX. (#57) + Thank Dr Kane O'Donnell. + * Changed the installation folder layout to $(PREFIX)/include and + $(PREFIX)/lib + +x86/x86_64: + * Fixed #58 zdot/xdot SEGFAULT bug with GCC-4.6 on x86. According + to the i386 calling convention, the callee should remove the first + hidden parameter. Thank Mr. John for this patch. + +==================================================================== +Version 0.1 alpha2.3 +5-Sep-2011 + +x86/x86_64: + * Added DTB_ENTRIES into dynamic arch setting parameters. Now, + it can read DTB_ENTRIES on runtime. (Refs issue #55 on github) + ==================================================================== Version 0.1 alpha2.2 14-Jul-2011 diff --git a/Makefile b/Makefile index d86fbadf3..af8b793c8 100644 --- a/Makefile +++ b/Makefile @@ -56,6 +56,27 @@ ifndef SMP else @echo " (Multi threaded; Max num-threads is $(NUM_THREADS))" endif + +ifeq ($(USE_OPENMP), 1) + @echo + @echo " Use OpenMP in the multithreading. Becasue of ignoring OPENBLAS_NUM_THREADS and GOTO_NUM_THREADS flags, " + @echo " you should use OMP_NUM_THREADS environment variable to control the number of threads." + @echo +endif + +ifeq ($(OSNAME), Darwin) + @echo "WARNING: If you plan to use the dynamic library $(LIBDYNNAME), you must run:" + @echo + @echo "\"make PREFIX=/your_installation_path/ install\"." + @echo + @echo "(or set PREFIX in Makefile.rule and run make install." 
+ @echo "If you want to move the .dylib to a new location later, make sure you change" + @echo "the internal name of the dylib with:" + @echo + @echo "install_name_tool -id /new/absolute/path/to/$(LIBDYNNAME) $(LIBDYNNAME)" +endif + @echo + @echo "To install the library, you can run \"make PREFIX=/path/to/your/installation install\"." @echo shared : diff --git a/Makefile.install b/Makefile.install index 80dafc9c6..2778a491f 100644 --- a/Makefile.install +++ b/Makefile.install @@ -3,6 +3,9 @@ export GOTOBLAS_MAKEFILE = 1 -include $(TOPDIR)/Makefile.conf_last include ./Makefile.system +OPENBLAS_INCLUDE_DIR:=$(PREFIX)/include +OPENBLAS_LIBRARY_DIR:=$(PREFIX)/lib + .PHONY : install .NOTPARALLEL : install @@ -11,54 +14,57 @@ lib.grd : install : lib.grd @-mkdir -p $(PREFIX) - @echo Generating openblas_config.h in $(PREFIX) + @-mkdir -p $(OPENBLAS_INCLUDE_DIR) + @-mkdir -p $(OPENBLAS_LIBRARY_DIR) + @echo Generating openblas_config.h in $(OPENBLAS_INCLUDE_DIR) #for inc - @echo \#ifndef OPENBLAS_CONFIG_H > $(PREFIX)/openblas_config.h - @echo \#define OPENBLAS_CONFIG_H >> $(PREFIX)/openblas_config.h - @cat config_last.h >> $(PREFIX)/openblas_config.h - @echo \#define VERSION \" OpenBLAS $(VERSION) \" >> $(PREFIX)/openblas_config.h - @cat openblas_config_template.h >> $(PREFIX)/openblas_config.h - @echo \#endif >> $(PREFIX)/openblas_config.h + @echo \#ifndef OPENBLAS_CONFIG_H > $(OPENBLAS_INCLUDE_DIR)/openblas_config.h + @echo \#define OPENBLAS_CONFIG_H >> $(OPENBLAS_INCLUDE_DIR)/openblas_config.h + @cat config_last.h >> $(OPENBLAS_INCLUDE_DIR)/openblas_config.h + @echo \#define VERSION \" OpenBLAS $(VERSION) \" >> $(OPENBLAS_INCLUDE_DIR)/openblas_config.h + @cat openblas_config_template.h >> $(OPENBLAS_INCLUDE_DIR)/openblas_config.h + @echo \#endif >> $(OPENBLAS_INCLUDE_DIR)/openblas_config.h - @echo Generating f77blas.h in $(PREFIX) - @echo \#ifndef OPENBLAS_F77BLAS_H > $(PREFIX)/f77blas.h - @echo \#define OPENBLAS_F77BLAS_H >> $(PREFIX)/f77blas.h - @echo \#include 
\"openblas_config.h\" >> $(PREFIX)/f77blas.h - @cat common_interface.h >> $(PREFIX)/f77blas.h - @echo \#endif >> $(PREFIX)/f77blas.h + @echo Generating f77blas.h in $(OPENBLAS_INCLUDE_DIR) + @echo \#ifndef OPENBLAS_F77BLAS_H > $(OPENBLAS_INCLUDE_DIR)/f77blas.h + @echo \#define OPENBLAS_F77BLAS_H >> $(OPENBLAS_INCLUDE_DIR)/f77blas.h + @echo \#include \"openblas_config.h\" >> $(OPENBLAS_INCLUDE_DIR)/f77blas.h + @cat common_interface.h >> $(OPENBLAS_INCLUDE_DIR)/f77blas.h + @echo \#endif >> $(OPENBLAS_INCLUDE_DIR)/f77blas.h - @echo Generating cblas.h in $(PREFIX) - @sed 's/common/openblas_config/g' cblas.h > $(PREFIX)/cblas.h + @echo Generating cblas.h in $(OPENBLAS_INCLUDE_DIR) + @sed 's/common/openblas_config/g' cblas.h > $(OPENBLAS_INCLUDE_DIR)/cblas.h #for install static library - @echo Copy the static library to $(PREFIX) - @cp $(LIBNAME) $(PREFIX) - @-ln -fs $(PREFIX)/$(LIBNAME) $(PREFIX)/libopenblas.$(LIBSUFFIX) + @echo Copy the static library to $(OPENBLAS_LIBRARY_DIR) + @cp $(LIBNAME) $(OPENBLAS_LIBRARY_DIR) + @-ln -fs $(OPENBLAS_LIBRARY_DIR)/$(LIBNAME) $(OPENBLAS_LIBRARY_DIR)/libopenblas.$(LIBSUFFIX) #for install shared library - @echo Copy the shared library to $(PREFIX) + @echo Copy the shared library to $(OPENBLAS_LIBRARY_DIR) ifeq ($(OSNAME), Linux) - -cp $(LIBSONAME) $(PREFIX) - -ln -fs $(PREFIX)/$(LIBSONAME) $(PREFIX)/libopenblas.so + -cp $(LIBSONAME) $(OPENBLAS_LIBRARY_DIR) + -ln -fs $(OPENBLAS_LIBRARY_DIR)/$(LIBSONAME) $(OPENBLAS_LIBRARY_DIR)/libopenblas.so endif ifeq ($(OSNAME), FreeBSD) - -cp $(LIBSONAME) $(PREFIX) - -ln -fs $(PREFIX)/$(LIBSONAME) $(PREFIX)/libopenblas.so + -cp $(LIBSONAME) $(OPENBLAS_LIBRARY_DIR) + -ln -fs $(OPENBLAS_LIBRARY_DIR)/$(LIBSONAME) $(OPENBLAS_LIBRARY_DIR)/libopenblas.so endif ifeq ($(OSNAME), NetBSD) - -cp $(LIBSONAME) $(PREFIX) - -ln -fs $(PREFIX)/$(LIBSONAME) $(PREFIX)/libopenblas.so + -cp $(LIBSONAME) $(OPENBLAS_LIBRARY_DIR) + -ln -fs $(OPENBLAS_LIBRARY_DIR)/$(LIBSONAME) $(OPENBLAS_LIBRARY_DIR)/libopenblas.so endif 
-ifeq ($(OSNAME), Darwin) - -cp $(LIBDYNNAME) $(PREFIX) - -ln -fs $(PREFIX)/$(LIBDYNNAME) $(PREFIX)/libopenblas.dylib +ifeq ($(OSNAME), Darwin) + -cp $(LIBDYNNAME) $(OPENBLAS_LIBRARY_DIR) + -install_name_tool -id $(OPENBLAS_LIBRARY_DIR)/$(LIBDYNNAME) $(OPENBLAS_LIBRARY_DIR)/$(LIBDYNNAME) + -ln -fs $(OPENBLAS_LIBRARY_DIR)/$(LIBDYNNAME) $(OPENBLAS_LIBRARY_DIR)/libopenblas.dylib endif ifeq ($(OSNAME), WINNT) - -cp $(LIBDLLNAME) $(PREFIX) - -ln -fs $(PREFIX)/$(LIBDLLNAME) $(PREFIX)/libopenblas.dll + -cp $(LIBDLLNAME) $(OPENBLAS_LIBRARY_DIR) + -ln -fs $(OPENBLAS_LIBRARY_DIR)/$(LIBDLLNAME) $(OPENBLAS_LIBRARY_DIR)/libopenblas.dll endif ifeq ($(OSNAME), CYGWIN_NT) - -cp $(LIBDLLNAME) $(PREFIX) - -ln -fs $(PREFIX)/$(LIBDLLNAME) $(PREFIX)/libopenblas.dll + -cp $(LIBDLLNAME) $(OPENBLAS_LIBRARY_DIR) + -ln -fs $(OPENBLAS_LIBRARY_DIR)/$(LIBDLLNAME) $(OPENBLAS_LIBRARY_DIR)/libopenblas.dll endif @echo Install OK! diff --git a/Makefile.rule b/Makefile.rule index 8d3d73ac9..a7ba203fc 100644 --- a/Makefile.rule +++ b/Makefile.rule @@ -3,7 +3,7 @@ # # This library's version -VERSION = 0.1alpha2.2 +VERSION = 0.1alpha2.4 # You can specify the target architecture, otherwise it's # automatically detected. diff --git a/Makefile.system b/Makefile.system index f0487ac50..84f41a78f 100644 --- a/Makefile.system +++ b/Makefile.system @@ -27,7 +27,13 @@ HOSTCC = $(CC) endif ifdef TARGET -GETARCH_FLAGS += -DFORCE_$(TARGET) +GETARCH_FLAGS := -DFORCE_$(TARGET) +endif + +#TARGET_CORE will override TARGET which is used in DYNAMIC_ARCH=1. +# +ifdef TARGET_CORE +GETARCH_FLAGS := -DFORCE_$(TARGET_CORE) endif ifdef INTERFACE64 diff --git a/README b/README index b67db1169..c8c2c2c55 100644 --- a/README +++ b/README @@ -48,7 +48,7 @@ export OMP_NUM_THREADS=4 The priorities are OPENBLAS_NUM_THREADS > GOTO_NUM_THREADS > OMP_NUM_THREADS. -If you compile this lib with USE_OPENMP=1, you should only set OMP_NUM_THREADS environment variable. 
+If you compile this lib with USE_OPENMP=1, you should set OMP_NUM_THREADS environment variable. OpenBLAS ignores OPENBLAS_NUM_THREADS and GOTO_NUM_THREADS with USE_OPENMP=1. 4.2 Set the number of threads with calling functions. for example, void goto_set_num_threads(int num_threads); diff --git a/common_param.h b/common_param.h index c4580cc22..e978193d4 100644 --- a/common_param.h +++ b/common_param.h @@ -44,6 +44,7 @@ #ifdef DYNAMIC_ARCH typedef struct { + int dtb_entries; int offsetA, offsetB, align; int sgemm_p, sgemm_q, sgemm_r; @@ -813,6 +814,7 @@ BLASLONG (*ixamin_k)(BLASLONG, xdouble *, BLASLONG); extern gotoblas_t *gotoblas; +#define DTB_ENTRIES gotoblas -> dtb_entries #define GEMM_OFFSET_A gotoblas -> offsetA #define GEMM_OFFSET_B gotoblas -> offsetB #define GEMM_ALIGN gotoblas -> align @@ -863,6 +865,8 @@ extern gotoblas_t *gotoblas; #else +#define DTB_ENTRIES DTB_DEFAULT_ENTRIES + #define GEMM_OFFSET_A GEMM_DEFAULT_OFFSET_A #define GEMM_OFFSET_B GEMM_DEFAULT_OFFSET_B #define GEMM_ALIGN GEMM_DEFAULT_ALIGN @@ -997,14 +1001,14 @@ extern gotoblas_t *gotoblas; #endif #ifdef XDOUBLE -#define GEMM3M_UNROLL_M QGEMM_DEFAULT_UNROLL_M -#define GEMM3M_UNROLL_N QGEMM_DEFAULT_UNROLL_N +#define GEMM3M_UNROLL_M QGEMM_UNROLL_M +#define GEMM3M_UNROLL_N QGEMM_UNROLL_N #elif defined(DOUBLE) -#define GEMM3M_UNROLL_M DGEMM_DEFAULT_UNROLL_M -#define GEMM3M_UNROLL_N DGEMM_DEFAULT_UNROLL_N +#define GEMM3M_UNROLL_M DGEMM_UNROLL_M +#define GEMM3M_UNROLL_N DGEMM_UNROLL_N #else -#define GEMM3M_UNROLL_M SGEMM_DEFAULT_UNROLL_M -#define GEMM3M_UNROLL_N SGEMM_DEFAULT_UNROLL_N +#define GEMM3M_UNROLL_M SGEMM_UNROLL_M +#define GEMM3M_UNROLL_N SGEMM_UNROLL_N #endif diff --git a/common_thread.h b/common_thread.h index d74af3287..dc963a635 100644 --- a/common_thread.h +++ b/common_thread.h @@ -39,6 +39,11 @@ #ifndef COMMON_THREAD #define COMMON_THREAD +#ifdef USE_OPENMP +#include <omp.h> +extern void goto_set_num_threads(int nthreads); +#endif + /* Basic Thread Debugging */ #undef SMP_DEBUG @@ 
-126,6 +131,10 @@ extern int blas_server_avail; static __inline int num_cpu_avail(int level) { +#ifdef USE_OPENMP + int openmp_nthreads=0; +#endif + if ((blas_cpu_number == 1) #ifdef USE_OPENMP @@ -133,6 +142,13 @@ static __inline int num_cpu_avail(int level) { #endif ) return 1; +#ifdef USE_OPENMP + openmp_nthreads=omp_get_max_threads(); + if (blas_cpu_number != openmp_nthreads) { + goto_set_num_threads(openmp_nthreads); + } +#endif + return blas_cpu_number; } diff --git a/cpuid_alpha.c b/cpuid_alpha.c index ca786d550..adcc314c3 100644 --- a/cpuid_alpha.c +++ b/cpuid_alpha.c @@ -72,7 +72,7 @@ void get_cpuconfig(void){ printf("#define L1_DATA_LINESIZE 32\n"); printf("#define L2_SIZE 2097152\n"); printf("#define L2_LINESIZE 32\n"); - printf("#define DTB_ENTRIES 32\n"); + printf("#define DTB_DEFAULT_ENTRIES 32\n"); printf("#define DTB_SIZE 8192\n"); break; @@ -81,7 +81,7 @@ void get_cpuconfig(void){ printf("#define L1_DATA_LINESIZE 32\n"); printf("#define L2_SIZE 2097152\n"); printf("#define L2_LINESIZE 64\n"); - printf("#define DTB_ENTRIES 64\n"); + printf("#define DTB_DEFAULT_ENTRIES 64\n"); printf("#define DTB_SIZE 8192\n"); break; @@ -90,7 +90,7 @@ void get_cpuconfig(void){ printf("#define L1_DATA_LINESIZE 64\n"); printf("#define L2_SIZE 4194304\n"); printf("#define L2_LINESIZE 64\n"); - printf("#define DTB_ENTRIES 64\n"); + printf("#define DTB_DEFAULT_ENTRIES 64\n"); printf("#define DTB_SIZE 8192\n"); break; } diff --git a/cpuid_ia64.c b/cpuid_ia64.c index 7f0fa6d2f..d372182a7 100644 --- a/cpuid_ia64.c +++ b/cpuid_ia64.c @@ -133,6 +133,6 @@ void get_cpuconfig(void){ printf("#define L2_SIZE 1572864\n"); printf("#define L2_LINESIZE 128\n"); printf("#define DTB_SIZE 16384\n"); - printf("#define DTB_ENTRIES 128\n"); + printf("#define DTB_DEFAULT_ENTRIES 128\n"); } diff --git a/cpuid_mips.c b/cpuid_mips.c index 7d0b5cc62..f50a4ec3e 100644 --- a/cpuid_mips.c +++ b/cpuid_mips.c @@ -146,7 +146,7 @@ void get_cpuconfig(void){ printf("#define L1_DATA_LINESIZE 32\n"); 
printf("#define L2_SIZE 512488\n"); printf("#define L2_LINESIZE 32\n"); - printf("#define DTB_ENTRIES 64\n"); + printf("#define DTB_DEFAULT_ENTRIES 64\n"); printf("#define DTB_SIZE 4096\n"); printf("#define L2_ASSOCIATIVE 4\n"); }else{ @@ -155,7 +155,7 @@ void get_cpuconfig(void){ printf("#define L1_DATA_LINESIZE 32\n"); printf("#define L2_SIZE 512488\n"); printf("#define L2_LINESIZE 32\n"); - printf("#define DTB_ENTRIES 32\n"); + printf("#define DTB_DEFAULT_ENTRIES 32\n"); printf("#define DTB_SIZE 4096\n"); printf("#define L2_ASSOCIATIVE 8\n"); } diff --git a/cpuid_power.c b/cpuid_power.c index 46ff30a3a..24ff78f05 100644 --- a/cpuid_power.c +++ b/cpuid_power.c @@ -165,7 +165,7 @@ void get_cpuconfig(void){ printf("#define L1_DATA_LINESIZE 128\n"); printf("#define L2_SIZE 524288\n"); printf("#define L2_LINESIZE 128 \n"); - printf("#define DTB_ENTRIES 128\n"); + printf("#define DTB_DEFAULT_ENTRIES 128\n"); printf("#define DTB_SIZE 4096\n"); printf("#define L2_ASSOCIATIVE 8\n"); diff --git a/cpuid_sparc.c b/cpuid_sparc.c index b65c69de4..b1e212b2f 100644 --- a/cpuid_sparc.c +++ b/cpuid_sparc.c @@ -50,7 +50,7 @@ void get_subdirname(void){ void get_cpuconfig(void){ printf("#define V9\n"); - printf("#define DTB_ENTRIES 32\n"); + printf("#define DTB_DEFAULT_ENTRIES 32\n"); } void get_libname(void){ diff --git a/cpuid_x86.c b/cpuid_x86.c index 6e3e74f82..b68f53573 100644 --- a/cpuid_x86.c +++ b/cpuid_x86.c @@ -1393,7 +1393,7 @@ void get_cpuconfig(void){ if (info.size > 0) { printf("#define DTB_SIZE %d\n", info.size * 1024); printf("#define DTB_ASSOCIATIVE %d\n", info.associative); - printf("#define DTB_ENTRIES %d\n", info.linesize); + printf("#define DTB_DEFAULT_ENTRIES %d\n", info.linesize); } features = get_cputype(GET_FEATURE); @@ -1422,7 +1422,7 @@ void get_cpuconfig(void){ features = get_coretype(); if (features > 0) printf("#define CORE_%s\n", corename[features]); } else { - printf("#define DTB_ENTRIES 16\n"); + printf("#define DTB_DEFAULT_ENTRIES 16\n"); 
printf("#define L1_CODE_SIZE 8192\n"); printf("#define L1_DATA_SIZE 8192\n"); printf("#define L2_SIZE 0\n"); diff --git a/ctest/Makefile b/ctest/Makefile index c15a68109..1e07bd154 100644 --- a/ctest/Makefile +++ b/ctest/Makefile @@ -36,22 +36,43 @@ ztestl3o = c_zblas3.o c_z3chke.o auxiliary.o c_xerbla.o constant.o all :: all1 all2 all3 all1: xscblat1 xdcblat1 xccblat1 xzcblat1 +ifeq ($(USE_OPENMP), 1) + OMP_NUM_THREADS=2 ./xscblat1 + OMP_NUM_THREADS=2 ./xdcblat1 + OMP_NUM_THREADS=2 ./xccblat1 + OMP_NUM_THREADS=2 ./xzcblat1 +else OPENBLAS_NUM_THREADS=2 ./xscblat1 OPENBLAS_NUM_THREADS=2 ./xdcblat1 OPENBLAS_NUM_THREADS=2 ./xccblat1 OPENBLAS_NUM_THREADS=2 ./xzcblat1 +endif all2: xscblat2 xdcblat2 xccblat2 xzcblat2 +ifeq ($(USE_OPENMP), 1) + OMP_NUM_THREADS=2 ./xscblat2 < sin2 + OMP_NUM_THREADS=2 ./xdcblat2 < din2 + OMP_NUM_THREADS=2 ./xccblat2 < cin2 + OMP_NUM_THREADS=2 ./xzcblat2 < zin2 +else OPENBLAS_NUM_THREADS=2 ./xscblat2 < sin2 OPENBLAS_NUM_THREADS=2 ./xdcblat2 < din2 OPENBLAS_NUM_THREADS=2 ./xccblat2 < cin2 OPENBLAS_NUM_THREADS=2 ./xzcblat2 < zin2 +endif all3: xscblat3 xdcblat3 xccblat3 xzcblat3 +ifeq ($(USE_OPENMP), 1) + OMP_NUM_THREADS=2 ./xscblat3 < sin3 + OMP_NUM_THREADS=2 ./xdcblat3 < din3 + OMP_NUM_THREADS=2 ./xccblat3 < cin3 + OMP_NUM_THREADS=2 ./xzcblat3 < zin3 +else OPENBLAS_NUM_THREADS=2 ./xscblat3 < sin3 OPENBLAS_NUM_THREADS=2 ./xdcblat3 < din3 OPENBLAS_NUM_THREADS=2 ./xccblat3 < cin3 OPENBLAS_NUM_THREADS=2 ./xzcblat3 < zin3 +endif clean :: rm -f x* diff --git a/exports/Makefile b/exports/Makefile index f4c9314f9..69050989c 100644 --- a/exports/Makefile +++ b/exports/Makefile @@ -66,7 +66,7 @@ ifeq ($(BINARY32), 1) -lib /machine:i386 /def:libgoto2.def else $(DLLWRAP) -o ../$(LIBDLLNAME) --def libgoto2.def \ - --entry _dllinit -s dllinit.$(SUFFIX) --dllname $(@F) ../$(LIBNAME) $(FEXTRALIB) + --entry $(FU)dllinit -s dllinit.$(SUFFIX) --dllname $(@F) ../$(LIBNAME) $(FEXTRALIB) -lib /machine:X64 /def:libgoto2.def endif @@ -85,7 +85,7 @@ libgoto_hpl.def : 
gensymbol perl ./gensymbol win2khpl $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) > $(@F) $(LIBDYNNAME) : ../$(LIBNAME) osx.def - $(PREFIX)gcc $(CFLAGS) -all_load -dynamiclib -o ../$(LIBDYNNAME) $< -Wl,-exported_symbols_list,osx.def $(FEXTRALIB) + $(CC) $(CFLAGS) -all_load -headerpad_max_install_names -install_name $(CURDIR)/../$(LIBDYNNAME) -dynamiclib -o ../$(LIBDYNNAME) $< -Wl,-exported_symbols_list,osx.def $(FEXTRALIB) symbol.$(SUFFIX) : symbol.S $(CC) $(CFLAGS) -c -o $(@F) $^ diff --git a/getarch.c b/getarch.c index 8864753b7..df052df8a 100644 --- a/getarch.c +++ b/getarch.c @@ -130,7 +130,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define ARCHCONFIG "-DPENTIUM2 " \ "-DL1_DATA_SIZE=16384 -DL1_DATA_LINESIZE=32 " \ "-DL2_SIZE=512488 -DL2_LINESIZE=32 " \ - "-DDTB_ENTRIES=64 -DDTB_SIZE=4096 " \ + "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \ "-DHAVE_CMOV -DHAVE_MMX" #define LIBNAME "p2" #define CORENAME "P5" @@ -144,7 +144,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define ARCHCONFIG "-DPENTIUM3 " \ "-DL1_DATA_SIZE=16384 -DL1_DATA_LINESIZE=32 " \ "-DL2_SIZE=262144 -DL2_LINESIZE=32 " \ - "-DDTB_ENTRIES=64 -DDTB_SIZE=4096 " \ + "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \ "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE " #define LIBNAME "coppermine" #define CORENAME "COPPERMINE" @@ -158,7 +158,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define ARCHCONFIG "-DPENTIUM3 " \ "-DL1_DATA_SIZE=16384 -DL1_DATA_LINESIZE=32 " \ "-DL2_SIZE=524288 -DL2_LINESIZE=32 " \ - "-DDTB_ENTRIES=64 -DDTB_SIZE=4096 " \ + "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \ "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE " #define LIBNAME "katmai" #define CORENAME "KATMAI" @@ -172,7 +172,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define ARCHCONFIG "-DPENTIUM4 " \ "-DL1_DATA_SIZE=8192 -DL1_DATA_LINESIZE=64 " \ "-DL2_SIZE=524288 -DL2_LINESIZE=64 " \ - "-DDTB_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 " \ + "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 " \ "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 " #define LIBNAME "northwood" #define CORENAME "NORTHWOOD" @@ -186,7 +186,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define ARCHCONFIG "-DPENTIUM4 " \ "-DL1_DATA_SIZE=16384 -DL1_DATA_LINESIZE=64 " \ "-DL2_SIZE=1048576 -DL2_LINESIZE=64 " \ - "-DDTB_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 " \ + "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 " \ "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3" #define LIBNAME "prescott" #define CORENAME "PRESCOTT" @@ -200,7 +200,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define ARCHCONFIG "-DPENTIUMM " \ "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \ "-DL2_SIZE=1048576 -DL2_LINESIZE=64 " \ - "-DDTB_ENTRIES=64 -DDTB_SIZE=4096 " \ + "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \ "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 " #define LIBNAME "banias" #define CORENAME "BANIAS" @@ -214,7 +214,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define ARCHCONFIG "-DPENTIUMM " \ "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \ "-DL2_SIZE=1048576 -DL2_LINESIZE=64 " \ - "-DDTB_ENTRIES=64 -DDTB_SIZE=4096 " \ + "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \ "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 " #define LIBNAME "yonah" #define CORENAME "YONAH" @@ -228,7 +228,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define ARCHCONFIG "-DCORE2 " \ "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \ "-DL2_SIZE=1048576 -DL2_LINESIZE=64 " \ - "-DDTB_ENTRIES=256 -DDTB_SIZE=4096 " \ + "-DDTB_DEFAULT_ENTRIES=256 -DDTB_SIZE=4096 " \ "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3" #define LIBNAME "core2" #define CORENAME "CORE2" @@ -242,7 +242,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define ARCHCONFIG "-DPENRYN " \ "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \ "-DL2_SIZE=1048576 -DL2_LINESIZE=64 " \ - "-DDTB_ENTRIES=256 -DDTB_SIZE=4096 " \ + "-DDTB_DEFAULT_ENTRIES=256 -DDTB_SIZE=4096 " \ "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3 -DHAVE_SSE4_1" #define LIBNAME "penryn" #define CORENAME "PENRYN" @@ -257,7 +257,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \ "-DL2_SIZE=1048576 -DL2_LINESIZE=64 " \ "-DL3_SIZE=16777216 -DL3_LINESIZE=64 " \ - "-DDTB_ENTRIES=256 -DDTB_SIZE=4096 " \ + "-DDTB_DEFAULT_ENTRIES=256 -DDTB_SIZE=4096 " \ "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3 -DHAVE_SSE4_1" #define LIBNAME "dunnington" #define CORENAME "DUNNINGTON" @@ -271,7 +271,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define ARCHCONFIG "-DNEHALEM " \ "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \ "-DL2_SIZE=262144 -DL2_LINESIZE=64 " \ - "-DDTB_ENTRIES=64 -DDTB_SIZE=4096 " \ + "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \ "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2" #define LIBNAME "nehalem" #define CORENAME "NEHALEM" @@ -285,7 +285,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define ARCHCONFIG "-DATOM " \ "-DL1_DATA_SIZE=24576 -DL1_DATA_LINESIZE=64 " \ "-DL2_SIZE=524288 -DL2_LINESIZE=64 " \ - "-DDTB_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " \ + "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " \ "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3" #define LIBNAME "atom" #define CORENAME "ATOM" @@ -299,7 +299,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define ARCHCONFIG "-DATHLON " \ "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 " \ "-DL2_SIZE=1048576 -DL2_LINESIZE=64 " \ - "-DDTB_ENTRIES=32 -DDTB_SIZE=4096 -DHAVE_3DNOW " \ + "-DDTB_DEFAULT_ENTRIES=32 -DDTB_SIZE=4096 -DHAVE_3DNOW " \ "-DHAVE_3DNOWEX -DHAVE_MMX -DHAVE_SSE " #define LIBNAME "athlon" #define CORENAME "ATHLON" @@ -313,7 +313,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define ARCHCONFIG "-DOPTERON " \ "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 " \ "-DL2_SIZE=1048576 -DL2_LINESIZE=64 " \ - "-DDTB_ENTRIES=32 -DDTB_SIZE=4096 -DHAVE_3DNOW " \ + "-DDTB_DEFAULT_ENTRIES=32 -DDTB_SIZE=4096 -DHAVE_3DNOW " \ "-DHAVE_3DNOWEX -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 " #define LIBNAME "opteron" #define CORENAME "OPTERON" @@ -327,7 +327,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define ARCHCONFIG "-DOPTERON " \ "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 " \ "-DL2_SIZE=1048576 -DL2_LINESIZE=64 " \ - "-DDTB_ENTRIES=32 -DDTB_SIZE=4096 -DHAVE_3DNOW " \ + "-DDTB_DEFAULT_ENTRIES=32 -DDTB_SIZE=4096 -DHAVE_3DNOW " \ "-DHAVE_3DNOWEX -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3" #define LIBNAME "opteron" #define CORENAME "OPTERON" @@ -341,7 +341,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define ARCHCONFIG "-DBARCELONA " \ "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 " \ "-DL2_SIZE=524288 -DL2_LINESIZE=64 -DL3_SIZE=2097152 " \ - "-DDTB_ENTRIES=48 -DDTB_SIZE=4096 -DHAVE_3DNOW " \ + "-DDTB_DEFAULT_ENTRIES=48 -DDTB_SIZE=4096 -DHAVE_3DNOW " \ "-DHAVE_3DNOWEX -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 " \ "-DHAVE_SSE4A -DHAVE_MISALIGNSSE -DHAVE_128BITFPU -DHAVE_FASTMOVU" #define LIBNAME "barcelona" @@ -356,7 +356,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define ARCHCONFIG "-DGENERIC " \ "-DL1_DATA_SIZE=16384 -DL1_DATA_LINESIZE=64 " \ "-DL2_SIZE=524288 -DL2_LINESIZE=64 " \ - "-DDTB_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 " \ + "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 " \ "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2" #define LIBNAME "generic" #define CORENAME "GENERIC" @@ -370,7 +370,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define ARCHCONFIG "-DVIAC3 " \ "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \ "-DL2_SIZE=65536 -DL2_LINESIZE=32 " \ - "-DDTB_ENTRIES=128 -DDTB_SIZE=4096 " \ + "-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 " \ "-DHAVE_MMX -DHAVE_SSE " #define LIBNAME "viac3" #define CORENAME "VIAC3" @@ -384,7 +384,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define ARCHCONFIG "-DNANO " \ "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 " \ "-DL2_SIZE=1048576 -DL2_LINESIZE=64 " \ - "-DDTB_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 " \ + "-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 " \ "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3" #define LIBNAME "nano" #define CORENAME "NANO" @@ -398,7 +398,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define ARCHCONFIG "-DPOWER3 " \ "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=128 " \ "-DL2_SIZE=2097152 -DL2_LINESIZE=128 " \ - "-DDTB_ENTRIES=256 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 " + "-DDTB_DEFAULT_ENTRIES=256 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 " #define LIBNAME "power3" #define CORENAME "POWER3" #endif @@ -411,7 +411,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define ARCHCONFIG "-DPOWER4 " \ "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=128 " \ "-DL2_SIZE=1509949 -DL2_LINESIZE=128 " \ - "-DDTB_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=6 " + "-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=6 " #define LIBNAME "power4" #define CORENAME "POWER4" #endif @@ -424,7 +424,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define ARCHCONFIG "-DPOWER5 " \ "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=128 " \ "-DL2_SIZE=1509949 -DL2_LINESIZE=128 " \ - "-DDTB_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=6 " + "-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=6 " #define LIBNAME "power5" #define CORENAME "POWER5" #endif @@ -437,7 +437,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define ARCHCONFIG "-DPOWER6 " \ "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=128 " \ "-DL2_SIZE=4194304 -DL2_LINESIZE=128 " \ - "-DDTB_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 " + "-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 " #define LIBNAME "power6" #define CORENAME "POWER6" #endif @@ -450,7 +450,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define ARCHCONFIG "-DPPCG4 " \ "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=32 " \ "-DL2_SIZE=262144 -DL2_LINESIZE=32 " \ - "-DDTB_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 " + "-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 " #define LIBNAME "ppcg4" #define CORENAME "PPCG4" #endif @@ -463,7 +463,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define ARCHCONFIG "-DPPC970 " \ "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=128 " \ "-DL2_SIZE=512488 -DL2_LINESIZE=128 " \ - "-DDTB_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 " + "-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 " #define LIBNAME "ppc970" #define CORENAME "PPC970" #endif @@ -476,7 +476,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define ARCHCONFIG "-DPPC970 " \ "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=128 " \ "-DL2_SIZE=1024976 -DL2_LINESIZE=128 " \ - "-DDTB_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 " + "-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 " #define LIBNAME "ppc970mp" #define CORENAME "PPC970" #endif @@ -489,7 +489,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define ARCHCONFIG "-DPPC440 " \ "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=32 " \ "-DL2_SIZE=16384 -DL2_LINESIZE=128 " \ - "-DDTB_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=16 " + "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=16 " #define LIBNAME "ppc440" #define CORENAME "PPC440" #endif @@ -502,7 +502,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define ARCHCONFIG "-DPPC440FP2 " \ "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=32 " \ "-DL2_SIZE=16384 -DL2_LINESIZE=128 " \ - "-DDTB_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=16 " + "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=16 " #define LIBNAME "ppc440FP2" #define CORENAME "PPC440FP2" #endif @@ -515,7 +515,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define ARCHCONFIG "-DCELL " \ "-DL1_DATA_SIZE=262144 -DL1_DATA_LINESIZE=128 " \ "-DL2_SIZE=512488 -DL2_LINESIZE=128 " \ - "-DDTB_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 " + "-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 " #define LIBNAME "cell" #define CORENAME "CELL" #endif @@ -528,7 +528,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define ARCHCONFIG "-DSICORTEX " \ "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=32 " \ "-DL2_SIZE=512488 -DL2_LINESIZE=32 " \ - "-DDTB_ENTRIES=32 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 " + "-DDTB_DEFAULT_ENTRIES=32 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 " #define LIBNAME "mips" #define CORENAME "sicortex" #endif @@ -542,7 +542,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define ARCHCONFIG "-DLOONGSON3A " \ "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \ "-DL2_SIZE=512488 -DL2_LINESIZE=32 " \ - "-DDTB_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " + "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " #define LIBNAME "loongson3a" #define CORENAME "LOONGSON3A" #else @@ -555,7 +555,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define SUBDIRNAME "ia64" #define ARCHCONFIG "-DITANIUM2 " \ "-DL1_DATA_SIZE=262144 -DL1_DATA_LINESIZE=128 " \ - "-DL2_SIZE=1572864 -DL2_LINESIZE=128 -DDTB_SIZE=16384 -DDTB_ENTRIES=128 " + "-DL2_SIZE=1572864 -DL2_LINESIZE=128 -DDTB_SIZE=16384 -DDTB_DEFAULT_ENTRIES=128 " #define LIBNAME "itanium2" #define CORENAME "itanium2" #endif @@ -567,7 +567,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define SUBDIRNAME "sparc" #define ARCHCONFIG "-DSPARC -DV9 " \ "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 " \ - "-DL2_SIZE=1572864 -DL2_LINESIZE=64 -DDTB_SIZE=8192 -DDTB_ENTRIES=64 " + "-DL2_SIZE=1572864 -DL2_LINESIZE=64 -DDTB_SIZE=8192 -DDTB_DEFAULT_ENTRIES=64 " #define LIBNAME "sparc" #define CORENAME "sparc" #endif @@ -579,7 +579,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define SUBDIRNAME "sparc" #define ARCHCONFIG "-DSPARC -DV7 " \ "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 " \ - "-DL2_SIZE=1572864 -DL2_LINESIZE=64 -DDTB_SIZE=8192 -DDTB_ENTRIES=64 " + "-DL2_SIZE=1572864 -DL2_LINESIZE=64 -DDTB_SIZE=8192 -DDTB_DEFAULT_ENTRIES=64 " #define LIBNAME "sparcv7" #define CORENAME "sparcv7" #endif @@ -592,7 +592,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define ARCHCONFIG "-DGENERIC " \ "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=128 " \ "-DL2_SIZE=512488 -DL2_LINESIZE=128 " \ - "-DDTB_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 " + "-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 " #define LIBNAME "generic" #define CORENAME "generic" #endif diff --git a/kernel/setparam-ref.c b/kernel/setparam-ref.c index d3734bbd9..aa45d47f8 100644 --- a/kernel/setparam-ref.c +++ b/kernel/setparam-ref.c @@ -49,6 +49,8 @@ static void init_parameter(void); gotoblas_t TABLE_NAME = { + DTB_DEFAULT_ENTRIES , + GEMM_DEFAULT_OFFSET_A, GEMM_DEFAULT_OFFSET_B, GEMM_DEFAULT_ALIGN, 0, 0, 0, diff --git a/kernel/x86/gemv_n.S b/kernel/x86/gemv_n.S index 13fd1ed67..652c0bb0f 100644 --- a/kernel/x86/gemv_n.S +++ b/kernel/x86/gemv_n.S @@ -48,7 +48,7 @@ #endif #ifndef P -#define P DTB_ENTRIES +#define P DTB_DEFAULT_ENTRIES #endif #define STACK 16 diff --git a/kernel/x86/qgemv_n.S b/kernel/x86/qgemv_n.S index 842423260..e33bce2df 100644 --- a/kernel/x86/qgemv_n.S +++ b/kernel/x86/qgemv_n.S @@ -48,7 +48,7 @@ #endif #ifndef P -#define P DTB_ENTRIES +#define P DTB_DEFAULT_ENTRIES #endif #define STACK 16 diff --git a/kernel/x86/xdot.S b/kernel/x86/xdot.S index 4a5af4642..929763271 100644 --- a/kernel/x86/xdot.S +++ b/kernel/x86/xdot.S @@ -307,7 +307,11 @@ popl %ebx popl %esi popl %edi +#if defined(F_INTERFACE) && defined(RETURN_BY_STACK) + ret $0x4 +#else ret +#endif ALIGN_3 .L88: @@ -326,6 +330,10 @@ popl %ebx popl %esi popl %edi - ret +#if defined(F_INTERFACE) && defined(RETURN_BY_STACK) + ret $0x4 +#else + ret +#endif EPILOGUE 
diff --git a/kernel/x86/xgemv_n.S b/kernel/x86/xgemv_n.S index 0bf44455b..32447ba7e 100644 --- a/kernel/x86/xgemv_n.S +++ b/kernel/x86/xgemv_n.S @@ -44,11 +44,11 @@ #endif #if defined(PENTIUM4) || defined(ATHLON) -#define P (DTB_ENTRIES / 2) +#define P (DTB_DEFAULT_ENTRIES / 2) #endif #ifndef P -#define P DTB_ENTRIES +#define P DTB_DEFAULT_ENTRIES #endif #define STACK 16 diff --git a/kernel/x86/zdot.S b/kernel/x86/zdot.S index aa4481f97..9d8866ad0 100644 --- a/kernel/x86/zdot.S +++ b/kernel/x86/zdot.S @@ -283,7 +283,11 @@ popl %ebx popl %esi popl %edi +#if defined(DOUBLE) || defined(XDOUBLE) + ret $0x4 +#else ret +#endif ALIGN_3 .L88: @@ -305,6 +309,10 @@ popl %ebx popl %esi popl %edi - ret +#if defined(DOUBLE) || defined(XDOUBLE) + ret $0x4 +#else + ret +#endif EPILOGUE diff --git a/kernel/x86/zdot_sse2.S b/kernel/x86/zdot_sse2.S index 2a174fb5d..efebe637b 100644 --- a/kernel/x86/zdot_sse2.S +++ b/kernel/x86/zdot_sse2.S @@ -1542,7 +1542,5 @@ popl %esi popl %edi /*remove the hidden return value address from the stack.*/ - popl %ecx - xchgl %ecx, 0(%esp) - ret + ret $0x4 EPILOGUE diff --git a/kernel/x86/zgemv_n.S b/kernel/x86/zgemv_n.S index 8e2b2b842..a3c9174e6 100644 --- a/kernel/x86/zgemv_n.S +++ b/kernel/x86/zgemv_n.S @@ -44,11 +44,11 @@ #endif #if defined(PENTIUM4) || defined(ATHLON) -#define P ((DTB_ENTRIES) >> 1) +#define P ((DTB_DEFAULT_ENTRIES) >> 1) #endif #ifndef P -#define P DTB_ENTRIES +#define P DTB_DEFAULT_ENTRIES #endif #define STACK 16 diff --git a/kernel/x86_64/zdot_sse.S b/kernel/x86_64/zdot_sse.S index 3302b9088..13804e0f8 100644 --- a/kernel/x86_64/zdot_sse.S +++ b/kernel/x86_64/zdot_sse.S @@ -3483,6 +3483,10 @@ subss %xmm3, %xmm1 #endif unpcklps %xmm1, %xmm0 + +#ifdef WINDOWS_ABI + movq %xmm0, %rax +#endif RESTOREREGISTERS diff --git a/kernel/x86_64/zdot_sse2.S b/kernel/x86_64/zdot_sse2.S index 77fa8e378..63acecc08 100644 --- a/kernel/x86_64/zdot_sse2.S +++ b/kernel/x86_64/zdot_sse2.S @@ -39,14 +39,19 @@ #define ASSEMBLER #include "common.h" 
+#ifndef WINDOWS_ABI #define N ARG1 /* rdi */ #define X ARG2 /* rsi */ #define INCX ARG3 /* rdx */ #define Y ARG4 /* rcx */ -#ifndef WINDOWS_ABI #define INCY ARG5 /* r8 */ #else -#define INCY %r10 +#define RESULT_ADDRESS ARG1 /*rcx*/ +#define N ARG2 /* rdx */ +#define X ARG3 /* r8 */ +#define INCX ARG4 /* r9*/ +#define Y %r10 +#define INCY %r11 #endif #include "l1param.h" @@ -64,7 +69,8 @@ PROFCODE #ifdef WINDOWS_ABI - movq 40(%rsp), INCY + movq 40(%rsp), Y + movq 48(%rsp), INCY #endif SAVEREGISTERS @@ -1544,6 +1550,12 @@ subsd %xmm3, %xmm1 #endif +#ifdef WINDOWS_ABI + movq RESULT_ADDRESS, %rax + movsd %xmm0, (%rax) + movsd %xmm1, 8(%rax) +#endif + RESTOREREGISTERS ret diff --git a/kernel/x86_64/zgemv_t.S b/kernel/x86_64/zgemv_t.S index d7f9d49fe..14abc8bfa 100644 --- a/kernel/x86_64/zgemv_t.S +++ b/kernel/x86_64/zgemv_t.S @@ -131,8 +131,8 @@ movq OLD_LDA, LDA movq OLD_X, X - movaps %xmm3, %xmm0 - movss OLD_ALPHA_I, %xmm1 + movapd %xmm3, %xmm0 + movsd OLD_ALPHA_I, %xmm1 #endif movq OLD_INCX, INCX diff --git a/test/Makefile b/test/Makefile index a5308e446..2df499b11 100644 --- a/test/Makefile +++ b/test/Makefile @@ -4,29 +4,46 @@ include ../Makefile.system all :: level1 level2 level3 level1 : sblat1 dblat1 cblat1 zblat1 - OPENBLAS_NUM_THREADS=1 ./sblat1 - OPENBLAS_NUM_THREADS=1 ./dblat1 - OPENBLAS_NUM_THREADS=1 ./cblat1 - OPENBLAS_NUM_THREADS=1 ./zblat1 + OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./sblat1 + OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./dblat1 + OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./cblat1 + OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./zblat1 ifdef SMP +ifeq ($(USE_OPENMP), 1) + OMP_NUM_THREADS=2 ./sblat1 + OMP_NUM_THREADS=2 ./dblat1 + OMP_NUM_THREADS=2 ./cblat1 + OMP_NUM_THREADS=2 ./zblat1 +else OPENBLAS_NUM_THREADS=2 ./sblat1 OPENBLAS_NUM_THREADS=2 ./dblat1 OPENBLAS_NUM_THREADS=2 ./cblat1 OPENBLAS_NUM_THREADS=2 ./zblat1 endif +endif level2 : sblat2 dblat2 cblat2 zblat2 rm -f ?BLAT2.SUMM - OPENBLAS_NUM_THREADS=1 ./sblat2 < ./sblat2.dat + 
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./sblat2 < ./sblat2.dat @$(GREP) -q FATAL SBLAT2.SUMM && cat SBLAT2.SUMM || exit 0 - OPENBLAS_NUM_THREADS=1 ./dblat2 < ./dblat2.dat + OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./dblat2 < ./dblat2.dat @$(GREP) -q FATAL DBLAT2.SUMM && cat DBLAT2.SUMM || exit 0 - OPENBLAS_NUM_THREADS=1 ./cblat2 < ./cblat2.dat + OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./cblat2 < ./cblat2.dat @$(GREP) -q FATAL CBLAT2.SUMM && cat CBLAT2.SUMM || exit 0 - OPENBLAS_NUM_THREADS=1 ./zblat2 < ./zblat2.dat + OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./zblat2 < ./zblat2.dat @$(GREP) -q FATAL ZBLAT2.SUMM && cat ZBLAT2.SUMM || exit 0 ifdef SMP rm -f ?BLAT2.SUMM +ifeq ($(USE_OPENMP), 1) + OMP_NUM_THREADS=2 ./sblat2 < ./sblat2.dat + @$(GREP) -q FATAL SBLAT2.SUMM && cat SBLAT2.SUMM || exit 0 + OMP_NUM_THREADS=2 ./dblat2 < ./dblat2.dat + @$(GREP) -q FATAL DBLAT2.SUMM && cat DBLAT2.SUMM || exit 0 + OMP_NUM_THREADS=2 ./cblat2 < ./cblat2.dat + @$(GREP) -q FATAL CBLAT2.SUMM && cat CBLAT2.SUMM || exit 0 + OMP_NUM_THREADS=2 ./zblat2 < ./zblat2.dat + @$(GREP) -q FATAL ZBLAT2.SUMM && cat ZBLAT2.SUMM || exit 0 +else OPENBLAS_NUM_THREADS=2 ./sblat2 < ./sblat2.dat @$(GREP) -q FATAL SBLAT2.SUMM && cat SBLAT2.SUMM || exit 0 OPENBLAS_NUM_THREADS=2 ./dblat2 < ./dblat2.dat @@ -36,19 +53,30 @@ ifdef SMP OPENBLAS_NUM_THREADS=2 ./zblat2 < ./zblat2.dat @$(GREP) -q FATAL ZBLAT2.SUMM && cat ZBLAT2.SUMM || exit 0 endif +endif level3 : sblat3 dblat3 cblat3 zblat3 rm -f ?BLAT3.SUMM - OPENBLAS_NUM_THREADS=1 ./sblat3 < ./sblat3.dat + OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./sblat3 < ./sblat3.dat @$(GREP) -q FATAL SBLAT3.SUMM && cat SBLAT3.SUMM || exit 0 - OPENBLAS_NUM_THREADS=1 ./dblat3 < ./dblat3.dat + OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./dblat3 < ./dblat3.dat @$(GREP) -q FATAL DBLAT3.SUMM && cat DBLAT3.SUMM || exit 0 - OPENBLAS_NUM_THREADS=1 ./cblat3 < ./cblat3.dat + OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./cblat3 < ./cblat3.dat @$(GREP) -q FATAL CBLAT3.SUMM && cat CBLAT3.SUMM 
|| exit 0 - OPENBLAS_NUM_THREADS=1 ./zblat3 < ./zblat3.dat + OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./zblat3 < ./zblat3.dat @$(GREP) -q FATAL ZBLAT3.SUMM && cat ZBLAT3.SUMM || exit 0 ifdef SMP rm -f ?BLAT3.SUMM +ifeq ($(USE_OPENMP), 1) + OMP_NUM_THREADS=2 ./sblat3 < ./sblat3.dat + @$(GREP) -q FATAL SBLAT3.SUMM && cat SBLAT3.SUMM || exit 0 + OMP_NUM_THREADS=2 ./dblat3 < ./dblat3.dat + @$(GREP) -q FATAL DBLAT3.SUMM && cat DBLAT3.SUMM || exit 0 + OMP_NUM_THREADS=2 ./cblat3 < ./cblat3.dat + @$(GREP) -q FATAL CBLAT3.SUMM && cat CBLAT3.SUMM || exit 0 + OMP_NUM_THREADS=2 ./zblat3 < ./zblat3.dat + @$(GREP) -q FATAL ZBLAT3.SUMM && cat ZBLAT3.SUMM || exit 0 +else OPENBLAS_NUM_THREADS=2 ./sblat3 < ./sblat3.dat @$(GREP) -q FATAL SBLAT3.SUMM && cat SBLAT3.SUMM || exit 0 OPENBLAS_NUM_THREADS=2 ./dblat3 < ./dblat3.dat @@ -58,6 +86,7 @@ ifdef SMP OPENBLAS_NUM_THREADS=2 ./zblat3 < ./zblat3.dat @$(GREP) -q FATAL ZBLAT3.SUMM && cat ZBLAT3.SUMM || exit 0 endif +endif FLDFLAGS = $(FFLAGS:-fPIC=) CEXTRALIB =