From b9d89f8aaa82845d09463f36829e189b0e28c15d Mon Sep 17 00:00:00 2001 From: Xianyi Zhang Date: Wed, 31 Aug 2011 18:21:37 +0800 Subject: [PATCH 1/2] Fixed the bug about installation. f77blas.h works OK now. --- Makefile | 7 +++++++ common_interface.h | 11 +++++++++++ openblas_config_template.h | 20 ++++++++++++++++++++ 3 files changed, 38 insertions(+) diff --git a/Makefile b/Makefile index 6789272a3..d86fbadf3 100644 --- a/Makefile +++ b/Makefile @@ -118,6 +118,13 @@ endif #Save the config files for installation cp Makefile.conf Makefile.conf_last cp config.h config_last.h +ifdef QUAD_PRECISION + echo "#define QUAD_PRECISION">> config_last.h +endif +ifeq ($(EXPRECISION), 1) + echo "#define EXPRECISION">> config_last.h +endif +## ifdef DYNAMIC_ARCH $(MAKE) -C kernel commonlibs || exit 1 for d in $(DYNAMIC_CORE) ; \ diff --git a/common_interface.h b/common_interface.h index 36bf5aa48..898d91001 100644 --- a/common_interface.h +++ b/common_interface.h @@ -38,6 +38,11 @@ #ifndef ASSEMBLER +#ifdef __cplusplus +extern "C" { + /* Assume C declarations for C++ */ +#endif /* __cplusplus */ + int BLASFUNC(xerbla)(char *, blasint *info, blasint); FLOATRET BLASFUNC(sdot) (blasint *, float *, blasint *, float *, blasint *); @@ -733,4 +738,10 @@ xdouble BLASFUNC(qlamch)(char *); FLOATRET BLASFUNC(slamc3)(float *, float *); double BLASFUNC(dlamc3)(double *, double *); xdouble BLASFUNC(qlamc3)(xdouble *, xdouble *); + +#ifdef __cplusplus +} + +#endif /* __cplusplus */ + #endif diff --git a/openblas_config_template.h b/openblas_config_template.h index 9fb80aa4f..8bf972593 100644 --- a/openblas_config_template.h +++ b/openblas_config_template.h @@ -6,6 +6,16 @@ #define BLASFUNC(FUNC) FUNC #endif +#ifdef QUAD_PRECISION +typedef struct { + unsigned long x[2]; +} xdouble; +#elif defined EXPRECISION +#define xdouble long double +#else +#define xdouble double +#endif + #if defined(OS_WINDOWS) && defined(__64BIT__) typedef long long BLASLONG; typedef unsigned long long BLASULONG; @@ 
-19,3 +29,13 @@ typedef BLASLONG blasint; #else typedef int blasint; #endif + +#if defined(XDOUBLE) || defined(DOUBLE) +#define FLOATRET FLOAT +#else +#ifdef NEED_F2CCONV +#define FLOATRET double +#else +#define FLOATRET float +#endif +#endif From 7b410b7f0e94edde2a606593086694ae6bb17be8 Mon Sep 17 00:00:00 2001 From: Zhang Xiianyi Date: Wed, 14 Sep 2011 23:52:51 +0800 Subject: [PATCH 2/2] Fixed #58 zdot SEGFAULT bug with GCC-4.6. Thanks to Mr. John for this patch. In the i386 calling convention, the caller passes the address of zdot's return value as a hidden first parameter. Thus, the callee must pop this address off the stack when it returns. Actually, I had already fixed the same bug in x86/zdot_sse2.S (issue #32). However, that was not a good implementation, since it used 3 instructions. Mr. John told me to use "ret $0x4" to skip the hidden first address (4 bytes). --- kernel/x86/xdot.S | 10 +++++++++- kernel/x86/zdot.S | 10 +++++++++- kernel/x86/zdot_sse2.S | 4 +--- 3 files changed, 19 insertions(+), 5 deletions(-) diff --git a/kernel/x86/xdot.S b/kernel/x86/xdot.S index 4a5af4642..929763271 100644 --- a/kernel/x86/xdot.S +++ b/kernel/x86/xdot.S @@ -307,7 +307,11 @@ popl %ebx popl %esi popl %edi +#if defined(F_INTERFACE) && defined(RETURN_BY_STACK) + ret $0x4 +#else ret +#endif ALIGN_3 .L88: @@ -326,6 +330,10 @@ popl %ebx popl %esi popl %edi - ret +#if defined(F_INTERFACE) && defined(RETURN_BY_STACK) + ret $0x4 +#else + ret +#endif EPILOGUE diff --git a/kernel/x86/zdot.S b/kernel/x86/zdot.S index aa4481f97..9d8866ad0 100644 --- a/kernel/x86/zdot.S +++ b/kernel/x86/zdot.S @@ -283,7 +283,11 @@ popl %ebx popl %esi popl %edi +#if defined(DOUBLE) || defined(XDOUBLE) + ret $0x4 +#else ret +#endif ALIGN_3 .L88: @@ -305,6 +309,10 @@ popl %ebx popl %esi popl %edi - ret +#if defined(DOUBLE) || defined(XDOUBLE) + ret $0x4 +#else + ret +#endif EPILOGUE diff --git a/kernel/x86/zdot_sse2.S b/kernel/x86/zdot_sse2.S index 2a174fb5d..efebe637b 100644 --- a/kernel/x86/zdot_sse2.S +++ 
b/kernel/x86/zdot_sse2.S @@ -1542,7 +1542,5 @@ popl %esi popl %edi /*remove the hidden return value address from the stack.*/ - popl %ecx - xchgl %ecx, 0(%esp) - ret + ret $0x4 EPILOGUE