diff --git a/common_x86.h b/common_x86.h index f096e9074..0cb242c4e 100644 --- a/common_x86.h +++ b/common_x86.h @@ -56,41 +56,65 @@ static void __inline blas_lock(volatile BLASULONG *address){ do { while (*address) {YIELDING;}; +#if defined(_MSC_VER) && !defined(__clang__) + // use intrinsic instead of inline assembly + ret = _InterlockedExchange(address, 1); + // inline assembly + /*__asm { + mov eax, address + mov ebx, 1 + xchg [eax], ebx + mov ret, ebx + }*/ +#else __asm__ __volatile__( "xchgl %0, %1\n" : "=r"(ret), "=m"(*address) : "0"(1), "m"(*address) : "memory"); +#endif } while (ret); } static __inline unsigned long long rpcc(void){ +#if defined(_MSC_VER) && !defined(__clang__) + return __rdtsc(); // use MSVC intrinsic +#else unsigned int a, d; __asm__ __volatile__ ("rdtsc" : "=a" (a), "=d" (d)); return ((unsigned long long)a + ((unsigned long long)d << 32)); +#endif }; static __inline unsigned long getstackaddr(void){ +#if defined(_MSC_VER) && !defined(__clang__) + return (unsigned long)_ReturnAddress(); // use MSVC intrinsic +#else unsigned long addr; __asm__ __volatile__ ("mov %%esp, %0" : "=r"(addr) : : "memory"); return addr; +#endif }; static __inline long double sqrt_long(long double val) { +#if defined(_MSC_VER) && !defined(__clang__) + return sqrt(val); // not sure if this will use fsqrt +#else long double result; __asm__ __volatile__ ("fldt %1\n" "fsqrt\n" "fstpt %0\n" : "=m" (result) : "m"(val)); return result; +#endif } #define SQRT(a) sqrt_long(a) @@ -146,9 +170,14 @@ static __inline int blas_quickdivide(unsigned int x, unsigned int y){ y = blas_quick_divide_table[y]; +#if defined(_MSC_VER) && !defined(__clang__) + (void*)result; + return x*y; +#else __asm__ __volatile__ ("mull %0" :"=d" (result) :"a"(x), "0" (y)); return result; +#endif } #endif diff --git a/driver/others/memory.c b/driver/others/memory.c index fa364785b..70bfa7a57 100644 --- a/driver/others/memory.c +++ b/driver/others/memory.c @@ -137,8 +137,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define BITMASK(a, b, c) ((((a) >> (b)) & (c))) +#if defined(_MSC_VER) && !defined(__clang__) +#define CONSTRUCTOR __cdecl +#define DESTRUCTOR __cdecl +#else #define CONSTRUCTOR __attribute__ ((constructor)) #define DESTRUCTOR __attribute__ ((destructor)) +#endif #ifdef DYNAMIC_ARCH gotoblas_t *gotoblas = NULL; @@ -1360,6 +1365,28 @@ void DESTRUCTOR gotoblas_quit(void) { blas_shutdown(); } +#if defined(_MSC_VER) && !defined(__clang__) +BOOL APIENTRY DllMain(HMODULE hModule, DWORD ul_reason_for_call, LPVOID lpReserved) +{ + switch (ul_reason_for_call) + { + case DLL_PROCESS_ATTACH: + gotoblas_init(); + break; + case DLL_THREAD_ATTACH: + break; + case DLL_THREAD_DETACH: + break; + case DLL_PROCESS_DETACH: + gotoblas_quit(); + break; + default: + break; + } + return TRUE; +} +#endif + #if (defined(C_PGI) || (!defined(C_SUN) && defined(F_INTERFACE_SUN))) && (defined(ARCH_X86) || defined(ARCH_X86_64)) /* Don't call me; this is just work around for PGI / Sun bug */ void gotoblas_dummy_for_PGI(void) {