Added intrinsics for MSVC.

This commit is contained in:
Hank Anderson 2015-02-25 11:52:51 -06:00
parent 84d90d6ed8
commit 5ae8993752
2 changed files with 56 additions and 0 deletions

View File

@ -56,41 +56,65 @@ static void __inline blas_lock(volatile BLASULONG *address){
do {
while (*address) {YIELDING;};
#if defined(_MSC_VER) && !defined(__clang__)
// use intrinsic instead of inline assembly
ret = _InterlockedExchange(address, 1);
// inline assembly
/*__asm {
mov eax, address
mov ebx, 1
xchg [eax], ebx
mov ret, ebx
}*/
#else
__asm__ __volatile__(
"xchgl %0, %1\n"
: "=r"(ret), "=m"(*address)
: "0"(1), "m"(*address)
: "memory");
#endif
} while (ret);
}
/*
 * Read the CPU time-stamp counter and return it as one 64-bit value.
 * RDTSC delivers the count in EDX:EAX (high half in EDX, low half in EAX).
 */
static __inline unsigned long long rpcc(void){
#if defined(_MSC_VER) && !defined(__clang__)
  /* MSVC path: use the __rdtsc intrinsic instead of GNU-style inline asm. */
  return __rdtsc();
#else
  unsigned int lo, hi;

  __asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi));

  /* The low 32 bits of the shifted half are zero, so OR equals addition. */
  return ((unsigned long long)hi << 32) | (unsigned long long)lo;
#endif
}
static __inline unsigned long getstackaddr(void){
#if defined(_MSC_VER) && !defined(__clang__)
return (unsigned long)_ReturnAddress(); // use MSVC intrinsic
#else
unsigned long addr;
__asm__ __volatile__ ("mov %%esp, %0"
: "=r"(addr) : : "memory");
return addr;
#endif
};
/*
 * Square root of a long double.
 *
 * val: operand.
 * Returns the square root computed by the x87 FSQRT instruction on
 * GNU-style compilers, or by the C library sqrt() on MSVC.
 */
static __inline long double sqrt_long(long double val) {
#if defined(_MSC_VER) && !defined(__clang__)
  /* NOTE(review): unverified whether MSVC lowers this call to fsqrt. */
  return sqrt(val);
#else
  long double root;

  /* Push val on the x87 stack, take the root in place, pop it into root. */
  __asm__ __volatile__ (
      "fldt %1\n"
      "fsqrt\n"
      "fstpt %0\n"
      : "=m" (root)
      : "m" (val));

  return root;
#endif
}

/* SQRT(a) forwards to sqrt_long(). */
#define SQRT(a)  sqrt_long(a)
@ -146,9 +170,14 @@ static __inline int blas_quickdivide(unsigned int x, unsigned int y){
y = blas_quick_divide_table[y];
#if defined(_MSC_VER) && !defined(__clang__)
(void*)result;
return x*y;
#else
__asm__ __volatile__ ("mull %0" :"=d" (result) :"a"(x), "0" (y));
return result;
#endif
}
#endif

View File

@ -137,8 +137,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
/* BITMASK(a, b, c): shift a right by b bits, then AND the result with c. */
#define BITMASK(a, b, c) (((a) >> (b)) & (c))

/*
 * CONSTRUCTOR / DESTRUCTOR are placed on function definitions.
 * With GNU-style compilers they expand to the constructor/destructor
 * function attributes.  With MSVC (excluding clang) they expand only to
 * the __cdecl calling-convention keyword; this file's DllMain calls
 * gotoblas_init()/gotoblas_quit() explicitly in that configuration.
 */
#if !defined(_MSC_VER) || defined(__clang__)
#define CONSTRUCTOR	__attribute__ ((constructor))
#define DESTRUCTOR	__attribute__ ((destructor))
#else
#define CONSTRUCTOR	__cdecl
#define DESTRUCTOR	__cdecl
#endif
#ifdef DYNAMIC_ARCH
gotoblas_t *gotoblas = NULL;
@ -1360,6 +1365,28 @@ void DESTRUCTOR gotoblas_quit(void) {
blas_shutdown();
}
#if defined(_MSC_VER) && !defined(__clang__)
/*
 * DLL entry point for MSVC builds.
 *
 * hModule:             module handle supplied by the loader (unused here).
 * ul_reason_for_call:  notification code selecting the action below.
 * lpReserved:          loader-supplied value (unused here).
 *
 * Calls gotoblas_init() on process attach and gotoblas_quit() on process
 * detach; thread attach/detach and any other code are ignored.
 * Always returns TRUE.
 */
BOOL APIENTRY DllMain(HMODULE hModule, DWORD  ul_reason_for_call, LPVOID lpReserved)
{
  if (ul_reason_for_call == DLL_PROCESS_ATTACH) {
    gotoblas_init();
  } else if (ul_reason_for_call == DLL_PROCESS_DETACH) {
    gotoblas_quit();
  }
  /* DLL_THREAD_ATTACH, DLL_THREAD_DETACH and unknown codes: nothing to do. */

  return TRUE;
}
#endif
#if (defined(C_PGI) || (!defined(C_SUN) && defined(F_INTERFACE_SUN))) && (defined(ARCH_X86) || defined(ARCH_X86_64))
/* Don't call me; this is just work around for PGI / Sun bug */
void gotoblas_dummy_for_PGI(void) {