Added intrinsics for MSVC.
This commit is contained in:
parent
84d90d6ed8
commit
5ae8993752
29
common_x86.h
29
common_x86.h
|
@ -56,41 +56,65 @@ static void __inline blas_lock(volatile BLASULONG *address){
|
||||||
do {
|
do {
|
||||||
while (*address) {YIELDING;};
|
while (*address) {YIELDING;};
|
||||||
|
|
||||||
|
#if defined(_MSC_VER) && !defined(__clang__)
|
||||||
|
// use intrinsic instead of inline assembly
|
||||||
|
ret = _InterlockedExchange(address, 1);
|
||||||
|
// inline assembly
|
||||||
|
/*__asm {
|
||||||
|
mov eax, address
|
||||||
|
mov ebx, 1
|
||||||
|
xchg [eax], ebx
|
||||||
|
mov ret, ebx
|
||||||
|
}*/
|
||||||
|
#else
|
||||||
__asm__ __volatile__(
|
__asm__ __volatile__(
|
||||||
"xchgl %0, %1\n"
|
"xchgl %0, %1\n"
|
||||||
: "=r"(ret), "=m"(*address)
|
: "=r"(ret), "=m"(*address)
|
||||||
: "0"(1), "m"(*address)
|
: "0"(1), "m"(*address)
|
||||||
: "memory");
|
: "memory");
|
||||||
|
#endif
|
||||||
|
|
||||||
} while (ret);
|
} while (ret);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
 * Read the processor's time-stamp counter.
 *
 * Returns the 64-bit value produced by the RDTSC instruction.  MSVC has no
 * GNU-style inline assembly, so that compiler goes through the __rdtsc()
 * intrinsic; every other compiler issues the instruction directly.
 */
static __inline unsigned long long rpcc(void){
#if defined(_MSC_VER) && !defined(__clang__)
  return __rdtsc();
#else
  unsigned int lo, hi;

  /* RDTSC leaves the low 32 bits in EAX ("=a") and the high 32 bits in EDX ("=d"). */
  __asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi));

  return ((unsigned long long)hi << 32) | (unsigned long long)lo;
#endif
}
|
||||||
|
|
||||||
/*
 * Return an address that lies within the current stack.
 *
 * GCC-compatible compilers read ESP directly.  MSVC has no GNU-style inline
 * assembly, so an intrinsic is used there instead.
 */
static __inline unsigned long getstackaddr(void){
#if defined(_MSC_VER) && !defined(__clang__)
  /*
   * _AddressOfReturnAddress() gives the address of the stack slot that holds
   * the return address, i.e. a location on the stack.  _ReturnAddress() would
   * give a code address in the caller, which is not a stack address.
   * Both intrinsics are declared in <intrin.h>.
   */
  return (unsigned long)_AddressOfReturnAddress();
#else
  unsigned long addr;

  /* Copy the stack pointer into a general-purpose register. */
  __asm__ __volatile__ ("mov %%esp, %0"
			 : "=r"(addr) : : "memory");

  return addr;
#endif
}
|
||||||
|
|
||||||
|
|
||||||
/*
 * Square root of a long double.
 *
 * GCC-compatible compilers run the x87 FSQRT instruction on the 80-bit value.
 * MSVC has no GNU-style inline assembly, so the C library's long double
 * routine is called there; which instruction it lowers to is up to the
 * compiler.
 */
static __inline long double sqrt_long(long double val) {
#if defined(_MSC_VER) && !defined(__clang__)
  /* sqrtl() is the long double variant; sqrt() would go through double. */
  return sqrtl(val);
#else
  long double result;

  /* Load val onto the x87 stack, take the root, store and pop into result. */
  __asm__ __volatile__ ("fldt %1\n"
		    "fsqrt\n"
		    "fstpt %0\n" : "=m" (result) : "m"(val));
  return result;
#endif
}
|
||||||
|
|
||||||
#define SQRT(a) sqrt_long(a)
|
#define SQRT(a) sqrt_long(a)
|
||||||
|
@ -146,9 +170,14 @@ static __inline int blas_quickdivide(unsigned int x, unsigned int y){
|
||||||
|
|
||||||
y = blas_quick_divide_table[y];
|
y = blas_quick_divide_table[y];
|
||||||
|
|
||||||
|
#if defined(_MSC_VER) && !defined(__clang__)
|
||||||
|
(void*)result;
|
||||||
|
return x*y;
|
||||||
|
#else
|
||||||
__asm__ __volatile__ ("mull %0" :"=d" (result) :"a"(x), "0" (y));
|
__asm__ __volatile__ ("mull %0" :"=d" (result) :"a"(x), "0" (y));
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -137,8 +137,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#define BITMASK(a, b, c) ((((a) >> (b)) & (c)))
|
#define BITMASK(a, b, c) ((((a) >> (b)) & (c)))
|
||||||
|
|
||||||
|
/*
 * CONSTRUCTOR / DESTRUCTOR decorate the library's start-up and shutdown
 * functions (e.g. "void DESTRUCTOR gotoblas_quit(void)").
 *
 * GCC-compatible compilers get the constructor/destructor attributes.
 * MSVC does not accept __attribute__, so the macros expand to a plain
 * calling convention there; the MSVC build calls the functions from
 * DllMain instead.
 */
#if defined(_MSC_VER) && !defined(__clang__)
#define CONSTRUCTOR __cdecl
#define DESTRUCTOR __cdecl
#else
#define CONSTRUCTOR __attribute__ ((constructor))
#define DESTRUCTOR __attribute__ ((destructor))
#endif
|
||||||
|
|
||||||
#ifdef DYNAMIC_ARCH
|
#ifdef DYNAMIC_ARCH
|
||||||
gotoblas_t *gotoblas = NULL;
|
gotoblas_t *gotoblas = NULL;
|
||||||
|
@ -1360,6 +1365,28 @@ void DESTRUCTOR gotoblas_quit(void) {
|
||||||
blas_shutdown();
|
blas_shutdown();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if defined(_MSC_VER) && !defined(__clang__)
/*
 * DLL entry point for MSVC builds.
 *
 * Calls gotoblas_init() when the DLL is attached to a process and
 * gotoblas_quit() when it is detached.  Thread attach/detach notifications
 * and unknown reasons are ignored.  Always returns TRUE.
 *
 * NOTE(review): on DLL_PROCESS_DETACH a non-NULL lpReserved means the process
 * is terminating rather than the DLL being unloaded; Microsoft's DllMain
 * guidance advises skipping resource cleanup in that case.  Whether
 * gotoblas_quit() is safe to run then should be confirmed.
 */
BOOL APIENTRY DllMain(HMODULE hModule, DWORD ul_reason_for_call, LPVOID lpReserved)
{
  /* Not used; referenced only to silence unused-parameter warnings. */
  (void)hModule;
  (void)lpReserved;

  switch (ul_reason_for_call)
  {
    case DLL_PROCESS_ATTACH:
      gotoblas_init();
      break;
    case DLL_PROCESS_DETACH:
      gotoblas_quit();
      break;
    case DLL_THREAD_ATTACH:
    case DLL_THREAD_DETACH:
    default:
      /* Nothing to do per thread. */
      break;
  }
  return TRUE;
}
#endif
|
||||||
|
|
||||||
#if (defined(C_PGI) || (!defined(C_SUN) && defined(F_INTERFACE_SUN))) && (defined(ARCH_X86) || defined(ARCH_X86_64))
|
#if (defined(C_PGI) || (!defined(C_SUN) && defined(F_INTERFACE_SUN))) && (defined(ARCH_X86) || defined(ARCH_X86_64))
|
||||||
/* Don't call me; this is just work around for PGI / Sun bug */
|
/* Don't call me; this is just work around for PGI / Sun bug */
|
||||||
void gotoblas_dummy_for_PGI(void) {
|
void gotoblas_dummy_for_PGI(void) {
|
||||||
|
|
Loading…
Reference in New Issue