/* GEMV parameters: per-CPU load/store mnemonics, prefetch settings and unroll factors. */

#ifndef GEMV_PARAM_H
#define GEMV_PARAM_H

/*
 * Mnemonic aliases.
 * Any macro named movsd that was defined before this header is dropped;
 * the OPTERON section below is the only place in this header that
 * defines it again.
 */
#ifdef movsd
#undef movsd
#endif

/* From here on every movapd token is replaced by movaps. */
#undef  movapd
#define movapd movaps

/*
 * ATHLON: defines ALIGNED_ACCESS and selects movaps for MOVUPS_A,
 * MOVUPS_XL/XS and MOVUPS_YL/YS (consumed by the MOVUPS_*1 helpers at
 * the end of this header). Prefetch uses prefetcht0; PREFETCHW is not
 * defined in this section.
 */
#ifdef ATHLON
#define ALIGNED_ACCESS
#define MOVUPS_A	movaps
#define MOVUPS_XL	movaps
#define MOVUPS_XS	movaps
#define MOVUPS_YL	movaps
#define MOVUPS_YS	movaps
#define PREFETCH	prefetcht0
#define PREFETCHSIZE	64 * 3
#endif

/*
 * PENTIUM4: defines ALIGNED_ACCESS and selects movaps for MOVUPS_A,
 * MOVUPS_XL/XS and MOVUPS_YL/YS. Prefetch uses prefetcht0 with
 * PREFETCHSIZE 64 * 2; PREFETCHW is not defined in this section.
 */
#ifdef PENTIUM4
#define ALIGNED_ACCESS
#define MOVUPS_A	movaps
#define MOVUPS_XL	movaps
#define MOVUPS_XS	movaps
#define MOVUPS_YL	movaps
#define MOVUPS_YS	movaps
#define PREFETCH	prefetcht0
#define PREFETCHSIZE	64 * 2
#endif

/*
 * CORE2: defines ALIGNED_ACCESS and selects movaps for MOVUPS_A,
 * MOVUPS_XL/XS and MOVUPS_YL/YS. Prefetch uses prefetcht0 with
 * PREFETCHSIZE 64 * 4; PREFETCHW is not defined in this section.
 */
#ifdef CORE2
#define ALIGNED_ACCESS
#define MOVUPS_A	movaps
#define MOVUPS_XL	movaps
#define MOVUPS_XS	movaps
#define MOVUPS_YL	movaps
#define MOVUPS_YS	movaps
#define PREFETCH	prefetcht0
#define PREFETCHSIZE	64 * 4
#endif

/*
 * PENRYN: same settings as the CORE2 section - ALIGNED_ACCESS, movaps
 * for every MOVUPS_* selector, prefetcht0 and PREFETCHSIZE 64 * 4.
 * PREFETCHW is not defined in this section.
 */
#ifdef PENRYN
#define ALIGNED_ACCESS
#define MOVUPS_A	movaps
#define MOVUPS_XL	movaps
#define MOVUPS_XS	movaps
#define MOVUPS_YL	movaps
#define MOVUPS_YS	movaps
#define PREFETCH	prefetcht0
#define PREFETCHSIZE	64 * 4
#endif

/*
 * NEHALEM: selects movups for every MOVUPS_* selector and does not
 * define ALIGNED_ACCESS. Both PREFETCH and PREFETCHW map to prefetcht0;
 * PREFETCHSIZE is 64 * 3.
 */
#ifdef NEHALEM
#define MOVUPS_A	movups
#define MOVUPS_XL	movups
#define MOVUPS_XS	movups
#define MOVUPS_YL	movups
#define MOVUPS_YS	movups
#define PREFETCH	prefetcht0
#define PREFETCHW	prefetcht0
#define PREFETCHSIZE	64 * 3
#endif

/*
 * SANDYBRIDGE: same settings as the NEHALEM section - movups for every
 * MOVUPS_* selector, no ALIGNED_ACCESS, PREFETCH and PREFETCHW both
 * prefetcht0, PREFETCHSIZE 64 * 3.
 */
#ifdef SANDYBRIDGE
#define MOVUPS_A	movups
#define MOVUPS_XL	movups
#define MOVUPS_XS	movups
#define MOVUPS_YL	movups
#define MOVUPS_YS	movups
#define PREFETCH	prefetcht0
#define PREFETCHW	prefetcht0
#define PREFETCHSIZE	64 * 3
#endif

/*
 * OPTERON: prefetch / prefetchw are used for PREFETCH / PREFETCHW.
 * PREFETCHSIZE is 64 * 1 whether or not COMPLEX is defined (the former
 * COMPLEX / non-COMPLEX branches carried the same value).
 *
 * No MOVUPS_* selector and no ALIGNED_ACCESS is defined in this section,
 * so unless they come from elsewhere the MOVUPS_*1 helpers at the end of
 * this header use their movsd + movhps fallback. The movsd token itself
 * is replaced by movlps here.
 */
#ifdef OPTERON
#define PREFETCH	prefetch
#define PREFETCHW	prefetchw
#define PREFETCHSIZE	64 * 1
#define movsd		movlps
#endif

/*
 * BARCELONA, SHANGHAI, BOBCAT or BARCELONA_OPTIMIZATION: defines
 * ALIGNED_ACCESS and selects movaps for every MOVUPS_* selector.
 * PREFETCH / PREFETCHW map to prefetch / prefetchw. PREFETCHSIZE is
 * 64 * 2 without COMPLEX and 64 * 4 with COMPLEX.
 */
#if defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) || defined(BARCELONA_OPTIMIZATION)
#define ALIGNED_ACCESS
#define MOVUPS_A	movaps
#define MOVUPS_XL	movaps
#define MOVUPS_XS	movaps
#define MOVUPS_YL	movaps
#define MOVUPS_YS	movaps

#define PREFETCH	prefetch
#define PREFETCHW	prefetchw
#ifndef COMPLEX
#define PREFETCHSIZE	64 * 2
#else
#define PREFETCHSIZE	64 * 4
#endif
#endif

/*
 * NANO: defines ALIGNED_ACCESS and selects movaps for every MOVUPS_*
 * selector. Prefetch uses prefetcht0; PREFETCHW is not defined in this
 * section. PREFETCHSIZE is 64 * 1 without COMPLEX and 64 * 2 with COMPLEX.
 */
#ifdef NANO
#define ALIGNED_ACCESS
#define MOVUPS_A	movaps
#define MOVUPS_XL	movaps
#define MOVUPS_XS	movaps
#define MOVUPS_YL	movaps
#define MOVUPS_YS	movaps
#define PREFETCH	prefetcht0
#ifndef COMPLEX
#define PREFETCHSIZE	64 * 1
#else
#define PREFETCHSIZE	64 * 2
#endif
#endif

/*
 * Default for PREOFFSET, applied only when it is not already defined:
 * half of L1_DATA_LINESIZE when that macro is available, otherwise 32.
 */
#ifndef PREOFFSET
#ifdef L1_DATA_LINESIZE
#define PREOFFSET	(L1_DATA_LINESIZE >> 1)
#else
#define PREOFFSET	32
#endif
#endif

/* GEMV_UNROLL defaults to 4 unless it was defined before this point. */
#ifndef  GEMV_UNROLL
#define  GEMV_UNROLL 4
#endif

/* ZGEMV_UNROLL defaults to 4 unless it was defined before this point. */
#ifndef ZGEMV_UNROLL
#define ZGEMV_UNROLL 4
#endif

/* #define COPY_FORCE       */   /* Always copy X or Y to the buffer */
/* #define NOCOPY_UNALIGNED */   /* Do not copy if X or Y is not aligned */

/*
 * MOVUPS_A1 / MOVUPS_A2: 16-byte load from memory into REGS.
 *   MOVUPS_A1 addresses OFF(ADDR);
 *   MOVUPS_A2 addresses OFF(ADDR, BASE, SCALE).
 * When a CPU section selected a MOVUPS_A mnemonic, one instruction is
 * emitted. Otherwise the load is split into movsd from OFF and movhps
 * from OFF + 8, separated by ';' so both stay on one source line.
 * The expansion text is unchanged; only the macro layout is split
 * across continuation lines.
 */
#ifdef MOVUPS_A
#define MOVUPS_A1(OFF, ADDR, REGS) \
	MOVUPS_A	OFF(ADDR), REGS
#define MOVUPS_A2(OFF, ADDR, BASE, SCALE, REGS) \
	MOVUPS_A	OFF(ADDR, BASE, SCALE), REGS
#else
#define MOVUPS_A1(OFF, ADDR, REGS) \
	movsd	OFF(ADDR), REGS; \
	movhps	OFF + 8(ADDR), REGS
#define MOVUPS_A2(OFF, ADDR, BASE, SCALE, REGS) \
	movsd	OFF(ADDR, BASE, SCALE), REGS; \
	movhps	OFF + 8(ADDR, BASE, SCALE), REGS
#endif

/*
 * MOVRPS_A1 / MOVRPS_A2: two-instruction load into REGS with the 8-byte
 * halves taken in the opposite order from the MOVUPS_A1/A2 fallback:
 * movsd reads from OFF + 8 and movhps reads from OFF.
 * These are defined unconditionally (no single-instruction variant).
 */
#define MOVRPS_A1(OFF, ADDR, REGS) \
	movsd	OFF + 8(ADDR), REGS; \
	movhps	OFF(ADDR), REGS
#define MOVRPS_A2(OFF, ADDR, BASE, SCALE, REGS) \
	movsd	OFF + 8(ADDR, BASE, SCALE), REGS; \
	movhps	OFF(ADDR, BASE, SCALE), REGS

/*
 * MOVUPS_XL1: 16-byte load from OFF(ADDR) into REGS.
 * Uses the MOVUPS_XL mnemonic when a CPU section selected one; otherwise
 * movsd from OFF followed by movhps from OFF + 8.
 */
#ifdef MOVUPS_XL
#define MOVUPS_XL1(OFF, ADDR, REGS) \
	MOVUPS_XL	OFF(ADDR), REGS
#else
#define MOVUPS_XL1(OFF, ADDR, REGS) \
	movsd	OFF(ADDR), REGS; \
	movhps	OFF + 8(ADDR), REGS
#endif

/*
 * MOVUPS_XS1: 16-byte store of REGS to OFF(ADDR).
 * Uses the MOVUPS_XS mnemonic when a CPU section selected one; otherwise
 * movsd to OFF followed by movhps to OFF + 8.
 */
#ifdef MOVUPS_XS
#define MOVUPS_XS1(OFF, ADDR, REGS) \
	MOVUPS_XS	REGS, OFF(ADDR)
#else
#define MOVUPS_XS1(OFF, ADDR, REGS) \
	movsd	REGS, OFF(ADDR); \
	movhps	REGS, OFF + 8(ADDR)
#endif

/*
 * MOVUPS_YL1: 16-byte load from OFF(ADDR) into REGS.
 * Uses the MOVUPS_YL mnemonic when a CPU section selected one; otherwise
 * movsd from OFF followed by movhps from OFF + 8.
 */
#ifdef MOVUPS_YL
#define MOVUPS_YL1(OFF, ADDR, REGS) \
	MOVUPS_YL	OFF(ADDR), REGS
#else
#define MOVUPS_YL1(OFF, ADDR, REGS) \
	movsd	OFF(ADDR), REGS; \
	movhps	OFF + 8(ADDR), REGS
#endif

/*
 * MOVUPS_YS1: 16-byte store of REGS to OFF(ADDR).
 * Uses the MOVUPS_YS mnemonic when a CPU section selected one; otherwise
 * movsd to OFF followed by movhps to OFF + 8.
 */
#ifdef MOVUPS_YS
#define MOVUPS_YS1(OFF, ADDR, REGS) \
	MOVUPS_YS	REGS, OFF(ADDR)
#else
#define MOVUPS_YS1(OFF, ADDR, REGS) \
	movsd	REGS, OFF(ADDR); \
	movhps	REGS, OFF + 8(ADDR)
#endif

#endif /* GEMV_PARAM_H */