Add conjugate condition to gemv.
This commit is contained in:
		
							parent
							
								
									a32e56500a
								
							
						
					
					
						commit
						2d78fb05c8
					
				|  | @ -1,34 +1,81 @@ | ||||||
| #include "common.h"  | #include "common.h"  | ||||||
| 
 | 
 | ||||||
| //These are auto-tuning codes on Loongson-3A platform. 
 | //typedef int BLASLONG;
 | ||||||
|  | //typedef double FLOAT;
 | ||||||
| 
 | 
 | ||||||
| //#define prefetch(x) __builtin_prefetch(x)
 |  | ||||||
| //#define prefetch(x) do {_mm_prefetch((char *)(x), _MM_HINT_T0);} while(0)
 |  | ||||||
| #define prefetch(x) __asm__ __volatile__("ld $0, %0"::"m"(x)) | #define prefetch(x) __asm__ __volatile__("ld $0, %0"::"m"(x)) | ||||||
| #define likely(x) __builtin_expect(!!(x), 1) | #define likely(x) __builtin_expect(!!(x), 1) | ||||||
| #define unlikely(x) __builtin_expect(!!(x), 0) | #define unlikely(x) __builtin_expect(!!(x), 0) | ||||||
| 
 | 
 | ||||||
| #define spec_loop_alpha1 do {Y[ii] += A[jj + ii] * X[k]; Y[ii + 1] += A[jj + ii + 1] * X[k]; Y[ii + 1] += A[jj + ii] * X[k + 1]; Y[ii] -= A[jj + ii + 1] * X[k + 1]; ii += 2;} while(0) | #if !defined(CONJ) && !defined(XCONJ) | ||||||
| //#define spec_loop_alpha1 do {Y[ii] += A[jj + ii] * X[k] - A[jj + ii + 1] * X[k + 1]; Y[ii + 1] += A[jj + ii + 1] * X[k] + A[jj + ii] * X[k + 1]; ii += 2;} while(0)
 | #define spec_loop_alpha1	spec_loop_alpha1_0 | ||||||
| #define spec_loop do {rTmp = A[jj + ii] * X[k] - A[jj + ii + 1] * X[k + 1]; iTmp = A[jj + ii] * X[k + 1] + A[jj + ii + 1] * X[k]; Y[ii] += rTmp * rALPHA - iTmp * iALPHA; Y[ii + 1] += rTmp * iALPHA + iTmp * rALPHA; ii += 2;} while(0) | #define spec_loop			spec_loop_0 | ||||||
| #define norm_loop_alpha1 do {Y[iii] += A[jj + ii] * X[k] - A[jj + ii + 1] * X[k + 1]; Y[iii + 1] += A[jj + ii] * X[k + 1] + A[jj + ii + 1] * X[k]; ii += 2; iii += INCY * 2;} while(0) | #define norm_loop_alpha1	norm_loop_alpha1_0 | ||||||
| #define norm_loop do {rTmp = A[jj + ii] * X[k] - A[jj + ii + 1] * X[k + 1]; iTmp = A[jj + ii] * X[k + 1] + A[jj + ii + 1] * X[k]; Y[iii] += rTmp * rALPHA - iTmp * iALPHA; Y[iii + 1] += rTmp * iALPHA + iTmp * rALPHA; ii += 2; iii += INCY * 2;} while(0) | #define norm_loop			norm_loop_0 | ||||||
|  | #endif | ||||||
|  | 
 | ||||||
|  | #if  defined(CONJ) && !defined(XCONJ) | ||||||
|  | #define spec_loop_alpha1 	spec_loop_alpha1_1 | ||||||
|  | #define spec_loop			spec_loop_1 | ||||||
|  | #define norm_loop_alpha1	norm_loop_alpha1_1 | ||||||
|  | #define norm_loop			norm_loop_1 | ||||||
|  | #endif | ||||||
|  | 
 | ||||||
|  | #if  !defined(CONJ) && defined(XCONJ) | ||||||
|  | #define spec_loop_alpha1 	spec_loop_alpha1_2 | ||||||
|  | #define spec_loop			spec_loop_2 | ||||||
|  | #define norm_loop_alpha1	norm_loop_alpha1_2 | ||||||
|  | #define norm_loop			norm_loop_2 | ||||||
|  | #endif | ||||||
|  | 
 | ||||||
|  | #if  defined(CONJ) && defined(XCONJ) | ||||||
|  | #define spec_loop_alpha1 	spec_loop_alpha1_3 | ||||||
|  | #define spec_loop			spec_loop_3 | ||||||
|  | #define norm_loop_alpha1	norm_loop_alpha1_3 | ||||||
|  | #define norm_loop			norm_loop_3 | ||||||
|  | #endif | ||||||
|  | 
 | ||||||
|  | #define spec_loop_alpha1_0 do {Y[ii] += A[jj + ii] * X[k]; Y[ii + 1] += A[jj + ii + 1] * X[k]; Y[ii + 1] += A[jj + ii] * X[k + 1]; Y[ii] -= A[jj + ii + 1] * X[k + 1]; ii += 2;} while(0) | ||||||
|  | 
 | ||||||
|  | #define spec_loop_alpha1_1 do {Y[ii] += A[jj + ii] * X[k]; Y[ii + 1] -= A[jj + ii + 1] * X[k]; Y[ii + 1] += A[jj + ii] * X[k + 1]; Y[ii] += A[jj + ii + 1] * X[k + 1]; ii += 2;} while(0) | ||||||
|  | 
 | ||||||
|  | #define spec_loop_alpha1_2 do {Y[ii] += A[jj + ii] * X[k]; Y[ii + 1] += A[jj + ii + 1] * X[k]; Y[ii + 1] -= A[jj + ii] * X[k + 1]; Y[ii] += A[jj + ii + 1] * X[k + 1]; ii += 2;} while(0) | ||||||
|  | 
 | ||||||
|  | #define spec_loop_alpha1_3 do {Y[ii] += A[jj + ii] * X[k]; Y[ii + 1] -= A[jj + ii + 1] * X[k]; Y[ii + 1] -= A[jj + ii] * X[k + 1]; Y[ii] -= A[jj + ii + 1] * X[k + 1]; ii += 2;} while(0) | ||||||
|  | 
 | ||||||
|  | #define spec_loop_0 do {rTmp = A[jj + ii] * X[k] - A[jj + ii + 1] * X[k + 1]; iTmp = A[jj + ii] * X[k + 1] + A[jj + ii + 1] * X[k]; Y[ii] += rTmp * rALPHA - iTmp * iALPHA; Y[ii + 1] += rTmp * iALPHA + iTmp * rALPHA; ii += 2;} while(0) | ||||||
|  | 
 | ||||||
|  | #define spec_loop_1 do {rTmp = A[jj + ii] * X[k] + A[jj + ii + 1] * X[k + 1]; iTmp = A[jj + ii] * X[k + 1] - A[jj + ii + 1] * X[k]; Y[ii] += rTmp * rALPHA - iTmp * iALPHA; Y[ii + 1] += rTmp * iALPHA + iTmp * rALPHA; ii += 2;} while(0) | ||||||
|  | 
 | ||||||
|  | #define spec_loop_2 do {rTmp = A[jj + ii] * X[k] + A[jj + ii + 1] * X[k + 1]; iTmp = -A[jj + ii] * X[k + 1] + A[jj + ii + 1] * X[k]; Y[ii] += rTmp * rALPHA - iTmp * iALPHA; Y[ii + 1] += rTmp * iALPHA + iTmp * rALPHA; ii += 2;} while(0) | ||||||
|  | 
 | ||||||
|  | #define spec_loop_3 do {rTmp = A[jj + ii] * X[k] - A[jj + ii + 1] * X[k + 1]; iTmp = -A[jj + ii] * X[k + 1] - A[jj + ii + 1] * X[k]; Y[ii] += rTmp * rALPHA - iTmp * iALPHA; Y[ii + 1] += rTmp * iALPHA + iTmp * rALPHA; ii += 2;} while(0) | ||||||
|  | 
 | ||||||
|  | #define norm_loop_alpha1_0 do {Y[iii] += A[jj + ii] * X[k] - A[jj + ii + 1] * X[k + 1]; Y[iii + 1] += A[jj + ii] * X[k + 1] + A[jj + ii + 1] * X[k]; ii += 2; iii += INCY * 2;} while(0) | ||||||
|  | 
 | ||||||
|  | #define norm_loop_alpha1_1 do {Y[iii] += A[jj + ii] * X[k] + A[jj + ii + 1] * X[k + 1]; Y[iii + 1] += A[jj + ii] * X[k + 1] - A[jj + ii + 1] * X[k]; ii += 2; iii += INCY * 2;} while(0) | ||||||
|  | 
 | ||||||
|  | #define norm_loop_alpha1_2 do {Y[iii] += A[jj + ii] * X[k] + A[jj + ii + 1] * X[k + 1]; Y[iii + 1] += -A[jj + ii] * X[k + 1] + A[jj + ii + 1] * X[k]; ii += 2; iii += INCY * 2;} while(0) | ||||||
|  | 
 | ||||||
|  | #define norm_loop_alpha1_3 do {Y[iii] += A[jj + ii] * X[k] - A[jj + ii + 1] * X[k + 1]; Y[iii + 1] += -A[jj + ii] * X[k + 1] - A[jj + ii + 1] * X[k]; ii += 2; iii += INCY * 2;} while(0) | ||||||
|  | 
 | ||||||
|  | #define norm_loop_0 do {rTmp = A[jj + ii] * X[k] - A[jj + ii + 1] * X[k + 1]; iTmp = A[jj + ii] * X[k + 1] + A[jj + ii + 1] * X[k]; Y[iii] += rTmp * rALPHA - iTmp * iALPHA; Y[iii + 1] += rTmp * iALPHA + iTmp * rALPHA; ii += 2; iii += INCY * 2;} while(0) | ||||||
|  | 
 | ||||||
|  | #define norm_loop_1 do {rTmp = A[jj + ii] * X[k] + A[jj + ii + 1] * X[k + 1]; iTmp = A[jj + ii] * X[k + 1] - A[jj + ii + 1] * X[k]; Y[iii] += rTmp * rALPHA - iTmp * iALPHA; Y[iii + 1] += rTmp * iALPHA + iTmp * rALPHA; ii += 2; iii += INCY * 2;} while(0) | ||||||
|  | 
 | ||||||
|  | #define norm_loop_2 do {rTmp = A[jj + ii] * X[k] + A[jj + ii + 1] * X[k + 1]; iTmp = -A[jj + ii] * X[k + 1] + A[jj + ii + 1] * X[k]; Y[iii] += rTmp * rALPHA - iTmp * iALPHA; Y[iii + 1] += rTmp * iALPHA + iTmp * rALPHA; ii += 2; iii += INCY * 2;} while(0) | ||||||
|  | 
 | ||||||
|  | #define norm_loop_3 do {rTmp = A[jj + ii] * X[k] - A[jj + ii + 1] * X[k + 1]; iTmp = -A[jj + ii] * X[k + 1] - A[jj + ii + 1] * X[k]; Y[iii] += rTmp * rALPHA - iTmp * iALPHA; Y[iii + 1] += rTmp * iALPHA + iTmp * rALPHA; ii += 2; iii += INCY * 2;} while(0) | ||||||
| 
 | 
 | ||||||
| int CNAME(BLASLONG M, BLASLONG N, BLASLONG UNUSED, FLOAT rALPHA, FLOAT iALPHA, FLOAT *A, BLASLONG LDA, FLOAT *X, BLASLONG INCX, FLOAT *Y, BLASLONG INCY, FLOAT *BUFFER) { | int CNAME(BLASLONG M, BLASLONG N, BLASLONG UNUSED, FLOAT rALPHA, FLOAT iALPHA, FLOAT *A, BLASLONG LDA, FLOAT *X, BLASLONG INCX, FLOAT *Y, BLASLONG INCY, FLOAT *BUFFER) { | ||||||
| 
 | 
 | ||||||
| 	if(!rALPHA && iALPHA) | 	if(!rALPHA && iALPHA) | ||||||
| 		return 0; | 		return 0; | ||||||
| 
 | 
 | ||||||
| //	if(INCX < 0)
 |  | ||||||
| //		INCX = -INCX;
 |  | ||||||
| //	if(INCY < 0)
 |  | ||||||
| //		INCY = -INCY;
 |  | ||||||
| 
 |  | ||||||
| 	BLASLONG fahead = 60; | 	BLASLONG fahead = 60; | ||||||
| 	BLASLONG spec_unroll = 2; | 	BLASLONG spec_unroll = 2; | ||||||
| 	BLASLONG tMQ = M - M % spec_unroll; | 	BLASLONG tMQ = M - M % spec_unroll; | ||||||
| 	BLASLONG j = 0, k = 0, jj=0; | 	BLASLONG j = 0, k = 0, jj = 0; | ||||||
| 
 |  | ||||||
| 
 | 
 | ||||||
| 	if(rALPHA == 1 && iALPHA == 0) { | 	if(rALPHA == 1 && iALPHA == 0) { | ||||||
| 		if(INCY == 1) { | 		if(INCY == 1) { | ||||||
|  |  | ||||||
|  | @ -1,33 +1,67 @@ | ||||||
| #include "common.h"  | #include "common.h"  | ||||||
| 
 | 
 | ||||||
| //These are auto-tuning codes on Loongson-3A platform. 
 |  | ||||||
| //#define prefetch(x) __builtin_prefetch(x)
 |  | ||||||
| //#define prefetch(x) do {_mm_prefetch((char *)(x), _MM_HINT_T0);} while(0)
 |  | ||||||
| #define prefetch(x) __asm__ __volatile__("ld $0, %0"::"m"(x)) | #define prefetch(x) __asm__ __volatile__("ld $0, %0"::"m"(x)) | ||||||
| #define likely(x) __builtin_expect(!!(x), 1) | #define likely(x) __builtin_expect(!!(x), 1) | ||||||
| #define unlikely(x) __builtin_expect(!!(x), 0) | #define unlikely(x) __builtin_expect(!!(x), 0) | ||||||
| 
 | 
 | ||||||
| #define spec_loop_alpha1 do {Y[k] += A[jj + ii] * X[ii]; Y[k + 1] += A[jj + ii + 1] * X[ii]; Y[k + 1] += A[jj + ii] * X[ii + 1]; Y[k] -= A[jj + ii + 1] * X[ii + 1]; ii += 2;} while(0) | #if !defined(CONJ) && !defined(XCONJ) | ||||||
| //#define spec_loop_alpha1 do {Y[ii] += A[jj + ii] * X[k] - A[jj + ii + 1] * X[k + 1]; Y[ii + 1] += A[jj + ii + 1] * X[k] + A[jj + ii] * X[k + 1]; ii += 2;} while(0)
 | #define spec_loop_alpha1	spec_loop_alpha1_0 | ||||||
| #define spec_loop do {rTmp = A[jj + ii] * X[ii] - A[jj + ii + 1] * X[ii + 1]; iTmp = A[jj + ii] * X[ii + 1] + A[jj + ii + 1] * X[ii]; Y[k] += rTmp * rALPHA - iTmp * iALPHA; Y[k + 1] += rTmp * iALPHA + iTmp * rALPHA; ii += 2;} while(0) | #define spec_loop			spec_loop_0 | ||||||
| #define norm_loop_alpha1 do {Y[k] += A[jj + ii] * X[iii] - A[jj + ii + 1] * X[iii + 1]; Y[k + 1] += A[jj + ii] * X[iii + 1] + A[jj + ii + 1] * X[iii]; ii += 2; iii += INCX * 2;} while(0) | #define norm_loop_alpha1	norm_loop_alpha1_0 | ||||||
| #define norm_loop do {rTmp = A[jj + ii] * X[iii] - A[jj + ii + 1] * X[iii + 1]; iTmp = A[jj + ii] * X[iii + 1] + A[jj + ii + 1] * X[iii]; Y[k] += rTmp * rALPHA - iTmp * iALPHA; Y[k + 1] += rTmp * iALPHA + iTmp * rALPHA; ii += 2; iii += INCX * 2;} while(0) | #define norm_loop			norm_loop_0 | ||||||
|  | #endif | ||||||
|  | 
 | ||||||
|  | #if  defined(CONJ) && !defined(XCONJ) | ||||||
|  | #define spec_loop_alpha1 	spec_loop_alpha1_1 | ||||||
|  | #define spec_loop			spec_loop_1 | ||||||
|  | #define norm_loop_alpha1	norm_loop_alpha1_1 | ||||||
|  | #define norm_loop			norm_loop_1 | ||||||
|  | #endif | ||||||
|  | 
 | ||||||
|  | #if  !defined(CONJ) && defined(XCONJ) | ||||||
|  | #define spec_loop_alpha1 	spec_loop_alpha1_2 | ||||||
|  | #define spec_loop			spec_loop_2 | ||||||
|  | #define norm_loop_alpha1	norm_loop_alpha1_2 | ||||||
|  | #define norm_loop			norm_loop_2 | ||||||
|  | #endif | ||||||
|  | 
 | ||||||
|  | #if  defined(CONJ) && defined(XCONJ) | ||||||
|  | #define spec_loop_alpha1 	spec_loop_alpha1_3 | ||||||
|  | #define spec_loop			spec_loop_3 | ||||||
|  | #define norm_loop_alpha1	norm_loop_alpha1_3 | ||||||
|  | #define norm_loop			norm_loop_3 | ||||||
|  | #endif | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | #define spec_loop_alpha1_0 do {Y[k] += A[jj + ii] * X[ii]; Y[k + 1] += A[jj + ii + 1] * X[ii]; Y[k + 1] += A[jj + ii] * X[ii + 1]; Y[k] -= A[jj + ii + 1] * X[ii + 1]; ii += 2;} while(0) | ||||||
|  | #define spec_loop_alpha1_1 do {Y[k] += A[jj + ii] * X[ii]; Y[k + 1] -= A[jj + ii + 1] * X[ii]; Y[k + 1] += A[jj + ii] * X[ii + 1]; Y[k] += A[jj + ii + 1] * X[ii + 1]; ii += 2;} while(0) | ||||||
|  | #define spec_loop_alpha1_2 do {Y[k] += A[jj + ii] * X[ii]; Y[k + 1] += A[jj + ii + 1] * X[ii]; Y[k + 1] -= A[jj + ii] * X[ii + 1]; Y[k] += A[jj + ii + 1] * X[ii + 1]; ii += 2;} while(0) | ||||||
|  | #define spec_loop_alpha1_3 do {Y[k] += A[jj + ii] * X[ii]; Y[k + 1] -= A[jj + ii + 1] * X[ii]; Y[k + 1] -= A[jj + ii] * X[ii + 1]; Y[k] -= A[jj + ii + 1] * X[ii + 1]; ii += 2;} while(0) | ||||||
|  | 
 | ||||||
|  | #define spec_loop_0 do {rTmp = A[jj + ii] * X[ii] - A[jj + ii + 1] * X[ii + 1]; iTmp = A[jj + ii] * X[ii + 1] + A[jj + ii + 1] * X[ii]; Y[k] += rTmp * rALPHA - iTmp * iALPHA; Y[k + 1] += rTmp * iALPHA + iTmp * rALPHA; ii += 2;} while(0) | ||||||
|  | #define spec_loop_1 do {rTmp = A[jj + ii] * X[ii] + A[jj + ii + 1] * X[ii + 1]; iTmp = A[jj + ii] * X[ii + 1] - A[jj + ii + 1] * X[ii]; Y[k] += rTmp * rALPHA - iTmp * iALPHA; Y[k + 1] += rTmp * iALPHA + iTmp * rALPHA; ii += 2;} while(0) | ||||||
|  | #define spec_loop_2 do {rTmp = A[jj + ii] * X[ii] + A[jj + ii + 1] * X[ii + 1]; iTmp = -A[jj + ii] * X[ii + 1] + A[jj + ii + 1] * X[ii]; Y[k] += rTmp * rALPHA - iTmp * iALPHA; Y[k + 1] += rTmp * iALPHA + iTmp * rALPHA; ii += 2;} while(0) | ||||||
|  | #define spec_loop_3 do {rTmp = A[jj + ii] * X[ii] - A[jj + ii + 1] * X[ii + 1]; iTmp = -A[jj + ii] * X[ii + 1] - A[jj + ii + 1] * X[ii]; Y[k] += rTmp * rALPHA - iTmp * iALPHA; Y[k + 1] += rTmp * iALPHA + iTmp * rALPHA; ii += 2;} while(0) | ||||||
|  | 
 | ||||||
|  | #define norm_loop_alpha1_0 do {Y[k] += A[jj + ii] * X[iii] - A[jj + ii + 1] * X[iii + 1]; Y[k + 1] += A[jj + ii] * X[iii + 1] + A[jj + ii + 1] * X[iii]; ii += 2; iii += INCX * 2;} while(0) | ||||||
|  | #define norm_loop_alpha1_1 do {Y[k] += A[jj + ii] * X[iii] + A[jj + ii + 1] * X[iii + 1]; Y[k + 1] += A[jj + ii] * X[iii + 1] - A[jj + ii + 1] * X[iii]; ii += 2; iii += INCX * 2;} while(0) | ||||||
|  | #define norm_loop_alpha1_2 do {Y[k] += A[jj + ii] * X[iii] + A[jj + ii + 1] * X[iii + 1]; Y[k + 1] += -A[jj + ii] * X[iii + 1] + A[jj + ii + 1] * X[iii]; ii += 2; iii += INCX * 2;} while(0) | ||||||
|  | #define norm_loop_alpha1_3 do {Y[k] += A[jj + ii] * X[iii] - A[jj + ii + 1] * X[iii + 1]; Y[k + 1] += -A[jj + ii] * X[iii + 1] - A[jj + ii + 1] * X[iii]; ii += 2; iii += INCX * 2;} while(0) | ||||||
|  | 
 | ||||||
|  | #define norm_loop_0 do {rTmp = A[jj + ii] * X[iii] - A[jj + ii + 1] * X[iii + 1]; iTmp = A[jj + ii] * X[iii + 1] + A[jj + ii + 1] * X[iii]; Y[k] += rTmp * rALPHA - iTmp * iALPHA; Y[k + 1] += rTmp * iALPHA + iTmp * rALPHA; ii += 2; iii += INCX * 2;} while(0) | ||||||
|  | #define norm_loop_1 do {rTmp = A[jj + ii] * X[iii] + A[jj + ii + 1] * X[iii + 1]; iTmp = A[jj + ii] * X[iii + 1] - A[jj + ii + 1] * X[iii]; Y[k] += rTmp * rALPHA - iTmp * iALPHA; Y[k + 1] += rTmp * iALPHA + iTmp * rALPHA; ii += 2; iii += INCX * 2;} while(0) | ||||||
|  | #define norm_loop_2 do {rTmp = A[jj + ii] * X[iii] + A[jj + ii + 1] * X[iii + 1]; iTmp = -A[jj + ii] * X[iii + 1] + A[jj + ii + 1] * X[iii]; Y[k] += rTmp * rALPHA - iTmp * iALPHA; Y[k + 1] += rTmp * iALPHA + iTmp * rALPHA; ii += 2; iii += INCX * 2;} while(0) | ||||||
|  | #define norm_loop_3 do {rTmp = A[jj + ii] * X[iii] - A[jj + ii + 1] * X[iii + 1]; iTmp = -A[jj + ii] * X[iii + 1] - A[jj + ii + 1] * X[iii]; Y[k] += rTmp * rALPHA - iTmp * iALPHA; Y[k + 1] += rTmp * iALPHA + iTmp * rALPHA; ii += 2; iii += INCX * 2;} while(0) | ||||||
| 
 | 
 | ||||||
| int CNAME(BLASLONG M, BLASLONG N, BLASLONG UNUSED, FLOAT rALPHA, FLOAT iALPHA, FLOAT *A, BLASLONG LDA, FLOAT *X, BLASLONG INCX, FLOAT *Y, BLASLONG INCY, FLOAT *BUFFER) { | int CNAME(BLASLONG M, BLASLONG N, BLASLONG UNUSED, FLOAT rALPHA, FLOAT iALPHA, FLOAT *A, BLASLONG LDA, FLOAT *X, BLASLONG INCX, FLOAT *Y, BLASLONG INCY, FLOAT *BUFFER) { | ||||||
| 
 | 
 | ||||||
| 	if(!rALPHA && iALPHA) | 	if(!rALPHA && iALPHA) | ||||||
| 		return 0; | 		return 0; | ||||||
| 
 | 
 | ||||||
| //	if(INCX < 0)
 |  | ||||||
| //		INCX = -INCX;
 |  | ||||||
| //	if(INCY < 0)
 |  | ||||||
| //		INCY = -INCY;
 |  | ||||||
| 
 |  | ||||||
| 	BLASLONG fahead = 30; | 	BLASLONG fahead = 30; | ||||||
| 	BLASLONG spec_unroll = 2; | 	BLASLONG spec_unroll = 2; | ||||||
| 	BLASLONG tMQ = M - M % spec_unroll; | 	BLASLONG tMQ = M - M % spec_unroll; | ||||||
| 	BLASLONG j = 0, k = 0, jj=0; | 	BLASLONG j = 0, k = 0, jj = 0; | ||||||
| 
 |  | ||||||
| 
 | 
 | ||||||
| 	if(rALPHA == 1 && iALPHA == 0) { | 	if(rALPHA == 1 && iALPHA == 0) { | ||||||
| 		if(INCX == 1) { | 		if(INCX == 1) { | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue