Remove all trailing whitespace except lapack-netlib

Signed-off-by: Timothy Gu <timothygu99@gmail.com>
This commit is contained in:
Timothy Gu
2014-06-27 12:05:18 -07:00
parent d10db52edb
commit 6c2ead30f0
1423 changed files with 21229 additions and 21229 deletions

View File

@@ -61,7 +61,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
lda = args -> lda;
ipiv = (blasint *)args -> c;
offset = 0;
if (range_n) {
m -= range_n[0];
n = range_n[1] - range_n[0];
@@ -71,13 +71,13 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
info = 0;
b = a;
for (j = 0; j < n; j++) {
len = MIN(j, m);
for (i = 0; i < len; i++) {
ip = ipiv[i + offset] - 1 - offset;
ip = ipiv[i + offset] - 1 - offset;
if (ip != i) {
temp1 = *(b + i);
temp2 = *(b + ip);
@@ -85,7 +85,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
*(b + ip) = temp1;
}
}
for (i = 1; i < len; i++) {
b[i] -= DOTU_K(i, a + i, lda, b, 1);
}

View File

@@ -63,7 +63,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
lda = args -> lda;
ipiv = (blasint *)args -> c;
offset = 0;
if (range_n) {
m -= range_n[0];
n = range_n[1] - range_n[0];
@@ -73,13 +73,13 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
info = 0;
b = a;
for (j = 0; j < n; j++) {
len = MIN(j, m);
for (i = 0; i < len; i++) {
ip = ipiv[i + offset] - 1 - offset;
ip = ipiv[i + offset] - 1 - offset;
if (ip != i) {
temp1 = *(b + i * 2 + 0);
temp2 = *(b + i * 2 + 1);
@@ -91,7 +91,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
*(b + ip * 2 + 1) = temp2;
}
}
ZTRSV_NLU(len, a, lda, b, 1, sb);
if (j < m) {
@@ -124,7 +124,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
}
if (j + 1 < m) {
SCAL_K(m - j - 1, 0, 0, temp3, temp4,
SCAL_K(m - j - 1, 0, 0, temp3, temp4,
b + (j + 1) * 2, 1, NULL, 0, NULL, 0);
}
} else {

View File

@@ -44,7 +44,7 @@ static FLOAT dm1 = -1.;
double sqrt(double);
//When MAX_CPU_NUMBER exceeds GETRF_MEM_ALLOC_THRESHOLD, the recursive getrf_parallel may overflow the stack.
//Instead, use malloc to allocate job_t.
//Instead, use malloc to allocate job_t.
#if MAX_CPU_NUMBER > GETRF_MEM_ALLOC_THRESHOLD
#define USE_ALLOC_HEAP
#endif
@@ -123,21 +123,21 @@ static void inner_basic_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *ra
for (jjs = js; jjs < js + min_j; jjs += GEMM_UNROLL_N){
min_jj = js + min_j - jjs;
if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N;
if (0 && GEMM_UNROLL_N <= 8) {
LASWP_NCOPY(min_jj, off + 1, off + k,
LASWP_NCOPY(min_jj, off + 1, off + k,
c + (- off + jjs * lda) * COMPSIZE, lda,
ipiv, sbb + k * (jjs - js) * COMPSIZE);
} else {
LASWP_PLUS(min_jj, off + 1, off + k, ZERO,
LASWP_PLUS(min_jj, off + 1, off + k, ZERO,
#ifdef COMPLEX
ZERO,
#endif
c + (- off + jjs * lda) * COMPSIZE, lda, NULL, 0, ipiv, 1);
GEMM_ONCOPY (k, min_jj, c + jjs * lda * COMPSIZE, lda, sbb + (jjs - js) * k * COMPSIZE);
}
@@ -145,13 +145,13 @@ static void inner_basic_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *ra
for (is = 0; is < k; is += GEMM_P) {
min_i = k - is;
if (min_i > GEMM_P) min_i = GEMM_P;
TRSM_KERNEL_LT(min_i, min_jj, k, dm1,
#ifdef COMPLEX
ZERO,
#endif
sb + k * is * COMPSIZE,
sbb + (jjs - js) * k * COMPSIZE,
sbb + (jjs - js) * k * COMPSIZE,
c + (is + jjs * lda) * COMPSIZE, lda, is);
}
}
@@ -161,9 +161,9 @@ static void inner_basic_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *ra
for (is = 0; is < m; is += GEMM_P){
min_i = m - is;
if (min_i > GEMM_P) min_i = GEMM_P;
GEMM_ITCOPY (k, min_i, b + is * COMPSIZE, lda, sa);
GEMM_KERNEL_N(min_i, min_j, k, dm1,
#ifdef COMPLEX
ZERO,
@@ -234,7 +234,7 @@ static int inner_advanced_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *
c += range_m[0] * COMPSIZE;
div_n = (n_to - n_from + DIVIDE_RATE - 1) / DIVIDE_RATE;
buffer[0] = sbb;
@@ -243,10 +243,10 @@ static int inner_advanced_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *
}
for (xxx = n_from, bufferside = 0; xxx < n_to; xxx += div_n, bufferside ++) {
for (i = 0; i < args -> nthreads; i++)
while (job[mypos].working[i][CACHE_LINE_SIZE * bufferside]) {};
for(jjs = xxx; jjs < MIN(n_to, xxx + div_n); jjs += min_jj){
min_jj = MIN(n_to, xxx + div_n) - jjs;
if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N;
@@ -254,43 +254,43 @@ static int inner_advanced_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *
if (0 && GEMM_UNROLL_N <= 8) {
printf("helllo\n");
LASWP_NCOPY(min_jj, off + 1, off + k,
LASWP_NCOPY(min_jj, off + 1, off + k,
b + (- off + jjs * lda) * COMPSIZE, lda,
ipiv, buffer[bufferside] + (jjs - xxx) * k * COMPSIZE);
} else {
LASWP_PLUS(min_jj, off + 1, off + k, ZERO,
LASWP_PLUS(min_jj, off + 1, off + k, ZERO,
#ifdef COMPLEX
ZERO,
#endif
b + (- off + jjs * lda) * COMPSIZE, lda, NULL, 0, ipiv, 1);
GEMM_ONCOPY (k, min_jj, b + jjs * lda * COMPSIZE, lda,
GEMM_ONCOPY (k, min_jj, b + jjs * lda * COMPSIZE, lda,
buffer[bufferside] + (jjs - xxx) * k * COMPSIZE);
}
for (is = 0; is < k; is += GEMM_P) {
min_i = k - is;
if (min_i > GEMM_P) min_i = GEMM_P;
TRSM_KERNEL_LT(min_i, min_jj, k, dm1,
#ifdef COMPLEX
ZERO,
#endif
sb + k * is * COMPSIZE,
buffer[bufferside] + (jjs - xxx) * k * COMPSIZE,
buffer[bufferside] + (jjs - xxx) * k * COMPSIZE,
b + (is + jjs * lda) * COMPSIZE, lda, is);
}
}
for (i = 0; i < args -> nthreads; i++)
job[mypos].working[i][CACHE_LINE_SIZE * bufferside] = (BLASLONG)buffer[bufferside];
}
flag[mypos * CACHE_LINE_SIZE] = 0;
if (m == 0) {
for (xxx = 0; xxx < DIVIDE_RATE; xxx++) {
job[mypos].working[mypos][CACHE_LINE_SIZE * xxx] = 0;
@@ -301,21 +301,21 @@ static int inner_advanced_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *
min_i = m - is;
if (min_i >= GEMM_P * 2) {
min_i = GEMM_P;
} else
} else
if (min_i > GEMM_P) {
min_i = ((min_i + 1) / 2 + GEMM_UNROLL_M - 1) & ~(GEMM_UNROLL_M - 1);
}
ICOPY_OPERATION(k, min_i, a, lda, 0, is, sa);
current = mypos;
do {
div_n = (range_n[current + 1] - range_n[current] + DIVIDE_RATE - 1) / DIVIDE_RATE;
for (xxx = range_n[current], bufferside = 0; xxx < range_n[current + 1]; xxx += div_n, bufferside ++) {
if ((current != mypos) && (!is)) {
while(job[current].working[mypos][CACHE_LINE_SIZE * bufferside] == 0) {};
}
@@ -323,18 +323,18 @@ static int inner_advanced_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *
KERNEL_OPERATION(min_i, MIN(range_n[current + 1] - xxx, div_n), k,
sa, (FLOAT *)job[current].working[mypos][CACHE_LINE_SIZE * bufferside],
c, lda, is, xxx);
if (is + min_i >= m) {
job[current].working[mypos][CACHE_LINE_SIZE * bufferside] = 0;
}
}
current ++;
if (current >= args -> nthreads) current = 0;
} while (current != mypos);
}
for (i = 0; i < args -> nthreads; i++) {
for (xxx = 0; xxx < DIVIDE_RATE; xxx++) {
while (job[mypos].working[i][CACHE_LINE_SIZE * xxx] ) {};
@@ -382,7 +382,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
mode = BLAS_DOUBLE | BLAS_REAL;
#else
mode = BLAS_SINGLE | BLAS_REAL;
#endif
#endif
#else
#ifdef XDOUBLE
mode = BLAS_XDOUBLE | BLAS_COMPLEX;
@@ -390,7 +390,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
mode = BLAS_DOUBLE | BLAS_COMPLEX;
#else
mode = BLAS_SINGLE | BLAS_COMPLEX;
#endif
#endif
#endif
m = args -> m;
@@ -408,7 +408,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
}
if (m <= 0 || n <= 0) return 0;
newarg.c = ipiv;
newarg.lda = lda;
@@ -428,14 +428,14 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
bk = mn;
if (bk > next_bk) bk = next_bk;
range_n_new[0] = offset;
range_n_new[1] = offset + bk;
iinfo = CNAME(args, NULL, range_n_new, sa, sb, 0);
if (iinfo && !info) info = iinfo;
#ifdef USE_ALLOC_HEAP
job = (job_t*)malloc(MAX_CPU_NUMBER * sizeof(job_t));
if(job==NULL){
@@ -449,24 +449,24 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
TRSM_ILTCOPY(bk, bk, a, lda, 0, sb);
sbb = (FLOAT *)((((BLASULONG)(sb + bk * bk * COMPSIZE) + GEMM_ALIGN) & ~GEMM_ALIGN) + GEMM_OFFSET_B);
is = 0;
num_cpu = 0;
while (is < mn) {
width = (FORMULA1(m, n, is, bk, args -> nthreads) + GEMM_UNROLL_N - 1) & ~(GEMM_UNROLL_N - 1);
if (width > mn - is - bk) width = mn - is - bk;
if (width < bk) {
next_bk = (FORMULA2(m, n, is, bk, args -> nthreads) + GEMM_UNROLL_N) & ~(GEMM_UNROLL_N - 1);
if (next_bk > bk) next_bk = bk;
width = next_bk;
if (width > mn - is - bk) width = mn - is - bk;
}
if (num_cpu > 0) exec_blas_async_wait(num_cpu, &queue[0]);
mm = m - bk - is;
@@ -479,7 +479,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
newarg.n = nn;
newarg.k = bk;
newarg.ldb = is + offset;
nn -= width;
range_n_mine[0] = 0;
@@ -489,16 +489,16 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
range_M[0] = 0;
num_cpu = 0;
while (nn > 0){
if (mm >= nn) {
width = blas_quickdivide(nn + args -> nthreads - num_cpu, args -> nthreads - num_cpu - 1);
if (nn < width) width = nn;
nn -= width;
range_N[num_cpu + 1] = range_N[num_cpu] + width;
width = blas_quickdivide(mm + args -> nthreads - num_cpu, args -> nthreads - num_cpu - 1);
if (mm < width) width = mm;
if (nn <= 0) width = mm;
@@ -517,7 +517,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
if (mm <= 0) width = nn;
nn -= width;
range_N[num_cpu + 1] = range_N[num_cpu] + width;
}
queue[num_cpu].mode = mode;
@@ -529,13 +529,13 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
queue[num_cpu].sb = NULL;
queue[num_cpu].next = &queue[num_cpu + 1];
flag[num_cpu * CACHE_LINE_SIZE] = 1;
num_cpu ++;
}
newarg.nthreads = num_cpu;
if (num_cpu > 0) {
for (j = 0; j < num_cpu; j++) {
for (i = 0; i < num_cpu; i++) {
@@ -550,20 +550,20 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
bk = mn - is;
if (bk > next_bk) bk = next_bk;
range_n_new[0] = offset + is;
range_n_new[1] = offset + is + bk;
if (num_cpu > 0) {
queue[num_cpu - 1].next = NULL;
exec_blas_async(0, &queue[0]);
inner_basic_thread(&newarg, NULL, range_n_mine, sa, sbb, -1);
iinfo = GETRF_SINGLE(args, NULL, range_n_new, sa, sbb, 0);
if (iinfo && !info) info = iinfo + is;
for (i = 0; i < num_cpu; i ++) while (flag[i * CACHE_LINE_SIZE]) {};
@@ -577,19 +577,19 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
iinfo = GETRF_SINGLE(args, NULL, range_n_new, sa, sbb, 0);
if (iinfo && !info) info = iinfo + is;
}
}
next_bk = init_bk;
is = 0;
while (is < mn) {
bk = mn - is;
if (bk > next_bk) bk = next_bk;
width = (FORMULA1(m, n, is, bk, args -> nthreads) + GEMM_UNROLL_N - 1) & ~(GEMM_UNROLL_N - 1);
if (width > mn - is - bk) width = mn - is - bk;
@@ -598,13 +598,13 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
if (next_bk > bk) next_bk = bk;
}
blas_level1_thread(mode, bk, is + bk + offset + 1, mn + offset, (void *)dummyalpha,
blas_level1_thread(mode, bk, is + bk + offset + 1, mn + offset, (void *)dummyalpha,
a + (- offset + is * lda) * COMPSIZE, lda, NULL, 0,
ipiv, 1, (void *)LASWP_PLUS, args -> nthreads);
is += bk;
}
#ifdef USE_ALLOC_HEAP
free(job);
#endif
@@ -638,7 +638,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
mode = BLAS_DOUBLE | BLAS_REAL;
#else
mode = BLAS_SINGLE | BLAS_REAL;
#endif
#endif
#else
#ifdef XDOUBLE
mode = BLAS_XDOUBLE | BLAS_COMPLEX;
@@ -646,7 +646,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
mode = BLAS_DOUBLE | BLAS_COMPLEX;
#else
mode = BLAS_SINGLE | BLAS_COMPLEX;
#endif
#endif
#endif
m = args -> m;
@@ -664,7 +664,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
}
if (m <= 0 || n <= 0) return 0;
newarg.c = ipiv;
newarg.lda = lda;
newarg.common = NULL;
@@ -700,9 +700,9 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
range_n_new[0] = offset;
range_n_new[1] = offset + bk;
info = CNAME(args, NULL, range_n_new, sa, sb, 0);
TRSM_ILTCOPY(bk, bk, a, lda, 0, sb);
is = 0;
@@ -714,7 +714,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
width = FORMULA1(m, n, is, bk, args -> nthreads);
width = (width + GEMM_UNROLL_N - 1) & ~(GEMM_UNROLL_N - 1);
if (width < bk) {
next_bk = FORMULA2(m, n, is, bk, args -> nthreads);
@@ -729,7 +729,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
width = next_bk;
}
if (width > mn - is - bk) {
next_bk = mn - is - bk;
width = next_bk;
@@ -742,10 +742,10 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
range[0] = 0;
range[1] = width;
num_cpu = 1;
nn -= width;
newarg.a = sb;
newarg.b = a + (is + is * lda) * COMPSIZE;
newarg.d = (void *)flag;
@@ -753,16 +753,16 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
newarg.n = n - bk - is;
newarg.k = bk;
newarg.ldb = is + offset;
while (nn > 0){
width = blas_quickdivide(nn + args -> nthreads - num_cpu, args -> nthreads - num_cpu);
nn -= width;
if (nn < 0) width = width + nn;
range[num_cpu + 1] = range[num_cpu] + width;
queue[num_cpu].mode = mode;
//queue[num_cpu].routine = inner_advanced_thread;
queue[num_cpu].routine = (void *)inner_basic_thread;
@@ -776,21 +776,21 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
num_cpu ++;
}
queue[num_cpu - 1].next = NULL;
is += bk;
bk = n - is;
if (bk > next_bk) bk = next_bk;
range_n_new[0] = offset + is;
range_n_new[1] = offset + is + bk;
if (num_cpu > 1) {
exec_blas_async(1, &queue[1]);
#if 0
inner_basic_thread(&newarg, NULL, &range[0], sa, sbb, 0);
@@ -823,30 +823,30 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
#endif
for (i = 1; i < num_cpu; i ++) while (flag[i * CACHE_LINE_SIZE]) {};
TRSM_ILTCOPY(bk, bk, a + (is + is * lda) * COMPSIZE, lda, 0, sb);
} else {
inner_basic_thread(&newarg, NULL, &range[0], sa, sbb, -1);
iinfo = GETRF_SINGLE(args, NULL, range_n_new, sa, sbb, 0);
}
if (iinfo && !info) info = iinfo + is;
}
next_bk = init_bk;
bk = init_bk;
is = 0;
while (is < mn) {
bk = mn - is;
if (bk > next_bk) bk = next_bk;
width = FORMULA1(m, n, is, bk, args -> nthreads);
width = (width + GEMM_UNROLL_N - 1) & ~(GEMM_UNROLL_N - 1);
@@ -867,13 +867,13 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
width = next_bk;
}
blas_level1_thread(mode, bk, is + bk + offset + 1, mn + offset, (void *)dummyalpha,
blas_level1_thread(mode, bk, is + bk + offset + 1, mn + offset, (void *)dummyalpha,
a + (- offset + is * lda) * COMPSIZE, lda, NULL, 0,
ipiv, 1, (void *)LASWP_PLUS, args -> nthreads);
is += bk;
}
return info;
}

View File

@@ -68,7 +68,7 @@ static void inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
c += range_n[0] * lda * COMPSIZE;
d += range_n[0] * lda * COMPSIZE;
}
for (js = 0; js < n; js += REAL_GEMM_R) {
min_j = n - js;
if (min_j > REAL_GEMM_R) min_j = REAL_GEMM_R;
@@ -76,32 +76,32 @@ static void inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
for (jjs = js; jjs < js + min_j; jjs += GEMM_UNROLL_N){
min_jj = js + min_j - jjs;
if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N;
#if 0
LASWP_NCOPY(min_jj, off + 1, off + k,
LASWP_NCOPY(min_jj, off + 1, off + k,
c + (- off + jjs * lda) * COMPSIZE, lda,
ipiv, sb + k * (jjs - js) * COMPSIZE);
#else
LASWP_PLUS(min_jj, off + 1, off + k, ZERO,
LASWP_PLUS(min_jj, off + 1, off + k, ZERO,
#ifdef COMPLEX
ZERO,
#endif
c + (- off + jjs * lda) * COMPSIZE, lda, NULL, 0, ipiv, 1);
GEMM_ONCOPY (k, min_jj, c + jjs * lda * COMPSIZE, lda, sb + (jjs - js) * k * COMPSIZE);
#endif
for (is = 0; is < k; is += GEMM_P) {
min_i = k - is;
if (min_i > GEMM_P) min_i = GEMM_P;
TRSM_KERNEL_LT(min_i, min_jj, k, dm1,
#ifdef COMPLEX
ZERO,
#endif
(FLOAT *)args -> a + k * is * COMPSIZE,
sb + (jjs - js) * k * COMPSIZE,
sb + (jjs - js) * k * COMPSIZE,
c + (is + jjs * lda) * COMPSIZE, lda, is);
}
}
@@ -109,9 +109,9 @@ static void inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
for (is = 0; is < m; is += GEMM_P){
min_i = m - is;
if (min_i > GEMM_P) min_i = GEMM_P;
GEMM_ITCOPY (k, min_i, b + is * COMPSIZE, lda, sa);
GEMM_KERNEL_N(min_i, min_j, k, dm1,
#ifdef COMPLEX
ZERO,
@@ -141,7 +141,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
mode = BLAS_DOUBLE | BLAS_REAL;
#else
mode = BLAS_SINGLE | BLAS_REAL;
#endif
#endif
#else
#ifdef XDOUBLE
mode = BLAS_XDOUBLE | BLAS_COMPLEX;
@@ -149,7 +149,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
mode = BLAS_DOUBLE | BLAS_COMPLEX;
#else
mode = BLAS_SINGLE | BLAS_COMPLEX;
#endif
#endif
#endif
m = args -> m;
@@ -167,7 +167,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
}
if (m <= 0 || n <= 0) return 0;
mn = MIN(m, n);
blocking = (mn / 2 + GEMM_UNROLL_N - 1) & ~(GEMM_UNROLL_N - 1);
@@ -177,13 +177,13 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
info = GETF2(args, NULL, range_n, sa, sb, 0);
return info;
}
sbb = (FLOAT *)((((BLASULONG)(sb + blocking * blocking * COMPSIZE) + GEMM_ALIGN) & ~GEMM_ALIGN) + GEMM_OFFSET_B);
info = 0;
for (j = 0; j < mn; j += blocking) {
jb = mn - j;
if (jb > blocking) jb = blocking;
@@ -198,9 +198,9 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
if (iinfo && !info) info = iinfo + j;
if (j + jb < n) {
TRSM_ILTCOPY(jb, jb, offsetA + j * COMPSIZE, lda, 0, sb);
newarg.m = m - jb - j;
newarg.n = n - jb - j;
newarg.k = jb;
@@ -215,7 +215,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
newarg.nthreads = args -> nthreads;
gemm_thread_n(mode, &newarg, NULL, NULL, (void *)inner_thread, sa, sbb, args -> nthreads);
}
}
@@ -226,7 +226,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
ZERO,
#endif
a - (offset - j * lda) * COMPSIZE, lda, NULL, 0 , ipiv, 1);
}
return info;

View File

@@ -71,7 +71,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
}
if (m <= 0 || n <= 0) return 0;
mn = MIN(m, n);
blocking = (mn / 2 + GEMM_UNROLL_N - 1) & ~(GEMM_UNROLL_N - 1);
@@ -81,13 +81,13 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
info = GETF2(args, NULL, range_n, sa, sb, 0);
return info;
}
sbb = (FLOAT *)((((BLASULONG)(sb + blocking * blocking * COMPSIZE) + GEMM_ALIGN) & ~GEMM_ALIGN) + GEMM_OFFSET_B);
info = 0;
for (j = 0; j < mn; j += blocking) {
jb = mn - j;
if (jb > blocking) jb = blocking;
@@ -102,53 +102,53 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
if (iinfo && !info) info = iinfo + j;
if (j + jb < n) {
TRSM_ILTCOPY(jb, jb, offsetA + j * COMPSIZE, lda, 0, sb);
for (js = j + jb; js < n; js += REAL_GEMM_R){
jmin = n - js;
if (jmin > REAL_GEMM_R) jmin = REAL_GEMM_R;
for (jjs = js; jjs < js + jmin; jjs += GEMM_UNROLL_N){
min_jj = js + jmin - jjs;
if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N;
#if 1
LASWP_PLUS(min_jj, j + offset + 1, j + jb + offset, ZERO,
#if 1
LASWP_PLUS(min_jj, j + offset + 1, j + jb + offset, ZERO,
#ifdef COMPLEX
ZERO,
#endif
a + (- offset + jjs * lda) * COMPSIZE, lda, NULL, 0 , ipiv, 1);
GEMM_ONCOPY (jb, min_jj, a + (j + jjs * lda) * COMPSIZE, lda, sbb + jb * (jjs - js) * COMPSIZE);
#else
LASWP_NCOPY(min_jj, j + offset + 1, j + jb + offset,
LASWP_NCOPY(min_jj, j + offset + 1, j + jb + offset,
a + (- offset + jjs * lda) * COMPSIZE, lda, ipiv, sbb + jb * (jjs - js) * COMPSIZE);
#endif
for (jc = 0; jc < jb; jc += GEMM_P) {
jcmin = jb - jc;
if (jcmin > GEMM_P) jcmin = GEMM_P;
TRSM_KERNEL_LT(jcmin, min_jj, jb, dm1,
#ifdef COMPLEX
ZERO,
#endif
sb + jb * jc * COMPSIZE,
sbb + jb * (jjs - js) * COMPSIZE,
sbb + jb * (jjs - js) * COMPSIZE,
a + (j + jc + jjs * lda) * COMPSIZE, lda, jc);
}
}
for (is = j + jb; is < m; is += GEMM_P){
imin = m - is;
if (imin > GEMM_P) imin = GEMM_P;
GEMM_ITCOPY (jb, imin, offsetA + is * COMPSIZE, lda, sa);
GEMM_KERNEL_N(imin, jmin, jb, dm1,
#ifdef COMPLEX
ZERO,
@@ -158,7 +158,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
}
}
}
for (j = 0; j < mn; j += jb) {
jb = MIN(mn - j, blocking);
LASWP_PLUS(jb, j + jb + offset + 1, mn + offset, ZERO,
@@ -166,7 +166,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
ZERO,
#endif
a - (offset - j * lda) * COMPSIZE, lda, NULL, 0 , ipiv, 1);
}
return info;

View File

@@ -51,14 +51,14 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
}
#ifndef TRANS
LASWP_PLUS(n, 1, args -> m, ZERO,
LASWP_PLUS(n, 1, args -> m, ZERO,
(FLOAT *)args -> b + off * args -> ldb * COMPSIZE, args -> ldb, NULL, 0, args -> c, 1);
TRSM_LNLU (args, range_m, range_n, sa, sb, 0);
TRSM_LNUN (args, range_m, range_n, sa, sb, 0);
#else
TRSM_LTUN (args, range_m, range_n, sa, sb, 0);
TRSM_LTLU (args, range_m, range_n, sa, sb, 0);
LASWP_MINUS(n, 1, args -> m, ZERO,
LASWP_MINUS(n, 1, args -> m, ZERO,
(FLOAT *)args -> b + off * args -> ldb * COMPSIZE, args -> ldb, NULL, 0, args -> c, -1);
#endif
@@ -81,7 +81,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
mode = BLAS_DOUBLE | BLAS_REAL;
#else
mode = BLAS_SINGLE | BLAS_REAL;
#endif
#endif
gemm_thread_n(mode, args, NULL, NULL, inner_thread, sa, sb, args -> nthreads);
}
@@ -97,7 +97,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
mode = BLAS_DOUBLE | BLAS_REAL | (1 << BLAS_TRANSA_SHIFT);
#else
mode = BLAS_SINGLE | BLAS_REAL | (1 << BLAS_TRANSA_SHIFT);
#endif
#endif
gemm_thread_n(mode, args, NULL, NULL, inner_thread, sa, sb, args -> nthreads);
}

View File

@@ -104,7 +104,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
mode = BLAS_DOUBLE | BLAS_COMPLEX;
#else
mode = BLAS_SINGLE | BLAS_COMPLEX;
#endif
#endif
gemm_thread_n(mode, args, NULL, NULL, inner_thread, sa, sb, args -> nthreads);
}

View File

@@ -45,11 +45,11 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
LASWP_PLUS (args -> n, 1, args -> m, ZERO, ZERO, args -> b, args -> ldb, NULL, 0, args -> c, 1);
TRSM_LNLU(args, range_m, range_n, sa, sb, 0);
TRSM_LNUN(args, range_m, range_n, sa, sb, 0);
TRSM_LNUN(args, range_m, range_n, sa, sb, 0);
#elif TRANS == 2
TRSM_LTUN(args, range_m, range_n, sa, sb, 0);
TRSM_LTLU(args, range_m, range_n, sa, sb, 0);
LASWP_MINUS(args -> n, 1, args -> m, ZERO, ZERO, args -> b, args -> ldb, NULL, 0, args -> c, -1);
#elif TRANS == 3
LASWP_PLUS (args -> n, 1, args -> m, ZERO, ZERO, args -> b, args -> ldb, NULL, 0, args -> c, 1);

View File

@@ -12,7 +12,7 @@ ZLASWP = ../generic/zlaswp_k.c
endif
LASWP_DEPS = ../generic/laswp_k_1.c ../generic/laswp_k_2.c \
../generic/laswp_k_4.c ../generic/laswp_k_8.c
../generic/laswp_k_4.c ../generic/laswp_k_8.c
ZLASWP_DEPS = ../generic/zlaswp_k_1.c ../generic/zlaswp_k_2.c \
../generic/zlaswp_k_4.c

View File

@@ -45,7 +45,7 @@
#define a2 (a1 - 1)
#endif
int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG lda,
int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG lda,
FLOAT *dummy2, BLASLONG dumy3, blasint *ipiv, BLASLONG incx){
BLASLONG i, j, ip1, ip2, rows;
@@ -53,7 +53,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
FLOAT *a1;
FLOAT *b1, *b2;
FLOAT A1, A2, B1, B2;
a--;
k1 --;
@@ -64,7 +64,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
#endif
if (n <= 0) return 0;
rows = k2-k1;
if (rows <=0) return 0;
if (rows == 1) {
@@ -72,7 +72,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
ip1 = *ipiv;
a1 = a + k1 + 1;
b1 = a + ip1;
if(a1 == b1) return 0;
for(j=0; j<n; j++){
@@ -102,10 +102,10 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
piv += incx;
ip2 = *piv;
piv += incx;
b1 = a + ip1;
b2 = a + ip2;
i = (rows >> 1);
i--;
@@ -136,22 +136,22 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
A2 = *a2;
B1 = *b1;
B2 = *b2;
ip1 = *piv;
piv += incx;
ip2 = *piv;
piv += incx;
if (b1 == a1) {
if (b2 == a1) {
*a1 = A2;
*a2 = A1;
} else
} else
if (b2 != a2) {
*a2 = B2;
*b2 = A2;
}
} else
} else
if (b1 == a2) {
if (b2 != a1) {
if (b2 == a2) {
@@ -168,11 +168,11 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
*a1 = A2;
*a2 = B1;
*b1 = A1;
} else
} else
if (b2 == a2) {
*a1 = B1;
*b1 = A1;
} else
} else
if (b2 == b1) {
*a1 = B1;
*a2 = A1;
@@ -184,10 +184,10 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
*b2 = A2;
}
}
b1 = a + ip1;
b2 = a + ip2;
#ifndef MINUS
a1 += 2;
#else
@@ -205,12 +205,12 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
if (b2 == a1) {
*a1 = A2;
*a2 = A1;
} else
} else
if (b2 != a2) {
*a2 = B2;
*b2 = A2;
}
} else
} else
if (b1 == a2) {
if (b2 != a1) {
if (b2 == a2) {
@@ -227,11 +227,11 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
*a1 = A2;
*a2 = B1;
*b1 = A1;
} else
} else
if (b2 == a2) {
*a1 = B1;
*b1 = A1;
} else
} else
if (b2 == b1) {
*a1 = B1;
*a2 = A1;
@@ -249,26 +249,26 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
#else
a1 -= 2;
#endif
//Remainder: handle the last row when the row count is odd
i = (rows & 1);
if (i > 0) {
ip1 = *piv;
b1 = a + ip1;
b1 = a + ip1;
A1 = *a1;
B1 = *b1;
*a1 = B1;
*b1 = A1;
}
a += lda;
j --;
} while (j > 0);
}
return 0;
}
}

View File

@@ -47,7 +47,7 @@
#define a4 (a3 - 1)
#endif
int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG lda,
int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG lda,
FLOAT *dummy2, BLASLONG dumy3, blasint *ipiv, BLASLONG incx){
BLASLONG i, j, ip1, ip2, rows;
@@ -55,7 +55,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
FLOAT *a1, *a3;
FLOAT *b1, *b2, *b3, *b4;
FLOAT A1, A2, B1, B2, A3, A4, B3, B4;
a--;
k1 --;
@@ -66,7 +66,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
#endif
if (n <= 0) return 0;
j = (n >> 1);
rows = k2-k1;
if (rows <=0) return 0;
@@ -75,7 +75,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
ip1 = *ipiv;
a1 = a + k1 + 1;
b1 = a + ip1;
if(a1 == b1) return 0;
for(j=0; j<n; j++){
@@ -93,28 +93,28 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
if (j > 0) {
do {
piv = ipiv;
#ifndef MINUS
a1 = a + k1 + 1;
#else
a1 = a + k2;
#endif
a3 = a1 + 1 * lda;
ip1 = *piv;
piv += incx;
ip2 = *piv;
piv += incx;
b1 = a + ip1;
b2 = a + ip2;
b3 = b1 + 1 * lda;
b4 = b2 + 1 * lda;
i = ((rows) >> 1);
// Loop pipeline
i--;
@@ -137,31 +137,31 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
B2 = *b2;
B3 = *b3;
B4 = *b4;
A1 = *a1;
A2 = *a2;
A3 = *a3;
A4 = *a4;
ip1 = *piv;
piv += incx;
ip2 = *piv;
piv += incx;
if (b1 == a1) {
if (b2 == a1) {
*a1 = A2;
*a2 = A1;
*a3 = A4;
*a4 = A3;
} else
} else
if (b2 != a2) {
*a2 = B2;
*b2 = A2;
*a4 = B4;
*b4 = A4;
}
} else
} else
if (b1 == a2) {
if (b2 != a1) {
if (b2 == a2) {
@@ -186,13 +186,13 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
*a3 = A4;
*a4 = B3;
*b3 = A3;
} else
} else
if (b2 == a2) {
*a1 = B1;
*b1 = A1;
*a3 = B3;
*b3 = A3;
} else
} else
if (b2 == b1) {
*a1 = B1;
*a2 = A1;
@@ -211,13 +211,13 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
*b4 = A4;
}
}
b1 = a + ip1;
b2 = a + ip2;
b3 = b1 + 1 * lda;
b4 = b2 + 1 * lda;
#ifndef MINUS
a1 += 2;
a3 += 2;
@@ -233,7 +233,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
B2 = *b2;
B3 = *b3;
B4 = *b4;
A1 = *a1;
A2 = *a2;
A3 = *a3;
@@ -245,14 +245,14 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
*a2 = A1;
*a3 = A4;
*a4 = A3;
} else
} else
if (b2 != a2) {
*a2 = B2;
*b2 = A2;
*a4 = B4;
*b4 = A4;
}
} else
} else
if (b1 == a2) {
if (b2 != a1) {
if (b2 == a2) {
@@ -277,13 +277,13 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
*a3 = A4;
*a4 = B3;
*b3 = A3;
} else
} else
if (b2 == a2) {
*a1 = B1;
*b1 = A1;
*a3 = B3;
*b3 = A3;
} else
} else
if (b2 == b1) {
*a1 = B1;
*a2 = A1;
@@ -310,9 +310,9 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
a3 -= 2;
#endif
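//Remainder: handle the last row when the row count is odd
//Remainder: handle the last row when the row count is odd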
i = ((rows) & 1);
if (i > 0) {
ip1 = *piv;
b1 = a + ip1;
@@ -328,7 +328,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
*a3 = B3;
*b3 = A3;
}
a += 2 * lda;
j --;
} while (j > 0);
@@ -342,15 +342,15 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
#else
a1 = a + k2;
#endif
ip1 = *piv;
piv += incx;
ip2 = *piv;
piv += incx;
b1 = a + ip1;
b2 = a + ip2;
i = ((rows) >> 1);
i --;
@@ -359,22 +359,22 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
A2 = *a2;
B1 = *b1;
B2 = *b2;
ip1 = *piv;
piv += incx;
ip2 = *piv;
piv += incx;
if (b1 == a1) {
if (b2 == a1) {
*a1 = A2;
*a2 = A1;
} else
} else
if (b2 != a2) {
*a2 = B2;
*b2 = A2;
}
} else
} else
if (b1 == a2) {
if (b2 != a1) {
if (b2 == a2) {
@@ -391,11 +391,11 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
*a1 = A2;
*a2 = B1;
*b1 = A1;
} else
} else
if (b2 == a2) {
*a1 = B1;
*b1 = A1;
} else
} else
if (b2 == b1) {
*a1 = B1;
*a2 = A1;
@@ -407,10 +407,10 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
*b2 = A2;
}
}
b1 = a + ip1;
b2 = a + ip2;
#ifndef MINUS
a1 += 2;
#else
@@ -418,7 +418,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
#endif
i --;
}
//Loop Ending (n=1)
A1 = *a1;
A2 = *a2;
@@ -428,12 +428,12 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
if (b2 == a1) {
*a1 = A2;
*a2 = A1;
} else
} else
if (b2 != a2) {
*a2 = B2;
*b2 = A2;
}
} else
} else
if (b1 == a2) {
if (b2 != a1) {
if (b2 == a2) {
@@ -450,11 +450,11 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
*a1 = A2;
*a2 = B1;
*b1 = A1;
} else
} else
if (b2 == a2) {
*a1 = B1;
*b1 = A1;
} else
} else
if (b2 == b1) {
*a1 = B1;
*a2 = A1;
@@ -472,13 +472,13 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
#else
a1 -= 2;
#endif
//Remainder: handle the last row when the row count is odd
i = (rows & 1);
if (i > 0) {
ip1 = *piv;
b1 = a + ip1;
b1 = a + ip1;
A1 = *a1;
B1 = *b1;
@@ -488,5 +488,5 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
}
return 0;
}
}

View File

@@ -51,7 +51,7 @@
#define a8 (a7 - 1)
#endif
int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG lda,
int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG lda,
FLOAT *dummy2, BLASLONG dumy3, blasint *ipiv, BLASLONG incx){
BLASLONG i, j, ip1, ip2, rows;
@@ -61,7 +61,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
FLOAT *b5, *b6, *b7, *b8;
FLOAT A1, A2, B1, B2, A3, A4, B3, B4;
FLOAT A5, A6, B5, B6, A7, A8, B7, B8;
a--;
k1 --;
@@ -80,7 +80,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
ip1 = *ipiv;
a1 = a + k1 + 1;
b1 = a + ip1;
if(a1 == b1) return 0;
for(j=0; j<n; j++){
@@ -105,7 +105,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
#else
a1 = a + k2;
#endif
a3 = a1 + 1 * lda;
a5 = a1 + 2 * lda;
a7 = a1 + 3 * lda;
@@ -114,10 +114,10 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
piv += incx;
ip2 = *piv;
piv += incx;
b1 = a + ip1;
b2 = a + ip2;
b3 = b1 + 1 * lda;
b4 = b2 + 1 * lda;
b5 = b1 + 2 * lda;
@@ -126,7 +126,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
b8 = b2 + 3 * lda;
i = ((k2 - k1) >> 1);
i--; //Loop pipeline
//Main Loop
while (i > 0) {
@@ -147,12 +147,12 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
B6 = *b6;
B7 = *b7;
B8 = *b8;
ip1 = *piv;
piv += incx;
ip2 = *piv;
piv += incx;
if (b1 == a1) {
if (b2 == a1) {
*a1 = A2;
@@ -163,7 +163,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
*a6 = A5;
*a7 = A8;
*a8 = A7;
} else
} else
if (b2 != a2) {
*a2 = B2;
*b2 = A2;
@@ -174,7 +174,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
*a8 = B8;
*b8 = A8;
}
} else
} else
if (b1 == a2) {
if (b2 != a1) {
if (b2 == a2) {
@@ -215,7 +215,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
*a7 = A8;
*a8 = B7;
*b7 = A7;
} else
} else
if (b2 == a2) {
*a1 = B1;
*b1 = A1;
@@ -225,7 +225,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
*b5 = A5;
*a7 = B7;
*b7 = A7;
} else
} else
if (b2 == b1) {
*a1 = B1;
*a2 = A1;
@@ -258,17 +258,17 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
*b8 = A8;
}
}
b1 = a + ip1;
b2 = a + ip2;
b3 = b1 + 1 * lda;
b4 = b2 + 1 * lda;
b5 = b1 + 2 * lda;
b6 = b2 + 2 * lda;
b7 = b1 + 3 * lda;
b8 = b2 + 3 * lda;
#ifndef MINUS
a1 += 2;
a3 += 2;
@@ -312,7 +312,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
*a6 = A5;
*a7 = A8;
*a8 = A7;
} else
} else
if (b2 != a2) {
*a2 = B2;
*b2 = A2;
@@ -323,7 +323,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
*a8 = B8;
*b8 = A8;
}
} else
} else
if (b1 == a2) {
if (b2 != a1) {
if (b2 == a2) {
@@ -364,7 +364,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
*a7 = A8;
*a8 = B7;
*b7 = A7;
} else
} else
if (b2 == a2) {
*a1 = B1;
*b1 = A1;
@@ -374,7 +374,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
*b5 = A5;
*a7 = B7;
*b7 = A7;
} else
} else
if (b2 == b1) {
*a1 = B1;
*a2 = A1;
@@ -420,9 +420,9 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
a7 -= 2;
#endif
//Remain
//Remain
i = ((rows) & 1);
if (i > 0) {
ip1 = *piv;
b1 = a + ip1;
@@ -449,9 +449,9 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
*a7 = B7;
*b7 = A7;
}
a += 4 * lda;
j --;
} while (j > 0);
}
@@ -464,20 +464,20 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
#else
a1 = a + k2;
#endif
a3 = a1 + 1 * lda;
ip1 = *piv;
piv += incx;
ip2 = *piv;
piv += incx;
b1 = a + ip1;
b2 = a + ip2;
b3 = b1 + 1 * lda;
b4 = b2 + 1 * lda;
i = ((rows) >> 1);
i--;
@@ -486,31 +486,31 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
A2 = *a2;
A3 = *a3;
A4 = *a4;
B1 = *b1;
B2 = *b2;
B3 = *b3;
B4 = *b4;
ip1 = *piv;
piv += incx;
ip2 = *piv;
piv += incx;
if (b1 == a1) {
if (b2 == a1) {
*a1 = A2;
*a2 = A1;
*a3 = A4;
*a4 = A3;
} else
} else
if (b2 != a2) {
*a2 = B2;
*b2 = A2;
*a4 = B4;
*b4 = A4;
}
} else
} else
if (b1 == a2) {
if (b2 != a1) {
if (b2 == a2) {
@@ -535,13 +535,13 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
*a3 = A4;
*a4 = B3;
*b3 = A3;
} else
} else
if (b2 == a2) {
*a1 = B1;
*b1 = A1;
*a3 = B3;
*b3 = A3;
} else
} else
if (b2 == b1) {
*a1 = B1;
*a2 = A1;
@@ -560,13 +560,13 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
*b4 = A4;
}
}
b1 = a + ip1;
b2 = a + ip2;
b3 = b1 + 1 * lda;
b4 = b2 + 1 * lda;
#ifndef MINUS
a1 += 2;
a3 += 2;
@@ -576,13 +576,13 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
#endif
i --;
}
//Loop Ending
B1 = *b1;
B2 = *b2;
B3 = *b3;
B4 = *b4;
A1 = *a1;
A2 = *a2;
A3 = *a3;
@@ -594,14 +594,14 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
*a2 = A1;
*a3 = A4;
*a4 = A3;
} else
} else
if (b2 != a2) {
*a2 = B2;
*b2 = A2;
*a4 = B4;
*b4 = A4;
}
} else
} else
if (b1 == a2) {
if (b2 != a1) {
if (b2 == a2) {
@@ -626,13 +626,13 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
*a3 = A4;
*a4 = B3;
*b3 = A3;
} else
} else
if (b2 == a2) {
*a1 = B1;
*b1 = A1;
*a3 = B3;
*b3 = A3;
} else
} else
if (b2 == b1) {
*a1 = B1;
*a2 = A1;
@@ -660,7 +660,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
#endif
i = ((rows) & 1);
if (i > 0) {
ip1 = *piv;
b1 = a + ip1;
@@ -675,7 +675,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
*a3 = B3;
*b3 = A3;
}
a += 2 * lda;
}
@@ -687,15 +687,15 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
#else
a1 = a + k2;
#endif
ip1 = *piv;
piv += incx;
ip2 = *piv;
piv += incx;
b1 = a + ip1;
b2 = a + ip2;
i = ((rows) >> 1);
i --;
@@ -704,22 +704,22 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
A2 = *a2;
B1 = *b1;
B2 = *b2;
ip1 = *piv;
piv += incx;
ip2 = *piv;
piv += incx;
if (b1 == a1) {
if (b2 == a1) {
*a1 = A2;
*a2 = A1;
} else
} else
if (b2 != a2) {
*a2 = B2;
*b2 = A2;
}
} else
} else
if (b1 == a2) {
if (b2 != a1) {
if (b2 == a2) {
@@ -736,11 +736,11 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
*a1 = A2;
*a2 = B1;
*b1 = A1;
} else
} else
if (b2 == a2) {
*a1 = B1;
*b1 = A1;
} else
} else
if (b2 == b1) {
*a1 = B1;
*a2 = A1;
@@ -752,10 +752,10 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
*b2 = A2;
}
}
b1 = a + ip1;
b2 = a + ip2;
#ifndef MINUS
a1 += 2;
#else
@@ -763,7 +763,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
#endif
i --;
}
//Loop Ending (n=1)
A1 = *a1;
A2 = *a2;
@@ -773,12 +773,12 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
if (b2 == a1) {
*a1 = A2;
*a2 = A1;
} else
} else
if (b2 != a2) {
*a2 = B2;
*b2 = A2;
}
} else
} else
if (b1 == a2) {
if (b2 != a1) {
if (b2 == a2) {
@@ -795,11 +795,11 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
*a1 = A2;
*a2 = B1;
*b1 = A1;
} else
} else
if (b2 == a2) {
*a1 = B1;
*b1 = A1;
} else
} else
if (b2 == b1) {
*a1 = B1;
*a2 = A1;
@@ -817,13 +817,13 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
#else
a1 -= 2;
#endif
//Remain
i = (rows & 1);
if (i > 0) {
ip1 = *piv;
b1 = a + ip1;
b1 = a + ip1;
A1 = *a1;
B1 = *b1;
@@ -833,5 +833,5 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
}
return 0;
}
}

View File

@@ -59,7 +59,7 @@
#define a16 (a15 - 1)
#endif
int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG lda,
int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG lda,
FLOAT *dummy2, BLASLONG dumy3, blasint *ipiv, BLASLONG incx){
BLASLONG i, j, ip1, ip2, rows;
@@ -74,7 +74,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
FLOAT A5, A6, B5, B6, A7, A8, B7, B8;
FLOAT A9, A10, B9, B10, A11, A12, B11, B12;
FLOAT A13, A14, B13, B14, A15, A16, B15, B16;
a--;
k1 --;
@@ -93,7 +93,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
ip1 = *ipiv;
a1 = a + k1 + 1;
b1 = a + ip1;
if(a1 == b1) return 0;
for(j=0; j<n; j++){
@@ -118,7 +118,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
#else
a1 = a + k2;
#endif
a3 = a1 + 1 * lda;
a5 = a1 + 2 * lda;
a7 = a1 + 3 * lda;
@@ -131,10 +131,10 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
piv += incx;
ip2 = *piv;
piv += incx;
b1 = a + ip1;
b2 = a + ip2;
b3 = b1 + 1 * lda;
b4 = b2 + 1 * lda;
b5 = b1 + 2 * lda;
@@ -164,7 +164,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
B6 = *b6;
B7 = *b7;
B8 = *b8;
B9 = *b9;
B10 = *b10;
B11 = *b11;
@@ -173,7 +173,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
B14 = *b14;
B15 = *b15;
B16 = *b16;
A1 = *a1;
A2 = *a2;
A3 = *a3;
@@ -196,7 +196,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
piv += incx;
ip2 = *piv;
piv += incx;
if (b1 == a1) {
if (b2 == a1) {
*a1 = A2;
@@ -215,7 +215,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
*a14 = A13;
*a15 = A16;
*a16 = A15;
} else
} else
if (b2 != a2) {
*a2 = B2;
*b2 = A2;
@@ -235,7 +235,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
*a16 = B16;
*b16 = A16;
}
} else
} else
if (b1 == a2) {
if (b2 != a1) {
if (b2 == a2) {
@@ -311,7 +311,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
*a15 = A16;
*a16 = B15;
*b15 = A15;
} else
} else
if (b2 == a2) {
*a1 = B1;
*b1 = A1;
@@ -330,7 +330,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
*b13 = A13;
*a15 = B15;
*b15 = A15;
} else
} else
if (b2 == b1) {
*a1 = B1;
*a2 = A1;
@@ -393,17 +393,17 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
*b16 = A16;
}
}
b1 = a + ip1;
b2 = a + ip2;
b3 = b1 + 1 * lda;
b4 = b2 + 1 * lda;
b5 = b1 + 2 * lda;
b6 = b2 + 2 * lda;
b7 = b1 + 3 * lda;
b8 = b2 + 3 * lda;
b9 = b1 + 4 * lda;
b10 = b2 + 4 * lda;
b11 = b1 + 5 * lda;
@@ -443,7 +443,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
B6 = *b6;
B7 = *b7;
B8 = *b8;
B9 = *b9;
B10 = *b10;
B11 = *b11;
@@ -452,7 +452,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
B14 = *b14;
B15 = *b15;
B16 = *b16;
A1 = *a1;
A2 = *a2;
A3 = *a3;
@@ -488,7 +488,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
*a14 = A13;
*a15 = A16;
*a16 = A15;
} else
} else
if (b2 != a2) {
*a2 = B2;
*b2 = A2;
@@ -508,7 +508,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
*a16 = B16;
*b16 = A16;
}
} else
} else
if (b1 == a2) {
if (b2 != a1) {
if (b2 == a2) {
@@ -584,7 +584,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
*a15 = A16;
*a16 = B15;
*b15 = A15;
} else
} else
if (b2 == a2) {
*a1 = B1;
*b1 = A1;
@@ -603,7 +603,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
*b13 = A13;
*a15 = B15;
*b15 = A15;
} else
} else
if (b2 == b1) {
*a1 = B1;
*a2 = A1;
@@ -666,7 +666,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
*b16 = A16;
}
}
#ifndef MINUS
a1 += 2;
@@ -686,10 +686,10 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
a11 -= 2;
a13 -= 2;
a15 -= 2;
#endif
#endif
//Remain
i = (rows & 1);
if (i > 0) {
ip1 = *piv;
b1 = a + ip1;
@@ -697,7 +697,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
b5 = b1 + 2 * lda;
b7 = b1 + 3 * lda;
b9 = b1 + 4 * lda;
b11 = b1 + 5 * lda;
b13 = b1 + 6 * lda;
@@ -740,9 +740,9 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
*a15 = B15;
*b15 = A15;
}
a += 8 * lda;
j --;
} while (j > 0);
}
@@ -755,19 +755,19 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
#else
a1 = a + k2;
#endif
a3 = a1 + 1 * lda;
a5 = a1 + 2 * lda;
a7 = a1 + 3 * lda;
ip1 = *piv;
piv += incx;
ip2 = *piv;
piv += incx;
b1 = a + ip1;
b2 = a + ip2;
b3 = b1 + 1 * lda;
b4 = b2 + 1 * lda;
b5 = b1 + 2 * lda;
@@ -787,7 +787,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
A6 = *a6;
A7 = *a7;
A8 = *a8;
B1 = *b1;
B2 = *b2;
B3 = *b3;
@@ -796,12 +796,12 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
B6 = *b6;
B7 = *b7;
B8 = *b8;
ip1 = *piv;
piv += incx;
ip2 = *piv;
piv += incx;
if (b1 == a1) {
if (b2 == a1) {
*a1 = A2;
@@ -812,7 +812,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
*a6 = A5;
*a7 = A8;
*a8 = A7;
} else
} else
if (b2 != a2) {
*a2 = B2;
*b2 = A2;
@@ -823,7 +823,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
*a8 = B8;
*b8 = A8;
}
} else
} else
if (b1 == a2) {
if (b2 != a1) {
if (b2 == a2) {
@@ -864,7 +864,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
*a7 = A8;
*a8 = B7;
*b7 = A7;
} else
} else
if (b2 == a2) {
*a1 = B1;
*b1 = A1;
@@ -874,7 +874,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
*b5 = A5;
*a7 = B7;
*b7 = A7;
} else
} else
if (b2 == b1) {
*a1 = B1;
*a2 = A1;
@@ -907,17 +907,17 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
*b8 = A8;
}
}
b1 = a + ip1;
b2 = a + ip2;
b3 = b1 + 1 * lda;
b4 = b2 + 1 * lda;
b5 = b1 + 2 * lda;
b6 = b2 + 2 * lda;
b7 = b1 + 3 * lda;
b8 = b2 + 3 * lda;
#ifndef MINUS
a1 += 2;
a3 += 2;
@@ -959,7 +959,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
*a6 = A5;
*a7 = A8;
*a8 = A7;
} else
} else
if (b2 != a2) {
*a2 = B2;
*b2 = A2;
@@ -970,7 +970,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
*a8 = B8;
*b8 = A8;
}
} else
} else
if (b1 == a2) {
if (b2 != a1) {
if (b2 == a2) {
@@ -1011,7 +1011,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
*a7 = A8;
*a8 = B7;
*b7 = A7;
} else
} else
if (b2 == a2) {
*a1 = B1;
*b1 = A1;
@@ -1021,7 +1021,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
*b5 = A5;
*a7 = B7;
*b7 = A7;
} else
} else
if (b2 == b1) {
*a1 = B1;
*a2 = A1;
@@ -1068,7 +1068,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
#endif
i = (rows & 1);
if (i > 0) {
ip1 = *piv;
b1 = a + ip1;
@@ -1094,7 +1094,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
*a7 = B7;
*b7 = A7;
}
a += 4 * lda;
}
@@ -1106,20 +1106,20 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
#else
a1 = a + k2;
#endif
a3 = a1 + 1 * lda;
ip1 = *piv;
piv += incx;
ip2 = *piv;
piv += incx;
b1 = a + ip1;
b2 = a + ip2;
b3 = b1 + 1 * lda;
b4 = b2 + 1 * lda;
i = ((rows) >> 1);
i--;
@@ -1128,31 +1128,31 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
A2 = *a2;
A3 = *a3;
A4 = *a4;
B1 = *b1;
B2 = *b2;
B3 = *b3;
B4 = *b4;
ip1 = *piv;
piv += incx;
ip2 = *piv;
piv += incx;
if (b1 == a1) {
if (b2 == a1) {
*a1 = A2;
*a2 = A1;
*a3 = A4;
*a4 = A3;
} else
} else
if (b2 != a2) {
*a2 = B2;
*b2 = A2;
*a4 = B4;
*b4 = A4;
}
} else
} else
if (b1 == a2) {
if (b2 != a1) {
if (b2 == a2) {
@@ -1177,13 +1177,13 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
*a3 = A4;
*a4 = B3;
*b3 = A3;
} else
} else
if (b2 == a2) {
*a1 = B1;
*b1 = A1;
*a3 = B3;
*b3 = A3;
} else
} else
if (b2 == b1) {
*a1 = B1;
*a2 = A1;
@@ -1202,13 +1202,13 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
*b4 = A4;
}
}
b1 = a + ip1;
b2 = a + ip2;
b3 = b1 + 1 * lda;
b4 = b2 + 1 * lda;
#ifndef MINUS
a1 += 2;
a3 += 2;
@@ -1218,13 +1218,13 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
#endif
i --;
}
//Loop Ending
B1 = *b1;
B2 = *b2;
B3 = *b3;
B4 = *b4;
A1 = *a1;
A2 = *a2;
A3 = *a3;
@@ -1236,14 +1236,14 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
*a2 = A1;
*a3 = A4;
*a4 = A3;
} else
} else
if (b2 != a2) {
*a2 = B2;
*b2 = A2;
*a4 = B4;
*b4 = A4;
}
} else
} else
if (b1 == a2) {
if (b2 != a1) {
if (b2 == a2) {
@@ -1268,13 +1268,13 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
*a3 = A4;
*a4 = B3;
*b3 = A3;
} else
} else
if (b2 == a2) {
*a1 = B1;
*b1 = A1;
*a3 = B3;
*b3 = A3;
} else
} else
if (b2 == b1) {
*a1 = B1;
*a2 = A1;
@@ -1302,7 +1302,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
#endif
i = ((rows) & 1);
if (i > 0) {
ip1 = *piv;
b1 = a + ip1;
@@ -1317,7 +1317,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
*a3 = B3;
*b3 = A3;
}
a += 2 * lda;
}
@@ -1329,15 +1329,15 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
#else
a1 = a + k2;
#endif
ip1 = *piv;
piv += incx;
ip2 = *piv;
piv += incx;
b1 = a + ip1;
b2 = a + ip2;
i = ((rows) >> 1);
i --;
@@ -1346,22 +1346,22 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
A2 = *a2;
B1 = *b1;
B2 = *b2;
ip1 = *piv;
piv += incx;
ip2 = *piv;
piv += incx;
if (b1 == a1) {
if (b2 == a1) {
*a1 = A2;
*a2 = A1;
} else
} else
if (b2 != a2) {
*a2 = B2;
*b2 = A2;
}
} else
} else
if (b1 == a2) {
if (b2 != a1) {
if (b2 == a2) {
@@ -1378,11 +1378,11 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
*a1 = A2;
*a2 = B1;
*b1 = A1;
} else
} else
if (b2 == a2) {
*a1 = B1;
*b1 = A1;
} else
} else
if (b2 == b1) {
*a1 = B1;
*a2 = A1;
@@ -1394,10 +1394,10 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
*b2 = A2;
}
}
b1 = a + ip1;
b2 = a + ip2;
#ifndef MINUS
a1 += 2;
#else
@@ -1405,7 +1405,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
#endif
i --;
}
//Loop Ending (n=1)
A1 = *a1;
A2 = *a2;
@@ -1415,12 +1415,12 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
if (b2 == a1) {
*a1 = A2;
*a2 = A1;
} else
} else
if (b2 != a2) {
*a2 = B2;
*b2 = A2;
}
} else
} else
if (b1 == a2) {
if (b2 != a1) {
if (b2 == a2) {
@@ -1437,11 +1437,11 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
*a1 = A2;
*a2 = B1;
*b1 = A1;
} else
} else
if (b2 == a2) {
*a1 = B1;
*b1 = A1;
} else
} else
if (b2 == b1) {
*a1 = B1;
*a2 = A1;
@@ -1459,13 +1459,13 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
#else
a1 -= 2;
#endif
//Remain
i = (rows & 1);
if (i > 0) {
ip1 = *piv;
b1 = a + ip1;
b1 = a + ip1;
A1 = *a1;
B1 = *b1;
@@ -1475,5 +1475,5 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
}
return 0;
}
}

View File

@@ -45,8 +45,8 @@
#define a2 (a1 - 2)
#endif
int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT dummy4,
FLOAT *a, BLASLONG lda,
int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT dummy4,
FLOAT *a, BLASLONG lda,
FLOAT *dummy2, BLASLONG dumy3, blasint *ipiv, BLASLONG incx){
BLASLONG i, j, ip1, ip2, rows;
@@ -79,7 +79,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT dummy4,
#endif
b1 = a + ip1;
if(a1 == b1) return 0;
for(j=0; j<n; j++){
@@ -92,7 +92,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT dummy4,
*(a1 + 1) = B2;
*(b1 + 0) = A1;
*(b1 + 1) = A2;
a1 += lda;
b1 += lda;
}
@@ -114,10 +114,10 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT dummy4,
piv += incx;
ip2 = *piv * 2;
piv += incx;
b1 = a + ip1;
b2 = a + ip2;
i = ((k2 - k1) >> 1);
i --;
//Loop pipeline
@@ -152,26 +152,26 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT dummy4,
B2 = *(b1 + 1);
B3 = *(b2 + 0);
B4 = *(b2 + 1);
ip1 = *piv * 2;
piv += incx;
ip2 = *piv * 2;
piv += incx;
if (b1 == a1) {
if (b2 == a1) {
*(a1 + 0) = A3;
*(a1 + 1) = A4;
*(a2 + 0) = A1;
*(a2 + 1) = A2;
} else
} else
if (b2 != a2) {
*(a2 + 0) = B3;
*(a2 + 1) = B4;
*(b2 + 0) = A3;
*(b2 + 1) = A4;
}
} else
} else
if (b1 == a2) {
if (b2 != a1) {
if (b2 == a2) {
@@ -196,13 +196,13 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT dummy4,
*(a2 + 1) = B2;
*(b1 + 0) = A1;
*(b1 + 1) = A2;
} else
} else
if (b2 == a2) {
*(a1 + 0) = B1;
*(a1 + 1) = B2;
*(b1 + 0) = A1;
*(b1 + 1) = A2;
} else
} else
if (b2 == b1) {
*(a1 + 0) = B1;
*(a1 + 1) = B2;
@@ -221,10 +221,10 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT dummy4,
*(b2 + 1) = A4;
}
}
b1 = a + ip1;
b2 = a + ip2;
#ifndef MINUS
a1 += 4;
#else
@@ -243,22 +243,22 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT dummy4,
B2 = *(b1 + 1);
B3 = *(b2 + 0);
B4 = *(b2 + 1);
if (b1 == a1) {
if (b2 == a1) {
*(a1 + 0) = A3;
*(a1 + 1) = A4;
*(a2 + 0) = A1;
*(a2 + 1) = A2;
} else
} else
if (b2 != a2) {
*(a2 + 0) = B3;
*(a2 + 1) = B4;
*(b2 + 0) = A3;
*(b2 + 1) = A4;
}
} else
} else
if (b1 == a2) {
if (b2 != a1) {
if (b2 == a2) {
@@ -283,13 +283,13 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT dummy4,
*(a2 + 1) = B2;
*(b1 + 0) = A1;
*(b1 + 1) = A2;
} else
} else
if (b2 == a2) {
*(a1 + 0) = B1;
*(a1 + 1) = B2;
*(b1 + 0) = A1;
*(b1 + 1) = A2;
} else
} else
if (b2 == b1) {
*(a1 + 0) = B1;
*(a1 + 1) = B2;
@@ -308,8 +308,8 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT dummy4,
*(b2 + 1) = A4;
}
}
#ifndef MINUS
a1 += 4;
#else
@@ -318,7 +318,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT dummy4,
//Remain
i = (rows & 1);
if (i > 0) {
ip1 = *piv * 2;
b1 = a + ip1;
@@ -332,13 +332,13 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT dummy4,
*(b1 + 0) = A1;
*(b1 + 1) = A2;
}
a += lda;
j --;
} while (j > 0);
}
return 0;
}
}

View File

@@ -45,8 +45,8 @@
#define a2 (a1 - 2)
#endif
int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT dummy4,
FLOAT *a, BLASLONG lda,
int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT dummy4,
FLOAT *a, BLASLONG lda,
FLOAT *dummy2, BLASLONG dumy3, blasint *ipiv, BLASLONG incx){
BLASLONG i, j, ip1, ip2, rows;
@@ -81,7 +81,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT dummy4,
#endif
b1 = a + ip1;
if(a1 == b1) return 0;
for(j=0; j<n; j++){
@@ -94,7 +94,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT dummy4,
*(a1 + 1) = B2;
*(b1 + 0) = A1;
*(b1 + 1) = A2;
a1 += lda;
b1 += lda;
}
@@ -116,10 +116,10 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT dummy4,
piv += incx;
ip2 = *piv * 2;
piv += incx;
b1 = a + ip1;
b2 = a + ip2;
i = (rows >> 1);
i--;
@@ -154,7 +154,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT dummy4,
B2 = *(b1 + 1);
B3 = *(b2 + 0);
B4 = *(b2 + 1);
B5 = *(b1 + 0 + lda);
B6 = *(b1 + 1 + lda);
B7 = *(b2 + 0 + lda);
@@ -164,7 +164,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT dummy4,
piv += incx;
ip2 = *piv * 2;
piv += incx;
if (b1 == a1) {
if (b2 == a1) {
*(a1 + 0) = A3;
@@ -175,7 +175,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT dummy4,
*(a1 + 1 + lda) = A8;
*(a2 + 0 + lda) = A5;
*(a2 + 1 + lda) = A6;
} else
} else
if (b2 != a2) {
*(a2 + 0) = B3;
*(a2 + 1) = B4;
@@ -186,7 +186,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT dummy4,
*(b2 + 0 + lda) = A7;
*(b2 + 1 + lda) = A8;
}
} else
} else
if (b1 == a2) {
if (b2 != a1) {
if (b2 == a2) {
@@ -227,7 +227,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT dummy4,
*(a2 + 1 + lda) = B6;
*(b1 + 0 + lda) = A5;
*(b1 + 1 + lda) = A6;
} else
} else
if (b2 == a2) {
*(a1 + 0) = B1;
*(a1 + 1) = B2;
@@ -237,7 +237,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT dummy4,
*(a1 + 1 + lda) = B6;
*(b1 + 0 + lda) = A5;
*(b1 + 1 + lda) = A6;
} else
} else
if (b2 == b1) {
*(a1 + 0) = B1;
*(a1 + 1) = B2;
@@ -270,10 +270,10 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT dummy4,
*(b2 + 1 + lda) = A8;
}
}
b1 = a + ip1;
b2 = a + ip2;
#ifndef MINUS
a1 += 4;
#else
@@ -296,7 +296,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT dummy4,
B2 = *(b1 + 1);
B3 = *(b2 + 0);
B4 = *(b2 + 1);
B5 = *(b1 + 0 + lda);
B6 = *(b1 + 1 + lda);
B7 = *(b2 + 0 + lda);
@@ -312,7 +312,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT dummy4,
*(a1 + 1 + lda) = A8;
*(a2 + 0 + lda) = A5;
*(a2 + 1 + lda) = A6;
} else
} else
if (b2 != a2) {
*(a2 + 0) = B3;
*(a2 + 1) = B4;
@@ -323,7 +323,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT dummy4,
*(b2 + 0 + lda) = A7;
*(b2 + 1 + lda) = A8;
}
} else
} else
if (b1 == a2) {
if (b2 != a1) {
if (b2 == a2) {
@@ -364,7 +364,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT dummy4,
*(a2 + 1 + lda) = B6;
*(b1 + 0 + lda) = A5;
*(b1 + 1 + lda) = A6;
} else
} else
if (b2 == a2) {
*(a1 + 0) = B1;
*(a1 + 1) = B2;
@@ -374,7 +374,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT dummy4,
*(a1 + 1 + lda) = B6;
*(b1 + 0 + lda) = A5;
*(b1 + 1 + lda) = A6;
} else
} else
if (b2 == b1) {
*(a1 + 0) = B1;
*(a1 + 1) = B2;
@@ -407,9 +407,9 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT dummy4,
*(b2 + 1 + lda) = A8;
}
}
#ifndef MINUS
a1 += 4;
#else
@@ -418,7 +418,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT dummy4,
//Remain
i = (rows & 1);
if (i > 0) {
ip1 = *piv * 2;
b1 = a + ip1;
@@ -440,30 +440,30 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT dummy4,
*(b1 + 0 + lda) = A3;
*(b1 + 1 + lda) = A4;
}
a += 2 * lda;
j --;
} while (j > 0);
}
if (n & 1) {
piv = ipiv;
#ifndef MINUS
a1 = a + (k1 + 1) * 2;
#else
a1 = a + k2 * 2;
#endif
ip1 = *piv * 2;
piv += incx;
ip2 = *piv * 2;
piv += incx;
b1 = a + ip1;
b2 = a + ip2;
i = (rows >> 1);
i--;
@@ -478,26 +478,26 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT dummy4,
B2 = *(b1 + 1);
B3 = *(b2 + 0);
B4 = *(b2 + 1);
ip1 = *piv * 2;
piv += incx;
ip2 = *piv * 2;
piv += incx;
if (b1 == a1) {
if (b2 == a1) {
*(a1 + 0) = A3;
*(a1 + 1) = A4;
*(a2 + 0) = A1;
*(a2 + 1) = A2;
} else
} else
if (b2 != a2) {
*(a2 + 0) = B3;
*(a2 + 1) = B4;
*(b2 + 0) = A3;
*(b2 + 1) = A4;
}
} else
} else
if (b1 == a2) {
if (b2 != a1) {
if (b2 == a2) {
@@ -522,13 +522,13 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT dummy4,
*(a2 + 1) = B2;
*(b1 + 0) = A1;
*(b1 + 1) = A2;
} else
} else
if (b2 == a2) {
*(a1 + 0) = B1;
*(a1 + 1) = B2;
*(b1 + 0) = A1;
*(b1 + 1) = A2;
} else
} else
if (b2 == b1) {
*(a1 + 0) = B1;
*(a1 + 1) = B2;
@@ -547,10 +547,10 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT dummy4,
*(b2 + 1) = A4;
}
}
b1 = a + ip1;
b2 = a + ip2;
#ifndef MINUS
a1 += 4;
#else
@@ -567,21 +567,21 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT dummy4,
B2 = *(b1 + 1);
B3 = *(b2 + 0);
B4 = *(b2 + 1);
if (b1 == a1) {
if (b2 == a1) {
*(a1 + 0) = A3;
*(a1 + 1) = A4;
*(a2 + 0) = A1;
*(a2 + 1) = A2;
} else
} else
if (b2 != a2) {
*(a2 + 0) = B3;
*(a2 + 1) = B4;
*(b2 + 0) = A3;
*(b2 + 1) = A4;
}
} else
} else
if (b1 == a2) {
if (b2 != a1) {
if (b2 == a2) {
@@ -606,13 +606,13 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT dummy4,
*(a2 + 1) = B2;
*(b1 + 0) = A1;
*(b1 + 1) = A2;
} else
} else
if (b2 == a2) {
*(a1 + 0) = B1;
*(a1 + 1) = B2;
*(b1 + 0) = A1;
*(b1 + 1) = A2;
} else
} else
if (b2 == b1) {
*(a1 + 0) = B1;
*(a1 + 1) = B2;
@@ -631,16 +631,16 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT dummy4,
*(b2 + 1) = A4;
}
}
#ifndef MINUS
a1 += 4;
#else
a1 -= 4;
#endif
//Remain
i = (rows & 1);
if (i > 0) {
ip1 = *piv * 2;
b1 = a + ip1;
@@ -657,5 +657,5 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT dummy4,
}
return 0;
}
}

View File

@@ -51,8 +51,8 @@
#define a8 (a7 - 2)
#endif
int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT dummy4,
FLOAT *a, BLASLONG lda,
int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT dummy4,
FLOAT *a, BLASLONG lda,
FLOAT *dummy2, BLASLONG dumy3, blasint *ipiv, BLASLONG incx){
BLASLONG i, j, ip1, ip2, rows;
@@ -89,7 +89,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT dummy4,
#endif
b1 = a + ip1;
if(a1 == b1) return 0;
for(j=0; j<n; j++){
@@ -102,7 +102,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT dummy4,
*(a1 + 1) = B2;
*(b1 + 0) = A1;
*(b1 + 1) = A2;
a1 += lda;
b1 += lda;
}
@@ -128,7 +128,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT dummy4,
piv += incx;
ip2 = *piv * 2;
piv += incx;
b1 = a + ip1;
b2 = a + ip2;
@@ -185,7 +185,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT dummy4,
piv += incx;
ip2 = *piv * 2;
piv += incx;
if (b1 == a1) {
if (b2 == a1) {
*(a1 + 0) = A3;
@@ -204,7 +204,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT dummy4,
*(a7 + 1) = A16;
*(a8 + 0) = A13;
*(a8 + 1) = A14;
} else
} else
if (b2 != a2) {
*(a2 + 0) = B3;
*(a2 + 1) = B4;
@@ -223,7 +223,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT dummy4,
*(b8 + 0) = A15;
*(b8 + 1) = A16;
}
} else
} else
if (b1 == a2) {
if (b2 != a1) {
if (b2 == a2) {
@@ -297,7 +297,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT dummy4,
*(a8 + 1) = B14;
*(b7 + 0) = A13;
*(b7 + 1) = A14;
} else
} else
if (b2 == a2) {
*(a1 + 0) = B1;
*(a1 + 1) = B2;
@@ -315,7 +315,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT dummy4,
*(a7 + 1) = B14;
*(b7 + 0) = A13;
*(b7 + 1) = A14;
} else
} else
if (b2 == b1) {
*(a1 + 0) = B1;
*(a1 + 1) = B2;
@@ -377,10 +377,10 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT dummy4,
*(b8 + 1) = A16;
}
}
b1 = a + ip1;
b2 = a + ip2;
b3 = b1 + 1 * lda;
b4 = b2 + 1 * lda;
b5 = b1 + 2 * lda;
@@ -401,7 +401,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT dummy4,
#endif
i --;
}
//Loop Ending
A1 = *(a1 + 0);
A2 = *(a1 + 1);
@@ -438,7 +438,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT dummy4,
B14 = *(b7 + 1);
B15 = *(b8 + 0);
B16 = *(b8 + 1);
if (b1 == a1) {
if (b2 == a1) {
*(a1 + 0) = A3;
@@ -457,7 +457,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT dummy4,
*(a7 + 1) = A16;
*(a8 + 0) = A13;
*(a8 + 1) = A14;
} else
} else
if (b2 != a2) {
*(a2 + 0) = B3;
*(a2 + 1) = B4;
@@ -476,7 +476,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT dummy4,
*(b8 + 0) = A15;
*(b8 + 1) = A16;
}
} else
} else
if (b1 == a2) {
if (b2 != a1) {
if (b2 == a2) {
@@ -550,7 +550,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT dummy4,
*(a8 + 1) = B14;
*(b7 + 0) = A13;
*(b7 + 1) = A14;
} else
} else
if (b2 == a2) {
*(a1 + 0) = B1;
*(a1 + 1) = B2;
@@ -568,7 +568,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT dummy4,
*(a7 + 1) = B14;
*(b7 + 0) = A13;
*(b7 + 1) = A14;
} else
} else
if (b2 == b1) {
*(a1 + 0) = B1;
*(a1 + 1) = B2;
@@ -630,7 +630,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT dummy4,
*(b8 + 1) = A16;
}
}
#ifndef MINUS
a1 += 4;
a3 += 4;
@@ -644,7 +644,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT dummy4,
#endif
//Remain
i = (rows & 1);
if (i > 0) {
ip1 = *piv * 2;
b1 = a + ip1;
@@ -688,9 +688,9 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT dummy4,
*(b7 + 0) = A7;
*(b7 + 1) = A8;
}
a += 4 * lda;
j --;
} while (j > 0);
}
@@ -705,18 +705,18 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT dummy4,
#endif
a3 = a1 + lda;
ip1 = *piv * 2;
piv += incx;
ip2 = *piv * 2;
piv += incx;
b1 = a + ip1;
b2 = a + ip2;
b3 = b1 + lda;
b4 = b2 + lda;
i = (rows >> 1);
i--;
@@ -727,7 +727,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT dummy4,
A2 = *(a1 + 1);
A3 = *(a2 + 0);
A4 = *(a2 + 1);
A5 = *(a3 + 0);
A6 = *(a3 + 1);
A7 = *(a4 + 0);
@@ -737,17 +737,17 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT dummy4,
B2 = *(b1 + 1);
B3 = *(b2 + 0);
B4 = *(b2 + 1);
B5 = *(b3 + 0);
B6 = *(b3 + 1);
B7 = *(b4 + 0);
B8 = *(b4 + 1);
ip1 = *piv * 2;
piv += incx;
ip2 = *piv * 2;
piv += incx;
if (b1 == a1) {
if (b2 == a1) {
*(a1 + 0) = A3;
@@ -758,7 +758,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT dummy4,
*(a3 + 1) = A8;
*(a4 + 0) = A5;
*(a4 + 1) = A6;
} else
} else
if (b2 != a2) {
*(a2 + 0) = B3;
*(a2 + 1) = B4;
@@ -769,7 +769,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT dummy4,
*(b4 + 0) = A7;
*(b4 + 1) = A8;
}
} else
} else
if (b1 == a2) {
if (b2 != a1) {
if (b2 == a2) {
@@ -810,7 +810,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT dummy4,
*(a4 + 1) = B6;
*(b3 + 0) = A5;
*(b3 + 1) = A6;
} else
} else
if (b2 == a2) {
*(a1 + 0) = B1;
*(a1 + 1) = B2;
@@ -820,7 +820,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT dummy4,
*(a3 + 1) = B6;
*(b3 + 0) = A5;
*(b3 + 1) = A6;
} else
} else
if (b2 == b1) {
*(a1 + 0) = B1;
*(a1 + 1) = B2;
@@ -853,13 +853,13 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT dummy4,
*(b4 + 1) = A8;
}
}
b1 = a + ip1;
b2 = a + ip2;
b3 = b1 + lda;
b4 = b2 + lda;
#ifndef MINUS
a1 += 4;
a3 += 4;
@@ -874,7 +874,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT dummy4,
A2 = *(a1 + 1);
A3 = *(a2 + 0);
A4 = *(a2 + 1);
A5 = *(a3 + 0);
A6 = *(a3 + 1);
A7 = *(a4 + 0);
@@ -884,13 +884,13 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT dummy4,
B2 = *(b1 + 1);
B3 = *(b2 + 0);
B4 = *(b2 + 1);
B5 = *(b3 + 0);
B6 = *(b3 + 1);
B7 = *(b4 + 0);
B8 = *(b4 + 1);
if (b1 == a1) {
if (b2 == a1) {
*(a1 + 0) = A3;
@@ -901,7 +901,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT dummy4,
*(a3 + 1) = A8;
*(a4 + 0) = A5;
*(a4 + 1) = A6;
} else
} else
if (b2 != a2) {
*(a2 + 0) = B3;
*(a2 + 1) = B4;
@@ -912,7 +912,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT dummy4,
*(b4 + 0) = A7;
*(b4 + 1) = A8;
}
} else
} else
if (b1 == a2) {
if (b2 != a1) {
if (b2 == a2) {
@@ -953,7 +953,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT dummy4,
*(a4 + 1) = B6;
*(b3 + 0) = A5;
*(b3 + 1) = A6;
} else
} else
if (b2 == a2) {
*(a1 + 0) = B1;
*(a1 + 1) = B2;
@@ -963,7 +963,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT dummy4,
*(a3 + 1) = B6;
*(b3 + 0) = A5;
*(b3 + 1) = A6;
} else
} else
if (b2 == b1) {
*(a1 + 0) = B1;
*(a1 + 1) = B2;
@@ -996,7 +996,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT dummy4,
*(b4 + 1) = A8;
}
}
#ifndef MINUS
a1 += 4;
a3 += 4;
@@ -1007,7 +1007,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT dummy4,
//Remain
i = (rows & 1);
if (i > 0) {
ip1 = *piv * 2;
@@ -1031,28 +1031,28 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT dummy4,
*(b3 + 0) = A3;
*(b3 + 1) = A4;
}
a += 2 * lda;
}
if (n & 1) {
piv = ipiv;
#ifndef MINUS
a1 = a + (k1 + 1) * 2;
#else
a1 = a + k2 * 2;
#endif
ip1 = *piv * 2;
piv += incx;
ip2 = *piv * 2;
piv += incx;
b1 = a + ip1;
b2 = a + ip2;
i = (rows >> 1);
i--;
@@ -1067,26 +1067,26 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT dummy4,
B2 = *(b1 + 1);
B3 = *(b2 + 0);
B4 = *(b2 + 1);
ip1 = *piv * 2;
piv += incx;
ip2 = *piv * 2;
piv += incx;
if (b1 == a1) {
if (b2 == a1) {
*(a1 + 0) = A3;
*(a1 + 1) = A4;
*(a2 + 0) = A1;
*(a2 + 1) = A2;
} else
} else
if (b2 != a2) {
*(a2 + 0) = B3;
*(a2 + 1) = B4;
*(b2 + 0) = A3;
*(b2 + 1) = A4;
}
} else
} else
if (b1 == a2) {
if (b2 != a1) {
if (b2 == a2) {
@@ -1111,13 +1111,13 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT dummy4,
*(a2 + 1) = B2;
*(b1 + 0) = A1;
*(b1 + 1) = A2;
} else
} else
if (b2 == a2) {
*(a1 + 0) = B1;
*(a1 + 1) = B2;
*(b1 + 0) = A1;
*(b1 + 1) = A2;
} else
} else
if (b2 == b1) {
*(a1 + 0) = B1;
*(a1 + 1) = B2;
@@ -1136,10 +1136,10 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT dummy4,
*(b2 + 1) = A4;
}
}
b1 = a + ip1;
b2 = a + ip2;
#ifndef MINUS
a1 += 4;
#else
@@ -1156,21 +1156,21 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT dummy4,
B2 = *(b1 + 1);
B3 = *(b2 + 0);
B4 = *(b2 + 1);
if (b1 == a1) {
if (b2 == a1) {
*(a1 + 0) = A3;
*(a1 + 1) = A4;
*(a2 + 0) = A1;
*(a2 + 1) = A2;
} else
} else
if (b2 != a2) {
*(a2 + 0) = B3;
*(a2 + 1) = B4;
*(b2 + 0) = A3;
*(b2 + 1) = A4;
}
} else
} else
if (b1 == a2) {
if (b2 != a1) {
if (b2 == a2) {
@@ -1195,13 +1195,13 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT dummy4,
*(a2 + 1) = B2;
*(b1 + 0) = A1;
*(b1 + 1) = A2;
} else
} else
if (b2 == a2) {
*(a1 + 0) = B1;
*(a1 + 1) = B2;
*(b1 + 0) = A1;
*(b1 + 1) = A2;
} else
} else
if (b2 == b1) {
*(a1 + 0) = B1;
*(a1 + 1) = B2;
@@ -1220,16 +1220,16 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT dummy4,
*(b2 + 1) = A4;
}
}
#ifndef MINUS
a1 += 4;
#else
a1 -= 4;
#endif
//Remain
i = (rows & 1);
if (i > 0) {
ip1 = *piv * 2;
b1 = a + ip1;
@@ -1246,5 +1246,5 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT dummy4,
}
return 0;
}
}

View File

@@ -52,7 +52,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
n = args -> n;
a = (FLOAT *)args -> a;
lda = args -> lda;
if (range_n) {
n = range_n[1] - range_n[0];
a += range_n[0] * (lda + 1) * COMPSIZE;
@@ -61,13 +61,13 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
for (i = 0; i < n; i++) {
SCAL_K(i + 1, 0, 0, *(a + i + i * lda), a + i, lda, NULL, 0, NULL, 0);
if (i < n - 1) {
aii = DOTU_K(n - i - 1, a + i + 1 + i * lda, 1, a + i + 1 + i * lda, 1);
*(a + i + i * lda) += aii;
GEMV_T(n - i - 1, i, 0, dp1,
GEMV_T(n - i - 1, i, 0, dp1,
a + (i + 1) , lda,
a + (i + 1) + i * lda, 1,
a + i , lda, sb);

View File

@@ -52,7 +52,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
n = args -> n;
a = (FLOAT *)args -> a;
lda = args -> lda;
if (range_n) {
n = range_n[1] - range_n[0];
a += range_n[0] * (lda + 1) * COMPSIZE;
@@ -61,13 +61,13 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
for (i = 0; i < n; i++) {
SCAL_K(i + 1, 0, 0, *(a + i + i * lda), a + i * lda, 1, NULL, 0, NULL, 0);
if (i < n - 1) {
aii = DOTU_K(n - i - 1, a + i + (i + 1)* lda, lda, a + i + (i + 1) * lda, lda);
*(a + i + i * lda) += aii;
GEMV_N(i, n - i - 1, 0, dp1,
GEMV_N(i, n - i - 1, 0, dp1,
a + (i + 1) * lda, lda,
a + i + (i + 1) * lda, lda,
a + i * lda, 1, sb);

View File

@@ -52,7 +52,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
n = args -> n;
a = (FLOAT *)args -> a;
lda = args -> lda;
if (range_n) {
n = range_n[1] - range_n[0];
a += range_n[0] * (lda + 1) * COMPSIZE;
@@ -62,16 +62,16 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
SCAL_K(i + 1, 0, 0, *(a + (i + i * lda) * COMPSIZE + 0), ZERO,
a + i * COMPSIZE, lda, NULL, 0, NULL, 0);
if (i < n - 1) {
temp[0] = DOTC_K(n - i - 1,
a + (i + 1 + i * lda) * COMPSIZE, 1,
a + (i + 1 + i * lda) * COMPSIZE, 1);
GET_IMAGE(temp[1]);
*(a + (i + i * lda) * COMPSIZE + 0) += temp[0];
*(a + (i + i * lda) * COMPSIZE + 1) = ZERO;
GEMV_U(n - i - 1, i, 0, dp1, ZERO,
a + ((i + 1) ) * COMPSIZE, lda,
a + ((i + 1) + i * lda) * COMPSIZE, 1,

View File

@@ -52,24 +52,24 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
n = args -> n;
a = (FLOAT *)args -> a;
lda = args -> lda;
if (range_n) {
n = range_n[1] - range_n[0];
a += range_n[0] * (lda + 1) * COMPSIZE;
}
for (i = 0; i < n; i++) {
SCAL_K(i + 1, 0, 0,
SCAL_K(i + 1, 0, 0,
*(a + (i + i * lda) * COMPSIZE + 0), ZERO,
a + i * lda * COMPSIZE, 1, NULL, 0, NULL, 0);
if (i < n - 1) {
temp[0] = DOTC_K(n - i - 1, a + (i + (i + 1) * lda) * COMPSIZE, lda, a + (i + (i + 1) * lda) * COMPSIZE, lda);
GET_IMAGE(temp[1]);
*(a + (i + i * lda) * COMPSIZE + 0) += temp[0];
*(a + (i + i * lda) * COMPSIZE + 1) = ZERO;
GEMV_O(i, n - i - 1, 0, dp1, ZERO,
a + ( (i + 1) * lda) * COMPSIZE, lda,
a + (i + (i + 1) * lda) * COMPSIZE, lda,

View File

@@ -54,7 +54,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
mode = BLAS_DOUBLE | BLAS_REAL;
#else
mode = BLAS_SINGLE | BLAS_REAL;
#endif
#endif
#else
#ifdef XDOUBLE
mode = BLAS_XDOUBLE | BLAS_COMPLEX;
@@ -62,11 +62,11 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
mode = BLAS_DOUBLE | BLAS_COMPLEX;
#else
mode = BLAS_SINGLE | BLAS_COMPLEX;
#endif
#endif
#endif
if (args -> nthreads == 1) {
LAUUM_L_SINGLE(args, NULL, NULL, sa, sb, 0);
LAUUM_L_SINGLE(args, NULL, NULL, sa, sb, 0);
return 0;
}
@@ -87,7 +87,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
newarg.alpha = alpha;
newarg.beta = NULL;
newarg.nthreads = args -> nthreads;
blocking = (n / 2 + GEMM_UNROLL_N - 1) & ~(GEMM_UNROLL_N - 1);
if (blocking > GEMM_Q) blocking = GEMM_Q;
@@ -95,7 +95,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
bk = n - i;
if (bk > blocking) bk = blocking;
newarg.n = i;
newarg.k = bk;
newarg.a = a + i * COMPSIZE;
@@ -118,6 +118,6 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
CNAME(&newarg, NULL, NULL, sa, sb, 0);
}
return 0;
}

View File

@@ -91,7 +91,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
n = args -> n;
a = (FLOAT *)args -> a;
lda = args -> lda;
if (range_n) {
n = range_n[1] - range_n[0];
a += range_n[0] * (lda + 1) * COMPSIZE;
@@ -107,11 +107,11 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
for (j = 0; j < n; j += blocking) {
bk = MIN(blocking, n - j);
if (j > 0 ){
TRMM_ILNCOPY(bk, bk, a + (j + j * lda) * COMPSIZE, lda, 0, 0, sb);
for (ls = 0; ls < j; ls += REAL_GEMM_R) {
min_l = j - ls;
if (min_l > REAL_GEMM_R) min_l = REAL_GEMM_R;
@@ -127,97 +127,97 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
} else {
aa = sb2;
}
for (jjs = ls; jjs < ls + min_l; jjs += GEMM_P){
min_jj = ls + min_l - jjs;
if (min_jj > GEMM_P) min_jj = GEMM_P;
GEMM_ONCOPY(bk, min_jj, a + (j + jjs * lda) * COMPSIZE, lda, sb2 + (jjs - ls) * bk * COMPSIZE);
SYRK_KERNEL(min_i, min_jj, bk, dp1,
aa,
sb2 + (jjs - ls) * bk * COMPSIZE,
a + (ls + jjs * lda) * COMPSIZE, lda,
SYRK_KERNEL(min_i, min_jj, bk, dp1,
aa,
sb2 + (jjs - ls) * bk * COMPSIZE,
a + (ls + jjs * lda) * COMPSIZE, lda,
ls - jjs);
}
for(is = ls + min_i; is < j ; is += GEMM_P){
min_i = j - is;
if (min_i > GEMM_P) min_i = GEMM_P;
GEMM_INCOPY(bk, min_i, a + (j + is * lda)* COMPSIZE, lda, sa);
SYRK_KERNEL(min_i, min_l, bk, dp1,
sa,
sb2,
a + (is + ls * lda) * COMPSIZE, lda,
SYRK_KERNEL(min_i, min_l, bk, dp1,
sa,
sb2,
a + (is + ls * lda) * COMPSIZE, lda,
is - ls);
}
for (ks = 0; ks < bk; ks += GEMM_P) {
min_k = bk - ks;
if (min_k > GEMM_P) min_k = GEMM_P;
TRMM_KERNEL(min_k, min_l, bk, dp1,
#ifdef COMPLEX
ZERO,
#endif
sb + ks * bk * COMPSIZE,
sb2,
a + (ks + j + ls * lda) * COMPSIZE, lda, ks);
a + (ks + j + ls * lda) * COMPSIZE, lda, ks);
}
#else
min_i = j - ls;
if (min_i > GEMM_P) min_i = GEMM_P;
GEMM_INCOPY(bk, min_i, a + (j + ls * lda)* COMPSIZE, lda, sa);
for (jjs = ls; jjs < ls + min_l; jjs += GEMM_P){
min_jj = ls + min_l - jjs;
if (min_jj > GEMM_P) min_jj = GEMM_P;
GEMM_ONCOPY(bk, min_jj, a + (j + jjs * lda) * COMPSIZE, lda, sb2 + (jjs - ls) * bk * COMPSIZE);
SYRK_KERNEL(min_i, min_jj, bk, dp1,
sa,
sb2 + (jjs - ls) * bk * COMPSIZE,
a + (ls + jjs * lda) * COMPSIZE, lda,
SYRK_KERNEL(min_i, min_jj, bk, dp1,
sa,
sb2 + (jjs - ls) * bk * COMPSIZE,
a + (ls + jjs * lda) * COMPSIZE, lda,
ls - jjs);
}
for(is = ls + min_i; is < j ; is += GEMM_P){
min_i = j - is;
if (min_i > GEMM_P) min_i = GEMM_P;
GEMM_INCOPY(bk, min_i, a + (j + is * lda)* COMPSIZE, lda, sa);
SYRK_KERNEL(min_i, min_l, bk, dp1,
sa,
sb2,
a + (is + ls * lda) * COMPSIZE, lda,
SYRK_KERNEL(min_i, min_l, bk, dp1,
sa,
sb2,
a + (is + ls * lda) * COMPSIZE, lda,
is - ls);
}
for (ks = 0; ks < bk; ks += GEMM_P) {
min_k = bk - ks;
if (min_k > GEMM_P) min_k = GEMM_P;
TRMM_KERNEL(min_k, min_l, bk, dp1,
#ifdef COMPLEX
ZERO,
#endif
sb + ks * bk * COMPSIZE,
sb2,
a + (ks + j + ls * lda) * COMPSIZE, lda, ks);
a + (ks + j + ls * lda) * COMPSIZE, lda, ks);
}
#endif
}
}
if (!range_n) {
range_N[0] = j;
range_N[1] = j + bk;
@@ -225,9 +225,9 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
range_N[0] = range_n[0] + j;
range_N[1] = range_n[0] + j + bk;
}
CNAME(args, NULL, range_N, sa, sb, 0);
}
return 0;

View File

@@ -54,7 +54,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
mode = BLAS_DOUBLE | BLAS_REAL;
#else
mode = BLAS_SINGLE | BLAS_REAL;
#endif
#endif
#else
#ifdef XDOUBLE
mode = BLAS_XDOUBLE | BLAS_COMPLEX;
@@ -62,11 +62,11 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
mode = BLAS_DOUBLE | BLAS_COMPLEX;
#else
mode = BLAS_SINGLE | BLAS_COMPLEX;
#endif
#endif
#endif
if (args -> nthreads == 1) {
LAUUM_U_SINGLE(args, NULL, NULL, sa, sb, 0);
LAUUM_U_SINGLE(args, NULL, NULL, sa, sb, 0);
return 0;
}
@@ -95,7 +95,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
bk = n - i;
if (bk > blocking) bk = blocking;
newarg.n = i;
newarg.k = bk;
newarg.a = a + ( i * lda) * COMPSIZE;
@@ -118,6 +118,6 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
CNAME(&newarg, NULL, NULL, sa, sb, 0);
}
return 0;
}

View File

@@ -91,7 +91,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
n = args -> n;
a = (FLOAT *)args -> a;
lda = args -> lda;
if (range_n) {
n = range_n[1] - range_n[0];
a += range_n[0] * (lda + 1) * COMPSIZE;
@@ -117,74 +117,74 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
min_l = j - ls;
#if 0
if (min_l > REAL_GEMM_R) min_l = REAL_GEMM_R;
min_i = ls + min_l;
if (min_i > GEMM_P) min_i = GEMM_P;
if (ls > 0) {
GEMM_ITCOPY(bk, min_i, a + (j * lda) * COMPSIZE, lda, sa);
aa = sa;
} else {
aa = sb2;
}
for (jjs = ls; jjs < ls + min_l; jjs += GEMM_P){
min_jj = ls + min_l - jjs;
if (min_jj > GEMM_P) min_jj = GEMM_P;
GEMM_OTCOPY(bk, min_jj, a + (jjs + j * lda) * COMPSIZE, lda, sb2 + (jjs - ls) * bk * COMPSIZE);
SYRK_KERNEL(min_i, min_jj, bk, dp1,
aa,
sb2 + (jjs - ls) * bk * COMPSIZE,
SYRK_KERNEL(min_i, min_jj, bk, dp1,
aa,
sb2 + (jjs - ls) * bk * COMPSIZE,
a + (jjs * lda) * COMPSIZE, lda, - jjs);
}
if (ls + REAL_GEMM_R >= j ) {
for (ks = 0; ks < bk; ks += GEMM_P) {
min_k = bk - ks;
if (min_k > GEMM_P) min_k = GEMM_P;
TRMM_KERNEL(min_i, min_k, bk, dp1,
#ifdef COMPLEX
ZERO,
#endif
aa,
sb + ks * bk * COMPSIZE,
a + ((ks + j) * lda) * COMPSIZE, lda, -ks);
a + ((ks + j) * lda) * COMPSIZE, lda, -ks);
}
}
for(is = min_i; is < ls + min_l ; is += GEMM_P){
min_i = ls + min_l - is;
if (min_i > GEMM_P) min_i = GEMM_P;
if (is < ls) {
GEMM_ITCOPY(bk, min_i, a + (is + j * lda) * COMPSIZE, lda, sa);
aa = sa;
} else {
aa = sb2 + (is - ls) * bk * COMPSIZE;
}
SYRK_KERNEL(min_i, min_l, bk, dp1,
aa,
sb2,
SYRK_KERNEL(min_i, min_l, bk, dp1,
aa,
sb2,
a + (is + ls * lda) * COMPSIZE, lda, is - ls);
if (ls + REAL_GEMM_R >= j ) {
for (ks = 0; ks < bk; ks += GEMM_P) {
min_k = bk - ks;
if (min_k > GEMM_P) min_k = GEMM_P;
TRMM_KERNEL(min_i, min_k, bk, dp1,
#ifdef COMPLEX
ZERO,
#endif
aa,
sb + ks * bk * COMPSIZE,
a + (is + (ks + j) * lda) * COMPSIZE, lda, -ks);
a + (is + (ks + j) * lda) * COMPSIZE, lda, -ks);
}
}
}
@@ -198,12 +198,12 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
for (jjs = ls; jjs < ls + min_l; jjs += GEMM_P){
min_jj = ls + min_l - jjs;
if (min_jj > GEMM_P) min_jj = GEMM_P;
GEMM_OTCOPY(bk, min_jj, a + (jjs + j * lda) * COMPSIZE, lda, sb2 + (jjs - ls) * bk * COMPSIZE);
SYRK_KERNEL(min_i, min_jj, bk, dp1,
sa,
sb2 + (jjs - ls) * bk * COMPSIZE,
SYRK_KERNEL(min_i, min_jj, bk, dp1,
sa,
sb2 + (jjs - ls) * bk * COMPSIZE,
a + (jjs * lda) * COMPSIZE, lda, - jjs);
}
@@ -211,40 +211,40 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
for (ks = 0; ks < bk; ks += GEMM_P) {
min_k = bk - ks;
if (min_k > GEMM_P) min_k = GEMM_P;
TRMM_KERNEL(min_i, min_k, bk, dp1,
#ifdef COMPLEX
ZERO,
#endif
sa,
sb + ks * bk * COMPSIZE,
a + ((ks + j) * lda) * COMPSIZE, lda, -ks);
a + ((ks + j) * lda) * COMPSIZE, lda, -ks);
}
}
for(is = min_i; is < ls + min_l ; is += GEMM_P){
min_i = ls + min_l - is;
if (min_i > GEMM_P) min_i = GEMM_P;
GEMM_ITCOPY(bk, min_i, a + (is + j * lda) * COMPSIZE, lda, sa);
SYRK_KERNEL(min_i, min_l, bk, dp1,
sa,
sb2,
SYRK_KERNEL(min_i, min_l, bk, dp1,
sa,
sb2,
a + (is + ls * lda) * COMPSIZE, lda, is - ls);
if (ls + REAL_GEMM_R >= j ) {
for (ks = 0; ks < bk; ks += GEMM_P) {
min_k = bk - ks;
if (min_k > GEMM_P) min_k = GEMM_P;
TRMM_KERNEL(min_i, min_k, bk, dp1,
#ifdef COMPLEX
ZERO,
#endif
sa,
sb + ks * bk * COMPSIZE,
a + (is + (ks + j) * lda) * COMPSIZE, lda, -ks);
a + (is + (ks + j) * lda) * COMPSIZE, lda, -ks);
}
}
}
@@ -259,7 +259,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
range_N[0] = range_n[0] + j;
range_N[1] = range_n[0] + j + bk;
}
CNAME(args, NULL, range_N, sa, sb, 0);
}

View File

@@ -59,7 +59,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
n = args -> n;
a = (FLOAT *)args -> a;
lda = args -> lda;
if (range_n) {
n = range_n[1] - range_n[0];
a += range_n[0] * (lda + 1) * COMPSIZE;
@@ -81,11 +81,11 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
i = n - j - 1;
if (i > 0) {
GEMV_N(i, j, 0, dm1,
GEMV_N(i, j, 0, dm1,
a + j + 1, lda,
a + j, lda,
aoffset + j + 1, 1, sb);
SCAL_K(i, 0, 0, dp1 / ajj,
aoffset + j + 1, 1, NULL, 0, NULL, 0);
}

View File

@@ -58,7 +58,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
n = args -> n;
a = (FLOAT *)args -> a;
lda = args -> lda;
if (range_n) {
n = range_n[1] - range_n[0];
a += range_n[0] * (lda + 1) * COMPSIZE;
@@ -78,11 +78,11 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
i = n - j - 1;
if (i > 0) {
GEMV_T(j, i, 0, dm1,
GEMV_T(j, i, 0, dm1,
a + lda, lda,
a, 1,
a + j + lda, lda, sb);
SCAL_K(i, 0, 0, dp1 / ajj,
a + j + lda, lda, NULL, 0, NULL, 0);
}

View File

@@ -58,7 +58,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
n = args -> n;
a = (FLOAT *)args -> a;
lda = args -> lda;
if (range_n) {
n = range_n[1] - range_n[0];
a += range_n[0] * (lda + 1) * COMPSIZE;
@@ -89,7 +89,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
a + (j + 1) * 2, lda,
a + j * 2, lda,
aoffset + (j + 1) * 2, 1, sb);
SCAL_K(i, 0, 0, ONE / ajj[0], ZERO,
aoffset + (j + 1) * 2, 1, NULL, 0, NULL, 0);
}

View File

@@ -57,7 +57,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
n = args -> n;
a = (FLOAT *)args -> a;
lda = args -> lda;
if (range_n) {
n = range_n[1] - range_n[0];
a += range_n[0] * (lda + 1) * COMPSIZE;
@@ -68,7 +68,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
ajj[0] = DOTC_K(j, a, 1, a, 1);
GET_IMAGE(ajj[1]);
ajj[0] = *(a + j * 2) - ajj[0];
ajj[0] = *(a + j * 2) - ajj[0];
if (ajj[0] <= 0){
*(a + j * 2 + 0) = ajj[0];
@@ -87,7 +87,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
a + lda * 2, lda,
a, 1,
a + (j + lda) * 2, lda, sb);
SCAL_K(i, 0, 0, ONE / ajj[0], ZERO,
a + (j + lda) * 2, lda, NULL, 0, NULL, 0);
}

View File

@@ -55,7 +55,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
mode = BLAS_DOUBLE | BLAS_REAL;
#else
mode = BLAS_SINGLE | BLAS_REAL;
#endif
#endif
#else
#ifdef XDOUBLE
mode = BLAS_XDOUBLE | BLAS_COMPLEX;
@@ -63,11 +63,11 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
mode = BLAS_DOUBLE | BLAS_COMPLEX;
#else
mode = BLAS_SINGLE | BLAS_COMPLEX;
#endif
#endif
#endif
if (args -> nthreads == 1) {
info = POTRF_L_SINGLE(args, NULL, NULL, sa, sb, 0);
info = POTRF_L_SINGLE(args, NULL, NULL, sa, sb, 0);
return info;
}
@@ -91,7 +91,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
blocking = (n / 2 + GEMM_UNROLL_N - 1) & ~(GEMM_UNROLL_N - 1);
if (blocking > GEMM_Q) blocking = GEMM_Q;
for (i = 0; i < n; i += blocking) {
bk = n - i;
if (bk > blocking) bk = blocking;
@@ -108,15 +108,15 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
newarg.n = bk;
newarg.a = a + (i + i * lda) * COMPSIZE;
newarg.b = a + (i + bk + i * lda) * COMPSIZE;
gemm_thread_m(mode | BLAS_RSIDE | BLAS_TRANSA_T | BLAS_UPLO,
&newarg, NULL, NULL, (void *)TRSM_RCLN, sa, sb, args -> nthreads);
newarg.n = n - i - bk;
newarg.k = bk;
newarg.a = a + (i + bk + i * lda) * COMPSIZE;
newarg.c = a + (i + bk + (i + bk) * lda) * COMPSIZE;
#ifndef USE_SIMPLE_THREADED_LEVEL3
HERK_THREAD_LN(&newarg, NULL, NULL, sa, sb, 0);
#else

View File

@@ -100,7 +100,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
n = args -> n;
a = (FLOAT *)args -> a;
lda = args -> lda;
if (range_n) {
n = range_n[1] - range_n[0];
a += range_n[0] * (lda + 1) * COMPSIZE;
@@ -129,7 +129,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
if (info) return info + j;
if (n - j - bk > 0) {
TRSM_OLTCOPY(bk, bk, a + (j + j * lda) * COMPSIZE, lda, 0, sb);
/* First tile */
@@ -147,9 +147,9 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
} else {
aa = sa;
}
GEMM_ITCOPY(bk, min_i, a + (is + j * lda) * COMPSIZE, lda, aa);
TRSM_KERNEL(min_i, bk, bk, dm1,
#ifdef COMPLEX
ZERO,
@@ -157,7 +157,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
aa,
sb,
a + (is + j * lda) * COMPSIZE, lda, 0);
SYRK_KERNEL_L(min_i, min_j, bk, dm1,
aa,
sb2,
@@ -172,7 +172,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
#ifdef COMPLEX
ZERO,
#endif
sa,
sb,
a + (is + j * lda) * COMPSIZE, lda, 0);
@@ -188,17 +188,17 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
is - j - bk);
#endif
}
for(js = j + bk + min_j; js < n; js += REAL_GEMM_R){
min_j = n - js;
if (min_j > REAL_GEMM_R) min_j = REAL_GEMM_R;
GEMM_OTCOPY(bk, min_j, a + (js + j * lda) * COMPSIZE, lda, sb2);
for (is = js; is < n; is += GEMM_P) {
min_i = n - is;
if (min_i > GEMM_P) min_i = GEMM_P;
#ifdef SHARED_ARRAY
if (is + min_i < js + min_j) {
@@ -207,7 +207,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
GEMM_ITCOPY(bk, min_i, a + (is + j * lda) * COMPSIZE, lda, sa);
aa = sa;
}
SYRK_KERNEL_L(min_i, min_j, bk, dm1,
aa,
sb2,
@@ -217,7 +217,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
#else
GEMM_ITCOPY(bk, min_i, a + (is + j * lda) * COMPSIZE, lda, sa);
SYRK_KERNEL_L(min_i, min_j, bk, dm1,
sa,
sb2,
@@ -229,7 +229,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
}
}
}
return 0;

View File

@@ -55,7 +55,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
mode = BLAS_DOUBLE | BLAS_REAL;
#else
mode = BLAS_SINGLE | BLAS_REAL;
#endif
#endif
#else
#ifdef XDOUBLE
mode = BLAS_XDOUBLE | BLAS_COMPLEX;
@@ -63,11 +63,11 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
mode = BLAS_DOUBLE | BLAS_COMPLEX;
#else
mode = BLAS_SINGLE | BLAS_COMPLEX;
#endif
#endif
#endif
if (args -> nthreads == 1) {
info = POTRF_U_SINGLE(args, NULL, NULL, sa, sb, 0);
info = POTRF_U_SINGLE(args, NULL, NULL, sa, sb, 0);
return info;
}
@@ -91,7 +91,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
blocking = (n / 2 + GEMM_UNROLL_N - 1) & ~(GEMM_UNROLL_N - 1);
if (blocking > GEMM_Q) blocking = GEMM_Q;
for (i = 0; i < n; i += blocking) {
bk = n - i;
if (bk > blocking) bk = blocking;
@@ -108,15 +108,15 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
newarg.n = n - i - bk;
newarg.a = a + (i + i * lda) * COMPSIZE;
newarg.b = a + (i + (i + bk) * lda) * COMPSIZE;
gemm_thread_n(mode | BLAS_TRANSA_T,
&newarg, NULL, NULL, (void *)TRSM_LCUN, sa, sb, args -> nthreads);
newarg.n = n - i - bk;
newarg.k = bk;
newarg.a = a + ( i + (i + bk) * lda) * COMPSIZE;
newarg.c = a + ((i + bk) + (i + bk) * lda) * COMPSIZE;
#ifndef USE_SIMPLE_THREADED_LEVEL3
HERK_THREAD_UC(&newarg, NULL, NULL, sa, sb, 0);
#else

View File

@@ -91,7 +91,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
#ifdef SHARED_ARRAY
FLOAT *aa;
#endif
FLOAT *sb2 = (FLOAT *)((((BLASLONG)sb
+ GEMM_PQ * GEMM_Q * COMPSIZE * SIZE + GEMM_ALIGN) & ~GEMM_ALIGN)
+ GEMM_OFFSET_B);
@@ -109,14 +109,14 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
info = POTF2_U(args, NULL, range_n, sa, sb, 0);
return info;
}
blocking = GEMM_Q;
if (n <= 4 * GEMM_Q) blocking = (n + 3) / 4;
for (j = 0; j < n; j += blocking) {
bk = n - j;
if (bk > blocking) bk = blocking;
if (!range_n) {
range_N[0] = j;
range_N[1] = j + bk;
@@ -124,29 +124,29 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
range_N[0] = range_n[0] + j;
range_N[1] = range_n[0] + j + bk;
}
info = CNAME(args, NULL, range_N, sa, sb, 0);
if (info) return info + j;
if (n - j - bk > 0) {
TRSM_IUNCOPY(bk, bk, a + (j + j * lda) * COMPSIZE, lda, 0, sb);
for(js = j + bk; js < n; js += REAL_GEMM_R) {
min_j = n - js;
if (min_j > REAL_GEMM_R) min_j = REAL_GEMM_R;
for(jjs = js; jjs < js + min_j; jjs += GEMM_UNROLL_N){
min_jj = min_j + js - jjs;
if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N;
GEMM_ONCOPY(bk, min_jj, a + (j + jjs * lda) * COMPSIZE, lda, sb2 + bk * (jjs - js) * COMPSIZE);
for (is = 0; is < bk; is += GEMM_P) {
min_i = bk - is;
if (min_i > GEMM_P) min_i = GEMM_P;
TRSM_KERNEL (min_i, min_jj, bk, dm1,
TRSM_KERNEL (min_i, min_jj, bk, dm1,
#ifdef COMPLEX
ZERO,
#endif
@@ -158,14 +158,14 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
for (is = j + bk; is < js + min_j; is += min_i) {
min_i = js + min_j - is;
if (min_i >= GEMM_P * 2) {
min_i = GEMM_P;
} else
} else
if (min_i > GEMM_P) {
min_i = (min_i / 2 + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1);
}
#ifdef SHARED_ARRAY
if ((is >= js) && (is + min_i <= js + min_j)) {
aa = sb2 + bk * (is - js) * COMPSIZE;
@@ -176,18 +176,18 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
#else
GEMM_INCOPY(bk, min_i, a + (j + is * lda) * COMPSIZE, lda, sa);
#endif
SYRK_KERNEL_U(min_i, min_j, bk,
dm1,
dm1,
SA, sb2,
a + (is + js * lda) * COMPSIZE, lda,
is - js);
}
}
}
}
return 0;
}

View File

@@ -42,7 +42,7 @@
#ifndef USE_SIMPLE_THREADED_LEVEL3
//The array of job_t may overflow the stack.
//Instead, use malloc to alloc job_t.
//Instead, use malloc to alloc job_t.
#if MAX_CPU_NUMBER > BLAS3_MEM_ALLOC_THRESHOLD
#define USE_ALLOC_HEAP
#endif
@@ -189,19 +189,19 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
for (i = 1; i < DIVIDE_RATE; i++) {
buffer[i] = buffer[i - 1] + GEMM_Q * div_n * COMPSIZE;
}
#ifndef LOWER
TRSM_IUNCOPY(k, k, (FLOAT *)S, lda, 0, sb);
#else
TRSM_OLTCOPY(k, k, (FLOAT *)S, lda, 0, sb);
#endif
for (xxx = m_from, bufferside = 0; xxx < m_to; xxx += div_n, bufferside ++) {
for(jjs = xxx; jjs < MIN(m_to, xxx + div_n); jjs += min_jj){
min_jj = MIN(m_to, xxx + div_n) - jjs;
#ifndef LOWER
if (min_jj > GEMM_UNROLL_MN) min_jj = GEMM_UNROLL_MN;
#else
@@ -211,7 +211,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
#ifndef LOWER
OCOPY_OPERATION (k, min_jj, a, lda, 0, jjs, buffer[bufferside] + k * (jjs - xxx) * COMPSIZE);
TRSM_KERNEL (k, min_jj, k, dm1,
TRSM_KERNEL (k, min_jj, k, dm1,
#ifdef COMPLEX
ZERO,
#endif
@@ -230,7 +230,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
a + jjs * COMPSIZE, lda, 0);
#endif
}
#ifndef LOWER
for (i = 0; i <= mypos; i++)
job[mypos].working[i][CACHE_LINE_SIZE * bufferside] = (BLASLONG)buffer[bufferside];
@@ -238,25 +238,25 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
for (i = mypos; i < args -> nthreads; i++)
job[mypos].working[i][CACHE_LINE_SIZE * bufferside] = (BLASLONG)buffer[bufferside];
#endif
WMB;
}
min_i = m_to - m_from;
if (min_i >= GEMM_P * 2) {
min_i = GEMM_P;
} else
} else
if (min_i > GEMM_P) {
min_i = ((min_i + 1) / 2 + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1);
}
#ifndef LOWER
ICOPY_OPERATION(k, min_i, a, lda, 0, m_from, sa);
#else
OCOPY_OPERATION(k, min_i, a, lda, 0, m_from, sa);
#endif
current = mypos;
#ifndef LOWER
@@ -266,47 +266,47 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
#endif
{
div_n = ((range_n[current + 1] - range_n[current] + DIVIDE_RATE - 1) / DIVIDE_RATE + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1);
for (xxx = range_n[current], bufferside = 0; xxx < range_n[current + 1]; xxx += div_n, bufferside ++) {
/* thread has to wait */
if (current != mypos) while(job[current].working[mypos][CACHE_LINE_SIZE * bufferside] == 0) {YIELDING;};
KERNEL_OPERATION(min_i, MIN(range_n[current + 1] - xxx, div_n), k, alpha,
sa, (FLOAT *)job[current].working[mypos][CACHE_LINE_SIZE * bufferside],
c, lda, m_from, xxx);
if (m_from + min_i >= m_to) {
job[current].working[mypos][CACHE_LINE_SIZE * bufferside] &= 0;
WMB;
}
}
#ifndef LOWER
current ++;
#else
current --;
#endif
}
for(is = m_from + min_i; is < m_to; is += min_i){
min_i = m_to - is;
if (min_i >= GEMM_P * 2) {
min_i = GEMM_P;
} else
} else
if (min_i > GEMM_P) {
min_i = ((min_i + 1) / 2 + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1);
}
#ifndef LOWER
ICOPY_OPERATION(k, min_i, a, lda, 0, is, sa);
#else
OCOPY_OPERATION(k, min_i, a, lda, 0, is, sa);
#endif
current = mypos;
#ifndef LOWER
while (current < args -> nthreads)
#else
@@ -314,18 +314,18 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
#endif
{
div_n = ((range_n[current + 1] - range_n[current] + DIVIDE_RATE - 1) / DIVIDE_RATE + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1);
for (xxx = range_n[current], bufferside = 0; xxx < range_n[current + 1]; xxx += div_n, bufferside ++) {
KERNEL_OPERATION(min_i, MIN(range_n[current + 1] - xxx, div_n), k, alpha,
sa, (FLOAT *)job[current].working[mypos][CACHE_LINE_SIZE * bufferside],
c, lda, is, xxx);
if (is + min_i >= m_to) {
job[current].working[mypos][CACHE_LINE_SIZE * bufferside] &= 0;
WMB;
}
}
}
#ifndef LOWER
current ++;
#else
@@ -333,7 +333,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
#endif
}
}
for (i = 0; i < args -> nthreads; i++) {
if (i != mypos) {
for (xxx = 0; xxx < DIVIDE_RATE; xxx++) {
@@ -341,7 +341,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
}
}
}
return 0;
}
@@ -378,7 +378,7 @@ static int thread_driver(blas_arg_t *args, FLOAT *sa, FLOAT *sb){
#else
mode = BLAS_SINGLE | BLAS_REAL;
mask = MAX(SGEMM_UNROLL_M, SGEMM_UNROLL_N) - 1;
#endif
#endif
#else
#ifdef XDOUBLE
mode = BLAS_XDOUBLE | BLAS_COMPLEX;
@@ -389,7 +389,7 @@ static int thread_driver(blas_arg_t *args, FLOAT *sa, FLOAT *sb){
#else
mode = BLAS_SINGLE | BLAS_COMPLEX;
mask = MAX(CGEMM_UNROLL_M, CGEMM_UNROLL_N) - 1;
#endif
#endif
#endif
newarg.m = args -> m;
@@ -409,7 +409,7 @@ static int thread_driver(blas_arg_t *args, FLOAT *sa, FLOAT *sb){
#endif
newarg.common = (void *)job;
n_from = 0;
n_to = args -> m;
@@ -424,17 +424,17 @@ static int thread_driver(blas_arg_t *args, FLOAT *sa, FLOAT *sb){
dnum = (double)n * (double)n /(double)nthreads;
while (i < n){
if (nthreads - num_cpu > 1) {
double di = (double)i;
width = (((BLASLONG)(sqrt(di * di + dnum) - di) + mask) & ~mask);
if (num_cpu == 0) width = n - ((n - width) & ~mask);
if ((width > n - i) || (width < mask)) width = n - i;
} else {
width = n - i;
}
@@ -449,7 +449,7 @@ static int thread_driver(blas_arg_t *args, FLOAT *sa, FLOAT *sb){
queue[num_cpu].sa = NULL;
queue[num_cpu].sb = NULL;
queue[num_cpu].next = &queue[num_cpu + 1];
num_cpu ++;
i += width;
}
@@ -466,21 +466,21 @@ static int thread_driver(blas_arg_t *args, FLOAT *sa, FLOAT *sb){
dnum = (double)n * (double)n /(double)nthreads;
while (i < n){
if (nthreads - num_cpu > 1) {
double di = (double)i;
width = (((BLASLONG)(sqrt(di * di + dnum) - di) + mask) & ~mask);
if ((width > n - i) || (width < mask)) width = n - i;
} else {
width = n - i;
}
range[num_cpu + 1] = range[num_cpu] + width;
queue[num_cpu].mode = mode;
queue[num_cpu].routine = inner_thread;
queue[num_cpu].args = &newarg;
@@ -489,7 +489,7 @@ static int thread_driver(blas_arg_t *args, FLOAT *sa, FLOAT *sb){
queue[num_cpu].sa = NULL;
queue[num_cpu].sb = NULL;
queue[num_cpu].next = &queue[num_cpu + 1];
num_cpu ++;
i += width;
}
@@ -507,14 +507,14 @@ static int thread_driver(blas_arg_t *args, FLOAT *sa, FLOAT *sb){
}
}
}
queue[0].sa = sa;
queue[0].sb = sb;
queue[num_cpu - 1].next = NULL;
exec_blas(num_cpu, queue);
}
#ifdef USE_ALLOC_HEAP
free(job);
#endif
@@ -540,7 +540,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
mode = BLAS_DOUBLE | BLAS_REAL;
#else
mode = BLAS_SINGLE | BLAS_REAL;
#endif
#endif
#else
#ifdef XDOUBLE
mode = BLAS_XDOUBLE | BLAS_COMPLEX;
@@ -548,14 +548,14 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
mode = BLAS_DOUBLE | BLAS_COMPLEX;
#else
mode = BLAS_SINGLE | BLAS_COMPLEX;
#endif
#endif
#endif
if (args -> nthreads == 1) {
#ifndef LOWER
info = POTRF_U_SINGLE(args, NULL, NULL, sa, sb, 0);
info = POTRF_U_SINGLE(args, NULL, NULL, sa, sb, 0);
#else
info = POTRF_L_SINGLE(args, NULL, NULL, sa, sb, 0);
info = POTRF_L_SINGLE(args, NULL, NULL, sa, sb, 0);
#endif
return info;
}
@@ -584,7 +584,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
blocking = (n / 2 + GEMM_UNROLL_N - 1) & ~(GEMM_UNROLL_N - 1);
if (blocking > GEMM_Q) blocking = GEMM_Q;
for (i = 0; i < n; i += blocking) {
bk = n - i;
if (bk > blocking) bk = blocking;
@@ -643,7 +643,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
newarg.k = bk;
newarg.a = a + (i + bk + i * lda) * COMPSIZE;
newarg.c = a + (i + bk + (i + bk) * lda) * COMPSIZE;
#if 0
HERK_THREAD_LN(&newarg, NULL, NULL, sa, sb, 0);
#else

View File

@@ -56,7 +56,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
n = args -> n;
a = (FLOAT *)args -> a;
lda = args -> lda;
if (range_n) {
n = range_n[1] - range_n[0];
a += range_n[0] * (lda + 1) * COMPSIZE;
@@ -77,7 +77,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
1, sb);
SCAL_K(n - j - 1, 0, 0,
-ajj,
-ajj,
a + (j + 1) + j * lda, 1,
NULL, 0, NULL, 0);
}

View File

@@ -56,7 +56,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
n = args -> n;
a = (FLOAT *)args -> a;
lda = args -> lda;
if (range_n) {
n = range_n[1] - range_n[0];
a += range_n[0] * (lda + 1) * COMPSIZE;
@@ -72,12 +72,12 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
#endif
TRMV (j,
a , lda,
a , lda,
a + j * lda, 1,
sb);
SCAL_K(j, 0, 0,
-ajj,
SCAL_K(j, 0, 0,
-ajj,
a + j * lda, 1,
NULL, 0, NULL, 0);

View File

@@ -59,7 +59,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
n = args -> n;
a = (FLOAT *)args -> a;
lda = args -> lda;
if (range_n) {
n = range_n[1] - range_n[0];
a += range_n[0] * (lda + 1) * COMPSIZE;
@@ -92,9 +92,9 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
ZTRMV (n - j - 1,
a + ((j + 1) + (j + 1) * lda) * COMPSIZE, lda,
a + ((j + 1) + j * lda) * COMPSIZE, 1,
a + ((j + 1) + j * lda) * COMPSIZE, 1,
sb);
SCAL_K(n - j - 1, 0, 0,
-ajj_r, -ajj_i,
a + ((j + 1) + j * lda) * COMPSIZE, 1,

View File

@@ -59,7 +59,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
n = args -> n;
a = (FLOAT *)args -> a;
lda = args -> lda;
if (range_n) {
n = range_n[1] - range_n[0];
a += range_n[0] * (lda + 1) * COMPSIZE;
@@ -92,15 +92,15 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
#endif
ZTRMV (j,
a , lda,
a , lda,
a + j * lda * COMPSIZE, 1,
sb);
SCAL_K(j, 0, 0,
SCAL_K(j, 0, 0,
-ajj_r, -ajj_i,
a + j * lda * COMPSIZE, 1,
NULL, 0, NULL, 0);
}
return 0;

View File

@@ -67,7 +67,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
mode = BLAS_DOUBLE | BLAS_REAL;
#else
mode = BLAS_SINGLE | BLAS_REAL;
#endif
#endif
#else
#ifdef XDOUBLE
mode = BLAS_XDOUBLE | BLAS_COMPLEX;
@@ -75,7 +75,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
mode = BLAS_DOUBLE | BLAS_COMPLEX;
#else
mode = BLAS_SINGLE | BLAS_COMPLEX;
#endif
#endif
#endif
n = args -> n;
@@ -99,7 +99,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
for (i = start_i; i >= 0; i -= blocking) {
bk = n - i;
if (bk > blocking) bk = blocking;
range_N[0] = i;
range_N[1] = i + bk;
@@ -124,7 +124,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
newarg.a = a + (i + i * lda) * COMPSIZE;
CNAME (&newarg, NULL, NULL, sa, sb, 0);
newarg.m = n - bk - i;
newarg.n = i;
newarg.k = bk;

View File

@@ -67,7 +67,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
mode = BLAS_DOUBLE | BLAS_REAL;
#else
mode = BLAS_SINGLE | BLAS_REAL;
#endif
#endif
#else
#ifdef XDOUBLE
mode = BLAS_XDOUBLE | BLAS_COMPLEX;
@@ -75,7 +75,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
mode = BLAS_DOUBLE | BLAS_COMPLEX;
#else
mode = BLAS_SINGLE | BLAS_COMPLEX;
#endif
#endif
#endif
n = args -> n;
@@ -120,7 +120,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
newarg.a = a + (i + i * lda) * COMPSIZE;
CNAME (&newarg, NULL, NULL, sa, sb, 0);
newarg.m = i;
newarg.n = n - i - bk;
newarg.k = bk;
@@ -142,6 +142,6 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
gemm_thread_n(mode, &newarg, NULL, NULL, TRMM, sa, sb, args -> nthreads);
}
return 0;
}