Merge pull request #1440 from quickwritereader/develop
small corrections
This commit is contained in:
commit
f45776ec1f
|
@ -44,7 +44,7 @@ static void dcopy_kernel_32(BLASLONG n, FLOAT *x, FLOAT *y) {
|
||||||
"brctg %[n_tmp],1b"
|
"brctg %[n_tmp],1b"
|
||||||
: [mem_y] "=m" (*(double (*)[n])y), [n_tmp] "+&r"(n)
|
: [mem_y] "=m" (*(double (*)[n])y), [n_tmp] "+&r"(n)
|
||||||
: [mem_x] "m" (*(const double (*)[n])x),
|
: [mem_x] "m" (*(const double (*)[n])x),
|
||||||
[ptr_x] "a"(x), [ptr_y] "a"(y)
|
[ptr_x] "+&a"(x), [ptr_y] "+&a"(y)
|
||||||
: "cc"
|
: "cc"
|
||||||
);
|
);
|
||||||
return;
|
return;
|
||||||
|
|
|
@ -139,7 +139,7 @@ static void dswap_kernel_32(BLASLONG n, FLOAT *x, FLOAT *y)
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
static void __attribute__ ((noinline)) dswap_kernel_32(BLASLONG n, FLOAT *x, FLOAT *y)
|
static void dswap_kernel_32(BLASLONG n, FLOAT *x, FLOAT *y)
|
||||||
{
|
{
|
||||||
__asm__ volatile(
|
__asm__ volatile(
|
||||||
"pfd 2, 0(%[ptr_x]) \n\t"
|
"pfd 2, 0(%[ptr_x]) \n\t"
|
||||||
|
@ -227,7 +227,7 @@ static void __attribute__ ((noinline)) dswap_kernel_32(BLASLONG n, FLOAT *x, F
|
||||||
[mem_y] "+m" (*(double (*)[n])y),
|
[mem_y] "+m" (*(double (*)[n])y),
|
||||||
[n_tmp] "+&r"(n)
|
[n_tmp] "+&r"(n)
|
||||||
: [ptr_x] "a"(x), [ptr_y] "a"(y)
|
: [ptr_x] "a"(x), [ptr_y] "a"(y)
|
||||||
: "cc", "memory","r1", "v0","v1","v2","v3","v4","v5","v6","v7","v16",
|
: "cc", "r1", "v0","v1","v2","v3","v4","v5","v6","v7","v16",
|
||||||
"v17","v18","v19","v20","v21","v22","v23","v24","v25","v26","v27","v28","v29","v30","v31"
|
"v17","v18","v19","v20","v21","v22","v23","v24","v25","v26","v27","v28","v29","v30","v31"
|
||||||
);
|
);
|
||||||
return;
|
return;
|
||||||
|
|
|
@ -96,7 +96,7 @@ static void zscal_kernel_8(BLASLONG n, FLOAT da_r,FLOAT da_i, FLOAT *x) {
|
||||||
"clgrjl %[x_ptr],%%r0,1b \n\t"
|
"clgrjl %[x_ptr],%%r0,1b \n\t"
|
||||||
: [mem] "+m" (*(double (*)[2*n])x) ,[x_ptr] "+&a"(x)
|
: [mem] "+m" (*(double (*)[2*n])x) ,[x_ptr] "+&a"(x)
|
||||||
: [n] "r"(n), [alpha_r] "f"(da_r),[alpha_i] "f"(da_i)
|
: [n] "r"(n), [alpha_r] "f"(da_r),[alpha_i] "f"(da_i)
|
||||||
: "cc", "memory","r0","v16","v17","v18","v19","v20","v21","v22","v23","v24","v25"
|
: "cc", "r0","v16","v17","v18","v19","v20","v21","v22","v23","v24","v25"
|
||||||
);
|
);
|
||||||
|
|
||||||
|
|
||||||
|
@ -106,10 +106,9 @@ static void zscal_kernel_8_zero_r(BLASLONG n, FLOAT da_i, FLOAT *x) {
|
||||||
|
|
||||||
__asm__ ( "pfd 2, 0(%1) \n\t"
|
__asm__ ( "pfd 2, 0(%1) \n\t"
|
||||||
"lgdr %%r0,%[alpha] \n\t"
|
"lgdr %%r0,%[alpha] \n\t"
|
||||||
"vlvgg %%v16,%%r0,0 \n\t"
|
"vlvgp %%v16,%%r0,%%r0 \n\t" //load both from disjoint
|
||||||
"lcdbr %[alpha],%[alpha] \n\t"
|
"vflcdb %%v16,%%v16 \n\t" //complement both
|
||||||
"lgdr %%r0,%[alpha] \n\t"
|
"vlvgg %%v16,%%r0,0 \n\t" //restore 1st
|
||||||
"vlvgg %%v16,%%r0,1 \n\t"
|
|
||||||
"vlr %%v17 ,%%v16 \n\t"
|
"vlr %%v17 ,%%v16 \n\t"
|
||||||
"sllg %%r0,%[n],4 \n\t"
|
"sllg %%r0,%[n],4 \n\t"
|
||||||
"agr %%r0,%[x_ptr] \n\t"
|
"agr %%r0,%[x_ptr] \n\t"
|
||||||
|
@ -129,8 +128,8 @@ static void zscal_kernel_8_zero_r(BLASLONG n, FLOAT da_i, FLOAT *x) {
|
||||||
"vsteg %%v26, 40(%[x_ptr]),0 \n\t"
|
"vsteg %%v26, 40(%[x_ptr]),0 \n\t"
|
||||||
"vl %%v27, 48(%[x_ptr]) \n\t"
|
"vl %%v27, 48(%[x_ptr]) \n\t"
|
||||||
"vfmdb %%v27,%%v27,%%v17 \n\t"
|
"vfmdb %%v27,%%v27,%%v17 \n\t"
|
||||||
"vsteg %%v27, 40(%[x_ptr]),1 \n\t"
|
"vsteg %%v27, 48(%[x_ptr]),1 \n\t"
|
||||||
"vsteg %%v27, 48(%[x_ptr]),0 \n\t"
|
"vsteg %%v27, 56(%[x_ptr]),0 \n\t"
|
||||||
"vl %%v28, 64(%[x_ptr]) \n\t"
|
"vl %%v28, 64(%[x_ptr]) \n\t"
|
||||||
"vfmdb %%v28,%%v28,%%v16 \n\t"
|
"vfmdb %%v28,%%v28,%%v16 \n\t"
|
||||||
"vsteg %%v28, 64(%[x_ptr]),1 \n\t"
|
"vsteg %%v28, 64(%[x_ptr]),1 \n\t"
|
||||||
|
@ -141,8 +140,8 @@ static void zscal_kernel_8_zero_r(BLASLONG n, FLOAT da_i, FLOAT *x) {
|
||||||
"vsteg %%v29, 88(%[x_ptr]),0 \n\t"
|
"vsteg %%v29, 88(%[x_ptr]),0 \n\t"
|
||||||
"vl %%v30, 96(%[x_ptr]) \n\t"
|
"vl %%v30, 96(%[x_ptr]) \n\t"
|
||||||
"vfmdb %%v30,%%v30,%%v16 \n\t"
|
"vfmdb %%v30,%%v30,%%v16 \n\t"
|
||||||
"vsteg %%v27, 96(%[x_ptr]),1 \n\t"
|
"vsteg %%v30, 96(%[x_ptr]),1 \n\t"
|
||||||
"vsteg %%v27, 104(%[x_ptr]),0 \n\t"
|
"vsteg %%v30, 104(%[x_ptr]),0 \n\t"
|
||||||
"vl %%v31, 112(%[x_ptr]) \n\t"
|
"vl %%v31, 112(%[x_ptr]) \n\t"
|
||||||
"vfmdb %%v31,%%v31,%%v17 \n\t"
|
"vfmdb %%v31,%%v31,%%v17 \n\t"
|
||||||
"vsteg %%v31, 112(%[x_ptr]),1 \n\t"
|
"vsteg %%v31, 112(%[x_ptr]),1 \n\t"
|
||||||
|
|
|
@ -137,7 +137,7 @@ static void zswap_kernel_16(BLASLONG n, FLOAT *x, FLOAT *y)
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
static void __attribute__ ((noinline)) zswap_kernel_16(BLASLONG n, FLOAT *x, FLOAT *y)
|
static void zswap_kernel_16(BLASLONG n, FLOAT *x, FLOAT *y)
|
||||||
{
|
{
|
||||||
__asm__ volatile(
|
__asm__ volatile(
|
||||||
"pfd 2, 0(%[ptr_x]) \n\t"
|
"pfd 2, 0(%[ptr_x]) \n\t"
|
||||||
|
@ -225,7 +225,7 @@ static void __attribute__ ((noinline)) zswap_kernel_16(BLASLONG n, FLOAT *x, F
|
||||||
[mem_y] "+m" (*(double (*)[2*n])y),
|
[mem_y] "+m" (*(double (*)[2*n])y),
|
||||||
[n_tmp] "+&r"(n)
|
[n_tmp] "+&r"(n)
|
||||||
: [ptr_x] "a"(x), [ptr_y] "a"(y)
|
: [ptr_x] "a"(x), [ptr_y] "a"(y)
|
||||||
: "cc", "memory", "r1", "v0","v1","v2","v3","v4","v5","v6","v7","v16",
|
: "cc", "r1", "v0","v1","v2","v3","v4","v5","v6","v7","v16",
|
||||||
"v17","v18","v19","v20","v21","v22","v23","v24","v25","v26","v27","v28","v29","v30","v31"
|
"v17","v18","v19","v20","v21","v22","v23","v24","v25","v26","v27","v28","v29","v30","v31"
|
||||||
);
|
);
|
||||||
return;
|
return;
|
||||||
|
|
Loading…
Reference in New Issue