diff --git a/interface/scal.c b/interface/scal.c index 0a7fee640..c6638a62d 100644 --- a/interface/scal.c +++ b/interface/scal.c @@ -85,7 +85,7 @@ void CNAME(blasint n, FLOAT alpha, FLOAT *x, blasint incx){ if (nthreads == 1) { #endif - SCAL_K(n, 0, 0, alpha, x, incx, NULL, 0, NULL, 0); + SCAL_K(n, 0, 0, alpha, x, incx, NULL, 0, NULL, 1); #ifdef SMP } else { @@ -102,7 +102,7 @@ void CNAME(blasint n, FLOAT alpha, FLOAT *x, blasint incx){ #else &alpha, #endif - x, incx, NULL, 0, NULL, 0, (int (*)(void))SCAL_K, nthreads); + x, incx, NULL, 0, NULL, 1, (int (*)(void))SCAL_K, nthreads); } #endif diff --git a/kernel/x86_64/dscal.c b/kernel/x86_64/dscal.c index e7182c5ce..641f86f90 100644 --- a/kernel/x86_64/dscal.c +++ b/kernel/x86_64/dscal.c @@ -43,21 +43,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. static void dscal_kernel_8( BLASLONG n, FLOAT *da , FLOAT *x ) { - BLASLONG i; - FLOAT alpha = *da; + BLASLONG i; + FLOAT alpha = *da; - for( i=0; i 0 ) - { - dscal_kernel_inc_8(n1, &da, x, inc_x); - i = n1 * inc_x; - j = n1; - } - - while(j < n) - { - - x[i] *= da; - i += inc_x ; - j++; - - } - - } - - return(0); - } - - BLASLONG n1 = n & -8; - if ( n1 > 0 ) - { -// if ( da == 0.0 ) -// dscal_kernel_8_zero(n1 , &da , x); -// else - dscal_kernel_8(n1 , &da , x); - } - - if ( da == 0.0 ) - { - for ( i=n1 ; i 0 ) + { + dscal_kernel_inc_8(n1, &da, x, inc_x); + i = n1 * inc_x; + j = n1; + } + while(j < n) + { + x[i] *= da; + i += inc_x ; + j++; + } + } + else + { + BLASLONG n1 = n & -8; + if ( n1 > 0) + dscal_kernel_8(n1 , &da , x); + for ( i = n1 ; i < n; i++ ) + x[i] *= da; + } + } + else + { + if ( inc_x != 1 ) + { + if( da == 0.0) + { + BLASLONG n1 = n & -2; + while(j < n1) + { + x[i] = 0.0; + x[i+inc_x] = 0.0; + i += 2 * inc_x ; + j += 2; + } + while(j < n) + { + x[i] = 0.0; + i += inc_x ; + j++; + } + } + else + { + BLASLONG n1 = n & -8; + if ( n1 > 0 ) + { + dscal_kernel_inc_8(n1, &da, x, inc_x); + i = n1 * inc_x; + j = n1; + } + while(j < n) + { + x[i] *= da; + i += inc_x ; + j++; + } + } + } + else + { + if ( da == 0.0 ) + { + BLASLONG n1 = n & -8; + if ( n1 > 0) + dscal_kernel_8_zero(n1, &da, x); + for ( i = n1 ; i < n; i++ ) + x[i] = 0.0; + } + else + { + BLASLONG n1 = n & -8; + if ( n1 > 0) + dscal_kernel_8(n1 , &da , x); + for ( i = n1 ; i < n; i++ ) + x[i] *= da; + } + } + } } - - diff --git a/kernel/x86_64/sscal.c b/kernel/x86_64/sscal.c index a85d20564..6e54f8893 100644 --- a/kernel/x86_64/sscal.c +++ b/kernel/x86_64/sscal.c @@ -39,21 +39,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. static void sscal_kernel_16( BLASLONG n, FLOAT *da , FLOAT *x ) { - BLASLONG i; - FLOAT alpha = *da; + BLASLONG i; + FLOAT alpha = *da; - for( i=0; i 0 ) - { - sscal_kernel_inc_8(n1, &da, x, inc_x); - i = n1 * inc_x; - j = n1; - } -#endif - while(j < n) - { - x[i] *= da; - i += inc_x ; - j++; - - } - - } - return(0); - } - - BLASLONG n1 = n & -16; - if ( n1 > 0 ) - { - //if ( da == 0.0 ) - // sscal_kernel_16_zero(n1 , &da , x); - //else - sscal_kernel_16(n1 , &da , x); - } - - if ( da == 0.0 ) - { - for ( i=n1 ; i 0 ) + { + sscal_kernel_inc_8(n1, &da, x, inc_x); + i = n1 * inc_x; + j = n1; + } + while(j < n) + { + x[i] *= da; + i += inc_x ; + j++; + } + } + else + { + BLASLONG n1 = n & -16; + if ( n1 > 0) + sscal_kernel_16(n1 , &da , x); + for ( i = n1 ; i < n; i++ ) + x[i] *= da; + } + } + else + { + if ( inc_x != 1 ) + { + if( da == 0.0) + { + BLASLONG n1 = n & -2; + while(j < n1) + { + x[i] = 0.0; + x[i+inc_x] = 0.0; + i += 2 * inc_x ; + j += 2; + } + while(j < n) + { + x[i] = 0.0; + i += inc_x ; + j++; + } + } + else + { + BLASLONG n1 = n & -8; + if ( n1 > 0 ) + { + sscal_kernel_inc_8(n1, &da, x, inc_x); + i = n1 * inc_x; + j = n1; + } + while(j < n) + { + x[i] *= da; + i += inc_x ; + j++; + } + } + } + else + { + if ( da == 0.0 ) + { + BLASLONG n1 = n & -16; + if ( n1 > 0) + sscal_kernel_16_zero(n1, &da, x); + for ( i = n1 ; i < n; i++ ) + x[i] = 0.0; + } + else + { + BLASLONG n1 = n & -16; + if ( n1 > 0) + sscal_kernel_16(n1 , &da , x); + for ( i = n1 ; i < n; i++ ) + x[i] *= da; + } + } + } } - -