aligned
This commit is contained in:
parent
a97b301aaa
commit
be09551cdf
|
@ -24,12 +24,11 @@ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
|||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#ifndef HAVE_ASM_KERNEL
|
||||
#include <altivec.h>
|
||||
static const unsigned char __attribute__((aligned(16))) swap_mask_arr[]={ 4,5,6,7,0,1,2,3, 12,13,14,15, 8,9,10,11};
|
||||
|
||||
static void caxpy_kernel_16(BLASLONG n, FLOAT *x, FLOAT *y, FLOAT alpha_r, FLOAT alpha_i)
|
||||
{
|
||||
|
||||
|
@ -43,7 +42,7 @@ static void caxpy_kernel_16(BLASLONG n, FLOAT *x, FLOAT *y, FLOAT alpha_r, FLOAT
|
|||
register __vector float valpha_i = {alpha_i, alpha_i,alpha_i, alpha_i};
|
||||
#endif
|
||||
|
||||
__vector unsigned char swap_mask = { 4,5,6,7,0,1,2,3, 12,13,14,15, 8,9,10,11};
|
||||
__vector unsigned char swap_mask = *((__vector unsigned char*)swap_mask_arr);
|
||||
register __vector float *vy = (__vector float *) y;
|
||||
register __vector float *vx = (__vector float *) x;
|
||||
BLASLONG i=0;
|
||||
|
|
|
@ -25,12 +25,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
*****************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
|
||||
#ifndef HAVE_KERNEL_8
|
||||
#include <altivec.h>
|
||||
static const unsigned char __attribute__((aligned(16))) swap_mask_arr[]={ 4,5,6,7,0,1,2,3, 12,13,14,15, 8,9,10,11};
|
||||
static void cdot_kernel_8(BLASLONG n, FLOAT *x, FLOAT *y, float *dot)
|
||||
{
|
||||
__vector unsigned char swap_mask = { 4,5,6,7,0,1,2,3, 12,13,14,15, 8,9,10,11};
|
||||
__vector unsigned char swap_mask = *((__vector unsigned char*)swap_mask_arr);
|
||||
register __vector float *vy = (__vector float *) y;
|
||||
register __vector float *vx = (__vector float *) x;
|
||||
BLASLONG i = 0;
|
||||
|
@ -96,7 +96,7 @@ OPENBLAS_COMPLEX_FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLA
|
|||
BLASLONG i = 0;
|
||||
BLASLONG ix=0, iy=0;
|
||||
OPENBLAS_COMPLEX_FLOAT result;
|
||||
FLOAT dot[4] __attribute__ ((aligned(16))) = {0.0, 0.0, 0.0, 0.0};
|
||||
FLOAT dot[4] __attribute__((aligned(16))) = {0.0, 0.0, 0.0, 0.0};
|
||||
|
||||
if (n <= 0) {
|
||||
CREAL(result) = 0.0;
|
||||
|
|
|
@ -32,7 +32,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#define NBMAX 1024
|
||||
|
||||
|
||||
static const unsigned char swap_mask_arr[]={ 4,5,6,7,0,1,2,3, 12,13,14,15, 8,9,10,11};
|
||||
static const unsigned char __attribute__((aligned(16))) swap_mask_arr[]={ 4,5,6,7,0,1,2,3, 12,13,14,15, 8,9,10,11};
|
||||
|
||||
|
||||
static void cgemv_kernel_4x4(BLASLONG n, BLASLONG lda, FLOAT *ap, FLOAT *x, FLOAT *y) {
|
||||
|
@ -247,8 +247,8 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha_r, FLOAT alpha_i,
|
|||
BLASLONG m2;
|
||||
BLASLONG m3;
|
||||
BLASLONG n2;
|
||||
|
||||
FLOAT xbuffer[8], *ybuffer;
|
||||
FLOAT xbuffer[8] __attribute__((aligned(16)));
|
||||
FLOAT *ybuffer;
|
||||
|
||||
if (m < 1) return (0);
|
||||
if (n < 1) return (0);
|
||||
|
|
|
@ -29,7 +29,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#define NBMAX 1024
|
||||
#include <altivec.h>
|
||||
static const unsigned char swap_mask_arr[]={ 4,5,6,7,0,1,2,3, 12,13,14,15, 8,9,10,11};
|
||||
static const unsigned char __attribute__((aligned(16))) swap_mask_arr[]={ 4,5,6,7,0,1,2,3, 12,13,14,15, 8,9,10,11};
|
||||
|
||||
static void cgemv_kernel_4x4(BLASLONG n, BLASLONG lda, FLOAT *ap, FLOAT *x, FLOAT *y, FLOAT alpha_r, FLOAT alpha_i) {
|
||||
BLASLONG i;
|
||||
|
@ -260,8 +260,8 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha_r, FLOAT alpha_i,
|
|||
BLASLONG m2;
|
||||
BLASLONG m3;
|
||||
BLASLONG n2;
|
||||
|
||||
FLOAT ybuffer[8], *xbuffer;
|
||||
FLOAT ybuffer[8] __attribute__((aligned(16)));
|
||||
FLOAT *xbuffer;
|
||||
|
||||
if (m < 1) return (0);
|
||||
if (n < 1) return (0);
|
||||
|
|
|
@ -145,7 +145,7 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, FLOAT *a, BLASLO
|
|||
BLASLONG m3;
|
||||
BLASLONG n2;
|
||||
BLASLONG lda4 = lda << 2;
|
||||
FLOAT xbuffer[8] __attribute__ ((aligned (16)));;
|
||||
FLOAT xbuffer[8] __attribute__ ((aligned (16)));
|
||||
FLOAT *ybuffer;
|
||||
|
||||
if ( m < 1 ) return(0);
|
||||
|
|
|
@ -581,9 +581,9 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, FLOAT *a, BLASLO
|
|||
BLASLONG m1;
|
||||
BLASLONG m2;
|
||||
BLASLONG m3;
|
||||
BLASLONG n2;
|
||||
|
||||
FLOAT ybuffer[8], *xbuffer;
|
||||
BLASLONG n2;
|
||||
FLOAT ybuffer[8] __attribute__((aligned(16)));
|
||||
FLOAT *xbuffer;
|
||||
|
||||
if (m < 1) return (0);
|
||||
if (n < 1) return (0);
|
||||
|
|
|
@ -174,7 +174,8 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, FLOAT *a, BLASLO
|
|||
BLASLONG n2;
|
||||
BLASLONG lda4 = lda << 2;
|
||||
BLASLONG lda8 = lda << 3;
|
||||
FLOAT xbuffer[8],*ybuffer;
|
||||
FLOAT xbuffer[8] __attribute__((aligned(16)));
|
||||
FLOAT *ybuffer;
|
||||
|
||||
if ( m < 1 ) return(0);
|
||||
if ( n < 1 ) return(0);
|
||||
|
|
|
@ -213,7 +213,8 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, FLOAT *a, BLASLO
|
|||
BLASLONG n2;
|
||||
BLASLONG lda4 = lda << 2;
|
||||
BLASLONG lda8 = lda << 3;
|
||||
FLOAT xbuffer[8],*ybuffer;
|
||||
FLOAT xbuffer[8] __attribute__((aligned(16)));
|
||||
FLOAT *ybuffer;
|
||||
|
||||
if ( m < 1 ) return(0);
|
||||
if ( n < 1 ) return(0);
|
||||
|
|
|
@ -177,10 +177,9 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, FLOAT *a, BLASLO
|
|||
BLASLONG m1;
|
||||
BLASLONG m2;
|
||||
BLASLONG m3;
|
||||
BLASLONG n2;
|
||||
|
||||
FLOAT ybuffer[8], *xbuffer;
|
||||
|
||||
BLASLONG n2;
|
||||
FLOAT ybuffer[8] __attribute__((aligned(16)));
|
||||
FLOAT *xbuffer;
|
||||
if (m < 1) return (0);
|
||||
if (n < 1) return (0);
|
||||
|
||||
|
|
|
@ -204,8 +204,8 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, FLOAT *a, BLASLO
|
|||
BLASLONG m3;
|
||||
BLASLONG n2;
|
||||
|
||||
FLOAT ybuffer[8], *xbuffer;
|
||||
|
||||
FLOAT ybuffer[8] __attribute__((aligned(16)));
|
||||
FLOAT *xbuffer;
|
||||
if (m < 1) return (0);
|
||||
if (n < 1) return (0);
|
||||
|
||||
|
|
|
@ -614,8 +614,8 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha_r, FLOAT alpha_i,
|
|||
BLASLONG m2;
|
||||
BLASLONG m3;
|
||||
BLASLONG n2;
|
||||
|
||||
FLOAT xbuffer[8], *ybuffer;
|
||||
FLOAT xbuffer[8] __attribute__((aligned(16)));
|
||||
FLOAT *ybuffer;
|
||||
|
||||
if (m < 1) return (0);
|
||||
if (n < 1) return (0);
|
||||
|
|
|
@ -532,8 +532,8 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha_r, FLOAT alpha_i,
|
|||
BLASLONG m2;
|
||||
BLASLONG m3;
|
||||
BLASLONG n2;
|
||||
|
||||
FLOAT ybuffer[8], *xbuffer;
|
||||
FLOAT ybuffer[8] __attribute__((aligned(16)));
|
||||
FLOAT *xbuffer;
|
||||
|
||||
if (m < 1) return (0);
|
||||
if (n < 1) return (0);
|
||||
|
|
Loading…
Reference in New Issue