aligned
This commit is contained in:
parent
a97b301aaa
commit
be09551cdf
|
@ -24,12 +24,11 @@ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
|
|
||||||
|
|
||||||
#ifndef HAVE_ASM_KERNEL
|
#ifndef HAVE_ASM_KERNEL
|
||||||
#include <altivec.h>
|
#include <altivec.h>
|
||||||
|
static const unsigned char __attribute__((aligned(16))) swap_mask_arr[]={ 4,5,6,7,0,1,2,3, 12,13,14,15, 8,9,10,11};
|
||||||
|
|
||||||
static void caxpy_kernel_16(BLASLONG n, FLOAT *x, FLOAT *y, FLOAT alpha_r, FLOAT alpha_i)
|
static void caxpy_kernel_16(BLASLONG n, FLOAT *x, FLOAT *y, FLOAT alpha_r, FLOAT alpha_i)
|
||||||
{
|
{
|
||||||
|
|
||||||
|
@ -43,7 +42,7 @@ static void caxpy_kernel_16(BLASLONG n, FLOAT *x, FLOAT *y, FLOAT alpha_r, FLOAT
|
||||||
register __vector float valpha_i = {alpha_i, alpha_i,alpha_i, alpha_i};
|
register __vector float valpha_i = {alpha_i, alpha_i,alpha_i, alpha_i};
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
__vector unsigned char swap_mask = { 4,5,6,7,0,1,2,3, 12,13,14,15, 8,9,10,11};
|
__vector unsigned char swap_mask = *((__vector unsigned char*)swap_mask_arr);
|
||||||
register __vector float *vy = (__vector float *) y;
|
register __vector float *vy = (__vector float *) y;
|
||||||
register __vector float *vx = (__vector float *) x;
|
register __vector float *vx = (__vector float *) x;
|
||||||
BLASLONG i=0;
|
BLASLONG i=0;
|
||||||
|
|
|
@ -25,12 +25,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
|
|
||||||
#ifndef HAVE_KERNEL_8
|
#ifndef HAVE_KERNEL_8
|
||||||
#include <altivec.h>
|
#include <altivec.h>
|
||||||
|
static const unsigned char __attribute__((aligned(16))) swap_mask_arr[]={ 4,5,6,7,0,1,2,3, 12,13,14,15, 8,9,10,11};
|
||||||
static void cdot_kernel_8(BLASLONG n, FLOAT *x, FLOAT *y, float *dot)
|
static void cdot_kernel_8(BLASLONG n, FLOAT *x, FLOAT *y, float *dot)
|
||||||
{
|
{
|
||||||
__vector unsigned char swap_mask = { 4,5,6,7,0,1,2,3, 12,13,14,15, 8,9,10,11};
|
__vector unsigned char swap_mask = *((__vector unsigned char*)swap_mask_arr);
|
||||||
register __vector float *vy = (__vector float *) y;
|
register __vector float *vy = (__vector float *) y;
|
||||||
register __vector float *vx = (__vector float *) x;
|
register __vector float *vx = (__vector float *) x;
|
||||||
BLASLONG i = 0;
|
BLASLONG i = 0;
|
||||||
|
@ -96,7 +96,7 @@ OPENBLAS_COMPLEX_FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLA
|
||||||
BLASLONG i = 0;
|
BLASLONG i = 0;
|
||||||
BLASLONG ix=0, iy=0;
|
BLASLONG ix=0, iy=0;
|
||||||
OPENBLAS_COMPLEX_FLOAT result;
|
OPENBLAS_COMPLEX_FLOAT result;
|
||||||
FLOAT dot[4] __attribute__ ((aligned(16))) = {0.0, 0.0, 0.0, 0.0};
|
FLOAT dot[4] __attribute__((aligned(16))) = {0.0, 0.0, 0.0, 0.0};
|
||||||
|
|
||||||
if (n <= 0) {
|
if (n <= 0) {
|
||||||
CREAL(result) = 0.0;
|
CREAL(result) = 0.0;
|
||||||
|
|
|
@ -32,7 +32,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#define NBMAX 1024
|
#define NBMAX 1024
|
||||||
|
|
||||||
|
|
||||||
static const unsigned char swap_mask_arr[]={ 4,5,6,7,0,1,2,3, 12,13,14,15, 8,9,10,11};
|
static const unsigned char __attribute__((aligned(16))) swap_mask_arr[]={ 4,5,6,7,0,1,2,3, 12,13,14,15, 8,9,10,11};
|
||||||
|
|
||||||
|
|
||||||
static void cgemv_kernel_4x4(BLASLONG n, BLASLONG lda, FLOAT *ap, FLOAT *x, FLOAT *y) {
|
static void cgemv_kernel_4x4(BLASLONG n, BLASLONG lda, FLOAT *ap, FLOAT *x, FLOAT *y) {
|
||||||
|
@ -247,8 +247,8 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha_r, FLOAT alpha_i,
|
||||||
BLASLONG m2;
|
BLASLONG m2;
|
||||||
BLASLONG m3;
|
BLASLONG m3;
|
||||||
BLASLONG n2;
|
BLASLONG n2;
|
||||||
|
FLOAT xbuffer[8] __attribute__((aligned(16)));
|
||||||
FLOAT xbuffer[8], *ybuffer;
|
FLOAT *ybuffer;
|
||||||
|
|
||||||
if (m < 1) return (0);
|
if (m < 1) return (0);
|
||||||
if (n < 1) return (0);
|
if (n < 1) return (0);
|
||||||
|
|
|
@ -29,7 +29,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#define NBMAX 1024
|
#define NBMAX 1024
|
||||||
#include <altivec.h>
|
#include <altivec.h>
|
||||||
static const unsigned char swap_mask_arr[]={ 4,5,6,7,0,1,2,3, 12,13,14,15, 8,9,10,11};
|
static const unsigned char __attribute__((aligned(16))) swap_mask_arr[]={ 4,5,6,7,0,1,2,3, 12,13,14,15, 8,9,10,11};
|
||||||
|
|
||||||
static void cgemv_kernel_4x4(BLASLONG n, BLASLONG lda, FLOAT *ap, FLOAT *x, FLOAT *y, FLOAT alpha_r, FLOAT alpha_i) {
|
static void cgemv_kernel_4x4(BLASLONG n, BLASLONG lda, FLOAT *ap, FLOAT *x, FLOAT *y, FLOAT alpha_r, FLOAT alpha_i) {
|
||||||
BLASLONG i;
|
BLASLONG i;
|
||||||
|
@ -260,8 +260,8 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha_r, FLOAT alpha_i,
|
||||||
BLASLONG m2;
|
BLASLONG m2;
|
||||||
BLASLONG m3;
|
BLASLONG m3;
|
||||||
BLASLONG n2;
|
BLASLONG n2;
|
||||||
|
FLOAT ybuffer[8] __attribute__((aligned(16)));
|
||||||
FLOAT ybuffer[8], *xbuffer;
|
FLOAT *xbuffer;
|
||||||
|
|
||||||
if (m < 1) return (0);
|
if (m < 1) return (0);
|
||||||
if (n < 1) return (0);
|
if (n < 1) return (0);
|
||||||
|
|
|
@ -145,7 +145,7 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, FLOAT *a, BLASLO
|
||||||
BLASLONG m3;
|
BLASLONG m3;
|
||||||
BLASLONG n2;
|
BLASLONG n2;
|
||||||
BLASLONG lda4 = lda << 2;
|
BLASLONG lda4 = lda << 2;
|
||||||
FLOAT xbuffer[8] __attribute__ ((aligned (16)));;
|
FLOAT xbuffer[8] __attribute__ ((aligned (16)));
|
||||||
FLOAT *ybuffer;
|
FLOAT *ybuffer;
|
||||||
|
|
||||||
if ( m < 1 ) return(0);
|
if ( m < 1 ) return(0);
|
||||||
|
|
|
@ -582,8 +582,8 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, FLOAT *a, BLASLO
|
||||||
BLASLONG m2;
|
BLASLONG m2;
|
||||||
BLASLONG m3;
|
BLASLONG m3;
|
||||||
BLASLONG n2;
|
BLASLONG n2;
|
||||||
|
FLOAT ybuffer[8] __attribute__((aligned(16)));
|
||||||
FLOAT ybuffer[8], *xbuffer;
|
FLOAT *xbuffer;
|
||||||
|
|
||||||
if (m < 1) return (0);
|
if (m < 1) return (0);
|
||||||
if (n < 1) return (0);
|
if (n < 1) return (0);
|
||||||
|
|
|
@ -174,7 +174,8 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, FLOAT *a, BLASLO
|
||||||
BLASLONG n2;
|
BLASLONG n2;
|
||||||
BLASLONG lda4 = lda << 2;
|
BLASLONG lda4 = lda << 2;
|
||||||
BLASLONG lda8 = lda << 3;
|
BLASLONG lda8 = lda << 3;
|
||||||
FLOAT xbuffer[8],*ybuffer;
|
FLOAT xbuffer[8] __attribute__((aligned(16)));
|
||||||
|
FLOAT *ybuffer;
|
||||||
|
|
||||||
if ( m < 1 ) return(0);
|
if ( m < 1 ) return(0);
|
||||||
if ( n < 1 ) return(0);
|
if ( n < 1 ) return(0);
|
||||||
|
|
|
@ -213,7 +213,8 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, FLOAT *a, BLASLO
|
||||||
BLASLONG n2;
|
BLASLONG n2;
|
||||||
BLASLONG lda4 = lda << 2;
|
BLASLONG lda4 = lda << 2;
|
||||||
BLASLONG lda8 = lda << 3;
|
BLASLONG lda8 = lda << 3;
|
||||||
FLOAT xbuffer[8],*ybuffer;
|
FLOAT xbuffer[8] __attribute__((aligned(16)));
|
||||||
|
FLOAT *ybuffer;
|
||||||
|
|
||||||
if ( m < 1 ) return(0);
|
if ( m < 1 ) return(0);
|
||||||
if ( n < 1 ) return(0);
|
if ( n < 1 ) return(0);
|
||||||
|
|
|
@ -178,9 +178,8 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, FLOAT *a, BLASLO
|
||||||
BLASLONG m2;
|
BLASLONG m2;
|
||||||
BLASLONG m3;
|
BLASLONG m3;
|
||||||
BLASLONG n2;
|
BLASLONG n2;
|
||||||
|
FLOAT ybuffer[8] __attribute__((aligned(16)));
|
||||||
FLOAT ybuffer[8], *xbuffer;
|
FLOAT *xbuffer;
|
||||||
|
|
||||||
if (m < 1) return (0);
|
if (m < 1) return (0);
|
||||||
if (n < 1) return (0);
|
if (n < 1) return (0);
|
||||||
|
|
||||||
|
|
|
@ -204,8 +204,8 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, FLOAT *a, BLASLO
|
||||||
BLASLONG m3;
|
BLASLONG m3;
|
||||||
BLASLONG n2;
|
BLASLONG n2;
|
||||||
|
|
||||||
FLOAT ybuffer[8], *xbuffer;
|
FLOAT ybuffer[8] __attribute__((aligned(16)));
|
||||||
|
FLOAT *xbuffer;
|
||||||
if (m < 1) return (0);
|
if (m < 1) return (0);
|
||||||
if (n < 1) return (0);
|
if (n < 1) return (0);
|
||||||
|
|
||||||
|
|
|
@ -614,8 +614,8 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha_r, FLOAT alpha_i,
|
||||||
BLASLONG m2;
|
BLASLONG m2;
|
||||||
BLASLONG m3;
|
BLASLONG m3;
|
||||||
BLASLONG n2;
|
BLASLONG n2;
|
||||||
|
FLOAT xbuffer[8] __attribute__((aligned(16)));
|
||||||
FLOAT xbuffer[8], *ybuffer;
|
FLOAT *ybuffer;
|
||||||
|
|
||||||
if (m < 1) return (0);
|
if (m < 1) return (0);
|
||||||
if (n < 1) return (0);
|
if (n < 1) return (0);
|
||||||
|
|
|
@ -532,8 +532,8 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha_r, FLOAT alpha_i,
|
||||||
BLASLONG m2;
|
BLASLONG m2;
|
||||||
BLASLONG m3;
|
BLASLONG m3;
|
||||||
BLASLONG n2;
|
BLASLONG n2;
|
||||||
|
FLOAT ybuffer[8] __attribute__((aligned(16)));
|
||||||
FLOAT ybuffer[8], *xbuffer;
|
FLOAT *xbuffer;
|
||||||
|
|
||||||
if (m < 1) return (0);
|
if (m < 1) return (0);
|
||||||
if (n < 1) return (0);
|
if (n < 1) return (0);
|
||||||
|
|
Loading…
Reference in New Issue