s390x: move common vector definitions and utils into header
... to facilitate reuse beyond gemm_vec.c and avoid code duplication. Signed-off-by: Marius Hillenbrand <mhillen@linux.ibm.com>
This commit is contained in:
parent
992d7ca63d
commit
f91057cbad
|
@ -30,12 +30,13 @@
|
||||||
* USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
* USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*/
|
*/
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
#include <vecintrin.h>
|
#include "vector-common.h"
|
||||||
|
|
||||||
#include <stdbool.h>
|
#include <stdbool.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
|
||||||
#ifdef COMPLEX
|
#ifdef COMPLEX
|
||||||
#error "Handling for complex numbers is not supported in this kernel"
|
#error "Handling for complex numbers is not supported in this kernel"
|
||||||
#endif
|
#endif
|
||||||
|
@ -153,37 +154,6 @@ static const bool backwards = false;
|
||||||
* 3, May 2008.
|
* 3, May 2008.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/* Size in bytes of the vectors this kernel operates on. */
#define VLEN_BYTES 16
|
|
||||||
/* Number of FLOAT elements that fit into one VLEN_BYTES-sized vector. */
#define VLEN_FLOATS (VLEN_BYTES / sizeof(FLOAT))
|
|
||||||
|
|
||||||
/* 16-byte vector of FLOAT elements, declared via the vector_size attribute. */
typedef FLOAT vector_float __attribute__ ((vector_size (16)));
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Load a vector into a register, and hint at 8-byte alignment to improve
|
|
||||||
* performance. gcc-9 and newer create these hints on their own. For older
|
|
||||||
* compiler versions, use inline assembly to explicitly express the hint.
|
|
||||||
* Provide explicit hex encoding to cater for binutils versions that do not know
|
|
||||||
* about vector-load with alignment hints yet.
|
|
||||||
*
|
|
||||||
* Note that, for block sizes where we apply vectorization, vectors in A will
|
|
||||||
* always be 8-byte aligned.
|
|
||||||
*
|
|
||||||
* @param a address of the first FLOAT element of the vector to load; the
|
|
||||||
* hint tells the hardware to expect 8-byte alignment (see note above)
|
|
||||||
* @return the sizeof(vector_float) bytes starting at a, as a vector_float
|
|
||||||
*/
|
|
||||||
static inline vector_float vec_load_hinted(FLOAT const *restrict a) {
|
|
||||||
// reinterpret the element pointer as a pointer to a whole vector
vector_float const *restrict addr = (vector_float const *restrict)a;
|
|
||||||
vector_float y;
|
|
||||||
|
|
||||||
// Only gcc older than version 9 takes the inline-assembly path; clang is
// excluded explicitly (NOTE(review): presumably because clang also defines
// __GNUC__ with a small value -- confirm).
#if __GNUC__ < 9 && !defined(__clang__)
|
|
||||||
// hex-encode vl %[out],%[addr],3
|
|
||||||
// (the trailing 3 is the alignment-hint operand of the vector load)
asm(".insn vrx,0xe70000003006,%[out],%[addr],3"
|
|
||||||
: [ out ] "=v"(y)
|
|
||||||
: [ addr ] "R"(*addr));
|
|
||||||
#else
|
|
||||||
// plain vector load; per the comment above, gcc-9+ adds the hint itself
y = *addr;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
return y;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Calculate for a row-block in C_i of size ROWSxCOLS using vector intrinsics.
|
* Calculate for a row-block in C_i of size ROWSxCOLS using vector intrinsics.
|
||||||
*
|
*
|
||||||
|
|
|
@ -0,0 +1,64 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) IBM Corporation 2020.
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are
|
||||||
|
* met:
|
||||||
|
*
|
||||||
|
* 1. Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
*
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in
|
||||||
|
* the documentation and/or other materials provided with the
|
||||||
|
* distribution.
|
||||||
|
* 3. Neither the name of the OpenBLAS project nor the names of
|
||||||
|
* its contributors may be used to endorse or promote products
|
||||||
|
* derived from this software without specific prior written
|
||||||
|
* permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||||
|
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||||
|
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
|
* USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef VECTOR_COMMON_H
#define VECTOR_COMMON_H

#include <vecintrin.h>

/*
 * Common vector definitions shared by the s390x vector kernels.
 *
 * FLOAT is not defined here; it must already be defined by the including
 * file before this header is included (gemm_vec.c includes "common.h" first).
 */

/* Size in bytes of the vectors the kernels operate on. */
#define VLEN_BYTES 16
/* Number of FLOAT elements that fit into one VLEN_BYTES-sized vector. */
#define VLEN_FLOATS (VLEN_BYTES / sizeof(FLOAT))

/* Vector of FLOAT elements, VLEN_BYTES wide, declared via vector_size. */
typedef FLOAT vector_float __attribute__ ((vector_size (VLEN_BYTES)));

/**
 * Load a vector into a register, and hint at 8-byte alignment to improve
 * performance. gcc-9 and newer create these hints on their own. For older
 * compiler versions, use inline assembly to explicitly express the hint.
 * Provide explicit hex encoding to cater for binutils versions that do not know
 * about vector-load with alignment hints yet.
 *
 * Note that, for block sizes where we apply vectorization, vectors in A will
 * always be 8-byte aligned.
 *
 * @param a address of the first FLOAT element of the vector to load; the
 *          hint tells the hardware to expect 8-byte alignment (see note above)
 * @return the VLEN_BYTES bytes starting at a, as a vector_float
 */
static inline vector_float vec_load_hinted(FLOAT const *restrict a) {
	/* reinterpret the element pointer as a pointer to a whole vector */
	vector_float const *restrict addr = (vector_float const *restrict)a;
	vector_float y;

#if __GNUC__ < 9 && !defined(__clang__)
	// hex-encode vl %[out],%[addr],3
	// __asm__ rather than asm: the plain keyword is not available when
	// compiling in a strict ISO mode (-std=c99, -std=c11, ...).
	__asm__(".insn vrx,0xe70000003006,%[out],%[addr],3"
	    : [ out ] "=v"(y)
	    : [ addr ] "R"(*addr));
#else
	/* plain vector load; per the comment above, gcc-9+ adds the hint itself */
	y = *addr;
#endif

	return y;
}

#endif /* VECTOR_COMMON_H */
|
Loading…
Reference in New Issue