s390x: move common vector definitions and utils into header

... to facilitate reuse beyond gemm_vec.c and avoid code duplication. Signed-off-by: Marius Hillenbrand <mhillen@linux.ibm.com>
2020-09-15 10:54:37 +02:00 · 2020-09-15 10:54:37 +02:00 · f91057cbad
parent 992d7ca63d
commit f91057cbad
2 changed files with 66 additions and 32 deletions
--- a/kernel/zarch/gemm_vec.c
+++ b/kernel/zarch/gemm_vec.c
@ -30,12 +30,13 @@
 * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 #include "common.h"
-#include <vecintrin.h>
+#include "vector-common.h"
 #include <stdbool.h>
 #include <stdio.h>
 #include <stdlib.h>
 #ifdef COMPLEX
 #error "Handling for complex numbers is not supported in this kernel"
 #endif
@ -153,37 +154,6 @@ static const bool backwards = false;
 * 3, May 2008.
 */
 /* Size in bytes of the vectors this kernel operates on. */
 #define VLEN_BYTES 16
 /* Number of FLOAT elements in one vector; the expression has type size_t. */
 #define VLEN_FLOATS (VLEN_BYTES / sizeof(FLOAT))
 /*
  * Vector of FLOAT elements, VLEN_BYTES bytes wide (GCC vector extension).
  * The width is taken from VLEN_BYTES rather than repeated as a literal so
  * that the typedef and VLEN_FLOATS cannot drift apart.
  */
 typedef FLOAT vector_float __attribute__ ((vector_size (VLEN_BYTES)));
 /**
 * Fetch one vector_float from memory starting at a, passing along a hint
 * that the address is 8-byte aligned, which improves load performance.
 *
 * gcc-9 and later generate the hint on their own for a plain vector load.
 * Older gcc versions need inline assembly to express it. The instruction is
 * given in hex so that binutils versions that do not yet know the
 * vector-load-with-alignment-hint syntax can still assemble it.
 *
 * The hint is valid because, for the block sizes where vectorization is
 * applied, vectors in A are always 8-byte aligned.
 *
 * @param a  address of the first FLOAT element to load
 * @return   the vector read from a
 */
 static inline vector_float vec_load_hinted(FLOAT const *restrict a) {
 	vector_float const *restrict src = (vector_float const *restrict)a;
 #if defined(__clang__) || __GNUC__ >= 9
 	/* Plain load; the compiler attaches the alignment hint. */
 	return *src;
 #else
 	vector_float loaded;
 	// hex-encode vl %[out],%[addr],3
 	asm(".insn vrx,0xe70000003006,%[out],%[addr],3"
 	    : [ out ] "=v"(loaded)
 	    : [ addr ] "R"(*src));
 	return loaded;
 #endif
 }
 /**
 * Calculate for a row-block in C_i of size ROWSxCOLS using vector intrinsics.
 *
--- a/kernel/zarch/vector-common.h
+++ b/kernel/zarch/vector-common.h
@ -0,0 +1,64 @@
 /*
 * Copyright (c) IBM Corporation 2020.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 *    1. Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *
 *    2. Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *    3. Neither the name of the OpenBLAS project nor the names of
 *       its contributors may be used to endorse or promote products
 *       derived from this software without specific prior written
 *       permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
 * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 /*
 * Common vector definitions and helpers shared by the zarch kernels.
 *
 * FLOAT must already be defined when this header is included; gemm_vec.c
 * includes "common.h" first (presumably the source of FLOAT -- confirm).
 *
 * The include guard is required because this header defines a static inline
 * function: without it, a second inclusion in the same translation unit would
 * be a redefinition error.
 */
 #ifndef ZARCH_VECTOR_COMMON_H
 #define ZARCH_VECTOR_COMMON_H
 #include <vecintrin.h>
 /* Size in bytes of the vectors the kernels operate on. */
 #define VLEN_BYTES 16
 /* Number of FLOAT elements in one vector; the expression has type size_t. */
 #define VLEN_FLOATS (VLEN_BYTES / sizeof(FLOAT))
 /* Vector of FLOAT elements, VLEN_BYTES bytes wide (GCC vector extension). */
 typedef FLOAT vector_float __attribute__ ((vector_size (VLEN_BYTES)));
 /**
 * Load a vector into a register, and hint on 8-byte alignment to improve
 * performance. gcc-9 and newer will create these hints on their own. For older
 * compiler versions, use inline assembly to explicitly express the hint.
 * Provide explicit hex encoding to cater for binutils versions that do not know
 * about vector-load with alignment hints yet.
 *
 * Note that, for block sizes where we apply vectorization, vectors in A will
 * always be 8-byte aligned.
 *
 * @param a  address of the first FLOAT element of the vector to load
 * @return   the vector read from a
 */
 static inline vector_float vec_load_hinted(FLOAT const *restrict a) {
 	/* Reinterpret the element pointer as a pointer to a whole vector. */
 	vector_float const *restrict addr = (vector_float const *restrict)a;
 	vector_float y;
 #if __GNUC__ < 9 && !defined(__clang__)
 	// hex-encode vl %[out],%[addr],3
 	asm(".insn vrx,0xe70000003006,%[out],%[addr],3"
 	    : [ out ] "=v"(y)
 	    : [ addr ] "R"(*addr));
 #else
 	/* Plain load; the compiler adds the alignment hint itself. */
 	y = *addr;
 #endif
 	return y;
 }
 #endif /* ZARCH_VECTOR_COMMON_H */