added experimental support for ARMV8
This commit is contained in:
parent
e25de3d182
commit
fe5f46c330
|
@ -0,0 +1,7 @@
|
||||||
|
|
||||||
|
ifeq ($(CORE), ARMV8)
|
||||||
|
CCOMMON_OPT += -march=armv8-a
|
||||||
|
FCOMMON_OPT += -march=armv8-a
|
||||||
|
endif
|
||||||
|
|
||||||
|
|
|
@ -367,6 +367,14 @@ ifeq ($(ARCH), arm)
|
||||||
NO_BINARY_MODE = 1
|
NO_BINARY_MODE = 1
|
||||||
BINARY_DEFINED = 1
|
BINARY_DEFINED = 1
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
ifeq ($(ARCH), arm64)
|
||||||
|
NO_BINARY_MODE = 1
|
||||||
|
BINARY_DEFINED = 1
|
||||||
|
endif
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#
|
#
|
||||||
# C Compiler dependent settings
|
# C Compiler dependent settings
|
||||||
#
|
#
|
||||||
|
|
2
c_check
2
c_check
|
@ -64,6 +64,7 @@ $architecture = alpha if ($data =~ /ARCH_ALPHA/);
|
||||||
$architecture = sparc if ($data =~ /ARCH_SPARC/);
|
$architecture = sparc if ($data =~ /ARCH_SPARC/);
|
||||||
$architecture = ia64 if ($data =~ /ARCH_IA64/);
|
$architecture = ia64 if ($data =~ /ARCH_IA64/);
|
||||||
$architecture = arm if ($data =~ /ARCH_ARM/);
|
$architecture = arm if ($data =~ /ARCH_ARM/);
|
||||||
|
$architecture = arm64 if ($data =~ /ARCH_ARM64/);
|
||||||
|
|
||||||
$defined = 0;
|
$defined = 0;
|
||||||
|
|
||||||
|
@ -151,6 +152,7 @@ $architecture = alpha if ($data =~ /ARCH_ALPHA/);
|
||||||
$architecture = sparc if ($data =~ /ARCH_SPARC/);
|
$architecture = sparc if ($data =~ /ARCH_SPARC/);
|
||||||
$architecture = ia64 if ($data =~ /ARCH_IA64/);
|
$architecture = ia64 if ($data =~ /ARCH_IA64/);
|
||||||
$architecture = arm if ($data =~ /ARCH_ARM/);
|
$architecture = arm if ($data =~ /ARCH_ARM/);
|
||||||
|
$architecture = arm64 if ($data =~ /ARCH_ARM64/);
|
||||||
|
|
||||||
$binformat = bin32;
|
$binformat = bin32;
|
||||||
$binformat = bin64 if ($data =~ /BINARY_64/);
|
$binformat = bin64 if ($data =~ /BINARY_64/);
|
||||||
|
|
6
common.h
6
common.h
|
@ -311,7 +311,7 @@ typedef int blasint;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
#ifdef ARMV7
|
#if defined(ARMV7) || defined(ARMV6) || defined(ARMV8)
|
||||||
#define YIELDING asm volatile ("nop;nop;nop;nop;nop;nop;nop;nop; \n");
|
#define YIELDING asm volatile ("nop;nop;nop;nop;nop;nop;nop;nop; \n");
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -375,6 +375,10 @@ please https://github.com/xianyi/OpenBLAS/issues/246
|
||||||
#include "common_arm.h"
|
#include "common_arm.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef ARCH_ARM64
|
||||||
|
#include "common_arm64.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef OS_LINUX
|
#ifdef OS_LINUX
|
||||||
#include "common_linux.h"
|
#include "common_linux.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -0,0 +1,169 @@
|
||||||
|
/*****************************************************************************
|
||||||
|
Copyright (c) 2011, Lab of Parallel Software and Computational Science,ICSAS
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
|
||||||
|
1. Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in
|
||||||
|
the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
3. Neither the name of the ISCAS nor the names of its contributors may
|
||||||
|
be used to endorse or promote products derived from this software
|
||||||
|
without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||||
|
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||||
|
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
**********************************************************************************/
|
||||||
|
|
||||||
|
/*********************************************************************/
|
||||||
|
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
||||||
|
/* All rights reserved. */
|
||||||
|
/* */
|
||||||
|
/* Redistribution and use in source and binary forms, with or */
|
||||||
|
/* without modification, are permitted provided that the following */
|
||||||
|
/* conditions are met: */
|
||||||
|
/* */
|
||||||
|
/* 1. Redistributions of source code must retain the above */
|
||||||
|
/* copyright notice, this list of conditions and the following */
|
||||||
|
/* disclaimer. */
|
||||||
|
/* */
|
||||||
|
/* 2. Redistributions in binary form must reproduce the above */
|
||||||
|
/* copyright notice, this list of conditions and the following */
|
||||||
|
/* disclaimer in the documentation and/or other materials */
|
||||||
|
/* provided with the distribution. */
|
||||||
|
/* */
|
||||||
|
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
|
||||||
|
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
|
||||||
|
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
|
||||||
|
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
|
||||||
|
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
|
||||||
|
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
|
||||||
|
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
|
||||||
|
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
|
||||||
|
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
|
||||||
|
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
|
||||||
|
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
|
||||||
|
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
|
||||||
|
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
||||||
|
/* POSSIBILITY OF SUCH DAMAGE. */
|
||||||
|
/* */
|
||||||
|
/* The views and conclusions contained in the software and */
|
||||||
|
/* documentation are those of the authors and should not be */
|
||||||
|
/* interpreted as representing official policies, either expressed */
|
||||||
|
/* or implied, of The University of Texas at Austin. */
|
||||||
|
/*********************************************************************/
|
||||||
|
|
||||||
|
#ifndef COMMON_ARM64
|
||||||
|
#define COMMON_ARM64
|
||||||
|
|
||||||
|
#define MB
|
||||||
|
#define WMB
|
||||||
|
|
||||||
|
#define INLINE inline
|
||||||
|
|
||||||
|
#define RETURN_BY_COMPLEX
|
||||||
|
|
||||||
|
#ifndef ASSEMBLER
|
||||||
|
|
||||||
|
static void __inline blas_lock(volatile BLASULONG *address){
|
||||||
|
/*
|
||||||
|
int register ret;
|
||||||
|
|
||||||
|
do {
|
||||||
|
while (*address) {YIELDING;};
|
||||||
|
|
||||||
|
__asm__ __volatile__(
|
||||||
|
"ldrex r2, [%1] \n\t"
|
||||||
|
"mov r2, #0 \n\t"
|
||||||
|
"strex r3, r2, [%1] \n\t"
|
||||||
|
"mov %0 , r3 \n\t"
|
||||||
|
: "=r"(ret), "=r"(address)
|
||||||
|
: "1"(address)
|
||||||
|
: "memory", "r2" , "r3"
|
||||||
|
|
||||||
|
|
||||||
|
);
|
||||||
|
|
||||||
|
} while (ret);
|
||||||
|
*/
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static inline unsigned long long rpcc(void){
|
||||||
|
unsigned long long ret=0;
|
||||||
|
double v;
|
||||||
|
struct timeval tv;
|
||||||
|
gettimeofday(&tv,NULL);
|
||||||
|
v=(double) tv.tv_sec + (double) tv.tv_usec * 1e-6;
|
||||||
|
ret = (unsigned long long) ( v * 1000.0d );
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline int blas_quickdivide(blasint x, blasint y){
|
||||||
|
return x / y;
|
||||||
|
}
|
||||||
|
|
||||||
|
#if defined(DOUBLE)
|
||||||
|
#define GET_IMAGE(res) __asm__ __volatile__("vstr.f64 d1, %0" : "=m"(res) : : "memory")
|
||||||
|
#else
|
||||||
|
#define GET_IMAGE(res) __asm__ __volatile__("vstr.f32 s1, %0" : "=m"(res) : : "memory")
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define GET_IMAGE_CANCEL
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
#ifndef F_INTERFACE
|
||||||
|
#define REALNAME ASMNAME
|
||||||
|
#else
|
||||||
|
#define REALNAME ASMFNAME
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(ASSEMBLER) && !defined(NEEDPARAM)
|
||||||
|
|
||||||
|
#define PROLOGUE \
|
||||||
|
.arm ;\
|
||||||
|
.global REALNAME ;\
|
||||||
|
.func REALNAME ;\
|
||||||
|
REALNAME:
|
||||||
|
|
||||||
|
#define EPILOGUE
|
||||||
|
|
||||||
|
#define PROFCODE
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
#define SEEK_ADDRESS
|
||||||
|
|
||||||
|
#ifndef PAGESIZE
|
||||||
|
#define PAGESIZE ( 4 << 10)
|
||||||
|
#endif
|
||||||
|
#define HUGE_PAGESIZE ( 4 << 20)
|
||||||
|
|
||||||
|
#define BUFFER_SIZE (16 << 20)
|
||||||
|
|
||||||
|
|
||||||
|
#define BASE_ADDRESS (START_ADDRESS - BUFFER_SIZE * MAX_CPU_NUMBER)
|
||||||
|
|
||||||
|
#ifndef MAP_ANONYMOUS
|
||||||
|
#define MAP_ANONYMOUS MAP_ANON
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif
|
3
ctest.c
3
ctest.c
|
@ -129,4 +129,7 @@ BINARY_64
|
||||||
ARCH_ARM
|
ARCH_ARM
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if defined(__aarch64__)
|
||||||
|
ARCH_ARM64
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
16
getarch.c
16
getarch.c
|
@ -709,6 +709,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#else
|
#else
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef FORCE_ARMV8
|
||||||
|
#define FORCE
|
||||||
|
#define ARCHITECTURE "ARM64"
|
||||||
|
#define SUBARCHITECTURE "ARMV8"
|
||||||
|
#define SUBDIRNAME "arm64"
|
||||||
|
#define ARCHCONFIG "-DARMV8 " \
|
||||||
|
"-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \
|
||||||
|
"-DL2_SIZE=512488 -DL2_LINESIZE=32 " \
|
||||||
|
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " \
|
||||||
|
"-DHAVE_VFP -DHAVE_VFPV3 -DHAVE_VFPV4"
|
||||||
|
#define LIBNAME "armv8"
|
||||||
|
#define CORENAME "ARMV8"
|
||||||
|
#else
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#ifndef FORCE
|
#ifndef FORCE
|
||||||
|
|
|
@ -18,6 +18,10 @@ ifeq ($(ARCH), arm)
|
||||||
USE_TRMM = 1
|
USE_TRMM = 1
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
ifeq ($(ARCH), arm64)
|
||||||
|
USE_TRMM = 1
|
||||||
|
endif
|
||||||
|
|
||||||
ifeq ($(TARGET), LOONGSON3B)
|
ifeq ($(TARGET), LOONGSON3B)
|
||||||
USE_TRMM = 1
|
USE_TRMM = 1
|
||||||
endif
|
endif
|
||||||
|
|
|
@ -0,0 +1,46 @@
|
||||||
|
ifndef SNRM2KERNEL
|
||||||
|
SNRM2KERNEL = nrm2.c
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifndef DNRM2KERNEL
|
||||||
|
DNRM2KERNEL = nrm2.c
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifndef CNRM2KERNEL
|
||||||
|
CNRM2KERNEL = znrm2.c
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifndef ZNRM2KERNEL
|
||||||
|
ZNRM2KERNEL = znrm2.c
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifndef SCABS_KERNEL
|
||||||
|
SCABS_KERNEL = ../generic/cabs.c
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifndef DCABS_KERNEL
|
||||||
|
DCABS_KERNEL = ../generic/cabs.c
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifndef QCABS_KERNEL
|
||||||
|
QCABS_KERNEL = ../generic/cabs.c
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifndef LSAME_KERNEL
|
||||||
|
LSAME_KERNEL = ../generic/lsame.c
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifndef SGEMM_BETA
|
||||||
|
SGEMM_BETA = ../generic/gemm_beta.c
|
||||||
|
endif
|
||||||
|
ifndef DGEMM_BETA
|
||||||
|
DGEMM_BETA = ../generic/gemm_beta.c
|
||||||
|
endif
|
||||||
|
ifndef CGEMM_BETA
|
||||||
|
CGEMM_BETA = ../generic/zgemm_beta.c
|
||||||
|
endif
|
||||||
|
ifndef ZGEMM_BETA
|
||||||
|
ZGEMM_BETA = ../generic/zgemm_beta.c
|
||||||
|
endif
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,134 @@
|
||||||
|
SAMAXKERNEL = ../arm/amax.c
|
||||||
|
DAMAXKERNEL = ../arm/amax.c
|
||||||
|
CAMAXKERNEL = ../arm/zamax.c
|
||||||
|
ZAMAXKERNEL = ../arm/zamax.c
|
||||||
|
|
||||||
|
SAMINKERNEL = ../arm/amin.c
|
||||||
|
DAMINKERNEL = ../arm/amin.c
|
||||||
|
CAMINKERNEL = ../arm/zamin.c
|
||||||
|
ZAMINKERNEL = ../arm/zamin.c
|
||||||
|
|
||||||
|
SMAXKERNEL = ../arm/max.c
|
||||||
|
DMAXKERNEL = ../arm/max.c
|
||||||
|
|
||||||
|
SMINKERNEL = ../arm/min.c
|
||||||
|
DMINKERNEL = ../arm/min.c
|
||||||
|
|
||||||
|
ISAMAXKERNEL = ../arm/iamax.c
|
||||||
|
IDAMAXKERNEL = ../arm/iamax.c
|
||||||
|
ICAMAXKERNEL = ../arm/izamax.c
|
||||||
|
IZAMAXKERNEL = ../arm/izamax.c
|
||||||
|
|
||||||
|
ISAMINKERNEL = ../arm/iamin.c
|
||||||
|
IDAMINKERNEL = ../arm/iamin.c
|
||||||
|
ICAMINKERNEL = ../arm/izamin.c
|
||||||
|
IZAMINKERNEL = ../arm/izamin.c
|
||||||
|
|
||||||
|
ISMAXKERNEL = ../arm/imax.c
|
||||||
|
IDMAXKERNEL = ../arm/imax.c
|
||||||
|
|
||||||
|
ISMINKERNEL = ../arm/imin.c
|
||||||
|
IDMINKERNEL = ../arm/imin.c
|
||||||
|
|
||||||
|
SASUMKERNEL = ../arm/asum.c
|
||||||
|
DASUMKERNEL = ../arm/asum.c
|
||||||
|
CASUMKERNEL = ../arm/zasum.c
|
||||||
|
ZASUMKERNEL = ../arm/zasum.c
|
||||||
|
|
||||||
|
SAXPYKERNEL = ../arm/axpy.c
|
||||||
|
DAXPYKERNEL = ../arm/axpy.c
|
||||||
|
CAXPYKERNEL = ../arm/zaxpy.c
|
||||||
|
ZAXPYKERNEL = ../arm/zaxpy.c
|
||||||
|
|
||||||
|
SCOPYKERNEL = ../arm/copy.c
|
||||||
|
DCOPYKERNEL = ../arm/copy.c
|
||||||
|
CCOPYKERNEL = ../arm/zcopy.c
|
||||||
|
ZCOPYKERNEL = ../arm/zcopy.c
|
||||||
|
|
||||||
|
SDOTKERNEL = ../arm/dot.c
|
||||||
|
DDOTKERNEL = ../arm/dot.c
|
||||||
|
CDOTKERNEL = ../arm/zdot.c
|
||||||
|
ZDOTKERNEL = ../arm/zdot.c
|
||||||
|
|
||||||
|
SNRM2KERNEL = ../arm/nrm2.c
|
||||||
|
DNRM2KERNEL = ../arm/nrm2.c
|
||||||
|
CNRM2KERNEL = ../arm/znrm2.c
|
||||||
|
ZNRM2KERNEL = ../arm/znrm2.c
|
||||||
|
|
||||||
|
SROTKERNEL = ../arm/rot.c
|
||||||
|
DROTKERNEL = ../arm/rot.c
|
||||||
|
CROTKERNEL = ../arm/zrot.c
|
||||||
|
ZROTKERNEL = ../arm/zrot.c
|
||||||
|
|
||||||
|
SSCALKERNEL = ../arm/scal.c
|
||||||
|
DSCALKERNEL = ../arm/scal.c
|
||||||
|
CSCALKERNEL = ../arm/zscal.c
|
||||||
|
ZSCALKERNEL = ../arm/zscal.c
|
||||||
|
|
||||||
|
SSWAPKERNEL = ../arm/swap.c
|
||||||
|
DSWAPKERNEL = ../arm/swap.c
|
||||||
|
CSWAPKERNEL = ../arm/zswap.c
|
||||||
|
ZSWAPKERNEL = ../arm/zswap.c
|
||||||
|
|
||||||
|
SGEMVNKERNEL = ../arm/gemv_n.c
|
||||||
|
DGEMVNKERNEL = ../arm/gemv_n.c
|
||||||
|
CGEMVNKERNEL = ../arm/zgemv_n.c
|
||||||
|
ZGEMVNKERNEL = ../arm/zgemv_n.c
|
||||||
|
|
||||||
|
SGEMVTKERNEL = ../arm/gemv_t.c
|
||||||
|
DGEMVTKERNEL = ../arm/gemv_t.c
|
||||||
|
CGEMVTKERNEL = ../arm/zgemv_t.c
|
||||||
|
ZGEMVTKERNEL = ../arm/zgemv_t.c
|
||||||
|
|
||||||
|
STRMMKERNEL = ../generic/trmmkernel_2x2.c
|
||||||
|
DTRMMKERNEL = ../generic/trmmkernel_2x2.c
|
||||||
|
CTRMMKERNEL = ../generic/ztrmmkernel_2x2.c
|
||||||
|
ZTRMMKERNEL = ../generic/ztrmmkernel_2x2.c
|
||||||
|
|
||||||
|
SGEMMKERNEL = ../generic/gemmkernel_2x2.c
|
||||||
|
SGEMMONCOPY = ../generic/gemm_ncopy_2.c
|
||||||
|
SGEMMOTCOPY = ../generic/gemm_tcopy_2.c
|
||||||
|
SGEMMONCOPYOBJ = sgemm_oncopy.o
|
||||||
|
SGEMMOTCOPYOBJ = sgemm_otcopy.o
|
||||||
|
|
||||||
|
DGEMMKERNEL = ../generic/gemmkernel_2x2.c
|
||||||
|
DGEMMONCOPY = ../generic/gemm_ncopy_2.c
|
||||||
|
DGEMMOTCOPY = ../generic/gemm_tcopy_2.c
|
||||||
|
DGEMMONCOPYOBJ = dgemm_oncopy.o
|
||||||
|
DGEMMOTCOPYOBJ = dgemm_otcopy.o
|
||||||
|
|
||||||
|
CGEMMKERNEL = ../generic/zgemmkernel_2x2.c
|
||||||
|
CGEMMONCOPY = ../generic/zgemm_ncopy_2.c
|
||||||
|
CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
|
||||||
|
CGEMMONCOPYOBJ = cgemm_oncopy.o
|
||||||
|
CGEMMOTCOPYOBJ = cgemm_otcopy.o
|
||||||
|
|
||||||
|
ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c
|
||||||
|
ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c
|
||||||
|
ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
|
||||||
|
ZGEMMONCOPYOBJ = zgemm_oncopy.o
|
||||||
|
ZGEMMOTCOPYOBJ = zgemm_otcopy.o
|
||||||
|
|
||||||
|
STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||||
|
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||||
|
STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||||
|
STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||||
|
|
||||||
|
DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||||
|
DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||||
|
DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||||
|
DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||||
|
|
||||||
|
CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||||
|
CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||||
|
CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||||
|
CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||||
|
|
||||||
|
ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||||
|
ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||||
|
ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||||
|
ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,2 @@
|
||||||
|
clean ::
|
||||||
|
|
|
@ -0,0 +1,33 @@
|
||||||
|
TOPDIR = ../../..
|
||||||
|
include ../../../Makefile.system
|
||||||
|
|
||||||
|
ifeq ($(CORE), CORE2)
|
||||||
|
LASWP = ../generic/laswp_k_2.c
|
||||||
|
ZLASWP = ../generic/zlaswp_k_2.c
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifeq ($(CORE), OPTERON)
|
||||||
|
LASWP = ../generic/laswp_k_1.c
|
||||||
|
ZLASWP = ../generic/zlaswp_k_1.c
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifeq ($(CORE), PRESCOTT)
|
||||||
|
LASWP = ../generic/laswp_k_1.c
|
||||||
|
ZLASWP = ../generic/zlaswp_k_1.c
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifeq ($(DYNAMIC_ARCH), 1)
|
||||||
|
LASWP = ../generic/laswp_k_4.c
|
||||||
|
ZLASWP = ../generic/zlaswp_k_4.c
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifndef LASWP
|
||||||
|
LASWP = ../generic/laswp_k.c
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifndef ZLASWP
|
||||||
|
ZLASWP = ../generic/zlaswp_k.c
|
||||||
|
endif
|
||||||
|
|
||||||
|
include ../generic/Makefile
|
||||||
|
|
40
param.h
40
param.h
|
@ -1874,6 +1874,46 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#define SYMV_P 16
|
#define SYMV_P 16
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if defined(ARMV8)
|
||||||
|
#define SNUMOPT 2
|
||||||
|
#define DNUMOPT 2
|
||||||
|
|
||||||
|
#define GEMM_DEFAULT_OFFSET_A 0
|
||||||
|
#define GEMM_DEFAULT_OFFSET_B 0
|
||||||
|
#define GEMM_DEFAULT_ALIGN 0x03fffUL
|
||||||
|
|
||||||
|
#define SGEMM_DEFAULT_UNROLL_M 2
|
||||||
|
#define SGEMM_DEFAULT_UNROLL_N 2
|
||||||
|
|
||||||
|
#define DGEMM_DEFAULT_UNROLL_M 2
|
||||||
|
#define DGEMM_DEFAULT_UNROLL_N 2
|
||||||
|
|
||||||
|
#define CGEMM_DEFAULT_UNROLL_M 2
|
||||||
|
#define CGEMM_DEFAULT_UNROLL_N 2
|
||||||
|
|
||||||
|
#define ZGEMM_DEFAULT_UNROLL_M 2
|
||||||
|
#define ZGEMM_DEFAULT_UNROLL_N 2
|
||||||
|
|
||||||
|
#define SGEMM_DEFAULT_P 128
|
||||||
|
#define DGEMM_DEFAULT_P 128
|
||||||
|
#define CGEMM_DEFAULT_P 96
|
||||||
|
#define ZGEMM_DEFAULT_P 64
|
||||||
|
|
||||||
|
#define SGEMM_DEFAULT_Q 240
|
||||||
|
#define DGEMM_DEFAULT_Q 120
|
||||||
|
#define CGEMM_DEFAULT_Q 120
|
||||||
|
#define ZGEMM_DEFAULT_Q 120
|
||||||
|
|
||||||
|
#define SGEMM_DEFAULT_R 12288
|
||||||
|
#define DGEMM_DEFAULT_R 8192
|
||||||
|
#define CGEMM_DEFAULT_R 4096
|
||||||
|
#define ZGEMM_DEFAULT_R 4096
|
||||||
|
|
||||||
|
|
||||||
|
#define SYMV_P 16
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#ifdef GENERIC
|
#ifdef GENERIC
|
||||||
|
|
Loading…
Reference in New Issue