diff --git a/Makefile.arm64 b/Makefile.arm64
new file mode 100644
index 000000000..a4f8bab6b
--- /dev/null
+++ b/Makefile.arm64
@@ -0,0 +1,7 @@
+# When CORE is ARMV8, append -march=armv8-a to both CCOMMON_OPT and FCOMMON_OPT.
+ifeq ($(CORE), ARMV8)
+CCOMMON_OPT += -march=armv8-a
+FCOMMON_OPT += -march=armv8-a
+endif
+
+
diff --git a/Makefile.system b/Makefile.system
index 0f5e9c6d5..aceadf2b6 100644
--- a/Makefile.system
+++ b/Makefile.system
@@ -367,6 +367,14 @@ ifeq ($(ARCH), arm)
 NO_BINARY_MODE = 1
 BINARY_DEFINED = 1
 endif
+# arm64 sets the same two variables as the arm block directly above.
+ifeq ($(ARCH), arm64)
+NO_BINARY_MODE = 1
+BINARY_DEFINED = 1
+endif
+
+
+
 #
 # C Compiler dependent settings
 #
diff --git a/c_check b/c_check
index c1cdd59c4..0828a5bba 100644
--- a/c_check
+++ b/c_check
@@ -64,6 +64,7 @@ $architecture = alpha if ($data =~ /ARCH_ALPHA/);
 $architecture = sparc if ($data =~ /ARCH_SPARC/);
 $architecture = ia64 if ($data =~ /ARCH_IA64/);
 $architecture = arm if ($data =~ /ARCH_ARM/);
+$architecture = arm64 if ($data =~ /ARCH_ARM64/); # keep after the ARCH_ARM test: /ARCH_ARM/ also matches "ARCH_ARM64", so this later assignment has to win
 $defined = 0;
@@ -151,6 +152,7 @@ $architecture = alpha if ($data =~ /ARCH_ALPHA/);
 $architecture = sparc if ($data =~ /ARCH_SPARC/);
 $architecture = ia64 if ($data =~ /ARCH_IA64/);
 $architecture = arm if ($data =~ /ARCH_ARM/);
+$architecture = arm64 if ($data =~ /ARCH_ARM64/); # keep after the ARCH_ARM test: /ARCH_ARM/ also matches "ARCH_ARM64", so this later assignment has to win
 $binformat = bin32;
 $binformat = bin64 if ($data =~ /BINARY_64/);
diff --git a/common.h b/common.h
index a2775520f..310fcad93 100644
--- a/common.h
+++ b/common.h
@@ -311,7 +311,7 @@ typedef int blasint;
 #endif
-#ifdef ARMV7
+#if defined(ARMV7) || defined(ARMV6) || defined(ARMV8) /* ARMV6 and ARMV8 now share the nop-based YIELDING that was ARMV7-only */
 #define YIELDING asm volatile ("nop;nop;nop;nop;nop;nop;nop;nop; \n");
 #endif
@@ -375,6 +375,10 @@ please https://github.com/xianyi/OpenBLAS/issues/246
 #include "common_arm.h"
 #endif
+#ifdef ARCH_ARM64
+#include "common_arm64.h"
+#endif /* ARCH_ARM64 */
+
 #ifdef OS_LINUX
 #include "common_linux.h"
 #endif
diff --git a/common_arm64.h b/common_arm64.h
new file mode 100644
index 000000000..2da0d894c
--- /dev/null
+++ b/common_arm64.h
@@ -0,0 +1,169 @@
+/*****************************************************************************
+Copyright (c) 2011, Lab of Parallel Software and Computational Science,ICSAS +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + 3. Neither the name of the ISCAS nor the names of its contributors may + be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +**********************************************************************************/ + +/*********************************************************************/ +/* Copyright 2009, 2010 The University of Texas at Austin. */ +/* All rights reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* 1. 
Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* */ +/* 2. Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ +/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ +/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ +/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ +/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/* POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/* The views and conclusions contained in the software and */ +/* documentation are those of the authors and should not be */ +/* interpreted as representing official policies, either expressed */ +/* or implied, of The University of Texas at Austin. 
*/
+/*********************************************************************/
+
+#ifndef COMMON_ARM64
+#define COMMON_ARM64
+/* AArch64 is weakly ordered, so MB/WMB must emit real barriers: full (dmb ish) and store-only (dmb ishst). */
+#define MB   __asm__ __volatile__ ("dmb ish"   : : : "memory")
+#define WMB  __asm__ __volatile__ ("dmb ishst" : : : "memory")
+
+#define INLINE inline
+
+#define RETURN_BY_COMPLEX
+
+#ifndef ASSEMBLER
+
+static void __inline blas_lock(volatile BLASULONG *address){
+/*
+ * Acquire the spin lock stored at *address.
+ *
+ * The word counts as "held" while it is non-zero.  The inner loop
+ * spins on a plain read so that waiters do not issue atomic writes
+ * while the lock is taken; the outer loop then tries to claim the word
+ * by atomically swapping in 1 and retries if another thread got there first.
+ *
+ * __sync_lock_test_and_set is the GCC/Clang atomic-exchange builtin with
+ * acquire semantics; it returns the previous value, so a non-zero
+ * result means the lock was already owned.
+ *
+ * NOTE(review): the matching release is not part of this header; it is
+ * presumably a store of 0 to *address - confirm against the generic
+ * unlock helper before relying on this.
+ */
+
+  do {
+    while (*address) {YIELDING;};
+  } while (__sync_lock_test_and_set(address, 1));
+}
+
+/* Wall-clock time from gettimeofday(), scaled to milliseconds and truncated to an integer. */
+static inline unsigned long long rpcc(void){
+  unsigned long long ret=0;
+  double v;
+  struct timeval tv;
+  gettimeofday(&tv,NULL);
+  v=(double) tv.tv_sec + (double) tv.tv_usec * 1e-6;
+  ret = (unsigned long long) ( v * 1000.0 );  /* plain double literal; a "d" suffix is a GNU extension that other compilers reject */
+  return ret;
+}
+/* Integer quotient x / y; the caller must pass a non-zero y. */
+static inline int blas_quickdivide(blasint x, blasint y){
+  return x / y;
+}
+/* Store FP register d1 (double) or s1 (single) into res with the AArch64 "str" form; presumably the imaginary part of a complex return - confirm. */
+#if defined(DOUBLE)
+#define GET_IMAGE(res) __asm__ __volatile__("str d1, %0" : "=m"(res) : : "memory")
+#else
+#define GET_IMAGE(res) __asm__ __volatile__("str s1, %0" : "=m"(res) : : "memory")
+#endif
+
+#define GET_IMAGE_CANCEL
+
+#endif
+
+
+#ifndef F_INTERFACE
+#define REALNAME ASMNAME
+#else
+#define REALNAME ASMFNAME
+#endif
+
+#if defined(ASSEMBLER) && !defined(NEEDPARAM)
+/* Assembly entry point. The AArch64 assembler has no .arm directive, so open with .text and mark the symbol as a function. */
+#define PROLOGUE \
+  .text ;\
+  .global REALNAME ;\
+  .type REALNAME, %function ;\
+REALNAME:
+
+#define EPILOGUE
+
+#define PROFCODE
+
+#endif
+
+
+#define SEEK_ADDRESS
+
+#ifndef PAGESIZE
+#define PAGESIZE ( 4 << 10)
+#endif
+#define HUGE_PAGESIZE ( 4 << 20)
+
+#define BUFFER_SIZE (16 << 20)
+
+
+#define BASE_ADDRESS (START_ADDRESS - BUFFER_SIZE * MAX_CPU_NUMBER)
+
+#ifndef MAP_ANONYMOUS
+#define MAP_ANONYMOUS MAP_ANON
+#endif
+
+#endif
diff --git a/ctest.c b/ctest.c
index 184416339..86dc226d4 100644
--- a/ctest.c
+++ b/ctest.c
@@ -129,4 +129,7 @@ BINARY_64
 ARCH_ARM
 #endif
+#if defined(__aarch64__)
+ARCH_ARM64
+#endif
diff --git a/getarch.c b/getarch.c
index 4407e3d9b..7975c9468 100644
---
a/getarch.c
+++ b/getarch.c
@@ -709,6 +709,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #else
 #endif
+#ifdef FORCE_ARMV8
+#define FORCE
+#define ARCHITECTURE "ARM64"
+#define SUBARCHITECTURE "ARMV8"
+#define SUBDIRNAME "arm64"
+#define ARCHCONFIG "-DARMV8 " \
+ "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \
+ "-DL2_SIZE=524288 -DL2_LINESIZE=32 " /* 512 KiB = 512 * 1024 */ \
+ "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " \
+ "-DHAVE_VFP -DHAVE_VFPV3 -DHAVE_VFPV4"
+#define LIBNAME "armv8"
+#define CORENAME "ARMV8"
+#else
+#endif
+
+
 #ifndef FORCE
diff --git a/kernel/Makefile.L3 b/kernel/Makefile.L3
index f543cd08d..b9b4bef1e 100644
--- a/kernel/Makefile.L3
+++ b/kernel/Makefile.L3
@@ -18,6 +18,10 @@ ifeq ($(ARCH), arm)
 USE_TRMM = 1
 endif
+ifeq ($(ARCH), arm64)
+USE_TRMM = 1
+endif
+
 ifeq ($(TARGET), LOONGSON3B)
 USE_TRMM = 1
 endif
diff --git a/kernel/arm64/KERNEL b/kernel/arm64/KERNEL
new file mode 100644
index 000000000..aeccfbf4c
--- /dev/null
+++ b/kernel/arm64/KERNEL
@@ -0,0 +1,46 @@
+ifndef SNRM2KERNEL
+SNRM2KERNEL = ../arm/nrm2.c
+endif
+
+ifndef DNRM2KERNEL
+DNRM2KERNEL = ../arm/nrm2.c
+endif
+
+ifndef CNRM2KERNEL
+CNRM2KERNEL = ../arm/znrm2.c
+endif
+
+ifndef ZNRM2KERNEL
+ZNRM2KERNEL = ../arm/znrm2.c
+endif
+# The four NRM2 fallbacks above point at ../arm/, the same sources KERNEL.ARMV8 assigns; this directory does not provide nrm2.c or znrm2.c itself.
+ifndef SCABS_KERNEL
+SCABS_KERNEL = ../generic/cabs.c
+endif
+
+ifndef DCABS_KERNEL
+DCABS_KERNEL = ../generic/cabs.c
+endif
+
+ifndef QCABS_KERNEL
+QCABS_KERNEL = ../generic/cabs.c
+endif
+
+ifndef LSAME_KERNEL
+LSAME_KERNEL = ../generic/lsame.c
+endif
+
+ifndef SGEMM_BETA
+SGEMM_BETA = ../generic/gemm_beta.c
+endif
+ifndef DGEMM_BETA
+DGEMM_BETA = ../generic/gemm_beta.c
+endif
+ifndef CGEMM_BETA
+CGEMM_BETA = ../generic/zgemm_beta.c
+endif
+ifndef ZGEMM_BETA
+ZGEMM_BETA = ../generic/zgemm_beta.c
+endif
+
+
diff --git a/kernel/arm64/KERNEL.ARMV8 b/kernel/arm64/KERNEL.ARMV8
new file mode 100644
index 000000000..ecf278cf9
--- /dev/null
+++ b/kernel/arm64/KERNEL.ARMV8
@@ -0,0 +1,134 @@
+SAMAXKERNEL = ../arm/amax.c
+DAMAXKERNEL = ../arm/amax.c
+CAMAXKERNEL =
../arm/zamax.c
+ZAMAXKERNEL = ../arm/zamax.c
+# Pattern used throughout this file: the S and D variables of a group share one source file, and the C and Z variables (where present) share a second one.
+SAMINKERNEL = ../arm/amin.c
+DAMINKERNEL = ../arm/amin.c
+CAMINKERNEL = ../arm/zamin.c
+ZAMINKERNEL = ../arm/zamin.c
+
+SMAXKERNEL = ../arm/max.c
+DMAXKERNEL = ../arm/max.c
+
+SMINKERNEL = ../arm/min.c
+DMINKERNEL = ../arm/min.c
+
+ISAMAXKERNEL = ../arm/iamax.c
+IDAMAXKERNEL = ../arm/iamax.c
+ICAMAXKERNEL = ../arm/izamax.c
+IZAMAXKERNEL = ../arm/izamax.c
+
+ISAMINKERNEL = ../arm/iamin.c
+IDAMINKERNEL = ../arm/iamin.c
+ICAMINKERNEL = ../arm/izamin.c
+IZAMINKERNEL = ../arm/izamin.c
+
+ISMAXKERNEL = ../arm/imax.c
+IDMAXKERNEL = ../arm/imax.c
+
+ISMINKERNEL = ../arm/imin.c
+IDMINKERNEL = ../arm/imin.c
+
+SASUMKERNEL = ../arm/asum.c
+DASUMKERNEL = ../arm/asum.c
+CASUMKERNEL = ../arm/zasum.c
+ZASUMKERNEL = ../arm/zasum.c
+
+SAXPYKERNEL = ../arm/axpy.c
+DAXPYKERNEL = ../arm/axpy.c
+CAXPYKERNEL = ../arm/zaxpy.c
+ZAXPYKERNEL = ../arm/zaxpy.c
+
+SCOPYKERNEL = ../arm/copy.c
+DCOPYKERNEL = ../arm/copy.c
+CCOPYKERNEL = ../arm/zcopy.c
+ZCOPYKERNEL = ../arm/zcopy.c
+
+SDOTKERNEL = ../arm/dot.c
+DDOTKERNEL = ../arm/dot.c
+CDOTKERNEL = ../arm/zdot.c
+ZDOTKERNEL = ../arm/zdot.c
+
+SNRM2KERNEL = ../arm/nrm2.c
+DNRM2KERNEL = ../arm/nrm2.c
+CNRM2KERNEL = ../arm/znrm2.c
+ZNRM2KERNEL = ../arm/znrm2.c
+
+SROTKERNEL = ../arm/rot.c
+DROTKERNEL = ../arm/rot.c
+CROTKERNEL = ../arm/zrot.c
+ZROTKERNEL = ../arm/zrot.c
+
+SSCALKERNEL = ../arm/scal.c
+DSCALKERNEL = ../arm/scal.c
+CSCALKERNEL = ../arm/zscal.c
+ZSCALKERNEL = ../arm/zscal.c
+
+SSWAPKERNEL = ../arm/swap.c
+DSWAPKERNEL = ../arm/swap.c
+CSWAPKERNEL = ../arm/zswap.c
+ZSWAPKERNEL = ../arm/zswap.c
+
+SGEMVNKERNEL = ../arm/gemv_n.c
+DGEMVNKERNEL = ../arm/gemv_n.c
+CGEMVNKERNEL = ../arm/zgemv_n.c
+ZGEMVNKERNEL = ../arm/zgemv_n.c
+
+SGEMVTKERNEL = ../arm/gemv_t.c
+DGEMVTKERNEL = ../arm/gemv_t.c
+CGEMVTKERNEL = ../arm/zgemv_t.c
+ZGEMVTKERNEL = ../arm/zgemv_t.c
+# Every assignment up to this point uses a C source under ../arm/; the TRMM kernels that follow come from ../generic/ instead.
+STRMMKERNEL = ../generic/trmmkernel_2x2.c
+DTRMMKERNEL = ../generic/trmmkernel_2x2.c
+CTRMMKERNEL = ../generic/ztrmmkernel_2x2.c
+ZTRMMKERNEL = ../generic/ztrmmkernel_2x2.c
+# GEMM: each precision names a kernel source, an ONCOPY and an OTCOPY source, and the two matching object file names. NOTE(review): the 2x2 sources presumably have to agree with the unroll factors configured for this core - confirm.
+SGEMMKERNEL = ../generic/gemmkernel_2x2.c
+SGEMMONCOPY = ../generic/gemm_ncopy_2.c
+SGEMMOTCOPY = ../generic/gemm_tcopy_2.c
+SGEMMONCOPYOBJ = sgemm_oncopy.o
+SGEMMOTCOPYOBJ = sgemm_otcopy.o
+
+DGEMMKERNEL = ../generic/gemmkernel_2x2.c
+DGEMMONCOPY = ../generic/gemm_ncopy_2.c
+DGEMMOTCOPY = ../generic/gemm_tcopy_2.c
+DGEMMONCOPYOBJ = dgemm_oncopy.o
+DGEMMOTCOPYOBJ = dgemm_otcopy.o
+
+CGEMMKERNEL = ../generic/zgemmkernel_2x2.c
+CGEMMONCOPY = ../generic/zgemm_ncopy_2.c
+CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
+CGEMMONCOPYOBJ = cgemm_oncopy.o
+CGEMMOTCOPYOBJ = cgemm_otcopy.o
+
+ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c
+ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c
+ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
+ZGEMMONCOPYOBJ = zgemm_oncopy.o
+ZGEMMOTCOPYOBJ = zgemm_otcopy.o
+# TRSM: all four precisions use the same four sources, ../generic/trsm_kernel_LN.c, _LT.c, _RN.c and _RT.c.
+STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
+STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
+STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
+STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
+
+DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
+DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
+DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
+DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
+
+CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
+CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
+CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
+CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
+
+ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
+ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
+ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
+ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
+
+
+
+
diff --git a/kernel/arm64/Makefile b/kernel/arm64/Makefile
new file mode 100644
index 000000000..efae70d7b
--- /dev/null
+++ b/kernel/arm64/Makefile
@@ -0,0 +1,2 @@
+clean ::
+# The double-colon clean rule above has no recipe, so it does nothing in this directory.
diff --git a/lapack/laswp/arm64/Makefile b/lapack/laswp/arm64/Makefile
new file mode 100644
index 000000000..434c82a84
--- /dev/null
+++ b/lapack/laswp/arm64/Makefile
@@ -0,0 +1,33 @@
+TOPDIR = ../../..
+include ../../../Makefile.system
+# NOTE(review): CORE2, OPTERON and PRESCOTT are x86 core names, so on an arm64 build the next three blocks look unreachable; LASWP/ZLASWP then come from DYNAMIC_ARCH or the ifndef defaults further down.
+ifeq ($(CORE), CORE2)
+LASWP = ../generic/laswp_k_2.c
+ZLASWP = ../generic/zlaswp_k_2.c
+endif
+
+ifeq ($(CORE), OPTERON)
+LASWP = ../generic/laswp_k_1.c
+ZLASWP = ../generic/zlaswp_k_1.c
+endif
+
+ifeq ($(CORE), PRESCOTT)
+LASWP = ../generic/laswp_k_1.c
+ZLASWP = ../generic/zlaswp_k_1.c
+endif
+
+ifeq ($(DYNAMIC_ARCH), 1)
+LASWP = ../generic/laswp_k_4.c
+ZLASWP = ../generic/zlaswp_k_4.c
+endif
+
+ifndef LASWP
+LASWP = ../generic/laswp_k.c
+endif
+
+ifndef ZLASWP
+ZLASWP = ../generic/zlaswp_k.c
+endif
+
+include ../generic/Makefile
+
diff --git a/param.h b/param.h
index ec1767d20..0628a1972 100644
--- a/param.h
+++ b/param.h
@@ -1874,6 +1874,46 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define SYMV_P 16
 #endif
+#if defined(ARMV8)
+#define SNUMOPT 2
+#define DNUMOPT 2
+/* ARMV8 defaults: GEMM offsets and alignment first, then unroll factors (2 for every precision), then the per-precision P, Q and R values. */
+#define GEMM_DEFAULT_OFFSET_A 0
+#define GEMM_DEFAULT_OFFSET_B 0
+#define GEMM_DEFAULT_ALIGN 0x03fffUL
+
+#define SGEMM_DEFAULT_UNROLL_M 2
+#define SGEMM_DEFAULT_UNROLL_N 2
+
+#define DGEMM_DEFAULT_UNROLL_M 2
+#define DGEMM_DEFAULT_UNROLL_N 2
+
+#define CGEMM_DEFAULT_UNROLL_M 2
+#define CGEMM_DEFAULT_UNROLL_N 2
+
+#define ZGEMM_DEFAULT_UNROLL_M 2
+#define ZGEMM_DEFAULT_UNROLL_N 2
+
+#define SGEMM_DEFAULT_P 128
+#define DGEMM_DEFAULT_P 128
+#define CGEMM_DEFAULT_P 96
+#define ZGEMM_DEFAULT_P 64
+
+#define SGEMM_DEFAULT_Q 240
+#define DGEMM_DEFAULT_Q 120
+#define CGEMM_DEFAULT_Q 120
+#define ZGEMM_DEFAULT_Q 120
+
+#define SGEMM_DEFAULT_R 12288
+#define DGEMM_DEFAULT_R 8192
+#define CGEMM_DEFAULT_R 4096
+#define ZGEMM_DEFAULT_R 4096
+
+
+#define SYMV_P 16
+#endif
+
+
 #ifdef GENERIC