From c167a3d6f41ed2f5680c2a72846b4c9b5d416543 Mon Sep 17 00:00:00 2001 From: Jerry Zhao Date: Mon, 16 Apr 2018 12:34:43 -0700 Subject: [PATCH 01/22] Added RISCV build --- Makefile.riscv64 | 0 Makefile.system | 4 + c_check | 1 + common.h | 5 ++ common_riscv64.h | 93 +++++++++++++++++++ cpuid_riscv64.c | 111 +++++++++++++++++++++++ ctest.c | 4 + getarch.c | 19 ++++ kernel/riscv64/KERNEL | 149 +++++++++++++++++++++++++++++++ kernel/riscv64/amax.c | 75 ++++++++++++++++ kernel/riscv64/amin.c | 75 ++++++++++++++++ kernel/riscv64/asum.c | 67 ++++++++++++++ kernel/riscv64/axpby.c | 96 ++++++++++++++++++++ kernel/riscv64/axpy.c | 64 ++++++++++++++ kernel/riscv64/copy.c | 59 +++++++++++++ kernel/riscv64/dot.c | 64 ++++++++++++++ kernel/riscv64/gemv_n.c | 67 ++++++++++++++ kernel/riscv64/gemv_t.c | 68 ++++++++++++++ kernel/riscv64/iamax.c | 77 ++++++++++++++++ kernel/riscv64/iamin.c | 77 ++++++++++++++++ kernel/riscv64/imax.c | 69 +++++++++++++++ kernel/riscv64/imin.c | 67 ++++++++++++++ kernel/riscv64/izamax.c | 81 +++++++++++++++++ kernel/riscv64/izamin.c | 81 +++++++++++++++++ kernel/riscv64/max.c | 65 ++++++++++++++ kernel/riscv64/min.c | 65 ++++++++++++++ kernel/riscv64/nrm2.c | 88 ++++++++++++++++++ kernel/riscv64/omatcopy_cn.c | 90 +++++++++++++++++++ kernel/riscv64/omatcopy_ct.c | 89 +++++++++++++++++++ kernel/riscv64/omatcopy_rn.c | 90 +++++++++++++++++++ kernel/riscv64/omatcopy_rt.c | 62 +++++++++++++ kernel/riscv64/rot.c | 62 +++++++++++++ kernel/riscv64/scal.c | 63 +++++++++++++ kernel/riscv64/swap.c | 62 +++++++++++++ kernel/riscv64/symv_L.c | 70 +++++++++++++++ kernel/riscv64/symv_U.c | 71 +++++++++++++++ kernel/riscv64/zamax.c | 79 +++++++++++++++++ kernel/riscv64/zamin.c | 79 +++++++++++++++++ kernel/riscv64/zasum.c | 72 +++++++++++++++ kernel/riscv64/zaxpby.c | 118 +++++++++++++++++++++++++ kernel/riscv64/zaxpy.c | 74 ++++++++++++++++ kernel/riscv64/zcopy.c | 65 ++++++++++++++ kernel/riscv64/zdot.c | 80 +++++++++++++++++ kernel/riscv64/zgemv_n.c | 157 
+++++++++++++++++++++++++++++++++ kernel/riscv64/zgemv_t.c | 140 +++++++++++++++++++++++++++++ kernel/riscv64/znrm2.c | 106 ++++++++++++++++++++++ kernel/riscv64/zomatcopy_cn.c | 70 +++++++++++++++ kernel/riscv64/zomatcopy_cnc.c | 69 +++++++++++++++ kernel/riscv64/zomatcopy_ct.c | 71 +++++++++++++++ kernel/riscv64/zomatcopy_ctc.c | 71 +++++++++++++++ kernel/riscv64/zomatcopy_rn.c | 70 +++++++++++++++ kernel/riscv64/zomatcopy_rnc.c | 69 +++++++++++++++ kernel/riscv64/zomatcopy_rt.c | 72 +++++++++++++++ kernel/riscv64/zomatcopy_rtc.c | 72 +++++++++++++++ kernel/riscv64/zrot.c | 70 +++++++++++++++ kernel/riscv64/zscal.c | 88 ++++++++++++++++++ kernel/riscv64/zswap.c | 72 +++++++++++++++ lapack/laswp/riscv64/Makefile | 13 +++ param.h | 39 ++++++++ 59 files changed, 4166 insertions(+) create mode 100644 Makefile.riscv64 create mode 100644 common_riscv64.h create mode 100644 cpuid_riscv64.c create mode 100644 kernel/riscv64/KERNEL create mode 100644 kernel/riscv64/amax.c create mode 100644 kernel/riscv64/amin.c create mode 100644 kernel/riscv64/asum.c create mode 100644 kernel/riscv64/axpby.c create mode 100644 kernel/riscv64/axpy.c create mode 100644 kernel/riscv64/copy.c create mode 100644 kernel/riscv64/dot.c create mode 100644 kernel/riscv64/gemv_n.c create mode 100644 kernel/riscv64/gemv_t.c create mode 100644 kernel/riscv64/iamax.c create mode 100644 kernel/riscv64/iamin.c create mode 100644 kernel/riscv64/imax.c create mode 100644 kernel/riscv64/imin.c create mode 100644 kernel/riscv64/izamax.c create mode 100644 kernel/riscv64/izamin.c create mode 100644 kernel/riscv64/max.c create mode 100644 kernel/riscv64/min.c create mode 100644 kernel/riscv64/nrm2.c create mode 100644 kernel/riscv64/omatcopy_cn.c create mode 100644 kernel/riscv64/omatcopy_ct.c create mode 100644 kernel/riscv64/omatcopy_rn.c create mode 100644 kernel/riscv64/omatcopy_rt.c create mode 100644 kernel/riscv64/rot.c create mode 100644 kernel/riscv64/scal.c create mode 100644 kernel/riscv64/swap.c 
create mode 100644 kernel/riscv64/symv_L.c create mode 100644 kernel/riscv64/symv_U.c create mode 100644 kernel/riscv64/zamax.c create mode 100644 kernel/riscv64/zamin.c create mode 100644 kernel/riscv64/zasum.c create mode 100644 kernel/riscv64/zaxpby.c create mode 100644 kernel/riscv64/zaxpy.c create mode 100644 kernel/riscv64/zcopy.c create mode 100644 kernel/riscv64/zdot.c create mode 100644 kernel/riscv64/zgemv_n.c create mode 100644 kernel/riscv64/zgemv_t.c create mode 100644 kernel/riscv64/znrm2.c create mode 100644 kernel/riscv64/zomatcopy_cn.c create mode 100644 kernel/riscv64/zomatcopy_cnc.c create mode 100644 kernel/riscv64/zomatcopy_ct.c create mode 100644 kernel/riscv64/zomatcopy_ctc.c create mode 100644 kernel/riscv64/zomatcopy_rn.c create mode 100644 kernel/riscv64/zomatcopy_rnc.c create mode 100644 kernel/riscv64/zomatcopy_rt.c create mode 100644 kernel/riscv64/zomatcopy_rtc.c create mode 100644 kernel/riscv64/zrot.c create mode 100644 kernel/riscv64/zscal.c create mode 100644 kernel/riscv64/zswap.c create mode 100644 lapack/laswp/riscv64/Makefile diff --git a/Makefile.riscv64 b/Makefile.riscv64 new file mode 100644 index 000000000..e69de29bb diff --git a/Makefile.system b/Makefile.system index 142cb420f..02d392d9c 100644 --- a/Makefile.system +++ b/Makefile.system @@ -593,7 +593,9 @@ endif ifndef BINARY_DEFINED ifneq ($(OSNAME), AIX) ifdef BINARY64 +ifneq ($(ARCH), riscv64) CCOMMON_OPT += -m64 +endif else CCOMMON_OPT += -m32 endif @@ -687,8 +689,10 @@ endif else ifdef BINARY64 ifneq ($(OSNAME), AIX) +ifneq ($(ARCH), riscv64) FCOMMON_OPT += -m64 endif +endif ifdef INTERFACE64 ifneq ($(INTERFACE64), 0) FCOMMON_OPT += -fdefault-integer-8 diff --git a/c_check b/c_check index a3b337602..c564855f3 100644 --- a/c_check +++ b/c_check @@ -76,6 +76,7 @@ $architecture = ia64 if ($data =~ /ARCH_IA64/); $architecture = arm if ($data =~ /ARCH_ARM/); $architecture = arm64 if ($data =~ /ARCH_ARM64/); $architecture = zarch if ($data =~ /ARCH_ZARCH/); +$architecture 
= riscv64 if ($data =~ /ARCH_RISCV64/); $defined = 0; diff --git a/common.h b/common.h index 5a599a5af..3d23d9ee6 100644 --- a/common.h +++ b/common.h @@ -408,6 +408,11 @@ please https://github.com/xianyi/OpenBLAS/issues/246 #include "common_mips.h" #endif + +#ifdef ARCH_RISCV64 +#include "common_riscv64.h" +#endif + #ifdef ARCH_MIPS64 #include "common_mips64.h" #endif diff --git a/common_riscv64.h b/common_riscv64.h new file mode 100644 index 000000000..fe4e0a6d3 --- /dev/null +++ b/common_riscv64.h @@ -0,0 +1,93 @@ +/***************************************************************************** +Copyright (c) 2011-2014, The OpenBLAS Project +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + 3. Neither the name of the OpenBLAS project nor the names of + its contributors may be used to endorse or promote products + derived from this software without specific prior written + permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************************/ + +/*********************************************************************/ +/* Copyright 2009, 2010 The University of Texas at Austin. */ +/* All rights reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* 1. Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* */ +/* 2. Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ +/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ +/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ +/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ +/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/* POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/* The views and conclusions contained in the software and */ +/* documentation are those of the authors and should not be */ +/* interpreted as representing official policies, either expressed */ +/* or implied, of The University of Texas at Austin. */ +/*********************************************************************/ + +#ifndef COMMON_RISCV64 +#define COMMON_RISCV64 + +#define MB __sync_synchronize() +#define WMB __sync_synchronize() + +#define INLINE inline + +#ifndef ASSEMBLER + + +static inline int blas_quickdivide(blasint x, blasint y){ + return x / y; +} + +#endif + + + +#define BUFFER_SIZE ( 32 << 20) +#define SEEK_ADDRESS + +#endif diff --git a/cpuid_riscv64.c b/cpuid_riscv64.c new file mode 100644 index 000000000..129ed11b0 --- /dev/null +++ b/cpuid_riscv64.c @@ -0,0 +1,111 @@ +/***************************************************************************** +Copyright (c) 2011-2014, The OpenBLAS Project +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. 
Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + 3. Neither the name of the OpenBLAS project nor the names of + its contributors may be used to endorse or promote products + derived from this software without specific prior written + permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +**********************************************************************************/ + + +/*********************************************************************/ +/* Copyright 2009, 2010 The University of Texas at Austin. */ +/* All rights reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* 1. Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* */ +/* 2. Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. 
*/ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ +/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ +/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ +/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ +/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/* POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/* The views and conclusions contained in the software and */ +/* documentation are those of the authors and should not be */ +/* interpreted as representing official policies, either expressed */ +/* or implied, of The University of Texas at Austin. 
*/ +/*********************************************************************/ + +#define CPU_UNKNOWN 0 + +static char *cpuname[] = { + "UNKOWN", +}; + +int detect(void){ + return CPU_UNKNOWN; +} + +char *get_corename(void){ + return cpuname[detect()]; +} + +void get_architecture(void){ + printf("RISCV64"); +} + +void get_subarchitecture(void){ +} + +void get_subdirname(void){ + printf("riscv64"); +} + +void get_cpuconfig(void){ + printf("#define UNKNOWN\n"); + printf("#define L1_DATA_SIZE 65536\n"); + printf("#define L1_DATA_LINESIZE 32\n"); + printf("#define L2_SIZE 512488\n"); + printf("#define L2_LINESIZE 32\n"); + printf("#define DTB_DEFAULT_ENTRIES 64\n"); + printf("#define DTB_SIZE 4096\n"); + printf("#define L2_ASSOCIATIVE 4\n"); +} + +void get_libname(void){ + printf("riscv64\n"); +} diff --git a/ctest.c b/ctest.c index 00be423d1..cab939887 100644 --- a/ctest.c +++ b/ctest.c @@ -149,3 +149,7 @@ ARCH_ARM ARCH_ARM64 #endif +#if defined(__riscv) +ARCH_RISCV64 +#endif + diff --git a/getarch.c b/getarch.c index 992fc2b95..7f7fd97c4 100644 --- a/getarch.c +++ b/getarch.c @@ -604,6 +604,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #endif + #ifdef FORCE_PPCG4 #define FORCE #define ARCHITECTURE "POWER" @@ -859,6 +860,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #else #endif +#ifdef FORCE_RISCV64 +#define FORCE +#define ARCHITECTURE "RISCV64" +#define SUBARCHITECTURE "RISCV64" +#define SUBDIRNAME "riscv64" +#define ARCHCONFIG "-DRISCV64 " \ + "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=32 " \ + "-DL2_SIZE=1048576 -DL2_LINESIZE=32 " \ + "-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " +#define LIBNAME "riscv64" +#define CORENAME "RISCV64" +#else +#endif + #ifdef FORCE_CORTEXA15 #define FORCE #define ARCHITECTURE "ARM" @@ -1051,6 +1066,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define OPENBLAS_SUPPORTED #endif +#ifdef __riscv +#include "cpuid_riscv64.c" +#endif + #ifdef __arm__ #include "cpuid_arm.c" #define OPENBLAS_SUPPORTED diff --git a/kernel/riscv64/KERNEL b/kernel/riscv64/KERNEL new file mode 100644 index 000000000..7d854ced6 --- /dev/null +++ b/kernel/riscv64/KERNEL @@ -0,0 +1,149 @@ +SAMAXKERNEL = ../riscv64/amax.c +DAMAXKERNEL = ../riscv64/amax.c +CAMAXKERNEL = ../riscv64/zamax.c +ZAMAXKERNEL = ../riscv64/zamax.c + +SAMINKERNEL = ../riscv64/amin.c +DAMINKERNEL = ../riscv64/amin.c +CAMINKERNEL = ../riscv64/zamin.c +ZAMINKERNEL = ../riscv64/zamin.c + +SMAXKERNEL = ../riscv64/max.c +DMAXKERNEL = ../riscv64/max.c + +SMINKERNEL = ../riscv64/min.c +DMINKERNEL = ../riscv64/min.c + +ISAMAXKERNEL = ../riscv64/iamax.c +IDAMAXKERNEL = ../riscv64/iamax.c +ICAMAXKERNEL = ../riscv64/izamax.c +IZAMAXKERNEL = ../riscv64/izamax.c + +ISAMINKERNEL = ../riscv64/iamin.c +IDAMINKERNEL = ../riscv64/iamin.c +ICAMINKERNEL = ../riscv64/izamin.c +IZAMINKERNEL = ../riscv64/izamin.c + +ISMAXKERNEL = ../riscv64/imax.c +IDMAXKERNEL = ../riscv64/imax.c + +ISMINKERNEL = ../riscv64/imin.c +IDMINKERNEL = ../riscv64/imin.c + +SASUMKERNEL = ../riscv64/asum.c +DASUMKERNEL = ../riscv64/asum.c +CASUMKERNEL = ../riscv64/zasum.c +ZASUMKERNEL = ../riscv64/zasum.c + +SAXPYKERNEL = ../riscv64/axpy.c +DAXPYKERNEL = ../riscv64/axpy.c +CAXPYKERNEL = ../riscv64/zaxpy.c +ZAXPYKERNEL = ../riscv64/zaxpy.c + +SCOPYKERNEL = ../riscv64/copy.c +DCOPYKERNEL = ../riscv64/copy.c +CCOPYKERNEL = ../riscv64/zcopy.c +ZCOPYKERNEL = ../riscv64/zcopy.c + +SDOTKERNEL = ../riscv64/dot.c +DDOTKERNEL = ../riscv64/dot.c +CDOTKERNEL = ../riscv64/zdot.c +ZDOTKERNEL = ../riscv64/zdot.c + +SNRM2KERNEL = ../riscv64/nrm2.c +DNRM2KERNEL = ../riscv64/nrm2.c +CNRM2KERNEL = ../riscv64/znrm2.c +ZNRM2KERNEL = ../riscv64/znrm2.c + +SROTKERNEL = ../riscv64/rot.c +DROTKERNEL = ../riscv64/rot.c +CROTKERNEL = ../riscv64/zrot.c +ZROTKERNEL = ../riscv64/zrot.c + +SSCALKERNEL = ../riscv64/scal.c +DSCALKERNEL = 
../riscv64/scal.c +CSCALKERNEL = ../riscv64/zscal.c +ZSCALKERNEL = ../riscv64/zscal.c + +SSWAPKERNEL = ../riscv64/swap.c +DSWAPKERNEL = ../riscv64/swap.c +CSWAPKERNEL = ../riscv64/zswap.c +ZSWAPKERNEL = ../riscv64/zswap.c + +SGEMVNKERNEL = ../riscv64/gemv_n.c +DGEMVNKERNEL = ../riscv64/gemv_n.c +CGEMVNKERNEL = ../riscv64/zgemv_n.c +ZGEMVNKERNEL = ../riscv64/zgemv_n.c + +SGEMVTKERNEL = ../riscv64/gemv_t.c +DGEMVTKERNEL = ../riscv64/gemv_t.c +CGEMVTKERNEL = ../riscv64/zgemv_t.c +ZGEMVTKERNEL = ../riscv64/zgemv_t.c + +STRMMKERNEL = ../generic/trmmkernel_2x2.c +DTRMMKERNEL = ../generic/trmmkernel_2x2.c +CTRMMKERNEL = ../generic/ztrmmkernel_2x2.c +ZTRMMKERNEL = ../generic/ztrmmkernel_2x2.c + +SGEMMKERNEL = ../generic/gemmkernel_2x2.c +SGEMMONCOPY = ../generic/gemm_ncopy_2.c +SGEMMOTCOPY = ../generic/gemm_tcopy_2.c +SGEMMONCOPYOBJ = sgemm_oncopy.o +SGEMMOTCOPYOBJ = sgemm_otcopy.o + +DGEMMKERNEL = ../generic/gemmkernel_2x2.c +DGEMMONCOPY = ../generic/gemm_ncopy_2.c +DGEMMOTCOPY = ../generic/gemm_tcopy_2.c +DGEMMONCOPYOBJ = dgemm_oncopy.o +DGEMMOTCOPYOBJ = dgemm_otcopy.o + +CGEMMKERNEL = ../generic/zgemmkernel_2x2.c +CGEMMONCOPY = ../generic/zgemm_ncopy_2.c +CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c +CGEMMONCOPYOBJ = cgemm_oncopy.o +CGEMMOTCOPYOBJ = cgemm_otcopy.o + +ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c +ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c +ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c +ZGEMMONCOPYOBJ = zgemm_oncopy.o +ZGEMMOTCOPYOBJ = zgemm_otcopy.o + +STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c +STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c +STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c +STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c + +DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c +DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c +DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c +DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c + +CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c +CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c +CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c +CTRSMKERNEL_RT 
= ../generic/trsm_kernel_RT.c + +ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c +ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c +ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c +ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c + +LSAME_KERNEL = ../generic/lsame.c + +SCABS_KERNEL = ../generic/cabs.c +DCABS_KERNEL = ../generic/cabs.c +QCABS_KERNEL = ../generic/cabs.c + +ifndef SGEMM_BETA +SGEMM_BETA = ../generic/gemm_beta.c +endif +ifndef DGEMM_BETA +DGEMM_BETA = ../generic/gemm_beta.c +endif +ifndef CGEMM_BETA +CGEMM_BETA = ../generic/zgemm_beta.c +endif +ifndef ZGEMM_BETA +ZGEMM_BETA = ../generic/zgemm_beta.c +endif diff --git a/kernel/riscv64/amax.c b/kernel/riscv64/amax.c new file mode 100644 index 000000000..792e68bd9 --- /dev/null +++ b/kernel/riscv64/amax.c @@ -0,0 +1,75 @@ +/*************************************************************************** +Copyright (c) 2013, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*****************************************************************************/ + +/************************************************************************************** +* 2013/09/14 Saar +* BLASTEST float : OK +* BLASTEST double : OK +* CTEST : NoTest +* TEST : NoTest +* +**************************************************************************************/ + +#include "common.h" +#include <math.h> + +#if defined(DOUBLE) + +#define ABS fabs + +#else + +#define ABS fabsf + +#endif + + +FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) +{ + BLASLONG i=0; + BLASLONG ix=0; + FLOAT maxf=0.0; + + if (n <= 0 || inc_x <= 0) return(maxf); + + maxf=ABS(x[0]); + ix += inc_x; + i++; + + while(i < n) + { + if( ABS(x[ix]) > maxf ) + { + maxf = ABS(x[ix]); + } + ix += inc_x; + i++; + } + return(maxf); +} + + diff --git a/kernel/riscv64/amin.c b/kernel/riscv64/amin.c new file mode 100644 index 000000000..78495a8e3 --- /dev/null +++ b/kernel/riscv64/amin.c @@ -0,0 +1,75 @@ +/*************************************************************************** +Copyright (c) 2013, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. 
Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*****************************************************************************/ + +/************************************************************************************** +* 2013/09/14 Saar +* BLASTEST float : OK +* BLASTEST double : OK +* CTEST : NoTest +* TEST : NoTest +* +**************************************************************************************/ + +#include "common.h" +#include <math.h> + +#if defined(DOUBLE) + +#define ABS fabs + +#else + +#define ABS fabsf + +#endif + + +FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) +{ + BLASLONG i=0; + BLASLONG ix=0; + FLOAT minf=0.0; + + if (n <= 0 || inc_x <= 0) return(minf); + + minf=ABS(x[0]); + ix += inc_x; + i++; + + while(i < n) + { + if( ABS(x[ix]) < minf ) + { + minf = ABS(x[ix]); + } + ix += inc_x; + i++; + } + return(minf); +} + + diff --git a/kernel/riscv64/asum.c b/kernel/riscv64/asum.c new file mode 100644 index 000000000..b284ae3fc --- /dev/null +++ b/kernel/riscv64/asum.c @@ -0,0 +1,67 @@ +/*************************************************************************** +Copyright (c) 2013, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. 
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*****************************************************************************/ + +/************************************************************************************** +* 2013/09/14 Saar +* BLASTEST float : OK +* BLASTEST double : OK +* CTEST : OK +* TEST : OK +* +**************************************************************************************/ + + +#include "common.h" +#include <math.h> + +#if defined(DOUBLE) + +#define ABS fabs + +#else + +#define ABS fabsf + +#endif + + +FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) +{ + BLASLONG i=0; + FLOAT sumf = 0.0; + if (n <= 0 || inc_x <= 0) return(sumf); + + n *= inc_x; + while(i < n) + { + sumf += ABS(x[i]); + i += inc_x; + } + return(sumf); +} + + diff --git a/kernel/riscv64/axpby.c b/kernel/riscv64/axpby.c new file mode 100644 index 000000000..278747f75 --- /dev/null +++ b/kernel/riscv64/axpby.c @@ -0,0 +1,96 @@ +/*************************************************************************** +Copyright (c) 2013, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. 
Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*****************************************************************************/ + + +#include "common.h" + +int CNAME(BLASLONG n, FLOAT alpha, FLOAT *x, BLASLONG inc_x, FLOAT beta, FLOAT *y, BLASLONG inc_y) +{ + BLASLONG i=0; + BLASLONG ix,iy; + + if ( n < 0 ) return(0); + + ix = 0; + iy = 0; + + if ( beta == 0.0 ) + { + + if ( alpha == 0.0 ) + { + while(i < n) + { + y[iy] = 0.0 ; + iy += inc_y ; + i++ ; + } + } + else + { + while(i < n) + { + y[iy] = alpha * x[ix] ; + ix += inc_x ; + iy += inc_y ; + i++ ; + } + + + } + + } + else + { + + if ( alpha == 0.0 ) + { + while(i < n) + { + y[iy] = beta * y[iy] ; + iy += inc_y ; + i++ ; + } + } + else + { + while(i < n) + { + y[iy] = alpha * x[ix] + beta * y[iy] ; + ix += inc_x ; + iy += inc_y ; + i++ ; + } + } + + } + + return(0); + +} + + diff --git a/kernel/riscv64/axpy.c b/kernel/riscv64/axpy.c new file mode 100644 index 000000000..fb1094dd9 --- /dev/null +++ b/kernel/riscv64/axpy.c @@ -0,0 +1,64 @@ +/*************************************************************************** +Copyright (c) 2013, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. 
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*****************************************************************************/ + +/************************************************************************************** +* 2013/09/14 Saar +* BLASTEST float : OK +* BLASTEST double : OK +* CTEST : OK +* TEST : OK +* +**************************************************************************************/ + + +#include "common.h" + +int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *dummy, BLASLONG dummy2) +{ + BLASLONG i=0; + BLASLONG ix,iy; + + if ( n < 0 ) return(0); + if ( da == 0.0 ) return(0); + + ix = 0; + iy = 0; + + while(i < n) + { + + y[iy] += da * x[ix] ; + ix += inc_x ; + iy += inc_y ; + i++ ; + + } + return(0); + +} + + diff --git a/kernel/riscv64/copy.c b/kernel/riscv64/copy.c new file mode 100644 index 000000000..7b4f04f30 --- /dev/null +++ b/kernel/riscv64/copy.c @@ -0,0 +1,59 @@ +/*************************************************************************** +Copyright (c) 2013, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. 
Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*****************************************************************************/ + +/************************************************************************************** +* 2013/09/14 Saar +* BLASTEST float : OK +* BLASTEST double : OK +* CTEST : OK +* TEST : OK +* +**************************************************************************************/ + +#include "common.h" + +int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) +{ + BLASLONG i=0; + BLASLONG ix=0,iy=0; + + if ( n < 0 ) return(0); + + while(i < n) + { + + y[iy] = x[ix] ; + ix += inc_x ; + iy += inc_y ; + i++ ; + + } + return(0); + +} + + diff --git a/kernel/riscv64/dot.c b/kernel/riscv64/dot.c new file mode 100644 index 000000000..46a84ad18 --- /dev/null +++ b/kernel/riscv64/dot.c @@ -0,0 +1,64 @@ +/*************************************************************************** +Copyright (c) 2013, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*****************************************************************************/ + +/************************************************************************************** +* 2013/09/14 Saar +* BLASTEST float : OK +* BLASTEST double : OK +* CTEST : OK +* TEST : OK +* +**************************************************************************************/ + +#include "common.h" + +#if defined(DSDOT) +double CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) +#else +FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) +#endif +{ + BLASLONG i=0; + BLASLONG ix=0,iy=0; + double dot = 0.0 ; + + if ( n < 0 ) return(dot); + + while(i < n) + { + + dot += y[iy] * x[ix] ; + ix += inc_x ; + iy += inc_y ; + i++ ; + + } + return(dot); + +} + + diff --git a/kernel/riscv64/gemv_n.c b/kernel/riscv64/gemv_n.c new file mode 100644 index 000000000..ef61b245b --- /dev/null +++ b/kernel/riscv64/gemv_n.c @@ -0,0 +1,67 @@ +/*************************************************************************** +Copyright (c) 2013, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. 
Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*****************************************************************************/ + + +/************************************************************************************** + * * 2013/09/14 Saar + * * BLASTEST float : OK + * * BLASTEST double : OK + * CTEST : OK + * TEST : OK + * * + * **************************************************************************************/ + + +#include "common.h" + +int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *buffer) +{ + BLASLONG i; + BLASLONG ix,iy; + BLASLONG j; + FLOAT *a_ptr; + FLOAT temp; + + ix = 0; + a_ptr = a; + + for (j=0; j + +#if defined(DOUBLE) + +#define ABS fabs + +#else + +#define ABS fabsf + +#endif + + +BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) +{ + BLASLONG i=0; + BLASLONG ix=0; + FLOAT maxf=0.0; + BLASLONG max=0; + + if (n <= 0 || inc_x <= 0) return(max); + + maxf=ABS(x[0]); + ix += inc_x; + i++; + + while(i < n) + { + if( ABS(x[ix]) > maxf ) + { + max = i; + maxf = ABS(x[ix]); + } + ix += inc_x; + i++; + } + return(max+1); +} + + diff --git a/kernel/riscv64/iamin.c b/kernel/riscv64/iamin.c new file mode 100644 index 000000000..155292bd5 --- /dev/null +++ b/kernel/riscv64/iamin.c @@ -0,0 +1,77 @@ +/*************************************************************************** +Copyright (c) 2013, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. 
Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*****************************************************************************/ + +/************************************************************************************** +* 2013/09/14 Saar +* BLASTEST float : NoTest +* BLASTEST double : NoTest +* CTEST : NoTest +* TEST : NoTest +* +**************************************************************************************/ + +#include "common.h" +#include + +#if defined(DOUBLE) + +#define ABS fabs + +#else + +#define ABS fabsf + +#endif + + +BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) +{ + BLASLONG i=0; + BLASLONG ix=0; + FLOAT minf=0.0; + BLASLONG min=0; + + if (n <= 0 || inc_x <= 0) return(min); + + minf=ABS(x[0]); + ix += inc_x; + i++; + + while(i < n) + { + if( ABS(x[ix]) < ABS(minf) ) + { + min = i; + minf = ABS(x[ix]); + } + ix += inc_x; + i++; + } + return(min+1); +} + + diff --git a/kernel/riscv64/imax.c b/kernel/riscv64/imax.c new file mode 100644 index 000000000..5072dd16e --- /dev/null +++ b/kernel/riscv64/imax.c @@ -0,0 +1,69 @@ 
+/*************************************************************************** +Copyright (c) 2013, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*****************************************************************************/ + + +/************************************************************************************** +* 2013/09/14 Saar +* BLASTEST float : NoTest +* BLASTEST double : NoTest +* CTEST : NoTest +* TEST : NoTest +* +**************************************************************************************/ + +#include "common.h" +#include + + + +BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) +{ + BLASLONG i=0; + BLASLONG ix=0; + FLOAT maxf=0.0; + BLASLONG max=0; + + if (n <= 0 || inc_x <= 0) return(max); + + maxf=x[0]; + ix += inc_x; + i++; + + while(i < n) + { + if( x[ix] > maxf ) + { + max = i; + maxf = x[ix]; + } + ix += inc_x; + i++; + } + return(max+1); +} + + diff --git a/kernel/riscv64/imin.c b/kernel/riscv64/imin.c new file mode 100644 index 000000000..598cba387 --- /dev/null +++ b/kernel/riscv64/imin.c @@ -0,0 +1,67 @@ +/*************************************************************************** +Copyright (c) 2013, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*****************************************************************************/ + + +/************************************************************************************** +* 2013/08/19 Saar +* BLASTEST float +* BLASTEST double +* +**************************************************************************************/ + +#include "common.h" +#include + + + +BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) +{ + BLASLONG i=0; + BLASLONG ix=0; + FLOAT minf=0.0; + BLASLONG min=0; + + if (n <= 0 || inc_x <= 0) return(min); + + minf=x[0]; + ix += inc_x; + i++; + + while(i < n) + { + if( x[ix] > minf ) + { + min = i; + minf = x[ix]; + } + ix += inc_x; + i++; + } + return(min+1); +} + + diff --git a/kernel/riscv64/izamax.c b/kernel/riscv64/izamax.c new file mode 100644 index 000000000..8fe33e95b --- /dev/null +++ b/kernel/riscv64/izamax.c @@ -0,0 +1,81 @@ +/*************************************************************************** +Copyright (c) 2013, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. 
Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*****************************************************************************/ + +/************************************************************************************** +* 2013/09/14 Saar +* BLASTEST float : NoTest +* BLASTEST double : NoTest +* CTEST : OK +* TEST : OK +* +**************************************************************************************/ + +#include "common.h" +#include + +#if defined(DOUBLE) + +#define ABS fabs + +#else + +#define ABS fabsf + +#endif + +#define CABS1(x,i) ABS(x[i])+ABS(x[i+1]) + +BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) +{ + BLASLONG i=0; + BLASLONG ix=0; + FLOAT maxf; + BLASLONG max=0; + BLASLONG inc_x2; + + if (n <= 0 || inc_x <= 0) return(max); + + inc_x2 = 2 * inc_x; + + maxf = CABS1(x,0); + ix += inc_x2; + i++; + + while(i < n) + { + if( CABS1(x,ix) > maxf ) + { + max = i; + maxf = CABS1(x,ix); + } + ix += inc_x2; + i++; + } + return(max+1); +} + + diff --git a/kernel/riscv64/izamin.c b/kernel/riscv64/izamin.c new file mode 100644 index 000000000..fb5a0d4cb --- /dev/null +++ b/kernel/riscv64/izamin.c @@ -0,0 +1,81 @@ +/*************************************************************************** +Copyright (c) 2013, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. 
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*****************************************************************************/ + +/************************************************************************************** +* 2013/09/14 Saar +* BLASTEST float : NoTest +* BLASTEST double : NoTest +* CTEST : NoTest +* TEST : NoTest +* +**************************************************************************************/ + +#include "common.h" +#include + +#if defined(DOUBLE) + +#define ABS fabs + +#else + +#define ABS fabsf + +#endif + +#define CABS1(x,i) ABS(x[i])+ABS(x[i+1]) + +BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) +{ + BLASLONG i=0; + BLASLONG ix=0; + FLOAT minf; + BLASLONG min=0; + BLASLONG inc_x2; + + if (n <= 0 || inc_x <= 0) return(min); + + inc_x2 = 2 * inc_x; + + minf = CABS1(x,0); + ix += inc_x2; + i++; + + while(i < n) + { + if( CABS1(x,ix) < minf ) + { + min = i; + minf = CABS1(x,ix); + } + ix += inc_x2; + i++; + } + return(min+1); +} + + diff --git a/kernel/riscv64/max.c b/kernel/riscv64/max.c new file mode 100644 index 000000000..2ad956bc0 --- /dev/null +++ b/kernel/riscv64/max.c @@ -0,0 +1,65 @@ +/*************************************************************************** +Copyright (c) 2013, The OpenBLAS Project +All rights reserved. 
+Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*****************************************************************************/ + +/************************************************************************************** +* 2013/09/14 Saar +* BLASTEST float : NoTest +* BLASTEST double : NoTest +* CTEST : NoTest +* TEST : NoTest +* +**************************************************************************************/ + +#include "common.h" +#include + + +FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) +{ + BLASLONG i=0; + BLASLONG ix=0; + FLOAT maxf=0.0; + + if (n <= 0 || inc_x <= 0) return(maxf); + + maxf=x[0]; + ix += inc_x; + i++; + + while(i < n) + { + if( x[ix] > maxf ) + { + maxf = x[ix]; + } + ix += inc_x; + i++; + } + return(maxf); +} + + diff --git a/kernel/riscv64/min.c b/kernel/riscv64/min.c new file mode 100644 index 000000000..2812fe397 --- /dev/null +++ b/kernel/riscv64/min.c @@ -0,0 +1,65 @@ +/*************************************************************************** +Copyright (c) 2013, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*****************************************************************************/ + +/************************************************************************************** +* 2013/09/14 Saar +* BLASTEST float : NoTest +* BLASTEST double : NoTest +* CTEST : NoTest +* TEST : NoTest +* +**************************************************************************************/ + +#include "common.h" +#include + + +FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) +{ + BLASLONG i=0; + BLASLONG ix=0; + FLOAT minf=0.0; + + if (n <= 0 || inc_x <= 0) return(minf); + + minf=x[0]; + ix += inc_x; + i++; + + while(i < n) + { + if( x[ix] < minf ) + { + minf = x[ix]; + } + ix += inc_x; + i++; + } + return(minf); +} + + diff --git a/kernel/riscv64/nrm2.c b/kernel/riscv64/nrm2.c new file mode 100644 index 000000000..fcff09337 --- /dev/null +++ b/kernel/riscv64/nrm2.c @@ -0,0 +1,88 @@ +/*************************************************************************** +Copyright (c) 2013, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. 
Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*****************************************************************************/ + +/************************************************************************************** +* 2013/09/13 Saar +* BLASTEST float : OK +* BLASTEST double : OK +* CTEST : OK +* TEST : OK +* +**************************************************************************************/ + +#include "common.h" +#include + +#if defined(DOUBLE) + +#define ABS fabs + +#else + +#define ABS fabsf + +#endif + + + +FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) +{ + BLASLONG i=0; + FLOAT scale = 0.0; + FLOAT ssq = 1.0; + FLOAT absxi = 0.0; + + + if (n <= 0 || inc_x <= 0) return(0.0); + if ( n == 1 ) return( ABS(x[0]) ); + + n *= inc_x; + while(i < n) + { + + if ( x[i] != 0.0 ) + { + absxi = ABS( x[i] ); + if ( scale < absxi ) + { + ssq = 1 + ssq * ( scale / absxi ) * ( scale / absxi ); + scale = absxi ; + } + else + { + ssq += ( absxi/scale ) * ( absxi/scale ); + } + + } + i += inc_x; + } + scale = scale * sqrt( ssq ); + return(scale); + +} + + diff --git a/kernel/riscv64/omatcopy_cn.c b/kernel/riscv64/omatcopy_cn.c new file mode 100644 index 000000000..4d11b9125 --- /dev/null +++ b/kernel/riscv64/omatcopy_cn.c @@ -0,0 +1,90 @@ +/*************************************************************************** +Copyright (c) 2013, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. 
Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*****************************************************************************/ + +#include "common.h" + +/***************************************************** + * 2014/06/09 Saar + * + * Order ColMajor + * No Trans + * +******************************************************/ + +int CNAME(BLASLONG rows, BLASLONG cols, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG ldb) +{ + BLASLONG i,j; + FLOAT *aptr,*bptr; + + if ( rows <= 0 ) return(0); + if ( cols <= 0 ) return(0); + + aptr = a; + bptr = b; + + if ( alpha == 0.0 ) + { + for ( i=0; i + +int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *dummy, BLASLONG dummy2) +{ + BLASLONG i=0; + BLASLONG ix=0,iy=0; + FLOAT temp; + + if ( n < 0 ) return(0); + + while(i < n) + { + + temp = x[ix] ; + x[ix] = y[iy] ; + y[iy] = temp ; + + ix += inc_x ; + iy += inc_y ; + i++ ; + + } + return(0); + +} + + diff --git a/kernel/riscv64/symv_L.c b/kernel/riscv64/symv_L.c new file mode 100644 index 
000000000..8f48d03f5 --- /dev/null +++ b/kernel/riscv64/symv_L.c @@ -0,0 +1,70 @@ +/*************************************************************************** +Copyright (c) 2013, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*****************************************************************************/ + + +#include "common.h" + +int CNAME(BLASLONG m, BLASLONG offset, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *buffer) +{ + BLASLONG i; + BLASLONG ix,iy; + BLASLONG jx,jy; + BLASLONG j; + FLOAT temp1; + FLOAT temp2; + +#if 0 + if ( m != offset ) + printf("Symv_L: m=%d offset=%d\n",m,offset); +#endif + + jx = 0; + jy = 0; + + for (j=0; j + +#if defined(DOUBLE) + +#define ABS fabs + +#else + +#define ABS fabsf + +#endif + +#define CABS1(x,i) ABS(x[i])+ABS(x[i+1]) + +FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) +{ + BLASLONG i=0; + BLASLONG ix=0; + FLOAT maxf; + BLASLONG inc_x2; + + if (n <= 0 || inc_x <= 0) return(0.0); + + inc_x2 = 2 * inc_x; + + maxf = CABS1(x,0); + ix += inc_x2; + i++; + + while(i < n) + { + if( CABS1(x,ix) > maxf ) + { + maxf = CABS1(x,ix); + } + ix += inc_x2; + i++; + } + return(maxf); +} + + diff --git a/kernel/riscv64/zamin.c b/kernel/riscv64/zamin.c new file mode 100644 index 000000000..02eab3e75 --- /dev/null +++ b/kernel/riscv64/zamin.c @@ -0,0 +1,79 @@ +/*************************************************************************** +Copyright (c) 2013, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. 
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*****************************************************************************/
+
+/**************************************************************************************
+* 2013/09/14 Saar
+* BLASTEST float : OK
+* BLASTEST double : OK
+* CTEST : NoTest
+* TEST : NoTest
+*
+**************************************************************************************/
+
+#include "common.h"
+#include <math.h>
+
+#if defined(DOUBLE)
+
+#define ABS fabs
+
+#else
+
+#define ABS fabsf
+
+#endif
+/* |re| + |im| of the complex element stored at x[i], x[i+1]; fully parenthesized so it expands safely in any expression. */
+#define CABS1(x,i) (ABS((x)[i])+ABS((x)[(i)+1]))
+/* Returns the smallest |re|+|im| over n complex elements of x (interleaved re,im; stride inc_x elements); 0.0 if n <= 0 or inc_x <= 0. */
+FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
+{
+    BLASLONG i=0;
+    BLASLONG ix=0;
+    FLOAT minf;
+    BLASLONG inc_x2;
+
+    if (n <= 0 || inc_x <= 0) return(0.0);
+
+    inc_x2 = 2 * inc_x; /* stride in FLOATs: each complex element occupies two FLOATs */
+
+    minf = CABS1(x,0); /* seed the running minimum with the first element */
+    ix += inc_x2;
+    i++;
+
+    while(i < n)
+    {
+        if( CABS1(x,ix) < minf )
+        {
+            minf = CABS1(x,ix);
+        }
+        ix += inc_x2;
+        i++;
+    }
+    return(minf);
+}
+
+
diff --git a/kernel/riscv64/zasum.c b/kernel/riscv64/zasum.c
new file mode 100644
index 000000000..61e85cae6
--- /dev/null
+++ b/kernel/riscv64/zasum.c
@@ -0,0 +1,72 @@
+/***************************************************************************
+Copyright (c) 2013, The OpenBLAS Project
+All rights reserved.
+Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*****************************************************************************/
+
+/**************************************************************************************
+* 2013/09/14 Saar
+* BLASTEST float : OK
+* BLASTEST double : OK
+* CTEST : OK
+* TEST : OK
+*
+**************************************************************************************/
+
+
+#include "common.h"
+#include <math.h>
+
+#if defined(DOUBLE)
+
+#define ABS fabs
+
+#else
+
+#define ABS fabsf
+
+#endif
+/* |re| + |im| of the complex element stored at x[i], x[i+1]; fully parenthesized so it expands safely in any expression. */
+#define CABS1(x,i) (ABS((x)[i])+ABS((x)[(i)+1]))
+/* Returns the sum of |re|+|im| over n complex elements of x (interleaved re,im; stride inc_x elements); 0.0 if n <= 0 or inc_x <= 0. */
+FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
+{
+    BLASLONG i=0;
+    FLOAT sumf = 0.0;
+    BLASLONG inc_x2;
+
+    if (n <= 0 || inc_x <= 0) return(sumf);
+
+    inc_x2 = 2 * inc_x; /* stride in FLOATs: each complex element occupies two FLOATs */
+
+    n *= inc_x2; /* n becomes the exclusive upper bound on the FLOAT index i */
+    while(i < n)
+    {
+        sumf += CABS1(x,i);
+        i += inc_x2;
+    }
+    return(sumf);
+}
+
+
diff --git a/kernel/riscv64/zaxpby.c b/kernel/riscv64/zaxpby.c
new file mode 100644
index 000000000..445354416
--- /dev/null
+++ b/kernel/riscv64/zaxpby.c
@@ -0,0 +1,118 @@
+/***************************************************************************
+Copyright (c) 2013, The OpenBLAS Project
+All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in
+the documentation and/or other materials provided with the
+distribution.
+3. Neither the name of the OpenBLAS project nor the names of
+its contributors may be used to endorse or promote products
+derived from this software without specific prior written permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*****************************************************************************/
+
+/***************************************************************************
+* 2014/06/07 Saar
+*
+***************************************************************************/
+
+#include "common.h"
+/* Complex y := alpha*x + beta*y over n elements (interleaved re,im; strides inc_x/inc_y in elements); no-op when n <= 0; always returns 0. */
+int CNAME(BLASLONG n, FLOAT alpha_r, FLOAT alpha_i, FLOAT *x, BLASLONG inc_x, FLOAT beta_r, FLOAT beta_i,FLOAT *y, BLASLONG inc_y)
+{
+    BLASLONG i=0;
+    BLASLONG ix,iy;
+    FLOAT temp;
+    BLASLONG inc_x2, inc_y2;
+
+    if ( n <= 0 ) return(0);
+
+    ix = 0;
+    iy = 0;
+
+    inc_x2 = 2 * inc_x; /* strides in FLOATs: each complex element occupies two FLOATs */
+    inc_y2 = 2 * inc_y;
+
+    if ( beta_r == 0.0 && beta_i == 0.0) /* beta == 0: the old contents of y are never read */
+    {
+        if ( alpha_r == 0.0 && alpha_i == 0.0 )
+        {
+            /* alpha == 0 and beta == 0: y is overwritten with zeros, x is not read */
+            while(i < n)
+            {
+                y[iy] = 0.0 ;
+                y[iy+1] = 0.0 ;
+                iy += inc_y2 ;
+                i++ ;
+            }
+
+        }
+        else
+        {
+            /* beta == 0 only: y := alpha * x */
+            while(i < n)
+            {
+                y[iy] = ( alpha_r * x[ix] - alpha_i * x[ix+1] ) ;
+                y[iy+1] = ( alpha_r * x[ix+1] + alpha_i * x[ix] ) ;
+                ix += inc_x2 ;
+                iy += inc_y2 ;
+                i++ ;
+            }
+
+
+        }
+
+    }
+    else
+    {
+        if ( alpha_r == 0.0 && alpha_i == 0.0 )
+        {
+            /* alpha == 0 only: y := beta * y, x is not read */
+            while(i < n)
+            {
+                temp = ( beta_r * y[iy] - beta_i * y[iy+1] ) ; /* new real part is held in temp because the next line still needs the old y[iy] */
+                y[iy+1] = ( beta_r * y[iy+1] + beta_i * y[iy] ) ;
+                y[iy] = temp;
+                iy += inc_y2 ;
+                i++ ;
+            }
+
+        }
+        else
+        {
+            /* general case: y := alpha * x + beta * y */
+            while(i < n)
+            {
+                temp = ( alpha_r * x[ix] - alpha_i * x[ix+1] ) + ( beta_r * y[iy] - beta_i * y[iy+1] ) ; /* new real part is held in temp because the next line still needs the old y[iy] */
+                y[iy+1] = ( alpha_r * x[ix+1] + alpha_i * x[ix] ) + ( beta_r * y[iy+1] + beta_i * y[iy] ) ;
+                y[iy] = temp;
+                ix += inc_x2 ;
+                iy += inc_y2 ;
+                i++ ;
+            }
+
+
+        }
+
+
+
+    }
+    return(0);
+
+}
+
+
diff --git a/kernel/riscv64/zaxpy.c b/kernel/riscv64/zaxpy.c
new file mode 100644
index 000000000..1dcaeac27
--- /dev/null
+++ b/kernel/riscv64/zaxpy.c
@@ -0,0 +1,74 @@
+/***************************************************************************
+Copyright (c) 2013, The OpenBLAS Project
+All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in
+the documentation and/or other materials provided with the
+distribution.
+3. Neither the name of the OpenBLAS project nor the names of
+its contributors may be used to endorse or promote products
+derived from this software without specific prior written permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED.
IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*****************************************************************************/
+
+/**************************************************************************************
+* 2013/09/15 Saar
+* BLASTEST float : OK
+* BLASTEST double : OK
+* CTEST : OK
+* TEST : OK
+*
+**************************************************************************************/
+
+
+#include "common.h"
+/* Complex y += da*x over n elements (y += da*conj(x) when CONJ is defined); interleaved re,im storage; dummy0/dummy1/dummy/dummy2 are unused; always returns 0. */
+int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *dummy, BLASLONG dummy2)
+{
+    BLASLONG i=0;
+    BLASLONG ix,iy;
+    BLASLONG inc_x2;
+    BLASLONG inc_y2;
+
+    if ( n < 0 ) return(0);
+    if ( da_r == 0.0 && da_i == 0.0 ) return(0); /* zero scalar: y is left untouched */
+
+    ix = 0;
+    iy = 0;
+
+    inc_x2 = 2 * inc_x; /* strides in FLOATs: each complex element occupies two FLOATs */
+    inc_y2 = 2 * inc_y;
+
+    while(i < n)
+    {
+#if !defined(CONJ)
+        y[iy] += ( da_r * x[ix] - da_i * x[ix+1] ) ; /* real part of da * x */
+        y[iy+1] += ( da_r * x[ix+1] + da_i * x[ix] ) ; /* imaginary part of da * x */
+#else
+        y[iy] += ( da_r * x[ix] + da_i * x[ix+1] ) ; /* real part of da * conj(x) */
+        y[iy+1] -= ( da_r * x[ix+1] - da_i * x[ix] ) ; /* subtracting this adds the imaginary part of da * conj(x) */
+#endif
+        ix += inc_x2 ;
+        iy += inc_y2 ;
+        i++ ;
+
+    }
+    return(0);
+
+}
+
+
diff --git a/kernel/riscv64/zcopy.c b/kernel/riscv64/zcopy.c
new file mode 100644
index 000000000..07fe584c5
--- /dev/null
+++ b/kernel/riscv64/zcopy.c
@@ -0,0 +1,65 @@
+/***************************************************************************
+Copyright (c) 2013, The OpenBLAS Project
+All rights reserved.
+Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*****************************************************************************/
+
+/**************************************************************************************
+* 2013/09/14 Saar
+* BLASTEST float : OK
+* BLASTEST double : OK
+* CTEST : OK
+* TEST : OK
+*
+**************************************************************************************/
+
+#include "common.h"
+/* Copies n complex elements (interleaved re,im) from x to y with element strides inc_x/inc_y; always returns 0. */
+int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
+{
+    BLASLONG i=0;
+    BLASLONG ix=0,iy=0;
+    BLASLONG inc_x2;
+    BLASLONG inc_y2;
+
+    if ( n < 0 ) return(0);
+
+    inc_x2 = 2 * inc_x; /* strides in FLOATs: each complex element occupies two FLOATs */
+    inc_y2 = 2 * inc_y;
+
+    while(i < n)
+    {
+
+        y[iy] = x[ix] ; /* real part */
+        y[iy+1] = x[ix+1] ; /* imaginary part */
+        ix += inc_x2;
+        iy += inc_y2;
+        i++ ;
+
+    }
+    return(0);
+
+}
+
+
diff --git a/kernel/riscv64/zdot.c b/kernel/riscv64/zdot.c
new file mode 100644
index 000000000..733c235c6
--- /dev/null
+++ b/kernel/riscv64/zdot.c
@@ -0,0 +1,80 @@
+/***************************************************************************
+Copyright (c) 2013, The OpenBLAS Project
+All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in
+the documentation and/or other materials provided with the
+distribution.
+3. Neither the name of the OpenBLAS project nor the names of
+its contributors may be used to endorse or promote products
+derived from this software without specific prior written permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED.
IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*****************************************************************************/
+
+/**************************************************************************************
+* 2013/09/14 Saar
+* BLASTEST float : FAIL
+* BLASTEST double : FAIL
+* CTEST : OK
+* TEST : OK
+*
+**************************************************************************************/
+
+#include "common.h"
+/* Complex dot product of n elements: sum of x[k]*y[k], or sum of conj(x[k])*y[k] when CONJ is defined; returns 0+0i when n < 1. */
+OPENBLAS_COMPLEX_FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
+
+{
+    BLASLONG i=0;
+    BLASLONG ix=0,iy=0;
+    FLOAT dot[2]; /* running sum: dot[0] is the real part, dot[1] the imaginary part */
+    OPENBLAS_COMPLEX_FLOAT result;
+    BLASLONG inc_x2;
+    BLASLONG inc_y2;
+
+    dot[0]=0.0;
+    dot[1]=0.0;
+
+    CREAL(result) = 0.0 ;
+    CIMAG(result) = 0.0 ;
+
+    if ( n < 1 ) return(result);
+
+    inc_x2 = 2 * inc_x ; /* strides in FLOATs: each complex element occupies two FLOATs */
+    inc_y2 = 2 * inc_y ;
+
+    while(i < n)
+    {
+#if !defined(CONJ)
+        dot[0] += ( x[ix] * y[iy] - x[ix+1] * y[iy+1] ) ; /* real part of x * y */
+        dot[1] += ( x[ix+1] * y[iy] + x[ix] * y[iy+1] ) ; /* imaginary part of x * y */
+#else
+        dot[0] += ( x[ix] * y[iy] + x[ix+1] * y[iy+1] ) ; /* real part of conj(x) * y */
+        dot[1] -= ( x[ix+1] * y[iy] - x[ix] * y[iy+1] ) ; /* subtracting this adds the imaginary part of conj(x) * y */
+#endif
+        ix += inc_x2 ;
+        iy += inc_y2 ;
+        i++ ;
+
+    }
+    CREAL(result) = dot[0];
+    CIMAG(result) = dot[1];
+    return(result);
+
+}
+
+
diff --git a/kernel/riscv64/zgemv_n.c b/kernel/riscv64/zgemv_n.c
new file mode 100644
index 000000000..b9b03f792
--- /dev/null
+++ b/kernel/riscv64/zgemv_n.c
@@ -0,0 +1,157 @@
+/***************************************************************************
+Copyright (c) 2013, The OpenBLAS Project
+All rights reserved.
+Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*****************************************************************************/ + +/************************************************************************************** + * * 2013/11/23 Saar + * * BLASTEST float : OK + * * BLASTEST double : OK + * CTEST : OK + * TEST : OK + * * + * **************************************************************************************/ + + +#include "common.h" + +int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha_r, FLOAT alpha_i, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *buffer) +{ + BLASLONG i; + BLASLONG ix,iy; + BLASLONG j; + FLOAT *a_ptr; + FLOAT temp_r,temp_i; + BLASLONG inc_x2,inc_y2; + BLASLONG lda2; + BLASLONG i2; + + lda2 = 2*lda; + + ix = 0; + a_ptr = a; + + if ( inc_x == 1 && inc_y == 1 ) + { + + for (j=0; j + +#if defined(DOUBLE) + +#define ABS fabs + +#else + +#define ABS fabsf + +#endif + + + +FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) +{ + BLASLONG i=0; + FLOAT scale = 0.0; + FLOAT ssq = 1.0; + BLASLONG inc_x2; + FLOAT temp; + + if (n <= 0 || inc_x <= 0) return(0.0); + + inc_x2 = 2 * inc_x; + + n *= inc_x2; + while(i < n) + { + + if ( x[i] != 0.0 ) + { + temp = ABS( x[i] ); + if ( scale < temp ) + { + ssq = 1 + ssq * ( scale / temp ) * ( scale / temp ); + scale = temp ; + } + else + { + ssq += ( temp / scale ) * ( temp / scale ); + } + + } + + if ( x[i+1] != 0.0 ) + { + temp = ABS( x[i+1] ); + if ( scale < temp ) + { + ssq = 1 + ssq * ( scale / temp ) * ( scale / temp ); + scale = temp ; + } + else + { + ssq += ( temp / scale ) * ( temp / scale ); + } + + } + + + i += inc_x2; + } + scale = scale * sqrt( ssq ); + return(scale); + +} + + diff --git a/kernel/riscv64/zomatcopy_cn.c b/kernel/riscv64/zomatcopy_cn.c new file mode 100644 index 000000000..f5a7a6284 --- /dev/null +++ b/kernel/riscv64/zomatcopy_cn.c @@ -0,0 +1,70 @@ +/*************************************************************************** +Copyright (c) 2013, The OpenBLAS Project +All rights 
reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*****************************************************************************/ + +#include "common.h" + +/***************************************************** + * 2014/06/09 Saar + * + * Order ColMajor + * No Trans + * +******************************************************/ + +int CNAME(BLASLONG rows, BLASLONG cols, FLOAT alpha_r, FLOAT alpha_i, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG ldb) +{ + BLASLONG i,j,ia; + FLOAT *aptr,*bptr; + + if ( rows <= 0 ) return(0); + if ( cols <= 0 ) return(0); + + aptr = a; + bptr = b; + + lda *= 2; + ldb *= 2; + + for ( i=0; i + +int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3, FLOAT dummy4, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *dummy, BLASLONG dummy2) +{ + BLASLONG i=0; + BLASLONG ix=0,iy=0; + FLOAT temp[2]; + BLASLONG inc_x2; + BLASLONG inc_y2; + + if ( n < 0 ) return(0); + + inc_x2 = 2 * inc_x; + inc_y2 = 2 * inc_y; + + while(i < n) + { + + temp[0] = x[ix] ; + temp[1] = x[ix+1] ; + x[ix] = y[iy] ; + x[ix+1] = y[iy+1] ; + y[iy] = temp[0] ; + y[iy+1] = temp[1] ; + + ix += inc_x2 ; + iy += inc_y2 ; + i++ ; + + } + return(0); + +} + + diff --git a/lapack/laswp/riscv64/Makefile b/lapack/laswp/riscv64/Makefile new file mode 100644 index 000000000..75411deb5 --- /dev/null +++ b/lapack/laswp/riscv64/Makefile @@ -0,0 +1,13 @@ +TOPDIR = ../../.. +include ../../../Makefile.system + +ifndef LASWP +LASWP = ../generic/laswp_k.c +endif + +ifndef ZLASWP +ZLASWP = ../generic/zlaswp_k.c +endif + +include ../generic/Makefile + diff --git a/param.h b/param.h index 189cdc4a0..52675bc25 100644 --- a/param.h +++ b/param.h @@ -2343,6 +2343,45 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define SYMV_P 16 #endif +#ifdef RISCV64 +#define GEMM_DEFAULT_OFFSET_A 0 +#define GEMM_DEFAULT_OFFSET_B 0 +#define GEMM_DEFAULT_ALIGN 0x03fffUL + +#define SGEMM_DEFAULT_UNROLL_M 4 +#define SGEMM_DEFAULT_UNROLL_N 4 + +#define DGEMM_DEFAULT_UNROLL_M 4 +#define DGEMM_DEFAULT_UNROLL_N 4 + +#define CGEMM_DEFAULT_UNROLL_M 2 +#define CGEMM_DEFAULT_UNROLL_N 2 + +#define ZGEMM_DEFAULT_UNROLL_M 2 +#define ZGEMM_DEFAULT_UNROLL_N 2 + +#define SGEMM_DEFAULT_P 128 +#define DGEMM_DEFAULT_P 128 +#define CGEMM_DEFAULT_P 96 +#define ZGEMM_DEFAULT_P 64 + +#define SGEMM_DEFAULT_Q 240 +#define DGEMM_DEFAULT_Q 120 +#define CGEMM_DEFAULT_Q 120 +#define ZGEMM_DEFAULT_Q 120 + +#define SGEMM_DEFAULT_R 12288 +#define DGEMM_DEFAULT_R 8192 +#define CGEMM_DEFAULT_R 4096 +#define ZGEMM_DEFAULT_R 4096 + +#define SYMV_P 16 + +#define GEMM_DEFAULT_OFFSET_A 0 +#define GEMM_DEFAULT_OFFSET_B 0 + +#endif + #ifdef ARMV7 #define SNUMOPT 2 #define DNUMOPT 2 From 0ee395db35ee824aff77d4d2b812aaedb111addd Mon Sep 17 00:00:00 2001 From: Jerry Zhao Date: Wed, 18 Apr 2018 18:03:32 -0700 Subject: [PATCH 02/22] Fixed TRMM and SYMM for RISCV --- kernel/Makefile.L3 | 4 ++++ kernel/riscv64/KERNEL | 10 ++++++++++ param.h | 8 ++++---- 3 files changed, 18 insertions(+), 4 deletions(-) diff --git a/kernel/Makefile.L3 b/kernel/Makefile.L3 index 4284fbfa0..63e09a56d 100644 --- a/kernel/Makefile.L3 +++ b/kernel/Makefile.L3 @@ -20,6 +20,10 @@ ifeq ($(ARCH), arm64) USE_TRMM = 1 endif +ifeq ($(ARCH), riscv64) +USE_TRMM = 1 +endif + ifeq ($(TARGET), LOONGSON3B) USE_TRMM = 1 endif diff --git a/kernel/riscv64/KERNEL b/kernel/riscv64/KERNEL index 7d854ced6..04d82b4ce 100644 --- a/kernel/riscv64/KERNEL +++ b/kernel/riscv64/KERNEL @@ -129,6 +129,16 @@ ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c +SSYMV_U_KERNEL = ../generic/symv_k.c +SSYMV_L_KERNEL = ../generic/symv_k.c +DSYMV_U_KERNEL = ../generic/symv_k.c +DSYMV_L_KERNEL = 
../generic/symv_k.c +CSYMV_U_KERNEL = ../generic/zsymv_k.c +CSYMV_L_KERNEL = ../generic/zsymv_k.c +ZSYMV_U_KERNEL = ../generic/zsymv_k.c +ZSYMV_L_KERNEL = ../generic/zsymv_k.c + + LSAME_KERNEL = ../generic/lsame.c SCABS_KERNEL = ../generic/cabs.c diff --git a/param.h b/param.h index 52675bc25..22d837960 100644 --- a/param.h +++ b/param.h @@ -2348,11 +2348,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define GEMM_DEFAULT_OFFSET_B 0 #define GEMM_DEFAULT_ALIGN 0x03fffUL -#define SGEMM_DEFAULT_UNROLL_M 4 -#define SGEMM_DEFAULT_UNROLL_N 4 +#define SGEMM_DEFAULT_UNROLL_M 2 +#define SGEMM_DEFAULT_UNROLL_N 2 -#define DGEMM_DEFAULT_UNROLL_M 4 -#define DGEMM_DEFAULT_UNROLL_N 4 +#define DGEMM_DEFAULT_UNROLL_M 2 +#define DGEMM_DEFAULT_UNROLL_N 2 #define CGEMM_DEFAULT_UNROLL_M 2 #define CGEMM_DEFAULT_UNROLL_N 2 From db17ce896fbbf53cbef34f81e1f1ec6887965ec4 Mon Sep 17 00:00:00 2001 From: Dumi Loghin Date: Wed, 5 Sep 2018 12:49:37 +0800 Subject: [PATCH 03/22] replace ARCH with AR in lapack-netlib --- Makefile | 4 +-- c_check | 4 +++ lapack-netlib/BLAS/SRC/Makefile | 10 +++--- lapack-netlib/CBLAS/src/Makefile | 32 +++++++++---------- lapack-netlib/DOCS/lawn81.tex | 2 +- lapack-netlib/INSTALL/make.inc.ALPHA | 4 +-- lapack-netlib/INSTALL/make.inc.HPPA | 4 +-- lapack-netlib/INSTALL/make.inc.IRIX64 | 4 +-- lapack-netlib/INSTALL/make.inc.O2K | 4 +-- lapack-netlib/INSTALL/make.inc.SGI5 | 4 +-- lapack-netlib/INSTALL/make.inc.SUN4 | 4 +-- lapack-netlib/INSTALL/make.inc.SUN4SOL2 | 4 +-- lapack-netlib/INSTALL/make.inc.XLF | 4 +-- lapack-netlib/INSTALL/make.inc.gfortran | 4 +-- lapack-netlib/INSTALL/make.inc.gfortran_debug | 4 +-- lapack-netlib/INSTALL/make.inc.ifort | 4 +-- lapack-netlib/INSTALL/make.inc.pgf95 | 4 +-- lapack-netlib/INSTALL/make.inc.pghpf | 4 +-- lapack-netlib/LAPACKE/src/Makefile | 10 +++--- lapack-netlib/LAPACKE/utils/Makefile | 2 +- lapack-netlib/SRC/Makefile | 10 +++--- lapack-netlib/SRC/VARIANTS/Makefile | 12 +++---- 
lapack-netlib/TESTING/MATGEN/Makefile | 10 +++--- lapack-netlib/make.inc.example | 4 +-- make.inc | 2 +- 25 files changed, 79 insertions(+), 75 deletions(-) diff --git a/Makefile b/Makefile index c0e5fbcf8..aaeb0c498 100644 --- a/Makefile +++ b/Makefile @@ -237,8 +237,8 @@ ifndef NOFORTRAN -@echo "LOADOPTS = $(FFLAGS) $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc -@echo "CC = $(CC)" >> $(NETLIB_LAPACK_DIR)/make.inc -@echo "override CFLAGS = $(LAPACK_CFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc - -@echo "ARCH = $(AR)" >> $(NETLIB_LAPACK_DIR)/make.inc - -@echo "ARCHFLAGS = $(ARFLAGS) -ru" >> $(NETLIB_LAPACK_DIR)/make.inc + -@echo "AR = $(AR)" >> $(NETLIB_LAPACK_DIR)/make.inc + -@echo "ARFLAGS = $(ARFLAGS) -ru" >> $(NETLIB_LAPACK_DIR)/make.inc -@echo "RANLIB = $(RANLIB)" >> $(NETLIB_LAPACK_DIR)/make.inc -@echo "LAPACKLIB = ../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc -@echo "TMGLIB = ../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc diff --git a/c_check b/c_check index c564855f3..f86a37b5b 100644 --- a/c_check +++ b/c_check @@ -121,6 +121,10 @@ if (($architecture eq "x86") && ($os ne Darwin) && ($os ne SunOS)) { $binary =32; } +if ($architecture eq "riscv64") { + $defined = 1; +} + if ($compiler eq "PGI") { $compiler_name .= " -tp p7" if ($binary eq "32"); $compiler_name .= " -tp p7-64" if ($binary eq "64"); diff --git a/lapack-netlib/BLAS/SRC/Makefile b/lapack-netlib/BLAS/SRC/Makefile index a436365aa..f7236318b 100644 --- a/lapack-netlib/BLAS/SRC/Makefile +++ b/lapack-netlib/BLAS/SRC/Makefile @@ -138,23 +138,23 @@ ALLOBJ = $(SBLAS1) $(SBLAS2) $(SBLAS3) $(DBLAS1) $(DBLAS2) $(DBLAS3) \ $(ZBLAS2) $(ZBLAS3) $(ALLBLAS) $(BLASLIB): $(ALLOBJ) - $(ARCH) $(ARCHFLAGS) $@ $^ + $(AR) $(ARFLAGS) $@ $^ $(RANLIB) $@ single: $(SBLAS1) $(ALLBLAS) $(SBLAS2) $(SBLAS3) - $(ARCH) $(ARCHFLAGS) $(BLASLIB) $^ + $(AR) $(ARFLAGS) $(BLASLIB) $^ $(RANLIB) $(BLASLIB) double: $(DBLAS1) $(ALLBLAS) $(DBLAS2) $(DBLAS3) - $(ARCH) $(ARCHFLAGS) $(BLASLIB) $^ + $(AR) $(ARFLAGS) $(BLASLIB) $^ $(RANLIB) 
$(BLASLIB) complex: $(CBLAS1) $(CB1AUX) $(ALLBLAS) $(CBLAS2) $(CBLAS3) - $(ARCH) $(ARCHFLAGS) $(BLASLIB) $^ + $(AR) $(ARFLAGS) $(BLASLIB) $^ $(RANLIB) $(BLASLIB) complex16: $(ZBLAS1) $(ZB1AUX) $(ALLBLAS) $(ZBLAS2) $(ZBLAS3) - $(ARCH) $(ARCHFLAGS) $(BLASLIB) $^ + $(AR) $(ARFLAGS) $(BLASLIB) $^ $(RANLIB) $(BLASLIB) FRC: diff --git a/lapack-netlib/CBLAS/src/Makefile b/lapack-netlib/CBLAS/src/Makefile index 6c0518ac7..9b9063d8d 100644 --- a/lapack-netlib/CBLAS/src/Makefile +++ b/lapack-netlib/CBLAS/src/Makefile @@ -45,22 +45,22 @@ sclev1 = cblas_scasum.o scasumsub.o cblas_scnrm2.o scnrm2sub.o # Single precision real slib1: $(slev1) $(sclev1) - $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ + $(AR) $(ARFLAGS) $(CBLASLIB) $^ $(RANLIB) $(CBLASLIB) # Double precision real dlib1: $(dlev1) - $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ + $(AR) $(ARFLAGS) $(CBLASLIB) $^ $(RANLIB) $(CBLASLIB) # Single precision complex clib1: $(clev1) $(sclev1) - $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ + $(AR) $(ARFLAGS) $(CBLASLIB) $^ $(RANLIB) $(CBLASLIB) # Double precision complex zlib1: $(zlev1) - $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ + $(AR) $(ARFLAGS) $(CBLASLIB) $^ $(RANLIB) $(CBLASLIB) # @@ -97,22 +97,22 @@ zlev2 = cblas_zgemv.o cblas_zgbmv.o cblas_zhemv.o cblas_zhbmv.o cblas_zhpmv.o \ # Single precision real slib2: $(slev2) $(errhand) - $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ + $(AR) $(ARFLAGS) $(CBLASLIB) $^ $(RANLIB) $(CBLASLIB) # Double precision real dlib2: $(dlev2) $(errhand) - $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ + $(AR) $(ARFLAGS) $(CBLASLIB) $^ $(RANLIB) $(CBLASLIB) # Single precision complex clib2: $(clev2) $(errhand) - $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ + $(AR) $(ARFLAGS) $(CBLASLIB) $^ $(RANLIB) $(CBLASLIB) # Double precision complex zlib2: $(zlev2) $(errhand) - $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ + $(AR) $(ARFLAGS) $(CBLASLIB) $^ $(RANLIB) $(CBLASLIB) # @@ -143,22 +143,22 @@ zlev3 = cblas_zgemm.o cblas_zsymm.o cblas_zhemm.o cblas_zherk.o \ # Single precision real slib3: $(slev3) $(errhand) - $(ARCH) 
$(ARCHFLAGS) $(CBLASLIB) $^ + $(AR) $(ARFLAGS) $(CBLASLIB) $^ $(RANLIB) $(CBLASLIB) # Double precision real dlib3: $(dlev3) $(errhand) - $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ + $(AR) $(ARFLAGS) $(CBLASLIB) $^ $(RANLIB) $(CBLASLIB) # Single precision complex clib3: $(clev3) $(errhand) - $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ + $(AR) $(ARFLAGS) $(CBLASLIB) $^ $(RANLIB) $(CBLASLIB) # Double precision complex zlib3: $(zlev3) $(errhand) - $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ + $(AR) $(ARFLAGS) $(CBLASLIB) $^ $(RANLIB) $(CBLASLIB) @@ -168,22 +168,22 @@ alev3 = $(slev3) $(dlev3) $(clev3) $(zlev3) # All level 1 all1: $(alev1) - $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ + $(AR) $(ARFLAGS) $(CBLASLIB) $^ $(RANLIB) $(CBLASLIB) # All level 2 all2: $(alev2) $(errhand) - $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ + $(AR) $(ARFLAGS) $(CBLASLIB) $^ $(RANLIB) $(CBLASLIB) # All level 3 all3: $(alev3) $(errhand) - $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ + $(AR) $(ARFLAGS) $(CBLASLIB) $^ $(RANLIB) $(CBLASLIB) # All levels and precisions $(CBLASLIB): $(alev1) $(alev2) $(alev3) $(errhand) - $(ARCH) $(ARCHFLAGS) $@ $^ + $(AR) $(ARFLAGS) $@ $^ $(RANLIB) $@ FRC: diff --git a/lapack-netlib/DOCS/lawn81.tex b/lapack-netlib/DOCS/lawn81.tex index 291735299..01c7c39e2 100644 --- a/lapack-netlib/DOCS/lawn81.tex +++ b/lapack-netlib/DOCS/lawn81.tex @@ -466,7 +466,7 @@ TIMER = EXT_ETIME Refer to the section~\ref{second} to get more information. -Next, you will need to modify \texttt{ARCH}, \texttt{ARCHFLAGS}, and \texttt{RANLIB} to specify archiver, +Next, you will need to modify \texttt{AR}, \texttt{ARFLAGS}, and \texttt{RANLIB} to specify archiver, archiver options, and ranlib for your machine. 
If your architecture does not require \texttt{ranlib} to be run after each archive command (as is the case with CRAY computers running UNICOS, Hewlett Packard diff --git a/lapack-netlib/INSTALL/make.inc.ALPHA b/lapack-netlib/INSTALL/make.inc.ALPHA index 0ceeaa155..049cf0b13 100644 --- a/lapack-netlib/INSTALL/make.inc.ALPHA +++ b/lapack-netlib/INSTALL/make.inc.ALPHA @@ -29,8 +29,8 @@ LOADOPTS = # The archiver and the flag(s) to use when building an archive # (library). If your system has no ranlib, set RANLIB = echo. # -ARCH = ar -ARCHFLAGS = cr +AR = ar +ARFLAGS = cr RANLIB = ranlib # Timer for the SECOND and DSECND routines diff --git a/lapack-netlib/INSTALL/make.inc.HPPA b/lapack-netlib/INSTALL/make.inc.HPPA index 8eabbbdf4..2bd8ee16e 100644 --- a/lapack-netlib/INSTALL/make.inc.HPPA +++ b/lapack-netlib/INSTALL/make.inc.HPPA @@ -29,8 +29,8 @@ LOADOPTS = -Aa +U77 # The archiver and the flag(s) to use when building an archive # (library). If your system has no ranlib, set RANLIB = echo. # -ARCH = ar -ARCHFLAGS = cr +AR = ar +ARFLAGS = cr RANLIB = echo # Timer for the SECOND and DSECND routines diff --git a/lapack-netlib/INSTALL/make.inc.IRIX64 b/lapack-netlib/INSTALL/make.inc.IRIX64 index d9e71e1bf..0f57941b5 100644 --- a/lapack-netlib/INSTALL/make.inc.IRIX64 +++ b/lapack-netlib/INSTALL/make.inc.IRIX64 @@ -32,8 +32,8 @@ LOADOPTS = -O3 -64 -mips4 -r10000 -OPT:IEEE_NaN_inf=ON # The archiver and the flag(s) to use when building an archive # (library). If your system has no ranlib, set RANLIB = echo. # -ARCH = ar -ARCHFLAGS = cr +AR = ar +ARFLAGS = cr RANLIB = echo # Timer for the SECOND and DSECND routines diff --git a/lapack-netlib/INSTALL/make.inc.O2K b/lapack-netlib/INSTALL/make.inc.O2K index 3ffcadacc..d99beca41 100644 --- a/lapack-netlib/INSTALL/make.inc.O2K +++ b/lapack-netlib/INSTALL/make.inc.O2K @@ -32,8 +32,8 @@ LOADOPTS = -O3 -64 -mips4 -r10000 # The archiver and the flag(s) to use when building an archive # (library). 
If your system has no ranlib, set RANLIB = echo. # -ARCH = ar -ARCHFLAGS = cr +AR = ar +ARFLAGS = cr RANLIB = echo # Timer for the SECOND and DSECND routines diff --git a/lapack-netlib/INSTALL/make.inc.SGI5 b/lapack-netlib/INSTALL/make.inc.SGI5 index c7019ac16..c4a702d48 100644 --- a/lapack-netlib/INSTALL/make.inc.SGI5 +++ b/lapack-netlib/INSTALL/make.inc.SGI5 @@ -29,8 +29,8 @@ LOADOPTS = # The archiver and the flag(s) to use when building an archive # (library). If your system has no ranlib, set RANLIB = echo. # -ARCH = ar -ARCHFLAGS = cr +AR = ar +ARFLAGS = cr RANLIB = echo # Timer for the SECOND and DSECND routines diff --git a/lapack-netlib/INSTALL/make.inc.SUN4 b/lapack-netlib/INSTALL/make.inc.SUN4 index 4e44f1beb..6a78e9576 100644 --- a/lapack-netlib/INSTALL/make.inc.SUN4 +++ b/lapack-netlib/INSTALL/make.inc.SUN4 @@ -29,8 +29,8 @@ LOADOPTS = -dalign -O4 -fast # The archiver and the flag(s) to use when building an archive # (library). If your system has no ranlib, set RANLIB = echo. # -ARCH = ar -ARCHFLAGS = cr +AR = ar +ARFLAGS = cr RANLIB = ranlib # Timer for the SECOND and DSECND routines diff --git a/lapack-netlib/INSTALL/make.inc.SUN4SOL2 b/lapack-netlib/INSTALL/make.inc.SUN4SOL2 index e6d79add3..0ac3cc4e4 100644 --- a/lapack-netlib/INSTALL/make.inc.SUN4SOL2 +++ b/lapack-netlib/INSTALL/make.inc.SUN4SOL2 @@ -33,8 +33,8 @@ LOADOPTS = -f -dalign -native -xO2 -xarch=v8plusa # The archiver and the flag(s) to use when building an archive # (library). If your system has no ranlib, set RANLIB = echo. # -ARCH = ar -ARCHFLAGS = cr +AR = ar +ARFLAGS = cr RANLIB = echo # Timer for the SECOND and DSECND routines diff --git a/lapack-netlib/INSTALL/make.inc.XLF b/lapack-netlib/INSTALL/make.inc.XLF index 9466ee332..27e22cac9 100644 --- a/lapack-netlib/INSTALL/make.inc.XLF +++ b/lapack-netlib/INSTALL/make.inc.XLF @@ -30,8 +30,8 @@ LOADOPTS = -qnosave # The archiver and the flag(s) to use when building an archive # (library). 
If your system has no ranlib, set RANLIB = echo. # -ARCH = ar -ARCHFLAGS = cr +AR = ar +ARFLAGS = cr RANLIB = ranlib # Timer for the SECOND and DSECND routines diff --git a/lapack-netlib/INSTALL/make.inc.gfortran b/lapack-netlib/INSTALL/make.inc.gfortran index 39d98d4d4..b342b18a8 100644 --- a/lapack-netlib/INSTALL/make.inc.gfortran +++ b/lapack-netlib/INSTALL/make.inc.gfortran @@ -33,8 +33,8 @@ LOADOPTS = # The archiver and the flag(s) to use when building an archive # (library). If your system has no ranlib, set RANLIB = echo. # -ARCH = ar -ARCHFLAGS = cr +AR = ar +ARFLAGS = cr RANLIB = ranlib # Timer for the SECOND and DSECND routines diff --git a/lapack-netlib/INSTALL/make.inc.gfortran_debug b/lapack-netlib/INSTALL/make.inc.gfortran_debug index 10e6381df..1eaed2102 100644 --- a/lapack-netlib/INSTALL/make.inc.gfortran_debug +++ b/lapack-netlib/INSTALL/make.inc.gfortran_debug @@ -33,8 +33,8 @@ LOADOPTS = # The archiver and the flag(s) to use when building an archive # (library). If your system has no ranlib, set RANLIB = echo. # -ARCH = ar -ARCHFLAGS = cr +AR = ar +ARFLAGS = cr RANLIB = ranlib # Timer for the SECOND and DSECND routines diff --git a/lapack-netlib/INSTALL/make.inc.ifort b/lapack-netlib/INSTALL/make.inc.ifort index b067bd484..a3c37428e 100644 --- a/lapack-netlib/INSTALL/make.inc.ifort +++ b/lapack-netlib/INSTALL/make.inc.ifort @@ -29,8 +29,8 @@ LOADOPTS = # The archiver and the flag(s) to use when building an archive # (library). If your system has no ranlib, set RANLIB = echo. # -ARCH = ar -ARCHFLAGS = cr +AR = ar +ARFLAGS = cr RANLIB = ranlib # Timer for the SECOND and DSECND routines diff --git a/lapack-netlib/INSTALL/make.inc.pgf95 b/lapack-netlib/INSTALL/make.inc.pgf95 index a9a5cec98..931ff378f 100644 --- a/lapack-netlib/INSTALL/make.inc.pgf95 +++ b/lapack-netlib/INSTALL/make.inc.pgf95 @@ -29,8 +29,8 @@ LOADOPTS = # The archiver and the flag(s) to use when building an archive # (library). If your system has no ranlib, set RANLIB = echo. 
# -ARCH = ar -ARCHFLAGS = cr +AR = ar +ARFLAGS = cr RANLIB = echo # Timer for the SECOND and DSECND routines diff --git a/lapack-netlib/INSTALL/make.inc.pghpf b/lapack-netlib/INSTALL/make.inc.pghpf index 1d9bf549c..0dfe8c683 100644 --- a/lapack-netlib/INSTALL/make.inc.pghpf +++ b/lapack-netlib/INSTALL/make.inc.pghpf @@ -29,8 +29,8 @@ LOADOPTS = # The archiver and the flag(s) to use when building an archive # (library). If your system has no ranlib, set RANLIB = echo. # -ARCH = ar -ARCHFLAGS = cr +AR = ar +ARFLAGS = cr RANLIB = echo # Timer for the SECOND and DSECND routines diff --git a/lapack-netlib/LAPACKE/src/Makefile b/lapack-netlib/LAPACKE/src/Makefile index 44884d4a5..03c140bf7 100644 --- a/lapack-netlib/LAPACKE/src/Makefile +++ b/lapack-netlib/LAPACKE/src/Makefile @@ -2455,16 +2455,16 @@ endif all: ../../$(LAPACKELIB) ../../$(LAPACKELIB): $(OBJ_A) $(OBJ_B) $(DEPRECATED) $(EXTENDED) $(MATGEN) - $(ARCH) $(ARCHFLAGS) $@ $(OBJ_A) - $(ARCH) $(ARCHFLAGS) $@ $(OBJ_B) + $(AR) $(ARFLAGS) $@ $(OBJ_A) + $(AR) $(ARFLAGS) $@ $(OBJ_B) ifdef BUILD_DEPRECATED - $(ARCH) $(ARCHFLAGS) $@ $(DEPRECATED) + $(AR) $(ARFLAGS) $@ $(DEPRECATED) endif ifdef (USEXBLAS) - $(ARCH) $(ARCHFLAGS) $@ $(EXTENDED) + $(AR) $(ARFLAGS) $@ $(EXTENDED) endif ifdef LAPACKE_WITH_TMG - $(ARCH) $(ARCHFLAGS) $@ $(MATGEN) + $(AR) $(ARFLAGS) $@ $(MATGEN) endif $(RANLIB) $@ diff --git a/lapack-netlib/LAPACKE/utils/Makefile b/lapack-netlib/LAPACKE/utils/Makefile index 1f639c6ea..c6204ee3b 100644 --- a/lapack-netlib/LAPACKE/utils/Makefile +++ b/lapack-netlib/LAPACKE/utils/Makefile @@ -186,7 +186,7 @@ OBJ = lapacke_cgb_nancheck.o \ all: lib lib: $(OBJ) - $(ARCH) $(ARCHFLAGS) ../../$(LAPACKELIB) $^ + $(AR) $(ARFLAGS) ../../$(LAPACKELIB) $^ $(RANLIB) ../../$(LAPACKELIB) clean: cleanobj diff --git a/lapack-netlib/SRC/Makefile b/lapack-netlib/SRC/Makefile index 531cb51fc..e5bb7a3db 100644 --- a/lapack-netlib/SRC/Makefile +++ b/lapack-netlib/SRC/Makefile @@ -553,26 +553,26 @@ endif all: ../$(LAPACKLIB) 
../$(LAPACKLIB): $(ALLOBJ) $(ALLXOBJ) $(DEPRECATED) - $(ARCH) $(ARCHFLAGS) $@ $(ALLOBJ) $(ALLXOBJ) $(DEPRECATED) + $(AR) $(ARFLAGS) $@ $(ALLOBJ) $(ALLXOBJ) $(DEPRECATED) $(RANLIB) $@ single: $(SLASRC) $(DSLASRC) $(SXLASRC) $(SCLAUX) $(ALLAUX) - $(ARCH) $(ARCHFLAGS) ../$(LAPACKLIB) $(SLASRC) $(DSLASRC) \ + $(AR) $(ARFLAGS) ../$(LAPACKLIB) $(SLASRC) $(DSLASRC) \ $(SXLASRC) $(SCLAUX) $(ALLAUX) $(RANLIB) ../$(LAPACKLIB) complex: $(CLASRC) $(ZCLASRC) $(CXLASRC) $(SCLAUX) $(ALLAUX) - $(ARCH) $(ARCHFLAGS) ../$(LAPACKLIB) $(CLASRC) $(ZCLASRC) \ + $(AR) $(ARFLAGS) ../$(LAPACKLIB) $(CLASRC) $(ZCLASRC) \ $(CXLASRC) $(SCLAUX) $(ALLAUX) $(RANLIB) ../$(LAPACKLIB) double: $(DLASRC) $(DSLASRC) $(DXLASRC) $(DZLAUX) $(ALLAUX) - $(ARCH) $(ARCHFLAGS) ../$(LAPACKLIB) $(DLASRC) $(DSLASRC) \ + $(AR) $(ARFLAGS) ../$(LAPACKLIB) $(DLASRC) $(DSLASRC) \ $(DXLASRC) $(DZLAUX) $(ALLAUX) $(RANLIB) ../$(LAPACKLIB) complex16: $(ZLASRC) $(ZCLASRC) $(ZXLASRC) $(DZLAUX) $(ALLAUX) - $(ARCH) $(ARCHFLAGS) ../$(LAPACKLIB) $(ZLASRC) $(ZCLASRC) \ + $(AR) $(ARFLAGS) ../$(LAPACKLIB) $(ZLASRC) $(ZCLASRC) \ $(ZXLASRC) $(DZLAUX) $(ALLAUX) $(RANLIB) ../$(LAPACKLIB) diff --git a/lapack-netlib/SRC/VARIANTS/Makefile b/lapack-netlib/SRC/VARIANTS/Makefile index 9f1410755..7d0e8824c 100644 --- a/lapack-netlib/SRC/VARIANTS/Makefile +++ b/lapack-netlib/SRC/VARIANTS/Makefile @@ -33,27 +33,27 @@ QRLL = qr/LL/cgeqrf.o qr/LL/dgeqrf.o qr/LL/sgeqrf.o qr/LL/zgeqrf.o qr/LL/sceil.o all: cholrl.a choltop.a lucr.a lull.a lurec.a qrll.a cholrl.a: $(CHOLRL) - $(ARCH) $(ARCHFLAGS) $@ $^ + $(AR) $(ARFLAGS) $@ $^ $(RANLIB) $@ choltop.a: $(CHOLTOP) - $(ARCH) $(ARCHFLAGS) $@ $^ + $(AR) $(ARFLAGS) $@ $^ $(RANLIB) $@ lucr.a: $(LUCR) - $(ARCH) $(ARCHFLAGS) $@ $^ + $(AR) $(ARFLAGS) $@ $^ $(RANLIB) $@ lull.a: $(LULL) - $(ARCH) $(ARCHFLAGS) $@ $^ + $(AR) $(ARFLAGS) $@ $^ $(RANLIB) $@ lurec.a: $(LUREC) - $(ARCH) $(ARCHFLAGS) $@ $^ + $(AR) $(ARFLAGS) $@ $^ $(RANLIB) $@ qrll.a: $(QRLL) - $(ARCH) $(ARCHFLAGS) $@ $^ + $(AR) $(ARFLAGS) $@ $^ 
$(RANLIB) $@ clean: cleanobj cleanlib diff --git a/lapack-netlib/TESTING/MATGEN/Makefile b/lapack-netlib/TESTING/MATGEN/Makefile index e20004c2f..f5ea5a8c0 100644 --- a/lapack-netlib/TESTING/MATGEN/Makefile +++ b/lapack-netlib/TESTING/MATGEN/Makefile @@ -58,23 +58,23 @@ ALLOBJ = $(SMATGEN) $(CMATGEN) $(SCATGEN) $(DMATGEN) $(ZMATGEN) \ $(DZATGEN) ../../$(TMGLIB): $(ALLOBJ) - $(ARCH) $(ARCHFLAGS) $@ $^ + $(AR) $(ARFLAGS) $@ $^ $(RANLIB) $@ single: $(SMATGEN) $(SCATGEN) - $(ARCH) $(ARCHFLAGS) ../../$(TMGLIB) $^ + $(AR) $(ARFLAGS) ../../$(TMGLIB) $^ $(RANLIB) ../../$(TMGLIB) complex: $(CMATGEN) $(SCATGEN) - $(ARCH) $(ARCHFLAGS) ../../$(TMGLIB) $^ + $(AR) $(ARFLAGS) ../../$(TMGLIB) $^ $(RANLIB) ../../$(TMGLIB) double: $(DMATGEN) $(DZATGEN) - $(ARCH) $(ARCHFLAGS) ../../$(TMGLIB) $^ + $(AR) $(ARFLAGS) ../../$(TMGLIB) $^ $(RANLIB) ../../$(TMGLIB) complex16: $(ZMATGEN) $(DZATGEN) - $(ARCH) $(ARCHFLAGS) ../../$(TMGLIB) $^ + $(AR) $(ARFLAGS) ../../$(TMGLIB) $^ $(RANLIB) ../../$(TMGLIB) $(SCATGEN): $(FRC) diff --git a/lapack-netlib/make.inc.example b/lapack-netlib/make.inc.example index d780c3a23..3ddb9eafc 100644 --- a/lapack-netlib/make.inc.example +++ b/lapack-netlib/make.inc.example @@ -33,8 +33,8 @@ LOADOPTS = # The archiver and the flag(s) to use when building an archive # (library). If your system has no ranlib, set RANLIB = echo. # -ARCH = ar -ARCHFLAGS = cr +AR = ar +ARFLAGS = cr RANLIB = ranlib # Timer for the SECOND and DSECND routines diff --git a/make.inc b/make.inc index b6ed098c0..93b355103 100644 --- a/make.inc +++ b/make.inc @@ -1,6 +1,6 @@ SHELL = /bin/sh PLAT = _LINUX DRVOPTS = $(NOOPT) -#ARCHFLAGS= $(ARFLAGS) -ru +#ARFLAGS= $(ARFLAGS) -ru #RANLIB = ranlib From 0b7ccb9e381d4bc3d0149c158631389c2c2d411c Mon Sep 17 00:00:00 2001 From: Dumi Loghin Date: Thu, 6 Sep 2018 13:08:30 +0800 Subject: [PATCH 04/22] Revert "replace ARCH with AR in lapack-netlib" This reverts commit db17ce896fbbf53cbef34f81e1f1ec6887965ec4. 
--- Makefile | 4 +-- c_check | 4 --- lapack-netlib/BLAS/SRC/Makefile | 10 +++--- lapack-netlib/CBLAS/src/Makefile | 32 +++++++++---------- lapack-netlib/DOCS/lawn81.tex | 2 +- lapack-netlib/INSTALL/make.inc.ALPHA | 4 +-- lapack-netlib/INSTALL/make.inc.HPPA | 4 +-- lapack-netlib/INSTALL/make.inc.IRIX64 | 4 +-- lapack-netlib/INSTALL/make.inc.O2K | 4 +-- lapack-netlib/INSTALL/make.inc.SGI5 | 4 +-- lapack-netlib/INSTALL/make.inc.SUN4 | 4 +-- lapack-netlib/INSTALL/make.inc.SUN4SOL2 | 4 +-- lapack-netlib/INSTALL/make.inc.XLF | 4 +-- lapack-netlib/INSTALL/make.inc.gfortran | 4 +-- lapack-netlib/INSTALL/make.inc.gfortran_debug | 4 +-- lapack-netlib/INSTALL/make.inc.ifort | 4 +-- lapack-netlib/INSTALL/make.inc.pgf95 | 4 +-- lapack-netlib/INSTALL/make.inc.pghpf | 4 +-- lapack-netlib/LAPACKE/src/Makefile | 10 +++--- lapack-netlib/LAPACKE/utils/Makefile | 2 +- lapack-netlib/SRC/Makefile | 10 +++--- lapack-netlib/SRC/VARIANTS/Makefile | 12 +++---- lapack-netlib/TESTING/MATGEN/Makefile | 10 +++--- lapack-netlib/make.inc.example | 4 +-- make.inc | 2 +- 25 files changed, 75 insertions(+), 79 deletions(-) diff --git a/Makefile b/Makefile index aaeb0c498..c0e5fbcf8 100644 --- a/Makefile +++ b/Makefile @@ -237,8 +237,8 @@ ifndef NOFORTRAN -@echo "LOADOPTS = $(FFLAGS) $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc -@echo "CC = $(CC)" >> $(NETLIB_LAPACK_DIR)/make.inc -@echo "override CFLAGS = $(LAPACK_CFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc - -@echo "AR = $(AR)" >> $(NETLIB_LAPACK_DIR)/make.inc - -@echo "ARFLAGS = $(ARFLAGS) -ru" >> $(NETLIB_LAPACK_DIR)/make.inc + -@echo "ARCH = $(AR)" >> $(NETLIB_LAPACK_DIR)/make.inc + -@echo "ARCHFLAGS = $(ARFLAGS) -ru" >> $(NETLIB_LAPACK_DIR)/make.inc -@echo "RANLIB = $(RANLIB)" >> $(NETLIB_LAPACK_DIR)/make.inc -@echo "LAPACKLIB = ../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc -@echo "TMGLIB = ../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc diff --git a/c_check b/c_check index f86a37b5b..c564855f3 100644 --- a/c_check +++ b/c_check @@ -121,10 
+121,6 @@ if (($architecture eq "x86") && ($os ne Darwin) && ($os ne SunOS)) { $binary =32; } -if ($architecture eq "riscv64") { - $defined = 1; -} - if ($compiler eq "PGI") { $compiler_name .= " -tp p7" if ($binary eq "32"); $compiler_name .= " -tp p7-64" if ($binary eq "64"); diff --git a/lapack-netlib/BLAS/SRC/Makefile b/lapack-netlib/BLAS/SRC/Makefile index f7236318b..a436365aa 100644 --- a/lapack-netlib/BLAS/SRC/Makefile +++ b/lapack-netlib/BLAS/SRC/Makefile @@ -138,23 +138,23 @@ ALLOBJ = $(SBLAS1) $(SBLAS2) $(SBLAS3) $(DBLAS1) $(DBLAS2) $(DBLAS3) \ $(ZBLAS2) $(ZBLAS3) $(ALLBLAS) $(BLASLIB): $(ALLOBJ) - $(AR) $(ARFLAGS) $@ $^ + $(ARCH) $(ARCHFLAGS) $@ $^ $(RANLIB) $@ single: $(SBLAS1) $(ALLBLAS) $(SBLAS2) $(SBLAS3) - $(AR) $(ARFLAGS) $(BLASLIB) $^ + $(ARCH) $(ARCHFLAGS) $(BLASLIB) $^ $(RANLIB) $(BLASLIB) double: $(DBLAS1) $(ALLBLAS) $(DBLAS2) $(DBLAS3) - $(AR) $(ARFLAGS) $(BLASLIB) $^ + $(ARCH) $(ARCHFLAGS) $(BLASLIB) $^ $(RANLIB) $(BLASLIB) complex: $(CBLAS1) $(CB1AUX) $(ALLBLAS) $(CBLAS2) $(CBLAS3) - $(AR) $(ARFLAGS) $(BLASLIB) $^ + $(ARCH) $(ARCHFLAGS) $(BLASLIB) $^ $(RANLIB) $(BLASLIB) complex16: $(ZBLAS1) $(ZB1AUX) $(ALLBLAS) $(ZBLAS2) $(ZBLAS3) - $(AR) $(ARFLAGS) $(BLASLIB) $^ + $(ARCH) $(ARCHFLAGS) $(BLASLIB) $^ $(RANLIB) $(BLASLIB) FRC: diff --git a/lapack-netlib/CBLAS/src/Makefile b/lapack-netlib/CBLAS/src/Makefile index 9b9063d8d..6c0518ac7 100644 --- a/lapack-netlib/CBLAS/src/Makefile +++ b/lapack-netlib/CBLAS/src/Makefile @@ -45,22 +45,22 @@ sclev1 = cblas_scasum.o scasumsub.o cblas_scnrm2.o scnrm2sub.o # Single precision real slib1: $(slev1) $(sclev1) - $(AR) $(ARFLAGS) $(CBLASLIB) $^ + $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ $(RANLIB) $(CBLASLIB) # Double precision real dlib1: $(dlev1) - $(AR) $(ARFLAGS) $(CBLASLIB) $^ + $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ $(RANLIB) $(CBLASLIB) # Single precision complex clib1: $(clev1) $(sclev1) - $(AR) $(ARFLAGS) $(CBLASLIB) $^ + $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ $(RANLIB) $(CBLASLIB) # Double precision complex 
zlib1: $(zlev1) - $(AR) $(ARFLAGS) $(CBLASLIB) $^ + $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ $(RANLIB) $(CBLASLIB) # @@ -97,22 +97,22 @@ zlev2 = cblas_zgemv.o cblas_zgbmv.o cblas_zhemv.o cblas_zhbmv.o cblas_zhpmv.o \ # Single precision real slib2: $(slev2) $(errhand) - $(AR) $(ARFLAGS) $(CBLASLIB) $^ + $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ $(RANLIB) $(CBLASLIB) # Double precision real dlib2: $(dlev2) $(errhand) - $(AR) $(ARFLAGS) $(CBLASLIB) $^ + $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ $(RANLIB) $(CBLASLIB) # Single precision complex clib2: $(clev2) $(errhand) - $(AR) $(ARFLAGS) $(CBLASLIB) $^ + $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ $(RANLIB) $(CBLASLIB) # Double precision complex zlib2: $(zlev2) $(errhand) - $(AR) $(ARFLAGS) $(CBLASLIB) $^ + $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ $(RANLIB) $(CBLASLIB) # @@ -143,22 +143,22 @@ zlev3 = cblas_zgemm.o cblas_zsymm.o cblas_zhemm.o cblas_zherk.o \ # Single precision real slib3: $(slev3) $(errhand) - $(AR) $(ARFLAGS) $(CBLASLIB) $^ + $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ $(RANLIB) $(CBLASLIB) # Double precision real dlib3: $(dlev3) $(errhand) - $(AR) $(ARFLAGS) $(CBLASLIB) $^ + $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ $(RANLIB) $(CBLASLIB) # Single precision complex clib3: $(clev3) $(errhand) - $(AR) $(ARFLAGS) $(CBLASLIB) $^ + $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ $(RANLIB) $(CBLASLIB) # Double precision complex zlib3: $(zlev3) $(errhand) - $(AR) $(ARFLAGS) $(CBLASLIB) $^ + $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ $(RANLIB) $(CBLASLIB) @@ -168,22 +168,22 @@ alev3 = $(slev3) $(dlev3) $(clev3) $(zlev3) # All level 1 all1: $(alev1) - $(AR) $(ARFLAGS) $(CBLASLIB) $^ + $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ $(RANLIB) $(CBLASLIB) # All level 2 all2: $(alev2) $(errhand) - $(AR) $(ARFLAGS) $(CBLASLIB) $^ + $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ $(RANLIB) $(CBLASLIB) # All level 3 all3: $(alev3) $(errhand) - $(AR) $(ARFLAGS) $(CBLASLIB) $^ + $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ $(RANLIB) $(CBLASLIB) # All levels and precisions $(CBLASLIB): $(alev1) $(alev2) $(alev3) 
$(errhand) - $(AR) $(ARFLAGS) $@ $^ + $(ARCH) $(ARCHFLAGS) $@ $^ $(RANLIB) $@ FRC: diff --git a/lapack-netlib/DOCS/lawn81.tex b/lapack-netlib/DOCS/lawn81.tex index 01c7c39e2..291735299 100644 --- a/lapack-netlib/DOCS/lawn81.tex +++ b/lapack-netlib/DOCS/lawn81.tex @@ -466,7 +466,7 @@ TIMER = EXT_ETIME Refer to the section~\ref{second} to get more information. -Next, you will need to modify \texttt{AR}, \texttt{ARFLAGS}, and \texttt{RANLIB} to specify archiver, +Next, you will need to modify \texttt{ARCH}, \texttt{ARCHFLAGS}, and \texttt{RANLIB} to specify archiver, archiver options, and ranlib for your machine. If your architecture does not require \texttt{ranlib} to be run after each archive command (as is the case with CRAY computers running UNICOS, Hewlett Packard diff --git a/lapack-netlib/INSTALL/make.inc.ALPHA b/lapack-netlib/INSTALL/make.inc.ALPHA index 049cf0b13..0ceeaa155 100644 --- a/lapack-netlib/INSTALL/make.inc.ALPHA +++ b/lapack-netlib/INSTALL/make.inc.ALPHA @@ -29,8 +29,8 @@ LOADOPTS = # The archiver and the flag(s) to use when building an archive # (library). If your system has no ranlib, set RANLIB = echo. # -AR = ar -ARFLAGS = cr +ARCH = ar +ARCHFLAGS = cr RANLIB = ranlib # Timer for the SECOND and DSECND routines diff --git a/lapack-netlib/INSTALL/make.inc.HPPA b/lapack-netlib/INSTALL/make.inc.HPPA index 2bd8ee16e..8eabbbdf4 100644 --- a/lapack-netlib/INSTALL/make.inc.HPPA +++ b/lapack-netlib/INSTALL/make.inc.HPPA @@ -29,8 +29,8 @@ LOADOPTS = -Aa +U77 # The archiver and the flag(s) to use when building an archive # (library). If your system has no ranlib, set RANLIB = echo. 
# -AR = ar -ARFLAGS = cr +ARCH = ar +ARCHFLAGS = cr RANLIB = echo # Timer for the SECOND and DSECND routines diff --git a/lapack-netlib/INSTALL/make.inc.IRIX64 b/lapack-netlib/INSTALL/make.inc.IRIX64 index 0f57941b5..d9e71e1bf 100644 --- a/lapack-netlib/INSTALL/make.inc.IRIX64 +++ b/lapack-netlib/INSTALL/make.inc.IRIX64 @@ -32,8 +32,8 @@ LOADOPTS = -O3 -64 -mips4 -r10000 -OPT:IEEE_NaN_inf=ON # The archiver and the flag(s) to use when building an archive # (library). If your system has no ranlib, set RANLIB = echo. # -AR = ar -ARFLAGS = cr +ARCH = ar +ARCHFLAGS = cr RANLIB = echo # Timer for the SECOND and DSECND routines diff --git a/lapack-netlib/INSTALL/make.inc.O2K b/lapack-netlib/INSTALL/make.inc.O2K index d99beca41..3ffcadacc 100644 --- a/lapack-netlib/INSTALL/make.inc.O2K +++ b/lapack-netlib/INSTALL/make.inc.O2K @@ -32,8 +32,8 @@ LOADOPTS = -O3 -64 -mips4 -r10000 # The archiver and the flag(s) to use when building an archive # (library). If your system has no ranlib, set RANLIB = echo. # -AR = ar -ARFLAGS = cr +ARCH = ar +ARCHFLAGS = cr RANLIB = echo # Timer for the SECOND and DSECND routines diff --git a/lapack-netlib/INSTALL/make.inc.SGI5 b/lapack-netlib/INSTALL/make.inc.SGI5 index c4a702d48..c7019ac16 100644 --- a/lapack-netlib/INSTALL/make.inc.SGI5 +++ b/lapack-netlib/INSTALL/make.inc.SGI5 @@ -29,8 +29,8 @@ LOADOPTS = # The archiver and the flag(s) to use when building an archive # (library). If your system has no ranlib, set RANLIB = echo. # -AR = ar -ARFLAGS = cr +ARCH = ar +ARCHFLAGS = cr RANLIB = echo # Timer for the SECOND and DSECND routines diff --git a/lapack-netlib/INSTALL/make.inc.SUN4 b/lapack-netlib/INSTALL/make.inc.SUN4 index 6a78e9576..4e44f1beb 100644 --- a/lapack-netlib/INSTALL/make.inc.SUN4 +++ b/lapack-netlib/INSTALL/make.inc.SUN4 @@ -29,8 +29,8 @@ LOADOPTS = -dalign -O4 -fast # The archiver and the flag(s) to use when building an archive # (library). If your system has no ranlib, set RANLIB = echo. 
# -AR = ar -ARFLAGS = cr +ARCH = ar +ARCHFLAGS = cr RANLIB = ranlib # Timer for the SECOND and DSECND routines diff --git a/lapack-netlib/INSTALL/make.inc.SUN4SOL2 b/lapack-netlib/INSTALL/make.inc.SUN4SOL2 index 0ac3cc4e4..e6d79add3 100644 --- a/lapack-netlib/INSTALL/make.inc.SUN4SOL2 +++ b/lapack-netlib/INSTALL/make.inc.SUN4SOL2 @@ -33,8 +33,8 @@ LOADOPTS = -f -dalign -native -xO2 -xarch=v8plusa # The archiver and the flag(s) to use when building an archive # (library). If your system has no ranlib, set RANLIB = echo. # -AR = ar -ARFLAGS = cr +ARCH = ar +ARCHFLAGS = cr RANLIB = echo # Timer for the SECOND and DSECND routines diff --git a/lapack-netlib/INSTALL/make.inc.XLF b/lapack-netlib/INSTALL/make.inc.XLF index 27e22cac9..9466ee332 100644 --- a/lapack-netlib/INSTALL/make.inc.XLF +++ b/lapack-netlib/INSTALL/make.inc.XLF @@ -30,8 +30,8 @@ LOADOPTS = -qnosave # The archiver and the flag(s) to use when building an archive # (library). If your system has no ranlib, set RANLIB = echo. # -AR = ar -ARFLAGS = cr +ARCH = ar +ARCHFLAGS = cr RANLIB = ranlib # Timer for the SECOND and DSECND routines diff --git a/lapack-netlib/INSTALL/make.inc.gfortran b/lapack-netlib/INSTALL/make.inc.gfortran index b342b18a8..39d98d4d4 100644 --- a/lapack-netlib/INSTALL/make.inc.gfortran +++ b/lapack-netlib/INSTALL/make.inc.gfortran @@ -33,8 +33,8 @@ LOADOPTS = # The archiver and the flag(s) to use when building an archive # (library). If your system has no ranlib, set RANLIB = echo. # -AR = ar -ARFLAGS = cr +ARCH = ar +ARCHFLAGS = cr RANLIB = ranlib # Timer for the SECOND and DSECND routines diff --git a/lapack-netlib/INSTALL/make.inc.gfortran_debug b/lapack-netlib/INSTALL/make.inc.gfortran_debug index 1eaed2102..10e6381df 100644 --- a/lapack-netlib/INSTALL/make.inc.gfortran_debug +++ b/lapack-netlib/INSTALL/make.inc.gfortran_debug @@ -33,8 +33,8 @@ LOADOPTS = # The archiver and the flag(s) to use when building an archive # (library). If your system has no ranlib, set RANLIB = echo. 
# -AR = ar -ARFLAGS = cr +ARCH = ar +ARCHFLAGS = cr RANLIB = ranlib # Timer for the SECOND and DSECND routines diff --git a/lapack-netlib/INSTALL/make.inc.ifort b/lapack-netlib/INSTALL/make.inc.ifort index a3c37428e..b067bd484 100644 --- a/lapack-netlib/INSTALL/make.inc.ifort +++ b/lapack-netlib/INSTALL/make.inc.ifort @@ -29,8 +29,8 @@ LOADOPTS = # The archiver and the flag(s) to use when building an archive # (library). If your system has no ranlib, set RANLIB = echo. # -AR = ar -ARFLAGS = cr +ARCH = ar +ARCHFLAGS = cr RANLIB = ranlib # Timer for the SECOND and DSECND routines diff --git a/lapack-netlib/INSTALL/make.inc.pgf95 b/lapack-netlib/INSTALL/make.inc.pgf95 index 931ff378f..a9a5cec98 100644 --- a/lapack-netlib/INSTALL/make.inc.pgf95 +++ b/lapack-netlib/INSTALL/make.inc.pgf95 @@ -29,8 +29,8 @@ LOADOPTS = # The archiver and the flag(s) to use when building an archive # (library). If your system has no ranlib, set RANLIB = echo. # -AR = ar -ARFLAGS = cr +ARCH = ar +ARCHFLAGS = cr RANLIB = echo # Timer for the SECOND and DSECND routines diff --git a/lapack-netlib/INSTALL/make.inc.pghpf b/lapack-netlib/INSTALL/make.inc.pghpf index 0dfe8c683..1d9bf549c 100644 --- a/lapack-netlib/INSTALL/make.inc.pghpf +++ b/lapack-netlib/INSTALL/make.inc.pghpf @@ -29,8 +29,8 @@ LOADOPTS = # The archiver and the flag(s) to use when building an archive # (library). If your system has no ranlib, set RANLIB = echo. 
# -AR = ar -ARFLAGS = cr +ARCH = ar +ARCHFLAGS = cr RANLIB = echo # Timer for the SECOND and DSECND routines diff --git a/lapack-netlib/LAPACKE/src/Makefile b/lapack-netlib/LAPACKE/src/Makefile index 03c140bf7..44884d4a5 100644 --- a/lapack-netlib/LAPACKE/src/Makefile +++ b/lapack-netlib/LAPACKE/src/Makefile @@ -2455,16 +2455,16 @@ endif all: ../../$(LAPACKELIB) ../../$(LAPACKELIB): $(OBJ_A) $(OBJ_B) $(DEPRECATED) $(EXTENDED) $(MATGEN) - $(AR) $(ARFLAGS) $@ $(OBJ_A) - $(AR) $(ARFLAGS) $@ $(OBJ_B) + $(ARCH) $(ARCHFLAGS) $@ $(OBJ_A) + $(ARCH) $(ARCHFLAGS) $@ $(OBJ_B) ifdef BUILD_DEPRECATED - $(AR) $(ARFLAGS) $@ $(DEPRECATED) + $(ARCH) $(ARCHFLAGS) $@ $(DEPRECATED) endif ifdef (USEXBLAS) - $(AR) $(ARFLAGS) $@ $(EXTENDED) + $(ARCH) $(ARCHFLAGS) $@ $(EXTENDED) endif ifdef LAPACKE_WITH_TMG - $(AR) $(ARFLAGS) $@ $(MATGEN) + $(ARCH) $(ARCHFLAGS) $@ $(MATGEN) endif $(RANLIB) $@ diff --git a/lapack-netlib/LAPACKE/utils/Makefile b/lapack-netlib/LAPACKE/utils/Makefile index c6204ee3b..1f639c6ea 100644 --- a/lapack-netlib/LAPACKE/utils/Makefile +++ b/lapack-netlib/LAPACKE/utils/Makefile @@ -186,7 +186,7 @@ OBJ = lapacke_cgb_nancheck.o \ all: lib lib: $(OBJ) - $(AR) $(ARFLAGS) ../../$(LAPACKELIB) $^ + $(ARCH) $(ARCHFLAGS) ../../$(LAPACKELIB) $^ $(RANLIB) ../../$(LAPACKELIB) clean: cleanobj diff --git a/lapack-netlib/SRC/Makefile b/lapack-netlib/SRC/Makefile index e5bb7a3db..531cb51fc 100644 --- a/lapack-netlib/SRC/Makefile +++ b/lapack-netlib/SRC/Makefile @@ -553,26 +553,26 @@ endif all: ../$(LAPACKLIB) ../$(LAPACKLIB): $(ALLOBJ) $(ALLXOBJ) $(DEPRECATED) - $(AR) $(ARFLAGS) $@ $(ALLOBJ) $(ALLXOBJ) $(DEPRECATED) + $(ARCH) $(ARCHFLAGS) $@ $(ALLOBJ) $(ALLXOBJ) $(DEPRECATED) $(RANLIB) $@ single: $(SLASRC) $(DSLASRC) $(SXLASRC) $(SCLAUX) $(ALLAUX) - $(AR) $(ARFLAGS) ../$(LAPACKLIB) $(SLASRC) $(DSLASRC) \ + $(ARCH) $(ARCHFLAGS) ../$(LAPACKLIB) $(SLASRC) $(DSLASRC) \ $(SXLASRC) $(SCLAUX) $(ALLAUX) $(RANLIB) ../$(LAPACKLIB) complex: $(CLASRC) $(ZCLASRC) $(CXLASRC) $(SCLAUX) $(ALLAUX) - 
$(AR) $(ARFLAGS) ../$(LAPACKLIB) $(CLASRC) $(ZCLASRC) \ + $(ARCH) $(ARCHFLAGS) ../$(LAPACKLIB) $(CLASRC) $(ZCLASRC) \ $(CXLASRC) $(SCLAUX) $(ALLAUX) $(RANLIB) ../$(LAPACKLIB) double: $(DLASRC) $(DSLASRC) $(DXLASRC) $(DZLAUX) $(ALLAUX) - $(AR) $(ARFLAGS) ../$(LAPACKLIB) $(DLASRC) $(DSLASRC) \ + $(ARCH) $(ARCHFLAGS) ../$(LAPACKLIB) $(DLASRC) $(DSLASRC) \ $(DXLASRC) $(DZLAUX) $(ALLAUX) $(RANLIB) ../$(LAPACKLIB) complex16: $(ZLASRC) $(ZCLASRC) $(ZXLASRC) $(DZLAUX) $(ALLAUX) - $(AR) $(ARFLAGS) ../$(LAPACKLIB) $(ZLASRC) $(ZCLASRC) \ + $(ARCH) $(ARCHFLAGS) ../$(LAPACKLIB) $(ZLASRC) $(ZCLASRC) \ $(ZXLASRC) $(DZLAUX) $(ALLAUX) $(RANLIB) ../$(LAPACKLIB) diff --git a/lapack-netlib/SRC/VARIANTS/Makefile b/lapack-netlib/SRC/VARIANTS/Makefile index 7d0e8824c..9f1410755 100644 --- a/lapack-netlib/SRC/VARIANTS/Makefile +++ b/lapack-netlib/SRC/VARIANTS/Makefile @@ -33,27 +33,27 @@ QRLL = qr/LL/cgeqrf.o qr/LL/dgeqrf.o qr/LL/sgeqrf.o qr/LL/zgeqrf.o qr/LL/sceil.o all: cholrl.a choltop.a lucr.a lull.a lurec.a qrll.a cholrl.a: $(CHOLRL) - $(AR) $(ARFLAGS) $@ $^ + $(ARCH) $(ARCHFLAGS) $@ $^ $(RANLIB) $@ choltop.a: $(CHOLTOP) - $(AR) $(ARFLAGS) $@ $^ + $(ARCH) $(ARCHFLAGS) $@ $^ $(RANLIB) $@ lucr.a: $(LUCR) - $(AR) $(ARFLAGS) $@ $^ + $(ARCH) $(ARCHFLAGS) $@ $^ $(RANLIB) $@ lull.a: $(LULL) - $(AR) $(ARFLAGS) $@ $^ + $(ARCH) $(ARCHFLAGS) $@ $^ $(RANLIB) $@ lurec.a: $(LUREC) - $(AR) $(ARFLAGS) $@ $^ + $(ARCH) $(ARCHFLAGS) $@ $^ $(RANLIB) $@ qrll.a: $(QRLL) - $(AR) $(ARFLAGS) $@ $^ + $(ARCH) $(ARCHFLAGS) $@ $^ $(RANLIB) $@ clean: cleanobj cleanlib diff --git a/lapack-netlib/TESTING/MATGEN/Makefile b/lapack-netlib/TESTING/MATGEN/Makefile index f5ea5a8c0..e20004c2f 100644 --- a/lapack-netlib/TESTING/MATGEN/Makefile +++ b/lapack-netlib/TESTING/MATGEN/Makefile @@ -58,23 +58,23 @@ ALLOBJ = $(SMATGEN) $(CMATGEN) $(SCATGEN) $(DMATGEN) $(ZMATGEN) \ $(DZATGEN) ../../$(TMGLIB): $(ALLOBJ) - $(AR) $(ARFLAGS) $@ $^ + $(ARCH) $(ARCHFLAGS) $@ $^ $(RANLIB) $@ single: $(SMATGEN) $(SCATGEN) - $(AR) $(ARFLAGS) 
../../$(TMGLIB) $^ + $(ARCH) $(ARCHFLAGS) ../../$(TMGLIB) $^ $(RANLIB) ../../$(TMGLIB) complex: $(CMATGEN) $(SCATGEN) - $(AR) $(ARFLAGS) ../../$(TMGLIB) $^ + $(ARCH) $(ARCHFLAGS) ../../$(TMGLIB) $^ $(RANLIB) ../../$(TMGLIB) double: $(DMATGEN) $(DZATGEN) - $(AR) $(ARFLAGS) ../../$(TMGLIB) $^ + $(ARCH) $(ARCHFLAGS) ../../$(TMGLIB) $^ $(RANLIB) ../../$(TMGLIB) complex16: $(ZMATGEN) $(DZATGEN) - $(AR) $(ARFLAGS) ../../$(TMGLIB) $^ + $(ARCH) $(ARCHFLAGS) ../../$(TMGLIB) $^ $(RANLIB) ../../$(TMGLIB) $(SCATGEN): $(FRC) diff --git a/lapack-netlib/make.inc.example b/lapack-netlib/make.inc.example index 3ddb9eafc..d780c3a23 100644 --- a/lapack-netlib/make.inc.example +++ b/lapack-netlib/make.inc.example @@ -33,8 +33,8 @@ LOADOPTS = # The archiver and the flag(s) to use when building an archive # (library). If your system has no ranlib, set RANLIB = echo. # -AR = ar -ARFLAGS = cr +ARCH = ar +ARCHFLAGS = cr RANLIB = ranlib # Timer for the SECOND and DSECND routines diff --git a/make.inc b/make.inc index 93b355103..b6ed098c0 100644 --- a/make.inc +++ b/make.inc @@ -1,6 +1,6 @@ SHELL = /bin/sh PLAT = _LINUX DRVOPTS = $(NOOPT) -#ARFLAGS= $(ARFLAGS) -ru +#ARCHFLAGS= $(ARFLAGS) -ru #RANLIB = ranlib From a1bdc308b8d4dcb924f339ca5018c12a455d2652 Mon Sep 17 00:00:00 2001 From: Dumi Loghin Date: Thu, 6 Sep 2018 13:13:36 +0800 Subject: [PATCH 05/22] override ARCH (archiver) in lapack-netlib/make.inc --- Makefile | 2 +- c_check | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index c0e5fbcf8..547feb0d2 100644 --- a/Makefile +++ b/Makefile @@ -237,7 +237,7 @@ ifndef NOFORTRAN -@echo "LOADOPTS = $(FFLAGS) $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc -@echo "CC = $(CC)" >> $(NETLIB_LAPACK_DIR)/make.inc -@echo "override CFLAGS = $(LAPACK_CFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc - -@echo "ARCH = $(AR)" >> $(NETLIB_LAPACK_DIR)/make.inc + -@echo "override ARCH = $(AR)" >> $(NETLIB_LAPACK_DIR)/make.inc -@echo "ARCHFLAGS = $(ARFLAGS) -ru" >> 
$(NETLIB_LAPACK_DIR)/make.inc -@echo "RANLIB = $(RANLIB)" >> $(NETLIB_LAPACK_DIR)/make.inc -@echo "LAPACKLIB = ../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc diff --git a/c_check b/c_check index c564855f3..eb302b71a 100644 --- a/c_check +++ b/c_check @@ -121,6 +121,11 @@ if (($architecture eq "x86") && ($os ne Darwin) && ($os ne SunOS)) { $binary =32; } +if ($architecture eq "riscv64") { + $defined = 1; + $binary = 64; +} + if ($compiler eq "PGI") { $compiler_name .= " -tp p7" if ($binary eq "32"); $compiler_name .= " -tp p7-64" if ($binary eq "64"); From 44020a42a453579740fd16cd23e76f4267c41b65 Mon Sep 17 00:00:00 2001 From: Xianyi Zhang Date: Thu, 27 Feb 2020 14:29:42 +0800 Subject: [PATCH 06/22] Fixed compile bug for RV64. --- kernel/riscv64/KERNEL | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/kernel/riscv64/KERNEL b/kernel/riscv64/KERNEL index 04d82b4ce..ea6a8cf21 100644 --- a/kernel/riscv64/KERNEL +++ b/kernel/riscv64/KERNEL @@ -35,6 +35,11 @@ DASUMKERNEL = ../riscv64/asum.c CASUMKERNEL = ../riscv64/zasum.c ZASUMKERNEL = ../riscv64/zasum.c +SSUMKERNEL = ../arm/sum.c +DSUMKERNEL = ../arm/sum.c +CSUMKERNEL = ../arm/zsum.c +ZSUMKERNEL = ../arm/zsum.c + SAXPYKERNEL = ../riscv64/axpy.c DAXPYKERNEL = ../riscv64/axpy.c CAXPYKERNEL = ../riscv64/zaxpy.c From 265ab484c89d10dfff2d5df678221918d7a880e3 Mon Sep 17 00:00:00 2001 From: Xianyi Zhang Date: Thu, 27 Feb 2020 14:46:15 +0800 Subject: [PATCH 07/22] Change default RISC-V 64-bit corename to RISCV64_GENERIC e.g. 
make CC=riscv64-unknown-linux-gnu-gcc FC=riscv64-unknown-linux-gnu-gfortran TARGET=RISCV64_GENERIC HOSTCC=gcc --- TargetList.txt | 3 + getarch.c | 10 +- kernel/riscv64/KERNEL | 162 +++---------------------- kernel/riscv64/KERNEL.RISCV64_GENERIC | 164 ++++++++++++++++++++++++++ param.h | 2 +- 5 files changed, 187 insertions(+), 154 deletions(-) create mode 100644 kernel/riscv64/KERNEL.RISCV64_GENERIC diff --git a/TargetList.txt b/TargetList.txt index 6a57bf1af..3b018e17a 100644 --- a/TargetList.txt +++ b/TargetList.txt @@ -97,3 +97,6 @@ TSV110 ZARCH_GENERIC Z13 Z14 + +10.RISC-V 64: +RISCV64_GENERIC diff --git a/getarch.c b/getarch.c index d0d260577..58706c452 100644 --- a/getarch.c +++ b/getarch.c @@ -906,17 +906,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #else #endif -#ifdef FORCE_RISCV64 +#ifdef FORCE_RISCV64_GENERIC #define FORCE #define ARCHITECTURE "RISCV64" -#define SUBARCHITECTURE "RISCV64" +#define SUBARCHITECTURE "RISCV64_GENERIC" #define SUBDIRNAME "riscv64" -#define ARCHCONFIG "-DRISCV64 " \ +#define ARCHCONFIG "-DRISCV64_GENERIC " \ "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=32 " \ "-DL2_SIZE=1048576 -DL2_LINESIZE=32 " \ "-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " -#define LIBNAME "riscv64" -#define CORENAME "RISCV64" +#define LIBNAME "riscv64_generic" +#define CORENAME "RISCV64_GENERIC" #else #endif diff --git a/kernel/riscv64/KERNEL b/kernel/riscv64/KERNEL index ea6a8cf21..68d68b5f8 100644 --- a/kernel/riscv64/KERNEL +++ b/kernel/riscv64/KERNEL @@ -1,154 +1,18 @@ -SAMAXKERNEL = ../riscv64/amax.c -DAMAXKERNEL = ../riscv64/amax.c -CAMAXKERNEL = ../riscv64/zamax.c -ZAMAXKERNEL = ../riscv64/zamax.c - -SAMINKERNEL = ../riscv64/amin.c -DAMINKERNEL = ../riscv64/amin.c -CAMINKERNEL = ../riscv64/zamin.c -ZAMINKERNEL = ../riscv64/zamin.c - -SMAXKERNEL = ../riscv64/max.c -DMAXKERNEL = ../riscv64/max.c - -SMINKERNEL = ../riscv64/min.c -DMINKERNEL = ../riscv64/min.c - -ISAMAXKERNEL = ../riscv64/iamax.c 
-IDAMAXKERNEL = ../riscv64/iamax.c -ICAMAXKERNEL = ../riscv64/izamax.c -IZAMAXKERNEL = ../riscv64/izamax.c - -ISAMINKERNEL = ../riscv64/iamin.c -IDAMINKERNEL = ../riscv64/iamin.c -ICAMINKERNEL = ../riscv64/izamin.c -IZAMINKERNEL = ../riscv64/izamin.c - -ISMAXKERNEL = ../riscv64/imax.c -IDMAXKERNEL = ../riscv64/imax.c - -ISMINKERNEL = ../riscv64/imin.c -IDMINKERNEL = ../riscv64/imin.c - -SASUMKERNEL = ../riscv64/asum.c -DASUMKERNEL = ../riscv64/asum.c -CASUMKERNEL = ../riscv64/zasum.c -ZASUMKERNEL = ../riscv64/zasum.c - -SSUMKERNEL = ../arm/sum.c -DSUMKERNEL = ../arm/sum.c -CSUMKERNEL = ../arm/zsum.c -ZSUMKERNEL = ../arm/zsum.c - -SAXPYKERNEL = ../riscv64/axpy.c -DAXPYKERNEL = ../riscv64/axpy.c -CAXPYKERNEL = ../riscv64/zaxpy.c -ZAXPYKERNEL = ../riscv64/zaxpy.c - -SCOPYKERNEL = ../riscv64/copy.c -DCOPYKERNEL = ../riscv64/copy.c -CCOPYKERNEL = ../riscv64/zcopy.c -ZCOPYKERNEL = ../riscv64/zcopy.c - -SDOTKERNEL = ../riscv64/dot.c -DDOTKERNEL = ../riscv64/dot.c -CDOTKERNEL = ../riscv64/zdot.c -ZDOTKERNEL = ../riscv64/zdot.c - -SNRM2KERNEL = ../riscv64/nrm2.c -DNRM2KERNEL = ../riscv64/nrm2.c -CNRM2KERNEL = ../riscv64/znrm2.c -ZNRM2KERNEL = ../riscv64/znrm2.c - -SROTKERNEL = ../riscv64/rot.c -DROTKERNEL = ../riscv64/rot.c -CROTKERNEL = ../riscv64/zrot.c -ZROTKERNEL = ../riscv64/zrot.c - -SSCALKERNEL = ../riscv64/scal.c -DSCALKERNEL = ../riscv64/scal.c -CSCALKERNEL = ../riscv64/zscal.c -ZSCALKERNEL = ../riscv64/zscal.c - -SSWAPKERNEL = ../riscv64/swap.c -DSWAPKERNEL = ../riscv64/swap.c -CSWAPKERNEL = ../riscv64/zswap.c -ZSWAPKERNEL = ../riscv64/zswap.c - -SGEMVNKERNEL = ../riscv64/gemv_n.c -DGEMVNKERNEL = ../riscv64/gemv_n.c -CGEMVNKERNEL = ../riscv64/zgemv_n.c -ZGEMVNKERNEL = ../riscv64/zgemv_n.c - -SGEMVTKERNEL = ../riscv64/gemv_t.c -DGEMVTKERNEL = ../riscv64/gemv_t.c -CGEMVTKERNEL = ../riscv64/zgemv_t.c -ZGEMVTKERNEL = ../riscv64/zgemv_t.c - -STRMMKERNEL = ../generic/trmmkernel_2x2.c -DTRMMKERNEL = ../generic/trmmkernel_2x2.c -CTRMMKERNEL = ../generic/ztrmmkernel_2x2.c 
-ZTRMMKERNEL = ../generic/ztrmmkernel_2x2.c - -SGEMMKERNEL = ../generic/gemmkernel_2x2.c -SGEMMONCOPY = ../generic/gemm_ncopy_2.c -SGEMMOTCOPY = ../generic/gemm_tcopy_2.c -SGEMMONCOPYOBJ = sgemm_oncopy.o -SGEMMOTCOPYOBJ = sgemm_otcopy.o - -DGEMMKERNEL = ../generic/gemmkernel_2x2.c -DGEMMONCOPY = ../generic/gemm_ncopy_2.c -DGEMMOTCOPY = ../generic/gemm_tcopy_2.c -DGEMMONCOPYOBJ = dgemm_oncopy.o -DGEMMOTCOPYOBJ = dgemm_otcopy.o - -CGEMMKERNEL = ../generic/zgemmkernel_2x2.c -CGEMMONCOPY = ../generic/zgemm_ncopy_2.c -CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c -CGEMMONCOPYOBJ = cgemm_oncopy.o -CGEMMOTCOPYOBJ = cgemm_otcopy.o - -ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c -ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c -ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c -ZGEMMONCOPYOBJ = zgemm_oncopy.o -ZGEMMOTCOPYOBJ = zgemm_otcopy.o - -STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c -STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c -STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c -STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c - -DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c -DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c -DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c -DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c - -CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c -CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c -CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c -CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c - -ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c -ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c -ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c -ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c - -SSYMV_U_KERNEL = ../generic/symv_k.c -SSYMV_L_KERNEL = ../generic/symv_k.c -DSYMV_U_KERNEL = ../generic/symv_k.c -DSYMV_L_KERNEL = ../generic/symv_k.c -CSYMV_U_KERNEL = ../generic/zsymv_k.c -CSYMV_L_KERNEL = ../generic/zsymv_k.c -ZSYMV_U_KERNEL = ../generic/zsymv_k.c -ZSYMV_L_KERNEL = ../generic/zsymv_k.c - - -LSAME_KERNEL = ../generic/lsame.c - +ifndef SCABS_KERNEL SCABS_KERNEL = ../generic/cabs.c +endif + +ifndef 
DCABS_KERNEL DCABS_KERNEL = ../generic/cabs.c +endif + +ifndef QCABS_KERNEL QCABS_KERNEL = ../generic/cabs.c +endif + +ifndef LSAME_KERNEL +LSAME_KERNEL = ../generic/lsame.c +endif ifndef SGEMM_BETA SGEMM_BETA = ../generic/gemm_beta.c @@ -162,3 +26,5 @@ endif ifndef ZGEMM_BETA ZGEMM_BETA = ../generic/zgemm_beta.c endif + + diff --git a/kernel/riscv64/KERNEL.RISCV64_GENERIC b/kernel/riscv64/KERNEL.RISCV64_GENERIC new file mode 100644 index 000000000..ea6a8cf21 --- /dev/null +++ b/kernel/riscv64/KERNEL.RISCV64_GENERIC @@ -0,0 +1,164 @@ +SAMAXKERNEL = ../riscv64/amax.c +DAMAXKERNEL = ../riscv64/amax.c +CAMAXKERNEL = ../riscv64/zamax.c +ZAMAXKERNEL = ../riscv64/zamax.c + +SAMINKERNEL = ../riscv64/amin.c +DAMINKERNEL = ../riscv64/amin.c +CAMINKERNEL = ../riscv64/zamin.c +ZAMINKERNEL = ../riscv64/zamin.c + +SMAXKERNEL = ../riscv64/max.c +DMAXKERNEL = ../riscv64/max.c + +SMINKERNEL = ../riscv64/min.c +DMINKERNEL = ../riscv64/min.c + +ISAMAXKERNEL = ../riscv64/iamax.c +IDAMAXKERNEL = ../riscv64/iamax.c +ICAMAXKERNEL = ../riscv64/izamax.c +IZAMAXKERNEL = ../riscv64/izamax.c + +ISAMINKERNEL = ../riscv64/iamin.c +IDAMINKERNEL = ../riscv64/iamin.c +ICAMINKERNEL = ../riscv64/izamin.c +IZAMINKERNEL = ../riscv64/izamin.c + +ISMAXKERNEL = ../riscv64/imax.c +IDMAXKERNEL = ../riscv64/imax.c + +ISMINKERNEL = ../riscv64/imin.c +IDMINKERNEL = ../riscv64/imin.c + +SASUMKERNEL = ../riscv64/asum.c +DASUMKERNEL = ../riscv64/asum.c +CASUMKERNEL = ../riscv64/zasum.c +ZASUMKERNEL = ../riscv64/zasum.c + +SSUMKERNEL = ../arm/sum.c +DSUMKERNEL = ../arm/sum.c +CSUMKERNEL = ../arm/zsum.c +ZSUMKERNEL = ../arm/zsum.c + +SAXPYKERNEL = ../riscv64/axpy.c +DAXPYKERNEL = ../riscv64/axpy.c +CAXPYKERNEL = ../riscv64/zaxpy.c +ZAXPYKERNEL = ../riscv64/zaxpy.c + +SCOPYKERNEL = ../riscv64/copy.c +DCOPYKERNEL = ../riscv64/copy.c +CCOPYKERNEL = ../riscv64/zcopy.c +ZCOPYKERNEL = ../riscv64/zcopy.c + +SDOTKERNEL = ../riscv64/dot.c +DDOTKERNEL = ../riscv64/dot.c +CDOTKERNEL = ../riscv64/zdot.c +ZDOTKERNEL = 
../riscv64/zdot.c + +SNRM2KERNEL = ../riscv64/nrm2.c +DNRM2KERNEL = ../riscv64/nrm2.c +CNRM2KERNEL = ../riscv64/znrm2.c +ZNRM2KERNEL = ../riscv64/znrm2.c + +SROTKERNEL = ../riscv64/rot.c +DROTKERNEL = ../riscv64/rot.c +CROTKERNEL = ../riscv64/zrot.c +ZROTKERNEL = ../riscv64/zrot.c + +SSCALKERNEL = ../riscv64/scal.c +DSCALKERNEL = ../riscv64/scal.c +CSCALKERNEL = ../riscv64/zscal.c +ZSCALKERNEL = ../riscv64/zscal.c + +SSWAPKERNEL = ../riscv64/swap.c +DSWAPKERNEL = ../riscv64/swap.c +CSWAPKERNEL = ../riscv64/zswap.c +ZSWAPKERNEL = ../riscv64/zswap.c + +SGEMVNKERNEL = ../riscv64/gemv_n.c +DGEMVNKERNEL = ../riscv64/gemv_n.c +CGEMVNKERNEL = ../riscv64/zgemv_n.c +ZGEMVNKERNEL = ../riscv64/zgemv_n.c + +SGEMVTKERNEL = ../riscv64/gemv_t.c +DGEMVTKERNEL = ../riscv64/gemv_t.c +CGEMVTKERNEL = ../riscv64/zgemv_t.c +ZGEMVTKERNEL = ../riscv64/zgemv_t.c + +STRMMKERNEL = ../generic/trmmkernel_2x2.c +DTRMMKERNEL = ../generic/trmmkernel_2x2.c +CTRMMKERNEL = ../generic/ztrmmkernel_2x2.c +ZTRMMKERNEL = ../generic/ztrmmkernel_2x2.c + +SGEMMKERNEL = ../generic/gemmkernel_2x2.c +SGEMMONCOPY = ../generic/gemm_ncopy_2.c +SGEMMOTCOPY = ../generic/gemm_tcopy_2.c +SGEMMONCOPYOBJ = sgemm_oncopy.o +SGEMMOTCOPYOBJ = sgemm_otcopy.o + +DGEMMKERNEL = ../generic/gemmkernel_2x2.c +DGEMMONCOPY = ../generic/gemm_ncopy_2.c +DGEMMOTCOPY = ../generic/gemm_tcopy_2.c +DGEMMONCOPYOBJ = dgemm_oncopy.o +DGEMMOTCOPYOBJ = dgemm_otcopy.o + +CGEMMKERNEL = ../generic/zgemmkernel_2x2.c +CGEMMONCOPY = ../generic/zgemm_ncopy_2.c +CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c +CGEMMONCOPYOBJ = cgemm_oncopy.o +CGEMMOTCOPYOBJ = cgemm_otcopy.o + +ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c +ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c +ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c +ZGEMMONCOPYOBJ = zgemm_oncopy.o +ZGEMMOTCOPYOBJ = zgemm_otcopy.o + +STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c +STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c +STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c +STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c + 
+DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c +DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c +DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c +DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c + +CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c +CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c +CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c +CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c + +ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c +ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c +ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c +ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c + +SSYMV_U_KERNEL = ../generic/symv_k.c +SSYMV_L_KERNEL = ../generic/symv_k.c +DSYMV_U_KERNEL = ../generic/symv_k.c +DSYMV_L_KERNEL = ../generic/symv_k.c +CSYMV_U_KERNEL = ../generic/zsymv_k.c +CSYMV_L_KERNEL = ../generic/zsymv_k.c +ZSYMV_U_KERNEL = ../generic/zsymv_k.c +ZSYMV_L_KERNEL = ../generic/zsymv_k.c + + +LSAME_KERNEL = ../generic/lsame.c + +SCABS_KERNEL = ../generic/cabs.c +DCABS_KERNEL = ../generic/cabs.c +QCABS_KERNEL = ../generic/cabs.c + +ifndef SGEMM_BETA +SGEMM_BETA = ../generic/gemm_beta.c +endif +ifndef DGEMM_BETA +DGEMM_BETA = ../generic/gemm_beta.c +endif +ifndef CGEMM_BETA +CGEMM_BETA = ../generic/zgemm_beta.c +endif +ifndef ZGEMM_BETA +ZGEMM_BETA = ../generic/zgemm_beta.c +endif diff --git a/param.h b/param.h index d42724a57..4a7765012 100644 --- a/param.h +++ b/param.h @@ -2509,7 +2509,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define SYMV_P 16 #endif -#ifdef RISCV64 +#ifdef RISCV64_GENERIC #define GEMM_DEFAULT_OFFSET_A 0 #define GEMM_DEFAULT_OFFSET_B 0 #define GEMM_DEFAULT_ALIGN 0x03fffUL From ef8e7d0279dfd1f9d9bec32b514a853d10bfdda7 Mon Sep 17 00:00:00 2001 From: damonyu Date: Thu, 15 Oct 2020 16:05:37 +0800 Subject: [PATCH 08/22] Add the support for RISC-V Vector. 
Change-Id: Iae7800a32f5af3903c330882cdf6f292d885f266 --- Makefile.prebuild | 4 + Makefile.riscv64 | 4 + Makefile.system | 5 +- TargetList.txt | 3 + c_check | 6 + common.h | 4 + common_riscv64.h | 98 + cpuid_riscv64.c | 113 ++ ctest.c | 4 + getarch.c | 33 + kernel/Makefile.L3 | 4 + kernel/generic/trmmkernel_16x4.c | 2092 ++++++++++++++++++++ kernel/generic/trmmkernel_8x4.c | 1317 +++++++++++++ kernel/generic/trmmkernel_8x8.c | 2207 ++++++++++++++++++++++ kernel/riscv64/KERNEL | 30 + kernel/riscv64/KERNEL.C910V | 190 ++ kernel/riscv64/KERNEL.RISCV64_GENERIC | 164 ++ kernel/riscv64/amax.c | 75 + kernel/riscv64/amax_vector.c | 245 +++ kernel/riscv64/amin.c | 75 + kernel/riscv64/amin_vector.c | 241 +++ kernel/riscv64/asum.c | 67 + kernel/riscv64/asum_vector.c | 131 ++ kernel/riscv64/axpby.c | 96 + kernel/riscv64/axpby_vector.c | 378 ++++ kernel/riscv64/axpy.c | 64 + kernel/riscv64/axpy_vector.c | 179 ++ kernel/riscv64/copy.c | 59 + kernel/riscv64/copy_vector.c | 148 ++ kernel/riscv64/dgemm_kernel_8x4_c910v.c | 977 ++++++++++ kernel/riscv64/dot.c | 64 + kernel/riscv64/dot_vector.c | 172 ++ kernel/riscv64/gemv_n.c | 67 + kernel/riscv64/gemv_n_vector.c | 146 ++ kernel/riscv64/gemv_t.c | 68 + kernel/riscv64/gemv_t_vector.c | 126 ++ kernel/riscv64/iamax.c | 77 + kernel/riscv64/iamax_vector.c | 191 ++ kernel/riscv64/iamin.c | 77 + kernel/riscv64/iamin_vector.c | 192 ++ kernel/riscv64/imax.c | 69 + kernel/riscv64/imax_vector.c | 176 ++ kernel/riscv64/imin.c | 67 + kernel/riscv64/imin_vector.c | 212 +++ kernel/riscv64/izamax.c | 81 + kernel/riscv64/izamax_vector.c | 246 +++ kernel/riscv64/izamin.c | 81 + kernel/riscv64/izamin_vector.c | 247 +++ kernel/riscv64/max.c | 65 + kernel/riscv64/max_vector.c | 116 ++ kernel/riscv64/min.c | 65 + kernel/riscv64/min_vector.c | 116 ++ kernel/riscv64/nrm2.c | 88 + kernel/riscv64/nrm2_vector.c | 220 +++ kernel/riscv64/nrm2_vector_dot.c | 128 ++ kernel/riscv64/omatcopy_cn.c | 90 + kernel/riscv64/omatcopy_ct.c | 89 + 
kernel/riscv64/omatcopy_rn.c | 90 + kernel/riscv64/omatcopy_rt.c | 62 + kernel/riscv64/rot.c | 62 + kernel/riscv64/rot_vector.c | 196 ++ kernel/riscv64/scal.c | 63 + kernel/riscv64/scal_vector.c | 133 ++ kernel/riscv64/sgemm_kernel_16x4_c910v.c | 1575 +++++++++++++++ kernel/riscv64/swap.c | 62 + kernel/riscv64/swap_vector.c | 173 ++ kernel/riscv64/symv_L.c | 70 + kernel/riscv64/symv_L_vector.c | 265 +++ kernel/riscv64/symv_U.c | 71 + kernel/riscv64/symv_U_vector.c | 264 +++ kernel/riscv64/zamax.c | 79 + kernel/riscv64/zamax_vector.c | 104 + kernel/riscv64/zamin.c | 79 + kernel/riscv64/zamin_vector.c | 104 + kernel/riscv64/zasum.c | 72 + kernel/riscv64/zasum_vector.c | 136 ++ kernel/riscv64/zaxpby.c | 118 ++ kernel/riscv64/zaxpby_vector.c | 197 ++ kernel/riscv64/zaxpy.c | 74 + kernel/riscv64/zaxpy_vector.c | 107 ++ kernel/riscv64/zcopy.c | 65 + kernel/riscv64/zcopy_vector.c | 92 + kernel/riscv64/zdot.c | 80 + kernel/riscv64/zdot_vector.c | 135 ++ kernel/riscv64/zgemv_n.c | 157 ++ kernel/riscv64/zgemv_n_vector.c | 175 ++ kernel/riscv64/zgemv_t.c | 140 ++ kernel/riscv64/zgemv_t_vector.c | 134 ++ kernel/riscv64/zhemv_LM_vector.c | 191 ++ kernel/riscv64/zhemv_UV_vector.c | 192 ++ kernel/riscv64/znrm2.c | 106 ++ kernel/riscv64/znrm2_vector.c | 278 +++ kernel/riscv64/zomatcopy_cn.c | 70 + kernel/riscv64/zomatcopy_cnc.c | 69 + kernel/riscv64/zomatcopy_ct.c | 71 + kernel/riscv64/zomatcopy_ctc.c | 71 + kernel/riscv64/zomatcopy_rn.c | 70 + kernel/riscv64/zomatcopy_rnc.c | 69 + kernel/riscv64/zomatcopy_rt.c | 72 + kernel/riscv64/zomatcopy_rtc.c | 72 + kernel/riscv64/zrot.c | 70 + kernel/riscv64/zrot_vector.c | 162 ++ kernel/riscv64/zscal.c | 88 + kernel/riscv64/zscal_vector.c | 152 ++ kernel/riscv64/zswap.c | 72 + kernel/riscv64/zswap_vector.c | 117 ++ lapack/laswp/riscv64/Makefile | 13 + param.h | 78 + test/Makefile | 6 + 109 files changed, 19571 insertions(+), 1 deletion(-) create mode 100644 Makefile.riscv64 create mode 100644 common_riscv64.h create mode 100644 
cpuid_riscv64.c create mode 100644 kernel/generic/trmmkernel_16x4.c create mode 100644 kernel/generic/trmmkernel_8x4.c create mode 100644 kernel/generic/trmmkernel_8x8.c create mode 100644 kernel/riscv64/KERNEL create mode 100644 kernel/riscv64/KERNEL.C910V create mode 100644 kernel/riscv64/KERNEL.RISCV64_GENERIC create mode 100644 kernel/riscv64/amax.c create mode 100644 kernel/riscv64/amax_vector.c create mode 100644 kernel/riscv64/amin.c create mode 100644 kernel/riscv64/amin_vector.c create mode 100644 kernel/riscv64/asum.c create mode 100644 kernel/riscv64/asum_vector.c create mode 100644 kernel/riscv64/axpby.c create mode 100644 kernel/riscv64/axpby_vector.c create mode 100644 kernel/riscv64/axpy.c create mode 100644 kernel/riscv64/axpy_vector.c create mode 100644 kernel/riscv64/copy.c create mode 100644 kernel/riscv64/copy_vector.c create mode 100644 kernel/riscv64/dgemm_kernel_8x4_c910v.c create mode 100644 kernel/riscv64/dot.c create mode 100644 kernel/riscv64/dot_vector.c create mode 100644 kernel/riscv64/gemv_n.c create mode 100644 kernel/riscv64/gemv_n_vector.c create mode 100644 kernel/riscv64/gemv_t.c create mode 100644 kernel/riscv64/gemv_t_vector.c create mode 100644 kernel/riscv64/iamax.c create mode 100644 kernel/riscv64/iamax_vector.c create mode 100644 kernel/riscv64/iamin.c create mode 100644 kernel/riscv64/iamin_vector.c create mode 100644 kernel/riscv64/imax.c create mode 100644 kernel/riscv64/imax_vector.c create mode 100644 kernel/riscv64/imin.c create mode 100644 kernel/riscv64/imin_vector.c create mode 100644 kernel/riscv64/izamax.c create mode 100644 kernel/riscv64/izamax_vector.c create mode 100644 kernel/riscv64/izamin.c create mode 100644 kernel/riscv64/izamin_vector.c create mode 100644 kernel/riscv64/max.c create mode 100644 kernel/riscv64/max_vector.c create mode 100644 kernel/riscv64/min.c create mode 100644 kernel/riscv64/min_vector.c create mode 100644 kernel/riscv64/nrm2.c create mode 100644 kernel/riscv64/nrm2_vector.c create 
mode 100644 kernel/riscv64/nrm2_vector_dot.c create mode 100644 kernel/riscv64/omatcopy_cn.c create mode 100644 kernel/riscv64/omatcopy_ct.c create mode 100644 kernel/riscv64/omatcopy_rn.c create mode 100644 kernel/riscv64/omatcopy_rt.c create mode 100644 kernel/riscv64/rot.c create mode 100644 kernel/riscv64/rot_vector.c create mode 100644 kernel/riscv64/scal.c create mode 100644 kernel/riscv64/scal_vector.c create mode 100644 kernel/riscv64/sgemm_kernel_16x4_c910v.c create mode 100644 kernel/riscv64/swap.c create mode 100644 kernel/riscv64/swap_vector.c create mode 100644 kernel/riscv64/symv_L.c create mode 100644 kernel/riscv64/symv_L_vector.c create mode 100644 kernel/riscv64/symv_U.c create mode 100644 kernel/riscv64/symv_U_vector.c create mode 100644 kernel/riscv64/zamax.c create mode 100644 kernel/riscv64/zamax_vector.c create mode 100644 kernel/riscv64/zamin.c create mode 100644 kernel/riscv64/zamin_vector.c create mode 100644 kernel/riscv64/zasum.c create mode 100644 kernel/riscv64/zasum_vector.c create mode 100644 kernel/riscv64/zaxpby.c create mode 100644 kernel/riscv64/zaxpby_vector.c create mode 100644 kernel/riscv64/zaxpy.c create mode 100644 kernel/riscv64/zaxpy_vector.c create mode 100644 kernel/riscv64/zcopy.c create mode 100644 kernel/riscv64/zcopy_vector.c create mode 100644 kernel/riscv64/zdot.c create mode 100644 kernel/riscv64/zdot_vector.c create mode 100644 kernel/riscv64/zgemv_n.c create mode 100644 kernel/riscv64/zgemv_n_vector.c create mode 100644 kernel/riscv64/zgemv_t.c create mode 100644 kernel/riscv64/zgemv_t_vector.c create mode 100644 kernel/riscv64/zhemv_LM_vector.c create mode 100644 kernel/riscv64/zhemv_UV_vector.c create mode 100644 kernel/riscv64/znrm2.c create mode 100644 kernel/riscv64/znrm2_vector.c create mode 100644 kernel/riscv64/zomatcopy_cn.c create mode 100644 kernel/riscv64/zomatcopy_cnc.c create mode 100644 kernel/riscv64/zomatcopy_ct.c create mode 100644 kernel/riscv64/zomatcopy_ctc.c create mode 100644 
kernel/riscv64/zomatcopy_rn.c create mode 100644 kernel/riscv64/zomatcopy_rnc.c create mode 100644 kernel/riscv64/zomatcopy_rt.c create mode 100644 kernel/riscv64/zomatcopy_rtc.c create mode 100644 kernel/riscv64/zrot.c create mode 100644 kernel/riscv64/zrot_vector.c create mode 100644 kernel/riscv64/zscal.c create mode 100644 kernel/riscv64/zscal_vector.c create mode 100644 kernel/riscv64/zswap.c create mode 100644 kernel/riscv64/zswap_vector.c create mode 100644 lapack/laswp/riscv64/Makefile diff --git a/Makefile.prebuild b/Makefile.prebuild index 48fb5e991..d6395da7b 100644 --- a/Makefile.prebuild +++ b/Makefile.prebuild @@ -41,6 +41,10 @@ ifeq ($(TARGET), I6500) TARGET_FLAGS = -mips64r6 endif +ifeq ($(TARGET), C910V) +TARGET_FLAGS = -march=rv64gcvxthead -mabi=lp64v +endif + all: getarch_2nd ./getarch_2nd 0 >> $(TARGET_MAKE) ./getarch_2nd 1 >> $(TARGET_CONF) diff --git a/Makefile.riscv64 b/Makefile.riscv64 new file mode 100644 index 000000000..15d7b059c --- /dev/null +++ b/Makefile.riscv64 @@ -0,0 +1,4 @@ +ifeq ($(CORE), C910V) +CCOMMON_OPT += -march=rv64gcvxthead -mabi=lp64v +FCOMMON_OPT += -march=rv64gcvxthead -mabi=lp64v -static +endif diff --git a/Makefile.system b/Makefile.system index 461f7370b..fe2aecd82 100644 --- a/Makefile.system +++ b/Makefile.system @@ -724,7 +724,10 @@ endif endif endif - +ifeq ($(ARCH), riscv64) +NO_BINARY_MODE = 1 +BINARY_DEFINED = 1 +endif # diff --git a/TargetList.txt b/TargetList.txt index 66eca4506..86177ebca 100644 --- a/TargetList.txt +++ b/TargetList.txt @@ -104,3 +104,6 @@ VORTEX ZARCH_GENERIC Z13 Z14 + +10.RISC-V 64: +RISCV64_GENERIC diff --git a/c_check b/c_check index 5ea93b75c..405963ae6 100644 --- a/c_check +++ b/c_check @@ -92,6 +92,7 @@ $architecture = ia64 if ($data =~ /ARCH_IA64/); $architecture = arm if ($data =~ /ARCH_ARM/); $architecture = arm64 if ($data =~ /ARCH_ARM64/); $architecture = zarch if ($data =~ /ARCH_ZARCH/); +$architecture = riscv64 if ($data =~ /ARCH_RISCV64/); $defined = 0; @@ -136,6 +137,11 @@ 
if (($architecture eq "x86") && ($os ne Darwin) && ($os ne SunOS)) { $binary =32; } +if ($architecture eq "riscv64") { + $defined = 1; + $binary = 64; +} + if ($compiler eq "PGI") { $compiler_name .= " -tp p7" if ($binary eq "32"); $compiler_name .= " -tp p7-64" if ($binary eq "64"); diff --git a/common.h b/common.h index a3ef99b59..faa75c447 100644 --- a/common.h +++ b/common.h @@ -437,6 +437,10 @@ please https://github.com/xianyi/OpenBLAS/issues/246 #include "common_mips.h" #endif +#ifdef ARCH_RISCV64 +#include "common_riscv64.h" +#endif + #ifdef ARCH_MIPS64 #include "common_mips64.h" #endif diff --git a/common_riscv64.h b/common_riscv64.h new file mode 100644 index 000000000..49368c613 --- /dev/null +++ b/common_riscv64.h @@ -0,0 +1,98 @@ +/***************************************************************************** +Copyright (c) 2011-2014, The OpenBLAS Project +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + 3. Neither the name of the OpenBLAS project nor the names of + its contributors may be used to endorse or promote products + derived from this software without specific prior written + permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************************/ + +/*********************************************************************/ +/* Copyright 2009, 2010 The University of Texas at Austin. */ +/* All rights reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* 1. Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* */ +/* 2. Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ +/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ +/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ +/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ +/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/* POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/* The views and conclusions contained in the software and */ +/* documentation are those of the authors and should not be */ +/* interpreted as representing official policies, either expressed */ +/* or implied, of The University of Texas at Austin. */ +/*********************************************************************/ + +#ifndef COMMON_RISCV64 +#define COMMON_RISCV64 + +#define MB __sync_synchronize() +#define WMB __sync_synchronize() +#define RMB __sync_synchronize() + +#define INLINE inline + +#ifndef ASSEMBLER + + +static inline int blas_quickdivide(blasint x, blasint y){ + return x / y; +} + +#endif + + + +#define BUFFER_SIZE ( 32 << 20) +#define SEEK_ADDRESS + +#if defined(C910V) +#include <riscv-vector.h> +#endif + +#endif diff --git a/cpuid_riscv64.c b/cpuid_riscv64.c new file mode 100644 index 000000000..8a3209cb3 --- /dev/null +++ b/cpuid_riscv64.c @@ -0,0 +1,113 @@ +/***************************************************************************** +Copyright (c) 2011-2014, The OpenBLAS Project +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. 
Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + 3. Neither the name of the OpenBLAS project nor the names of + its contributors may be used to endorse or promote products + derived from this software without specific prior written + permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +**********************************************************************************/ + + +/*********************************************************************/ +/* Copyright 2009, 2010 The University of Texas at Austin. */ +/* All rights reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* 1. Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* */ +/* 2. Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. 
*/ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ +/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ +/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ +/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ +/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/* POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/* The views and conclusions contained in the software and */ +/* documentation are those of the authors and should not be */ +/* interpreted as representing official policies, either expressed */ +/* or implied, of The University of Texas at Austin. 
*/ +/*********************************************************************/ + +#define CPU_UNKNOWN 0 +#define CPU_C910V 1 + +static char *cpuname[] = { + "UNKOWN", + "C910V" +}; + +int detect(void){ + return CPU_UNKNOWN; +} + +char *get_corename(void){ + return cpuname[detect()]; +} + +void get_architecture(void){ + printf("RISCV64"); +} + +void get_subarchitecture(void){ +} + +void get_subdirname(void){ + printf("riscv64"); +} + +void get_cpuconfig(void){ + printf("#define UNKNOWN\n"); + printf("#define L1_DATA_SIZE 65536\n"); + printf("#define L1_DATA_LINESIZE 32\n"); + printf("#define L2_SIZE 512488\n"); + printf("#define L2_LINESIZE 32\n"); + printf("#define DTB_DEFAULT_ENTRIES 64\n"); + printf("#define DTB_SIZE 4096\n"); + printf("#define L2_ASSOCIATIVE 4\n"); +} + +void get_libname(void){ + printf("riscv64\n"); +} diff --git a/ctest.c b/ctest.c index cd84ab1bb..83a3b7d6c 100644 --- a/ctest.c +++ b/ctest.c @@ -153,6 +153,10 @@ ARCH_ARM ARCH_ARM64 #endif +#if defined(__riscv) +ARCH_RISCV64 +#endif + #if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L) HAVE_C11 #endif diff --git a/getarch.c b/getarch.c index e2c22d3a0..58465fb56 100644 --- a/getarch.c +++ b/getarch.c @@ -981,6 +981,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #else #endif +#ifdef FORCE_RISCV64_GENERIC +#define FORCE +#define ARCHITECTURE "RISCV64" +#define SUBARCHITECTURE "RISCV64_GENERIC" +#define SUBDIRNAME "riscv64" +#define ARCHCONFIG "-DRISCV64_GENERIC " \ + "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=32 " \ + "-DL2_SIZE=1048576 -DL2_LINESIZE=32 " \ + "-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " +#define LIBNAME "riscv64_generic" +#define CORENAME "RISCV64_GENERIC" +#else +#endif + #ifdef FORCE_CORTEXA15 #define FORCE #define ARCHITECTURE "ARM" @@ -1252,6 +1266,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define CORENAME "Z14" #endif +#ifdef FORCE_C910V +#define FORCE +#define ARCHITECTURE "RISCV64" +#define SUBARCHITECTURE "C910V" +#define SUBDIRNAME "riscv64" +#define ARCHCONFIG "-DC910V " \ + "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=32 " \ + "-DL2_SIZE=1048576 -DL2_LINESIZE=32 " \ + "-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " +#define LIBNAME "c910v" +#define CORENAME "C910V" +#else +#endif + + #ifndef FORCE #ifdef USER_TARGET @@ -1306,6 +1335,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define OPENBLAS_SUPPORTED #endif +#ifdef __riscv +#include "cpuid_riscv64.c" +#endif + #ifdef __arm__ #include "cpuid_arm.c" #define OPENBLAS_SUPPORTED diff --git a/kernel/Makefile.L3 b/kernel/Makefile.L3 index 2ba593c2e..893713769 100644 --- a/kernel/Makefile.L3 +++ b/kernel/Makefile.L3 @@ -25,6 +25,10 @@ ifeq ($(ARCH), arm64) USE_TRMM = 1 endif +ifeq ($(ARCH), riscv64) +USE_TRMM = 1 +endif + ifeq ($(TARGET), LOONGSON3B) USE_TRMM = 1 endif diff --git a/kernel/generic/trmmkernel_16x4.c b/kernel/generic/trmmkernel_16x4.c new file mode 100644 index 000000000..7ea4e108c --- /dev/null +++ b/kernel/generic/trmmkernel_16x4.c @@ -0,0 +1,2092 @@ +#include "common.h" + +int CNAME(BLASLONG bm,BLASLONG bn,BLASLONG bk,FLOAT alpha,FLOAT* ba,FLOAT* bb,FLOAT* C,BLASLONG ldc ,BLASLONG offset) +{ + + BLASLONG i,j,k; + FLOAT *C0,*C1,*C2,*C3,*ptrba,*ptrbb; + + FLOAT res0_0; + FLOAT res0_1; + FLOAT res0_2; + FLOAT res0_3; + FLOAT res0_4; + FLOAT res0_5; + FLOAT res0_6; + FLOAT res0_7; + + FLOAT res0_8; + FLOAT res0_9; + FLOAT res0_10; + FLOAT res0_11; + FLOAT res0_12; + FLOAT res0_13; + FLOAT res0_14; + FLOAT res0_15; + + FLOAT res1_0; + FLOAT res1_1; + FLOAT res1_2; + FLOAT res1_3; + FLOAT res1_4; + FLOAT res1_5; + FLOAT res1_6; + FLOAT res1_7; + + FLOAT res1_8; + FLOAT res1_9; + FLOAT res1_10; + FLOAT res1_11; + FLOAT res1_12; + FLOAT res1_13; + FLOAT res1_14; + FLOAT res1_15; + + FLOAT res2_0; + FLOAT res2_1; + FLOAT res2_2; + FLOAT res2_3; 
+ FLOAT res2_4; + FLOAT res2_5; + FLOAT res2_6; + FLOAT res2_7; + + FLOAT res2_8; + FLOAT res2_9; + FLOAT res2_10; + FLOAT res2_11; + FLOAT res2_12; + FLOAT res2_13; + FLOAT res2_14; + FLOAT res2_15; + + FLOAT res3_0; + FLOAT res3_1; + FLOAT res3_2; + FLOAT res3_3; + FLOAT res3_4; + FLOAT res3_5; + FLOAT res3_6; + FLOAT res3_7; + + FLOAT res3_8; + FLOAT res3_9; + FLOAT res3_10; + FLOAT res3_11; + FLOAT res3_12; + FLOAT res3_13; + FLOAT res3_14; + FLOAT res3_15; + + FLOAT a0; + FLOAT a1; + + FLOAT b0; + FLOAT b1; + FLOAT b2; + FLOAT b3; + + BLASLONG off, temp; + +#if !defined(LEFT) + off = -offset; +#else + off = 0; +#endif + + for (j=0; j + +#if defined(DOUBLE) + +#define ABS fabs + +#else + +#define ABS fabsf + +#endif + + +FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) +{ + BLASLONG i=0; + BLASLONG ix=0; + FLOAT maxf=0.0; + + if (n <= 0 || inc_x <= 0) return(maxf); + + maxf=ABS(x[0]); + ix += inc_x; + i++; + + while(i < n) + { + if( ABS(x[ix]) > maxf ) + { + maxf = ABS(x[ix]); + } + ix += inc_x; + i++; + } + return(maxf); +} + + diff --git a/kernel/riscv64/amax_vector.c b/kernel/riscv64/amax_vector.c new file mode 100644 index 000000000..b6aec131e --- /dev/null +++ b/kernel/riscv64/amax_vector.c @@ -0,0 +1,245 @@ +/*************************************************************************** +Copyright (c) 2020, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. 
Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*****************************************************************************/ + +#include "common.h" +#include + +#if !defined(DOUBLE) +#define RVV_EFLOAT RVV_E32 +#define RVV_M RVV_M8 +#define FLOAT_V_T float32xm8_t +#define VLEV_FLOAT vlev_float32xm8 +#define VLSEV_FLOAT vlsev_float32xm8 +#define VFREDMAXVS_FLOAT vfredmaxvs_float32xm8 +#define MASK_T e32xm8_t +#define VMFLTVF_FLOAT vmfltvf_e32xm8_float32xm8 +#define VFMVVF_FLOAT vfmvvf_float32xm8 +#define VFRSUBVF_MASK_FLOAT vfrsubvf_mask_float32xm8 +#define VFMAXVV_FLOAT vfmaxvv_float32xm8 +#else +#define RVV_EFLOAT RVV_E64 +#define RVV_M RVV_M8 +#define FLOAT_V_T float64xm8_t +#define VLEV_FLOAT vlev_float64xm8 +#define VLSEV_FLOAT vlsev_float64xm8 +#define VFREDMAXVS_FLOAT vfredmaxvs_float64xm8 +#define MASK_T e64xm8_t +#define VMFLTVF_FLOAT vmfltvf_e64xm8_float64xm8 +#define VFMVVF_FLOAT vfmvvf_float64xm8 +#define VFRSUBVF_MASK_FLOAT vfrsubvf_mask_float64xm8 +#define VFMAXVV_FLOAT vfmaxvv_float64xm8 +#endif + +FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) +{ + 
BLASLONG i=0, j=0; + BLASLONG ix=0; + FLOAT maxf=0.0; + if (n <= 0 || inc_x <= 0) return(maxf); + unsigned int gvl = 0; + FLOAT_V_T v0, v1, v_max; + + MASK_T mask0, mask1; + FLOAT zero = 0.0; + if(inc_x == 1){ + gvl = vsetvli(n, RVV_EFLOAT, RVV_M); + if(gvl <= n/2){ + v_max = VFMVVF_FLOAT(0, gvl); + for(i=0,j=0; i maxf) + maxf = v0[0]; + j += gvl; + } + }else{ + gvl = vsetvli(n, RVV_EFLOAT, RVV_M); + BLASLONG stride_x = inc_x * sizeof(FLOAT); + if(gvl <= n/2){ + BLASLONG inc_xv = inc_x * gvl; + v_max = VFMVVF_FLOAT(0, gvl); + for(i=0,j=0; i maxf) + maxf = v0[0]; + j += gvl; + } + } + return(maxf); +} + + diff --git a/kernel/riscv64/amin.c b/kernel/riscv64/amin.c new file mode 100644 index 000000000..78495a8e3 --- /dev/null +++ b/kernel/riscv64/amin.c @@ -0,0 +1,75 @@ +/*************************************************************************** +Copyright (c) 2013, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*****************************************************************************/ + +/************************************************************************************** +* 2013/09/14 Saar +* BLASTEST float : OK +* BLASTEST double : OK +* CTEST : NoTest +* TEST : NoTest +* +**************************************************************************************/ + +#include "common.h" +#include + +#if defined(DOUBLE) + +#define ABS fabs + +#else + +#define ABS fabsf + +#endif + + +FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) +{ + BLASLONG i=0; + BLASLONG ix=0; + FLOAT minf=0.0; + + if (n <= 0 || inc_x <= 0) return(minf); + + minf=ABS(x[0]); + ix += inc_x; + i++; + + while(i < n) + { + if( ABS(x[ix]) < minf ) + { + minf = ABS(x[ix]); + } + ix += inc_x; + i++; + } + return(minf); +} + + diff --git a/kernel/riscv64/amin_vector.c b/kernel/riscv64/amin_vector.c new file mode 100644 index 000000000..53243ad56 --- /dev/null +++ b/kernel/riscv64/amin_vector.c @@ -0,0 +1,241 @@ +/*************************************************************************** +Copyright (c) 2020, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. 
Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*****************************************************************************/ + +#include "common.h" +#include +#include + +#if !defined(DOUBLE) +#define RVV_EFLOAT RVV_E32 +#define RVV_M RVV_M8 +#define FLOAT_V_T float32xm8_t +#define VLEV_FLOAT vlev_float32xm8 +#define VLSEV_FLOAT vlsev_float32xm8 +#define VFREDMINVS_FLOAT vfredminvs_float32xm8 +#define MASK_T e32xm8_t +#define VMFLTVF_FLOAT vmfltvf_e32xm8_float32xm8 +#define VFMVVF_FLOAT vfmvvf_float32xm8 +#define VFRSUBVF_MASK_FLOAT vfrsubvf_mask_float32xm8 +#define VFMINVV_FLOAT vfminvv_float32xm8 +#else +#define RVV_EFLOAT RVV_E64 +#define RVV_M RVV_M8 +#define FLOAT_V_T float64xm8_t +#define VLEV_FLOAT vlev_float64xm8 +#define VLSEV_FLOAT vlsev_float64xm8 +#define VFREDMINVS_FLOAT vfredminvs_float64xm8 +#define MASK_T e64xm8_t +#define VMFLTVF_FLOAT vmfltvf_e64xm8_float64xm8 +#define VFMVVF_FLOAT vfmvvf_float64xm8 +#define VFRSUBVF_MASK_FLOAT vfrsubvf_mask_float64xm8 +#define VFMINVV_FLOAT vfminvv_float64xm8 +#endif + +FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) +{ + BLASLONG i=0, j=0; + if (n <= 0 || inc_x <= 0) return(0.0); + FLOAT minf=FLT_MAX; + unsigned int gvl = 0; + FLOAT_V_T v0, v1, v_min; + + MASK_T mask0, mask1; + FLOAT zero = 0.0; + if(inc_x == 1){ + gvl = vsetvli(n, RVV_EFLOAT, RVV_M); + if(gvl <= n/2){ + v_min = VFMVVF_FLOAT(FLT_MAX, gvl); + for(i=0,j=0; i + +#if defined(DOUBLE) + +#define ABS fabs + +#else + +#define ABS fabsf + +#endif + + +FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) +{ + BLASLONG i=0; + FLOAT sumf = 0.0; + if (n <= 0 || inc_x <= 0) return(sumf); + + n *= inc_x; + while(i < n) + { + sumf += ABS(x[i]); + i += inc_x; + } + return(sumf); +} + + diff --git a/kernel/riscv64/asum_vector.c b/kernel/riscv64/asum_vector.c new file mode 100644 index 000000000..7ab7484e8 --- /dev/null +++ b/kernel/riscv64/asum_vector.c @@ -0,0 +1,131 @@ +/*************************************************************************** +Copyright (c) 2020, The OpenBLAS Project +All rights 
reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*****************************************************************************/ + +#include "common.h" +#include + +#if !defined(DOUBLE) +#define RVV_EFLOAT RVV_E32 +#define RVV_M RVV_M8 +#define FLOAT_V_T float32xm8_t +#define VLEV_FLOAT vlev_float32xm8 +#define VLSEV_FLOAT vlsev_float32xm8 +#define VFREDSUMVS_FLOAT vfredsumvs_float32xm8 +#define MASK_T e32xm8_t +#define VMFLTVF_FLOAT vmfltvf_e32xm8_float32xm8 +#define VFMVVF_FLOAT vfmvvf_float32xm8 +#define VFRSUBVF_MASK_FLOAT vfrsubvf_mask_float32xm8 +#define VFADDVV_FLOAT vfaddvv_float32xm8 +#else +#define RVV_EFLOAT RVV_E64 +#define RVV_M RVV_M8 +#define FLOAT_V_T float64xm8_t +#define VLEV_FLOAT vlev_float64xm8 +#define VLSEV_FLOAT vlsev_float64xm8 +#define VFREDSUMVS_FLOAT vfredsumvs_float64xm8 +#define MASK_T e64xm8_t +#define VMFLTVF_FLOAT vmfltvf_e64xm8_float64xm8 +#define VFMVVF_FLOAT vfmvvf_float64xm8 +#define VFRSUBVF_MASK_FLOAT vfrsubvf_mask_float64xm8 +#define VFADDVV_FLOAT vfaddvv_float64xm8 +#endif +FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) +{ + BLASLONG i=0, j=0; + BLASLONG ix=0; + FLOAT asumf=0.0; + if (n <= 0 || inc_x <= 0) return(asumf); + unsigned int gvl = 0; + FLOAT_V_T v0, v1, v_zero,v_sum; + + MASK_T mask0, mask1; + if(inc_x == 1){ + gvl = vsetvli(n, RVV_EFLOAT, RVV_M); + v_zero = VFMVVF_FLOAT(0, gvl); + if(gvl <= n/2){ + v_sum = VFMVVF_FLOAT(0, gvl); + for(i=0,j=0; i + +#define KERNEL8x4_I \ + "addi t1, %[PB], 1*8 \n\t"\ + "addi t2, %[PB], 2*8 \n\t"\ + "addi t3, %[PB], 3*8 \n\t"\ + "fld ft0, (%[PB]) \n\t"\ + "fld ft1, (t1) \n\t"\ + "fld ft2, (t2) \n\t"\ + "fld ft3, (t3) \n\t"\ + "vle.v v0, (%[PA]) \n\t"\ + "addi t4, %[PA], 2*8 \n\t"\ + "addi t5, %[PA], 4*8 \n\t"\ + "vfmv.v.f v8, ft0 \n\t"\ + "addi t6, %[PA], 6*8 \n\t"\ + "addi %[PA], %[PA], 8*8 \n\t"\ + "vle.v v1, (t4) \n\t"\ + "addi t4, t4, 8*8 \n\t"\ + "vfmv.v.f v9, ft1 \n\t"\ + "vle.v v2, (t5) \n\t"\ + "addi t5, t5, 8*8 \n\t"\ + "vle.v v3, (t6) \n\t"\ + "addi t6, t6, 8*8 \n\t"\ + "vfmv.v.f v10, ft2 \n\t"\ + "addi %[PB], %[PB], 
4*8 \n\t"\ + "vle.v v4, (%[PA]) \n\t"\ + "addi %[PA], %[PA], 8*8 \n\t"\ + "vfmv.v.f v11, ft3 \n\t"\ + "vfmacc.vv v16, v8, v0 \n\t"\ + "addi t1, t1, 4*8 \n\t"\ + "vle.v v5, (t4) \n\t"\ + "addi t4, t4, 8*8 \n\t"\ + "vfmacc.vv v17, v8, v1 \n\t"\ + "addi t2, t2, 4*8 \n\t"\ + "vle.v v6, (t5) \n\t"\ + "addi t5, t5, 8*8 \n\t"\ + "vfmacc.vv v18, v8, v2 \n\t"\ + "addi t3, t3, 4*8 \n\t"\ + "vle.v v7, (t6) \n\t"\ + "addi t6, t6, 8*8 \n\t"\ + "vfmacc.vv v19, v8, v3 \n\t"\ + "fld ft4, (%[PB]) \n\t"\ + "vfmacc.vv v20, v9, v0 \n\t"\ + "fld ft5, (t1) \n\t"\ + "vfmacc.vv v21, v9, v1 \n\t"\ + "fld ft6, (t2) \n\t"\ + "vfmacc.vv v22, v9, v2 \n\t"\ + "fld ft7, (t3) \n\t"\ + "vfmacc.vv v23, v9, v3 \n\t"\ + "vfmv.v.f v12, ft4 \n\t"\ + "vfmacc.vv v24, v10, v0 \n\t"\ + "vfmv.v.f v13, ft5 \n\t"\ + "vfmacc.vv v25, v10, v1 \n\t"\ + "vfmv.v.f v14, ft6 \n\t"\ + "vfmacc.vv v26, v10, v2 \n\t"\ + "vfmv.v.f v15, ft7 \n\t"\ + "vfmacc.vv v27, v10, v3 \n\t"\ + "addi %[PB], %[PB], 4*8 \n\t"\ + "vfmacc.vv v28, v11, v0 \n\t"\ + "addi t1, t1, 4*8 \n\t"\ + "vfmacc.vv v29, v11, v1 \n\t"\ + "addi t2, t2, 4*8 \n\t"\ + "vfmacc.vv v30, v11, v2 \n\t"\ + "addi t3, t3, 4*8 \n\t"\ + "vfmacc.vv v31, v11, v3 \n\t" + +#define KERNEL8x4_M1 \ + "vfmacc.vv v16, v8, v0 \n\t"\ + "vle.v v4, (%[PA]) \n\t"\ + "addi %[PA], %[PA], 8*8 \n\t"\ + "vfmacc.vv v17, v8, v1 \n\t"\ + "vle.v v5, (t4) \n\t"\ + "addi t4, t4, 8*8 \n\t"\ + "vfmacc.vv v18, v8, v2 \n\t"\ + "vle.v v6, (t5) \n\t"\ + "addi t5, t5, 8*8 \n\t"\ + "vfmacc.vv v19, v8, v3 \n\t"\ + "vle.v v7, (t6) \n\t"\ + "addi t6, t6, 8*8 \n\t"\ + "vfmacc.vv v20, v9, v0 \n\t"\ + "fld ft4, (%[PB]) \n\t"\ + "vfmacc.vv v21, v9, v1 \n\t"\ + "fld ft5, (t1) \n\t"\ + "vfmacc.vv v22, v9, v2 \n\t"\ + "fld ft6, (t2) \n\t"\ + "vfmacc.vv v23, v9, v3 \n\t"\ + "fld ft7, (t3) \n\t"\ + "addi %[PB], %[PB], 4*8 \n\t"\ + "vfmacc.vv v24, v10, v0 \n\t"\ + "addi t1, t1, 4*8 \n\t"\ + "vfmacc.vv v25, v10, v1 \n\t"\ + "vfmv.v.f v12, ft4 \n\t"\ + "vfmacc.vv v26, v10, v2 \n\t"\ + "addi t2, t2, 4*8 \n\t"\ + 
"vfmacc.vv v27, v10, v3 \n\t"\ + "vfmv.v.f v13, ft5 \n\t"\ + "vfmacc.vv v28, v11, v0 \n\t"\ + "addi t3, t3, 4*8 \n\t"\ + "vfmacc.vv v29, v11, v1 \n\t"\ + "vfmv.v.f v14, ft6 \n\t"\ + "vfmacc.vv v30, v11, v2 \n\t"\ + "vfmacc.vv v31, v11, v3 \n\t"\ + "vfmv.v.f v15, ft7 \n\t" + +#define KERNEL8x4_M2 \ + "vfmacc.vv v16, v12, v4 \n\t"\ + "vle.v v0, (%[PA]) \n\t"\ + "addi %[PA], %[PA], 8*8 \n\t"\ + "vfmacc.vv v17, v12, v5 \n\t"\ + "vle.v v1, (t4) \n\t"\ + "addi t4, t4, 8*8 \n\t"\ + "vfmacc.vv v18, v12, v6 \n\t"\ + "vle.v v2, (t5) \n\t"\ + "addi t5, t5, 8*8 \n\t"\ + "vfmacc.vv v19, v12, v7 \n\t"\ + "vle.v v3, (t6) \n\t"\ + "addi t6, t6, 8*8 \n\t"\ + "vfmacc.vv v20, v13, v4 \n\t"\ + "fld ft0, (%[PB]) \n\t"\ + "vfmacc.vv v21, v13, v5 \n\t"\ + "fld ft1, (t1) \n\t"\ + "vfmacc.vv v22, v13, v6 \n\t"\ + "fld ft2, (t2) \n\t"\ + "vfmacc.vv v23, v13, v7 \n\t"\ + "fld ft3, (t3) \n\t"\ + "addi %[PB], %[PB], 4*8 \n\t"\ + "vfmacc.vv v24, v14, v4 \n\t"\ + "addi t1, t1, 4*8 \n\t"\ + "vfmacc.vv v25, v14, v5 \n\t"\ + "vfmv.v.f v8, ft0 \n\t"\ + "vfmacc.vv v26, v14, v6 \n\t"\ + "addi t2, t2, 4*8 \n\t"\ + "vfmacc.vv v27, v14, v7 \n\t"\ + "vfmv.v.f v9, ft1 \n\t"\ + "vfmacc.vv v28, v15, v4 \n\t"\ + "addi t3, t3, 4*8 \n\t"\ + "vfmacc.vv v29, v15, v5 \n\t"\ + "vfmv.v.f v10, ft2 \n\t"\ + "vfmacc.vv v30, v15, v6 \n\t"\ + "vfmacc.vv v31, v15, v7 \n\t"\ + "vfmv.v.f v11, ft3 \n\t" + +#define KERNEL8x4_E \ + "vfmacc.vv v16, v12, v4 \n\t"\ + "vfmacc.vv v17, v12, v5 \n\t"\ + "vfmacc.vv v18, v12, v6 \n\t"\ + "vfmacc.vv v19, v12, v7 \n\t"\ + "vfmacc.vv v20, v13, v4 \n\t"\ + "vfmacc.vv v21, v13, v5 \n\t"\ + "vfmacc.vv v22, v13, v6 \n\t"\ + "vfmacc.vv v23, v13, v7 \n\t"\ + "vfmacc.vv v24, v14, v4 \n\t"\ + "vfmacc.vv v25, v14, v5 \n\t"\ + "vfmacc.vv v26, v14, v6 \n\t"\ + "vfmacc.vv v27, v14, v7 \n\t"\ + "vfmacc.vv v28, v15, v4 \n\t"\ + "vfmacc.vv v29, v15, v5 \n\t"\ + "vfmacc.vv v30, v15, v6 \n\t"\ + "vfmacc.vv v31, v15, v7 \n\t" + + + + +int CNAME(BLASLONG bm,BLASLONG bn,BLASLONG bk,FLOAT alpha,FLOAT* 
ba,FLOAT* bb,FLOAT* C,BLASLONG ldc +#ifdef TRMMKERNEL + ,BLASLONG offset +#endif + ) +{ + BLASLONG i,j,k; + FLOAT *C0,*C1,*C2,*C3; + FLOAT *ptrba,*ptrbb; + + FLOAT loadb0,loadb1,loadb2,loadb3; + FLOAT load0,load1,load2,load3,load4,load5,load6,load7; + + FLOAT res0,res1,res2,res3; + FLOAT res4,res5,res6,res7; + FLOAT res8,res9,res10,res11; + FLOAT res12,res13,res14,res15; + + for (j=0; j 0){ + vx = VFMVVF_FLOAT(0, gvl); + vx = VFREDSUM_FLOAT(vr, vx, gvl); + dot += vx[0]; + } + //tail + if(j < n){ + gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M); + vx = VLEV_FLOAT(&x[j], gvl); + vy = VLEV_FLOAT(&y[j], gvl); + FLOAT_V_T vz = VFMVVF_FLOAT(0, gvl); + //vr = VFDOTVV_FLOAT(vx, vy, gvl); + vr = VFMACCVV_FLOAT(vz, vx, vy, gvl); + vx = VFREDSUM_FLOAT(vr, vz, gvl); + dot += vx[0]; + } + }else if(inc_y == 1){ + gvl = vsetvli(n, RVV_EFLOAT, RVV_M); + vr = VFMVVF_FLOAT(0, gvl); + unsigned int stride_x = inc_x * sizeof(FLOAT); + for(i=0,j=0; i 0){ + vx = VFMVVF_FLOAT(0, gvl); + vx = VFREDSUM_FLOAT(vr, vx, gvl); + dot += vx[0]; + } + //tail + if(j < n){ + gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M); + vx = VLSEV_FLOAT(&x[j*inc_x], stride_x, gvl); + vy = VLEV_FLOAT(&y[j], gvl); + FLOAT_V_T vz = VFMVVF_FLOAT(0, gvl); + //vr = VFDOTVV_FLOAT(vx, vy, gvl); + vr = VFMACCVV_FLOAT(vz, vx, vy, gvl); + vx = VFREDSUM_FLOAT(vr, vz, gvl); + dot += vx[0]; + } + }else if(inc_x == 1){ + gvl = vsetvli(n, RVV_EFLOAT, RVV_M); + vr = VFMVVF_FLOAT(0, gvl); + unsigned int stride_y = inc_y * sizeof(FLOAT); + for(i=0,j=0; i 0){ + vx = VFMVVF_FLOAT(0, gvl); + vx = VFREDSUM_FLOAT(vr, vx, gvl); + dot += vx[0]; + } + //tail + if(j < n){ + gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M); + vx = VLEV_FLOAT(&x[j], gvl); + vy = VLSEV_FLOAT(&y[j*inc_y], stride_y, gvl); + FLOAT_V_T vz = VFMVVF_FLOAT(0, gvl); + //vr = VFDOTVV_FLOAT(vx, vy, gvl); + vr = VFMACCVV_FLOAT(vz, vx, vy, gvl); + vx = VFREDSUM_FLOAT(vr, vz, gvl); + dot += vx[0]; + } + }else{ + gvl = vsetvli(n, RVV_EFLOAT, RVV_M); + vr = VFMVVF_FLOAT(0, gvl); + unsigned int stride_x 
= inc_x * sizeof(FLOAT); + unsigned int stride_y = inc_y * sizeof(FLOAT); + for(i=0,j=0; i 0){ + vx = VFMVVF_FLOAT(0, gvl); + vx = VFREDSUM_FLOAT(vr, vx, gvl); + dot += vx[0]; + } + //tail + if(j < n){ + gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M); + vx = VLSEV_FLOAT(&x[j*inc_x], stride_x, gvl); + vy = VLSEV_FLOAT(&y[j*inc_y], stride_y, gvl); + FLOAT_V_T vz = VFMVVF_FLOAT(0, gvl); + //vr = VFDOTVV_FLOAT(vx, vy, gvl); + vr = VFMACCVV_FLOAT(vz, vx, vy, gvl); + vx = VFREDSUM_FLOAT(vr, vz, gvl); + dot += vx[0]; + } + } + return(dot); +} + + diff --git a/kernel/riscv64/gemv_n.c b/kernel/riscv64/gemv_n.c new file mode 100644 index 000000000..ef61b245b --- /dev/null +++ b/kernel/riscv64/gemv_n.c @@ -0,0 +1,67 @@ +/*************************************************************************** +Copyright (c) 2013, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*****************************************************************************/ + + +/************************************************************************************** + * * 2013/09/14 Saar + * * BLASTEST float : OK + * * BLASTEST double : OK + * CTEST : OK + * TEST : OK + * * + * **************************************************************************************/ + + +#include "common.h" + +int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *buffer) +{ + BLASLONG i; + BLASLONG ix,iy; + BLASLONG j; + FLOAT *a_ptr; + FLOAT temp; + + ix = 0; + a_ptr = a; + + for (j=0; j + +#if defined(DOUBLE) + +#define ABS fabs + +#else + +#define ABS fabsf + +#endif + + +BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) +{ + BLASLONG i=0; + BLASLONG ix=0; + FLOAT maxf=0.0; + BLASLONG max=0; + + if (n <= 0 || inc_x <= 0) return(max); + + maxf=ABS(x[0]); + ix += inc_x; + i++; + + while(i < n) + { + if( ABS(x[ix]) > maxf ) + { + max = i; + maxf = ABS(x[ix]); + } + ix += inc_x; + i++; + } + return(max+1); +} + + diff --git a/kernel/riscv64/iamax_vector.c b/kernel/riscv64/iamax_vector.c new file mode 100644 index 000000000..3aa64afc9 --- /dev/null +++ b/kernel/riscv64/iamax_vector.c @@ -0,0 +1,191 @@ +/*************************************************************************** +Copyright (c) 2020, The OpenBLAS Project +All rights reserved. 
+Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*****************************************************************************/ + +#include "common.h" +#include + +#if defined(DOUBLE) + +#define ABS fabs +#define RVV_EFLOAT RVV_E64 +#define RVV_M RVV_M8 +#define FLOAT_V_T float64xm8_t +#define VLEV_FLOAT vlev_float64xm8 +#define VLSEV_FLOAT vlsev_float64xm8 +#define VFREDMAXVS_FLOAT vfredmaxvs_float64xm8 +#define MASK_T e64xm8_t +#define VMFLTVF_FLOAT vmfltvf_e64xm8_float64xm8 +#define VMFLTVV_FLOAT vmfltvv_e64xm8_float64xm8 +#define VFMVVF_FLOAT vfmvvf_float64xm8 +#define VFRSUBVF_MASK_FLOAT vfrsubvf_mask_float64xm8 +#define VFMAXVV_FLOAT vfmaxvv_float64xm8 +#define VMFGEVF_FLOAT vmfgevf_e64xm8_float64xm8 +#define VMFIRSTM vmfirstm_e64xm8 +#define UINT_V_T uint64xm8_t +#define VIDV_MASK_UINT vidv_mask_uint64xm8 +#define VIDV_UINT vidv_uint64xm8 +#define VADDVX_MASK_UINT vaddvx_mask_uint64xm8 +#define VADDVX_UINT vaddvx_uint64xm8 +#define VMVVX_UINT vmvvx_uint64xm8 +#else + +#define ABS fabsf +#define RVV_EFLOAT RVV_E32 +#define RVV_M RVV_M8 +#define FLOAT_V_T float32xm8_t +#define VLEV_FLOAT vlev_float32xm8 +#define VLSEV_FLOAT vlsev_float32xm8 +#define VFREDMAXVS_FLOAT vfredmaxvs_float32xm8 +#define MASK_T e32xm8_t +#define VMFLTVF_FLOAT vmfltvf_e32xm8_float32xm8 +#define VMFLTVV_FLOAT vmfltvv_e32xm8_float32xm8 +#define VFMVVF_FLOAT vfmvvf_float32xm8 +#define VFRSUBVF_MASK_FLOAT vfrsubvf_mask_float32xm8 +#define VFMAXVV_FLOAT vfmaxvv_float32xm8 +#define VMFGEVF_FLOAT vmfgevf_e32xm8_float32xm8 +#define VMFIRSTM vmfirstm_e32xm8 +#define UINT_V_T uint32xm8_t +#define VIDV_MASK_UINT vidv_mask_uint32xm8 +#define VIDV_UINT vidv_uint32xm8 +#define VADDVX_MASK_UINT vaddvx_mask_uint32xm8 +#define VADDVX_UINT vaddvx_uint32xm8 +#define VMVVX_UINT vmvvx_uint32xm8 +#endif + + +BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) +{ + BLASLONG i=0, j=0; + FLOAT maxf=0.0; + unsigned int max_index = 0; + if (n <= 0 || inc_x <= 0) return(max_index); + + FLOAT_V_T vx, v_max; + UINT_V_T v_max_index; + MASK_T mask; + unsigned int 
gvl = 0; + if(inc_x == 1){ + gvl = vsetvli(n, RVV_EFLOAT, RVV_M); + v_max_index = VMVVX_UINT(0, gvl); + v_max = VFMVVF_FLOAT(-1, gvl); + for(i=0,j=0; i < n/gvl; i++){ + vx = VLEV_FLOAT(&x[j], gvl); + //fabs(vector) + mask = VMFLTVF_FLOAT(vx, 0, gvl); + vx = VFRSUBVF_MASK_FLOAT(vx, vx, 0, mask, gvl); + + //index where element greater than v_max + mask = VMFLTVV_FLOAT(v_max, vx, gvl); + v_max_index = VIDV_MASK_UINT(v_max_index, mask, gvl); + v_max_index = VADDVX_MASK_UINT(v_max_index, v_max_index, j, mask, gvl); + + //update v_max and start_index j + v_max = VFMAXVV_FLOAT(v_max, vx, gvl); + j += gvl; + } + vx = VFMVVF_FLOAT(0, gvl); + vx = VFREDMAXVS_FLOAT(v_max, vx, gvl); + maxf = vx[0]; + mask = VMFGEVF_FLOAT(v_max, maxf, gvl); + max_index = VMFIRSTM(mask,gvl); + max_index = v_max_index[max_index]; + + if(j < n){ + gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M); + vx = VLEV_FLOAT(&x[j], gvl); + //fabs(vector) + mask = VMFLTVF_FLOAT(vx, 0, gvl); + v_max = VFRSUBVF_MASK_FLOAT(vx, vx, 0, mask, gvl); + + vx = VFMVVF_FLOAT(0, gvl); + vx = VFREDMAXVS_FLOAT(v_max, vx, gvl); + FLOAT cur_maxf = vx[0]; + if(cur_maxf > maxf){ + //tail index + v_max_index = VIDV_UINT(gvl); + v_max_index = VADDVX_UINT(v_max_index, j, gvl); + + mask = VMFGEVF_FLOAT(v_max, cur_maxf, gvl); + max_index = VMFIRSTM(mask,gvl); + max_index = v_max_index[max_index]; + } + } + }else{ + gvl = vsetvli(n, RVV_EFLOAT, RVV_M); + unsigned int stride_x = inc_x * sizeof(FLOAT); + unsigned int idx = 0, inc_v = gvl * inc_x; + + v_max_index = VMVVX_UINT(0, gvl); + v_max = VFMVVF_FLOAT(-1, gvl); + for(i=0,j=0; i < n/gvl; i++){ + vx = VLSEV_FLOAT(&x[idx], stride_x, gvl); + //fabs(vector) + mask = VMFLTVF_FLOAT(vx, 0, gvl); + vx = VFRSUBVF_MASK_FLOAT(vx, vx, 0, mask, gvl); + + //index where element greater than v_max + mask = VMFLTVV_FLOAT(v_max, vx, gvl); + v_max_index = VIDV_MASK_UINT(v_max_index, mask, gvl); + v_max_index = VADDVX_MASK_UINT(v_max_index, v_max_index, j, mask, gvl); + + //update v_max and start_index j + 
v_max = VFMAXVV_FLOAT(v_max, vx, gvl); + j += gvl; + idx += inc_v; + } + vx = VFMVVF_FLOAT(0, gvl); + vx = VFREDMAXVS_FLOAT(v_max, vx, gvl); + maxf = vx[0]; + mask = VMFGEVF_FLOAT(v_max, maxf, gvl); + max_index = VMFIRSTM(mask,gvl); + max_index = v_max_index[max_index]; + + if(j < n){ + gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M); + vx = VLSEV_FLOAT(&x[idx], stride_x, gvl); + //fabs(vector) + mask = VMFLTVF_FLOAT(vx, 0, gvl); + v_max = VFRSUBVF_MASK_FLOAT(vx, vx, 0, mask, gvl); + + vx = VFMVVF_FLOAT(0, gvl); + vx = VFREDMAXVS_FLOAT(v_max, vx, gvl); + FLOAT cur_maxf = vx[0]; + if(cur_maxf > maxf){ + //tail index + v_max_index = VIDV_UINT(gvl); + v_max_index = VADDVX_UINT(v_max_index, j, gvl); + + mask = VMFGEVF_FLOAT(v_max, cur_maxf, gvl); + max_index = VMFIRSTM(mask,gvl); + max_index = v_max_index[max_index]; + } + } + } + return(max_index+1); +} + + diff --git a/kernel/riscv64/iamin.c b/kernel/riscv64/iamin.c new file mode 100644 index 000000000..155292bd5 --- /dev/null +++ b/kernel/riscv64/iamin.c @@ -0,0 +1,77 @@ +/*************************************************************************** +Copyright (c) 2013, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. 
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*****************************************************************************/
+
+/**************************************************************************************
+* 2013/09/14 Saar
+* BLASTEST float : NoTest
+* BLASTEST double : NoTest
+* CTEST : NoTest
+* TEST : NoTest
+*
+**************************************************************************************/
+
+#include "common.h"
+#include <math.h>
+
+#if defined(DOUBLE)
+
+#define ABS fabs
+
+#else
+
+#define ABS fabsf
+
+#endif
+
+
+/*
+ * Index of the first element of x with the smallest absolute value.
+ *
+ * n      number of elements to examine
+ * x      input vector
+ * inc_x  distance, in elements, between consecutive entries of x
+ *
+ * Returns the 1-based position of that element, or 0 when n <= 0 or
+ * inc_x <= 0.
+ */
+BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
+{
+    BLASLONG i;
+    BLASLONG ix;
+    BLASLONG min = 0;
+    FLOAT minf;
+
+    if (n <= 0 || inc_x <= 0) return(0);
+
+    minf = ABS(x[0]);
+
+    for (i = 1, ix = inc_x; i < n; i++, ix += inc_x)
+    {
+        /* |x[ix]| is computed once; minf is already non-negative */
+        FLOAT absx = ABS(x[ix]);
+
+        /* strict '<' keeps the earliest of several equal minima */
+        if (absx < minf)
+        {
+            min = i;
+            minf = absx;
+        }
+    }
+    return(min+1);
+}
+
+
diff --git a/kernel/riscv64/iamin_vector.c b/kernel/riscv64/iamin_vector.c
new file mode 100644
index 000000000..608f19a00
--- /dev/null
+++ b/kernel/riscv64/iamin_vector.c
@@ -0,0 +1,192 @@
+/***************************************************************************
+Copyright (c) 2020, The OpenBLAS Project
+All rights reserved.
+Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*****************************************************************************/
+
+#include "common.h"
+#include <math.h>
+#include <float.h>
+
+#if defined(DOUBLE)
+
+#define ABS fabs
+/* largest finite FLOAT: start value of the running minimum */
+#define FLOAT_MAX_VAL DBL_MAX
+#define RVV_EFLOAT RVV_E64
+#define RVV_M RVV_M8
+#define FLOAT_V_T float64xm8_t
+#define VLEV_FLOAT vlev_float64xm8
+#define VLSEV_FLOAT vlsev_float64xm8
+#define VFREDMINVS_FLOAT vfredminvs_float64xm8
+#define MASK_T e64xm8_t
+#define VMFLTVF_FLOAT vmfltvf_e64xm8_float64xm8
+#define VMFLTVV_FLOAT vmfltvv_e64xm8_float64xm8
+#define VFMVVF_FLOAT vfmvvf_float64xm8
+#define VFRSUBVF_MASK_FLOAT vfrsubvf_mask_float64xm8
+#define VFMINVV_FLOAT vfminvv_float64xm8
+#define VMFLEVF_FLOAT vmflevf_e64xm8_float64xm8
+#define VMFIRSTM vmfirstm_e64xm8
+#define UINT_V_T uint64xm8_t
+#define VIDV_MASK_UINT vidv_mask_uint64xm8
+#define VIDV_UINT vidv_uint64xm8
+#define VADDVX_MASK_UINT vaddvx_mask_uint64xm8
+#define VADDVX_UINT vaddvx_uint64xm8
+#define VMVVX_UINT vmvvx_uint64xm8
+#else
+
+#define ABS fabsf
+/* largest finite FLOAT: start value of the running minimum */
+#define FLOAT_MAX_VAL FLT_MAX
+#define RVV_EFLOAT RVV_E32
+#define RVV_M RVV_M8
+#define FLOAT_V_T float32xm8_t
+#define VLEV_FLOAT vlev_float32xm8
+#define VLSEV_FLOAT vlsev_float32xm8
+#define VFREDMINVS_FLOAT vfredminvs_float32xm8
+#define MASK_T e32xm8_t
+#define VMFLTVF_FLOAT vmfltvf_e32xm8_float32xm8
+#define VMFLTVV_FLOAT vmfltvv_e32xm8_float32xm8
+#define VFMVVF_FLOAT vfmvvf_float32xm8
+#define VFRSUBVF_MASK_FLOAT vfrsubvf_mask_float32xm8
+#define VFMINVV_FLOAT vfminvv_float32xm8
+#define VMFLEVF_FLOAT vmflevf_e32xm8_float32xm8
+#define VMFIRSTM vmfirstm_e32xm8
+#define UINT_V_T uint32xm8_t
+#define VIDV_MASK_UINT vidv_mask_uint32xm8
+#define VIDV_UINT vidv_uint32xm8
+#define VADDVX_MASK_UINT vaddvx_mask_uint32xm8
+#define VADDVX_UINT vaddvx_uint32xm8
+#define VMVVX_UINT vmvvx_uint32xm8
+#endif
+
+
+/*
+ * Index of an element of x with the smallest absolute value, computed with
+ * vector intrinsics.
+ *
+ * n      number of elements to examine
+ * x      input vector
+ * inc_x  distance, in elements, between consecutive entries of x;
+ *        inc_x == 1 uses the unit-stride load, any other positive value
+ *        the strided load
+ *
+ * Returns the 1-based position of the selected element, or 0 when n <= 0
+ * or inc_x <= 0.
+ *
+ * Both paths work the same way: the first n/gvl groups of gvl elements are
+ * folded into a per-lane running minimum (v_min) together with the element
+ * index that produced it (v_min_index); the lanes are then reduced to the
+ * scalar minf, and the n-j elements left over are examined as one shorter
+ * group.
+ *
+ * The running minimum starts at FLOAT_MAX_VAL, the largest finite value of
+ * the build's FLOAT type, so that DOUBLE inputs larger than FLT_MAX are
+ * still recorded.
+ *
+ * NOTE(review): VMFIRSTM presumably returns the lowest matching lane, so
+ * with equal minima in different lanes the result need not be the lowest
+ * index - confirm against the scalar kernel's first-occurrence behaviour.
+ */
+BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
+{
+        BLASLONG i=0, j=0;
+        FLOAT minf=FLOAT_MAX_VAL;
+        unsigned int min_index = 0;
+        if (n <= 0 || inc_x <= 0) return(min_index);
+
+        FLOAT_V_T vx, v_min;
+        UINT_V_T v_min_index;
+        MASK_T mask;
+        unsigned int gvl = 0;
+        if(inc_x == 1){
+                //gvl: group length returned by vsetvli, passed to every vector call
+                gvl = vsetvli(n, RVV_EFLOAT, RVV_M);
+                v_min = VFMVVF_FLOAT(FLOAT_MAX_VAL, gvl);
+                v_min_index = VMVVX_UINT(0, gvl);
+                for(i=0,j=0; i < n/gvl; i++){
+                        vx = VLEV_FLOAT(&x[j], gvl);
+                        //fabs(vector)
+                        mask = VMFLTVF_FLOAT(vx, 0, gvl);
+                        vx = VFRSUBVF_MASK_FLOAT(vx, vx, 0, mask, gvl);
+
+                        //index where element less than v_min
+                        mask = VMFLTVV_FLOAT(vx, v_min, gvl);
+                        v_min_index = VIDV_MASK_UINT(v_min_index, mask, gvl);
+                        v_min_index = VADDVX_MASK_UINT(v_min_index, v_min_index, j, mask, gvl);
+
+                        //update v_min and start_index j
+                        v_min = VFMINVV_FLOAT(v_min, vx, gvl);
+                        j += gvl;
+                }
+                //reduce the lanes to the scalar minimum and look up its index
+                vx = VFMVVF_FLOAT(FLOAT_MAX_VAL, gvl);
+                vx = VFREDMINVS_FLOAT(v_min, vx, gvl);
+                minf = vx[0];
+                mask = VMFLEVF_FLOAT(v_min, minf, gvl);
+                min_index = VMFIRSTM(mask,gvl);
+                min_index = v_min_index[min_index];
+
+                //leftover n-j elements that did not fill a whole group
+                if(j < n){
+                        gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M);
+                        vx = VLEV_FLOAT(&x[j], gvl);
+                        //fabs(vector)
+                        mask = VMFLTVF_FLOAT(vx, 0, gvl);
+                        v_min = VFRSUBVF_MASK_FLOAT(vx, vx, 0, mask, gvl);
+
+                        vx = VFMVVF_FLOAT(FLOAT_MAX_VAL, gvl);
+                        vx = VFREDMINVS_FLOAT(v_min, vx, gvl);
+                        FLOAT cur_minf = vx[0];
+                        //strict '<': an equal value in the tail keeps the earlier hit
+                        if(cur_minf < minf){
+                                //tail index
+                                v_min_index = VIDV_UINT(gvl);
+                                v_min_index = VADDVX_UINT(v_min_index, j, gvl);
+
+                                mask = VMFLEVF_FLOAT(v_min, cur_minf, gvl);
+                                min_index = VMFIRSTM(mask,gvl);
+                                min_index = v_min_index[min_index];
+                        }
+                }
+        }else{
+                gvl = vsetvli(n, RVV_EFLOAT, RVV_M);
+                //stride_x is in bytes (for the strided load), idx and inc_v in elements
+                unsigned int stride_x = inc_x * sizeof(FLOAT);
+                unsigned int idx = 0, inc_v = gvl * inc_x;
+
+                v_min = VFMVVF_FLOAT(FLOAT_MAX_VAL, gvl);
+                v_min_index = VMVVX_UINT(0, gvl);
+                for(i=0,j=0; i < n/gvl; i++){
+                        vx = VLSEV_FLOAT(&x[idx], stride_x, gvl);
+                        //fabs(vector)
+                        mask = VMFLTVF_FLOAT(vx, 0, gvl);
+                        vx = VFRSUBVF_MASK_FLOAT(vx, vx, 0, mask, gvl);
+
+                        //index where element less than v_min
+                        mask = VMFLTVV_FLOAT(vx, v_min, gvl);
+                        v_min_index = VIDV_MASK_UINT(v_min_index, mask, gvl);
+                        v_min_index = VADDVX_MASK_UINT(v_min_index, v_min_index, j, mask, gvl);
+
+                        //update v_min and start_index j
+                        v_min = VFMINVV_FLOAT(v_min, vx, gvl);
+                        j += gvl;
+                        idx += inc_v;
+                }
+                //reduce the lanes to the scalar minimum and look up its index
+                vx = VFMVVF_FLOAT(FLOAT_MAX_VAL, gvl);
+                vx = VFREDMINVS_FLOAT(v_min, vx, gvl);
+                minf = vx[0];
+                mask = VMFLEVF_FLOAT(v_min, minf, gvl);
+                min_index = VMFIRSTM(mask,gvl);
+                min_index = v_min_index[min_index];
+
+                //leftover n-j elements that did not fill a whole group
+                if(j < n){
+                        gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M);
+                        vx = VLSEV_FLOAT(&x[idx], stride_x, gvl);
+                        //fabs(vector)
+                        mask = VMFLTVF_FLOAT(vx, 0, gvl);
+                        v_min = VFRSUBVF_MASK_FLOAT(vx, vx, 0, mask, gvl);
+
+                        vx = VFMVVF_FLOAT(FLOAT_MAX_VAL, gvl);
+                        vx = VFREDMINVS_FLOAT(v_min, vx, gvl);
+                        FLOAT cur_minf = vx[0];
+                        //strict '<': an equal value in the tail keeps the earlier hit
+                        if(cur_minf < minf){
+                                //tail index
+                                v_min_index = VIDV_UINT(gvl);
+                                v_min_index = VADDVX_UINT(v_min_index, j, gvl);
+
+                                mask = VMFLEVF_FLOAT(v_min, cur_minf, gvl);
+                                min_index = VMFIRSTM(mask,gvl);
+                                min_index = v_min_index[min_index];
+                        }
+                }
+        }
+        return(min_index+1);
+}
+
+
diff --git a/kernel/riscv64/imax.c b/kernel/riscv64/imax.c
new file mode 100644
index 000000000..5072dd16e
--- /dev/null
+++ b/kernel/riscv64/imax.c
@@ -0,0 +1,69 @@
+/***************************************************************************
+Copyright (c) 2013, The OpenBLAS Project
+All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in
+the documentation and/or other materials provided with the
+distribution.
+3. Neither the name of the OpenBLAS project nor the names of
+its contributors may be used to endorse or promote products
+derived from this software without specific prior written permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*****************************************************************************/
+
+
+/**************************************************************************************
+* 2013/09/14 Saar
+* BLASTEST float : NoTest
+* BLASTEST double : NoTest
+* CTEST : NoTest
+* TEST : NoTest
+*
+**************************************************************************************/
+
+#include "common.h"
+#include <math.h>
+
+
+
+/*
+ * Index of the first element of x with the largest value (signed
+ * comparison; no absolute value is taken).
+ *
+ * n      number of elements to examine
+ * x      input vector
+ * inc_x  distance, in elements, between consecutive entries of x
+ *
+ * Returns the 1-based position of that element, or 0 when n <= 0 or
+ * inc_x <= 0.
+ */
+BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
+{
+    BLASLONG i;
+    BLASLONG ix;
+    BLASLONG max = 0;
+    FLOAT maxf;
+
+    if (n <= 0 || inc_x <= 0) return(0);
+
+    maxf = x[0];
+
+    for (i = 1, ix = inc_x; i < n; i++, ix += inc_x)
+    {
+        /* strict '>' keeps the earliest of several equal maxima */
+        if (x[ix] > maxf)
+        {
+            max = i;
+            maxf = x[ix];
+        }
+    }
+    return(max+1);
+}
+
+
diff --git a/kernel/riscv64/imax_vector.c b/kernel/riscv64/imax_vector.c
new file mode 100644
index 000000000..44af7101b
--- /dev/null
+++ b/kernel/riscv64/imax_vector.c
@@ -0,0 +1,176 @@
+/***************************************************************************
+Copyright (c) 2020, The OpenBLAS Project
+All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+1.
Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*****************************************************************************/
+
+#include "common.h"
+#include <math.h>
+#include <float.h>
+
+#if defined(DOUBLE)
+
+#define ABS fabs
+/* largest finite FLOAT: its negation is the start value of the running maximum */
+#define FLOAT_MAX_VAL DBL_MAX
+#define RVV_EFLOAT RVV_E64
+#define RVV_M RVV_M8
+#define FLOAT_V_T float64xm8_t
+#define VLEV_FLOAT vlev_float64xm8
+#define VLSEV_FLOAT vlsev_float64xm8
+#define VFREDMAXVS_FLOAT vfredmaxvs_float64xm8
+#define MASK_T e64xm8_t
+#define VMFLTVV_FLOAT vmfltvv_e64xm8_float64xm8
+#define VFMVVF_FLOAT vfmvvf_float64xm8
+#define VFMAXVV_FLOAT vfmaxvv_float64xm8
+#define VMFGEVF_FLOAT vmfgevf_e64xm8_float64xm8
+#define VMFIRSTM vmfirstm_e64xm8
+#define UINT_V_T uint64xm8_t
+#define VIDV_MASK_UINT vidv_mask_uint64xm8
+#define VIDV_UINT vidv_uint64xm8
+#define VADDVX_MASK_UINT vaddvx_mask_uint64xm8
+#define VADDVX_UINT vaddvx_uint64xm8
+#define VMVVX_UINT vmvvx_uint64xm8
+#else
+
+#define ABS fabsf
+/* largest finite FLOAT: its negation is the start value of the running maximum */
+#define FLOAT_MAX_VAL FLT_MAX
+#define RVV_EFLOAT RVV_E32
+#define RVV_M RVV_M8
+#define FLOAT_V_T float32xm8_t
+#define VLEV_FLOAT vlev_float32xm8
+#define VLSEV_FLOAT vlsev_float32xm8
+#define VFREDMAXVS_FLOAT vfredmaxvs_float32xm8
+#define MASK_T e32xm8_t
+#define VMFLTVV_FLOAT vmfltvv_e32xm8_float32xm8
+#define VFMVVF_FLOAT vfmvvf_float32xm8
+#define VFMAXVV_FLOAT vfmaxvv_float32xm8
+#define VMFGEVF_FLOAT vmfgevf_e32xm8_float32xm8
+#define VMFIRSTM vmfirstm_e32xm8
+#define UINT_V_T uint32xm8_t
+#define VIDV_MASK_UINT vidv_mask_uint32xm8
+#define VIDV_UINT vidv_uint32xm8
+#define VADDVX_MASK_UINT vaddvx_mask_uint32xm8
+#define VADDVX_UINT vaddvx_uint32xm8
+#define VMVVX_UINT vmvvx_uint32xm8
+#endif
+
+
+/*
+ * Index of an element of x with the largest value (signed comparison, no
+ * absolute value), computed with vector intrinsics.
+ *
+ * n      number of elements to examine
+ * x      input vector
+ * inc_x  distance, in elements, between consecutive entries of x;
+ *        inc_x == 1 uses the unit-stride load, any other positive value
+ *        the strided load
+ *
+ * Returns the 1-based position of the selected element, or 0 when n <= 0
+ * or inc_x <= 0.
+ *
+ * Both paths work the same way: the first n/gvl groups of gvl elements are
+ * folded into a per-lane running maximum (v_max) together with the element
+ * index that produced it (v_max_index); the lanes are then reduced to the
+ * scalar maxf, and the n-j elements left over are examined as one shorter
+ * group.
+ *
+ * The running maximum starts at -FLOAT_MAX_VAL, the most negative finite
+ * value of the build's FLOAT type, so that DOUBLE inputs below -FLT_MAX are
+ * still recorded.
+ *
+ * NOTE(review): VMFIRSTM presumably returns the lowest matching lane, so
+ * with equal maxima in different lanes the result need not be the lowest
+ * index - confirm against the scalar kernel's first-occurrence behaviour.
+ */
+BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
+{
+        BLASLONG i=0, j=0;
+        unsigned int max_index = 0;
+        if (n <= 0 || inc_x <= 0) return(max_index);
+        FLOAT maxf=-FLOAT_MAX_VAL;
+
+        FLOAT_V_T vx, v_max;
+        UINT_V_T v_max_index;
+        MASK_T mask;
+        unsigned int gvl = 0;
+        if(inc_x == 1){
+                //gvl: group length returned by vsetvli, passed to every vector call
+                gvl = vsetvli(n, RVV_EFLOAT, RVV_M);
+                v_max_index = VMVVX_UINT(0, gvl);
+                v_max = VFMVVF_FLOAT(-FLOAT_MAX_VAL, gvl);
+                for(i=0,j=0; i < n/gvl; i++){
+                        vx = VLEV_FLOAT(&x[j], gvl);
+
+                        //index where element greater than v_max
+                        mask = VMFLTVV_FLOAT(v_max, vx, gvl);
+                        v_max_index = VIDV_MASK_UINT(v_max_index, mask, gvl);
+                        v_max_index = VADDVX_MASK_UINT(v_max_index, v_max_index, j, mask, gvl);
+
+                        //update v_max and start_index j
+                        v_max = VFMAXVV_FLOAT(v_max, vx, gvl);
+                        j += gvl;
+                }
+                //reduce the lanes to the scalar maximum and look up its index
+                vx = VFMVVF_FLOAT(-FLOAT_MAX_VAL, gvl);
+                vx = VFREDMAXVS_FLOAT(v_max, vx, gvl);
+                maxf = vx[0];
+                mask = VMFGEVF_FLOAT(v_max, maxf, gvl);
+                max_index = VMFIRSTM(mask,gvl);
+                max_index = v_max_index[max_index];
+
+                //leftover n-j elements that did not fill a whole group
+                if(j < n){
+                        gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M);
+                        v_max = VLEV_FLOAT(&x[j], gvl);
+
+                        vx = VFMVVF_FLOAT(-FLOAT_MAX_VAL, gvl);
+                        vx = VFREDMAXVS_FLOAT(v_max, vx, gvl);
+                        FLOAT cur_maxf = vx[0];
+                        //strict '>': an equal value in the tail keeps the earlier hit
+                        if(cur_maxf > maxf){
+                                //tail index
+                                v_max_index = VIDV_UINT(gvl);
+                                v_max_index = VADDVX_UINT(v_max_index, j, gvl);
+
+                                mask = VMFGEVF_FLOAT(v_max, cur_maxf, gvl);
+                                max_index = VMFIRSTM(mask,gvl);
+                                max_index = v_max_index[max_index];
+                        }
+                }
+        }else{
+                gvl = vsetvli(n, RVV_EFLOAT, RVV_M);
+                //stride_x is in bytes (for the strided load), idx and inc_v in elements
+                unsigned int stride_x = inc_x * sizeof(FLOAT);
+                unsigned int idx = 0, inc_v = gvl * inc_x;
+
+                v_max = VFMVVF_FLOAT(-FLOAT_MAX_VAL, gvl);
+                v_max_index = VMVVX_UINT(0, gvl);
+                for(i=0,j=0; i < n/gvl; i++){
+                        vx = VLSEV_FLOAT(&x[idx], stride_x, gvl);
+
+                        //index where element greater than v_max
+                        mask = VMFLTVV_FLOAT(v_max, vx, gvl);
+                        v_max_index = VIDV_MASK_UINT(v_max_index, mask, gvl);
+                        v_max_index = VADDVX_MASK_UINT(v_max_index, v_max_index, j, mask, gvl);
+
+                        //update v_max and start_index j
+                        v_max = VFMAXVV_FLOAT(v_max, vx, gvl);
+                        j += gvl;
+                        idx += inc_v;
+                }
+                //reduce the lanes to the scalar maximum and look up its index
+                vx = VFMVVF_FLOAT(-FLOAT_MAX_VAL, gvl);
+                vx = VFREDMAXVS_FLOAT(v_max, vx, gvl);
+                maxf = vx[0];
+                mask = VMFGEVF_FLOAT(v_max, maxf, gvl);
+                max_index = VMFIRSTM(mask,gvl);
+                max_index = v_max_index[max_index];
+
+                //leftover n-j elements that did not fill a whole group
+                if(j < n){
+                        gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M);
+                        v_max = VLSEV_FLOAT(&x[idx], stride_x, gvl);
+
+                        vx = VFMVVF_FLOAT(-FLOAT_MAX_VAL, gvl);
+                        vx = VFREDMAXVS_FLOAT(v_max, vx, gvl);
+                        FLOAT cur_maxf = vx[0];
+                        //strict '>': an equal value in the tail keeps the earlier hit
+                        if(cur_maxf > maxf){
+                                //tail index
+                                v_max_index = VIDV_UINT(gvl);
+                                v_max_index = VADDVX_UINT(v_max_index, j, gvl);
+
+                                mask = VMFGEVF_FLOAT(v_max, cur_maxf, gvl);
+                                max_index = VMFIRSTM(mask,gvl);
+                                max_index = v_max_index[max_index];
+                        }
+                }
+        }
+        return(max_index+1);
+}
+
+
diff --git a/kernel/riscv64/imin.c b/kernel/riscv64/imin.c
new file mode 100644
index 000000000..ffc65226e
--- /dev/null
+++ b/kernel/riscv64/imin.c
@@ -0,0 +1,67 @@
+/***************************************************************************
+Copyright (c) 2013, The OpenBLAS Project
+All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in
+the documentation and/or other materials provided with the
+distribution.
+3. Neither the name of the OpenBLAS project nor the names of
+its contributors may be used to endorse or promote products
+derived from this software without specific prior written permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED.
IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*****************************************************************************/
+
+
+/**************************************************************************************
+* 2013/08/19 Saar
+* BLASTEST float
+* BLASTEST double
+*
+**************************************************************************************/
+
+#include "common.h"
+#include <math.h>
+
+
+
+/*
+ * Index of the first element of x with the smallest value (signed
+ * comparison; no absolute value is taken).
+ *
+ * n      number of elements to examine
+ * x      input vector
+ * inc_x  distance, in elements, between consecutive entries of x
+ *
+ * Returns the 1-based position of that element, or 0 when n <= 0 or
+ * inc_x <= 0.
+ */
+BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
+{
+    BLASLONG i;
+    BLASLONG ix;
+    BLASLONG min = 0;
+    FLOAT minf;
+
+    if (n <= 0 || inc_x <= 0) return(0);
+
+    minf = x[0];
+
+    for (i = 1, ix = inc_x; i < n; i++, ix += inc_x)
+    {
+        /* strict '<' keeps the earliest of several equal minima */
+        if (x[ix] < minf)
+        {
+            min = i;
+            minf = x[ix];
+        }
+    }
+    return(min+1);
+}
+
+
diff --git a/kernel/riscv64/imin_vector.c b/kernel/riscv64/imin_vector.c
new file mode 100644
index 000000000..e6e0e9f9f
--- /dev/null
+++ b/kernel/riscv64/imin_vector.c
@@ -0,0 +1,212 @@
+/***************************************************************************
+Copyright (c) 2020, The OpenBLAS Project
+All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+2.
Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*****************************************************************************/
+
+#include "common.h"
+#include <math.h>
+#include <float.h>
+
+#if defined(DOUBLE)
+
+#define ABS fabs
+/* largest finite FLOAT: start value of the running minimum */
+#define FLOAT_MAX_VAL DBL_MAX
+#define RVV_EFLOAT RVV_E64
+#define RVV_M RVV_M8
+#define FLOAT_V_T float64xm8_t
+#define VLEV_FLOAT vlev_float64xm8
+#define VLSEV_FLOAT vlsev_float64xm8
+#define VFREDMINVS_FLOAT vfredminvs_float64xm8
+#define MASK_T e64xm8_t
+#define VMFLTVV_FLOAT vmfltvv_e64xm8_float64xm8
+#define VFMVVF_FLOAT vfmvvf_float64xm8
+#define VFMINVV_FLOAT vfminvv_float64xm8
+#define VMFLEVF_FLOAT vmflevf_e64xm8_float64xm8
+#define VMFIRSTM vmfirstm_e64xm8
+#define UINT_V_T uint64xm8_t
+#define VIDV_MASK_UINT vidv_mask_uint64xm8
+#define VIDV_UINT vidv_uint64xm8
+#define VADDVX_MASK_UINT vaddvx_mask_uint64xm8
+#define VADDVX_UINT vaddvx_uint64xm8
+#define VMVVX_UINT vmvvx_uint64xm8
+#else
+
+#define ABS fabsf
+/* largest finite FLOAT: start value of the running minimum */
+#define FLOAT_MAX_VAL FLT_MAX
+#define RVV_EFLOAT RVV_E32
+#define RVV_M RVV_M8
+#define FLOAT_V_T float32xm8_t
+#define VLEV_FLOAT vlev_float32xm8
+#define VLSEV_FLOAT vlsev_float32xm8
+#define VFREDMINVS_FLOAT vfredminvs_float32xm8
+#define MASK_T e32xm8_t
+#define VMFLTVV_FLOAT vmfltvv_e32xm8_float32xm8
+#define VFMVVF_FLOAT vfmvvf_float32xm8
+#define VFMINVV_FLOAT vfminvv_float32xm8
+#define VMFLEVF_FLOAT vmflevf_e32xm8_float32xm8
+#define VMFIRSTM vmfirstm_e32xm8
+#define UINT_V_T uint32xm8_t
+#define VIDV_MASK_UINT vidv_mask_uint32xm8
+#define VIDV_UINT vidv_uint32xm8
+#define VADDVX_MASK_UINT vaddvx_mask_uint32xm8
+#define VADDVX_UINT vaddvx_uint32xm8
+#define VMVVX_UINT vmvvx_uint32xm8
+#endif
+
+
+/*
+ * Index of an element of x with the smallest value (signed comparison, no
+ * absolute value), computed with vector intrinsics.
+ *
+ * n      number of elements to examine
+ * x      input vector
+ * inc_x  distance, in elements, between consecutive entries of x;
+ *        inc_x == 1 uses the unit-stride load, any other positive value
+ *        the strided load
+ *
+ * Returns the 1-based position of the selected element, or 0 when n <= 0
+ * or inc_x <= 0.
+ *
+ * Both paths work the same way: the first n/gvl groups of gvl elements are
+ * folded into a per-lane running minimum (v_min) together with the element
+ * index that produced it (v_min_index); the lanes are then reduced to the
+ * scalar minf, and the n-j elements left over are examined as one shorter
+ * group.
+ *
+ * The running minimum starts at FLOAT_MAX_VAL, the largest finite value of
+ * the build's FLOAT type, so that DOUBLE inputs larger than FLT_MAX are
+ * still recorded.
+ *
+ * NOTE(review): VMFIRSTM presumably returns the lowest matching lane, so
+ * with equal minima in different lanes the result need not be the lowest
+ * index - confirm against the scalar kernel's first-occurrence behaviour.
+ */
+BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
+{
+        BLASLONG i=0, j=0;
+        FLOAT minf=FLOAT_MAX_VAL;
+        unsigned int min_index = 0;
+        if (n <= 0 || inc_x <= 0) return(min_index);
+
+        FLOAT_V_T vx, v_min;
+        UINT_V_T v_min_index;
+        MASK_T mask;
+        unsigned int gvl = 0;
+        if(inc_x == 1){
+                //gvl: group length returned by vsetvli, passed to every vector call
+                gvl = vsetvli(n, RVV_EFLOAT, RVV_M);
+                v_min = VFMVVF_FLOAT(FLOAT_MAX_VAL, gvl);
+                v_min_index = VMVVX_UINT(0, gvl);
+                for(i=0,j=0; i < n/gvl; i++){
+                        vx = VLEV_FLOAT(&x[j], gvl);
+                        //index where element less than v_min
+                        mask = VMFLTVV_FLOAT(vx, v_min, gvl);
+                        v_min_index = VIDV_MASK_UINT(v_min_index, mask, gvl);
+                        v_min_index = VADDVX_MASK_UINT(v_min_index, v_min_index, j, mask, gvl);
+
+                        //update v_min and start_index j
+                        v_min = VFMINVV_FLOAT(v_min, vx, gvl);
+                        j += gvl;
+                }
+                //reduce the lanes to the scalar minimum and look up its index
+                vx = VFMVVF_FLOAT(FLOAT_MAX_VAL, gvl);
+                vx = VFREDMINVS_FLOAT(v_min, vx, gvl);
+                minf = vx[0];
+                mask = VMFLEVF_FLOAT(v_min, minf, gvl);
+                min_index = VMFIRSTM(mask,gvl);
+                min_index = v_min_index[min_index];
+
+                //leftover n-j elements that did not fill a whole group
+                if(j < n){
+                        gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M);
+                        v_min = VLEV_FLOAT(&x[j], gvl);
+
+                        vx = VFMVVF_FLOAT(FLOAT_MAX_VAL, gvl);
+                        vx = VFREDMINVS_FLOAT(v_min, vx, gvl);
+                        FLOAT cur_minf = vx[0];
+                        //strict '<': an equal value in the tail keeps the earlier hit
+                        if(cur_minf < minf){
+                                //tail index
+                                v_min_index = VIDV_UINT(gvl);
+                                v_min_index = VADDVX_UINT(v_min_index, j, gvl);
+                                mask = VMFLEVF_FLOAT(v_min, cur_minf, gvl);
+                                min_index = VMFIRSTM(mask,gvl);
+                                min_index = v_min_index[min_index];
+                        }
+                }
+        }else{
+                gvl = vsetvli(n, RVV_EFLOAT, RVV_M);
+                //stride_x is in bytes (for the strided load), idx and inc_v in elements
+                unsigned int stride_x = inc_x * sizeof(FLOAT);
+                unsigned int idx = 0, inc_v = gvl * inc_x;
+
+                v_min = VFMVVF_FLOAT(FLOAT_MAX_VAL, gvl);
+                v_min_index = VMVVX_UINT(0, gvl);
+                for(i=0,j=0; i < n/gvl; i++){
+                        vx = VLSEV_FLOAT(&x[idx], stride_x, gvl);
+
+                        //index where element less than v_min
+                        mask = VMFLTVV_FLOAT(vx, v_min, gvl);
+                        v_min_index = VIDV_MASK_UINT(v_min_index, mask, gvl);
+
+                        v_min_index = VADDVX_MASK_UINT(v_min_index, v_min_index, j, mask, gvl);
+
+                        //update v_min and start_index j
+                        v_min = VFMINVV_FLOAT(v_min, vx, gvl);
+                        j += gvl;
+                        idx += inc_v;
+                }
+                //reduce the lanes to the scalar minimum and look up its index
+                vx = VFMVVF_FLOAT(FLOAT_MAX_VAL, gvl);
+                vx = VFREDMINVS_FLOAT(v_min, vx, gvl);
+                minf = vx[0];
+                mask = VMFLEVF_FLOAT(v_min, minf, gvl);
+                min_index = VMFIRSTM(mask,gvl);
+                min_index = v_min_index[min_index];
+
+                //leftover n-j elements that did not fill a whole group
+                if(j < n){
+                        gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M);
+                        v_min = VLSEV_FLOAT(&x[idx], stride_x, gvl);
+
+                        vx = VFMVVF_FLOAT(FLOAT_MAX_VAL, gvl);
+                        vx = VFREDMINVS_FLOAT(v_min, vx, gvl);
+                        FLOAT cur_minf = vx[0];
+                        //strict '<': an equal value in the tail keeps the earlier hit
+                        if(cur_minf < minf){
+                                //tail index
+                                v_min_index = VIDV_UINT(gvl);
+                                v_min_index = VADDVX_UINT(v_min_index, j, gvl);
+                                mask = VMFLEVF_FLOAT(v_min, cur_minf, gvl);
+                                min_index = VMFIRSTM(mask,gvl);
+                                min_index = v_min_index[min_index];
+                        }
+                }
+        }
+        return(min_index+1);
+}
+
+
diff --git a/kernel/riscv64/izamax.c b/kernel/riscv64/izamax.c
new file mode 100644
index 000000000..8fe33e95b
--- /dev/null
+++ b/kernel/riscv64/izamax.c
@@ -0,0 +1,81 @@
+/***************************************************************************
+Copyright (c) 2013, The OpenBLAS Project
+All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in
+the documentation and/or other materials provided with the
+distribution.
+3. Neither the name of the OpenBLAS project nor the names of
+its contributors may be used to endorse or promote products
+derived from this software without specific prior written permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*****************************************************************************/
+
+/**************************************************************************************
+* 2013/09/14 Saar
+* BLASTEST float : NoTest
+* BLASTEST double : NoTest
+* CTEST : OK
+* TEST : OK
+*
+**************************************************************************************/
+
+#include "common.h"
+#include <math.h>
+
+#if defined(DOUBLE)
+
+#define ABS fabs
+
+#else
+
+#define ABS fabsf
+
+#endif
+
+/* |x[i]| + |x[i+1]|; fully parenthesised so it is safe inside any expression */
+#define CABS1(x,i) (ABS((x)[(i)])+ABS((x)[(i)+1]))
+
+/*
+ * Index of the first entry of x whose CABS1 value, |x[k]| + |x[k+1]|, is
+ * largest. Entries are pairs of adjacent FLOATs (presumably the real and
+ * imaginary parts of a complex number - confirm against the callers).
+ *
+ * n      number of pairs to examine
+ * x      input vector of FLOAT pairs
+ * inc_x  distance, in pairs, between consecutive entries (the FLOAT
+ *        offset advances by 2 * inc_x)
+ *
+ * Returns the 1-based position of that entry, or 0 when n <= 0 or
+ * inc_x <= 0.
+ */
+BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
+{
+    BLASLONG i;
+    BLASLONG ix;
+    BLASLONG inc_x2;
+    BLASLONG max = 0;
+    FLOAT maxf;
+
+    if (n <= 0 || inc_x <= 0) return(0);
+
+    inc_x2 = 2 * inc_x;
+    maxf = CABS1(x,0);
+
+    for (i = 1, ix = inc_x2; i < n; i++, ix += inc_x2)
+    {
+        /* the magnitude is computed once per entry */
+        FLOAT cabs1 = CABS1(x,ix);
+
+        /* strict '>' keeps the earliest of several equal maxima */
+        if (cabs1 > maxf)
+        {
+            max = i;
+            maxf = cabs1;
+        }
+    }
+    return(max+1);
+}
+
+
diff --git a/kernel/riscv64/izamax_vector.c b/kernel/riscv64/izamax_vector.c
new file mode 100644
index 000000000..62c95d973
--- /dev/null
+++ b/kernel/riscv64/izamax_vector.c
@@ -0,0 +1,246 @@
+/***************************************************************************
+Copyright (c) 2020, The OpenBLAS Project
+All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*****************************************************************************/
+
+#include "common.h"
+#include <math.h>
+
+#if defined(DOUBLE)
+
+#define RVV_EFLOAT RVV_E64
+#define FLOAT_V_T float64xm8_t
+#define VLSEV_FLOAT vlsev_float64xm8
+#define VFREDMAXVS_FLOAT vfredmaxvs_float64xm8
+#define MASK_T e64xm8_t
+#define VMFLTVF_FLOAT vmfltvf_e64xm8_float64xm8
+#define VMFLTVV_FLOAT vmfltvv_e64xm8_float64xm8
+#define VFMVVF_FLOAT vfmvvf_float64xm8
+#define VFRSUBVF_MASK_FLOAT vfrsubvf_mask_float64xm8
+#define VFMAXVV_FLOAT vfmaxvv_float64xm8
+#define VMFGEVF_FLOAT vmfgevf_e64xm8_float64xm8
+#define VMFIRSTM vmfirstm_e64xm8
+#define UINT_V_T uint64xm8_t
+#define VIDV_MASK_UINT vidv_mask_uint64xm8
+#define VIDV_UINT vidv_uint64xm8
+#define VADDVX_MASK_UINT vaddvx_mask_uint64xm8
+#define VADDVX_UINT vaddvx_uint64xm8
+#define VFADDVV_FLOAT vfaddvv_float64xm8
+#define VMVVX_UINT vmvvx_uint64xm8
+#else
+
+#define ABS fabsf
+#define RVV_EFLOAT RVV_E32
+#define FLOAT_V_T float32xm8_t
+#define VLSEV_FLOAT vlsev_float32xm8
+#define VFREDMAXVS_FLOAT vfredmaxvs_float32xm8
+#define MASK_T e32xm8_t
+#define VMFLTVF_FLOAT vmfltvf_e32xm8_float32xm8
+#define VMFLTVV_FLOAT vmfltvv_e32xm8_float32xm8
+#define VFMVVF_FLOAT vfmvvf_float32xm8
+#define VFRSUBVF_MASK_FLOAT vfrsubvf_mask_float32xm8
+#define VFMAXVV_FLOAT vfmaxvv_float32xm8
+#define VMFGEVF_FLOAT vmfgevf_e32xm8_float32xm8
+#define VMFIRSTM vmfirstm_e32xm8
+#define UINT_V_T uint32xm8_t
+#define VIDV_MASK_UINT vidv_mask_uint32xm8
+#define VIDV_UINT vidv_uint32xm8
+#define VADDVX_MASK_UINT vaddvx_mask_uint32xm8
+#define VADDVX_UINT vaddvx_uint32xm8
+#define VFADDVV_FLOAT vfaddvv_float32xm8
+#define VMVVX_UINT vmvvx_uint32xm8
+#endif
+
+#define RVV_M RVV_M8
+
+/*
+ * izamax, RISC-V vector kernel: returns the 1-based position of the complex
+ * element of x with the largest |re| + |im|, or 0 when n <= 0 or inc_x <= 0.
+ *
+ *   n      number of complex elements examined
+ *   x      interleaved (re, im) values; the real parts are read starting at
+ *          x[ix] and the imaginary parts at x[ix+1]
+ *   inc_x  element stride, counted in complex elements
+ *
+ * Whole groups of gvl elements are processed first, keeping a per-lane
+ * running maximum (v_max) and, per lane, the element index recorded when that
+ * lane last grew (v_max_index). A shorter tail of n - j elements is handled
+ * afterwards and only replaces the result when its maximum is strictly larger.
+ *
+ * NOTE(review): the winning lane is chosen with VMFIRSTM, presumably the
+ * lowest lane whose value equals the maximum. If several lanes tie, the index
+ * stored in that lane need not be the smallest element index, so ties may be
+ * resolved differently from the scalar kernel - confirm.
+ * NOTE(review): the position travels through an unsigned int and UINT_V_T
+ * lanes, which is presumably narrower than BLASLONG in the float build -
+ * confirm that callers never pass an n that large.
+ */
+BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
+{
+        BLASLONG i=0, j=0;
+        FLOAT maxf=0.0;
+        unsigned int max_index = 0;
+        if (n <= 0 || inc_x <= 0) return(max_index);
+
+        FLOAT_V_T vx0, vx1, v_max;
+        UINT_V_T v_max_index;
+        MASK_T mask0, mask1;
+        unsigned int gvl = 0;
+        gvl = vsetvli(n, RVV_EFLOAT, RVV_M);
+        v_max_index = VMVVX_UINT(0, gvl);
+        // |re|+|im| is never negative, so -1 loses to the first value seen
+        v_max = VFMVVF_FLOAT(-1, gvl);
+        // presumably a byte stride: one complex element is 2 FLOATs - confirm
+        BLASLONG stride_x = inc_x * 2 * sizeof(FLOAT);
+        BLASLONG inc_xv = gvl * inc_x * 2;
+        BLASLONG ix = 0;
+        for(i=0,j=0; i < n/gvl; i++){
+                vx0 = VLSEV_FLOAT(&x[ix], stride_x, gvl);
+                //fabs(vector)
+                mask0 = VMFLTVF_FLOAT(vx0, 0, gvl);
+                vx0 = VFRSUBVF_MASK_FLOAT(vx0, vx0, 0, mask0, gvl);
+                vx1 = VLSEV_FLOAT(&x[ix+1], stride_x, gvl);
+                //fabs(vector)
+                mask1 = VMFLTVF_FLOAT(vx1, 0, gvl);
+                vx1 = VFRSUBVF_MASK_FLOAT(vx1, vx1, 0, mask1, gvl);
+                vx0 = VFADDVV_FLOAT(vx0, vx1, gvl);
+
+                //index where element greater than v_max
+                mask0 = VMFLTVV_FLOAT(v_max, vx0, gvl);
+                v_max_index = VIDV_MASK_UINT(v_max_index, mask0, gvl);
+                v_max_index = VADDVX_MASK_UINT(v_max_index, v_max_index, j, mask0, gvl);
+
+                //update v_max and start_index j
+                v_max = VFMAXVV_FLOAT(v_max, vx0, gvl);
+                j += gvl;
+                ix += inc_xv;
+        }
+        // reduce the per-lane maxima to one value, then find a lane holding it
+        vx0 = VFMVVF_FLOAT(0, gvl);
+        vx0 = VFREDMAXVS_FLOAT(v_max, vx0, gvl);
+        maxf = vx0[0];
+        mask0 = VMFGEVF_FLOAT(v_max, maxf, gvl);
+        max_index = VMFIRSTM(mask0,gvl);
+        max_index = v_max_index[max_index];
+
+        if(j < n){
+                gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M);
+                v_max_index = VMVVX_UINT(0, gvl);
+                vx0 = VLSEV_FLOAT(&x[ix], stride_x, gvl);
+                //fabs(vector)
+                mask0 = VMFLTVF_FLOAT(vx0, 0, gvl);
+                vx0 = VFRSUBVF_MASK_FLOAT(vx0, vx0, 0, mask0, gvl);
+                vx1 = VLSEV_FLOAT(&x[ix+1], stride_x, gvl);
+                //fabs(vector)
+                mask1 = VMFLTVF_FLOAT(vx1, 0, gvl);
+                vx1 = VFRSUBVF_MASK_FLOAT(vx1, vx1, 0, mask1, gvl);
+                v_max = VFADDVV_FLOAT(vx0, vx1, gvl);
+                vx0 = VFMVVF_FLOAT(0, gvl);
+                vx0 = VFREDMAXVS_FLOAT(v_max, vx0, gvl);
+                FLOAT cur_maxf = vx0[0];
+                // strict '>' keeps the earlier position when the tail only ties
+                if(cur_maxf > maxf){
+                        //tail index
+                        v_max_index = VIDV_UINT(gvl);
+                        v_max_index = VADDVX_UINT(v_max_index, j, gvl);
+
+                        mask0 = VMFGEVF_FLOAT(v_max, cur_maxf, gvl);
+                        max_index = VMFIRSTM(mask0,gvl);
+                        max_index = v_max_index[max_index];
+                }
+        }
+        return(max_index+1);
+}
+
+
diff --git a/kernel/riscv64/izamin.c b/kernel/riscv64/izamin.c
new file mode 100644
index 000000000..fb5a0d4cb
--- /dev/null
+++ b/kernel/riscv64/izamin.c
@@ -0,0 +1,81 @@
+/*************************************************************************** +Copyright (c) 2013, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*****************************************************************************/
+
+/**************************************************************************************
+* 2013/09/14 Saar
+* BLASTEST float : NoTest
+* BLASTEST double : NoTest
+* CTEST : NoTest
+* TEST : NoTest
+*
+**************************************************************************************/
+
+#include "common.h"
+#include <math.h>
+
+#if defined(DOUBLE)
+
+#define ABS fabs
+
+#else
+
+#define ABS fabsf
+
+#endif
+
+/* |re| + |im| of the complex value stored at x[i], x[i+1]. Fully
+ * parenthesised so it can be used inside any larger expression. */
+#define CABS1(x,i) (ABS((x)[(i)]) + ABS((x)[(i)+1]))
+
+/*
+ * izamin, scalar kernel: returns the 1-based position of the first complex
+ * element of x whose |re| + |im| is smallest.
+ *
+ *   n      number of complex elements examined
+ *   x      interleaved (re, im) values
+ *   inc_x  element stride, counted in complex elements
+ *
+ * Returns 0 when n <= 0 or inc_x <= 0.
+ */
+BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
+{
+        BLASLONG i;
+        BLASLONG ix;
+        BLASLONG min = 0;
+        BLASLONG inc_x2;
+        FLOAT minf;
+
+        if (n <= 0 || inc_x <= 0) return(min);
+
+        /* each complex element occupies two FLOATs */
+        inc_x2 = 2 * inc_x;
+
+        /* element 0 is the initial candidate */
+        minf = CABS1(x, 0);
+
+        for (i = 1, ix = inc_x2; i < n; i++, ix += inc_x2) {
+                FLOAT cur = CABS1(x, ix);
+
+                /* strict '<' keeps the earliest position on ties */
+                if (cur < minf) {
+                        min = i;
+                        minf = cur;
+                }
+        }
+        return(min+1);
+}
+
+
diff --git a/kernel/riscv64/izamin_vector.c b/kernel/riscv64/izamin_vector.c
new file mode 100644
index 000000000..38eccf1b5
--- /dev/null
+++ b/kernel/riscv64/izamin_vector.c
@@ -0,0 +1,247 @@
+/***************************************************************************
+Copyright (c) 2020, The OpenBLAS Project
+All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in
+the documentation and/or other materials provided with the
+distribution.
+3. Neither the name of the OpenBLAS project nor the names of
+its contributors may be used to endorse or promote products
+derived from this software without specific prior written permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*****************************************************************************/
+
+#include "common.h"
+#include <math.h>
+#include <float.h>
+
+#if defined(DOUBLE)
+
+/* largest finite FLOAT: seed for the minimum search */
+#define FLOAT_MAX_VAL DBL_MAX
+#define RVV_EFLOAT RVV_E64
+#define FLOAT_V_T float64xm8_t
+#define VLSEV_FLOAT vlsev_float64xm8
+#define VFREDMINVS_FLOAT vfredminvs_float64xm8
+#define MASK_T e64xm8_t
+#define VMFLTVF_FLOAT vmfltvf_e64xm8_float64xm8
+#define VMFLTVV_FLOAT vmfltvv_e64xm8_float64xm8
+#define VFMVVF_FLOAT vfmvvf_float64xm8
+#define VFRSUBVF_MASK_FLOAT vfrsubvf_mask_float64xm8
+#define VFMINVV_FLOAT vfminvv_float64xm8
+#define VMFLEVF_FLOAT vmflevf_e64xm8_float64xm8
+#define VMFIRSTM vmfirstm_e64xm8
+#define UINT_V_T uint64xm8_t
+#define VIDV_MASK_UINT vidv_mask_uint64xm8
+#define VIDV_UINT vidv_uint64xm8
+#define VADDVX_MASK_UINT vaddvx_mask_uint64xm8
+#define VADDVX_UINT vaddvx_uint64xm8
+#define VFADDVV_FLOAT vfaddvv_float64xm8
+#define VMVVX_UINT vmvvx_uint64xm8
+#else
+
+/* largest finite FLOAT: seed for the minimum search */
+#define FLOAT_MAX_VAL FLT_MAX
+#define ABS fabsf
+#define RVV_EFLOAT RVV_E32
+#define FLOAT_V_T float32xm8_t
+#define VLSEV_FLOAT vlsev_float32xm8
+#define VFREDMINVS_FLOAT vfredminvs_float32xm8
+#define MASK_T e32xm8_t
+#define VMFLTVF_FLOAT vmfltvf_e32xm8_float32xm8
+#define VMFLTVV_FLOAT vmfltvv_e32xm8_float32xm8
+#define VFMVVF_FLOAT vfmvvf_float32xm8
+#define VFRSUBVF_MASK_FLOAT vfrsubvf_mask_float32xm8
+#define VFMINVV_FLOAT vfminvv_float32xm8
+#define VMFLEVF_FLOAT vmflevf_e32xm8_float32xm8
+#define VMFIRSTM vmfirstm_e32xm8
+#define UINT_V_T uint32xm8_t
+#define VIDV_MASK_UINT vidv_mask_uint32xm8
+#define VIDV_UINT vidv_uint32xm8
+#define VADDVX_MASK_UINT vaddvx_mask_uint32xm8
+#define VADDVX_UINT vaddvx_uint32xm8
+#define VFADDVV_FLOAT vfaddvv_float32xm8
+#define VMVVX_UINT vmvvx_uint32xm8
+#endif
+
+#define RVV_M RVV_M8
+
+/*
+ * izamin, RISC-V vector kernel: returns the 1-based position of the complex
+ * element of x with the smallest |re| + |im|, or 0 when n <= 0 or inc_x <= 0.
+ *
+ *   n      number of complex elements examined
+ *   x      interleaved (re, im) values; the real parts are read starting at
+ *          x[ix] and the imaginary parts at x[ix+1]
+ *   inc_x  element stride, counted in complex elements
+ *
+ * Whole groups of gvl elements are processed first, keeping a per-lane
+ * running minimum (v_min) and, per lane, the element index recorded when that
+ * lane last shrank (v_min_index). A shorter tail of n - j elements is handled
+ * afterwards and only replaces the result when its minimum is strictly
+ * smaller.
+ *
+ * The search is seeded with FLOAT_MAX_VAL, the largest finite value of the
+ * build's FLOAT type. Seeding a double build with FLT_MAX would leave v_min
+ * untouched whenever every magnitude exceeds FLT_MAX, and position 1 would be
+ * returned regardless of the data.
+ *
+ * NOTE(review): the winning lane is chosen with VMFIRSTM, presumably the
+ * lowest lane whose value equals the minimum. If several lanes tie, the index
+ * stored in that lane need not be the smallest element index, so ties may be
+ * resolved differently from the scalar kernel - confirm.
+ */
+BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
+{
+        BLASLONG i=0, j=0;
+        FLOAT minf=FLOAT_MAX_VAL;
+        unsigned int min_index = 0;
+        if (n <= 0 || inc_x <= 0) return(min_index);
+
+        FLOAT_V_T vx0, vx1, v_min;
+        UINT_V_T v_min_index;
+        MASK_T mask0, mask1;
+        unsigned int gvl = 0;
+        gvl = vsetvli(n, RVV_EFLOAT, RVV_M);
+        v_min_index = VMVVX_UINT(0, gvl);
+        v_min = VFMVVF_FLOAT(FLOAT_MAX_VAL, gvl);
+        // presumably a byte stride: one complex element is 2 FLOATs - confirm
+        BLASLONG stride_x = inc_x * 2 * sizeof(FLOAT);
+        BLASLONG inc_xv = gvl * inc_x * 2;
+        BLASLONG ix = 0;
+        for(i=0,j=0; i < n/gvl; i++){
+                vx0 = VLSEV_FLOAT(&x[ix], stride_x, gvl);
+                //fabs(vector)
+                mask0 = VMFLTVF_FLOAT(vx0, 0, gvl);
+                vx0 = VFRSUBVF_MASK_FLOAT(vx0, vx0, 0, mask0, gvl);
+                vx1 = VLSEV_FLOAT(&x[ix+1], stride_x, gvl);
+                //fabs(vector)
+                mask1 = VMFLTVF_FLOAT(vx1, 0, gvl);
+                vx1 = VFRSUBVF_MASK_FLOAT(vx1, vx1, 0, mask1, gvl);
+                vx0 = VFADDVV_FLOAT(vx0, vx1, gvl);
+
+                //index where element less than v_min
+                mask0 = VMFLTVV_FLOAT(vx0, v_min, gvl);
+                v_min_index = VIDV_MASK_UINT(v_min_index, mask0, gvl);
+                v_min_index = VADDVX_MASK_UINT(v_min_index, v_min_index, j, mask0, gvl);
+
+                //update v_min and start_index j
+                v_min = VFMINVV_FLOAT(v_min, vx0, gvl);
+                j += gvl;
+                ix += inc_xv;
+        }
+        // reduce the per-lane minima to one value, then find a lane holding it
+        vx0 = VFMVVF_FLOAT(FLOAT_MAX_VAL, gvl);
+        vx0 = VFREDMINVS_FLOAT(v_min, vx0, gvl);
+        minf = vx0[0];
+        mask0 = VMFLEVF_FLOAT(v_min, minf, gvl);
+        min_index = VMFIRSTM(mask0,gvl);
+        min_index = v_min_index[min_index];
+
+        if(j < n){
+                gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M);
+                v_min_index = VMVVX_UINT(0, gvl);
+                vx0 = VLSEV_FLOAT(&x[ix], stride_x, gvl);
+                //fabs(vector)
+                mask0 = VMFLTVF_FLOAT(vx0, 0, gvl);
+                vx0 = VFRSUBVF_MASK_FLOAT(vx0, vx0, 0, mask0, gvl);
+                vx1 = VLSEV_FLOAT(&x[ix+1], stride_x, gvl);
+                //fabs(vector)
+                mask1 = VMFLTVF_FLOAT(vx1, 0, gvl);
+                vx1 = VFRSUBVF_MASK_FLOAT(vx1, vx1, 0, mask1, gvl);
+                v_min = VFADDVV_FLOAT(vx0, vx1, gvl);
+                vx0 = VFMVVF_FLOAT(FLOAT_MAX_VAL, gvl);
+                vx0 = VFREDMINVS_FLOAT(v_min, vx0, gvl);
+                FLOAT cur_minf = vx0[0];
+                // strict '<' keeps the earlier position when the tail only ties
+                if(cur_minf < minf){
+                        //tail index
+                        v_min_index = VIDV_UINT(gvl);
+                        v_min_index = VADDVX_UINT(v_min_index, j, gvl);
+
+                        mask0 = VMFLEVF_FLOAT(v_min, cur_minf, gvl);
+                        min_index = VMFIRSTM(mask0,gvl);
+                        min_index = v_min_index[min_index];
+                }
+        }
+        return(min_index+1);
+}
+
+
diff --git a/kernel/riscv64/max.c b/kernel/riscv64/max.c
new file mode 100644
index 000000000..2ad956bc0
--- /dev/null
+++ b/kernel/riscv64/max.c
@@ -0,0 +1,65 @@
+/***************************************************************************
+Copyright (c) 2013, The OpenBLAS Project
+All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in
+the documentation and/or other materials provided with the
+distribution.
+3. Neither the name of the OpenBLAS project nor the names of
+its contributors may be used to endorse or promote products
+derived from this software without specific prior written permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED.
IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*****************************************************************************/
+
+/**************************************************************************************
+* 2013/09/14 Saar
+* BLASTEST float : NoTest
+* BLASTEST double : NoTest
+* CTEST : NoTest
+* TEST : NoTest
+*
+**************************************************************************************/
+
+#include "common.h"
+/* NOTE(review): the header name on this directive was missing; <math.h> is
+ * assumed - confirm. */
+#include <math.h>
+
+
+/*
+ * max, scalar kernel: returns the largest of the n values x[0], x[inc_x],
+ * x[2*inc_x], ...
+ *
+ *   n      number of elements examined
+ *   x      input vector
+ *   inc_x  stride between consecutive elements
+ *
+ * Returns 0.0 when n <= 0 or inc_x <= 0.
+ */
+FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
+{
+        BLASLONG i;
+        BLASLONG ix;
+        FLOAT maxf = 0.0;
+
+        if (n <= 0 || inc_x <= 0) return(maxf);
+
+        /* element 0 is the initial candidate */
+        maxf = x[0];
+
+        for (i = 1, ix = inc_x; i < n; i++, ix += inc_x) {
+                if (x[ix] > maxf) {
+                        maxf = x[ix];
+                }
+        }
+        return(maxf);
+}
+
+
diff --git a/kernel/riscv64/max_vector.c b/kernel/riscv64/max_vector.c
new file mode 100644
index 000000000..4ef75452d
--- /dev/null
+++ b/kernel/riscv64/max_vector.c
@@ -0,0 +1,116 @@
+/***************************************************************************
+Copyright (c) 2020, The OpenBLAS Project
+All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+2.
Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*****************************************************************************/ + +#include "common.h" +#include +#include +#if !defined(DOUBLE) +#define RVV_EFLOAT RVV_E32 +#define RVV_M RVV_M8 +#define FLOAT_V_T float32xm8_t +#define VLEV_FLOAT vlev_float32xm8 +#define VLSEV_FLOAT vlsev_float32xm8 +#define VFREDMAXVS_FLOAT vfredmaxvs_float32xm8 +#define VFMVVF_FLOAT vfmvvf_float32xm8 +#define VFMAXVV_FLOAT vfmaxvv_float32xm8 +#else +#define RVV_EFLOAT RVV_E64 +#define RVV_M RVV_M8 +#define FLOAT_V_T float64xm8_t +#define VLEV_FLOAT vlev_float64xm8 +#define VLSEV_FLOAT vlsev_float64xm8 +#define VFREDMAXVS_FLOAT vfredmaxvs_float64xm8 +#define VFMVVF_FLOAT vfmvvf_float64xm8 +#define VFMAXVV_FLOAT vfmaxvv_float64xm8 +#endif + +FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) +{ + BLASLONG i=0, j=0; + if (n <= 0 || inc_x <= 0) return(0.0); + FLOAT maxf=-FLT_MAX; + unsigned int gvl = 0; + FLOAT_V_T v0, v1, v_max; + + if(inc_x == 1){ + gvl = vsetvli(n, RVV_EFLOAT, RVV_M); + if(gvl <= n/2){ + v_max = VFMVVF_FLOAT(-FLT_MAX, gvl); + for(i=0,j=0; i maxf) + maxf = v0[0]; + j += gvl; + } + }else{ + gvl = vsetvli(n, RVV_EFLOAT, RVV_M); + BLASLONG stride_x = inc_x * sizeof(FLOAT); + if(gvl <= n/2){ + v_max = VFMVVF_FLOAT(-FLT_MAX, gvl); + BLASLONG idx = 0, inc_xv = inc_x * gvl; + for(i=0,j=0; i maxf) + maxf = v0[0]; + j += gvl; + } + } + return(maxf); +} + + diff --git a/kernel/riscv64/min.c b/kernel/riscv64/min.c new file mode 100644 index 000000000..2812fe397 --- /dev/null +++ b/kernel/riscv64/min.c @@ -0,0 +1,65 @@ +/*************************************************************************** +Copyright (c) 2013, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. 
Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*****************************************************************************/
+
+/**************************************************************************************
+* 2013/09/14 Saar
+* BLASTEST float : NoTest
+* BLASTEST double : NoTest
+* CTEST : NoTest
+* TEST : NoTest
+*
+**************************************************************************************/
+
+#include "common.h"
+/* NOTE(review): the header name on this directive was missing; <math.h> is
+ * assumed - confirm. */
+#include <math.h>
+
+
+/*
+ * min, scalar kernel: returns the smallest of the n values x[0], x[inc_x],
+ * x[2*inc_x], ...
+ *
+ *   n      number of elements examined
+ *   x      input vector
+ *   inc_x  stride between consecutive elements
+ *
+ * Returns 0.0 when n <= 0 or inc_x <= 0.
+ */
+FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
+{
+        BLASLONG i;
+        BLASLONG ix;
+        FLOAT minf = 0.0;
+
+        if (n <= 0 || inc_x <= 0) return(minf);
+
+        /* element 0 is the initial candidate */
+        minf = x[0];
+
+        for (i = 1, ix = inc_x; i < n; i++, ix += inc_x) {
+                if (x[ix] < minf) {
+                        minf = x[ix];
+                }
+        }
+        return(minf);
+}
+
+
diff --git a/kernel/riscv64/min_vector.c b/kernel/riscv64/min_vector.c
new file mode 100644
index 000000000..83c965bfa
--- /dev/null
+++ b/kernel/riscv64/min_vector.c
@@ -0,0 +1,116 @@
+/***************************************************************************
+Copyright (c) 2020, The OpenBLAS Project
+All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in
+the documentation and/or other materials provided with the
+distribution.
+3. Neither the name of the OpenBLAS project nor the names of
+its contributors may be used to endorse or promote products
+derived from this software without specific prior written permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED.
IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*****************************************************************************/ + +#include "common.h" +#include +#include +#if !defined(DOUBLE) +#define RVV_EFLOAT RVV_E32 +#define RVV_M RVV_M8 +#define FLOAT_V_T float32xm8_t +#define VLEV_FLOAT vlev_float32xm8 +#define VLSEV_FLOAT vlsev_float32xm8 +#define VFREDMINVS_FLOAT vfredminvs_float32xm8 +#define VFMVVF_FLOAT vfmvvf_float32xm8 +#define VFMINVV_FLOAT vfminvv_float32xm8 +#else +#define RVV_EFLOAT RVV_E64 +#define RVV_M RVV_M8 +#define FLOAT_V_T float64xm8_t +#define VLEV_FLOAT vlev_float64xm8 +#define VLSEV_FLOAT vlsev_float64xm8 +#define VFREDMINVS_FLOAT vfredminvs_float64xm8 +#define VFMVVF_FLOAT vfmvvf_float64xm8 +#define VFMINVV_FLOAT vfminvv_float64xm8 +#endif + +FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) +{ + BLASLONG i=0, j=0; + if (n <= 0 || inc_x <= 0) return(0.0); + FLOAT minf=FLT_MAX; + unsigned int gvl = 0; + FLOAT_V_T v0, v1, v_min; + + if(inc_x == 1){ + gvl = vsetvli(n, RVV_EFLOAT, RVV_M); + if(gvl <= n/2){ + v_min = VFMVVF_FLOAT(FLT_MAX, gvl); + for(i=0,j=0; i + +#if defined(DOUBLE) + +#define ABS fabs + +#else + +#define ABS fabsf + +#endif + + + +FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) +{ + BLASLONG i=0; + FLOAT scale = 0.0; + FLOAT ssq = 1.0; + FLOAT absxi = 0.0; + + + if (n <= 0 || inc_x <= 0) return(0.0); + if ( n == 1 ) return( ABS(x[0]) ); + + n *= inc_x; + while(i < n) + { + + if ( x[i] != 0.0 ) + { + absxi = ABS( x[i] ); + if ( scale < absxi ) + { + ssq = 1 
+ ssq * ( scale / absxi ) * ( scale / absxi ); + scale = absxi ; + } + else + { + ssq += ( absxi/scale ) * ( absxi/scale ); + } + + } + i += inc_x; + } + scale = scale * sqrt( ssq ); + return(scale); + +} + + diff --git a/kernel/riscv64/nrm2_vector.c b/kernel/riscv64/nrm2_vector.c new file mode 100644 index 000000000..785c0d2f8 --- /dev/null +++ b/kernel/riscv64/nrm2_vector.c @@ -0,0 +1,220 @@ +/*************************************************************************** +Copyright (c) 2020, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*****************************************************************************/ + +#include "common.h" +#if !defined(DOUBLE) +#define RVV_EFLOAT RVV_E32 +#define RVV_M RVV_M4 +#define FLOAT_V_T float32xm4_t +#define VLEV_FLOAT vlev_float32xm4 +#define VLSEV_FLOAT vlsev_float32xm4 +#define VFREDSUM_FLOAT vfredsumvs_float32xm4 +#define VFMACCVV_FLOAT vfmaccvv_float32xm4 +#define VFMVVF_FLOAT vfmvvf_float32xm4 +#define VFDOTVV_FLOAT vfdotvv_float32xm4 +#define ABS fabsf +#define MASK_T e32xm4_t +#define VFRSUBVF_MASK_FLOAT vfrsubvf_mask_float32xm4 +#define VMFGTVF_FLOAT vmfgtvf_e32xm4_float32xm4 +#define VMFIRSTM vmfirstm_e32xm4 +#define VFDIVVF_FLOAT vfdivvf_float32xm4 +#define VMFLTVF_FLOAT vmfltvf_e32xm4_float32xm4 +#define VFREDMAXVS_FLOAT vfredmaxvs_float32xm4 +#else +#define RVV_EFLOAT RVV_E64 +#define RVV_M RVV_M4 +#define FLOAT_V_T float64xm4_t +#define VLEV_FLOAT vlev_float64xm4 +#define VLSEV_FLOAT vlsev_float64xm4 +#define VFREDSUM_FLOAT vfredsumvs_float64xm4 +#define VFMACCVV_FLOAT vfmaccvv_float64xm4 +#define VFMVVF_FLOAT vfmvvf_float64xm4 +#define VFDOTVV_FLOAT vfdotvv_float64xm4 +#define ABS fabs +#define MASK_T e64xm4_t +#define VFRSUBVF_MASK_FLOAT vfrsubvf_mask_float64xm4 +#define VMFGTVF_FLOAT vmfgtvf_e64xm4_float64xm4 +#define VMFIRSTM vmfirstm_e64xm4 +#define VFDIVVF_FLOAT vfdivvf_float64xm4 +#define VMFLTVF_FLOAT vmfltvf_e64xm4_float64xm4 +#define VFREDMAXVS_FLOAT vfredmaxvs_float64xm4 +#endif + +FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) +{ + BLASLONG i=0, j=0; + + if ( n < 0 ) return(0.0); + if(n == 1) return (ABS(x[0])); + + FLOAT_V_T vr, v0, v_zero; + unsigned int gvl = 0; + FLOAT scale = 0.0, ssq = 0.0; + MASK_T mask; + BLASLONG index = 0; + if(inc_x == 1){ + gvl = vsetvli(n, RVV_EFLOAT, RVV_M); + vr = VFMVVF_FLOAT(0, gvl); + v_zero = VFMVVF_FLOAT(0, gvl); + for(i=0,j=0; i + +#define KERNEL16x4_I \ + "addi t1, %[PB], 1*4 \n\t"\ + "addi t2, %[PB], 2*4 \n\t"\ + "addi t3, %[PB], 3*4 \n\t"\ + "flw ft0, (%[PB]) \n\t"\ + "flw ft1, (t1) 
\n\t"\ + "flw ft2, (t2) \n\t"\ + "flw ft3, (t3) \n\t"\ + "vle.v v0, (%[PA]) \n\t"\ + "addi t4, %[PA], 4*4 \n\t"\ + "addi t5, %[PA], 8*4 \n\t"\ + "vfmv.v.f v8, ft0 \n\t"\ + "addi t6, %[PA], 12*4 \n\t"\ + "addi %[PA], %[PA], 16*4 \n\t"\ + "vle.v v1, (t4) \n\t"\ + "addi t4, t4, 16*4 \n\t"\ + "vfmv.v.f v9, ft1 \n\t"\ + "vle.v v2, (t5) \n\t"\ + "addi t5, t5, 16*4 \n\t"\ + "vle.v v3, (t6) \n\t"\ + "addi t6, t6, 16*4 \n\t"\ + "vfmv.v.f v10, ft2 \n\t"\ + "addi %[PB], %[PB], 4*4 \n\t"\ + "vle.v v4, (%[PA]) \n\t"\ + "addi %[PA], %[PA], 16*4 \n\t"\ + "vfmv.v.f v11, ft3 \n\t"\ + "vfmacc.vv v16, v8, v0 \n\t"\ + "addi t1, t1, 4*4 \n\t"\ + "vle.v v5, (t4) \n\t"\ + "addi t4, t4, 16*4 \n\t"\ + "vfmacc.vv v17, v8, v1 \n\t"\ + "addi t2, t2, 4*4 \n\t"\ + "vle.v v6, (t5) \n\t"\ + "addi t5, t5, 16*4 \n\t"\ + "vfmacc.vv v18, v8, v2 \n\t"\ + "addi t3, t3, 4*4 \n\t"\ + "vle.v v7, (t6) \n\t"\ + "addi t6, t6, 16*4 \n\t"\ + "vfmacc.vv v19, v8, v3 \n\t"\ + "flw ft4, (%[PB]) \n\t"\ + "vfmacc.vv v20, v9, v0 \n\t"\ + "flw ft5, (t1) \n\t"\ + "vfmacc.vv v21, v9, v1 \n\t"\ + "flw ft6, (t2) \n\t"\ + "vfmacc.vv v22, v9, v2 \n\t"\ + "flw ft7, (t3) \n\t"\ + "vfmacc.vv v23, v9, v3 \n\t"\ + "vfmv.v.f v12, ft4 \n\t"\ + "vfmacc.vv v24, v10, v0 \n\t"\ + "vfmv.v.f v13, ft5 \n\t"\ + "vfmacc.vv v25, v10, v1 \n\t"\ + "vfmv.v.f v14, ft6 \n\t"\ + "vfmacc.vv v26, v10, v2 \n\t"\ + "vfmv.v.f v15, ft7 \n\t"\ + "vfmacc.vv v27, v10, v3 \n\t"\ + "addi %[PB], %[PB], 4*4 \n\t"\ + "vfmacc.vv v28, v11, v0 \n\t"\ + "addi t1, t1, 4*4 \n\t"\ + "vfmacc.vv v29, v11, v1 \n\t"\ + "addi t2, t2, 4*4 \n\t"\ + "vfmacc.vv v30, v11, v2 \n\t"\ + "addi t3, t3, 4*4 \n\t"\ + "vfmacc.vv v31, v11, v3 \n\t" + +#define KERNEL16x4_M1 \ + "vfmacc.vv v16, v8, v0 \n\t"\ + "vle.v v4, (%[PA]) \n\t"\ + "addi %[PA], %[PA], 16*4 \n\t"\ + "vfmacc.vv v17, v8, v1 \n\t"\ + "vle.v v5, (t4) \n\t"\ + "addi t4, t4, 16*4 \n\t"\ + "vfmacc.vv v18, v8, v2 \n\t"\ + "vle.v v6, (t5) \n\t"\ + "addi t5, t5, 16*4 \n\t"\ + "vfmacc.vv v19, v8, v3 \n\t"\ + "vle.v v7, (t6) 
\n\t"\ + "addi t6, t6, 16*4 \n\t"\ + "vfmacc.vv v20, v9, v0 \n\t"\ + "flw ft4, (%[PB]) \n\t"\ + "vfmacc.vv v21, v9, v1 \n\t"\ + "flw ft5, (t1) \n\t"\ + "vfmacc.vv v22, v9, v2 \n\t"\ + "flw ft6, (t2) \n\t"\ + "vfmacc.vv v23, v9, v3 \n\t"\ + "flw ft7, (t3) \n\t"\ + "addi %[PB], %[PB], 4*4 \n\t"\ + "vfmacc.vv v24, v10, v0 \n\t"\ + "addi t1, t1, 4*4 \n\t"\ + "vfmacc.vv v25, v10, v1 \n\t"\ + "vfmv.v.f v12, ft4 \n\t"\ + "vfmacc.vv v26, v10, v2 \n\t"\ + "addi t2, t2, 4*4 \n\t"\ + "vfmacc.vv v27, v10, v3 \n\t"\ + "vfmv.v.f v13, ft5 \n\t"\ + "vfmacc.vv v28, v11, v0 \n\t"\ + "addi t3, t3, 4*4 \n\t"\ + "vfmacc.vv v29, v11, v1 \n\t"\ + "vfmv.v.f v14, ft6 \n\t"\ + "vfmacc.vv v30, v11, v2 \n\t"\ + "vfmacc.vv v31, v11, v3 \n\t"\ + "vfmv.v.f v15, ft7 \n\t" + +#define KERNEL16x4_M2 \ + "vfmacc.vv v16, v12, v4 \n\t"\ + "vle.v v0, (%[PA]) \n\t"\ + "addi %[PA], %[PA], 16*4 \n\t"\ + "vfmacc.vv v17, v12, v5 \n\t"\ + "vle.v v1, (t4) \n\t"\ + "addi t4, t4, 16*4 \n\t"\ + "vfmacc.vv v18, v12, v6 \n\t"\ + "vle.v v2, (t5) \n\t"\ + "addi t5, t5, 16*4 \n\t"\ + "vfmacc.vv v19, v12, v7 \n\t"\ + "vle.v v3, (t6) \n\t"\ + "addi t6, t6, 16*4 \n\t"\ + "vfmacc.vv v20, v13, v4 \n\t"\ + "flw ft0, (%[PB]) \n\t"\ + "vfmacc.vv v21, v13, v5 \n\t"\ + "flw ft1, (t1) \n\t"\ + "vfmacc.vv v22, v13, v6 \n\t"\ + "flw ft2, (t2) \n\t"\ + "vfmacc.vv v23, v13, v7 \n\t"\ + "flw ft3, (t3) \n\t"\ + "addi %[PB], %[PB], 4*4 \n\t"\ + "vfmacc.vv v24, v14, v4 \n\t"\ + "addi t1, t1, 4*4 \n\t"\ + "vfmacc.vv v25, v14, v5 \n\t"\ + "vfmv.v.f v8, ft0 \n\t"\ + "vfmacc.vv v26, v14, v6 \n\t"\ + "addi t2, t2, 4*4 \n\t"\ + "vfmacc.vv v27, v14, v7 \n\t"\ + "vfmv.v.f v9, ft1 \n\t"\ + "vfmacc.vv v28, v15, v4 \n\t"\ + "addi t3, t3, 4*4 \n\t"\ + "vfmacc.vv v29, v15, v5 \n\t"\ + "vfmv.v.f v10, ft2 \n\t"\ + "vfmacc.vv v30, v15, v6 \n\t"\ + "vfmacc.vv v31, v15, v7 \n\t"\ + "vfmv.v.f v11, ft3 \n\t" + +#define KERNEL16x4_E \ + "vfmacc.vv v16, v12, v4 \n\t"\ + "vfmacc.vv v17, v12, v5 \n\t"\ + "vfmacc.vv v18, v12, v6 \n\t"\ + "vfmacc.vv v19, v12, v7 
\n\t"\ + "vfmacc.vv v20, v13, v4 \n\t"\ + "vfmacc.vv v21, v13, v5 \n\t"\ + "vfmacc.vv v22, v13, v6 \n\t"\ + "vfmacc.vv v23, v13, v7 \n\t"\ + "vfmacc.vv v24, v14, v4 \n\t"\ + "vfmacc.vv v25, v14, v5 \n\t"\ + "vfmacc.vv v26, v14, v6 \n\t"\ + "vfmacc.vv v27, v14, v7 \n\t"\ + "vfmacc.vv v28, v15, v4 \n\t"\ + "vfmacc.vv v29, v15, v5 \n\t"\ + "vfmacc.vv v30, v15, v6 \n\t"\ + "vfmacc.vv v31, v15, v7 \n\t" + + +#define KERNEL8x4_I \ + "addi t1, %[PB], 1*4 \n\t"\ + "addi t2, %[PB], 2*4 \n\t"\ + "addi t3, %[PB], 3*4 \n\t"\ + "flw ft0, (%[PB]) \n\t"\ + "flw ft1, (t1) \n\t"\ + "flw ft2, (t2) \n\t"\ + "flw ft3, (t3) \n\t"\ + "vle.v v0, (%[PA]) \n\t"\ + "addi t4, %[PA], 4*4 \n\t"\ + "vfmv.v.f v8, ft0 \n\t"\ + "addi %[PA], %[PA], 8*4 \n\t"\ + "vle.v v1, (t4) \n\t"\ + "addi t4, t4, 8*4 \n\t"\ + "vfmv.v.f v9, ft1 \n\t"\ + "vfmv.v.f v10, ft2 \n\t"\ + "addi %[PB], %[PB], 4*4 \n\t"\ + "vle.v v4, (%[PA]) \n\t"\ + "addi %[PA], %[PA], 8*4 \n\t"\ + "vfmv.v.f v11, ft3 \n\t"\ + "vfmacc.vv v16, v8, v0 \n\t"\ + "addi t1, t1, 4*4 \n\t"\ + "vle.v v5, (t4) \n\t"\ + "addi t4, t4, 8*4 \n\t"\ + "vfmacc.vv v17, v8, v1 \n\t"\ + "addi t2, t2, 4*4 \n\t"\ + "flw ft4, (%[PB]) \n\t"\ + "addi t3, t3, 4*4 \n\t"\ + "vfmacc.vv v20, v9, v0 \n\t"\ + "flw ft5, (t1) \n\t"\ + "vfmacc.vv v21, v9, v1 \n\t"\ + "flw ft6, (t2) \n\t"\ + "vfmv.v.f v12, ft4 \n\t"\ + "flw ft7, (t3) \n\t"\ + "vfmacc.vv v24, v10, v0 \n\t"\ + "vfmv.v.f v13, ft5 \n\t"\ + "vfmacc.vv v25, v10, v1 \n\t"\ + "vfmv.v.f v14, ft6 \n\t"\ + "addi %[PB], %[PB], 4*4 \n\t"\ + "vfmv.v.f v15, ft7 \n\t"\ + "addi t1, t1, 4*4 \n\t"\ + "vfmacc.vv v28, v11, v0 \n\t"\ + "addi t2, t2, 4*4 \n\t"\ + "vfmacc.vv v29, v11, v1 \n\t"\ + "addi t3, t3, 4*4 \n\t" + + +#define KERNEL8x4_M1 \ + "vfmacc.vv v16, v8, v0 \n\t"\ + "vle.v v4, (%[PA]) \n\t"\ + "addi %[PA], %[PA], 8*4 \n\t"\ + "vfmacc.vv v17, v8, v1 \n\t"\ + "vle.v v5, (t4) \n\t"\ + "addi t4, t4, 8*4 \n\t"\ + "vfmacc.vv v20, v9, v0 \n\t"\ + "flw ft4, (%[PB]) \n\t"\ + "vfmacc.vv v21, v9, v1 \n\t"\ + "flw ft5, (t1) 
\n\t"\ + "addi %[PB], %[PB], 4*4 \n\t"\ + "flw ft6, (t2) \n\t"\ + "vfmacc.vv v24, v10, v0 \n\t"\ + "flw ft7, (t3) \n\t"\ + "addi t1, t1, 4*4 \n\t"\ + "vfmacc.vv v25, v10, v1 \n\t"\ + "vfmv.v.f v12, ft4 \n\t"\ + "addi t2, t2, 4*4 \n\t"\ + "vfmv.v.f v13, ft5 \n\t"\ + "vfmacc.vv v28, v11, v0 \n\t"\ + "addi t3, t3, 4*4 \n\t"\ + "vfmacc.vv v29, v11, v1 \n\t"\ + "vfmv.v.f v14, ft6 \n\t"\ + "vfmv.v.f v15, ft7 \n\t" + +#define KERNEL8x4_M2 \ + "vfmacc.vv v16, v12, v4 \n\t"\ + "vle.v v0, (%[PA]) \n\t"\ + "addi %[PA], %[PA], 8*4 \n\t"\ + "vfmacc.vv v17, v12, v5 \n\t"\ + "vle.v v1, (t4) \n\t"\ + "addi t4, t4, 8*4 \n\t"\ + "vfmacc.vv v20, v13, v4 \n\t"\ + "flw ft0, (%[PB]) \n\t"\ + "vfmacc.vv v21, v13, v5 \n\t"\ + "flw ft1, (t1) \n\t"\ + "addi %[PB], %[PB], 4*4 \n\t"\ + "flw ft2, (t2) \n\t"\ + "vfmacc.vv v24, v14, v4 \n\t"\ + "flw ft3, (t3) \n\t"\ + "addi t1, t1, 4*4 \n\t"\ + "vfmacc.vv v25, v14, v5 \n\t"\ + "vfmv.v.f v8, ft0 \n\t"\ + "addi t2, t2, 4*4 \n\t"\ + "vfmv.v.f v9, ft1 \n\t"\ + "vfmacc.vv v28, v15, v4 \n\t"\ + "addi t3, t3, 4*4 \n\t"\ + "vfmacc.vv v29, v15, v5 \n\t"\ + "vfmv.v.f v10, ft2 \n\t"\ + "vfmv.v.f v11, ft3 \n\t" + +#define KERNEL8x4_E \ + "vfmacc.vv v16, v12, v4 \n\t"\ + "vfmacc.vv v17, v12, v5 \n\t"\ + "vfmacc.vv v20, v13, v4 \n\t"\ + "vfmacc.vv v21, v13, v5 \n\t"\ + "vfmacc.vv v24, v14, v4 \n\t"\ + "vfmacc.vv v25, v14, v5 \n\t"\ + "vfmacc.vv v28, v15, v4 \n\t"\ + "vfmacc.vv v29, v15, v5 \n\t" + + +#define KERNEL16x2_I \ + "addi t1, %[PB], 1*4 \n\t"\ + "flw ft0, (%[PB]) \n\t"\ + "flw ft1, (t1) \n\t"\ + "vle.v v0, (%[PA]) \n\t"\ + "addi t4, %[PA], 4*4 \n\t"\ + "addi t5, %[PA], 8*4 \n\t"\ + "vfmv.v.f v8, ft0 \n\t"\ + "addi t6, %[PA], 12*4 \n\t"\ + "addi %[PA], %[PA], 16*4 \n\t"\ + "vle.v v1, (t4) \n\t"\ + "addi t4, t4, 16*4 \n\t"\ + "vfmv.v.f v9, ft1 \n\t"\ + "vle.v v2, (t5) \n\t"\ + "addi t5, t5, 16*4 \n\t"\ + "vle.v v3, (t6) \n\t"\ + "addi t6, t6, 16*4 \n\t"\ + "addi %[PB], %[PB], 2*4 \n\t"\ + "vle.v v4, (%[PA]) \n\t"\ + "addi %[PA], %[PA], 16*4 \n\t"\ + 
"vfmacc.vv v16, v8, v0 \n\t"\ + "addi t1, t1, 2*4 \n\t"\ + "vle.v v5, (t4) \n\t"\ + "addi t4, t4, 16*4 \n\t"\ + "vfmacc.vv v17, v8, v1 \n\t"\ + "vle.v v6, (t5) \n\t"\ + "addi t5, t5, 16*4 \n\t"\ + "vfmacc.vv v18, v8, v2 \n\t"\ + "vle.v v7, (t6) \n\t"\ + "addi t6, t6, 16*4 \n\t"\ + "vfmacc.vv v19, v8, v3 \n\t"\ + "flw ft4, (%[PB]) \n\t"\ + "vfmacc.vv v20, v9, v0 \n\t"\ + "flw ft5, (t1) \n\t"\ + "vfmacc.vv v21, v9, v1 \n\t"\ + "addi %[PB], %[PB], 2*4 \n\t"\ + "vfmacc.vv v22, v9, v2 \n\t"\ + "addi t1, t1, 2*4 \n\t"\ + "vfmacc.vv v23, v9, v3 \n\t"\ + "vfmv.v.f v12, ft4 \n\t"\ + "vfmv.v.f v13, ft5 \n\t" + + +#define KERNEL16x2_M1 \ + "vfmacc.vv v16, v8, v0 \n\t"\ + "vle.v v4, (%[PA]) \n\t"\ + "addi %[PA], %[PA], 16*4 \n\t"\ + "vfmacc.vv v17, v8, v1 \n\t"\ + "vle.v v5, (t4) \n\t"\ + "addi t4, t4, 16*4 \n\t"\ + "vfmacc.vv v18, v8, v2 \n\t"\ + "vle.v v6, (t5) \n\t"\ + "addi t5, t5, 16*4 \n\t"\ + "vfmacc.vv v19, v8, v3 \n\t"\ + "vle.v v7, (t6) \n\t"\ + "addi t6, t6, 16*4 \n\t"\ + "flw ft4, (%[PB]) \n\t"\ + "vfmacc.vv v20, v9, v0 \n\t"\ + "flw ft5, (t1) \n\t"\ + "vfmacc.vv v21, v9, v1 \n\t"\ + "vfmv.v.f v12, ft4 \n\t"\ + "vfmacc.vv v22, v9, v2 \n\t"\ + "addi t1, t1, 2*4 \n\t"\ + "vfmacc.vv v23, v9, v3 \n\t"\ + "addi %[PB], %[PB], 2*4 \n\t"\ + "vfmv.v.f v13, ft5 \n\t" + + +#define KERNEL16x2_M2 \ + "vfmacc.vv v16, v12, v4 \n\t"\ + "vle.v v0, (%[PA]) \n\t"\ + "addi %[PA], %[PA], 16*4 \n\t"\ + "vfmacc.vv v17, v12, v5 \n\t"\ + "vle.v v1, (t4) \n\t"\ + "addi t4, t4, 16*4 \n\t"\ + "vfmacc.vv v18, v12, v6 \n\t"\ + "vle.v v2, (t5) \n\t"\ + "addi t5, t5, 16*4 \n\t"\ + "vfmacc.vv v19, v12, v7 \n\t"\ + "vle.v v3, (t6) \n\t"\ + "addi t6, t6, 16*4 \n\t"\ + "vfmacc.vv v20, v13, v4 \n\t"\ + "flw ft0, (%[PB]) \n\t"\ + "vfmacc.vv v21, v13, v5 \n\t"\ + "flw ft1, (t1) \n\t"\ + "vfmacc.vv v22, v13, v6 \n\t"\ + "vfmv.v.f v8, ft0 \n\t"\ + "vfmacc.vv v23, v13, v7 \n\t"\ + "addi %[PB], %[PB], 2*4 \n\t"\ + "addi t1, t1, 2*4 \n\t"\ + "vfmv.v.f v9, ft1 \n\t" + + +#define KERNEL16x2_E \ + "vfmacc.vv 
v16, v12, v4 \n\t"\ + "vfmacc.vv v17, v12, v5 \n\t"\ + "vfmacc.vv v18, v12, v6 \n\t"\ + "vfmacc.vv v19, v12, v7 \n\t"\ + "vfmacc.vv v20, v13, v4 \n\t"\ + "vfmacc.vv v21, v13, v5 \n\t"\ + "vfmacc.vv v22, v13, v6 \n\t"\ + "vfmacc.vv v23, v13, v7 \n\t" + + +int CNAME(BLASLONG bm,BLASLONG bn,BLASLONG bk,FLOAT alpha,FLOAT* ba,FLOAT* bb,FLOAT* C,BLASLONG ldc +#ifdef TRMMKERNEL + ,BLASLONG offset +#endif + ) +{ + BLASLONG i,j,k; + FLOAT *C0,*C1,*C2,*C3; + FLOAT *ptrba,*ptrbb; + + FLOAT loadb0,loadb1,loadb2,loadb3; + FLOAT load0,load1,load2,load3,load4,load5,load6,load7; + + FLOAT res0,res1,res2,res3; + FLOAT res4,res5,res6,res7; + FLOAT res8,res9,res10,res11; + FLOAT res12,res13,res14,res15; + + for (j=0; j + +int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *dummy, BLASLONG dummy2) +{ + BLASLONG i=0; + BLASLONG ix=0,iy=0; + FLOAT temp; + + if ( n < 0 ) return(0); + + while(i < n) + { + + temp = x[ix] ; + x[ix] = y[iy] ; + y[iy] = temp ; + + ix += inc_x ; + iy += inc_y ; + i++ ; + + } + return(0); + +} + + diff --git a/kernel/riscv64/swap_vector.c b/kernel/riscv64/swap_vector.c new file mode 100644 index 000000000..9377bf4b9 --- /dev/null +++ b/kernel/riscv64/swap_vector.c @@ -0,0 +1,173 @@ +/*************************************************************************** +Copyright (c) 2020, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. 
Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*****************************************************************************/ + +#include "common.h" +#include +#if !defined(DOUBLE) +#define RVV_EFLOAT RVV_E32 +#define RVV_M RVV_M8 +#define FLOAT_V_T float32xm8_t +#define VLEV_FLOAT vlev_float32xm8 +#define VLSEV_FLOAT vlsev_float32xm8 +#define VSEV_FLOAT vsev_float32xm8 +#define VSSEV_FLOAT vssev_float32xm8 +#else +#define RVV_EFLOAT RVV_E64 +#define RVV_M RVV_M8 +#define FLOAT_V_T float64xm8_t +#define VLEV_FLOAT vlev_float64xm8 +#define VLSEV_FLOAT vlsev_float64xm8 +#define VSEV_FLOAT vsev_float64xm8 +#define VSSEV_FLOAT vssev_float64xm8 +#endif + +int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *dummy, BLASLONG dummy2) +{ + BLASLONG i = 0, j = 0; + BLASLONG ix = 0,iy = 0; + BLASLONG stride_x, stride_y; + FLOAT_V_T vx0, vx1, vy0, vy1; + unsigned int gvl = 0; + + if (n < 0) return(0); + if(inc_x == 1 && inc_y == 1){ + gvl = vsetvli(n, RVV_EFLOAT, RVV_M); + if(gvl <= n/2){ + for(i=0,j=0; i 0){ 
+ gvl = vsetvli(len, RVV_EFLOAT, RVV_M); + vr = VFMVVF_FLOAT(0, gvl); + for(k = 0; k < len / gvl; k++){ + va = VLEV_FLOAT(&a_ptr[i], gvl); + vy = VLEV_FLOAT(&y[i], gvl); + vy = VFMACCVF_FLOAT(vy, temp1, va, gvl); + VSEV_FLOAT(&y[i], vy, gvl); + + vx = VLEV_FLOAT(&x[i], gvl); + vr = VFMACCVV_FLOAT(vr, vx, va, gvl); + + i += gvl; + } + va = VFMVVF_FLOAT(0, gvl); + va = VFREDSUM_FLOAT(vr, va, gvl); + temp2 = va[0]; + if(i < m){ + gvl = vsetvli(m-i, RVV_EFLOAT, RVV_M); + vy = VLEV_FLOAT(&y[i], gvl); + va = VLEV_FLOAT(&a_ptr[i], gvl); + vy = VFMACCVF_FLOAT(vy, temp1, va, gvl); + VSEV_FLOAT(&y[i], vy, gvl); + + vx = VLEV_FLOAT(&x[i], gvl); + vr = VFMULVV_FLOAT(vx, va, gvl); + va = VFMVVF_FLOAT(0, gvl); + va = VFREDSUM_FLOAT(vr, va, gvl); + temp2 += va[0]; + } + } + y[j] += alpha * temp2; + a_ptr += lda; + } + }else if(inc_x == 1){ + jy = 0; + stride_y = inc_y * sizeof(FLOAT); + for (j=0; j 0){ + gvl = vsetvli(len, RVV_EFLOAT, RVV_M); + inc_yv = inc_y * gvl; + vr = VFMVVF_FLOAT(0, gvl); + for(k = 0; k < len / gvl; k++){ + va = VLEV_FLOAT(&a_ptr[i], gvl); + vy = VLSEV_FLOAT(&y[iy], stride_y, gvl); + vy = VFMACCVF_FLOAT(vy, temp1, va, gvl); + VSSEV_FLOAT(&y[iy], stride_y, vy, gvl); + + vx = VLEV_FLOAT(&x[i], gvl); + vr = VFMACCVV_FLOAT(vr, vx, va, gvl); + + i += gvl; + iy += inc_yv; + } + va = VFMVVF_FLOAT(0, gvl); + va = VFREDSUM_FLOAT(vr, va, gvl); + temp2 = va[0]; + if(i < m){ + gvl = vsetvli(m-i, RVV_EFLOAT, RVV_M); + vy = VLSEV_FLOAT(&y[iy], stride_y, gvl); + va = VLEV_FLOAT(&a_ptr[i], gvl); + vy = VFMACCVF_FLOAT(vy, temp1, va, gvl); + VSSEV_FLOAT(&y[iy], stride_y, vy, gvl); + + vx = VLEV_FLOAT(&x[i], gvl); + vr = VFMULVV_FLOAT(vx, va, gvl); + va = VFMVVF_FLOAT(0, gvl); + va = VFREDSUM_FLOAT(vr, va, gvl); + temp2 += va[0]; + } + } + y[jy] += alpha * temp2; + jy += inc_y; + a_ptr += lda; + } + }else if(inc_y == 1){ + jx = 0; + stride_x = inc_x * sizeof(FLOAT); + for (j=0; j 0){ + gvl = vsetvli(len, RVV_EFLOAT, RVV_M); + vr = VFMVVF_FLOAT(0, gvl); + inc_xv = inc_x * gvl; 
+ for(k = 0; k < len / gvl; k++){ + va = VLEV_FLOAT(&a_ptr[i], gvl); + vy = VLEV_FLOAT(&y[i], gvl); + vy = VFMACCVF_FLOAT(vy, temp1, va, gvl); + VSEV_FLOAT(&y[i], vy, gvl); + + vx = VLSEV_FLOAT(&x[ix], stride_x, gvl); + vr = VFMACCVV_FLOAT(vr, vx, va, gvl); + + i += gvl; + ix += inc_xv; + } + va = VFMVVF_FLOAT(0, gvl); + va = VFREDSUM_FLOAT(vr, va, gvl); + temp2 = va[0]; + if(i < m){ + gvl = vsetvli(m-i, RVV_EFLOAT, RVV_M); + vy = VLEV_FLOAT(&y[i], gvl); + va = VLEV_FLOAT(&a_ptr[i], gvl); + vy = VFMACCVF_FLOAT(vy, temp1, va, gvl); + VSEV_FLOAT(&y[i], vy, gvl); + + vx = VLSEV_FLOAT(&x[ix], stride_x, gvl); + vr = VFMULVV_FLOAT(vx, va, gvl); + va = VFMVVF_FLOAT(0, gvl); + va = VFREDSUM_FLOAT(vr, va, gvl); + temp2 += va[0]; + } + } + y[j] += alpha * temp2; + jx += inc_x; + a_ptr += lda; + } + }else{ + stride_x = inc_x * sizeof(FLOAT); + stride_y = inc_y * sizeof(FLOAT); + jx = 0; + jy = 0; + for (j=0; j 0){ + gvl = vsetvli(len, RVV_EFLOAT, RVV_M); + inc_xv = inc_x * gvl; + inc_yv = inc_y * gvl; + vr = VFMVVF_FLOAT(0, gvl); + for(k = 0; k < len / gvl; k++){ + va = VLEV_FLOAT(&a_ptr[i], gvl); + vy = VLSEV_FLOAT(&y[iy], stride_y, gvl); + vy = VFMACCVF_FLOAT(vy, temp1, va, gvl); + VSSEV_FLOAT(&y[iy], stride_y, vy, gvl); + + vx = VLSEV_FLOAT(&x[ix], stride_x, gvl); + vr = VFMACCVV_FLOAT(vr, vx, va, gvl); + + i += gvl; + ix += inc_xv; + iy += inc_yv; + } + va = VFMVVF_FLOAT(0, gvl); + va = VFREDSUM_FLOAT(vr, va, gvl); + temp2 = va[0]; + if(i < m){ + gvl = vsetvli(m-i, RVV_EFLOAT, RVV_M); + vy = VLSEV_FLOAT(&y[iy], stride_y, gvl); + va = VLEV_FLOAT(&a_ptr[i], gvl); + vy = VFMACCVF_FLOAT(vy, temp1, va, gvl); + VSSEV_FLOAT(&y[iy], stride_y, vy, gvl); + + vx = VLSEV_FLOAT(&x[ix], stride_x, gvl); + vr = VFMULVV_FLOAT(vx, va, gvl); + va = VFMVVF_FLOAT(0, gvl); + va = VFREDSUM_FLOAT(vr, va, gvl); + temp2 += va[0]; + } + } + y[jy] += alpha * temp2; + jx += inc_x; + jy += inc_y; + a_ptr += lda; + } + } + return(0); +} + diff --git a/kernel/riscv64/symv_U.c b/kernel/riscv64/symv_U.c 
new file mode 100644 index 000000000..b5a0c96e9 --- /dev/null +++ b/kernel/riscv64/symv_U.c @@ -0,0 +1,71 @@ +/*************************************************************************** +Copyright (c) 2013, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*****************************************************************************/ + + +#include "common.h" + +int CNAME(BLASLONG m, BLASLONG offset, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *buffer) +{ + BLASLONG i; + BLASLONG ix,iy; + BLASLONG jx,jy; + BLASLONG j; + FLOAT temp1; + FLOAT temp2; + +#if 0 + if( m != offset ) + printf("Symv_U: m=%d offset=%d\n",m,offset); +#endif + + BLASLONG m1 = m - offset; + + jx = m1 * inc_x; + jy = m1 * inc_y; + + for (j=m1; j 0){ + i = 0; + gvl = vsetvli(j, RVV_EFLOAT, RVV_M); + vr = VFMVVF_FLOAT(0, gvl); + for(k = 0; k < j / gvl; k++){ + vy = VLEV_FLOAT(&y[i], gvl); + va = VLEV_FLOAT(&a_ptr[i], gvl); + vy = VFMACCVF_FLOAT(vy, temp1, va, gvl); + VSEV_FLOAT(&y[i], vy, gvl); + + vx = VLEV_FLOAT(&x[i], gvl); + vr = VFMACCVV_FLOAT(vr, vx, va, gvl); + + i += gvl; + } + va = VFMVVF_FLOAT(0, gvl); + va = VFREDSUM_FLOAT(vr, va, gvl); + temp2 = va[0]; + if(i < j){ + gvl = vsetvli(j-i, RVV_EFLOAT, RVV_M); + vy = VLEV_FLOAT(&y[i], gvl); + va = VLEV_FLOAT(&a_ptr[i], gvl); + vy = VFMACCVF_FLOAT(vy, temp1, va, gvl); + VSEV_FLOAT(&y[i], vy, gvl); + + vx = VLEV_FLOAT(&x[i], gvl); + vr = VFMULVV_FLOAT(vx, va, gvl); + va = VFMVVF_FLOAT(0, gvl); + va = VFREDSUM_FLOAT(vr, va, gvl); + temp2 += va[0]; + } + } + y[j] += temp1 * a_ptr[j] + alpha * temp2; + a_ptr += lda; + } + }else if(inc_x == 1){ + jy = m1 * inc_y; + a_ptr += m1 * lda; + stride_y = inc_y * sizeof(FLOAT); + for (j=m1; j 0){ + iy = 0; + i = 0; + gvl = vsetvli(j, RVV_EFLOAT, RVV_M); + inc_yv = inc_y * gvl; + vr = VFMVVF_FLOAT(0, gvl); + for(k = 0; k < j / gvl; k++){ + vy = VLSEV_FLOAT(&y[iy], stride_y, gvl); + va = VLEV_FLOAT(&a_ptr[i], gvl); + vy = VFMACCVF_FLOAT(vy, temp1, va, gvl); + VSSEV_FLOAT(&y[iy], stride_y, vy, gvl); + + vx = VLEV_FLOAT(&x[i], gvl); + vr = VFMACCVV_FLOAT(vr, vx, va, gvl); + + i += gvl; + iy += inc_yv; + } + va = VFMVVF_FLOAT(0, gvl); + va = VFREDSUM_FLOAT(vr, va, gvl); + temp2 = va[0]; + if(i < j){ + gvl = 
vsetvli(j-i, RVV_EFLOAT, RVV_M); + vy = VLSEV_FLOAT(&y[iy], stride_y, gvl); + va = VLEV_FLOAT(&a_ptr[i], gvl); + vy = VFMACCVF_FLOAT(vy, temp1, va, gvl); + VSSEV_FLOAT(&y[iy], stride_y, vy, gvl); + + vx = VLEV_FLOAT(&x[i], gvl); + vr = VFMULVV_FLOAT(vx, va, gvl); + va = VFMVVF_FLOAT(0, gvl); + va = VFREDSUM_FLOAT(vr, va, gvl); + temp2 += va[0]; + } + } + y[jy] += temp1 * a_ptr[j] + alpha * temp2; + a_ptr += lda; + jy += inc_y; + } + }else if(inc_y == 1){ + jx = m1 * inc_x; + a_ptr += m1 * lda; + stride_x = inc_x * sizeof(FLOAT); + for (j=m1; j 0){ + ix = 0; + i = 0; + gvl = vsetvli(j, RVV_EFLOAT, RVV_M); + inc_xv = inc_x * gvl; + vr = VFMVVF_FLOAT(0, gvl); + for(k = 0; k < j / gvl; k++){ + vy = VLEV_FLOAT(&y[i], gvl); + va = VLEV_FLOAT(&a_ptr[i], gvl); + vy = VFMACCVF_FLOAT(vy, temp1, va, gvl); + VSEV_FLOAT(&y[i], vy, gvl); + + vx = VLSEV_FLOAT(&x[ix], stride_x, gvl); + vr = VFMACCVV_FLOAT(vr, vx, va, gvl); + + i += gvl; + ix += inc_xv; + } + va = VFMVVF_FLOAT(0, gvl); + va = VFREDSUM_FLOAT(vr, va, gvl); + temp2 = va[0]; + if(i < j){ + gvl = vsetvli(j-i, RVV_EFLOAT, RVV_M); + vy = VLEV_FLOAT(&y[i], gvl); + va = VLEV_FLOAT(&a_ptr[i], gvl); + vy = VFMACCVF_FLOAT(vy, temp1, va, gvl); + VSEV_FLOAT(&y[i], vy, gvl); + + vx = VLSEV_FLOAT(&x[ix], stride_x, gvl); + vr = VFMULVV_FLOAT(vx, va, gvl); + va = VFMVVF_FLOAT(0, gvl); + va = VFREDSUM_FLOAT(vr, va, gvl); + temp2 += va[0]; + } + } + y[j] += temp1 * a_ptr[j] + alpha * temp2; + a_ptr += lda; + jx += inc_x; + } + }else{ + jx = m1 * inc_x; + jy = m1 * inc_y; + a_ptr += m1 * lda; + stride_x = inc_x * sizeof(FLOAT); + stride_y = inc_y * sizeof(FLOAT); + for (j=m1; j 0){ + ix = 0; + iy = 0; + i = 0; + gvl = vsetvli(j, RVV_EFLOAT, RVV_M); + inc_xv = inc_x * gvl; + inc_yv = inc_y * gvl; + vr = VFMVVF_FLOAT(0, gvl); + for(k = 0; k < j / gvl; k++){ + vy = VLSEV_FLOAT(&y[iy], stride_y, gvl); + va = VLEV_FLOAT(&a_ptr[i], gvl); + vy = VFMACCVF_FLOAT(vy, temp1, va, gvl); + VSSEV_FLOAT(&y[iy], stride_y, vy, gvl); + + vx = 
VLSEV_FLOAT(&x[ix], stride_x, gvl); + vr = VFMACCVV_FLOAT(vr, vx, va, gvl); + + i += gvl; + ix += inc_xv; + iy += inc_yv; + } + va = VFMVVF_FLOAT(0, gvl); + va = VFREDSUM_FLOAT(vr, va, gvl); + temp2 = va[0]; + if(i < j){ + gvl = vsetvli(j-i, RVV_EFLOAT, RVV_M); + vy = VLSEV_FLOAT(&y[iy], stride_y, gvl); + va = VLEV_FLOAT(&a_ptr[i], gvl); + vy = VFMACCVF_FLOAT(vy, temp1, va, gvl); + VSSEV_FLOAT(&y[iy], stride_y, vy, gvl); + + vx = VLSEV_FLOAT(&x[ix], stride_x, gvl); + vr = VFMULVV_FLOAT(vx, va, gvl); + va = VFMVVF_FLOAT(0, gvl); + va = VFREDSUM_FLOAT(vr, va, gvl); + temp2 += va[0]; + } + } + y[jy] += temp1 * a_ptr[j] + alpha * temp2; + a_ptr += lda; + jx += inc_x; + jy += inc_y; + } + } + return(0); +} + diff --git a/kernel/riscv64/zamax.c b/kernel/riscv64/zamax.c new file mode 100644 index 000000000..a39bd7821 --- /dev/null +++ b/kernel/riscv64/zamax.c @@ -0,0 +1,79 @@ +/*************************************************************************** +Copyright (c) 2013, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*****************************************************************************/ + +/************************************************************************************** +* 2013/09/14 Saar +* BLASTEST float : OK +* BLASTEST double : OK +* CTEST : NoTest +* TEST : NoTest +* +**************************************************************************************/ + +#include "common.h" +#include + +#if defined(DOUBLE) + +#define ABS fabs + +#else + +#define ABS fabsf + +#endif + +#define CABS1(x,i) ABS(x[i])+ABS(x[i+1]) + +FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) +{ + BLASLONG i=0; + BLASLONG ix=0; + FLOAT maxf; + BLASLONG inc_x2; + + if (n <= 0 || inc_x <= 0) return(0.0); + + inc_x2 = 2 * inc_x; + + maxf = CABS1(x,0); + ix += inc_x2; + i++; + + while(i < n) + { + if( CABS1(x,ix) > maxf ) + { + maxf = CABS1(x,ix); + } + ix += inc_x2; + i++; + } + return(maxf); +} + + diff --git a/kernel/riscv64/zamax_vector.c b/kernel/riscv64/zamax_vector.c new file mode 100644 index 000000000..a6c742b14 --- /dev/null +++ b/kernel/riscv64/zamax_vector.c @@ -0,0 +1,104 @@ +/*************************************************************************** +Copyright (c) 2020, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. 
Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*****************************************************************************/ + +#include "common.h" +#include + +#if !defined(DOUBLE) +#define RVV_EFLOAT RVV_E32 +#define RVV_M RVV_M8 +#define FLOAT_V_T float32xm8_t +#define VLSEV_FLOAT vlsev_float32xm8 +#define VFREDMAXVS_FLOAT vfredmaxvs_float32xm8 +#define MASK_T e32xm8_t +#define VMFLTVF_FLOAT vmfltvf_e32xm8_float32xm8 +#define VFMVVF_FLOAT vfmvvf_float32xm8 +#define VFRSUBVF_MASK_FLOAT vfrsubvf_mask_float32xm8 +#define VFMAXVV_FLOAT vfmaxvv_float32xm8 +#define VFADDVV_FLOAT vfaddvv_float32xm8 +#else +#define RVV_EFLOAT RVV_E64 +#define RVV_M RVV_M8 +#define FLOAT_V_T float64xm8_t +#define VLSEV_FLOAT vlsev_float64xm8 +#define VFREDMAXVS_FLOAT vfredmaxvs_float64xm8 +#define MASK_T e64xm8_t +#define VMFLTVF_FLOAT vmfltvf_e64xm8_float64xm8 +#define VFMVVF_FLOAT vfmvvf_float64xm8 +#define VFRSUBVF_MASK_FLOAT vfrsubvf_mask_float64xm8 +#define VFMAXVV_FLOAT vfmaxvv_float64xm8 +#define VFADDVV_FLOAT vfaddvv_float64xm8 +#endif + +FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) +{ + BLASLONG i=0, j=0; + BLASLONG ix=0; + FLOAT maxf=0.0; + if (n <= 0 || inc_x <= 0) return(maxf); + unsigned int gvl = 0; + FLOAT_V_T v0, v1, v_max; + + MASK_T mask0, mask1; + BLASLONG stride_x = inc_x * sizeof(FLOAT) * 2; + gvl = vsetvli(n, RVV_EFLOAT, RVV_M); + v_max = VFMVVF_FLOAT(0, gvl); + BLASLONG inc_xv = inc_x * gvl * 2; + for(; i maxf) + maxf = v_max[0]; + } + return(maxf); +} diff --git a/kernel/riscv64/zamin.c b/kernel/riscv64/zamin.c new file mode 100644 index 000000000..02eab3e75 --- /dev/null +++ b/kernel/riscv64/zamin.c @@ -0,0 +1,79 @@ +/*************************************************************************** +Copyright (c) 2013, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. 
Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*****************************************************************************/ + +/************************************************************************************** +* 2013/09/14 Saar +* BLASTEST float : OK +* BLASTEST double : OK +* CTEST : NoTest +* TEST : NoTest +* +**************************************************************************************/ + +#include "common.h" +#include + +#if defined(DOUBLE) + +#define ABS fabs + +#else + +#define ABS fabsf + +#endif + +#define CABS1(x,i) ABS(x[i])+ABS(x[i+1]) + +FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) +{ + BLASLONG i=0; + BLASLONG ix=0; + FLOAT minf; + BLASLONG inc_x2; + + if (n <= 0 || inc_x <= 0) return(0.0); + + inc_x2 = 2 * inc_x; + + minf = CABS1(x,0); + ix += inc_x2; + i++; + + while(i < n) + { + if( CABS1(x,ix) < minf ) + { + minf = CABS1(x,ix); + } + ix += inc_x2; + i++; + } + return(minf); +} + + diff --git a/kernel/riscv64/zamin_vector.c b/kernel/riscv64/zamin_vector.c new file mode 100644 index 000000000..44a7cf1dc --- /dev/null +++ b/kernel/riscv64/zamin_vector.c @@ -0,0 +1,104 @@ +/*************************************************************************** +Copyright (c) 2020, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. 
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*****************************************************************************/ + +#include "common.h" +#include +#include + +#if !defined(DOUBLE) +#define RVV_EFLOAT RVV_E32 +#define RVV_M RVV_M8 +#define FLOAT_V_T float32xm8_t +#define VLSEV_FLOAT vlsev_float32xm8 +#define VFREDMINVS_FLOAT vfredminvs_float32xm8 +#define MASK_T e32xm8_t +#define VMFLTVF_FLOAT vmfltvf_e32xm8_float32xm8 +#define VFMVVF_FLOAT vfmvvf_float32xm8 +#define VFRSUBVF_MASK_FLOAT vfrsubvf_mask_float32xm8 +#define VFMINVV_FLOAT vfminvv_float32xm8 +#define VFADDVV_FLOAT vfaddvv_float32xm8 +#else +#define RVV_EFLOAT RVV_E64 +#define RVV_M RVV_M8 +#define FLOAT_V_T float64xm8_t +#define VLSEV_FLOAT vlsev_float64xm8 +#define VFREDMINVS_FLOAT vfredminvs_float64xm8 +#define MASK_T e64xm8_t +#define VMFLTVF_FLOAT vmfltvf_e64xm8_float64xm8 +#define VFMVVF_FLOAT vfmvvf_float64xm8 +#define VFRSUBVF_MASK_FLOAT vfrsubvf_mask_float64xm8 +#define VFMINVV_FLOAT vfminvv_float64xm8 +#define VFADDVV_FLOAT vfaddvv_float64xm8 +#endif + +FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) +{ + BLASLONG i=0, j=0; + BLASLONG ix=0; + if (n <= 0 || inc_x <= 0) return(0.0); + FLOAT minf=FLT_MAX; + unsigned int gvl = 0; + FLOAT_V_T v0, v1, v_min; + MASK_T mask0, mask1; 
+ BLASLONG stride_x = inc_x * sizeof(FLOAT) * 2; + gvl = vsetvli(n, RVV_EFLOAT, RVV_M); + v_min = VFMVVF_FLOAT(FLT_MAX, gvl); + BLASLONG inc_xv = inc_x * gvl * 2; + for(; i + +#if defined(DOUBLE) + +#define ABS fabs + +#else + +#define ABS fabsf + +#endif + +#define CABS1(x,i) ABS(x[i])+ABS(x[i+1]) + +FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) +{ + BLASLONG i=0; + FLOAT sumf = 0.0; + BLASLONG inc_x2; + + if (n <= 0 || inc_x <= 0) return(sumf); + + inc_x2 = 2 * inc_x; + + n *= inc_x2; + while(i < n) + { + sumf += CABS1(x,i); + i += inc_x2; + } + return(sumf); +} + + diff --git a/kernel/riscv64/zasum_vector.c b/kernel/riscv64/zasum_vector.c new file mode 100644 index 000000000..d9fa88971 --- /dev/null +++ b/kernel/riscv64/zasum_vector.c @@ -0,0 +1,136 @@ +/*************************************************************************** +Copyright (c) 2020, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*****************************************************************************/ + +#include "common.h" +#include + +#if !defined(DOUBLE) +#define RVV_EFLOAT RVV_E32 +#define RVV_M RVV_M8 +#define FLOAT_V_T float32xm8_t +#define VLEV_FLOAT vlev_float32xm8 +#define VLSEV_FLOAT vlsev_float32xm8 +#define VFREDSUMVS_FLOAT vfredsumvs_float32xm8 +#define MASK_T e32xm8_t +#define VMFLTVF_FLOAT vmfltvf_e32xm8_float32xm8 +#define VFMVVF_FLOAT vfmvvf_float32xm8 +#define VFRSUBVF_MASK_FLOAT vfrsubvf_mask_float32xm8 +#define VFADDVV_FLOAT vfaddvv_float32xm8 +#else +#define RVV_EFLOAT RVV_E64 +#define RVV_M RVV_M8 +#define FLOAT_V_T float64xm8_t +#define VLEV_FLOAT vlev_float64xm8 +#define VLSEV_FLOAT vlsev_float64xm8 +#define VFREDSUMVS_FLOAT vfredsumvs_float64xm8 +#define MASK_T e64xm8_t +#define VMFLTVF_FLOAT vmfltvf_e64xm8_float64xm8 +#define VFMVVF_FLOAT vfmvvf_float64xm8 +#define VFRSUBVF_MASK_FLOAT vfrsubvf_mask_float64xm8 +#define VFADDVV_FLOAT vfaddvv_float64xm8 +#endif +FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) +{ + BLASLONG i=0, j=0; + BLASLONG ix=0; + FLOAT asumf=0.0; + if (n <= 0 || inc_x <= 0) return(asumf); + unsigned int gvl = 0; + FLOAT_V_T v0, v1, v_zero,v_sum; + + MASK_T mask0, mask1; + if(inc_x == 1){ + BLASLONG n2 = n * 2; + gvl = vsetvli(n2, RVV_EFLOAT, RVV_M); + v_zero = VFMVVF_FLOAT(0, gvl); + if(gvl <= n2/2){ + v_sum = VFMVVF_FLOAT(0, gvl); + for(i=0,j=0; i 0){ + gvl = vsetvli(len, RVV_EFLOAT, RVV_M); + inc_xv = incx * gvl * 2; + 
inc_yv = incy * gvl * 2; + inc_av = gvl * 2; + vr0 = VFMVVF_FLOAT(0, gvl); + vr1 = VFMVVF_FLOAT(0, gvl); + for(k = 0; k < len / gvl; k++){ + va0 = VLSEV_FLOAT(&a_ptr[ia], stride_a, gvl); + va1 = VLSEV_FLOAT(&a_ptr[ia+1], stride_a, gvl); + vy0 = VLSEV_FLOAT(&y[iy], stride_y, gvl); + vy1 = VLSEV_FLOAT(&y[iy+1], stride_y, gvl); +#ifndef HEMVREV + vy0 = VFMACCVF_FLOAT(vy0, temp_r1, va0, gvl); + vy0 = VFNMSACVF_FLOAT(vy0, temp_i1, va1, gvl); + vy1 = VFMACCVF_FLOAT(vy1, temp_r1, va1, gvl); + vy1 = VFMACCVF_FLOAT(vy1, temp_i1, va0, gvl); +#else + vy0 = VFMACCVF_FLOAT(vy0, temp_r1, va0, gvl); + vy0 = VFMACCVF_FLOAT(vy0, temp_i1, va1, gvl); + vy1 = VFNMSACVF_FLOAT(vy1, temp_r1, va1, gvl); + vy1 = VFMACCVF_FLOAT(vy1, temp_i1, va0, gvl); +#endif + VSSEV_FLOAT(&y[iy], stride_y, vy0, gvl); + VSSEV_FLOAT(&y[iy+1], stride_y, vy1, gvl); + + vx0 = VLSEV_FLOAT(&x[ix], stride_x, gvl); + vx1 = VLSEV_FLOAT(&x[ix+1], stride_x, gvl); +#ifndef HEMVREV + vr0 = VFMACCVV_FLOAT(vr0, vx0, va0, gvl); + vr0 = VFMACCVV_FLOAT(vr0, vx1, va1, gvl); + vr1 = VFMACCVV_FLOAT(vr1, vx1, va0, gvl); + vr1 = VFNMSACVV_FLOAT(vr1, vx0, va1, gvl); +#else + vr0 = VFMACCVV_FLOAT(vr0, vx0, va0, gvl); + vr0 = VFNMSACVV_FLOAT(vr0, vx1, va1, gvl); + vr1 = VFMACCVV_FLOAT(vr1, vx1, va0, gvl); + vr1 = VFMACCVV_FLOAT(vr1, vx0, va1, gvl); + +#endif + i += gvl; + ix += inc_xv; + iy += inc_yv; + ia += inc_av; + } + va0 = VFMVVF_FLOAT(0, gvl); + vx0 = VFREDSUM_FLOAT(vr0, va0, gvl); + temp_r2 = vx0[0]; + vx1 = VFREDSUM_FLOAT(vr1, va0, gvl); + temp_i2 = vx1[0]; + if(i < m){ + gvl = vsetvli(m-i, RVV_EFLOAT, RVV_M); + va0 = VLSEV_FLOAT(&a_ptr[ia], stride_a, gvl); + va1 = VLSEV_FLOAT(&a_ptr[ia+1], stride_a, gvl); + vy0 = VLSEV_FLOAT(&y[iy], stride_y, gvl); + vy1 = VLSEV_FLOAT(&y[iy+1], stride_y, gvl); +#ifndef HEMVREV + vy0 = VFMACCVF_FLOAT(vy0, temp_r1, va0, gvl); + vy0 = VFNMSACVF_FLOAT(vy0, temp_i1, va1, gvl); + vy1 = VFMACCVF_FLOAT(vy1, temp_r1, va1, gvl); + vy1 = VFMACCVF_FLOAT(vy1, temp_i1, va0, gvl); +#else + vy0 = 
VFMACCVF_FLOAT(vy0, temp_r1, va0, gvl); + vy0 = VFMACCVF_FLOAT(vy0, temp_i1, va1, gvl); + vy1 = VFNMSACVF_FLOAT(vy1, temp_r1, va1, gvl); + vy1 = VFMACCVF_FLOAT(vy1, temp_i1, va0, gvl); +#endif + VSSEV_FLOAT(&y[iy], stride_y, vy0, gvl); + VSSEV_FLOAT(&y[iy+1], stride_y, vy1, gvl); + + vx0 = VLSEV_FLOAT(&x[ix], stride_x, gvl); + vx1 = VLSEV_FLOAT(&x[ix+1], stride_x, gvl); +#ifndef HEMVREV + vr0 = VFMULVV_FLOAT(vx0, va0, gvl); + vr0 = VFMACCVV_FLOAT(vr0, vx1, va1, gvl); + vr1 = VFMULVV_FLOAT(vx1, va0, gvl); + vr1 = VFNMSACVV_FLOAT(vr1, vx0, va1, gvl); +#else + vr0 = VFMULVV_FLOAT(vx0, va0, gvl); + vr0 = VFNMSACVV_FLOAT(vr0, vx1, va1, gvl); + vr1 = VFMULVV_FLOAT(vx1, va0, gvl); + vr1 = VFMACCVV_FLOAT(vr1, vx0, va1, gvl); +#endif + + va0 = VFMVVF_FLOAT(0, gvl); + vx0 = VFREDSUM_FLOAT(vr0, va0, gvl); + temp_r2 += vx0[0]; + vx1 = VFREDSUM_FLOAT(vr1, va0, gvl); + temp_i2 += vx1[0]; + } + } + y[jy] += alpha_r * temp_r2 - alpha_i * temp_i2; + y[jy+1] += alpha_r * temp_i2 + alpha_i * temp_r2; + jx += inc_x2; + jy += inc_y2; + ja += 2; + a_ptr += lda2; + } + return(0); +} diff --git a/kernel/riscv64/zhemv_UV_vector.c b/kernel/riscv64/zhemv_UV_vector.c new file mode 100644 index 000000000..6fe12c76c --- /dev/null +++ b/kernel/riscv64/zhemv_UV_vector.c @@ -0,0 +1,192 @@ +/*************************************************************************** +Copyright (c) 2013, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. 
Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*****************************************************************************/ + +#include "common.h" +#if !defined(DOUBLE) +#define RVV_EFLOAT RVV_E32 +#define RVV_M RVV_M4 +#define FLOAT_V_T float32xm4_t +#define VLSEV_FLOAT vlsev_float32xm4 +#define VSSEV_FLOAT vssev_float32xm4 +#define VFREDSUM_FLOAT vfredsumvs_float32xm4 +#define VFMACCVV_FLOAT vfmaccvv_float32xm4 +#define VFMACCVF_FLOAT vfmaccvf_float32xm4 +#define VFMVVF_FLOAT vfmvvf_float32xm4 +#define VFMULVV_FLOAT vfmulvv_float32xm4 +#define VFNMSACVF_FLOAT vfnmsacvf_float32xm4 +#define VFNMSACVV_FLOAT vfnmsacvv_float32xm4 +#else +#define RVV_EFLOAT RVV_E64 +#define RVV_M RVV_M4 +#define FLOAT_V_T float64xm4_t +#define VLSEV_FLOAT vlsev_float64xm4 +#define VSSEV_FLOAT vssev_float64xm4 +#define VFREDSUM_FLOAT vfredsumvs_float64xm4 +#define VFMACCVV_FLOAT vfmaccvv_float64xm4 +#define VFMACCVF_FLOAT vfmaccvf_float64xm4 +#define VFMVVF_FLOAT vfmvvf_float64xm4 +#define VFMULVV_FLOAT vfmulvv_float64xm4 +#define VFNMSACVF_FLOAT vfnmsacvf_float64xm4 +#define VFNMSACVV_FLOAT 
vfnmsacvv_float64xm4 +#endif + +int CNAME(BLASLONG m, BLASLONG offset, FLOAT alpha_r, FLOAT alpha_i, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG incx, FLOAT *y, BLASLONG incy, FLOAT *buffer){ /* updates y in place for columns m-offset .. m-1 of a; buffer is never referenced in this body; always returns 0 */ + BLASLONG i, j, k; + BLASLONG ix, iy, ia; + BLASLONG jx, jy, ja; + FLOAT temp_r1, temp_i1; + FLOAT temp_r2, temp_i2; + FLOAT *a_ptr = a; + unsigned int gvl = 0; + + + FLOAT_V_T va0, va1, vx0, vx1, vy0, vy1, vr0, vr1; + BLASLONG stride_x, stride_y, stride_a, inc_xv, inc_yv, inc_av, lda2; + + BLASLONG inc_x2 = incx * 2; /* x and y are walked as (first, second) FLOAT pairs, hence the factor 2 */ + BLASLONG inc_y2 = incy * 2; + stride_x = inc_x2 * sizeof(FLOAT); /* strides passed to the strided load/store macros are in bytes */ + stride_y = inc_y2 * sizeof(FLOAT); + stride_a = 2 * sizeof(FLOAT); + lda2 = lda * 2; + + BLASLONG m1 = m - offset; /* first column index visited by the j loop */ + a_ptr = a + m1 * lda2; + jx = m1 * inc_x2; + jy = m1 * inc_y2; + ja = m1 * 2; + for(j = m1; j < m; j++){ + temp_r1 = alpha_r * x[jx] - alpha_i * x[jx+1];; /* (temp_r1, temp_i1) = alpha * x[j] as a complex product */ + temp_i1 = alpha_r * x[jx+1] + alpha_i * x[jx]; + temp_r2 = 0; + temp_i2 = 0; + ix = 0; + iy = 0; + ia = 0; + i = 0; + if(j > 0){ /* covers the first j element pairs of a_ptr, x and y */ + gvl = vsetvli(j, RVV_EFLOAT, RVV_M); + inc_xv = incx * gvl * 2; + inc_yv = incy * gvl * 2; + inc_av = gvl * 2; + vr0 = VFMVVF_FLOAT(0, gvl); + vr1 = VFMVVF_FLOAT(0, gvl); + for(k = 0; k < j / gvl; k++){ /* full passes of gvl pairs each; leftovers are handled after the loop */ + va0 = VLSEV_FLOAT(&a_ptr[ia], stride_a, gvl); + va1 = VLSEV_FLOAT(&a_ptr[ia+1], stride_a, gvl); + vy0 = VLSEV_FLOAT(&y[iy], stride_y, gvl); + vy1 = VLSEV_FLOAT(&y[iy+1], stride_y, gvl); +#ifndef HEMVREV + vy0 = VFMACCVF_FLOAT(vy0, temp_r1, va0, gvl); + vy0 = VFNMSACVF_FLOAT(vy0, temp_i1, va1, gvl); + vy1 = VFMACCVF_FLOAT(vy1, temp_r1, va1, gvl); + vy1 = VFMACCVF_FLOAT(vy1, temp_i1, va0, gvl); +#else + vy0 = VFMACCVF_FLOAT(vy0, temp_r1, va0, gvl); + vy0 = VFMACCVF_FLOAT(vy0, temp_i1, va1, gvl); + vy1 = VFNMSACVF_FLOAT(vy1, temp_r1, va1, gvl); + vy1 = VFMACCVF_FLOAT(vy1, temp_i1, va0, gvl); +#endif + VSSEV_FLOAT(&y[iy], stride_y, vy0, gvl); + VSSEV_FLOAT(&y[iy+1], stride_y, vy1, gvl); + + vx0 = VLSEV_FLOAT(&x[ix], stride_x, gvl); + vx1 = VLSEV_FLOAT(&x[ix+1], stride_x, gvl); +#ifndef HEMVREV + vr0 =
VFMACCVV_FLOAT(vr0, vx0, va0, gvl); + vr0 = VFMACCVV_FLOAT(vr0, vx1, va1, gvl); + vr1 = VFMACCVV_FLOAT(vr1, vx1, va0, gvl); + vr1 = VFNMSACVV_FLOAT(vr1, vx0, va1, gvl); +#else + vr0 = VFMACCVV_FLOAT(vr0, vx0, va0, gvl); + vr0 = VFNMSACVV_FLOAT(vr0, vx1, va1, gvl); + vr1 = VFMACCVV_FLOAT(vr1, vx1, va0, gvl); + vr1 = VFMACCVV_FLOAT(vr1, vx0, va1, gvl); + +#endif + i += gvl; + ix += inc_xv; + iy += inc_yv; + ia += inc_av; + } + va0 = VFMVVF_FLOAT(0, gvl); + vx0 = VFREDSUM_FLOAT(vr0, va0, gvl); + temp_r2 = vx0[0]; /* element 0 of the reduction result is taken as the scalar total */ + vx1 = VFREDSUM_FLOAT(vr1, va0, gvl); + temp_i2 = vx1[0]; + if(i < j){ /* remainder: the j-i pairs left over after the full-gvl passes */ + gvl = vsetvli(j-i, RVV_EFLOAT, RVV_M); + va0 = VLSEV_FLOAT(&a_ptr[ia], stride_a, gvl); + va1 = VLSEV_FLOAT(&a_ptr[ia+1], stride_a, gvl); + vy0 = VLSEV_FLOAT(&y[iy], stride_y, gvl); + vy1 = VLSEV_FLOAT(&y[iy+1], stride_y, gvl); +#ifndef HEMVREV + vy0 = VFMACCVF_FLOAT(vy0, temp_r1, va0, gvl); + vy0 = VFNMSACVF_FLOAT(vy0, temp_i1, va1, gvl); + vy1 = VFMACCVF_FLOAT(vy1, temp_r1, va1, gvl); + vy1 = VFMACCVF_FLOAT(vy1, temp_i1, va0, gvl); +#else + vy0 = VFMACCVF_FLOAT(vy0, temp_r1, va0, gvl); + vy0 = VFMACCVF_FLOAT(vy0, temp_i1, va1, gvl); + vy1 = VFNMSACVF_FLOAT(vy1, temp_r1, va1, gvl); + vy1 = VFMACCVF_FLOAT(vy1, temp_i1, va0, gvl); +#endif + VSSEV_FLOAT(&y[iy], stride_y, vy0, gvl); + VSSEV_FLOAT(&y[iy+1], stride_y, vy1, gvl); + + vx0 = VLSEV_FLOAT(&x[ix], stride_x, gvl); + vx1 = VLSEV_FLOAT(&x[ix+1], stride_x, gvl); +#ifndef HEMVREV + vr0 = VFMULVV_FLOAT(vx0, va0, gvl); + vr0 = VFMACCVV_FLOAT(vr0, vx1, va1, gvl); + vr1 = VFMULVV_FLOAT(vx1, va0, gvl); + vr1 = VFNMSACVV_FLOAT(vr1, vx0, va1, gvl); +#else + vr0 = VFMULVV_FLOAT(vx0, va0, gvl); + vr0 = VFNMSACVV_FLOAT(vr0, vx1, va1, gvl); + vr1 = VFMULVV_FLOAT(vx1, va0, gvl); + vr1 = VFMACCVV_FLOAT(vr1, vx0, va1, gvl); +#endif + + va0 = VFMVVF_FLOAT(0, gvl); + vx0 = VFREDSUM_FLOAT(vr0, va0, gvl); + temp_r2 += vx0[0]; + vx1 = VFREDSUM_FLOAT(vr1, va0, gvl); + temp_i2 += vx1[0]; + } + } + y[jy] += temp_r1 * a_ptr[ja]; /* diagonal entry: only a_ptr[ja] is read here, a_ptr[ja+1] is not used */ + y[jy+1] += temp_i1 *
a_ptr[ja]; + y[jy] += alpha_r * temp_r2 - alpha_i * temp_i2; + y[jy+1] += alpha_r * temp_i2 + alpha_i * temp_r2; + jx += inc_x2; + jy += inc_y2; + ja += 2; + a_ptr += lda2; + } + return(0); +} diff --git a/kernel/riscv64/znrm2.c b/kernel/riscv64/znrm2.c new file mode 100644 index 000000000..fc1c8b54a --- /dev/null +++ b/kernel/riscv64/znrm2.c @@ -0,0 +1,106 @@ +/*************************************************************************** +Copyright (c) 2013, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*****************************************************************************/ + +/************************************************************************************** +* 2013/09/13 Saar +* BLASTEST float : OK +* BLASTEST double : OK +* CTEST : OK +* TEST : OK +* +**************************************************************************************/ + +#include "common.h" +#include <math.h> + +#if defined(DOUBLE) + +#define ABS fabs + +#else + +#define ABS fabsf + +#endif + + + +FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) /* returns scale*sqrt(ssq) accumulated over both FLOATs of n element pairs spaced 2*inc_x FLOATs apart; 0.0 when n <= 0 or inc_x <= 0 */ +{ + BLASLONG i=0; /* index of the first FLOAT of the current pair */ + FLOAT scale = 0.0; /* largest magnitude seen so far */ + FLOAT ssq = 1.0; /* sum of squares of the values seen so far, each divided by scale */ + BLASLONG inc_x2; + FLOAT temp; + + if (n <= 0 || inc_x <= 0) return(0.0); + + inc_x2 = 2 * inc_x; /* step between pairs, counted in FLOATs */ + + n *= inc_x2; /* n is reused as the exclusive end index for i */ + while(i < n) + { + + if ( x[i] != 0.0 ) /* zeros are skipped, so the else branch below never divides by scale == 0 */ + { + temp = ABS( x[i] ); + if ( scale < temp ) + { + ssq = 1 + ssq * ( scale / temp ) * ( scale / temp ); /* new maximum: re-express ssq relative to temp */ + scale = temp ; + } + else + { + ssq += ( temp / scale ) * ( temp / scale ); + } + + } + + if ( x[i+1] != 0.0 ) /* same update for the second FLOAT of the pair */ + { + temp = ABS( x[i+1] ); + if ( scale < temp ) + { + ssq = 1 + ssq * ( scale / temp ) * ( scale / temp ); + scale = temp ; + } + else + { + ssq += ( temp / scale ) * ( temp / scale ); + } + + } + + + i += inc_x2; + } + scale = scale * sqrt( ssq ); + return(scale); + +} + + diff --git a/kernel/riscv64/znrm2_vector.c b/kernel/riscv64/znrm2_vector.c new file mode 100644 index 000000000..b0ebfa5f4 --- /dev/null +++ b/kernel/riscv64/znrm2_vector.c @@ -0,0 +1,278 @@ +/*************************************************************************** +Copyright (c) 2020, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2.
Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*****************************************************************************/ + +#include "common.h" +#if !defined(DOUBLE) +#define RVV_EFLOAT RVV_E32 +#define RVV_M RVV_M4 +#define FLOAT_V_T float32xm4_t +#define VLEV_FLOAT vlev_float32xm4 +#define VLSEV_FLOAT vlsev_float32xm4 +#define VFREDSUM_FLOAT vfredsumvs_float32xm4 +#define VFMACCVV_FLOAT vfmaccvv_float32xm4 +#define VFMVVF_FLOAT vfmvvf_float32xm4 +#define VFDOTVV_FLOAT vfdotvv_float32xm4 +#define ABS fabsf +#define MASK_T e32xm4_t +#define VFRSUBVF_MASK_FLOAT vfrsubvf_mask_float32xm4 +#define VMFGTVF_FLOAT vmfgtvf_e32xm4_float32xm4 +#define VMFIRSTM vmfirstm_e32xm4 +#define VFDIVVF_FLOAT vfdivvf_float32xm4 +#define VMFLTVF_FLOAT vmfltvf_e32xm4_float32xm4 +#define VFREDMAXVS_FLOAT vfredmaxvs_float32xm4 +#else +#define RVV_EFLOAT RVV_E64 +#define RVV_M RVV_M4 +#define FLOAT_V_T float64xm4_t +#define VLEV_FLOAT vlev_float64xm4 +#define VLSEV_FLOAT vlsev_float64xm4 +#define VFREDSUM_FLOAT vfredsumvs_float64xm4 +#define VFMACCVV_FLOAT vfmaccvv_float64xm4 +#define VFMVVF_FLOAT vfmvvf_float64xm4 +#define VFDOTVV_FLOAT vfdotvv_float64xm4 +#define ABS fabs +#define MASK_T e64xm4_t +#define VFRSUBVF_MASK_FLOAT vfrsubvf_mask_float64xm4 +#define VMFGTVF_FLOAT vmfgtvf_e64xm4_float64xm4 +#define VMFIRSTM vmfirstm_e64xm4 +#define VFDIVVF_FLOAT vfdivvf_float64xm4 +#define VMFLTVF_FLOAT vmfltvf_e64xm4_float64xm4 +#define VFREDMAXVS_FLOAT vfredmaxvs_float64xm4 +#endif + +FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) +{ + BLASLONG i=0, j=0; + + if ( n < 0 ) return(0.0); +// if(n == 1) return (ABS(x[0])); + + FLOAT_V_T vr, v0, v_zero; + unsigned int gvl = 0; + FLOAT scale = 0.0, ssq = 0.0; + MASK_T mask; + BLASLONG index = 0; + if(inc_x == 1){ + BLASLONG n2 = n * 2; + gvl = vsetvli(n2, RVV_EFLOAT, RVV_M); + vr = VFMVVF_FLOAT(0, gvl); + v_zero = VFMVVF_FLOAT(0, gvl); + for(i=0,j=0; i + +int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3, FLOAT dummy4, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y,
FLOAT *dummy, BLASLONG dummy2) /* exchanges n two-FLOAT elements between x and y in place; none of the dummy* arguments is read; always returns 0 */ +{ + BLASLONG i=0; /* number of elements exchanged so far */ + BLASLONG ix=0,iy=0; /* FLOAT offsets of the current element in x and in y */ + FLOAT temp[2]; /* holds the x element while it is overwritten */ + BLASLONG inc_x2; + BLASLONG inc_y2; + + if ( n < 0 ) return(0); + + inc_x2 = 2 * inc_x; /* element strides converted to FLOAT strides */ + inc_y2 = 2 * inc_y; + + while(i < n) + { + + temp[0] = x[ix] ; + temp[1] = x[ix+1] ; + x[ix] = y[iy] ; + x[ix+1] = y[iy+1] ; + y[iy] = temp[0] ; + y[iy+1] = temp[1] ; + + ix += inc_x2 ; + iy += inc_y2 ; + i++ ; + + } + return(0); + +} + + diff --git a/kernel/riscv64/zswap_vector.c b/kernel/riscv64/zswap_vector.c new file mode 100644 index 000000000..b655a968c --- /dev/null +++ b/kernel/riscv64/zswap_vector.c @@ -0,0 +1,117 @@ +/*************************************************************************** +Copyright (c) 2020, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED.
IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*****************************************************************************/ + +#include "common.h" +#include +#if !defined(DOUBLE) +#define RVV_EFLOAT RVV_E32 +#define RVV_M RVV_M8 +#define FLOAT_V_T float32xm8_t +#define VLEV_FLOAT vlev_float32xm8 +#define VLSEV_FLOAT vlsev_float32xm8 +#define VSEV_FLOAT vsev_float32xm8 +#define VSSEV_FLOAT vssev_float32xm8 +#else +#define RVV_EFLOAT RVV_E64 +#define RVV_M RVV_M8 +#define FLOAT_V_T float64xm8_t +#define VLEV_FLOAT vlev_float64xm8 +#define VLSEV_FLOAT vlsev_float64xm8 +#define VSEV_FLOAT vsev_float64xm8 +#define VSSEV_FLOAT vssev_float64xm8 +#endif + +int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3, FLOAT dummy4, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *dummy, BLASLONG dummy2) +{ + BLASLONG i = 0, j = 0; + BLASLONG ix = 0,iy = 0; + BLASLONG stride_x, stride_y; + FLOAT_V_T vx0, vx1, vy0, vy1; + unsigned int gvl = 0; + + if (n < 0) return(0); + if(inc_x == 1 && inc_y == 1){ + gvl = vsetvli(n, RVV_EFLOAT, RVV_M); + BLASLONG n2 = n * 2; + if(gvl <= n2/2){ + for(i=0,j=0; i Date: Mon, 2 Nov 2020 13:04:53 +0000 Subject: [PATCH 09/22] allow setting soname without suffix or prefix Allows to create a library with a different SONAME without the need to add suffixes to symbols Backwards compatible and should have no effect on the workflow and previous users. 
Useful for allowing INTERFACE64 library alongside the standard library without file conflicts --- Makefile.install | 16 ++++++++-------- Makefile.system | 8 ++++++-- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/Makefile.install b/Makefile.install index 7c1a3ca43..e8b64465f 100644 --- a/Makefile.install +++ b/Makefile.install @@ -9,7 +9,7 @@ OPENBLAS_INCLUDE_DIR := $(PREFIX)/include OPENBLAS_LIBRARY_DIR := $(PREFIX)/lib OPENBLAS_BINARY_DIR := $(PREFIX)/bin OPENBLAS_BUILD_DIR := $(CURDIR) -OPENBLAS_CMAKE_DIR := $(OPENBLAS_LIBRARY_DIR)/cmake/openblas +OPENBLAS_CMAKE_DIR := $(OPENBLAS_LIBRARY_DIR)/cmake/$(LIBSONAMEBASE) OPENBLAS_CMAKE_CONFIG := OpenBLASConfig.cmake OPENBLAS_CMAKE_CONFIG_VERSION := OpenBLASConfigVersion.cmake OPENBLAS_PKGCONFIG_DIR := $(OPENBLAS_LIBRARY_DIR)/pkgconfig @@ -150,13 +150,13 @@ endif endif #Generating openblas.pc - @echo Generating openblas.pc in "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)" - @echo 'libdir='$(OPENBLAS_LIBRARY_DIR) > "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc" - @echo 'includedir='$(OPENBLAS_INCLUDE_DIR) >> "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc" - @echo 'openblas_config= USE_64BITINT='$(USE_64BITINT) 'DYNAMIC_ARCH='$(DYNAMIC_ARCH) 'DYNAMIC_OLDER='$(DYNAMIC_OLDER) 'NO_CBLAS='$(NO_CBLAS) 'NO_LAPACK='$(NO_LAPACK) 'NO_LAPACKE='$(NO_LAPACKE) 'NO_AFFINITY='$(NO_AFFINITY) 'USE_OPENMP='$(USE_OPENMP) $(CORE) 'MAX_THREADS='$(NUM_THREADS)>> "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc" - @echo 'version='$(VERSION) >> "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc" - @echo 'extralib='$(PKG_EXTRALIB) >> "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc" - @cat openblas.pc.in >> "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc" + @echo Generating $(LIBSONAMEBASE).pc in "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)" + @echo 'libdir='$(OPENBLAS_LIBRARY_DIR) > "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/$(LIBSONAMEBASE).pc" + @echo 'includedir='$(OPENBLAS_INCLUDE_DIR) >> 
"$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/$(LIBSONAMEBASE).pc" + @echo 'openblas_config= USE_64BITINT='$(USE_64BITINT) 'DYNAMIC_ARCH='$(DYNAMIC_ARCH) 'DYNAMIC_OLDER='$(DYNAMIC_OLDER) 'NO_CBLAS='$(NO_CBLAS) 'NO_LAPACK='$(NO_LAPACK) 'NO_LAPACKE='$(NO_LAPACKE) 'NO_AFFINITY='$(NO_AFFINITY) 'USE_OPENMP='$(USE_OPENMP) $(CORE) 'MAX_THREADS='$(NUM_THREADS)>> "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/$(LIBSONAMEBASE).pc" + @echo 'version='$(VERSION) >> "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/$(LIBSONAMEBASE).pc" + @echo 'extralib='$(PKG_EXTRALIB) >> "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/$(LIBSONAMEBASE).pc" + @cat openblas.pc.in >> "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/$(LIBSONAMEBASE).pc" #Generating OpenBLASConfig.cmake diff --git a/Makefile.system b/Makefile.system index 52d3e2cdc..afbdb6bab 100644 --- a/Makefile.system +++ b/Makefile.system @@ -1263,10 +1263,14 @@ ifndef SYMBOLSUFFIX SYMBOLSUFFIX = endif +ifndef LIBSONAMEBASE +LIBSONAMEBASE = openblas +endif + ifndef LIBNAMESUFFIX -LIBNAMEBASE = $(SYMBOLPREFIX)openblas$(SYMBOLSUFFIX) +LIBNAMEBASE = $(SYMBOLPREFIX)$(LIBSONAMEBASE)$(SYMBOLSUFFIX) else -LIBNAMEBASE = $(SYMBOLPREFIX)openblas$(SYMBOLSUFFIX)_$(LIBNAMESUFFIX) +LIBNAMEBASE = $(SYMBOLPREFIX)$(LIBSONAMEBASE)$(SYMBOLSUFFIX)_$(LIBNAMESUFFIX) endif ifeq ($(OSNAME), CYGWIN_NT) From c4c591ac5afc10b5619d1c58b10d5095dc82a2ff Mon Sep 17 00:00:00 2001 From: Qiyu8 Date: Tue, 10 Nov 2020 16:16:38 +0800 Subject: [PATCH 10/22] fix sum optimize issues --- kernel/arm/sum.c | 39 ++++++++++++++++++++------------------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/kernel/arm/sum.c b/kernel/arm/sum.c index 63584b95c..a486a1868 100644 --- a/kernel/arm/sum.c +++ b/kernel/arm/sum.c @@ -42,24 +42,27 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) n *= inc_x; if (inc_x == 1) { -#if V_SIMD +#if V_SIMD && (!defined(DOUBLE) || (defined(DOUBLE) && V_SIMD_F64 && V_SIMD > 128)) #ifdef DOUBLE const int vstep = v_nlanes_f64; - const int unrollx2 = n & (-vstep * 2); + const int unrollx4 
= n & (-vstep * 4); const int unrollx = n & -vstep; v_f64 vsum0 = v_zero_f64(); v_f64 vsum1 = v_zero_f64(); - while (i < unrollx2) - { - vsum0 = v_add_f64(vsum0, v_loadu_f64(x)); - vsum1 = v_add_f64(vsum1, v_loadu_f64(x + vstep)); - i += vstep * 2; - } - vsum0 = v_add_f64(vsum0, vsum1); - while (i < unrollx) + v_f64 vsum2 = v_zero_f64(); + v_f64 vsum3 = v_zero_f64(); + for (; i < unrollx4; i += vstep * 4) + { + vsum0 = v_add_f64(vsum0, v_loadu_f64(x + i)); + vsum1 = v_add_f64(vsum1, v_loadu_f64(x + i + vstep)); + vsum2 = v_add_f64(vsum2, v_loadu_f64(x + i + vstep * 2)); + vsum3 = v_add_f64(vsum3, v_loadu_f64(x + i + vstep * 3)); + } + vsum0 = v_add_f64( + v_add_f64(vsum0, vsum1), v_add_f64(vsum2, vsum3)); + for (; i < unrollx; i += vstep) { vsum0 = v_add_f64(vsum0, v_loadu_f64(x + i)); - i += vstep; } sumf = v_sum_f64(vsum0); #else @@ -70,20 +73,18 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) v_f32 vsum1 = v_zero_f32(); v_f32 vsum2 = v_zero_f32(); v_f32 vsum3 = v_zero_f32(); - while (i < unrollx4) + for (; i < unrollx4; i += vstep * 4) { - vsum0 = v_add_f32(vsum0, v_loadu_f32(x)); - vsum1 = v_add_f32(vsum1, v_loadu_f32(x + vstep)); - vsum2 = v_add_f32(vsum2, v_loadu_f32(x + vstep * 2)); - vsum3 = v_add_f32(vsum3, v_loadu_f32(x + vstep * 3)); - i += vstep * 4; + vsum0 = v_add_f32(vsum0, v_loadu_f32(x + i)); + vsum1 = v_add_f32(vsum1, v_loadu_f32(x + i + vstep)); + vsum2 = v_add_f32(vsum2, v_loadu_f32(x + i + vstep * 2)); + vsum3 = v_add_f32(vsum3, v_loadu_f32(x + i + vstep * 3)); } vsum0 = v_add_f32( v_add_f32(vsum0, vsum1), v_add_f32(vsum2, vsum3)); - while (i < unrollx) + for (; i < unrollx; i += vstep) { vsum0 = v_add_f32(vsum0, v_loadu_f32(x + i)); - i += vstep; } sumf = v_sum_f32(vsum0); #endif From 8c0b206d4cf9909017a52919a41406ee303f472e Mon Sep 17 00:00:00 2001 From: Qiyu8 Date: Wed, 11 Nov 2020 14:33:12 +0800 Subject: [PATCH 11/22] Optimize the performance of rot by using universal intrinsics --- kernel/simd/intrin_avx.h | 10 ++++++ 
kernel/simd/intrin_avx512.h | 5 +++ kernel/simd/intrin_neon.h | 10 ++++++ kernel/simd/intrin_sse.h | 13 +++++++ kernel/x86_64/drot.c | 68 ++++++++++++++++++++++++++++++++++- kernel/x86_64/srot.c | 70 ++++++++++++++++++++++++++++++++++++- 6 files changed, 174 insertions(+), 2 deletions(-) diff --git a/kernel/simd/intrin_avx.h b/kernel/simd/intrin_avx.h index 3f79646e0..fbe531417 100644 --- a/kernel/simd/intrin_avx.h +++ b/kernel/simd/intrin_avx.h @@ -12,6 +12,8 @@ typedef __m256d v_f64; ***************************/ #define v_add_f32 _mm256_add_ps #define v_add_f64 _mm256_add_pd +#define v_sub_f32 _mm256_sub_ps +#define v_sub_f64 _mm256_sub_pd #define v_mul_f32 _mm256_mul_ps #define v_mul_f64 _mm256_mul_pd @@ -19,12 +21,20 @@ typedef __m256d v_f64; // multiply and add, a*b + c #define v_muladd_f32 _mm256_fmadd_ps #define v_muladd_f64 _mm256_fmadd_pd + // multiply and subtract, a*b - c + #define v_mulsub_f32 _mm256_fmsub_ps + #define v_mulsub_f64 _mm256_fmsub_pd #else // multiply and add, a*b + c BLAS_FINLINE v_f32 v_muladd_f32(v_f32 a, v_f32 b, v_f32 c) { return v_add_f32(v_mul_f32(a, b), c); } BLAS_FINLINE v_f64 v_muladd_f64(v_f64 a, v_f64 b, v_f64 c) { return v_add_f64(v_mul_f64(a, b), c); } + // multiply and subtract, a*b - c + BLAS_FINLINE v_f32 v_mulsub_f32(v_f32 a, v_f32 b, v_f32 c) + { return v_sub_f32(v_mul_f32(a, b), c); } + BLAS_FINLINE v_f64 v_mulsub_f64(v_f64 a, v_f64 b, v_f64 c) + { return v_sub_f64(v_mul_f64(a, b), c); } #endif // !HAVE_FMA3 // Horizontal add: Calculates the sum of all vector elements. 
diff --git a/kernel/simd/intrin_avx512.h b/kernel/simd/intrin_avx512.h index f00af53e9..8f38eedd9 100644 --- a/kernel/simd/intrin_avx512.h +++ b/kernel/simd/intrin_avx512.h @@ -12,11 +12,16 @@ typedef __m512d v_f64; ***************************/ #define v_add_f32 _mm512_add_ps #define v_add_f64 _mm512_add_pd +#define v_sub_f32 _mm512_sub_ps +#define v_sub_f64 _mm512_sub_pd #define v_mul_f32 _mm512_mul_ps #define v_mul_f64 _mm512_mul_pd // multiply and add, a*b + c #define v_muladd_f32 _mm512_fmadd_ps #define v_muladd_f64 _mm512_fmadd_pd +// multiply and subtract, a*b - c +#define v_mulsub_f32 _mm512_fmsub_ps +#define v_mulsub_f64 _mm512_fmsub_pd BLAS_FINLINE float v_sum_f32(v_f32 a) { __m512 h64 = _mm512_shuffle_f32x4(a, a, _MM_SHUFFLE(3, 2, 3, 2)); diff --git a/kernel/simd/intrin_neon.h b/kernel/simd/intrin_neon.h index 22cef10ca..cd44599fe 100644 --- a/kernel/simd/intrin_neon.h +++ b/kernel/simd/intrin_neon.h @@ -18,6 +18,8 @@ typedef float32x4_t v_f32; ***************************/ #define v_add_f32 vaddq_f32 #define v_add_f64 vaddq_f64 +#define v_sub_f32 vsubq_f32 +#define v_sub_f64 vsubq_f64 #define v_mul_f32 vmulq_f32 #define v_mul_f64 vmulq_f64 @@ -26,16 +28,24 @@ typedef float32x4_t v_f32; // multiply and add, a*b + c BLAS_FINLINE v_f32 v_muladd_f32(v_f32 a, v_f32 b, v_f32 c) { return vfmaq_f32(c, a, b); } + // multiply and subtract, a*b - c + BLAS_FINLINE v_f32 v_mulsub_f32(v_f32 a, v_f32 b, v_f32 c) + { return vfmaq_f32(vnegq_f32(c), a, b); } #else // multiply and add, a*b + c BLAS_FINLINE v_f32 v_muladd_f32(v_f32 a, v_f32 b, v_f32 c) { return vmlaq_f32(c, a, b); } + // multiply and subtract, a*b - c + BLAS_FINLINE v_f32 v_mulsub_f32(v_f32 a, v_f32 b, v_f32 c) + { return vmlaq_f32(vnegq_f32(c), a, b); } #endif // FUSED F64 #if V_SIMD_F64 BLAS_FINLINE v_f64 v_muladd_f64(v_f64 a, v_f64 b, v_f64 c) { return vfmaq_f64(c, a, b); } + BLAS_FINLINE v_f64 v_mulsub_f64(v_f64 a, v_f64 b, v_f64 c) + { return vfmaq_f64(vnegq_f64(c), a, b); } #endif // Horizontal add: 
Calculates the sum of all vector elements. diff --git a/kernel/simd/intrin_sse.h b/kernel/simd/intrin_sse.h index 06a3fe78b..6a542072e 100644 --- a/kernel/simd/intrin_sse.h +++ b/kernel/simd/intrin_sse.h @@ -12,22 +12,35 @@ typedef __m128d v_f64; ***************************/ #define v_add_f32 _mm_add_ps #define v_add_f64 _mm_add_pd +#define v_sub_f32 _mm_sub_ps +#define v_sub_f64 _mm_sub_pd #define v_mul_f32 _mm_mul_ps #define v_mul_f64 _mm_mul_pd #ifdef HAVE_FMA3 // multiply and add, a*b + c #define v_muladd_f32 _mm_fmadd_ps #define v_muladd_f64 _mm_fmadd_pd + // multiply and subtract, a*b - c + #define v_mulsub_f32 _mm_fmsub_ps + #define v_mulsub_f64 _mm_fmsub_pd #elif defined(HAVE_FMA4) // multiply and add, a*b + c #define v_muladd_f32 _mm_macc_ps #define v_muladd_f64 _mm_macc_pd + // multiply and subtract, a*b - c + #define v_mulsub_f32 _mm_msub_ps + #define v_mulsub_f64 _mm_msub_pd #else // multiply and add, a*b + c BLAS_FINLINE v_f32 v_muladd_f32(v_f32 a, v_f32 b, v_f32 c) { return v_add_f32(v_mul_f32(a, b), c); } BLAS_FINLINE v_f64 v_muladd_f64(v_f64 a, v_f64 b, v_f64 c) { return v_add_f64(v_mul_f64(a, b), c); } + // multiply and subtract, a*b - c + BLAS_FINLINE v_f32 v_mulsub_f32(v_f32 a, v_f32 b, v_f32 c) + { return v_sub_f32(v_mul_f32(a, b), c); } + BLAS_FINLINE v_f64 v_mulsub_f64(v_f64 a, v_f64 b, v_f64 c) + { return v_sub_f64(v_mul_f64(a, b), c); } #endif // HAVE_FMA3 // Horizontal add: Calculates the sum of all vector elements. 
diff --git a/kernel/x86_64/drot.c b/kernel/x86_64/drot.c index a312b7ff9..66e9ff907 100644 --- a/kernel/x86_64/drot.c +++ b/kernel/x86_64/drot.c @@ -7,10 +7,76 @@ #endif #ifndef HAVE_DROT_KERNEL +#include "../simd/intrin.h" static void drot_kernel(BLASLONG n, FLOAT *x, FLOAT *y, FLOAT c, FLOAT s) { BLASLONG i = 0; +#if V_SIMD_F64 && V_SIMD > 256 + const int vstep = v_nlanes_f64; + const int unrollx4 = n & (-vstep * 4); + const int unrollx = n & -vstep; + + v_f64 __c = v_setall_f64(c); + v_f64 __s = v_setall_f64(s); + v_f64 vx0, vx1, vx2, vx3; + v_f64 vy0, vy1, vy2, vy3; + v_f64 vt0, vt1, vt2, vt3; + + for (; i < unrollx4; i += vstep * 4) { + vx0 = v_loadu_f64(x + i); + vx1 = v_loadu_f64(x + i + vstep); + vx2 = v_loadu_f64(x + i + vstep * 2); + vx3 = v_loadu_f64(x + i + vstep * 3); + vy0 = v_loadu_f64(y + i); + vy1 = v_loadu_f64(y + i + vstep); + vy2 = v_loadu_f64(y + i + vstep * 2); + vy3 = v_loadu_f64(y + i + vstep * 3); + + vt0 = v_mul_f64(__s, vy0); + vt1 = v_mul_f64(__s, vy1); + vt2 = v_mul_f64(__s, vy2); + vt3 = v_mul_f64(__s, vy3); + + vt0 = v_muladd_f64(__c, vx0, vt0); + vt1 = v_muladd_f64(__c, vx1, vt1); + vt2 = v_muladd_f64(__c, vx2, vt2); + vt3 = v_muladd_f64(__c, vx3, vt3); + + v_storeu_f64(x + i, vt0); + v_storeu_f64(x + i + vstep, vt1); + v_storeu_f64(x + i + vstep * 2, vt2); + v_storeu_f64(x + i + vstep * 3, vt3); + + vt0 = v_mul_f64(__s, vx0); + vt1 = v_mul_f64(__s, vx1); + vt2 = v_mul_f64(__s, vx2); + vt3 = v_mul_f64(__s, vx3); + + vt0 = v_mulsub_f64(__c, vy0, vt0); + vt1 = v_mulsub_f64(__c, vy1, vt1); + vt2 = v_mulsub_f64(__c, vy2, vt2); + vt3 = v_mulsub_f64(__c, vy3, vt3); + + v_storeu_f64(y + i, vt0); + v_storeu_f64(y + i + vstep, vt1); + v_storeu_f64(y + i + vstep * 2, vt2); + v_storeu_f64(y + i + vstep * 3, vt3); + } + + for (; i < unrollx; i += vstep) { + vx0 = v_loadu_f64(x + i); + vy0 = v_loadu_f64(y + i); + + vt0 = v_mul_f64(__s, vy0); + vt0 = v_muladd_f64(__c, vx0, vt0); + v_storeu_f64(x + i, vt0); + + vt0 = v_mul_f64(__s, vx0); + vt0 = 
v_mulsub_f64(__c, vy0, vt0); + v_storeu_f64(y + i, vt0); + } +#else FLOAT f0, f1, f2, f3; FLOAT x0, x1, x2, x3; FLOAT g0, g1, g2, g3; @@ -53,7 +119,7 @@ static void drot_kernel(BLASLONG n, FLOAT *x, FLOAT *y, FLOAT c, FLOAT s) yp += 4; i += 4; } - +#endif while (i < n) { FLOAT temp = c*x[i] + s*y[i]; y[i] = c*y[i] - s*x[i]; diff --git a/kernel/x86_64/srot.c b/kernel/x86_64/srot.c index 021c20d82..d9583cdfa 100644 --- a/kernel/x86_64/srot.c +++ b/kernel/x86_64/srot.c @@ -7,10 +7,78 @@ #endif #ifndef HAVE_SROT_KERNEL +#include"../simd/intrin.h" static void srot_kernel(BLASLONG n, FLOAT *x, FLOAT *y, FLOAT c, FLOAT s) { BLASLONG i = 0; + +#if V_SIMD + const int vstep = v_nlanes_f32; + const int unrollx4 = n & (-vstep * 4); + const int unrollx = n & -vstep; + + v_f32 __c = v_setall_f32(c); + v_f32 __s = v_setall_f32(s); + v_f32 vx0, vx1, vx2, vx3; + v_f32 vy0, vy1, vy2, vy3; + v_f32 vt0, vt1, vt2, vt3; + + for (; i < unrollx4; i += vstep * 4) { + vx0 = v_loadu_f32(x + i); + vx1 = v_loadu_f32(x + i + vstep); + vx2 = v_loadu_f32(x + i + vstep * 2); + vx3 = v_loadu_f32(x + i + vstep * 3); + vy0 = v_loadu_f32(y + i); + vy1 = v_loadu_f32(y + i + vstep); + vy2 = v_loadu_f32(y + i + vstep * 2); + vy3 = v_loadu_f32(y + i + vstep * 3); + + vt0 = v_mul_f32(__s, vy0); + vt1 = v_mul_f32(__s, vy1); + vt2 = v_mul_f32(__s, vy2); + vt3 = v_mul_f32(__s, vy3); + + vt0 = v_muladd_f32(__c, vx0, vt0); + vt1 = v_muladd_f32(__c, vx1, vt1); + vt2 = v_muladd_f32(__c, vx2, vt2); + vt3 = v_muladd_f32(__c, vx3, vt3); + + v_storeu_f32(x + i, vt0); + v_storeu_f32(x + i + vstep, vt1); + v_storeu_f32(x + i + vstep * 2, vt2); + v_storeu_f32(x + i + vstep * 3, vt3); + + vt0 = v_mul_f32(__s, vx0); + vt1 = v_mul_f32(__s, vx1); + vt2 = v_mul_f32(__s, vx2); + vt3 = v_mul_f32(__s, vx3); + + vt0 = v_mulsub_f32(__c, vy0, vt0); + vt1 = v_mulsub_f32(__c, vy1, vt1); + vt2 = v_mulsub_f32(__c, vy2, vt2); + vt3 = v_mulsub_f32(__c, vy3, vt3); + + v_storeu_f32(y + i, vt0); + v_storeu_f32(y + i + vstep, vt1); + 
v_storeu_f32(y + i + vstep * 2, vt2); + v_storeu_f32(y + i + vstep * 3, vt3); + + } + + for (; i < unrollx; i += vstep) { + vx0 = v_loadu_f32(x + i); + vy0 = v_loadu_f32(y + i); + + vt0 = v_mul_f32(__s, vy0); + vt0 = v_muladd_f32(__c, vx0, vt0); + v_storeu_f32(x + i, vt0); + + vt0 = v_mul_f32(__s, vx0); + vt0 = v_mulsub_f32(__c, vy0, vt0); + v_storeu_f32(y + i, vt0); + } +#else FLOAT f0, f1, f2, f3; FLOAT x0, x1, x2, x3; FLOAT g0, g1, g2, g3; @@ -20,7 +88,6 @@ static void srot_kernel(BLASLONG n, FLOAT *x, FLOAT *y, FLOAT c, FLOAT s) FLOAT* yp = y; BLASLONG n1 = n & (~7); - while (i < n1) { x0 = xp[0]; y0 = yp[0]; @@ -53,6 +120,7 @@ static void srot_kernel(BLASLONG n, FLOAT *x, FLOAT *y, FLOAT c, FLOAT s) yp += 4; i += 4; } +#endif while (i < n) { FLOAT temp = c*x[i] + s*y[i]; From 5bc0a7583fed3328f176b69419ae12a063f2f4e0 Mon Sep 17 00:00:00 2001 From: Qiyu8 Date: Wed, 11 Nov 2020 15:18:01 +0800 Subject: [PATCH 12/22] only FMA3 and vector larger than 128 have positive effects. --- kernel/x86_64/srot.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/x86_64/srot.c b/kernel/x86_64/srot.c index d9583cdfa..4273f7fe7 100644 --- a/kernel/x86_64/srot.c +++ b/kernel/x86_64/srot.c @@ -13,7 +13,7 @@ static void srot_kernel(BLASLONG n, FLOAT *x, FLOAT *y, FLOAT c, FLOAT s) { BLASLONG i = 0; -#if V_SIMD +#if V_SIMD && (HAVE_FMA3 || V_SIMD > 128) const int vstep = v_nlanes_f32; const int unrollx4 = n & (-vstep * 4); const int unrollx = n & -vstep; From a87e537b8cd5844159dd5806204470a945be695d Mon Sep 17 00:00:00 2001 From: Qiyu8 Date: Wed, 11 Nov 2020 15:53:48 +0800 Subject: [PATCH 13/22] modify macro --- kernel/x86_64/srot.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/x86_64/srot.c b/kernel/x86_64/srot.c index 4273f7fe7..3de586cb8 100644 --- a/kernel/x86_64/srot.c +++ b/kernel/x86_64/srot.c @@ -13,7 +13,7 @@ static void srot_kernel(BLASLONG n, FLOAT *x, FLOAT *y, FLOAT c, FLOAT s) { BLASLONG i = 0; -#if V_SIMD && (HAVE_FMA3 
|| V_SIMD > 128) +#if V_SIMD && (defined(HAVE_FMA3) || V_SIMD > 128) const int vstep = v_nlanes_f32; const int unrollx4 = n & (-vstep * 4); const int unrollx = n & -vstep; From e5c2ceb6750c4e649aef87e06bd87ed4fcbdc6a5 Mon Sep 17 00:00:00 2001 From: Qiyu8 Date: Thu, 12 Nov 2020 17:35:17 +0800 Subject: [PATCH 14/22] fix the CI failure of lack the head --- kernel/simd/intrin.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/simd/intrin.h b/kernel/simd/intrin.h index ef8fcb865..3802a91e1 100644 --- a/kernel/simd/intrin.h +++ b/kernel/simd/intrin.h @@ -47,7 +47,7 @@ extern "C" { #endif /** AVX **/ -#ifdef HAVE_AVX +#if defined(HAVE_AVX) || defined(HAVE_FMA3) #include <immintrin.h> #endif From e0dac6b53b27b2d79404577d17fdee8b2303e123 Mon Sep 17 00:00:00 2001 From: Qiyu8 Date: Thu, 12 Nov 2020 20:31:03 +0800 Subject: [PATCH 15/22] fix the CI failure of target specific option mismatch --- kernel/Makefile | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/Makefile b/kernel/Makefile index fb1d5d39a..fd9105fee 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -5,6 +5,10 @@ endif TOPDIR = ..
include $(TOPDIR)/Makefile.system +ifdef HAVE_FMA3 +CFLAGS += -mfma +endif + ifeq ($(ARCH), power) ifeq ($(C_COMPILER), CLANG) override CFLAGS += -fno-integrated-as From ae0b1dea19bf836fb0c8af3630ccfcbbf4b8e37f Mon Sep 17 00:00:00 2001 From: Qiyu8 Date: Fri, 13 Nov 2020 10:20:24 +0800 Subject: [PATCH 16/22] modify system.cmake to enable fma flag --- cmake/system.cmake | 2 +- kernel/Makefile | 4 ---- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/cmake/system.cmake b/cmake/system.cmake index 66e95c6d3..68df2d900 100644 --- a/cmake/system.cmake +++ b/cmake/system.cmake @@ -174,7 +174,7 @@ if (DEFINED TARGET) endif() if (DEFINED HAVE_AVX) if (NOT NO_AVX) - set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx") + set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx -mfma") endif() endif() if (DEFINED HAVE_AVX2) diff --git a/kernel/Makefile b/kernel/Makefile index fd9105fee..fb1d5d39a 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -5,10 +5,6 @@ endif TOPDIR = .. include $(TOPDIR)/Makefile.system -ifdef HAVE_FMA3 -CFLAGS += -mfma -endif - ifeq ($(ARCH), power) ifeq ($(C_COMPILER), CLANG) override CFLAGS += -fno-integrated-as From d6e7e05bb36d77f26274abf7d8be03dd2bd78c1d Mon Sep 17 00:00:00 2001 From: Gengxin Xie Date: Fri, 13 Nov 2020 14:20:52 +0800 Subject: [PATCH 17/22] Improve the performance of dasum and sasum when SMP is defined --- kernel/x86_64/dasum.c | 66 +++++++++++++++++++++++++++++++++++++------ kernel/x86_64/sasum.c | 61 ++++++++++++++++++++++++++++++++++----- 2 files changed, 111 insertions(+), 16 deletions(-) diff --git a/kernel/x86_64/dasum.c b/kernel/x86_64/dasum.c index 8a40ea4b9..ddec21383 100644 --- a/kernel/x86_64/dasum.c +++ b/kernel/x86_64/dasum.c @@ -58,21 +58,19 @@ static FLOAT dasum_kernel(BLASLONG n, FLOAT *x1) } #endif - -FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) +static FLOAT asum_compute(BLASLONG n, FLOAT *x, BLASLONG inc_x) { - BLASLONG i=0; + BLASLONG i = 0; FLOAT sumf = 0.0; + + if (n <= 0 || inc_x <= 0) 
return (sumf); - if (n <= 0 || inc_x <= 0) return(sumf); - - if ( inc_x == 1 ) { + if (inc_x == 1) { sumf = dasum_kernel(n, x); - } + } else { n *= inc_x; - - while(i < n) { + while (i < n) { sumf += ABS_K(x[i]); i += inc_x; } @@ -80,3 +78,53 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) return(sumf); } +#if defined(SMP) +static int asum_thread_function(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy2, FLOAT *x, BLASLONG inc_x, FLOAT *dummy3, BLASLONG dummy4, FLOAT *result, BLASLONG dummy5) +{ + *(FLOAT *)result = asum_compute(n, x, inc_x); + return 0; +} + +extern int blas_level1_thread_with_return_value(int mode, BLASLONG m, BLASLONG n, BLASLONG k, void *alpha, void *a, BLASLONG lda, void *b, BLASLONG ldb, void *c, BLASLONG ldc, int (*function)(), int nthreads); +#endif + +FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) +{ +#if defined(SMP) + int nthreads; + FLOAT dummy_alpha; + FLOAT * dummy_b; +#endif + FLOAT sumf = 0.0; + +#if defined(SMP) + int num_cpu = num_cpu_avail(1); + if (n <= 100000 || inc_x <= 0) + nthreads = 1; + else + nthreads = num_cpu < n/100000 ? 
num_cpu : n/100000; + + if (nthreads == 1) { + sumf = asum_compute(n, x, inc_x); + } else { + int mode, i; + char result[MAX_CPU_NUMBER * sizeof(double) *2]; + FLOAT *ptr; +#if !defined(DOUBLE) + mode = BLAS_SINGLE | BLAS_REAL; +#else + mode = BLAS_DOUBLE | BLAS_REAL; +#endif + blas_level1_thread_with_return_value(mode, n, 0, 0, &dummy_alpha, x, inc_x, dummy_b, 0, result, 0, (void *)asum_thread_function, nthreads); + ptr = (FLOAT *)result; + for (i = 0; i < nthreads; i++) { + sumf += (*ptr); + ptr = (FLOAT *)(((char *)ptr) + sizeof(double) *2); + } + } +#else + sumf = asum_compute(n, x, inc_x); +#endif + return(sumf); +} + diff --git a/kernel/x86_64/sasum.c b/kernel/x86_64/sasum.c index 36ec4a737..d0cea9bee 100644 --- a/kernel/x86_64/sasum.c +++ b/kernel/x86_64/sasum.c @@ -67,24 +67,71 @@ static FLOAT sasum_kernel(BLASLONG n, FLOAT *x1) #endif -FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) +static FLOAT asum_compute(BLASLONG n, FLOAT * x, BLASLONG inc_x) { - BLASLONG i=0; + BLASLONG i = 0; FLOAT sumf = 0.0; + + if (n <= 0 || inc_x <= 0) return (sumf); - if (n <= 0 || inc_x <= 0) return(sumf); - - if ( inc_x == 1 ) { + if (inc_x == 1) { sumf = sasum_kernel(n, x); } else { - n *= inc_x; while(i < n) { sumf += ABS_K(x[i]); i += inc_x; } - } + return (sumf); +} + +#if defined(SMP) +static int asum_thread_function(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy2, FLOAT *x, BLASLONG inc_x, FLOAT *dummy3, BLASLONG dummy4, FLOAT *result, BLASLONG dummy5) +{ + *(FLOAT *)result = asum_compute(n, x, inc_x); + return 0; +} + +extern int blas_level1_thread_with_return_value(int mode, BLASLONG m, BLASLONG n, BLASLONG k, void * alpha, void *a, BLASLONG lda, void *b, BLASLONG ldb, void *c, BLASLONG ldc, int(*function)(), int nthreads); +#endif + +FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) +{ +#if defined(SMP) + int nthreads; + FLOAT dummy_alpha; +#endif + FLOAT sumf = 0.0; + +#if defined(SMP) + int num_cpu = num_cpu_avail(1); + if (n <= 100000 || inc_x <= 0) + 
nthreads = 1; + else + nthreads = num_cpu < n/100000 ? num_cpu : n/100000; + if (nthreads == 1) { + sumf = asum_compute(n, x, inc_x); + } + else { + int mode, i; + char result[MAX_CPU_NUMBER * sizeof(double) *2]; + FLOAT * ptr; +#if !defined(DOUBLE) + mode = BLAS_SINGLE | BLAS_REAL; +#else + mode = BLAS_DOUBLE | BLAS_REAL; +#endif + blas_level1_thread_with_return_value(mode, n, 0, 0, &dummy_alpha, x, inc_x, NULL, 0, result, 0, (void *)asum_thread_function, nthreads); + ptr = (FLOAT *)result; + for (i = 0; i < nthreads; i++) { + sumf += (*ptr); + ptr = (FLOAT *)(((char *)ptr) + sizeof(double) * 2); + } + } +#else + sumf = asum_compute(n, x, inc_x); +#endif return(sumf); } From ec4d77c47c46358521c3b38e42eb8bfebcb94ec3 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Fri, 13 Nov 2020 09:16:34 +0100 Subject: [PATCH 18/22] Add -mfma for HAVE_FMA3 in the non-DYNAMIC_ARCH case as well --- cmake/cc.cmake | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cmake/cc.cmake b/cmake/cc.cmake index b963940d6..76952152b 100644 --- a/cmake/cc.cmake +++ b/cmake/cc.cmake @@ -124,6 +124,9 @@ if (NOT DYNAMIC_ARCH) if (HAVE_AVX) set (CCOMMON_OPT "${CCOMMON_OPT} -mavx") endif () + if (HAVE_FMA3) + set (CCOMMON_OPT "${CCOMMON_OPT} -mfma") + endif () if (HAVE_SSE) set (CCOMMON_OPT "${CCOMMON_OPT} -msse") endif () From b00a0de1323732a1b82c15bc4f0b0bac3e01c262 Mon Sep 17 00:00:00 2001 From: Qiyu8 Date: Mon, 16 Nov 2020 09:14:56 +0800 Subject: [PATCH 19/22] remove the -mfma flag in when the host has AVX. 
--- cmake/system.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/system.cmake b/cmake/system.cmake index 68df2d900..66e95c6d3 100644 --- a/cmake/system.cmake +++ b/cmake/system.cmake @@ -174,7 +174,7 @@ if (DEFINED TARGET) endif() if (DEFINED HAVE_AVX) if (NOT NO_AVX) - set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx -mfma") + set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx") endif() endif() if (DEFINED HAVE_AVX2) From 60005eb47b5d30dcf35edff8c824a9f9fd9f6e6c Mon Sep 17 00:00:00 2001 From: Alexander Grund Date: Thu, 19 Nov 2020 14:39:00 +0100 Subject: [PATCH 20/22] Don't overwrite blas_thread_buffer if already set After a fork it is possible that blas_thread_buffer has already allocated memory buffers: goto_set_num_threads does allocate those already and it may be called by num_cpu_avail in case the OpenBLAS NUM_THREADS differ from the OMP num threads. This leads to a memory leak which can cause subsequent execution of BLAS kernels to fail. Fixes #2993 --- driver/others/blas_server_omp.c | 48 +++++++++++++++------------------ 1 file changed, 22 insertions(+), 26 deletions(-) diff --git a/driver/others/blas_server_omp.c b/driver/others/blas_server_omp.c index a8b3e9a4b..a576127aa 100644 --- a/driver/others/blas_server_omp.c +++ b/driver/others/blas_server_omp.c @@ -76,10 +76,28 @@ static atomic_bool blas_buffer_inuse[MAX_PARALLEL_NUMBER]; static _Bool blas_buffer_inuse[MAX_PARALLEL_NUMBER]; #endif -void goto_set_num_threads(int num_threads) { +static void adjust_thread_buffers() { int i=0, j=0; + //adjust buffer for each thread + for(i=0; i < MAX_PARALLEL_NUMBER; i++) { + for(j=0; j < blas_cpu_number; j++){ + if(blas_thread_buffer[i][j] == NULL){ + blas_thread_buffer[i][j] = blas_memory_alloc(2); + } + } + for(; j < MAX_CPU_NUMBER; j++){ + if(blas_thread_buffer[i][j] != NULL){ + blas_memory_free(blas_thread_buffer[i][j]); + blas_thread_buffer[i][j] = NULL; + } + } + } +} + +void goto_set_num_threads(int num_threads) { + if 
(num_threads < 1) num_threads = blas_num_threads; if (num_threads > MAX_CPU_NUMBER) num_threads = MAX_CPU_NUMBER; @@ -92,20 +110,7 @@ void goto_set_num_threads(int num_threads) { omp_set_num_threads(blas_cpu_number); - //adjust buffer for each thread - for(i=0; i Date: Thu, 19 Nov 2020 15:24:57 +0100 Subject: [PATCH 21/22] Add reproducer test for crash after fork See #2993 for an analysis --- utest/CMakeLists.txt | 6 +- utest/Makefile | 3 +- utest/test_fork.c | 4 +- utest/test_post_fork.c | 131 +++++++++++++++++++++++++++++++++++++++++ 4 files changed, 140 insertions(+), 4 deletions(-) create mode 100644 utest/test_post_fork.c diff --git a/utest/CMakeLists.txt b/utest/CMakeLists.txt index dc5175fc5..357e61301 100644 --- a/utest/CMakeLists.txt +++ b/utest/CMakeLists.txt @@ -27,13 +27,17 @@ endif () # known to hang with the native Windows and Android threads # FIXME needs checking if this works on any of the other platforms -if (NOT USE_OPENMP) if (OS_CYGWIN_NT OR OS_LINUX) +if (NOT USE_OPENMP) set(OpenBLAS_utest_src ${OpenBLAS_utest_src} test_fork.c ) endif() +set(OpenBLAS_utest_src + ${OpenBLAS_utest_src} + test_post_fork.c + ) endif() if (NOT NO_LAPACK) diff --git a/utest/Makefile b/utest/Makefile index 31d4ccf00..ac8c6f72a 100644 --- a/utest/Makefile +++ b/utest/Makefile @@ -25,10 +25,11 @@ endif #this does not work with OpenMP nor with native Windows or Android threads # FIXME TBD if this works on OSX, SunOS, POWER and zarch -ifndef USE_OPENMP ifeq ($(OSNAME), $(filter $(OSNAME),Linux CYGWIN_NT)) +ifneq ($(USE_OPENMP), 1) OBJS += test_fork.o endif +OBJS += test_post_fork.o endif ifeq ($(C_COMPILER), PGI) diff --git a/utest/test_fork.c b/utest/test_fork.c index 5c976f920..bd531e7fb 100644 --- a/utest/test_fork.c +++ b/utest/test_fork.c @@ -36,7 +36,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include <cblas.h> #include "openblas_utest.h" -void* xmalloc(size_t n) +static void* xmalloc(size_t n) { void* tmp; tmp = malloc(n); @@ -49,7 +49,7 @@ void* xmalloc(size_t n) } #ifdef BUILD_DOUBLE -void check_dgemm(double *a, double *b, double *result, double *expected, blasint n) +static void check_dgemm(double *a, double *b, double *result, double *expected, blasint n) { char trans1 = 'T'; char trans2 = 'N'; diff --git a/utest/test_post_fork.c b/utest/test_post_fork.c new file mode 100644 index 000000000..9370a02ce --- /dev/null +++ b/utest/test_post_fork.c @@ -0,0 +1,131 @@ +/***************************************************************************** +Copyright (c) 2011-2020, The OpenBLAS Project +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + 3. Neither the name of the OpenBLAS project nor the names of + its contributors may be used to endorse or promote products + derived from this software without specific prior written + permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +**********************************************************************************/ + +#include <sys/types.h> +#include <sys/wait.h> +#include <cblas.h> +#ifdef USE_OPENMP +#include <omp.h> +#endif +#include "openblas_utest.h" + +static void* xmalloc(size_t n) +{ + void* tmp; + tmp = malloc(n); + if (tmp == NULL) { + fprintf(stderr, "You are about to die\n"); + exit(1); + } else { + return tmp; + } +} + +#ifdef BUILD_DOUBLE +static void check_dgemm(double *a, double *b, double *result, double *expected, blasint n) +{ + char trans1 = 'T'; + char trans2 = 'N'; + double zerod = 0, oned = 1; + int i; + BLASFUNC(dgemm)(&trans1, &trans2, &n, &n, &n, &oned, a, &n, b, &n, &zerod, result, &n); + for(i = 0; i < n * n; ++i) { + ASSERT_DBL_NEAR_TOL(expected[i], result[i], DOUBLE_EPS); + } +} +#endif + +CTEST(fork, safety_after_fork_in_parent) +{ +#ifndef BUILD_DOUBLE +exit(0); +#else + blasint n = 100; + int i, nthreads_omp; + + double *a, *b, *c, *d; + size_t n_bytes; + + pid_t fork_pid; + + n_bytes = sizeof(*a) * n * n; + + a = xmalloc(n_bytes); + b = xmalloc(n_bytes); + c = xmalloc(n_bytes); + d = xmalloc(n_bytes); + + // Put ones in a, b and n in c (result) + for(i = 0; i < n * n; ++i) { + a[i] = 1; + b[i] = 1; + c[i] = 1 * n; + } + + // Test that OpenBLAS works after a fork. + // This situation routinely happens with Python's numpy where a + // `sys.platform` calls `uname` in a forked process. + // So we simulate this situation here.
+ + // There was an issue where a different number of OpenBLAS and OpenMP + // threads triggered a memory leak. So run this multiple times + // with different number of threads set. +#ifdef USE_OPENMP + nthreads_omp = omp_get_max_threads(); + // Run with half the max OMP threads, the max threads and twice that + for(i = (nthreads_omp + 1) / 2; i <= nthreads_omp * 2; i *= 2) { + omp_set_num_threads(i); +#endif + + fork_pid = fork(); + if (fork_pid == -1) { + CTEST_ERR("Failed to fork process."); + } else if (fork_pid == 0) { + // Just pretend to do something, e.g. call `uname`, then exit + exit(0); + } else { + // Wait for the child to finish and check the exit code. + int child_status = 0; + pid_t wait_pid = wait(&child_status); + ASSERT_EQUAL(wait_pid, fork_pid); + ASSERT_EQUAL(0, WEXITSTATUS (child_status)); + + // Now OpenBLAS has to work + check_dgemm(a, b, d, c, n); + } +#ifdef USE_OPENMP + } +#endif + +#endif +} From c6c9c24d1b64430033e733c7341a5d37c79e4668 Mon Sep 17 00:00:00 2001 From: Xianyi Zhang Date: Sun, 22 Nov 2020 16:02:19 +0800 Subject: [PATCH 22/22] Update doc for C910. --- README.md | 7 +++++++ TargetList.txt | 2 ++ 2 files changed, 9 insertions(+) diff --git a/README.md b/README.md index ca034e747..267df5358 100644 --- a/README.md +++ b/README.md @@ -172,6 +172,13 @@ Please read `GotoBLAS_01Readme.txt` for older CPU models already supported by th - **Z13**: Optimized Level-3 BLAS and Level-1,2 - **Z14**: Optimized Level-3 BLAS and (single precision) Level-1,2 +#### RISC-V + +- **C910V**: Optimized Level-3 BLAS (real) and Level-1,2 by RISC-V Vector extension 0.7.1. + ```sh + make HOSTCC=gcc TARGET=C910V CC=riscv64-unknown-linux-gnu-gcc FC=riscv64-unknown-linux-gnu-gfortran + ``` + ### Support for multiple targets in a single library OpenBLAS can be built for multiple targets with runtime detection of the target cpu by specifiying `DYNAMIC_ARCH=1` in Makefile.rule, on the gmake command line or as `-DDYNAMIC_ARCH=TRUE` in cmake.
diff --git a/TargetList.txt b/TargetList.txt index 86177ebca..d19964916 100644 --- a/TargetList.txt +++ b/TargetList.txt @@ -107,3 +107,5 @@ Z14 10.RISC-V 64: RISCV64_GENERIC +C910V +