From c167a3d6f41ed2f5680c2a72846b4c9b5d416543 Mon Sep 17 00:00:00 2001 From: Jerry Zhao Date: Mon, 16 Apr 2018 12:34:43 -0700 Subject: [PATCH 1/9] Added RISCV build --- Makefile.riscv64 | 0 Makefile.system | 4 + c_check | 1 + common.h | 5 ++ common_riscv64.h | 93 +++++++++++++++++++ cpuid_riscv64.c | 111 +++++++++++++++++++++++ ctest.c | 4 + getarch.c | 19 ++++ kernel/riscv64/KERNEL | 149 +++++++++++++++++++++++++++++++ kernel/riscv64/amax.c | 75 ++++++++++++++++ kernel/riscv64/amin.c | 75 ++++++++++++++++ kernel/riscv64/asum.c | 67 ++++++++++++++ kernel/riscv64/axpby.c | 96 ++++++++++++++++++++ kernel/riscv64/axpy.c | 64 ++++++++++++++ kernel/riscv64/copy.c | 59 +++++++++++++ kernel/riscv64/dot.c | 64 ++++++++++++++ kernel/riscv64/gemv_n.c | 67 ++++++++++++++ kernel/riscv64/gemv_t.c | 68 ++++++++++++++ kernel/riscv64/iamax.c | 77 ++++++++++++++++ kernel/riscv64/iamin.c | 77 ++++++++++++++++ kernel/riscv64/imax.c | 69 +++++++++++++++ kernel/riscv64/imin.c | 67 ++++++++++++++ kernel/riscv64/izamax.c | 81 +++++++++++++++++ kernel/riscv64/izamin.c | 81 +++++++++++++++++ kernel/riscv64/max.c | 65 ++++++++++++++ kernel/riscv64/min.c | 65 ++++++++++++++ kernel/riscv64/nrm2.c | 88 ++++++++++++++++++ kernel/riscv64/omatcopy_cn.c | 90 +++++++++++++++++++ kernel/riscv64/omatcopy_ct.c | 89 +++++++++++++++++++ kernel/riscv64/omatcopy_rn.c | 90 +++++++++++++++++++ kernel/riscv64/omatcopy_rt.c | 62 +++++++++++++ kernel/riscv64/rot.c | 62 +++++++++++++ kernel/riscv64/scal.c | 63 +++++++++++++ kernel/riscv64/swap.c | 62 +++++++++++++ kernel/riscv64/symv_L.c | 70 +++++++++++++++ kernel/riscv64/symv_U.c | 71 +++++++++++++++ kernel/riscv64/zamax.c | 79 +++++++++++++++++ kernel/riscv64/zamin.c | 79 +++++++++++++++++ kernel/riscv64/zasum.c | 72 +++++++++++++++ kernel/riscv64/zaxpby.c | 118 +++++++++++++++++++++++++ kernel/riscv64/zaxpy.c | 74 ++++++++++++++++ kernel/riscv64/zcopy.c | 65 ++++++++++++++ kernel/riscv64/zdot.c | 80 +++++++++++++++++ kernel/riscv64/zgemv_n.c | 157 
+++++++++++++++++++++++++++++++++ kernel/riscv64/zgemv_t.c | 140 +++++++++++++++++++++++++++++ kernel/riscv64/znrm2.c | 106 ++++++++++++++++++++++ kernel/riscv64/zomatcopy_cn.c | 70 +++++++++++++++ kernel/riscv64/zomatcopy_cnc.c | 69 +++++++++++++++ kernel/riscv64/zomatcopy_ct.c | 71 +++++++++++++++ kernel/riscv64/zomatcopy_ctc.c | 71 +++++++++++++++ kernel/riscv64/zomatcopy_rn.c | 70 +++++++++++++++ kernel/riscv64/zomatcopy_rnc.c | 69 +++++++++++++++ kernel/riscv64/zomatcopy_rt.c | 72 +++++++++++++++ kernel/riscv64/zomatcopy_rtc.c | 72 +++++++++++++++ kernel/riscv64/zrot.c | 70 +++++++++++++++ kernel/riscv64/zscal.c | 88 ++++++++++++++++++ kernel/riscv64/zswap.c | 72 +++++++++++++++ lapack/laswp/riscv64/Makefile | 13 +++ param.h | 39 ++++++++ 59 files changed, 4166 insertions(+) create mode 100644 Makefile.riscv64 create mode 100644 common_riscv64.h create mode 100644 cpuid_riscv64.c create mode 100644 kernel/riscv64/KERNEL create mode 100644 kernel/riscv64/amax.c create mode 100644 kernel/riscv64/amin.c create mode 100644 kernel/riscv64/asum.c create mode 100644 kernel/riscv64/axpby.c create mode 100644 kernel/riscv64/axpy.c create mode 100644 kernel/riscv64/copy.c create mode 100644 kernel/riscv64/dot.c create mode 100644 kernel/riscv64/gemv_n.c create mode 100644 kernel/riscv64/gemv_t.c create mode 100644 kernel/riscv64/iamax.c create mode 100644 kernel/riscv64/iamin.c create mode 100644 kernel/riscv64/imax.c create mode 100644 kernel/riscv64/imin.c create mode 100644 kernel/riscv64/izamax.c create mode 100644 kernel/riscv64/izamin.c create mode 100644 kernel/riscv64/max.c create mode 100644 kernel/riscv64/min.c create mode 100644 kernel/riscv64/nrm2.c create mode 100644 kernel/riscv64/omatcopy_cn.c create mode 100644 kernel/riscv64/omatcopy_ct.c create mode 100644 kernel/riscv64/omatcopy_rn.c create mode 100644 kernel/riscv64/omatcopy_rt.c create mode 100644 kernel/riscv64/rot.c create mode 100644 kernel/riscv64/scal.c create mode 100644 kernel/riscv64/swap.c 
create mode 100644 kernel/riscv64/symv_L.c create mode 100644 kernel/riscv64/symv_U.c create mode 100644 kernel/riscv64/zamax.c create mode 100644 kernel/riscv64/zamin.c create mode 100644 kernel/riscv64/zasum.c create mode 100644 kernel/riscv64/zaxpby.c create mode 100644 kernel/riscv64/zaxpy.c create mode 100644 kernel/riscv64/zcopy.c create mode 100644 kernel/riscv64/zdot.c create mode 100644 kernel/riscv64/zgemv_n.c create mode 100644 kernel/riscv64/zgemv_t.c create mode 100644 kernel/riscv64/znrm2.c create mode 100644 kernel/riscv64/zomatcopy_cn.c create mode 100644 kernel/riscv64/zomatcopy_cnc.c create mode 100644 kernel/riscv64/zomatcopy_ct.c create mode 100644 kernel/riscv64/zomatcopy_ctc.c create mode 100644 kernel/riscv64/zomatcopy_rn.c create mode 100644 kernel/riscv64/zomatcopy_rnc.c create mode 100644 kernel/riscv64/zomatcopy_rt.c create mode 100644 kernel/riscv64/zomatcopy_rtc.c create mode 100644 kernel/riscv64/zrot.c create mode 100644 kernel/riscv64/zscal.c create mode 100644 kernel/riscv64/zswap.c create mode 100644 lapack/laswp/riscv64/Makefile diff --git a/Makefile.riscv64 b/Makefile.riscv64 new file mode 100644 index 000000000..e69de29bb diff --git a/Makefile.system b/Makefile.system index 142cb420f..02d392d9c 100644 --- a/Makefile.system +++ b/Makefile.system @@ -593,7 +593,9 @@ endif ifndef BINARY_DEFINED ifneq ($(OSNAME), AIX) ifdef BINARY64 +ifneq ($(ARCH), riscv64) CCOMMON_OPT += -m64 +endif else CCOMMON_OPT += -m32 endif @@ -687,8 +689,10 @@ endif else ifdef BINARY64 ifneq ($(OSNAME), AIX) +ifneq ($(ARCH), riscv64) FCOMMON_OPT += -m64 endif +endif ifdef INTERFACE64 ifneq ($(INTERFACE64), 0) FCOMMON_OPT += -fdefault-integer-8 diff --git a/c_check b/c_check index a3b337602..c564855f3 100644 --- a/c_check +++ b/c_check @@ -76,6 +76,7 @@ $architecture = ia64 if ($data =~ /ARCH_IA64/); $architecture = arm if ($data =~ /ARCH_ARM/); $architecture = arm64 if ($data =~ /ARCH_ARM64/); $architecture = zarch if ($data =~ /ARCH_ZARCH/); +$architecture 
= riscv64 if ($data =~ /ARCH_RISCV64/); $defined = 0; diff --git a/common.h b/common.h index 5a599a5af..3d23d9ee6 100644 --- a/common.h +++ b/common.h @@ -408,6 +408,11 @@ please https://github.com/xianyi/OpenBLAS/issues/246 #include "common_mips.h" #endif + +#ifdef ARCH_RISCV64 +#include "common_riscv64.h" +#endif + #ifdef ARCH_MIPS64 #include "common_mips64.h" #endif diff --git a/common_riscv64.h b/common_riscv64.h new file mode 100644 index 000000000..fe4e0a6d3 --- /dev/null +++ b/common_riscv64.h @@ -0,0 +1,93 @@ +/***************************************************************************** +Copyright (c) 2011-2014, The OpenBLAS Project +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + 3. Neither the name of the OpenBLAS project nor the names of + its contributors may be used to endorse or promote products + derived from this software without specific prior written + permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************************/ + +/*********************************************************************/ +/* Copyright 2009, 2010 The University of Texas at Austin. */ +/* All rights reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* 1. Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* */ +/* 2. Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
+/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
+/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
+/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
+/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
+/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
+/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
+/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
+/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
+/* POSSIBILITY OF SUCH DAMAGE. */
+/* */
+/* The views and conclusions contained in the software and */
+/* documentation are those of the authors and should not be */
+/* interpreted as representing official policies, either expressed */
+/* or implied, of The University of Texas at Austin. */
+/*********************************************************************/
+
+#ifndef COMMON_RISCV64
+#define COMMON_RISCV64
+/* MB and WMB both expand to the same __sync_synchronize() builtin call. */
+#define MB __sync_synchronize()
+#define WMB __sync_synchronize()
+
+#define INLINE inline
+
+#ifndef ASSEMBLER
+/* Integer quotient x / y, returned as int. Nothing here guards the division,
+ * so the caller must pass a non-zero y. */
+static inline int blas_quickdivide(blasint x, blasint y){
+    return x / y;
+}
+
+#endif /* !ASSEMBLER */
+
+/* BUFFER_SIZE evaluates to 32 << 20 = 33554432 (presumably bytes, i.e.
+ * 32 MiB -- confirm against the code that consumes it). */
+#define BUFFER_SIZE ( 32 << 20)
+#define SEEK_ADDRESS /* defined with no value; only its presence can be tested */
+
+#endif /* COMMON_RISCV64 */
diff --git a/cpuid_riscv64.c b/cpuid_riscv64.c
new file mode 100644
index 000000000..129ed11b0
--- /dev/null
+++ b/cpuid_riscv64.c
@@ -0,0 +1,111 @@
+/*****************************************************************************
+Copyright (c) 2011-2014, The OpenBLAS Project
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+   1. Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+
+   2.
Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + 3. Neither the name of the OpenBLAS project nor the names of + its contributors may be used to endorse or promote products + derived from this software without specific prior written + permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +**********************************************************************************/ + + +/*********************************************************************/ +/* Copyright 2009, 2010 The University of Texas at Austin. */ +/* All rights reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* 1. Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* */ +/* 2. Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. 
*/ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ +/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ +/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ +/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ +/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/* POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/* The views and conclusions contained in the software and */ +/* documentation are those of the authors and should not be */ +/* interpreted as representing official policies, either expressed */ +/* or implied, of The University of Texas at Austin. 
*/
+/*********************************************************************/
+/* RISC-V CPU identification stubs. This file has no #include of its own; getarch.c includes it textually. */
+#define CPU_UNKNOWN 0
+/* Core names, indexed by the value detect() returns. */
+static char *cpuname[] = {
+    "UNKOWN", /* NOTE(review): sic -- possibly meant "UNKNOWN"; runtime string, check its consumers before changing */
+};
+/* No probing is performed: every call reports CPU_UNKNOWN. */
+int detect(void){
+    return CPU_UNKNOWN;
+}
+/* Returns the cpuname[] entry selected by detect(). */
+char *get_corename(void){
+    return cpuname[detect()];
+}
+/* Writes "RISCV64" to stdout with no trailing newline. */
+void get_architecture(void){
+    printf("RISCV64");
+}
+/* Empty body: nothing is written for the sub-architecture. */
+void get_subarchitecture(void){
+}
+/* Writes "riscv64" to stdout with no trailing newline. */
+void get_subdirname(void){
+    printf("riscv64");
+}
+/* Writes eight fixed "#define ..." lines to stdout; the values are literals, not measured. */
+void get_cpuconfig(void){
+    printf("#define UNKNOWN\n");
+    printf("#define L1_DATA_SIZE 65536\n");
+    printf("#define L1_DATA_LINESIZE 32\n");
+    printf("#define L2_SIZE 512488\n"); /* NOTE(review): not a power of two; 512 KiB would be 524288 -- confirm intent */
+    printf("#define L2_LINESIZE 32\n");
+    printf("#define DTB_DEFAULT_ENTRIES 64\n");
+    printf("#define DTB_SIZE 4096\n");
+    printf("#define L2_ASSOCIATIVE 4\n");
+}
+/* Writes "riscv64" followed by a newline to stdout. */
+void get_libname(void){
+    printf("riscv64\n");
+}
diff --git a/ctest.c b/ctest.c index 00be423d1..cab939887 100644 --- a/ctest.c +++ b/ctest.c @@ -149,3 +149,7 @@ ARCH_ARM ARCH_ARM64 #endif +#if defined(__riscv) +ARCH_RISCV64 +#endif + diff --git a/getarch.c b/getarch.c index 992fc2b95..7f7fd97c4 100644 --- a/getarch.c +++ b/getarch.c @@ -604,6 +604,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #endif + #ifdef FORCE_PPCG4 #define FORCE #define ARCHITECTURE "POWER" @@ -859,6 +860,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #else #endif +#ifdef FORCE_RISCV64 +#define FORCE +#define ARCHITECTURE "RISCV64" +#define SUBARCHITECTURE "RISCV64" +#define SUBDIRNAME "riscv64" +#define ARCHCONFIG "-DRISCV64 " \ + "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=32 " \ + "-DL2_SIZE=1048576 -DL2_LINESIZE=32 " \ + "-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " +#define LIBNAME "riscv64" +#define CORENAME "RISCV64" +#else +#endif + #ifdef FORCE_CORTEXA15 #define FORCE #define ARCHITECTURE "ARM" @@ -1051,6 +1066,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define OPENBLAS_SUPPORTED #endif +#ifdef __riscv +#include "cpuid_riscv64.c" +#define OPENBLAS_SUPPORTED /* same marker the neighbouring cpuid includes set */ +#endif #ifdef __arm__ #include "cpuid_arm.c" #define OPENBLAS_SUPPORTED diff --git a/kernel/riscv64/KERNEL b/kernel/riscv64/KERNEL new file mode 100644 index 000000000..7d854ced6 --- /dev/null +++ b/kernel/riscv64/KERNEL @@ -0,0 +1,149 @@ +SAMAXKERNEL = ../riscv64/amax.c +DAMAXKERNEL = ../riscv64/amax.c +CAMAXKERNEL = ../riscv64/zamax.c +ZAMAXKERNEL = ../riscv64/zamax.c + +SAMINKERNEL = ../riscv64/amin.c +DAMINKERNEL = ../riscv64/amin.c +CAMINKERNEL = ../riscv64/zamin.c +ZAMINKERNEL = ../riscv64/zamin.c + +SMAXKERNEL = ../riscv64/max.c +DMAXKERNEL = ../riscv64/max.c + +SMINKERNEL = ../riscv64/min.c +DMINKERNEL = ../riscv64/min.c + +ISAMAXKERNEL = ../riscv64/iamax.c +IDAMAXKERNEL = ../riscv64/iamax.c +ICAMAXKERNEL = ../riscv64/izamax.c +IZAMAXKERNEL = ../riscv64/izamax.c + +ISAMINKERNEL = ../riscv64/iamin.c +IDAMINKERNEL = ../riscv64/iamin.c +ICAMINKERNEL = ../riscv64/izamin.c +IZAMINKERNEL = ../riscv64/izamin.c + +ISMAXKERNEL = ../riscv64/imax.c +IDMAXKERNEL = ../riscv64/imax.c + +ISMINKERNEL = ../riscv64/imin.c +IDMINKERNEL = ../riscv64/imin.c + +SASUMKERNEL = ../riscv64/asum.c +DASUMKERNEL = ../riscv64/asum.c +CASUMKERNEL = ../riscv64/zasum.c +ZASUMKERNEL = ../riscv64/zasum.c + +SAXPYKERNEL = ../riscv64/axpy.c +DAXPYKERNEL = ../riscv64/axpy.c +CAXPYKERNEL = ../riscv64/zaxpy.c +ZAXPYKERNEL = ../riscv64/zaxpy.c + +SCOPYKERNEL = ../riscv64/copy.c +DCOPYKERNEL = ../riscv64/copy.c +CCOPYKERNEL = ../riscv64/zcopy.c +ZCOPYKERNEL = ../riscv64/zcopy.c + +SDOTKERNEL = ../riscv64/dot.c +DDOTKERNEL = ../riscv64/dot.c +CDOTKERNEL = ../riscv64/zdot.c +ZDOTKERNEL = ../riscv64/zdot.c + +SNRM2KERNEL = ../riscv64/nrm2.c +DNRM2KERNEL = ../riscv64/nrm2.c +CNRM2KERNEL = ../riscv64/znrm2.c +ZNRM2KERNEL = ../riscv64/znrm2.c + +SROTKERNEL = ../riscv64/rot.c +DROTKERNEL = ../riscv64/rot.c +CROTKERNEL = ../riscv64/zrot.c +ZROTKERNEL = ../riscv64/zrot.c + +SSCALKERNEL = ../riscv64/scal.c +DSCALKERNEL =
../riscv64/scal.c +CSCALKERNEL = ../riscv64/zscal.c +ZSCALKERNEL = ../riscv64/zscal.c + +SSWAPKERNEL = ../riscv64/swap.c +DSWAPKERNEL = ../riscv64/swap.c +CSWAPKERNEL = ../riscv64/zswap.c +ZSWAPKERNEL = ../riscv64/zswap.c + +SGEMVNKERNEL = ../riscv64/gemv_n.c +DGEMVNKERNEL = ../riscv64/gemv_n.c +CGEMVNKERNEL = ../riscv64/zgemv_n.c +ZGEMVNKERNEL = ../riscv64/zgemv_n.c + +SGEMVTKERNEL = ../riscv64/gemv_t.c +DGEMVTKERNEL = ../riscv64/gemv_t.c +CGEMVTKERNEL = ../riscv64/zgemv_t.c +ZGEMVTKERNEL = ../riscv64/zgemv_t.c + +STRMMKERNEL = ../generic/trmmkernel_2x2.c +DTRMMKERNEL = ../generic/trmmkernel_2x2.c +CTRMMKERNEL = ../generic/ztrmmkernel_2x2.c +ZTRMMKERNEL = ../generic/ztrmmkernel_2x2.c + +SGEMMKERNEL = ../generic/gemmkernel_2x2.c +SGEMMONCOPY = ../generic/gemm_ncopy_2.c +SGEMMOTCOPY = ../generic/gemm_tcopy_2.c +SGEMMONCOPYOBJ = sgemm_oncopy.o +SGEMMOTCOPYOBJ = sgemm_otcopy.o + +DGEMMKERNEL = ../generic/gemmkernel_2x2.c +DGEMMONCOPY = ../generic/gemm_ncopy_2.c +DGEMMOTCOPY = ../generic/gemm_tcopy_2.c +DGEMMONCOPYOBJ = dgemm_oncopy.o +DGEMMOTCOPYOBJ = dgemm_otcopy.o + +CGEMMKERNEL = ../generic/zgemmkernel_2x2.c +CGEMMONCOPY = ../generic/zgemm_ncopy_2.c +CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c +CGEMMONCOPYOBJ = cgemm_oncopy.o +CGEMMOTCOPYOBJ = cgemm_otcopy.o + +ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c +ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c +ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c +ZGEMMONCOPYOBJ = zgemm_oncopy.o +ZGEMMOTCOPYOBJ = zgemm_otcopy.o + +STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c +STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c +STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c +STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c + +DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c +DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c +DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c +DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c + +CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c +CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c +CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c +CTRSMKERNEL_RT 
= ../generic/trsm_kernel_RT.c + +ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c +ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c +ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c +ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c + +LSAME_KERNEL = ../generic/lsame.c + +SCABS_KERNEL = ../generic/cabs.c +DCABS_KERNEL = ../generic/cabs.c +QCABS_KERNEL = ../generic/cabs.c + +ifndef SGEMM_BETA +SGEMM_BETA = ../generic/gemm_beta.c +endif +ifndef DGEMM_BETA +DGEMM_BETA = ../generic/gemm_beta.c +endif +ifndef CGEMM_BETA +CGEMM_BETA = ../generic/zgemm_beta.c +endif +ifndef ZGEMM_BETA +ZGEMM_BETA = ../generic/zgemm_beta.c +endif diff --git a/kernel/riscv64/amax.c b/kernel/riscv64/amax.c new file mode 100644 index 000000000..792e68bd9 --- /dev/null +++ b/kernel/riscv64/amax.c @@ -0,0 +1,75 @@ +/*************************************************************************** +Copyright (c) 2013, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*****************************************************************************/
+
+/**************************************************************************************
+* 2013/09/14 Saar
+* BLASTEST float : OK
+* BLASTEST double : OK
+* CTEST : NoTest
+* TEST : NoTest
+*
+**************************************************************************************/
+
+#include "common.h"
+#include <math.h> /* fabs, fabsf */
+
+#if defined(DOUBLE)
+
+#define ABS fabs
+
+#else
+
+#define ABS fabsf
+
+#endif
+/* Largest absolute value among x[0], x[inc_x], ..., x[(n-1)*inc_x].
+ * Returns 0.0 without reading x when n <= 0 or inc_x <= 0. */
+FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
+{
+    BLASLONG i=0;
+    BLASLONG ix=0;
+    FLOAT maxf=0.0;
+
+    if (n <= 0 || inc_x <= 0) return(maxf);
+
+    maxf=ABS(x[0]); /* seed with the first element, then scan the remaining n-1 */
+    ix += inc_x;
+    i++;
+
+    while(i < n)
+    {
+        if( ABS(x[ix]) > maxf ) /* strict comparison: an equal value does not replace the current maximum */
+        {
+            maxf = ABS(x[ix]);
+        }
+        ix += inc_x;
+        i++;
+    }
+    return(maxf);
+}
+
+
diff --git a/kernel/riscv64/amin.c b/kernel/riscv64/amin.c
new file mode 100644
index 000000000..78495a8e3
--- /dev/null
+++ b/kernel/riscv64/amin.c
@@ -0,0 +1,75 @@
+/***************************************************************************
+Copyright (c) 2013, The OpenBLAS Project
+All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+2.
Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*****************************************************************************/
+
+/**************************************************************************************
+* 2013/09/14 Saar
+* BLASTEST float : OK
+* BLASTEST double : OK
+* CTEST : NoTest
+* TEST : NoTest
+*
+**************************************************************************************/
+
+#include "common.h"
+#include <math.h> /* fabs, fabsf */
+
+#if defined(DOUBLE)
+
+#define ABS fabs
+
+#else
+
+#define ABS fabsf
+
+#endif
+/* Smallest absolute value among x[0], x[inc_x], ..., x[(n-1)*inc_x].
+ * Returns 0.0 without reading x when n <= 0 or inc_x <= 0. */
+FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
+{
+    BLASLONG i=0;
+    BLASLONG ix=0;
+    FLOAT minf=0.0;
+
+    if (n <= 0 || inc_x <= 0) return(minf);
+
+    minf=ABS(x[0]); /* seed with the first element, then scan the remaining n-1 */
+    ix += inc_x;
+    i++;
+
+    while(i < n)
+    {
+        if( ABS(x[ix]) < minf ) /* strict comparison: an equal value does not replace the current minimum */
+        {
+            minf = ABS(x[ix]);
+        }
+        ix += inc_x;
+        i++;
+    }
+    return(minf);
+}
+
+
diff --git a/kernel/riscv64/asum.c b/kernel/riscv64/asum.c
new file mode 100644
index 000000000..b284ae3fc
--- /dev/null
+++ b/kernel/riscv64/asum.c
@@ -0,0 +1,67 @@
+/***************************************************************************
+Copyright (c) 2013, The OpenBLAS Project
+All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in
+the documentation and/or other materials provided with the
+distribution.
+3. Neither the name of the OpenBLAS project nor the names of
+its contributors may be used to endorse or promote products
+derived from this software without specific prior written permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*****************************************************************************/
+
+/**************************************************************************************
+* 2013/09/14 Saar
+* BLASTEST float : OK
+* BLASTEST double : OK
+* CTEST : OK
+* TEST : OK
+*
+**************************************************************************************/
+
+
+#include "common.h"
+#include <math.h> /* fabs, fabsf */
+
+#if defined(DOUBLE)
+
+#define ABS fabs
+
+#else
+
+#define ABS fabsf
+
+#endif
+/* Sum of the absolute values of x[0], x[inc_x], ..., x[(n-1)*inc_x].
+ * Returns 0.0 without reading x when n <= 0 or inc_x <= 0. */
+FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
+{
+    BLASLONG i=0;
+    FLOAT sumf = 0.0;
+    if (n <= 0 || inc_x <= 0) return(sumf);
+
+    n *= inc_x; /* n is now the exclusive upper bound on the index i */
+    while(i < n)
+    {
+        sumf += ABS(x[i]);
+        i += inc_x;
+    }
+    return(sumf);
+}
+
+
diff --git a/kernel/riscv64/axpby.c b/kernel/riscv64/axpby.c
new file mode 100644
index 000000000..278747f75
--- /dev/null
+++ b/kernel/riscv64/axpby.c
@@ -0,0 +1,96 @@
+/***************************************************************************
+Copyright (c) 2013, The OpenBLAS Project
+All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+1.
Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*****************************************************************************/
+
+
+#include "common.h"
+
+/*
+ * Scaled vector update: y := alpha * x + beta * y.
+ *
+ * Parameters
+ *   n          number of elements to process; nothing is written when n <= 0
+ *   alpha      factor applied to the elements of x
+ *   x, inc_x   source vector and the index step between its elements
+ *   beta       factor applied to the previous contents of y
+ *   y, inc_y   destination vector and the index step between its elements
+ *
+ * Element k of each vector lives at index k * inc_x (resp. k * inc_y).
+ *
+ * A factor that compares equal to zero selects a branch that never reads the
+ * operand it would multiply: beta == 0 overwrites y without loading it, and
+ * alpha == 0 leaves x untouched.
+ *
+ * Always returns 0.
+ */
+int CNAME(BLASLONG n, FLOAT alpha, FLOAT *x, BLASLONG inc_x, FLOAT beta, FLOAT *y, BLASLONG inc_y)
+{
+    BLASLONG k;
+    BLASLONG px = 0; /* running index into x */
+    BLASLONG py = 0; /* running index into y */
+
+    if (n < 0)
+    {
+        return 0;
+    }
+
+    if (beta == 0.0 && alpha == 0.0)
+    {
+        /* y := 0 */
+        for (k = 0; k < n; k++, py += inc_y)
+        {
+            y[py] = 0.0;
+        }
+    }
+    else if (beta == 0.0)
+    {
+        /* y := alpha * x */
+        for (k = 0; k < n; k++, px += inc_x, py += inc_y)
+        {
+            y[py] = alpha * x[px];
+        }
+    }
+    else if (alpha == 0.0)
+    {
+        /* y := beta * y */
+        for (k = 0; k < n; k++, py += inc_y)
+        {
+            y[py] = beta * y[py];
+        }
+    }
+    else
+    {
+        /* y := alpha * x + beta * y */
+        for (k = 0; k < n; k++, px += inc_x, py += inc_y)
+        {
+            y[py] = alpha * x[px] + beta * y[py];
+        }
+    }
+
+    return 0;
+}
+
+
diff --git a/kernel/riscv64/axpy.c b/kernel/riscv64/axpy.c
new file mode 100644
index 000000000..fb1094dd9
--- /dev/null
+++ b/kernel/riscv64/axpy.c
@@ -0,0 +1,64 @@
+/***************************************************************************
+Copyright (c) 2013, The OpenBLAS Project
+All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in
+the documentation and/or other materials provided with the
+distribution.
+3. Neither the name of the OpenBLAS project nor the names of
+its contributors may be used to endorse or promote products
+derived from this software without specific prior written permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*****************************************************************************/
+
+/**************************************************************************************
+* 2013/09/14 Saar
+* BLASTEST float : OK
+* BLASTEST double : OK
+* CTEST : OK
+* TEST : OK
+*
+**************************************************************************************/
+
+
+#include "common.h"
+
+/* y[k*inc_y] += da * x[k*inc_x] for k in [0, n). The dummy arguments are
+ * never read. Always returns 0. */
+int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *dummy, BLASLONG dummy2)
+{
+    BLASLONG k;
+    BLASLONG px = 0; /* running index into x */
+    BLASLONG py = 0; /* running index into y */
+
+    /* A negative count or a zero scale factor leaves y untouched. */
+    if (n < 0 || da == 0.0)
+    {
+        return 0;
+    }
+
+    for (k = 0; k < n; k++)
+    {
+        y[py] += da * x[px];
+        px += inc_x;
+        py += inc_y;
+    }
+
+    return 0;
+}
+
+
diff --git a/kernel/riscv64/copy.c b/kernel/riscv64/copy.c
new file mode 100644
index 000000000..7b4f04f30
--- /dev/null
+++ b/kernel/riscv64/copy.c
@@ -0,0 +1,59 @@
+/***************************************************************************
+Copyright (c) 2013, The OpenBLAS Project
+All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+1.
Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*****************************************************************************/ + +/************************************************************************************** +* 2013/09/14 Saar +* BLASTEST float : OK +* BLASTEST double : OK +* CTEST : OK +* TEST : OK +* +**************************************************************************************/ + +#include "common.h" + +int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) +{ + BLASLONG i=0; + BLASLONG ix=0,iy=0; + + if ( n < 0 ) return(0); + + while(i < n) + { + + y[iy] = x[ix] ; + ix += inc_x ; + iy += inc_y ; + i++ ; + + } + return(0); + +} + + diff --git a/kernel/riscv64/dot.c b/kernel/riscv64/dot.c new file mode 100644 index 000000000..46a84ad18 --- /dev/null +++ b/kernel/riscv64/dot.c @@ -0,0 +1,64 @@ +/*************************************************************************** +Copyright (c) 2013, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*****************************************************************************/ + +/************************************************************************************** +* 2013/09/14 Saar +* BLASTEST float : OK +* BLASTEST double : OK +* CTEST : OK +* TEST : OK +* +**************************************************************************************/ + +#include "common.h" + +#if defined(DSDOT) +double CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) +#else +FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) +#endif +{ + BLASLONG i=0; + BLASLONG ix=0,iy=0; + double dot = 0.0 ; + + if ( n < 0 ) return(dot); + + while(i < n) + { + + dot += y[iy] * x[ix] ; + ix += inc_x ; + iy += inc_y ; + i++ ; + + } + return(dot); + +} + + diff --git a/kernel/riscv64/gemv_n.c b/kernel/riscv64/gemv_n.c new file mode 100644 index 000000000..ef61b245b --- /dev/null +++ b/kernel/riscv64/gemv_n.c @@ -0,0 +1,67 @@ +/*************************************************************************** +Copyright (c) 2013, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. 
Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*****************************************************************************/ + + +/************************************************************************************** + * * 2013/09/14 Saar + * * BLASTEST float : OK + * * BLASTEST double : OK + * CTEST : OK + * TEST : OK + * * + * **************************************************************************************/ + + +#include "common.h" + +int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *buffer) +{ + BLASLONG i; + BLASLONG ix,iy; + BLASLONG j; + FLOAT *a_ptr; + FLOAT temp; + + ix = 0; + a_ptr = a; + + for (j=0; j + +#if defined(DOUBLE) + +#define ABS fabs + +#else + +#define ABS fabsf + +#endif + + +BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) +{ + BLASLONG i=0; + BLASLONG ix=0; + FLOAT maxf=0.0; + BLASLONG max=0; + + if (n <= 0 || inc_x <= 0) return(max); + + maxf=ABS(x[0]); + ix += inc_x; + i++; + + while(i < n) + { + if( ABS(x[ix]) > maxf ) + { + max = i; + maxf = ABS(x[ix]); + } + ix += inc_x; + i++; + } + return(max+1); +} + + diff --git a/kernel/riscv64/iamin.c b/kernel/riscv64/iamin.c new file mode 100644 index 000000000..155292bd5 --- /dev/null +++ b/kernel/riscv64/iamin.c @@ -0,0 +1,77 @@ +/*************************************************************************** +Copyright (c) 2013, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. 
Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*****************************************************************************/ + +/************************************************************************************** +* 2013/09/14 Saar +* BLASTEST float : NoTest +* BLASTEST double : NoTest +* CTEST : NoTest +* TEST : NoTest +* +**************************************************************************************/ + +#include "common.h" +#include + +#if defined(DOUBLE) + +#define ABS fabs + +#else + +#define ABS fabsf + +#endif + + +BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) +{ + BLASLONG i=0; + BLASLONG ix=0; + FLOAT minf=0.0; + BLASLONG min=0; + + if (n <= 0 || inc_x <= 0) return(min); + + minf=ABS(x[0]); + ix += inc_x; + i++; + + while(i < n) + { + if( ABS(x[ix]) < ABS(minf) ) + { + min = i; + minf = ABS(x[ix]); + } + ix += inc_x; + i++; + } + return(min+1); +} + + diff --git a/kernel/riscv64/imax.c b/kernel/riscv64/imax.c new file mode 100644 index 000000000..5072dd16e --- /dev/null +++ b/kernel/riscv64/imax.c @@ -0,0 +1,69 @@ 
+/*************************************************************************** +Copyright (c) 2013, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*****************************************************************************/ + + +/************************************************************************************** +* 2013/09/14 Saar +* BLASTEST float : NoTest +* BLASTEST double : NoTest +* CTEST : NoTest +* TEST : NoTest +* +**************************************************************************************/ + +#include "common.h" +#include + + + +BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) +{ + BLASLONG i=0; + BLASLONG ix=0; + FLOAT maxf=0.0; + BLASLONG max=0; + + if (n <= 0 || inc_x <= 0) return(max); + + maxf=x[0]; + ix += inc_x; + i++; + + while(i < n) + { + if( x[ix] > maxf ) + { + max = i; + maxf = x[ix]; + } + ix += inc_x; + i++; + } + return(max+1); +} + + diff --git a/kernel/riscv64/imin.c b/kernel/riscv64/imin.c new file mode 100644 index 000000000..598cba387 --- /dev/null +++ b/kernel/riscv64/imin.c @@ -0,0 +1,67 @@ +/*************************************************************************** +Copyright (c) 2013, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*****************************************************************************/ + + +/************************************************************************************** +* 2013/08/19 Saar +* BLASTEST float +* BLASTEST double +* +**************************************************************************************/ + +#include "common.h" +#include + + + +BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) +{ + BLASLONG i=0; + BLASLONG ix=0; + FLOAT minf=0.0; + BLASLONG min=0; + + if (n <= 0 || inc_x <= 0) return(min); + + minf=x[0]; + ix += inc_x; + i++; + + while(i < n) + { + if( x[ix] > minf ) + { + min = i; + minf = x[ix]; + } + ix += inc_x; + i++; + } + return(min+1); +} + + diff --git a/kernel/riscv64/izamax.c b/kernel/riscv64/izamax.c new file mode 100644 index 000000000..8fe33e95b --- /dev/null +++ b/kernel/riscv64/izamax.c @@ -0,0 +1,81 @@ +/*************************************************************************** +Copyright (c) 2013, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. 
Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*****************************************************************************/ + +/************************************************************************************** +* 2013/09/14 Saar +* BLASTEST float : NoTest +* BLASTEST double : NoTest +* CTEST : OK +* TEST : OK +* +**************************************************************************************/ + +#include "common.h" +#include + +#if defined(DOUBLE) + +#define ABS fabs + +#else + +#define ABS fabsf + +#endif + +#define CABS1(x,i) ABS(x[i])+ABS(x[i+1]) + +BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) +{ + BLASLONG i=0; + BLASLONG ix=0; + FLOAT maxf; + BLASLONG max=0; + BLASLONG inc_x2; + + if (n <= 0 || inc_x <= 0) return(max); + + inc_x2 = 2 * inc_x; + + maxf = CABS1(x,0); + ix += inc_x2; + i++; + + while(i < n) + { + if( CABS1(x,ix) > maxf ) + { + max = i; + maxf = CABS1(x,ix); + } + ix += inc_x2; + i++; + } + return(max+1); +} + + diff --git a/kernel/riscv64/izamin.c b/kernel/riscv64/izamin.c new file mode 100644 index 000000000..fb5a0d4cb --- /dev/null +++ b/kernel/riscv64/izamin.c @@ -0,0 +1,81 @@ +/*************************************************************************** +Copyright (c) 2013, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. 
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*****************************************************************************/ + +/************************************************************************************** +* 2013/09/14 Saar +* BLASTEST float : NoTest +* BLASTEST double : NoTest +* CTEST : NoTest +* TEST : NoTest +* +**************************************************************************************/ + +#include "common.h" +#include + +#if defined(DOUBLE) + +#define ABS fabs + +#else + +#define ABS fabsf + +#endif + +#define CABS1(x,i) ABS(x[i])+ABS(x[i+1]) + +BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) +{ + BLASLONG i=0; + BLASLONG ix=0; + FLOAT minf; + BLASLONG min=0; + BLASLONG inc_x2; + + if (n <= 0 || inc_x <= 0) return(min); + + inc_x2 = 2 * inc_x; + + minf = CABS1(x,0); + ix += inc_x2; + i++; + + while(i < n) + { + if( CABS1(x,ix) < minf ) + { + min = i; + minf = CABS1(x,ix); + } + ix += inc_x2; + i++; + } + return(min+1); +} + + diff --git a/kernel/riscv64/max.c b/kernel/riscv64/max.c new file mode 100644 index 000000000..2ad956bc0 --- /dev/null +++ b/kernel/riscv64/max.c @@ -0,0 +1,65 @@ +/*************************************************************************** +Copyright (c) 2013, The OpenBLAS Project +All rights reserved. 
+Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*****************************************************************************/ + +/************************************************************************************** +* 2013/09/14 Saar +* BLASTEST float : NoTest +* BLASTEST double : NoTest +* CTEST : NoTest +* TEST : NoTest +* +**************************************************************************************/ + +#include "common.h" +#include + + +FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) +{ + BLASLONG i=0; + BLASLONG ix=0; + FLOAT maxf=0.0; + + if (n <= 0 || inc_x <= 0) return(maxf); + + maxf=x[0]; + ix += inc_x; + i++; + + while(i < n) + { + if( x[ix] > maxf ) + { + maxf = x[ix]; + } + ix += inc_x; + i++; + } + return(maxf); +} + + diff --git a/kernel/riscv64/min.c b/kernel/riscv64/min.c new file mode 100644 index 000000000..2812fe397 --- /dev/null +++ b/kernel/riscv64/min.c @@ -0,0 +1,65 @@ +/*************************************************************************** +Copyright (c) 2013, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*****************************************************************************/ + +/************************************************************************************** +* 2013/09/14 Saar +* BLASTEST float : NoTest +* BLASTEST double : NoTest +* CTEST : NoTest +* TEST : NoTest +* +**************************************************************************************/ + +#include "common.h" +#include + + +FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) +{ + BLASLONG i=0; + BLASLONG ix=0; + FLOAT minf=0.0; + + if (n <= 0 || inc_x <= 0) return(minf); + + minf=x[0]; + ix += inc_x; + i++; + + while(i < n) + { + if( x[ix] < minf ) + { + minf = x[ix]; + } + ix += inc_x; + i++; + } + return(minf); +} + + diff --git a/kernel/riscv64/nrm2.c b/kernel/riscv64/nrm2.c new file mode 100644 index 000000000..fcff09337 --- /dev/null +++ b/kernel/riscv64/nrm2.c @@ -0,0 +1,88 @@ +/*************************************************************************** +Copyright (c) 2013, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. 
Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*****************************************************************************/ + +/************************************************************************************** +* 2013/09/13 Saar +* BLASTEST float : OK +* BLASTEST double : OK +* CTEST : OK +* TEST : OK +* +**************************************************************************************/ + +#include "common.h" +#include + +#if defined(DOUBLE) + +#define ABS fabs + +#else + +#define ABS fabsf + +#endif + + + +FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) +{ + BLASLONG i=0; + FLOAT scale = 0.0; + FLOAT ssq = 1.0; + FLOAT absxi = 0.0; + + + if (n <= 0 || inc_x <= 0) return(0.0); + if ( n == 1 ) return( ABS(x[0]) ); + + n *= inc_x; + while(i < n) + { + + if ( x[i] != 0.0 ) + { + absxi = ABS( x[i] ); + if ( scale < absxi ) + { + ssq = 1 + ssq * ( scale / absxi ) * ( scale / absxi ); + scale = absxi ; + } + else + { + ssq += ( absxi/scale ) * ( absxi/scale ); + } + + } + i += inc_x; + } + scale = scale * sqrt( ssq ); + return(scale); + +} + + diff --git a/kernel/riscv64/omatcopy_cn.c b/kernel/riscv64/omatcopy_cn.c new file mode 100644 index 000000000..4d11b9125 --- /dev/null +++ b/kernel/riscv64/omatcopy_cn.c @@ -0,0 +1,90 @@ +/*************************************************************************** +Copyright (c) 2013, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. 
Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*****************************************************************************/ + +#include "common.h" + +/***************************************************** + * 2014/06/09 Saar + * + * Order ColMajor + * No Trans + * +******************************************************/ + +int CNAME(BLASLONG rows, BLASLONG cols, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG ldb) +{ + BLASLONG i,j; + FLOAT *aptr,*bptr; + + if ( rows <= 0 ) return(0); + if ( cols <= 0 ) return(0); + + aptr = a; + bptr = b; + + if ( alpha == 0.0 ) + { + for ( i=0; i + +int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *dummy, BLASLONG dummy2) +{ + BLASLONG i=0; + BLASLONG ix=0,iy=0; + FLOAT temp; + + if ( n < 0 ) return(0); + + while(i < n) + { + + temp = x[ix] ; + x[ix] = y[iy] ; + y[iy] = temp ; + + ix += inc_x ; + iy += inc_y ; + i++ ; + + } + return(0); + +} + + diff --git a/kernel/riscv64/symv_L.c b/kernel/riscv64/symv_L.c new file mode 100644 index 
000000000..8f48d03f5 --- /dev/null +++ b/kernel/riscv64/symv_L.c @@ -0,0 +1,70 @@ +/*************************************************************************** +Copyright (c) 2013, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*****************************************************************************/ + + +#include "common.h" + +int CNAME(BLASLONG m, BLASLONG offset, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *buffer) +{ + BLASLONG i; + BLASLONG ix,iy; + BLASLONG jx,jy; + BLASLONG j; + FLOAT temp1; + FLOAT temp2; + +#if 0 + if ( m != offset ) + printf("Symv_L: m=%d offset=%d\n",m,offset); +#endif + + jx = 0; + jy = 0; + + for (j=0; j + +#if defined(DOUBLE) + +#define ABS fabs + +#else + +#define ABS fabsf + +#endif + +#define CABS1(x,i) ABS(x[i])+ABS(x[i+1]) + +FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) +{ + BLASLONG i=0; + BLASLONG ix=0; + FLOAT maxf; + BLASLONG inc_x2; + + if (n <= 0 || inc_x <= 0) return(0.0); + + inc_x2 = 2 * inc_x; + + maxf = CABS1(x,0); + ix += inc_x2; + i++; + + while(i < n) + { + if( CABS1(x,ix) > maxf ) + { + maxf = CABS1(x,ix); + } + ix += inc_x2; + i++; + } + return(maxf); +} + + diff --git a/kernel/riscv64/zamin.c b/kernel/riscv64/zamin.c new file mode 100644 index 000000000..02eab3e75 --- /dev/null +++ b/kernel/riscv64/zamin.c @@ -0,0 +1,79 @@ +/*************************************************************************** +Copyright (c) 2013, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. 
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*****************************************************************************/ + +/************************************************************************************** +* 2013/09/14 Saar +* BLASTEST float : OK +* BLASTEST double : OK +* CTEST : NoTest +* TEST : NoTest +* +**************************************************************************************/ + +#include "common.h" +#include + +#if defined(DOUBLE) + +#define ABS fabs + +#else + +#define ABS fabsf + +#endif + +#define CABS1(x,i) ABS(x[i])+ABS(x[i+1]) + +FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) +{ + BLASLONG i=0; + BLASLONG ix=0; + FLOAT minf; + BLASLONG inc_x2; + + if (n <= 0 || inc_x <= 0) return(0.0); + + inc_x2 = 2 * inc_x; + + minf = CABS1(x,0); + ix += inc_x2; + i++; + + while(i < n) + { + if( CABS1(x,ix) < minf ) + { + minf = CABS1(x,ix); + } + ix += inc_x2; + i++; + } + return(minf); +} + + diff --git a/kernel/riscv64/zasum.c b/kernel/riscv64/zasum.c new file mode 100644 index 000000000..61e85cae6 --- /dev/null +++ b/kernel/riscv64/zasum.c @@ -0,0 +1,72 @@ +/*************************************************************************** +Copyright (c) 2013, The OpenBLAS Project +All rights reserved. 
+Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*****************************************************************************/ + +/************************************************************************************** +* 2013/09/14 Saar +* BLASTEST float : OK +* BLASTEST double : OK +* CTEST : OK +* TEST : OK +* +**************************************************************************************/ + + +#include "common.h" +#include + +#if defined(DOUBLE) + +#define ABS fabs + +#else + +#define ABS fabsf + +#endif + +#define CABS1(x,i) ABS(x[i])+ABS(x[i+1]) + +FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) +{ + BLASLONG i=0; + FLOAT sumf = 0.0; + BLASLONG inc_x2; + + if (n <= 0 || inc_x <= 0) return(sumf); + + inc_x2 = 2 * inc_x; + + n *= inc_x2; + while(i < n) + { + sumf += CABS1(x,i); + i += inc_x2; + } + return(sumf); +} + + diff --git a/kernel/riscv64/zaxpby.c b/kernel/riscv64/zaxpby.c new file mode 100644 index 000000000..445354416 --- /dev/null +++ b/kernel/riscv64/zaxpby.c @@ -0,0 +1,118 @@ +/*************************************************************************** +Copyright (c) 2013, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. 
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*****************************************************************************/ + +/*************************************************************************** +* 2014/06/07 Saar +* +***************************************************************************/ + +#include "common.h" + +int CNAME(BLASLONG n, FLOAT alpha_r, FLOAT alpha_i, FLOAT *x, BLASLONG inc_x, FLOAT beta_r, FLOAT beta_i,FLOAT *y, BLASLONG inc_y) +{ + BLASLONG i=0; + BLASLONG ix,iy; + FLOAT temp; + BLASLONG inc_x2, inc_y2; + + if ( n <= 0 ) return(0); + + ix = 0; + iy = 0; + + inc_x2 = 2 * inc_x; + inc_y2 = 2 * inc_y; + + if ( beta_r == 0.0 && beta_i == 0.0) + { + if ( alpha_r == 0.0 && alpha_i == 0.0 ) + { + + while(i < n) + { + y[iy] = 0.0 ; + y[iy+1] = 0.0 ; + iy += inc_y2 ; + i++ ; + } + + } + else + { + + while(i < n) + { + y[iy] = ( alpha_r * x[ix] - alpha_i * x[ix+1] ) ; + y[iy+1] = ( alpha_r * x[ix+1] + alpha_i * x[ix] ) ; + ix += inc_x2 ; + iy += inc_y2 ; + i++ ; + } + + + } + + } + else + { + if ( alpha_r == 0.0 && alpha_i == 0.0 ) + { + + while(i < n) + { + temp = ( beta_r * y[iy] - beta_i * y[iy+1] ) ; + y[iy+1] = ( beta_r * y[iy+1] + beta_i * y[iy] ) ; + y[iy] = temp; + iy += inc_y2 ; + i++ ; + } + + } + else + { + + while(i < n) + { 
+ temp = ( alpha_r * x[ix] - alpha_i * x[ix+1] ) + ( beta_r * y[iy] - beta_i * y[iy+1] ) ; + y[iy+1] = ( alpha_r * x[ix+1] + alpha_i * x[ix] ) + ( beta_r * y[iy+1] + beta_i * y[iy] ) ; + y[iy] = temp; + ix += inc_x2 ; + iy += inc_y2 ; + i++ ; + } + + + } + + + + } + return(0); + +} + + diff --git a/kernel/riscv64/zaxpy.c b/kernel/riscv64/zaxpy.c new file mode 100644 index 000000000..1dcaeac27 --- /dev/null +++ b/kernel/riscv64/zaxpy.c @@ -0,0 +1,74 @@ +/*************************************************************************** +Copyright (c) 2013, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*****************************************************************************/ + +/************************************************************************************** +* 2013/09/15 Saar +* BLASTEST float : OK +* BLASTEST double : OK +* CTEST : OK +* TEST : OK +* +**************************************************************************************/ + + +#include "common.h" + +int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *dummy, BLASLONG dummy2) +{ + BLASLONG i=0; + BLASLONG ix,iy; + BLASLONG inc_x2; + BLASLONG inc_y2; + + if ( n < 0 ) return(0); + if ( da_r == 0.0 && da_i == 0.0 ) return(0); + + ix = 0; + iy = 0; + + inc_x2 = 2 * inc_x; + inc_y2 = 2 * inc_y; + + while(i < n) + { +#if !defined(CONJ) + y[iy] += ( da_r * x[ix] - da_i * x[ix+1] ) ; + y[iy+1] += ( da_r * x[ix+1] + da_i * x[ix] ) ; +#else + y[iy] += ( da_r * x[ix] + da_i * x[ix+1] ) ; + y[iy+1] -= ( da_r * x[ix+1] - da_i * x[ix] ) ; +#endif + ix += inc_x2 ; + iy += inc_y2 ; + i++ ; + + } + return(0); + +} + + diff --git a/kernel/riscv64/zcopy.c b/kernel/riscv64/zcopy.c new file mode 100644 index 000000000..07fe584c5 --- /dev/null +++ b/kernel/riscv64/zcopy.c @@ -0,0 +1,65 @@ +/*************************************************************************** +Copyright (c) 2013, The OpenBLAS Project +All rights reserved. 
+Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*****************************************************************************/ + +/************************************************************************************** +* 2013/09/14 Saar +* BLASTEST float : OK +* BLASTEST double : OK +* CTEST : OK +* TEST : OK +* +**************************************************************************************/ + +#include "common.h" + +int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) +{ + BLASLONG i=0; + BLASLONG ix=0,iy=0; + BLASLONG inc_x2; + BLASLONG inc_y2; + + if ( n < 0 ) return(0); + + inc_x2 = 2 * inc_x; + inc_y2 = 2 * inc_y; + + while(i < n) + { + + y[iy] = x[ix] ; + y[iy+1] = x[ix+1] ; + ix += inc_x2; + iy += inc_y2; + i++ ; + + } + return(0); + +} + + diff --git a/kernel/riscv64/zdot.c b/kernel/riscv64/zdot.c new file mode 100644 index 000000000..733c235c6 --- /dev/null +++ b/kernel/riscv64/zdot.c @@ -0,0 +1,80 @@ +/*************************************************************************** +Copyright (c) 2013, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*****************************************************************************/ + +/************************************************************************************** +* 2013/09/14 Saar +* BLASTEST float : FAIL +* BLASTEST double : FAIL +* CTEST : OK +* TEST : OK +* +**************************************************************************************/ + +#include "common.h" + +OPENBLAS_COMPLEX_FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) + +{ + BLASLONG i=0; + BLASLONG ix=0,iy=0; + FLOAT dot[2]; + OPENBLAS_COMPLEX_FLOAT result; + BLASLONG inc_x2; + BLASLONG inc_y2; + + dot[0]=0.0; + dot[1]=0.0; + + CREAL(result) = 0.0 ; + CIMAG(result) = 0.0 ; + + if ( n < 1 ) return(result); + + inc_x2 = 2 * inc_x ; + inc_y2 = 2 * inc_y ; + + while(i < n) + { +#if !defined(CONJ) + dot[0] += ( x[ix] * y[iy] - x[ix+1] * y[iy+1] ) ; + dot[1] += ( x[ix+1] * y[iy] + x[ix] * y[iy+1] ) ; +#else + dot[0] += ( x[ix] * y[iy] + x[ix+1] * y[iy+1] ) ; + dot[1] -= ( x[ix+1] * y[iy] - x[ix] * y[iy+1] ) ; +#endif + ix += inc_x2 ; + iy += inc_y2 ; + i++ ; + + } + CREAL(result) = dot[0]; + CIMAG(result) = dot[1]; + return(result); + +} + + diff --git a/kernel/riscv64/zgemv_n.c b/kernel/riscv64/zgemv_n.c new file mode 100644 index 000000000..b9b03f792 --- /dev/null +++ b/kernel/riscv64/zgemv_n.c @@ -0,0 +1,157 @@ +/*************************************************************************** +Copyright (c) 2013, The OpenBLAS Project +All rights reserved. 
+Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*****************************************************************************/ + +/************************************************************************************** + * * 2013/11/23 Saar + * * BLASTEST float : OK + * * BLASTEST double : OK + * CTEST : OK + * TEST : OK + * * + * **************************************************************************************/ + + +#include "common.h" + +int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha_r, FLOAT alpha_i, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *buffer) +{ + BLASLONG i; + BLASLONG ix,iy; + BLASLONG j; + FLOAT *a_ptr; + FLOAT temp_r,temp_i; + BLASLONG inc_x2,inc_y2; + BLASLONG lda2; + BLASLONG i2; + + lda2 = 2*lda; + + ix = 0; + a_ptr = a; + + if ( inc_x == 1 && inc_y == 1 ) + { + + for (j=0; j + +#if defined(DOUBLE) + +#define ABS fabs + +#else + +#define ABS fabsf + +#endif + + + +FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) +{ + BLASLONG i=0; + FLOAT scale = 0.0; + FLOAT ssq = 1.0; + BLASLONG inc_x2; + FLOAT temp; + + if (n <= 0 || inc_x <= 0) return(0.0); + + inc_x2 = 2 * inc_x; + + n *= inc_x2; + while(i < n) + { + + if ( x[i] != 0.0 ) + { + temp = ABS( x[i] ); + if ( scale < temp ) + { + ssq = 1 + ssq * ( scale / temp ) * ( scale / temp ); + scale = temp ; + } + else + { + ssq += ( temp / scale ) * ( temp / scale ); + } + + } + + if ( x[i+1] != 0.0 ) + { + temp = ABS( x[i+1] ); + if ( scale < temp ) + { + ssq = 1 + ssq * ( scale / temp ) * ( scale / temp ); + scale = temp ; + } + else + { + ssq += ( temp / scale ) * ( temp / scale ); + } + + } + + + i += inc_x2; + } + scale = scale * sqrt( ssq ); + return(scale); + +} + + diff --git a/kernel/riscv64/zomatcopy_cn.c b/kernel/riscv64/zomatcopy_cn.c new file mode 100644 index 000000000..f5a7a6284 --- /dev/null +++ b/kernel/riscv64/zomatcopy_cn.c @@ -0,0 +1,70 @@ +/*************************************************************************** +Copyright (c) 2013, The OpenBLAS Project +All rights 
reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*****************************************************************************/ + +#include "common.h" + +/***************************************************** + * 2014/06/09 Saar + * + * Order ColMajor + * No Trans + * +******************************************************/ + +int CNAME(BLASLONG rows, BLASLONG cols, FLOAT alpha_r, FLOAT alpha_i, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG ldb) +{ + BLASLONG i,j,ia; + FLOAT *aptr,*bptr; + + if ( rows <= 0 ) return(0); + if ( cols <= 0 ) return(0); + + aptr = a; + bptr = b; + + lda *= 2; + ldb *= 2; + + for ( i=0; i + +int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3, FLOAT dummy4, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *dummy, BLASLONG dummy2) +{ + BLASLONG i=0; + BLASLONG ix=0,iy=0; + FLOAT temp[2]; + BLASLONG inc_x2; + BLASLONG inc_y2; + + if ( n < 0 ) return(0); + + inc_x2 = 2 * inc_x; + inc_y2 = 2 * inc_y; + + while(i < n) + { + + temp[0] = x[ix] ; + temp[1] = x[ix+1] ; + x[ix] = y[iy] ; + x[ix+1] = y[iy+1] ; + y[iy] = temp[0] ; + y[iy+1] = temp[1] ; + + ix += inc_x2 ; + iy += inc_y2 ; + i++ ; + + } + return(0); + +} + + diff --git a/lapack/laswp/riscv64/Makefile b/lapack/laswp/riscv64/Makefile new file mode 100644 index 000000000..75411deb5 --- /dev/null +++ b/lapack/laswp/riscv64/Makefile @@ -0,0 +1,13 @@ +TOPDIR = ../../.. +include ../../../Makefile.system + +ifndef LASWP +LASWP = ../generic/laswp_k.c +endif + +ifndef ZLASWP +ZLASWP = ../generic/zlaswp_k.c +endif + +include ../generic/Makefile + diff --git a/param.h b/param.h index 189cdc4a0..52675bc25 100644 --- a/param.h +++ b/param.h @@ -2343,6 +2343,45 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define SYMV_P 16 #endif +#ifdef RISCV64 +#define GEMM_DEFAULT_OFFSET_A 0 +#define GEMM_DEFAULT_OFFSET_B 0 +#define GEMM_DEFAULT_ALIGN 0x03fffUL + +#define SGEMM_DEFAULT_UNROLL_M 4 +#define SGEMM_DEFAULT_UNROLL_N 4 + +#define DGEMM_DEFAULT_UNROLL_M 4 +#define DGEMM_DEFAULT_UNROLL_N 4 + +#define CGEMM_DEFAULT_UNROLL_M 2 +#define CGEMM_DEFAULT_UNROLL_N 2 + +#define ZGEMM_DEFAULT_UNROLL_M 2 +#define ZGEMM_DEFAULT_UNROLL_N 2 + +#define SGEMM_DEFAULT_P 128 +#define DGEMM_DEFAULT_P 128 +#define CGEMM_DEFAULT_P 96 +#define ZGEMM_DEFAULT_P 64 + +#define SGEMM_DEFAULT_Q 240 +#define DGEMM_DEFAULT_Q 120 +#define CGEMM_DEFAULT_Q 120 +#define ZGEMM_DEFAULT_Q 120 + +#define SGEMM_DEFAULT_R 12288 +#define DGEMM_DEFAULT_R 8192 +#define CGEMM_DEFAULT_R 4096 +#define ZGEMM_DEFAULT_R 4096 + +#define SYMV_P 16 + +#define GEMM_DEFAULT_OFFSET_A 0 +#define GEMM_DEFAULT_OFFSET_B 0 + +#endif + #ifdef ARMV7 #define SNUMOPT 2 #define DNUMOPT 2 From 0ee395db35ee824aff77d4d2b812aaedb111addd Mon Sep 17 00:00:00 2001 From: Jerry Zhao Date: Wed, 18 Apr 2018 18:03:32 -0700 Subject: [PATCH 2/9] Fixed TRMM and SYMM for RISCV --- kernel/Makefile.L3 | 4 ++++ kernel/riscv64/KERNEL | 10 ++++++++++ param.h | 8 ++++---- 3 files changed, 18 insertions(+), 4 deletions(-) diff --git a/kernel/Makefile.L3 b/kernel/Makefile.L3 index 4284fbfa0..63e09a56d 100644 --- a/kernel/Makefile.L3 +++ b/kernel/Makefile.L3 @@ -20,6 +20,10 @@ ifeq ($(ARCH), arm64) USE_TRMM = 1 endif +ifeq ($(ARCH), riscv64) +USE_TRMM = 1 +endif + ifeq ($(TARGET), LOONGSON3B) USE_TRMM = 1 endif diff --git a/kernel/riscv64/KERNEL b/kernel/riscv64/KERNEL index 7d854ced6..04d82b4ce 100644 --- a/kernel/riscv64/KERNEL +++ b/kernel/riscv64/KERNEL @@ -129,6 +129,16 @@ ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c +SSYMV_U_KERNEL = ../generic/symv_k.c +SSYMV_L_KERNEL = ../generic/symv_k.c +DSYMV_U_KERNEL = ../generic/symv_k.c +DSYMV_L_KERNEL = ../generic/symv_k.c 
+CSYMV_U_KERNEL = ../generic/zsymv_k.c +CSYMV_L_KERNEL = ../generic/zsymv_k.c +ZSYMV_U_KERNEL = ../generic/zsymv_k.c +ZSYMV_L_KERNEL = ../generic/zsymv_k.c + + LSAME_KERNEL = ../generic/lsame.c SCABS_KERNEL = ../generic/cabs.c diff --git a/param.h b/param.h index 52675bc25..22d837960 100644 --- a/param.h +++ b/param.h @@ -2348,11 +2348,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define GEMM_DEFAULT_OFFSET_B 0 #define GEMM_DEFAULT_ALIGN 0x03fffUL -#define SGEMM_DEFAULT_UNROLL_M 4 -#define SGEMM_DEFAULT_UNROLL_N 4 +#define SGEMM_DEFAULT_UNROLL_M 2 +#define SGEMM_DEFAULT_UNROLL_N 2 -#define DGEMM_DEFAULT_UNROLL_M 4 -#define DGEMM_DEFAULT_UNROLL_N 4 +#define DGEMM_DEFAULT_UNROLL_M 2 +#define DGEMM_DEFAULT_UNROLL_N 2 #define CGEMM_DEFAULT_UNROLL_M 2 #define CGEMM_DEFAULT_UNROLL_N 2 From db17ce896fbbf53cbef34f81e1f1ec6887965ec4 Mon Sep 17 00:00:00 2001 From: Dumi Loghin Date: Wed, 5 Sep 2018 12:49:37 +0800 Subject: [PATCH 3/9] replace ARCH with AR in lapack-netlib --- Makefile | 4 +-- c_check | 4 +++ lapack-netlib/BLAS/SRC/Makefile | 10 +++--- lapack-netlib/CBLAS/src/Makefile | 32 +++++++++---------- lapack-netlib/DOCS/lawn81.tex | 2 +- lapack-netlib/INSTALL/make.inc.ALPHA | 4 +-- lapack-netlib/INSTALL/make.inc.HPPA | 4 +-- lapack-netlib/INSTALL/make.inc.IRIX64 | 4 +-- lapack-netlib/INSTALL/make.inc.O2K | 4 +-- lapack-netlib/INSTALL/make.inc.SGI5 | 4 +-- lapack-netlib/INSTALL/make.inc.SUN4 | 4 +-- lapack-netlib/INSTALL/make.inc.SUN4SOL2 | 4 +-- lapack-netlib/INSTALL/make.inc.XLF | 4 +-- lapack-netlib/INSTALL/make.inc.gfortran | 4 +-- lapack-netlib/INSTALL/make.inc.gfortran_debug | 4 +-- lapack-netlib/INSTALL/make.inc.ifort | 4 +-- lapack-netlib/INSTALL/make.inc.pgf95 | 4 +-- lapack-netlib/INSTALL/make.inc.pghpf | 4 +-- lapack-netlib/LAPACKE/src/Makefile | 10 +++--- lapack-netlib/LAPACKE/utils/Makefile | 2 +- lapack-netlib/SRC/Makefile | 10 +++--- lapack-netlib/SRC/VARIANTS/Makefile | 12 +++---- lapack-netlib/TESTING/MATGEN/Makefile | 
10 +++--- lapack-netlib/make.inc.example | 4 +-- make.inc | 2 +- 25 files changed, 79 insertions(+), 75 deletions(-) diff --git a/Makefile b/Makefile index c0e5fbcf8..aaeb0c498 100644 --- a/Makefile +++ b/Makefile @@ -237,8 +237,8 @@ ifndef NOFORTRAN -@echo "LOADOPTS = $(FFLAGS) $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc -@echo "CC = $(CC)" >> $(NETLIB_LAPACK_DIR)/make.inc -@echo "override CFLAGS = $(LAPACK_CFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc - -@echo "ARCH = $(AR)" >> $(NETLIB_LAPACK_DIR)/make.inc - -@echo "ARCHFLAGS = $(ARFLAGS) -ru" >> $(NETLIB_LAPACK_DIR)/make.inc + -@echo "AR = $(AR)" >> $(NETLIB_LAPACK_DIR)/make.inc + -@echo "ARFLAGS = $(ARFLAGS) -ru" >> $(NETLIB_LAPACK_DIR)/make.inc -@echo "RANLIB = $(RANLIB)" >> $(NETLIB_LAPACK_DIR)/make.inc -@echo "LAPACKLIB = ../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc -@echo "TMGLIB = ../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc diff --git a/c_check b/c_check index c564855f3..f86a37b5b 100644 --- a/c_check +++ b/c_check @@ -121,6 +121,10 @@ if (($architecture eq "x86") && ($os ne Darwin) && ($os ne SunOS)) { $binary =32; } +if ($architecture eq "riscv64") { + $defined = 1; +} + if ($compiler eq "PGI") { $compiler_name .= " -tp p7" if ($binary eq "32"); $compiler_name .= " -tp p7-64" if ($binary eq "64"); diff --git a/lapack-netlib/BLAS/SRC/Makefile b/lapack-netlib/BLAS/SRC/Makefile index a436365aa..f7236318b 100644 --- a/lapack-netlib/BLAS/SRC/Makefile +++ b/lapack-netlib/BLAS/SRC/Makefile @@ -138,23 +138,23 @@ ALLOBJ = $(SBLAS1) $(SBLAS2) $(SBLAS3) $(DBLAS1) $(DBLAS2) $(DBLAS3) \ $(ZBLAS2) $(ZBLAS3) $(ALLBLAS) $(BLASLIB): $(ALLOBJ) - $(ARCH) $(ARCHFLAGS) $@ $^ + $(AR) $(ARFLAGS) $@ $^ $(RANLIB) $@ single: $(SBLAS1) $(ALLBLAS) $(SBLAS2) $(SBLAS3) - $(ARCH) $(ARCHFLAGS) $(BLASLIB) $^ + $(AR) $(ARFLAGS) $(BLASLIB) $^ $(RANLIB) $(BLASLIB) double: $(DBLAS1) $(ALLBLAS) $(DBLAS2) $(DBLAS3) - $(ARCH) $(ARCHFLAGS) $(BLASLIB) $^ + $(AR) $(ARFLAGS) $(BLASLIB) $^ $(RANLIB) $(BLASLIB) complex: $(CBLAS1) $(CB1AUX) 
$(ALLBLAS) $(CBLAS2) $(CBLAS3) - $(ARCH) $(ARCHFLAGS) $(BLASLIB) $^ + $(AR) $(ARFLAGS) $(BLASLIB) $^ $(RANLIB) $(BLASLIB) complex16: $(ZBLAS1) $(ZB1AUX) $(ALLBLAS) $(ZBLAS2) $(ZBLAS3) - $(ARCH) $(ARCHFLAGS) $(BLASLIB) $^ + $(AR) $(ARFLAGS) $(BLASLIB) $^ $(RANLIB) $(BLASLIB) FRC: diff --git a/lapack-netlib/CBLAS/src/Makefile b/lapack-netlib/CBLAS/src/Makefile index 6c0518ac7..9b9063d8d 100644 --- a/lapack-netlib/CBLAS/src/Makefile +++ b/lapack-netlib/CBLAS/src/Makefile @@ -45,22 +45,22 @@ sclev1 = cblas_scasum.o scasumsub.o cblas_scnrm2.o scnrm2sub.o # Single precision real slib1: $(slev1) $(sclev1) - $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ + $(AR) $(ARFLAGS) $(CBLASLIB) $^ $(RANLIB) $(CBLASLIB) # Double precision real dlib1: $(dlev1) - $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ + $(AR) $(ARFLAGS) $(CBLASLIB) $^ $(RANLIB) $(CBLASLIB) # Single precision complex clib1: $(clev1) $(sclev1) - $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ + $(AR) $(ARFLAGS) $(CBLASLIB) $^ $(RANLIB) $(CBLASLIB) # Double precision complex zlib1: $(zlev1) - $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ + $(AR) $(ARFLAGS) $(CBLASLIB) $^ $(RANLIB) $(CBLASLIB) # @@ -97,22 +97,22 @@ zlev2 = cblas_zgemv.o cblas_zgbmv.o cblas_zhemv.o cblas_zhbmv.o cblas_zhpmv.o \ # Single precision real slib2: $(slev2) $(errhand) - $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ + $(AR) $(ARFLAGS) $(CBLASLIB) $^ $(RANLIB) $(CBLASLIB) # Double precision real dlib2: $(dlev2) $(errhand) - $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ + $(AR) $(ARFLAGS) $(CBLASLIB) $^ $(RANLIB) $(CBLASLIB) # Single precision complex clib2: $(clev2) $(errhand) - $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ + $(AR) $(ARFLAGS) $(CBLASLIB) $^ $(RANLIB) $(CBLASLIB) # Double precision complex zlib2: $(zlev2) $(errhand) - $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ + $(AR) $(ARFLAGS) $(CBLASLIB) $^ $(RANLIB) $(CBLASLIB) # @@ -143,22 +143,22 @@ zlev3 = cblas_zgemm.o cblas_zsymm.o cblas_zhemm.o cblas_zherk.o \ # Single precision real slib3: $(slev3) $(errhand) - $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ + $(AR) 
$(ARFLAGS) $(CBLASLIB) $^ $(RANLIB) $(CBLASLIB) # Double precision real dlib3: $(dlev3) $(errhand) - $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ + $(AR) $(ARFLAGS) $(CBLASLIB) $^ $(RANLIB) $(CBLASLIB) # Single precision complex clib3: $(clev3) $(errhand) - $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ + $(AR) $(ARFLAGS) $(CBLASLIB) $^ $(RANLIB) $(CBLASLIB) # Double precision complex zlib3: $(zlev3) $(errhand) - $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ + $(AR) $(ARFLAGS) $(CBLASLIB) $^ $(RANLIB) $(CBLASLIB) @@ -168,22 +168,22 @@ alev3 = $(slev3) $(dlev3) $(clev3) $(zlev3) # All level 1 all1: $(alev1) - $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ + $(AR) $(ARFLAGS) $(CBLASLIB) $^ $(RANLIB) $(CBLASLIB) # All level 2 all2: $(alev2) $(errhand) - $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ + $(AR) $(ARFLAGS) $(CBLASLIB) $^ $(RANLIB) $(CBLASLIB) # All level 3 all3: $(alev3) $(errhand) - $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ + $(AR) $(ARFLAGS) $(CBLASLIB) $^ $(RANLIB) $(CBLASLIB) # All levels and precisions $(CBLASLIB): $(alev1) $(alev2) $(alev3) $(errhand) - $(ARCH) $(ARCHFLAGS) $@ $^ + $(AR) $(ARFLAGS) $@ $^ $(RANLIB) $@ FRC: diff --git a/lapack-netlib/DOCS/lawn81.tex b/lapack-netlib/DOCS/lawn81.tex index 291735299..01c7c39e2 100644 --- a/lapack-netlib/DOCS/lawn81.tex +++ b/lapack-netlib/DOCS/lawn81.tex @@ -466,7 +466,7 @@ TIMER = EXT_ETIME Refer to the section~\ref{second} to get more information. -Next, you will need to modify \texttt{ARCH}, \texttt{ARCHFLAGS}, and \texttt{RANLIB} to specify archiver, +Next, you will need to modify \texttt{AR}, \texttt{ARFLAGS}, and \texttt{RANLIB} to specify archiver, archiver options, and ranlib for your machine. 
If your architecture does not require \texttt{ranlib} to be run after each archive command (as is the case with CRAY computers running UNICOS, Hewlett Packard diff --git a/lapack-netlib/INSTALL/make.inc.ALPHA b/lapack-netlib/INSTALL/make.inc.ALPHA index 0ceeaa155..049cf0b13 100644 --- a/lapack-netlib/INSTALL/make.inc.ALPHA +++ b/lapack-netlib/INSTALL/make.inc.ALPHA @@ -29,8 +29,8 @@ LOADOPTS = # The archiver and the flag(s) to use when building an archive # (library). If your system has no ranlib, set RANLIB = echo. # -ARCH = ar -ARCHFLAGS = cr +AR = ar +ARFLAGS = cr RANLIB = ranlib # Timer for the SECOND and DSECND routines diff --git a/lapack-netlib/INSTALL/make.inc.HPPA b/lapack-netlib/INSTALL/make.inc.HPPA index 8eabbbdf4..2bd8ee16e 100644 --- a/lapack-netlib/INSTALL/make.inc.HPPA +++ b/lapack-netlib/INSTALL/make.inc.HPPA @@ -29,8 +29,8 @@ LOADOPTS = -Aa +U77 # The archiver and the flag(s) to use when building an archive # (library). If your system has no ranlib, set RANLIB = echo. # -ARCH = ar -ARCHFLAGS = cr +AR = ar +ARFLAGS = cr RANLIB = echo # Timer for the SECOND and DSECND routines diff --git a/lapack-netlib/INSTALL/make.inc.IRIX64 b/lapack-netlib/INSTALL/make.inc.IRIX64 index d9e71e1bf..0f57941b5 100644 --- a/lapack-netlib/INSTALL/make.inc.IRIX64 +++ b/lapack-netlib/INSTALL/make.inc.IRIX64 @@ -32,8 +32,8 @@ LOADOPTS = -O3 -64 -mips4 -r10000 -OPT:IEEE_NaN_inf=ON # The archiver and the flag(s) to use when building an archive # (library). If your system has no ranlib, set RANLIB = echo. # -ARCH = ar -ARCHFLAGS = cr +AR = ar +ARFLAGS = cr RANLIB = echo # Timer for the SECOND and DSECND routines diff --git a/lapack-netlib/INSTALL/make.inc.O2K b/lapack-netlib/INSTALL/make.inc.O2K index 3ffcadacc..d99beca41 100644 --- a/lapack-netlib/INSTALL/make.inc.O2K +++ b/lapack-netlib/INSTALL/make.inc.O2K @@ -32,8 +32,8 @@ LOADOPTS = -O3 -64 -mips4 -r10000 # The archiver and the flag(s) to use when building an archive # (library). 
If your system has no ranlib, set RANLIB = echo. # -ARCH = ar -ARCHFLAGS = cr +AR = ar +ARFLAGS = cr RANLIB = echo # Timer for the SECOND and DSECND routines diff --git a/lapack-netlib/INSTALL/make.inc.SGI5 b/lapack-netlib/INSTALL/make.inc.SGI5 index c7019ac16..c4a702d48 100644 --- a/lapack-netlib/INSTALL/make.inc.SGI5 +++ b/lapack-netlib/INSTALL/make.inc.SGI5 @@ -29,8 +29,8 @@ LOADOPTS = # The archiver and the flag(s) to use when building an archive # (library). If your system has no ranlib, set RANLIB = echo. # -ARCH = ar -ARCHFLAGS = cr +AR = ar +ARFLAGS = cr RANLIB = echo # Timer for the SECOND and DSECND routines diff --git a/lapack-netlib/INSTALL/make.inc.SUN4 b/lapack-netlib/INSTALL/make.inc.SUN4 index 4e44f1beb..6a78e9576 100644 --- a/lapack-netlib/INSTALL/make.inc.SUN4 +++ b/lapack-netlib/INSTALL/make.inc.SUN4 @@ -29,8 +29,8 @@ LOADOPTS = -dalign -O4 -fast # The archiver and the flag(s) to use when building an archive # (library). If your system has no ranlib, set RANLIB = echo. # -ARCH = ar -ARCHFLAGS = cr +AR = ar +ARFLAGS = cr RANLIB = ranlib # Timer for the SECOND and DSECND routines diff --git a/lapack-netlib/INSTALL/make.inc.SUN4SOL2 b/lapack-netlib/INSTALL/make.inc.SUN4SOL2 index e6d79add3..0ac3cc4e4 100644 --- a/lapack-netlib/INSTALL/make.inc.SUN4SOL2 +++ b/lapack-netlib/INSTALL/make.inc.SUN4SOL2 @@ -33,8 +33,8 @@ LOADOPTS = -f -dalign -native -xO2 -xarch=v8plusa # The archiver and the flag(s) to use when building an archive # (library). If your system has no ranlib, set RANLIB = echo. # -ARCH = ar -ARCHFLAGS = cr +AR = ar +ARFLAGS = cr RANLIB = echo # Timer for the SECOND and DSECND routines diff --git a/lapack-netlib/INSTALL/make.inc.XLF b/lapack-netlib/INSTALL/make.inc.XLF index 9466ee332..27e22cac9 100644 --- a/lapack-netlib/INSTALL/make.inc.XLF +++ b/lapack-netlib/INSTALL/make.inc.XLF @@ -30,8 +30,8 @@ LOADOPTS = -qnosave # The archiver and the flag(s) to use when building an archive # (library). 
If your system has no ranlib, set RANLIB = echo. # -ARCH = ar -ARCHFLAGS = cr +AR = ar +ARFLAGS = cr RANLIB = ranlib # Timer for the SECOND and DSECND routines diff --git a/lapack-netlib/INSTALL/make.inc.gfortran b/lapack-netlib/INSTALL/make.inc.gfortran index 39d98d4d4..b342b18a8 100644 --- a/lapack-netlib/INSTALL/make.inc.gfortran +++ b/lapack-netlib/INSTALL/make.inc.gfortran @@ -33,8 +33,8 @@ LOADOPTS = # The archiver and the flag(s) to use when building an archive # (library). If your system has no ranlib, set RANLIB = echo. # -ARCH = ar -ARCHFLAGS = cr +AR = ar +ARFLAGS = cr RANLIB = ranlib # Timer for the SECOND and DSECND routines diff --git a/lapack-netlib/INSTALL/make.inc.gfortran_debug b/lapack-netlib/INSTALL/make.inc.gfortran_debug index 10e6381df..1eaed2102 100644 --- a/lapack-netlib/INSTALL/make.inc.gfortran_debug +++ b/lapack-netlib/INSTALL/make.inc.gfortran_debug @@ -33,8 +33,8 @@ LOADOPTS = # The archiver and the flag(s) to use when building an archive # (library). If your system has no ranlib, set RANLIB = echo. # -ARCH = ar -ARCHFLAGS = cr +AR = ar +ARFLAGS = cr RANLIB = ranlib # Timer for the SECOND and DSECND routines diff --git a/lapack-netlib/INSTALL/make.inc.ifort b/lapack-netlib/INSTALL/make.inc.ifort index b067bd484..a3c37428e 100644 --- a/lapack-netlib/INSTALL/make.inc.ifort +++ b/lapack-netlib/INSTALL/make.inc.ifort @@ -29,8 +29,8 @@ LOADOPTS = # The archiver and the flag(s) to use when building an archive # (library). If your system has no ranlib, set RANLIB = echo. # -ARCH = ar -ARCHFLAGS = cr +AR = ar +ARFLAGS = cr RANLIB = ranlib # Timer for the SECOND and DSECND routines diff --git a/lapack-netlib/INSTALL/make.inc.pgf95 b/lapack-netlib/INSTALL/make.inc.pgf95 index a9a5cec98..931ff378f 100644 --- a/lapack-netlib/INSTALL/make.inc.pgf95 +++ b/lapack-netlib/INSTALL/make.inc.pgf95 @@ -29,8 +29,8 @@ LOADOPTS = # The archiver and the flag(s) to use when building an archive # (library). If your system has no ranlib, set RANLIB = echo. 
# -ARCH = ar -ARCHFLAGS = cr +AR = ar +ARFLAGS = cr RANLIB = echo # Timer for the SECOND and DSECND routines diff --git a/lapack-netlib/INSTALL/make.inc.pghpf b/lapack-netlib/INSTALL/make.inc.pghpf index 1d9bf549c..0dfe8c683 100644 --- a/lapack-netlib/INSTALL/make.inc.pghpf +++ b/lapack-netlib/INSTALL/make.inc.pghpf @@ -29,8 +29,8 @@ LOADOPTS = # The archiver and the flag(s) to use when building an archive # (library). If your system has no ranlib, set RANLIB = echo. # -ARCH = ar -ARCHFLAGS = cr +AR = ar +ARFLAGS = cr RANLIB = echo # Timer for the SECOND and DSECND routines diff --git a/lapack-netlib/LAPACKE/src/Makefile b/lapack-netlib/LAPACKE/src/Makefile index 44884d4a5..03c140bf7 100644 --- a/lapack-netlib/LAPACKE/src/Makefile +++ b/lapack-netlib/LAPACKE/src/Makefile @@ -2455,16 +2455,16 @@ endif all: ../../$(LAPACKELIB) ../../$(LAPACKELIB): $(OBJ_A) $(OBJ_B) $(DEPRECATED) $(EXTENDED) $(MATGEN) - $(ARCH) $(ARCHFLAGS) $@ $(OBJ_A) - $(ARCH) $(ARCHFLAGS) $@ $(OBJ_B) + $(AR) $(ARFLAGS) $@ $(OBJ_A) + $(AR) $(ARFLAGS) $@ $(OBJ_B) ifdef BUILD_DEPRECATED - $(ARCH) $(ARCHFLAGS) $@ $(DEPRECATED) + $(AR) $(ARFLAGS) $@ $(DEPRECATED) endif ifdef (USEXBLAS) - $(ARCH) $(ARCHFLAGS) $@ $(EXTENDED) + $(AR) $(ARFLAGS) $@ $(EXTENDED) endif ifdef LAPACKE_WITH_TMG - $(ARCH) $(ARCHFLAGS) $@ $(MATGEN) + $(AR) $(ARFLAGS) $@ $(MATGEN) endif $(RANLIB) $@ diff --git a/lapack-netlib/LAPACKE/utils/Makefile b/lapack-netlib/LAPACKE/utils/Makefile index 1f639c6ea..c6204ee3b 100644 --- a/lapack-netlib/LAPACKE/utils/Makefile +++ b/lapack-netlib/LAPACKE/utils/Makefile @@ -186,7 +186,7 @@ OBJ = lapacke_cgb_nancheck.o \ all: lib lib: $(OBJ) - $(ARCH) $(ARCHFLAGS) ../../$(LAPACKELIB) $^ + $(AR) $(ARFLAGS) ../../$(LAPACKELIB) $^ $(RANLIB) ../../$(LAPACKELIB) clean: cleanobj diff --git a/lapack-netlib/SRC/Makefile b/lapack-netlib/SRC/Makefile index 531cb51fc..e5bb7a3db 100644 --- a/lapack-netlib/SRC/Makefile +++ b/lapack-netlib/SRC/Makefile @@ -553,26 +553,26 @@ endif all: ../$(LAPACKLIB) 
../$(LAPACKLIB): $(ALLOBJ) $(ALLXOBJ) $(DEPRECATED) - $(ARCH) $(ARCHFLAGS) $@ $(ALLOBJ) $(ALLXOBJ) $(DEPRECATED) + $(AR) $(ARFLAGS) $@ $(ALLOBJ) $(ALLXOBJ) $(DEPRECATED) $(RANLIB) $@ single: $(SLASRC) $(DSLASRC) $(SXLASRC) $(SCLAUX) $(ALLAUX) - $(ARCH) $(ARCHFLAGS) ../$(LAPACKLIB) $(SLASRC) $(DSLASRC) \ + $(AR) $(ARFLAGS) ../$(LAPACKLIB) $(SLASRC) $(DSLASRC) \ $(SXLASRC) $(SCLAUX) $(ALLAUX) $(RANLIB) ../$(LAPACKLIB) complex: $(CLASRC) $(ZCLASRC) $(CXLASRC) $(SCLAUX) $(ALLAUX) - $(ARCH) $(ARCHFLAGS) ../$(LAPACKLIB) $(CLASRC) $(ZCLASRC) \ + $(AR) $(ARFLAGS) ../$(LAPACKLIB) $(CLASRC) $(ZCLASRC) \ $(CXLASRC) $(SCLAUX) $(ALLAUX) $(RANLIB) ../$(LAPACKLIB) double: $(DLASRC) $(DSLASRC) $(DXLASRC) $(DZLAUX) $(ALLAUX) - $(ARCH) $(ARCHFLAGS) ../$(LAPACKLIB) $(DLASRC) $(DSLASRC) \ + $(AR) $(ARFLAGS) ../$(LAPACKLIB) $(DLASRC) $(DSLASRC) \ $(DXLASRC) $(DZLAUX) $(ALLAUX) $(RANLIB) ../$(LAPACKLIB) complex16: $(ZLASRC) $(ZCLASRC) $(ZXLASRC) $(DZLAUX) $(ALLAUX) - $(ARCH) $(ARCHFLAGS) ../$(LAPACKLIB) $(ZLASRC) $(ZCLASRC) \ + $(AR) $(ARFLAGS) ../$(LAPACKLIB) $(ZLASRC) $(ZCLASRC) \ $(ZXLASRC) $(DZLAUX) $(ALLAUX) $(RANLIB) ../$(LAPACKLIB) diff --git a/lapack-netlib/SRC/VARIANTS/Makefile b/lapack-netlib/SRC/VARIANTS/Makefile index 9f1410755..7d0e8824c 100644 --- a/lapack-netlib/SRC/VARIANTS/Makefile +++ b/lapack-netlib/SRC/VARIANTS/Makefile @@ -33,27 +33,27 @@ QRLL = qr/LL/cgeqrf.o qr/LL/dgeqrf.o qr/LL/sgeqrf.o qr/LL/zgeqrf.o qr/LL/sceil.o all: cholrl.a choltop.a lucr.a lull.a lurec.a qrll.a cholrl.a: $(CHOLRL) - $(ARCH) $(ARCHFLAGS) $@ $^ + $(AR) $(ARFLAGS) $@ $^ $(RANLIB) $@ choltop.a: $(CHOLTOP) - $(ARCH) $(ARCHFLAGS) $@ $^ + $(AR) $(ARFLAGS) $@ $^ $(RANLIB) $@ lucr.a: $(LUCR) - $(ARCH) $(ARCHFLAGS) $@ $^ + $(AR) $(ARFLAGS) $@ $^ $(RANLIB) $@ lull.a: $(LULL) - $(ARCH) $(ARCHFLAGS) $@ $^ + $(AR) $(ARFLAGS) $@ $^ $(RANLIB) $@ lurec.a: $(LUREC) - $(ARCH) $(ARCHFLAGS) $@ $^ + $(AR) $(ARFLAGS) $@ $^ $(RANLIB) $@ qrll.a: $(QRLL) - $(ARCH) $(ARCHFLAGS) $@ $^ + $(AR) $(ARFLAGS) $@ $^ 
$(RANLIB) $@ clean: cleanobj cleanlib diff --git a/lapack-netlib/TESTING/MATGEN/Makefile b/lapack-netlib/TESTING/MATGEN/Makefile index e20004c2f..f5ea5a8c0 100644 --- a/lapack-netlib/TESTING/MATGEN/Makefile +++ b/lapack-netlib/TESTING/MATGEN/Makefile @@ -58,23 +58,23 @@ ALLOBJ = $(SMATGEN) $(CMATGEN) $(SCATGEN) $(DMATGEN) $(ZMATGEN) \ $(DZATGEN) ../../$(TMGLIB): $(ALLOBJ) - $(ARCH) $(ARCHFLAGS) $@ $^ + $(AR) $(ARFLAGS) $@ $^ $(RANLIB) $@ single: $(SMATGEN) $(SCATGEN) - $(ARCH) $(ARCHFLAGS) ../../$(TMGLIB) $^ + $(AR) $(ARFLAGS) ../../$(TMGLIB) $^ $(RANLIB) ../../$(TMGLIB) complex: $(CMATGEN) $(SCATGEN) - $(ARCH) $(ARCHFLAGS) ../../$(TMGLIB) $^ + $(AR) $(ARFLAGS) ../../$(TMGLIB) $^ $(RANLIB) ../../$(TMGLIB) double: $(DMATGEN) $(DZATGEN) - $(ARCH) $(ARCHFLAGS) ../../$(TMGLIB) $^ + $(AR) $(ARFLAGS) ../../$(TMGLIB) $^ $(RANLIB) ../../$(TMGLIB) complex16: $(ZMATGEN) $(DZATGEN) - $(ARCH) $(ARCHFLAGS) ../../$(TMGLIB) $^ + $(AR) $(ARFLAGS) ../../$(TMGLIB) $^ $(RANLIB) ../../$(TMGLIB) $(SCATGEN): $(FRC) diff --git a/lapack-netlib/make.inc.example b/lapack-netlib/make.inc.example index d780c3a23..3ddb9eafc 100644 --- a/lapack-netlib/make.inc.example +++ b/lapack-netlib/make.inc.example @@ -33,8 +33,8 @@ LOADOPTS = # The archiver and the flag(s) to use when building an archive # (library). If your system has no ranlib, set RANLIB = echo. # -ARCH = ar -ARCHFLAGS = cr +AR = ar +ARFLAGS = cr RANLIB = ranlib # Timer for the SECOND and DSECND routines diff --git a/make.inc b/make.inc index b6ed098c0..93b355103 100644 --- a/make.inc +++ b/make.inc @@ -1,6 +1,6 @@ SHELL = /bin/sh PLAT = _LINUX DRVOPTS = $(NOOPT) -#ARCHFLAGS= $(ARFLAGS) -ru +#ARFLAGS= $(ARFLAGS) -ru #RANLIB = ranlib From 0b7ccb9e381d4bc3d0149c158631389c2c2d411c Mon Sep 17 00:00:00 2001 From: Dumi Loghin Date: Thu, 6 Sep 2018 13:08:30 +0800 Subject: [PATCH 4/9] Revert "replace ARCH with AR in lapack-netlib" This reverts commit db17ce896fbbf53cbef34f81e1f1ec6887965ec4. 
--- Makefile | 4 +-- c_check | 4 --- lapack-netlib/BLAS/SRC/Makefile | 10 +++--- lapack-netlib/CBLAS/src/Makefile | 32 +++++++++---------- lapack-netlib/DOCS/lawn81.tex | 2 +- lapack-netlib/INSTALL/make.inc.ALPHA | 4 +-- lapack-netlib/INSTALL/make.inc.HPPA | 4 +-- lapack-netlib/INSTALL/make.inc.IRIX64 | 4 +-- lapack-netlib/INSTALL/make.inc.O2K | 4 +-- lapack-netlib/INSTALL/make.inc.SGI5 | 4 +-- lapack-netlib/INSTALL/make.inc.SUN4 | 4 +-- lapack-netlib/INSTALL/make.inc.SUN4SOL2 | 4 +-- lapack-netlib/INSTALL/make.inc.XLF | 4 +-- lapack-netlib/INSTALL/make.inc.gfortran | 4 +-- lapack-netlib/INSTALL/make.inc.gfortran_debug | 4 +-- lapack-netlib/INSTALL/make.inc.ifort | 4 +-- lapack-netlib/INSTALL/make.inc.pgf95 | 4 +-- lapack-netlib/INSTALL/make.inc.pghpf | 4 +-- lapack-netlib/LAPACKE/src/Makefile | 10 +++--- lapack-netlib/LAPACKE/utils/Makefile | 2 +- lapack-netlib/SRC/Makefile | 10 +++--- lapack-netlib/SRC/VARIANTS/Makefile | 12 +++---- lapack-netlib/TESTING/MATGEN/Makefile | 10 +++--- lapack-netlib/make.inc.example | 4 +-- make.inc | 2 +- 25 files changed, 75 insertions(+), 79 deletions(-) diff --git a/Makefile b/Makefile index aaeb0c498..c0e5fbcf8 100644 --- a/Makefile +++ b/Makefile @@ -237,8 +237,8 @@ ifndef NOFORTRAN -@echo "LOADOPTS = $(FFLAGS) $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc -@echo "CC = $(CC)" >> $(NETLIB_LAPACK_DIR)/make.inc -@echo "override CFLAGS = $(LAPACK_CFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc - -@echo "AR = $(AR)" >> $(NETLIB_LAPACK_DIR)/make.inc - -@echo "ARFLAGS = $(ARFLAGS) -ru" >> $(NETLIB_LAPACK_DIR)/make.inc + -@echo "ARCH = $(AR)" >> $(NETLIB_LAPACK_DIR)/make.inc + -@echo "ARCHFLAGS = $(ARFLAGS) -ru" >> $(NETLIB_LAPACK_DIR)/make.inc -@echo "RANLIB = $(RANLIB)" >> $(NETLIB_LAPACK_DIR)/make.inc -@echo "LAPACKLIB = ../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc -@echo "TMGLIB = ../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc diff --git a/c_check b/c_check index f86a37b5b..c564855f3 100644 --- a/c_check +++ b/c_check @@ -121,10 
+121,6 @@ if (($architecture eq "x86") && ($os ne Darwin) && ($os ne SunOS)) { $binary =32; } -if ($architecture eq "riscv64") { - $defined = 1; -} - if ($compiler eq "PGI") { $compiler_name .= " -tp p7" if ($binary eq "32"); $compiler_name .= " -tp p7-64" if ($binary eq "64"); diff --git a/lapack-netlib/BLAS/SRC/Makefile b/lapack-netlib/BLAS/SRC/Makefile index f7236318b..a436365aa 100644 --- a/lapack-netlib/BLAS/SRC/Makefile +++ b/lapack-netlib/BLAS/SRC/Makefile @@ -138,23 +138,23 @@ ALLOBJ = $(SBLAS1) $(SBLAS2) $(SBLAS3) $(DBLAS1) $(DBLAS2) $(DBLAS3) \ $(ZBLAS2) $(ZBLAS3) $(ALLBLAS) $(BLASLIB): $(ALLOBJ) - $(AR) $(ARFLAGS) $@ $^ + $(ARCH) $(ARCHFLAGS) $@ $^ $(RANLIB) $@ single: $(SBLAS1) $(ALLBLAS) $(SBLAS2) $(SBLAS3) - $(AR) $(ARFLAGS) $(BLASLIB) $^ + $(ARCH) $(ARCHFLAGS) $(BLASLIB) $^ $(RANLIB) $(BLASLIB) double: $(DBLAS1) $(ALLBLAS) $(DBLAS2) $(DBLAS3) - $(AR) $(ARFLAGS) $(BLASLIB) $^ + $(ARCH) $(ARCHFLAGS) $(BLASLIB) $^ $(RANLIB) $(BLASLIB) complex: $(CBLAS1) $(CB1AUX) $(ALLBLAS) $(CBLAS2) $(CBLAS3) - $(AR) $(ARFLAGS) $(BLASLIB) $^ + $(ARCH) $(ARCHFLAGS) $(BLASLIB) $^ $(RANLIB) $(BLASLIB) complex16: $(ZBLAS1) $(ZB1AUX) $(ALLBLAS) $(ZBLAS2) $(ZBLAS3) - $(AR) $(ARFLAGS) $(BLASLIB) $^ + $(ARCH) $(ARCHFLAGS) $(BLASLIB) $^ $(RANLIB) $(BLASLIB) FRC: diff --git a/lapack-netlib/CBLAS/src/Makefile b/lapack-netlib/CBLAS/src/Makefile index 9b9063d8d..6c0518ac7 100644 --- a/lapack-netlib/CBLAS/src/Makefile +++ b/lapack-netlib/CBLAS/src/Makefile @@ -45,22 +45,22 @@ sclev1 = cblas_scasum.o scasumsub.o cblas_scnrm2.o scnrm2sub.o # Single precision real slib1: $(slev1) $(sclev1) - $(AR) $(ARFLAGS) $(CBLASLIB) $^ + $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ $(RANLIB) $(CBLASLIB) # Double precision real dlib1: $(dlev1) - $(AR) $(ARFLAGS) $(CBLASLIB) $^ + $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ $(RANLIB) $(CBLASLIB) # Single precision complex clib1: $(clev1) $(sclev1) - $(AR) $(ARFLAGS) $(CBLASLIB) $^ + $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ $(RANLIB) $(CBLASLIB) # Double precision complex 
zlib1: $(zlev1) - $(AR) $(ARFLAGS) $(CBLASLIB) $^ + $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ $(RANLIB) $(CBLASLIB) # @@ -97,22 +97,22 @@ zlev2 = cblas_zgemv.o cblas_zgbmv.o cblas_zhemv.o cblas_zhbmv.o cblas_zhpmv.o \ # Single precision real slib2: $(slev2) $(errhand) - $(AR) $(ARFLAGS) $(CBLASLIB) $^ + $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ $(RANLIB) $(CBLASLIB) # Double precision real dlib2: $(dlev2) $(errhand) - $(AR) $(ARFLAGS) $(CBLASLIB) $^ + $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ $(RANLIB) $(CBLASLIB) # Single precision complex clib2: $(clev2) $(errhand) - $(AR) $(ARFLAGS) $(CBLASLIB) $^ + $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ $(RANLIB) $(CBLASLIB) # Double precision complex zlib2: $(zlev2) $(errhand) - $(AR) $(ARFLAGS) $(CBLASLIB) $^ + $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ $(RANLIB) $(CBLASLIB) # @@ -143,22 +143,22 @@ zlev3 = cblas_zgemm.o cblas_zsymm.o cblas_zhemm.o cblas_zherk.o \ # Single precision real slib3: $(slev3) $(errhand) - $(AR) $(ARFLAGS) $(CBLASLIB) $^ + $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ $(RANLIB) $(CBLASLIB) # Double precision real dlib3: $(dlev3) $(errhand) - $(AR) $(ARFLAGS) $(CBLASLIB) $^ + $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ $(RANLIB) $(CBLASLIB) # Single precision complex clib3: $(clev3) $(errhand) - $(AR) $(ARFLAGS) $(CBLASLIB) $^ + $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ $(RANLIB) $(CBLASLIB) # Double precision complex zlib3: $(zlev3) $(errhand) - $(AR) $(ARFLAGS) $(CBLASLIB) $^ + $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ $(RANLIB) $(CBLASLIB) @@ -168,22 +168,22 @@ alev3 = $(slev3) $(dlev3) $(clev3) $(zlev3) # All level 1 all1: $(alev1) - $(AR) $(ARFLAGS) $(CBLASLIB) $^ + $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ $(RANLIB) $(CBLASLIB) # All level 2 all2: $(alev2) $(errhand) - $(AR) $(ARFLAGS) $(CBLASLIB) $^ + $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ $(RANLIB) $(CBLASLIB) # All level 3 all3: $(alev3) $(errhand) - $(AR) $(ARFLAGS) $(CBLASLIB) $^ + $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ $(RANLIB) $(CBLASLIB) # All levels and precisions $(CBLASLIB): $(alev1) $(alev2) $(alev3) 
$(errhand) - $(AR) $(ARFLAGS) $@ $^ + $(ARCH) $(ARCHFLAGS) $@ $^ $(RANLIB) $@ FRC: diff --git a/lapack-netlib/DOCS/lawn81.tex b/lapack-netlib/DOCS/lawn81.tex index 01c7c39e2..291735299 100644 --- a/lapack-netlib/DOCS/lawn81.tex +++ b/lapack-netlib/DOCS/lawn81.tex @@ -466,7 +466,7 @@ TIMER = EXT_ETIME Refer to the section~\ref{second} to get more information. -Next, you will need to modify \texttt{AR}, \texttt{ARFLAGS}, and \texttt{RANLIB} to specify archiver, +Next, you will need to modify \texttt{ARCH}, \texttt{ARCHFLAGS}, and \texttt{RANLIB} to specify archiver, archiver options, and ranlib for your machine. If your architecture does not require \texttt{ranlib} to be run after each archive command (as is the case with CRAY computers running UNICOS, Hewlett Packard diff --git a/lapack-netlib/INSTALL/make.inc.ALPHA b/lapack-netlib/INSTALL/make.inc.ALPHA index 049cf0b13..0ceeaa155 100644 --- a/lapack-netlib/INSTALL/make.inc.ALPHA +++ b/lapack-netlib/INSTALL/make.inc.ALPHA @@ -29,8 +29,8 @@ LOADOPTS = # The archiver and the flag(s) to use when building an archive # (library). If your system has no ranlib, set RANLIB = echo. # -AR = ar -ARFLAGS = cr +ARCH = ar +ARCHFLAGS = cr RANLIB = ranlib # Timer for the SECOND and DSECND routines diff --git a/lapack-netlib/INSTALL/make.inc.HPPA b/lapack-netlib/INSTALL/make.inc.HPPA index 2bd8ee16e..8eabbbdf4 100644 --- a/lapack-netlib/INSTALL/make.inc.HPPA +++ b/lapack-netlib/INSTALL/make.inc.HPPA @@ -29,8 +29,8 @@ LOADOPTS = -Aa +U77 # The archiver and the flag(s) to use when building an archive # (library). If your system has no ranlib, set RANLIB = echo. 
# -AR = ar -ARFLAGS = cr +ARCH = ar +ARCHFLAGS = cr RANLIB = echo # Timer for the SECOND and DSECND routines diff --git a/lapack-netlib/INSTALL/make.inc.IRIX64 b/lapack-netlib/INSTALL/make.inc.IRIX64 index 0f57941b5..d9e71e1bf 100644 --- a/lapack-netlib/INSTALL/make.inc.IRIX64 +++ b/lapack-netlib/INSTALL/make.inc.IRIX64 @@ -32,8 +32,8 @@ LOADOPTS = -O3 -64 -mips4 -r10000 -OPT:IEEE_NaN_inf=ON # The archiver and the flag(s) to use when building an archive # (library). If your system has no ranlib, set RANLIB = echo. # -AR = ar -ARFLAGS = cr +ARCH = ar +ARCHFLAGS = cr RANLIB = echo # Timer for the SECOND and DSECND routines diff --git a/lapack-netlib/INSTALL/make.inc.O2K b/lapack-netlib/INSTALL/make.inc.O2K index d99beca41..3ffcadacc 100644 --- a/lapack-netlib/INSTALL/make.inc.O2K +++ b/lapack-netlib/INSTALL/make.inc.O2K @@ -32,8 +32,8 @@ LOADOPTS = -O3 -64 -mips4 -r10000 # The archiver and the flag(s) to use when building an archive # (library). If your system has no ranlib, set RANLIB = echo. # -AR = ar -ARFLAGS = cr +ARCH = ar +ARCHFLAGS = cr RANLIB = echo # Timer for the SECOND and DSECND routines diff --git a/lapack-netlib/INSTALL/make.inc.SGI5 b/lapack-netlib/INSTALL/make.inc.SGI5 index c4a702d48..c7019ac16 100644 --- a/lapack-netlib/INSTALL/make.inc.SGI5 +++ b/lapack-netlib/INSTALL/make.inc.SGI5 @@ -29,8 +29,8 @@ LOADOPTS = # The archiver and the flag(s) to use when building an archive # (library). If your system has no ranlib, set RANLIB = echo. # -AR = ar -ARFLAGS = cr +ARCH = ar +ARCHFLAGS = cr RANLIB = echo # Timer for the SECOND and DSECND routines diff --git a/lapack-netlib/INSTALL/make.inc.SUN4 b/lapack-netlib/INSTALL/make.inc.SUN4 index 6a78e9576..4e44f1beb 100644 --- a/lapack-netlib/INSTALL/make.inc.SUN4 +++ b/lapack-netlib/INSTALL/make.inc.SUN4 @@ -29,8 +29,8 @@ LOADOPTS = -dalign -O4 -fast # The archiver and the flag(s) to use when building an archive # (library). If your system has no ranlib, set RANLIB = echo. 
# -AR = ar -ARFLAGS = cr +ARCH = ar +ARCHFLAGS = cr RANLIB = ranlib # Timer for the SECOND and DSECND routines diff --git a/lapack-netlib/INSTALL/make.inc.SUN4SOL2 b/lapack-netlib/INSTALL/make.inc.SUN4SOL2 index 0ac3cc4e4..e6d79add3 100644 --- a/lapack-netlib/INSTALL/make.inc.SUN4SOL2 +++ b/lapack-netlib/INSTALL/make.inc.SUN4SOL2 @@ -33,8 +33,8 @@ LOADOPTS = -f -dalign -native -xO2 -xarch=v8plusa # The archiver and the flag(s) to use when building an archive # (library). If your system has no ranlib, set RANLIB = echo. # -AR = ar -ARFLAGS = cr +ARCH = ar +ARCHFLAGS = cr RANLIB = echo # Timer for the SECOND and DSECND routines diff --git a/lapack-netlib/INSTALL/make.inc.XLF b/lapack-netlib/INSTALL/make.inc.XLF index 27e22cac9..9466ee332 100644 --- a/lapack-netlib/INSTALL/make.inc.XLF +++ b/lapack-netlib/INSTALL/make.inc.XLF @@ -30,8 +30,8 @@ LOADOPTS = -qnosave # The archiver and the flag(s) to use when building an archive # (library). If your system has no ranlib, set RANLIB = echo. # -AR = ar -ARFLAGS = cr +ARCH = ar +ARCHFLAGS = cr RANLIB = ranlib # Timer for the SECOND and DSECND routines diff --git a/lapack-netlib/INSTALL/make.inc.gfortran b/lapack-netlib/INSTALL/make.inc.gfortran index b342b18a8..39d98d4d4 100644 --- a/lapack-netlib/INSTALL/make.inc.gfortran +++ b/lapack-netlib/INSTALL/make.inc.gfortran @@ -33,8 +33,8 @@ LOADOPTS = # The archiver and the flag(s) to use when building an archive # (library). If your system has no ranlib, set RANLIB = echo. # -AR = ar -ARFLAGS = cr +ARCH = ar +ARCHFLAGS = cr RANLIB = ranlib # Timer for the SECOND and DSECND routines diff --git a/lapack-netlib/INSTALL/make.inc.gfortran_debug b/lapack-netlib/INSTALL/make.inc.gfortran_debug index 1eaed2102..10e6381df 100644 --- a/lapack-netlib/INSTALL/make.inc.gfortran_debug +++ b/lapack-netlib/INSTALL/make.inc.gfortran_debug @@ -33,8 +33,8 @@ LOADOPTS = # The archiver and the flag(s) to use when building an archive # (library). If your system has no ranlib, set RANLIB = echo. 
# -AR = ar -ARFLAGS = cr +ARCH = ar +ARCHFLAGS = cr RANLIB = ranlib # Timer for the SECOND and DSECND routines diff --git a/lapack-netlib/INSTALL/make.inc.ifort b/lapack-netlib/INSTALL/make.inc.ifort index a3c37428e..b067bd484 100644 --- a/lapack-netlib/INSTALL/make.inc.ifort +++ b/lapack-netlib/INSTALL/make.inc.ifort @@ -29,8 +29,8 @@ LOADOPTS = # The archiver and the flag(s) to use when building an archive # (library). If your system has no ranlib, set RANLIB = echo. # -AR = ar -ARFLAGS = cr +ARCH = ar +ARCHFLAGS = cr RANLIB = ranlib # Timer for the SECOND and DSECND routines diff --git a/lapack-netlib/INSTALL/make.inc.pgf95 b/lapack-netlib/INSTALL/make.inc.pgf95 index 931ff378f..a9a5cec98 100644 --- a/lapack-netlib/INSTALL/make.inc.pgf95 +++ b/lapack-netlib/INSTALL/make.inc.pgf95 @@ -29,8 +29,8 @@ LOADOPTS = # The archiver and the flag(s) to use when building an archive # (library). If your system has no ranlib, set RANLIB = echo. # -AR = ar -ARFLAGS = cr +ARCH = ar +ARCHFLAGS = cr RANLIB = echo # Timer for the SECOND and DSECND routines diff --git a/lapack-netlib/INSTALL/make.inc.pghpf b/lapack-netlib/INSTALL/make.inc.pghpf index 0dfe8c683..1d9bf549c 100644 --- a/lapack-netlib/INSTALL/make.inc.pghpf +++ b/lapack-netlib/INSTALL/make.inc.pghpf @@ -29,8 +29,8 @@ LOADOPTS = # The archiver and the flag(s) to use when building an archive # (library). If your system has no ranlib, set RANLIB = echo. 
# -AR = ar -ARFLAGS = cr +ARCH = ar +ARCHFLAGS = cr RANLIB = echo # Timer for the SECOND and DSECND routines diff --git a/lapack-netlib/LAPACKE/src/Makefile b/lapack-netlib/LAPACKE/src/Makefile index 03c140bf7..44884d4a5 100644 --- a/lapack-netlib/LAPACKE/src/Makefile +++ b/lapack-netlib/LAPACKE/src/Makefile @@ -2455,16 +2455,16 @@ endif all: ../../$(LAPACKELIB) ../../$(LAPACKELIB): $(OBJ_A) $(OBJ_B) $(DEPRECATED) $(EXTENDED) $(MATGEN) - $(AR) $(ARFLAGS) $@ $(OBJ_A) - $(AR) $(ARFLAGS) $@ $(OBJ_B) + $(ARCH) $(ARCHFLAGS) $@ $(OBJ_A) + $(ARCH) $(ARCHFLAGS) $@ $(OBJ_B) ifdef BUILD_DEPRECATED - $(AR) $(ARFLAGS) $@ $(DEPRECATED) + $(ARCH) $(ARCHFLAGS) $@ $(DEPRECATED) endif ifdef (USEXBLAS) - $(AR) $(ARFLAGS) $@ $(EXTENDED) + $(ARCH) $(ARCHFLAGS) $@ $(EXTENDED) endif ifdef LAPACKE_WITH_TMG - $(AR) $(ARFLAGS) $@ $(MATGEN) + $(ARCH) $(ARCHFLAGS) $@ $(MATGEN) endif $(RANLIB) $@ diff --git a/lapack-netlib/LAPACKE/utils/Makefile b/lapack-netlib/LAPACKE/utils/Makefile index c6204ee3b..1f639c6ea 100644 --- a/lapack-netlib/LAPACKE/utils/Makefile +++ b/lapack-netlib/LAPACKE/utils/Makefile @@ -186,7 +186,7 @@ OBJ = lapacke_cgb_nancheck.o \ all: lib lib: $(OBJ) - $(AR) $(ARFLAGS) ../../$(LAPACKELIB) $^ + $(ARCH) $(ARCHFLAGS) ../../$(LAPACKELIB) $^ $(RANLIB) ../../$(LAPACKELIB) clean: cleanobj diff --git a/lapack-netlib/SRC/Makefile b/lapack-netlib/SRC/Makefile index e5bb7a3db..531cb51fc 100644 --- a/lapack-netlib/SRC/Makefile +++ b/lapack-netlib/SRC/Makefile @@ -553,26 +553,26 @@ endif all: ../$(LAPACKLIB) ../$(LAPACKLIB): $(ALLOBJ) $(ALLXOBJ) $(DEPRECATED) - $(AR) $(ARFLAGS) $@ $(ALLOBJ) $(ALLXOBJ) $(DEPRECATED) + $(ARCH) $(ARCHFLAGS) $@ $(ALLOBJ) $(ALLXOBJ) $(DEPRECATED) $(RANLIB) $@ single: $(SLASRC) $(DSLASRC) $(SXLASRC) $(SCLAUX) $(ALLAUX) - $(AR) $(ARFLAGS) ../$(LAPACKLIB) $(SLASRC) $(DSLASRC) \ + $(ARCH) $(ARCHFLAGS) ../$(LAPACKLIB) $(SLASRC) $(DSLASRC) \ $(SXLASRC) $(SCLAUX) $(ALLAUX) $(RANLIB) ../$(LAPACKLIB) complex: $(CLASRC) $(ZCLASRC) $(CXLASRC) $(SCLAUX) $(ALLAUX) - 
$(AR) $(ARFLAGS) ../$(LAPACKLIB) $(CLASRC) $(ZCLASRC) \ + $(ARCH) $(ARCHFLAGS) ../$(LAPACKLIB) $(CLASRC) $(ZCLASRC) \ $(CXLASRC) $(SCLAUX) $(ALLAUX) $(RANLIB) ../$(LAPACKLIB) double: $(DLASRC) $(DSLASRC) $(DXLASRC) $(DZLAUX) $(ALLAUX) - $(AR) $(ARFLAGS) ../$(LAPACKLIB) $(DLASRC) $(DSLASRC) \ + $(ARCH) $(ARCHFLAGS) ../$(LAPACKLIB) $(DLASRC) $(DSLASRC) \ $(DXLASRC) $(DZLAUX) $(ALLAUX) $(RANLIB) ../$(LAPACKLIB) complex16: $(ZLASRC) $(ZCLASRC) $(ZXLASRC) $(DZLAUX) $(ALLAUX) - $(AR) $(ARFLAGS) ../$(LAPACKLIB) $(ZLASRC) $(ZCLASRC) \ + $(ARCH) $(ARCHFLAGS) ../$(LAPACKLIB) $(ZLASRC) $(ZCLASRC) \ $(ZXLASRC) $(DZLAUX) $(ALLAUX) $(RANLIB) ../$(LAPACKLIB) diff --git a/lapack-netlib/SRC/VARIANTS/Makefile b/lapack-netlib/SRC/VARIANTS/Makefile index 7d0e8824c..9f1410755 100644 --- a/lapack-netlib/SRC/VARIANTS/Makefile +++ b/lapack-netlib/SRC/VARIANTS/Makefile @@ -33,27 +33,27 @@ QRLL = qr/LL/cgeqrf.o qr/LL/dgeqrf.o qr/LL/sgeqrf.o qr/LL/zgeqrf.o qr/LL/sceil.o all: cholrl.a choltop.a lucr.a lull.a lurec.a qrll.a cholrl.a: $(CHOLRL) - $(AR) $(ARFLAGS) $@ $^ + $(ARCH) $(ARCHFLAGS) $@ $^ $(RANLIB) $@ choltop.a: $(CHOLTOP) - $(AR) $(ARFLAGS) $@ $^ + $(ARCH) $(ARCHFLAGS) $@ $^ $(RANLIB) $@ lucr.a: $(LUCR) - $(AR) $(ARFLAGS) $@ $^ + $(ARCH) $(ARCHFLAGS) $@ $^ $(RANLIB) $@ lull.a: $(LULL) - $(AR) $(ARFLAGS) $@ $^ + $(ARCH) $(ARCHFLAGS) $@ $^ $(RANLIB) $@ lurec.a: $(LUREC) - $(AR) $(ARFLAGS) $@ $^ + $(ARCH) $(ARCHFLAGS) $@ $^ $(RANLIB) $@ qrll.a: $(QRLL) - $(AR) $(ARFLAGS) $@ $^ + $(ARCH) $(ARCHFLAGS) $@ $^ $(RANLIB) $@ clean: cleanobj cleanlib diff --git a/lapack-netlib/TESTING/MATGEN/Makefile b/lapack-netlib/TESTING/MATGEN/Makefile index f5ea5a8c0..e20004c2f 100644 --- a/lapack-netlib/TESTING/MATGEN/Makefile +++ b/lapack-netlib/TESTING/MATGEN/Makefile @@ -58,23 +58,23 @@ ALLOBJ = $(SMATGEN) $(CMATGEN) $(SCATGEN) $(DMATGEN) $(ZMATGEN) \ $(DZATGEN) ../../$(TMGLIB): $(ALLOBJ) - $(AR) $(ARFLAGS) $@ $^ + $(ARCH) $(ARCHFLAGS) $@ $^ $(RANLIB) $@ single: $(SMATGEN) $(SCATGEN) - $(AR) $(ARFLAGS) 
../../$(TMGLIB) $^ + $(ARCH) $(ARCHFLAGS) ../../$(TMGLIB) $^ $(RANLIB) ../../$(TMGLIB) complex: $(CMATGEN) $(SCATGEN) - $(AR) $(ARFLAGS) ../../$(TMGLIB) $^ + $(ARCH) $(ARCHFLAGS) ../../$(TMGLIB) $^ $(RANLIB) ../../$(TMGLIB) double: $(DMATGEN) $(DZATGEN) - $(AR) $(ARFLAGS) ../../$(TMGLIB) $^ + $(ARCH) $(ARCHFLAGS) ../../$(TMGLIB) $^ $(RANLIB) ../../$(TMGLIB) complex16: $(ZMATGEN) $(DZATGEN) - $(AR) $(ARFLAGS) ../../$(TMGLIB) $^ + $(ARCH) $(ARCHFLAGS) ../../$(TMGLIB) $^ $(RANLIB) ../../$(TMGLIB) $(SCATGEN): $(FRC) diff --git a/lapack-netlib/make.inc.example b/lapack-netlib/make.inc.example index 3ddb9eafc..d780c3a23 100644 --- a/lapack-netlib/make.inc.example +++ b/lapack-netlib/make.inc.example @@ -33,8 +33,8 @@ LOADOPTS = # The archiver and the flag(s) to use when building an archive # (library). If your system has no ranlib, set RANLIB = echo. # -AR = ar -ARFLAGS = cr +ARCH = ar +ARCHFLAGS = cr RANLIB = ranlib # Timer for the SECOND and DSECND routines diff --git a/make.inc b/make.inc index 93b355103..b6ed098c0 100644 --- a/make.inc +++ b/make.inc @@ -1,6 +1,6 @@ SHELL = /bin/sh PLAT = _LINUX DRVOPTS = $(NOOPT) -#ARFLAGS= $(ARFLAGS) -ru +#ARCHFLAGS= $(ARFLAGS) -ru #RANLIB = ranlib From a1bdc308b8d4dcb924f339ca5018c12a455d2652 Mon Sep 17 00:00:00 2001 From: Dumi Loghin Date: Thu, 6 Sep 2018 13:13:36 +0800 Subject: [PATCH 5/9] override ARCH (archiver) in lapack-netlib/make.inc --- Makefile | 2 +- c_check | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index c0e5fbcf8..547feb0d2 100644 --- a/Makefile +++ b/Makefile @@ -237,7 +237,7 @@ ifndef NOFORTRAN -@echo "LOADOPTS = $(FFLAGS) $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc -@echo "CC = $(CC)" >> $(NETLIB_LAPACK_DIR)/make.inc -@echo "override CFLAGS = $(LAPACK_CFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc - -@echo "ARCH = $(AR)" >> $(NETLIB_LAPACK_DIR)/make.inc + -@echo "override ARCH = $(AR)" >> $(NETLIB_LAPACK_DIR)/make.inc -@echo "ARCHFLAGS = $(ARFLAGS) -ru" >> 
$(NETLIB_LAPACK_DIR)/make.inc -@echo "RANLIB = $(RANLIB)" >> $(NETLIB_LAPACK_DIR)/make.inc -@echo "LAPACKLIB = ../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc diff --git a/c_check b/c_check index c564855f3..eb302b71a 100644 --- a/c_check +++ b/c_check @@ -121,6 +121,11 @@ if (($architecture eq "x86") && ($os ne Darwin) && ($os ne SunOS)) { $binary =32; } +if ($architecture eq "riscv64") { + $defined = 1; + $binary = 64; +} + if ($compiler eq "PGI") { $compiler_name .= " -tp p7" if ($binary eq "32"); $compiler_name .= " -tp p7-64" if ($binary eq "64"); From 44020a42a453579740fd16cd23e76f4267c41b65 Mon Sep 17 00:00:00 2001 From: Xianyi Zhang Date: Thu, 27 Feb 2020 14:29:42 +0800 Subject: [PATCH 6/9] Fixed compile bug for RV64. --- kernel/riscv64/KERNEL | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/kernel/riscv64/KERNEL b/kernel/riscv64/KERNEL index 04d82b4ce..ea6a8cf21 100644 --- a/kernel/riscv64/KERNEL +++ b/kernel/riscv64/KERNEL @@ -35,6 +35,11 @@ DASUMKERNEL = ../riscv64/asum.c CASUMKERNEL = ../riscv64/zasum.c ZASUMKERNEL = ../riscv64/zasum.c +SSUMKERNEL = ../arm/sum.c +DSUMKERNEL = ../arm/sum.c +CSUMKERNEL = ../arm/zsum.c +ZSUMKERNEL = ../arm/zsum.c + SAXPYKERNEL = ../riscv64/axpy.c DAXPYKERNEL = ../riscv64/axpy.c CAXPYKERNEL = ../riscv64/zaxpy.c From 265ab484c89d10dfff2d5df678221918d7a880e3 Mon Sep 17 00:00:00 2001 From: Xianyi Zhang Date: Thu, 27 Feb 2020 14:46:15 +0800 Subject: [PATCH 7/9] Change default RISC-V 64-bit corename to RISCV64_GENERIC e.g. 
make CC=riscv64-unknown-linux-gnu-gcc FC=riscv64-unknown-linux-gnu-gfortran TARGET=RISCV64_GENERIC HOSTCC=gcc --- TargetList.txt | 3 + getarch.c | 10 +- kernel/riscv64/KERNEL | 162 +++---------------------- kernel/riscv64/KERNEL.RISCV64_GENERIC | 164 ++++++++++++++++++++++++++ param.h | 2 +- 5 files changed, 187 insertions(+), 154 deletions(-) create mode 100644 kernel/riscv64/KERNEL.RISCV64_GENERIC diff --git a/TargetList.txt b/TargetList.txt index 6a57bf1af..3b018e17a 100644 --- a/TargetList.txt +++ b/TargetList.txt @@ -97,3 +97,6 @@ TSV110 ZARCH_GENERIC Z13 Z14 + +10.RISC-V 64: +RISCV64_GENERIC diff --git a/getarch.c b/getarch.c index d0d260577..58706c452 100644 --- a/getarch.c +++ b/getarch.c @@ -906,17 +906,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #else #endif -#ifdef FORCE_RISCV64 +#ifdef FORCE_RISCV64_GENERIC #define FORCE #define ARCHITECTURE "RISCV64" -#define SUBARCHITECTURE "RISCV64" +#define SUBARCHITECTURE "RISCV64_GENERIC" #define SUBDIRNAME "riscv64" -#define ARCHCONFIG "-DRISCV64 " \ +#define ARCHCONFIG "-DRISCV64_GENERIC " \ "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=32 " \ "-DL2_SIZE=1048576 -DL2_LINESIZE=32 " \ "-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " -#define LIBNAME "riscv64" -#define CORENAME "RISCV64" +#define LIBNAME "riscv64_generic" +#define CORENAME "RISCV64_GENERIC" #else #endif diff --git a/kernel/riscv64/KERNEL b/kernel/riscv64/KERNEL index ea6a8cf21..68d68b5f8 100644 --- a/kernel/riscv64/KERNEL +++ b/kernel/riscv64/KERNEL @@ -1,154 +1,18 @@ -SAMAXKERNEL = ../riscv64/amax.c -DAMAXKERNEL = ../riscv64/amax.c -CAMAXKERNEL = ../riscv64/zamax.c -ZAMAXKERNEL = ../riscv64/zamax.c - -SAMINKERNEL = ../riscv64/amin.c -DAMINKERNEL = ../riscv64/amin.c -CAMINKERNEL = ../riscv64/zamin.c -ZAMINKERNEL = ../riscv64/zamin.c - -SMAXKERNEL = ../riscv64/max.c -DMAXKERNEL = ../riscv64/max.c - -SMINKERNEL = ../riscv64/min.c -DMINKERNEL = ../riscv64/min.c - -ISAMAXKERNEL = ../riscv64/iamax.c 
-IDAMAXKERNEL = ../riscv64/iamax.c -ICAMAXKERNEL = ../riscv64/izamax.c -IZAMAXKERNEL = ../riscv64/izamax.c - -ISAMINKERNEL = ../riscv64/iamin.c -IDAMINKERNEL = ../riscv64/iamin.c -ICAMINKERNEL = ../riscv64/izamin.c -IZAMINKERNEL = ../riscv64/izamin.c - -ISMAXKERNEL = ../riscv64/imax.c -IDMAXKERNEL = ../riscv64/imax.c - -ISMINKERNEL = ../riscv64/imin.c -IDMINKERNEL = ../riscv64/imin.c - -SASUMKERNEL = ../riscv64/asum.c -DASUMKERNEL = ../riscv64/asum.c -CASUMKERNEL = ../riscv64/zasum.c -ZASUMKERNEL = ../riscv64/zasum.c - -SSUMKERNEL = ../arm/sum.c -DSUMKERNEL = ../arm/sum.c -CSUMKERNEL = ../arm/zsum.c -ZSUMKERNEL = ../arm/zsum.c - -SAXPYKERNEL = ../riscv64/axpy.c -DAXPYKERNEL = ../riscv64/axpy.c -CAXPYKERNEL = ../riscv64/zaxpy.c -ZAXPYKERNEL = ../riscv64/zaxpy.c - -SCOPYKERNEL = ../riscv64/copy.c -DCOPYKERNEL = ../riscv64/copy.c -CCOPYKERNEL = ../riscv64/zcopy.c -ZCOPYKERNEL = ../riscv64/zcopy.c - -SDOTKERNEL = ../riscv64/dot.c -DDOTKERNEL = ../riscv64/dot.c -CDOTKERNEL = ../riscv64/zdot.c -ZDOTKERNEL = ../riscv64/zdot.c - -SNRM2KERNEL = ../riscv64/nrm2.c -DNRM2KERNEL = ../riscv64/nrm2.c -CNRM2KERNEL = ../riscv64/znrm2.c -ZNRM2KERNEL = ../riscv64/znrm2.c - -SROTKERNEL = ../riscv64/rot.c -DROTKERNEL = ../riscv64/rot.c -CROTKERNEL = ../riscv64/zrot.c -ZROTKERNEL = ../riscv64/zrot.c - -SSCALKERNEL = ../riscv64/scal.c -DSCALKERNEL = ../riscv64/scal.c -CSCALKERNEL = ../riscv64/zscal.c -ZSCALKERNEL = ../riscv64/zscal.c - -SSWAPKERNEL = ../riscv64/swap.c -DSWAPKERNEL = ../riscv64/swap.c -CSWAPKERNEL = ../riscv64/zswap.c -ZSWAPKERNEL = ../riscv64/zswap.c - -SGEMVNKERNEL = ../riscv64/gemv_n.c -DGEMVNKERNEL = ../riscv64/gemv_n.c -CGEMVNKERNEL = ../riscv64/zgemv_n.c -ZGEMVNKERNEL = ../riscv64/zgemv_n.c - -SGEMVTKERNEL = ../riscv64/gemv_t.c -DGEMVTKERNEL = ../riscv64/gemv_t.c -CGEMVTKERNEL = ../riscv64/zgemv_t.c -ZGEMVTKERNEL = ../riscv64/zgemv_t.c - -STRMMKERNEL = ../generic/trmmkernel_2x2.c -DTRMMKERNEL = ../generic/trmmkernel_2x2.c -CTRMMKERNEL = ../generic/ztrmmkernel_2x2.c 
-ZTRMMKERNEL = ../generic/ztrmmkernel_2x2.c - -SGEMMKERNEL = ../generic/gemmkernel_2x2.c -SGEMMONCOPY = ../generic/gemm_ncopy_2.c -SGEMMOTCOPY = ../generic/gemm_tcopy_2.c -SGEMMONCOPYOBJ = sgemm_oncopy.o -SGEMMOTCOPYOBJ = sgemm_otcopy.o - -DGEMMKERNEL = ../generic/gemmkernel_2x2.c -DGEMMONCOPY = ../generic/gemm_ncopy_2.c -DGEMMOTCOPY = ../generic/gemm_tcopy_2.c -DGEMMONCOPYOBJ = dgemm_oncopy.o -DGEMMOTCOPYOBJ = dgemm_otcopy.o - -CGEMMKERNEL = ../generic/zgemmkernel_2x2.c -CGEMMONCOPY = ../generic/zgemm_ncopy_2.c -CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c -CGEMMONCOPYOBJ = cgemm_oncopy.o -CGEMMOTCOPYOBJ = cgemm_otcopy.o - -ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c -ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c -ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c -ZGEMMONCOPYOBJ = zgemm_oncopy.o -ZGEMMOTCOPYOBJ = zgemm_otcopy.o - -STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c -STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c -STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c -STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c - -DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c -DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c -DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c -DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c - -CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c -CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c -CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c -CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c - -ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c -ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c -ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c -ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c - -SSYMV_U_KERNEL = ../generic/symv_k.c -SSYMV_L_KERNEL = ../generic/symv_k.c -DSYMV_U_KERNEL = ../generic/symv_k.c -DSYMV_L_KERNEL = ../generic/symv_k.c -CSYMV_U_KERNEL = ../generic/zsymv_k.c -CSYMV_L_KERNEL = ../generic/zsymv_k.c -ZSYMV_U_KERNEL = ../generic/zsymv_k.c -ZSYMV_L_KERNEL = ../generic/zsymv_k.c - - -LSAME_KERNEL = ../generic/lsame.c - +ifndef SCABS_KERNEL SCABS_KERNEL = ../generic/cabs.c +endif + +ifndef 
DCABS_KERNEL DCABS_KERNEL = ../generic/cabs.c +endif + +ifndef QCABS_KERNEL QCABS_KERNEL = ../generic/cabs.c +endif + +ifndef LSAME_KERNEL +LSAME_KERNEL = ../generic/lsame.c +endif ifndef SGEMM_BETA SGEMM_BETA = ../generic/gemm_beta.c @@ -162,3 +26,5 @@ endif ifndef ZGEMM_BETA ZGEMM_BETA = ../generic/zgemm_beta.c endif + + diff --git a/kernel/riscv64/KERNEL.RISCV64_GENERIC b/kernel/riscv64/KERNEL.RISCV64_GENERIC new file mode 100644 index 000000000..ea6a8cf21 --- /dev/null +++ b/kernel/riscv64/KERNEL.RISCV64_GENERIC @@ -0,0 +1,164 @@ +SAMAXKERNEL = ../riscv64/amax.c +DAMAXKERNEL = ../riscv64/amax.c +CAMAXKERNEL = ../riscv64/zamax.c +ZAMAXKERNEL = ../riscv64/zamax.c + +SAMINKERNEL = ../riscv64/amin.c +DAMINKERNEL = ../riscv64/amin.c +CAMINKERNEL = ../riscv64/zamin.c +ZAMINKERNEL = ../riscv64/zamin.c + +SMAXKERNEL = ../riscv64/max.c +DMAXKERNEL = ../riscv64/max.c + +SMINKERNEL = ../riscv64/min.c +DMINKERNEL = ../riscv64/min.c + +ISAMAXKERNEL = ../riscv64/iamax.c +IDAMAXKERNEL = ../riscv64/iamax.c +ICAMAXKERNEL = ../riscv64/izamax.c +IZAMAXKERNEL = ../riscv64/izamax.c + +ISAMINKERNEL = ../riscv64/iamin.c +IDAMINKERNEL = ../riscv64/iamin.c +ICAMINKERNEL = ../riscv64/izamin.c +IZAMINKERNEL = ../riscv64/izamin.c + +ISMAXKERNEL = ../riscv64/imax.c +IDMAXKERNEL = ../riscv64/imax.c + +ISMINKERNEL = ../riscv64/imin.c +IDMINKERNEL = ../riscv64/imin.c + +SASUMKERNEL = ../riscv64/asum.c +DASUMKERNEL = ../riscv64/asum.c +CASUMKERNEL = ../riscv64/zasum.c +ZASUMKERNEL = ../riscv64/zasum.c + +SSUMKERNEL = ../arm/sum.c +DSUMKERNEL = ../arm/sum.c +CSUMKERNEL = ../arm/zsum.c +ZSUMKERNEL = ../arm/zsum.c + +SAXPYKERNEL = ../riscv64/axpy.c +DAXPYKERNEL = ../riscv64/axpy.c +CAXPYKERNEL = ../riscv64/zaxpy.c +ZAXPYKERNEL = ../riscv64/zaxpy.c + +SCOPYKERNEL = ../riscv64/copy.c +DCOPYKERNEL = ../riscv64/copy.c +CCOPYKERNEL = ../riscv64/zcopy.c +ZCOPYKERNEL = ../riscv64/zcopy.c + +SDOTKERNEL = ../riscv64/dot.c +DDOTKERNEL = ../riscv64/dot.c +CDOTKERNEL = ../riscv64/zdot.c +ZDOTKERNEL = 
../riscv64/zdot.c + +SNRM2KERNEL = ../riscv64/nrm2.c +DNRM2KERNEL = ../riscv64/nrm2.c +CNRM2KERNEL = ../riscv64/znrm2.c +ZNRM2KERNEL = ../riscv64/znrm2.c + +SROTKERNEL = ../riscv64/rot.c +DROTKERNEL = ../riscv64/rot.c +CROTKERNEL = ../riscv64/zrot.c +ZROTKERNEL = ../riscv64/zrot.c + +SSCALKERNEL = ../riscv64/scal.c +DSCALKERNEL = ../riscv64/scal.c +CSCALKERNEL = ../riscv64/zscal.c +ZSCALKERNEL = ../riscv64/zscal.c + +SSWAPKERNEL = ../riscv64/swap.c +DSWAPKERNEL = ../riscv64/swap.c +CSWAPKERNEL = ../riscv64/zswap.c +ZSWAPKERNEL = ../riscv64/zswap.c + +SGEMVNKERNEL = ../riscv64/gemv_n.c +DGEMVNKERNEL = ../riscv64/gemv_n.c +CGEMVNKERNEL = ../riscv64/zgemv_n.c +ZGEMVNKERNEL = ../riscv64/zgemv_n.c + +SGEMVTKERNEL = ../riscv64/gemv_t.c +DGEMVTKERNEL = ../riscv64/gemv_t.c +CGEMVTKERNEL = ../riscv64/zgemv_t.c +ZGEMVTKERNEL = ../riscv64/zgemv_t.c + +STRMMKERNEL = ../generic/trmmkernel_2x2.c +DTRMMKERNEL = ../generic/trmmkernel_2x2.c +CTRMMKERNEL = ../generic/ztrmmkernel_2x2.c +ZTRMMKERNEL = ../generic/ztrmmkernel_2x2.c + +SGEMMKERNEL = ../generic/gemmkernel_2x2.c +SGEMMONCOPY = ../generic/gemm_ncopy_2.c +SGEMMOTCOPY = ../generic/gemm_tcopy_2.c +SGEMMONCOPYOBJ = sgemm_oncopy.o +SGEMMOTCOPYOBJ = sgemm_otcopy.o + +DGEMMKERNEL = ../generic/gemmkernel_2x2.c +DGEMMONCOPY = ../generic/gemm_ncopy_2.c +DGEMMOTCOPY = ../generic/gemm_tcopy_2.c +DGEMMONCOPYOBJ = dgemm_oncopy.o +DGEMMOTCOPYOBJ = dgemm_otcopy.o + +CGEMMKERNEL = ../generic/zgemmkernel_2x2.c +CGEMMONCOPY = ../generic/zgemm_ncopy_2.c +CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c +CGEMMONCOPYOBJ = cgemm_oncopy.o +CGEMMOTCOPYOBJ = cgemm_otcopy.o + +ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c +ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c +ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c +ZGEMMONCOPYOBJ = zgemm_oncopy.o +ZGEMMOTCOPYOBJ = zgemm_otcopy.o + +STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c +STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c +STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c +STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c + 
+DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c +DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c +DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c +DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c + +CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c +CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c +CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c +CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c + +ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c +ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c +ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c +ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c + +SSYMV_U_KERNEL = ../generic/symv_k.c +SSYMV_L_KERNEL = ../generic/symv_k.c +DSYMV_U_KERNEL = ../generic/symv_k.c +DSYMV_L_KERNEL = ../generic/symv_k.c +CSYMV_U_KERNEL = ../generic/zsymv_k.c +CSYMV_L_KERNEL = ../generic/zsymv_k.c +ZSYMV_U_KERNEL = ../generic/zsymv_k.c +ZSYMV_L_KERNEL = ../generic/zsymv_k.c + + +LSAME_KERNEL = ../generic/lsame.c + +SCABS_KERNEL = ../generic/cabs.c +DCABS_KERNEL = ../generic/cabs.c +QCABS_KERNEL = ../generic/cabs.c + +ifndef SGEMM_BETA +SGEMM_BETA = ../generic/gemm_beta.c +endif +ifndef DGEMM_BETA +DGEMM_BETA = ../generic/gemm_beta.c +endif +ifndef CGEMM_BETA +CGEMM_BETA = ../generic/zgemm_beta.c +endif +ifndef ZGEMM_BETA +ZGEMM_BETA = ../generic/zgemm_beta.c +endif diff --git a/param.h b/param.h index d42724a57..4a7765012 100644 --- a/param.h +++ b/param.h @@ -2509,7 +2509,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define SYMV_P 16 #endif -#ifdef RISCV64 +#ifdef RISCV64_GENERIC #define GEMM_DEFAULT_OFFSET_A 0 #define GEMM_DEFAULT_OFFSET_B 0 #define GEMM_DEFAULT_ALIGN 0x03fffUL From ef8e7d0279dfd1f9d9bec32b514a853d10bfdda7 Mon Sep 17 00:00:00 2001 From: damonyu Date: Thu, 15 Oct 2020 16:05:37 +0800 Subject: [PATCH 8/9] Add the support for RISC-V Vector. 
Change-Id: Iae7800a32f5af3903c330882cdf6f292d885f266 --- Makefile.prebuild | 4 + Makefile.riscv64 | 4 + Makefile.system | 5 +- TargetList.txt | 3 + c_check | 6 + common.h | 4 + common_riscv64.h | 98 + cpuid_riscv64.c | 113 ++ ctest.c | 4 + getarch.c | 33 + kernel/Makefile.L3 | 4 + kernel/generic/trmmkernel_16x4.c | 2092 ++++++++++++++++++++ kernel/generic/trmmkernel_8x4.c | 1317 +++++++++++++ kernel/generic/trmmkernel_8x8.c | 2207 ++++++++++++++++++++++ kernel/riscv64/KERNEL | 30 + kernel/riscv64/KERNEL.C910V | 190 ++ kernel/riscv64/KERNEL.RISCV64_GENERIC | 164 ++ kernel/riscv64/amax.c | 75 + kernel/riscv64/amax_vector.c | 245 +++ kernel/riscv64/amin.c | 75 + kernel/riscv64/amin_vector.c | 241 +++ kernel/riscv64/asum.c | 67 + kernel/riscv64/asum_vector.c | 131 ++ kernel/riscv64/axpby.c | 96 + kernel/riscv64/axpby_vector.c | 378 ++++ kernel/riscv64/axpy.c | 64 + kernel/riscv64/axpy_vector.c | 179 ++ kernel/riscv64/copy.c | 59 + kernel/riscv64/copy_vector.c | 148 ++ kernel/riscv64/dgemm_kernel_8x4_c910v.c | 977 ++++++++++ kernel/riscv64/dot.c | 64 + kernel/riscv64/dot_vector.c | 172 ++ kernel/riscv64/gemv_n.c | 67 + kernel/riscv64/gemv_n_vector.c | 146 ++ kernel/riscv64/gemv_t.c | 68 + kernel/riscv64/gemv_t_vector.c | 126 ++ kernel/riscv64/iamax.c | 77 + kernel/riscv64/iamax_vector.c | 191 ++ kernel/riscv64/iamin.c | 77 + kernel/riscv64/iamin_vector.c | 192 ++ kernel/riscv64/imax.c | 69 + kernel/riscv64/imax_vector.c | 176 ++ kernel/riscv64/imin.c | 67 + kernel/riscv64/imin_vector.c | 212 +++ kernel/riscv64/izamax.c | 81 + kernel/riscv64/izamax_vector.c | 246 +++ kernel/riscv64/izamin.c | 81 + kernel/riscv64/izamin_vector.c | 247 +++ kernel/riscv64/max.c | 65 + kernel/riscv64/max_vector.c | 116 ++ kernel/riscv64/min.c | 65 + kernel/riscv64/min_vector.c | 116 ++ kernel/riscv64/nrm2.c | 88 + kernel/riscv64/nrm2_vector.c | 220 +++ kernel/riscv64/nrm2_vector_dot.c | 128 ++ kernel/riscv64/omatcopy_cn.c | 90 + kernel/riscv64/omatcopy_ct.c | 89 + 
kernel/riscv64/omatcopy_rn.c | 90 + kernel/riscv64/omatcopy_rt.c | 62 + kernel/riscv64/rot.c | 62 + kernel/riscv64/rot_vector.c | 196 ++ kernel/riscv64/scal.c | 63 + kernel/riscv64/scal_vector.c | 133 ++ kernel/riscv64/sgemm_kernel_16x4_c910v.c | 1575 +++++++++++++++ kernel/riscv64/swap.c | 62 + kernel/riscv64/swap_vector.c | 173 ++ kernel/riscv64/symv_L.c | 70 + kernel/riscv64/symv_L_vector.c | 265 +++ kernel/riscv64/symv_U.c | 71 + kernel/riscv64/symv_U_vector.c | 264 +++ kernel/riscv64/zamax.c | 79 + kernel/riscv64/zamax_vector.c | 104 + kernel/riscv64/zamin.c | 79 + kernel/riscv64/zamin_vector.c | 104 + kernel/riscv64/zasum.c | 72 + kernel/riscv64/zasum_vector.c | 136 ++ kernel/riscv64/zaxpby.c | 118 ++ kernel/riscv64/zaxpby_vector.c | 197 ++ kernel/riscv64/zaxpy.c | 74 + kernel/riscv64/zaxpy_vector.c | 107 ++ kernel/riscv64/zcopy.c | 65 + kernel/riscv64/zcopy_vector.c | 92 + kernel/riscv64/zdot.c | 80 + kernel/riscv64/zdot_vector.c | 135 ++ kernel/riscv64/zgemv_n.c | 157 ++ kernel/riscv64/zgemv_n_vector.c | 175 ++ kernel/riscv64/zgemv_t.c | 140 ++ kernel/riscv64/zgemv_t_vector.c | 134 ++ kernel/riscv64/zhemv_LM_vector.c | 191 ++ kernel/riscv64/zhemv_UV_vector.c | 192 ++ kernel/riscv64/znrm2.c | 106 ++ kernel/riscv64/znrm2_vector.c | 278 +++ kernel/riscv64/zomatcopy_cn.c | 70 + kernel/riscv64/zomatcopy_cnc.c | 69 + kernel/riscv64/zomatcopy_ct.c | 71 + kernel/riscv64/zomatcopy_ctc.c | 71 + kernel/riscv64/zomatcopy_rn.c | 70 + kernel/riscv64/zomatcopy_rnc.c | 69 + kernel/riscv64/zomatcopy_rt.c | 72 + kernel/riscv64/zomatcopy_rtc.c | 72 + kernel/riscv64/zrot.c | 70 + kernel/riscv64/zrot_vector.c | 162 ++ kernel/riscv64/zscal.c | 88 + kernel/riscv64/zscal_vector.c | 152 ++ kernel/riscv64/zswap.c | 72 + kernel/riscv64/zswap_vector.c | 117 ++ lapack/laswp/riscv64/Makefile | 13 + param.h | 78 + test/Makefile | 6 + 109 files changed, 19571 insertions(+), 1 deletion(-) create mode 100644 Makefile.riscv64 create mode 100644 common_riscv64.h create mode 100644 
cpuid_riscv64.c create mode 100644 kernel/generic/trmmkernel_16x4.c create mode 100644 kernel/generic/trmmkernel_8x4.c create mode 100644 kernel/generic/trmmkernel_8x8.c create mode 100644 kernel/riscv64/KERNEL create mode 100644 kernel/riscv64/KERNEL.C910V create mode 100644 kernel/riscv64/KERNEL.RISCV64_GENERIC create mode 100644 kernel/riscv64/amax.c create mode 100644 kernel/riscv64/amax_vector.c create mode 100644 kernel/riscv64/amin.c create mode 100644 kernel/riscv64/amin_vector.c create mode 100644 kernel/riscv64/asum.c create mode 100644 kernel/riscv64/asum_vector.c create mode 100644 kernel/riscv64/axpby.c create mode 100644 kernel/riscv64/axpby_vector.c create mode 100644 kernel/riscv64/axpy.c create mode 100644 kernel/riscv64/axpy_vector.c create mode 100644 kernel/riscv64/copy.c create mode 100644 kernel/riscv64/copy_vector.c create mode 100644 kernel/riscv64/dgemm_kernel_8x4_c910v.c create mode 100644 kernel/riscv64/dot.c create mode 100644 kernel/riscv64/dot_vector.c create mode 100644 kernel/riscv64/gemv_n.c create mode 100644 kernel/riscv64/gemv_n_vector.c create mode 100644 kernel/riscv64/gemv_t.c create mode 100644 kernel/riscv64/gemv_t_vector.c create mode 100644 kernel/riscv64/iamax.c create mode 100644 kernel/riscv64/iamax_vector.c create mode 100644 kernel/riscv64/iamin.c create mode 100644 kernel/riscv64/iamin_vector.c create mode 100644 kernel/riscv64/imax.c create mode 100644 kernel/riscv64/imax_vector.c create mode 100644 kernel/riscv64/imin.c create mode 100644 kernel/riscv64/imin_vector.c create mode 100644 kernel/riscv64/izamax.c create mode 100644 kernel/riscv64/izamax_vector.c create mode 100644 kernel/riscv64/izamin.c create mode 100644 kernel/riscv64/izamin_vector.c create mode 100644 kernel/riscv64/max.c create mode 100644 kernel/riscv64/max_vector.c create mode 100644 kernel/riscv64/min.c create mode 100644 kernel/riscv64/min_vector.c create mode 100644 kernel/riscv64/nrm2.c create mode 100644 kernel/riscv64/nrm2_vector.c create 
mode 100644 kernel/riscv64/nrm2_vector_dot.c create mode 100644 kernel/riscv64/omatcopy_cn.c create mode 100644 kernel/riscv64/omatcopy_ct.c create mode 100644 kernel/riscv64/omatcopy_rn.c create mode 100644 kernel/riscv64/omatcopy_rt.c create mode 100644 kernel/riscv64/rot.c create mode 100644 kernel/riscv64/rot_vector.c create mode 100644 kernel/riscv64/scal.c create mode 100644 kernel/riscv64/scal_vector.c create mode 100644 kernel/riscv64/sgemm_kernel_16x4_c910v.c create mode 100644 kernel/riscv64/swap.c create mode 100644 kernel/riscv64/swap_vector.c create mode 100644 kernel/riscv64/symv_L.c create mode 100644 kernel/riscv64/symv_L_vector.c create mode 100644 kernel/riscv64/symv_U.c create mode 100644 kernel/riscv64/symv_U_vector.c create mode 100644 kernel/riscv64/zamax.c create mode 100644 kernel/riscv64/zamax_vector.c create mode 100644 kernel/riscv64/zamin.c create mode 100644 kernel/riscv64/zamin_vector.c create mode 100644 kernel/riscv64/zasum.c create mode 100644 kernel/riscv64/zasum_vector.c create mode 100644 kernel/riscv64/zaxpby.c create mode 100644 kernel/riscv64/zaxpby_vector.c create mode 100644 kernel/riscv64/zaxpy.c create mode 100644 kernel/riscv64/zaxpy_vector.c create mode 100644 kernel/riscv64/zcopy.c create mode 100644 kernel/riscv64/zcopy_vector.c create mode 100644 kernel/riscv64/zdot.c create mode 100644 kernel/riscv64/zdot_vector.c create mode 100644 kernel/riscv64/zgemv_n.c create mode 100644 kernel/riscv64/zgemv_n_vector.c create mode 100644 kernel/riscv64/zgemv_t.c create mode 100644 kernel/riscv64/zgemv_t_vector.c create mode 100644 kernel/riscv64/zhemv_LM_vector.c create mode 100644 kernel/riscv64/zhemv_UV_vector.c create mode 100644 kernel/riscv64/znrm2.c create mode 100644 kernel/riscv64/znrm2_vector.c create mode 100644 kernel/riscv64/zomatcopy_cn.c create mode 100644 kernel/riscv64/zomatcopy_cnc.c create mode 100644 kernel/riscv64/zomatcopy_ct.c create mode 100644 kernel/riscv64/zomatcopy_ctc.c create mode 100644 
kernel/riscv64/zomatcopy_rn.c create mode 100644 kernel/riscv64/zomatcopy_rnc.c create mode 100644 kernel/riscv64/zomatcopy_rt.c create mode 100644 kernel/riscv64/zomatcopy_rtc.c create mode 100644 kernel/riscv64/zrot.c create mode 100644 kernel/riscv64/zrot_vector.c create mode 100644 kernel/riscv64/zscal.c create mode 100644 kernel/riscv64/zscal_vector.c create mode 100644 kernel/riscv64/zswap.c create mode 100644 kernel/riscv64/zswap_vector.c create mode 100644 lapack/laswp/riscv64/Makefile diff --git a/Makefile.prebuild b/Makefile.prebuild index 48fb5e991..d6395da7b 100644 --- a/Makefile.prebuild +++ b/Makefile.prebuild @@ -41,6 +41,10 @@ ifeq ($(TARGET), I6500) TARGET_FLAGS = -mips64r6 endif +ifeq ($(TARGET), C910V) +TARGET_FLAGS = -march=rv64gcvxthead -mabi=lp64v +endif + all: getarch_2nd ./getarch_2nd 0 >> $(TARGET_MAKE) ./getarch_2nd 1 >> $(TARGET_CONF) diff --git a/Makefile.riscv64 b/Makefile.riscv64 new file mode 100644 index 000000000..15d7b059c --- /dev/null +++ b/Makefile.riscv64 @@ -0,0 +1,4 @@ +ifeq ($(CORE), C910V) +CCOMMON_OPT += -march=rv64gcvxthead -mabi=lp64v +FCOMMON_OPT += -march=rv64gcvxthead -mabi=lp64v -static +endif diff --git a/Makefile.system b/Makefile.system index 461f7370b..fe2aecd82 100644 --- a/Makefile.system +++ b/Makefile.system @@ -724,7 +724,10 @@ endif endif endif - +ifeq ($(ARCH), riscv64) +NO_BINARY_MODE = 1 +BINARY_DEFINED = 1 +endif # diff --git a/TargetList.txt b/TargetList.txt index 66eca4506..86177ebca 100644 --- a/TargetList.txt +++ b/TargetList.txt @@ -104,3 +104,6 @@ VORTEX ZARCH_GENERIC Z13 Z14 + +10.RISC-V 64: +RISCV64_GENERIC diff --git a/c_check b/c_check index 5ea93b75c..405963ae6 100644 --- a/c_check +++ b/c_check @@ -92,6 +92,7 @@ $architecture = ia64 if ($data =~ /ARCH_IA64/); $architecture = arm if ($data =~ /ARCH_ARM/); $architecture = arm64 if ($data =~ /ARCH_ARM64/); $architecture = zarch if ($data =~ /ARCH_ZARCH/); +$architecture = riscv64 if ($data =~ /ARCH_RISCV64/); $defined = 0; @@ -136,6 +137,11 @@ 
if (($architecture eq "x86") && ($os ne Darwin) && ($os ne SunOS)) { $binary =32; } +if ($architecture eq "riscv64") { + $defined = 1; + $binary = 64; +} + if ($compiler eq "PGI") { $compiler_name .= " -tp p7" if ($binary eq "32"); $compiler_name .= " -tp p7-64" if ($binary eq "64"); diff --git a/common.h b/common.h index a3ef99b59..faa75c447 100644 --- a/common.h +++ b/common.h @@ -437,6 +437,10 @@ please https://github.com/xianyi/OpenBLAS/issues/246 #include "common_mips.h" #endif +#ifdef ARCH_RISCV64 +#include "common_riscv64.h" +#endif + #ifdef ARCH_MIPS64 #include "common_mips64.h" #endif diff --git a/common_riscv64.h b/common_riscv64.h new file mode 100644 index 000000000..49368c613 --- /dev/null +++ b/common_riscv64.h @@ -0,0 +1,98 @@ +/***************************************************************************** +Copyright (c) 2011-2014, The OpenBLAS Project +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + 3. Neither the name of the OpenBLAS project nor the names of + its contributors may be used to endorse or promote products + derived from this software without specific prior written + permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************************/ + +/*********************************************************************/ +/* Copyright 2009, 2010 The University of Texas at Austin. */ +/* All rights reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* 1. Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* */ +/* 2. Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ +/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ +/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ +/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ +/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/* POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/* The views and conclusions contained in the software and */ +/* documentation are those of the authors and should not be */ +/* interpreted as representing official policies, either expressed */ +/* or implied, of The University of Texas at Austin. */ +/*********************************************************************/ + +#ifndef COMMON_RISCV64 +#define COMMON_RISCV64 + +#define MB __sync_synchronize() +#define WMB __sync_synchronize() +#define RMB __sync_synchronize() + +#define INLINE inline + +#ifndef ASSEMBLER + + +static inline int blas_quickdivide(blasint x, blasint y){ + return x / y; +} + +#endif + + + +#define BUFFER_SIZE ( 32 << 20) +#define SEEK_ADDRESS + +#if defined(C910V) +#include <riscv-vector.h> +#endif + +#endif diff --git a/cpuid_riscv64.c b/cpuid_riscv64.c new file mode 100644 index 000000000..8a3209cb3 --- /dev/null +++ b/cpuid_riscv64.c @@ -0,0 +1,113 @@ +/***************************************************************************** +Copyright (c) 2011-2014, The OpenBLAS Project +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2.
Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + 3. Neither the name of the OpenBLAS project nor the names of + its contributors may be used to endorse or promote products + derived from this software without specific prior written + permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +**********************************************************************************/ + + +/*********************************************************************/ +/* Copyright 2009, 2010 The University of Texas at Austin. */ +/* All rights reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* 1. Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* */ +/* 2. Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. 
*/ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ +/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ +/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ +/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ +/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/* POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/* The views and conclusions contained in the software and */ +/* documentation are those of the authors and should not be */ +/* interpreted as representing official policies, either expressed */ +/* or implied, of The University of Texas at Austin. 
*/ +/*********************************************************************/ + +#define CPU_UNKNOWN 0 +#define CPU_C910V 1 + +static char *cpuname[] = { + "UNKNOWN", /* indexed by CPU_UNKNOWN; spelling matches the UNKNOWN macro emitted by get_cpuconfig */ + "C910V" +}; + +int detect(void){ + return CPU_UNKNOWN; +} + +char *get_corename(void){ + return cpuname[detect()]; +} + +void get_architecture(void){ + printf("RISCV64"); +} + +void get_subarchitecture(void){ +} + +void get_subdirname(void){ + printf("riscv64"); +} + +void get_cpuconfig(void){ + printf("#define UNKNOWN\n"); + printf("#define L1_DATA_SIZE 65536\n"); + printf("#define L1_DATA_LINESIZE 32\n"); + printf("#define L2_SIZE 524288\n"); /* 512 KiB; the previous value 512488 was a digit transposition */ + printf("#define L2_LINESIZE 32\n"); + printf("#define DTB_DEFAULT_ENTRIES 64\n"); + printf("#define DTB_SIZE 4096\n"); + printf("#define L2_ASSOCIATIVE 4\n"); +} + +void get_libname(void){ + printf("riscv64\n"); +} diff --git a/ctest.c b/ctest.c index cd84ab1bb..83a3b7d6c 100644 --- a/ctest.c +++ b/ctest.c @@ -153,6 +153,10 @@ ARCH_ARM ARCH_ARM64 #endif +#if defined(__riscv) +ARCH_RISCV64 +#endif + #if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L) HAVE_C11 #endif diff --git a/getarch.c b/getarch.c index e2c22d3a0..58465fb56 100644 --- a/getarch.c +++ b/getarch.c @@ -981,6 +981,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #else #endif +#ifdef FORCE_RISCV64_GENERIC +#define FORCE +#define ARCHITECTURE "RISCV64" +#define SUBARCHITECTURE "RISCV64_GENERIC" +#define SUBDIRNAME "riscv64" +#define ARCHCONFIG "-DRISCV64_GENERIC " \ + "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=32 " \ + "-DL2_SIZE=1048576 -DL2_LINESIZE=32 " \ + "-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " +#define LIBNAME "riscv64_generic" +#define CORENAME "RISCV64_GENERIC" +#else +#endif + #ifdef FORCE_CORTEXA15 #define FORCE #define ARCHITECTURE "ARM" @@ -1252,6 +1266,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define CORENAME "Z14" #endif +#ifdef FORCE_C910V +#define FORCE +#define ARCHITECTURE "RISCV64" +#define SUBARCHITECTURE "C910V" +#define SUBDIRNAME "riscv64" +#define ARCHCONFIG "-DC910V " \ + "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=32 " \ + "-DL2_SIZE=1048576 -DL2_LINESIZE=32 " \ + "-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " +#define LIBNAME "c910v" +#define CORENAME "C910V" +#else +#endif + + #ifndef FORCE #ifdef USER_TARGET @@ -1306,6 +1335,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define OPENBLAS_SUPPORTED #endif +#ifdef __riscv +#include "cpuid_riscv64.c" +#endif + #ifdef __arm__ #include "cpuid_arm.c" #define OPENBLAS_SUPPORTED diff --git a/kernel/Makefile.L3 b/kernel/Makefile.L3 index 2ba593c2e..893713769 100644 --- a/kernel/Makefile.L3 +++ b/kernel/Makefile.L3 @@ -25,6 +25,10 @@ ifeq ($(ARCH), arm64) USE_TRMM = 1 endif +ifeq ($(ARCH), riscv64) +USE_TRMM = 1 +endif + ifeq ($(TARGET), LOONGSON3B) USE_TRMM = 1 endif diff --git a/kernel/generic/trmmkernel_16x4.c b/kernel/generic/trmmkernel_16x4.c new file mode 100644 index 000000000..7ea4e108c --- /dev/null +++ b/kernel/generic/trmmkernel_16x4.c @@ -0,0 +1,2092 @@ +#include "common.h" + +int CNAME(BLASLONG bm,BLASLONG bn,BLASLONG bk,FLOAT alpha,FLOAT* ba,FLOAT* bb,FLOAT* C,BLASLONG ldc ,BLASLONG offset) +{ + + BLASLONG i,j,k; + FLOAT *C0,*C1,*C2,*C3,*ptrba,*ptrbb; + + FLOAT res0_0; + FLOAT res0_1; + FLOAT res0_2; + FLOAT res0_3; + FLOAT res0_4; + FLOAT res0_5; + FLOAT res0_6; + FLOAT res0_7; + + FLOAT res0_8; + FLOAT res0_9; + FLOAT res0_10; + FLOAT res0_11; + FLOAT res0_12; + FLOAT res0_13; + FLOAT res0_14; + FLOAT res0_15; + + FLOAT res1_0; + FLOAT res1_1; + FLOAT res1_2; + FLOAT res1_3; + FLOAT res1_4; + FLOAT res1_5; + FLOAT res1_6; + FLOAT res1_7; + + FLOAT res1_8; + FLOAT res1_9; + FLOAT res1_10; + FLOAT res1_11; + FLOAT res1_12; + FLOAT res1_13; + FLOAT res1_14; + FLOAT res1_15; + + FLOAT res2_0; + FLOAT res2_1; + FLOAT res2_2; + FLOAT res2_3; 
+ FLOAT res2_4; + FLOAT res2_5; + FLOAT res2_6; + FLOAT res2_7; + + FLOAT res2_8; + FLOAT res2_9; + FLOAT res2_10; + FLOAT res2_11; + FLOAT res2_12; + FLOAT res2_13; + FLOAT res2_14; + FLOAT res2_15; + + FLOAT res3_0; + FLOAT res3_1; + FLOAT res3_2; + FLOAT res3_3; + FLOAT res3_4; + FLOAT res3_5; + FLOAT res3_6; + FLOAT res3_7; + + FLOAT res3_8; + FLOAT res3_9; + FLOAT res3_10; + FLOAT res3_11; + FLOAT res3_12; + FLOAT res3_13; + FLOAT res3_14; + FLOAT res3_15; + + FLOAT a0; + FLOAT a1; + + FLOAT b0; + FLOAT b1; + FLOAT b2; + FLOAT b3; + + BLASLONG off, temp; + +#if !defined(LEFT) + off = -offset; +#else + off = 0; +#endif + + for (j=0; j + +#if defined(DOUBLE) + +#define ABS fabs + +#else + +#define ABS fabsf + +#endif + + +FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) +{ + BLASLONG i=0; + BLASLONG ix=0; + FLOAT maxf=0.0; + + if (n <= 0 || inc_x <= 0) return(maxf); + + maxf=ABS(x[0]); + ix += inc_x; + i++; + + while(i < n) + { + if( ABS(x[ix]) > maxf ) + { + maxf = ABS(x[ix]); + } + ix += inc_x; + i++; + } + return(maxf); +} + + diff --git a/kernel/riscv64/amax_vector.c b/kernel/riscv64/amax_vector.c new file mode 100644 index 000000000..b6aec131e --- /dev/null +++ b/kernel/riscv64/amax_vector.c @@ -0,0 +1,245 @@ +/*************************************************************************** +Copyright (c) 2020, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. 
Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*****************************************************************************/ + +#include "common.h" +#include + +#if !defined(DOUBLE) +#define RVV_EFLOAT RVV_E32 +#define RVV_M RVV_M8 +#define FLOAT_V_T float32xm8_t +#define VLEV_FLOAT vlev_float32xm8 +#define VLSEV_FLOAT vlsev_float32xm8 +#define VFREDMAXVS_FLOAT vfredmaxvs_float32xm8 +#define MASK_T e32xm8_t +#define VMFLTVF_FLOAT vmfltvf_e32xm8_float32xm8 +#define VFMVVF_FLOAT vfmvvf_float32xm8 +#define VFRSUBVF_MASK_FLOAT vfrsubvf_mask_float32xm8 +#define VFMAXVV_FLOAT vfmaxvv_float32xm8 +#else +#define RVV_EFLOAT RVV_E64 +#define RVV_M RVV_M8 +#define FLOAT_V_T float64xm8_t +#define VLEV_FLOAT vlev_float64xm8 +#define VLSEV_FLOAT vlsev_float64xm8 +#define VFREDMAXVS_FLOAT vfredmaxvs_float64xm8 +#define MASK_T e64xm8_t +#define VMFLTVF_FLOAT vmfltvf_e64xm8_float64xm8 +#define VFMVVF_FLOAT vfmvvf_float64xm8 +#define VFRSUBVF_MASK_FLOAT vfrsubvf_mask_float64xm8 +#define VFMAXVV_FLOAT vfmaxvv_float64xm8 +#endif + +FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) +{ + 
BLASLONG i=0, j=0; + BLASLONG ix=0; + FLOAT maxf=0.0; + if (n <= 0 || inc_x <= 0) return(maxf); + unsigned int gvl = 0; + FLOAT_V_T v0, v1, v_max; + + MASK_T mask0, mask1; + FLOAT zero = 0.0; + if(inc_x == 1){ + gvl = vsetvli(n, RVV_EFLOAT, RVV_M); + if(gvl <= n/2){ + v_max = VFMVVF_FLOAT(0, gvl); + for(i=0,j=0; i maxf) + maxf = v0[0]; + j += gvl; + } + }else{ + gvl = vsetvli(n, RVV_EFLOAT, RVV_M); + BLASLONG stride_x = inc_x * sizeof(FLOAT); + if(gvl <= n/2){ + BLASLONG inc_xv = inc_x * gvl; + v_max = VFMVVF_FLOAT(0, gvl); + for(i=0,j=0; i maxf) + maxf = v0[0]; + j += gvl; + } + } + return(maxf); +} + + diff --git a/kernel/riscv64/amin.c b/kernel/riscv64/amin.c new file mode 100644 index 000000000..78495a8e3 --- /dev/null +++ b/kernel/riscv64/amin.c @@ -0,0 +1,75 @@ +/*************************************************************************** +Copyright (c) 2013, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*****************************************************************************/
+
+/**************************************************************************************
+* 2013/09/14 Saar
+*	 BLASTEST float		: OK
+*	 BLASTEST double	: OK
+*	 CTEST			: NoTest
+*	 TEST			: NoTest
+*
+**************************************************************************************/
+
+#include "common.h"
+#include <math.h>
+
+#if defined(DOUBLE)
+
+#define ABS fabs
+
+#else
+
+#define ABS fabsf
+
+#endif
+
+
+/* Returns the smallest absolute value among the n elements of x that
+ * are read with stride inc_x (x[0], x[inc_x], x[2*inc_x], ...).
+ * Returns 0.0 without touching x when n <= 0 or inc_x <= 0. */
+FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
+{
+	BLASLONG i=0;
+	BLASLONG ix=0;
+	FLOAT minf=0.0;
+
+	if (n <= 0 || inc_x <= 0) return(minf);
+
+	/* seed the running minimum with the first element */
+	minf=ABS(x[0]);
+	ix += inc_x;
+	i++;
+
+	while(i < n)
+	{
+		if( ABS(x[ix]) < minf )
+		{
+			minf = ABS(x[ix]);
+		}
+		ix += inc_x;
+		i++;
+	}
+	return(minf);
+}
+
+
diff --git a/kernel/riscv64/amin_vector.c b/kernel/riscv64/amin_vector.c
new file mode 100644
index 000000000..53243ad56
--- /dev/null
+++ b/kernel/riscv64/amin_vector.c
@@ -0,0 +1,241 @@
+/***************************************************************************
+Copyright (c) 2020, The OpenBLAS Project
+All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+2.
Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*****************************************************************************/ + +#include "common.h" +#include +#include + +#if !defined(DOUBLE) +#define RVV_EFLOAT RVV_E32 +#define RVV_M RVV_M8 +#define FLOAT_V_T float32xm8_t +#define VLEV_FLOAT vlev_float32xm8 +#define VLSEV_FLOAT vlsev_float32xm8 +#define VFREDMINVS_FLOAT vfredminvs_float32xm8 +#define MASK_T e32xm8_t +#define VMFLTVF_FLOAT vmfltvf_e32xm8_float32xm8 +#define VFMVVF_FLOAT vfmvvf_float32xm8 +#define VFRSUBVF_MASK_FLOAT vfrsubvf_mask_float32xm8 +#define VFMINVV_FLOAT vfminvv_float32xm8 +#else +#define RVV_EFLOAT RVV_E64 +#define RVV_M RVV_M8 +#define FLOAT_V_T float64xm8_t +#define VLEV_FLOAT vlev_float64xm8 +#define VLSEV_FLOAT vlsev_float64xm8 +#define VFREDMINVS_FLOAT vfredminvs_float64xm8 +#define MASK_T e64xm8_t +#define VMFLTVF_FLOAT vmfltvf_e64xm8_float64xm8 +#define VFMVVF_FLOAT vfmvvf_float64xm8 +#define VFRSUBVF_MASK_FLOAT vfrsubvf_mask_float64xm8 +#define VFMINVV_FLOAT vfminvv_float64xm8 +#endif + +FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) +{ + BLASLONG i=0, j=0; + if (n <= 0 || inc_x <= 0) return(0.0); + FLOAT minf=FLT_MAX; + unsigned int gvl = 0; + FLOAT_V_T v0, v1, v_min; + + MASK_T mask0, mask1; + FLOAT zero = 0.0; + if(inc_x == 1){ + gvl = vsetvli(n, RVV_EFLOAT, RVV_M); + if(gvl <= n/2){ + v_min = VFMVVF_FLOAT(FLT_MAX, gvl); + for(i=0,j=0; i + +#if defined(DOUBLE) + +#define ABS fabs + +#else + +#define ABS fabsf + +#endif + + +FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) +{ + BLASLONG i=0; + FLOAT sumf = 0.0; + if (n <= 0 || inc_x <= 0) return(sumf); + + n *= inc_x; + while(i < n) + { + sumf += ABS(x[i]); + i += inc_x; + } + return(sumf); +} + + diff --git a/kernel/riscv64/asum_vector.c b/kernel/riscv64/asum_vector.c new file mode 100644 index 000000000..7ab7484e8 --- /dev/null +++ b/kernel/riscv64/asum_vector.c @@ -0,0 +1,131 @@ +/*************************************************************************** +Copyright (c) 2020, The OpenBLAS Project +All rights 
reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*****************************************************************************/ + +#include "common.h" +#include + +#if !defined(DOUBLE) +#define RVV_EFLOAT RVV_E32 +#define RVV_M RVV_M8 +#define FLOAT_V_T float32xm8_t +#define VLEV_FLOAT vlev_float32xm8 +#define VLSEV_FLOAT vlsev_float32xm8 +#define VFREDSUMVS_FLOAT vfredsumvs_float32xm8 +#define MASK_T e32xm8_t +#define VMFLTVF_FLOAT vmfltvf_e32xm8_float32xm8 +#define VFMVVF_FLOAT vfmvvf_float32xm8 +#define VFRSUBVF_MASK_FLOAT vfrsubvf_mask_float32xm8 +#define VFADDVV_FLOAT vfaddvv_float32xm8 +#else +#define RVV_EFLOAT RVV_E64 +#define RVV_M RVV_M8 +#define FLOAT_V_T float64xm8_t +#define VLEV_FLOAT vlev_float64xm8 +#define VLSEV_FLOAT vlsev_float64xm8 +#define VFREDSUMVS_FLOAT vfredsumvs_float64xm8 +#define MASK_T e64xm8_t +#define VMFLTVF_FLOAT vmfltvf_e64xm8_float64xm8 +#define VFMVVF_FLOAT vfmvvf_float64xm8 +#define VFRSUBVF_MASK_FLOAT vfrsubvf_mask_float64xm8 +#define VFADDVV_FLOAT vfaddvv_float64xm8 +#endif +FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) +{ + BLASLONG i=0, j=0; + BLASLONG ix=0; + FLOAT asumf=0.0; + if (n <= 0 || inc_x <= 0) return(asumf); + unsigned int gvl = 0; + FLOAT_V_T v0, v1, v_zero,v_sum; + + MASK_T mask0, mask1; + if(inc_x == 1){ + gvl = vsetvli(n, RVV_EFLOAT, RVV_M); + v_zero = VFMVVF_FLOAT(0, gvl); + if(gvl <= n/2){ + v_sum = VFMVVF_FLOAT(0, gvl); + for(i=0,j=0; i + +#define KERNEL8x4_I \ + "addi t1, %[PB], 1*8 \n\t"\ + "addi t2, %[PB], 2*8 \n\t"\ + "addi t3, %[PB], 3*8 \n\t"\ + "fld ft0, (%[PB]) \n\t"\ + "fld ft1, (t1) \n\t"\ + "fld ft2, (t2) \n\t"\ + "fld ft3, (t3) \n\t"\ + "vle.v v0, (%[PA]) \n\t"\ + "addi t4, %[PA], 2*8 \n\t"\ + "addi t5, %[PA], 4*8 \n\t"\ + "vfmv.v.f v8, ft0 \n\t"\ + "addi t6, %[PA], 6*8 \n\t"\ + "addi %[PA], %[PA], 8*8 \n\t"\ + "vle.v v1, (t4) \n\t"\ + "addi t4, t4, 8*8 \n\t"\ + "vfmv.v.f v9, ft1 \n\t"\ + "vle.v v2, (t5) \n\t"\ + "addi t5, t5, 8*8 \n\t"\ + "vle.v v3, (t6) \n\t"\ + "addi t6, t6, 8*8 \n\t"\ + "vfmv.v.f v10, ft2 \n\t"\ + "addi %[PB], %[PB], 
4*8 \n\t"\ + "vle.v v4, (%[PA]) \n\t"\ + "addi %[PA], %[PA], 8*8 \n\t"\ + "vfmv.v.f v11, ft3 \n\t"\ + "vfmacc.vv v16, v8, v0 \n\t"\ + "addi t1, t1, 4*8 \n\t"\ + "vle.v v5, (t4) \n\t"\ + "addi t4, t4, 8*8 \n\t"\ + "vfmacc.vv v17, v8, v1 \n\t"\ + "addi t2, t2, 4*8 \n\t"\ + "vle.v v6, (t5) \n\t"\ + "addi t5, t5, 8*8 \n\t"\ + "vfmacc.vv v18, v8, v2 \n\t"\ + "addi t3, t3, 4*8 \n\t"\ + "vle.v v7, (t6) \n\t"\ + "addi t6, t6, 8*8 \n\t"\ + "vfmacc.vv v19, v8, v3 \n\t"\ + "fld ft4, (%[PB]) \n\t"\ + "vfmacc.vv v20, v9, v0 \n\t"\ + "fld ft5, (t1) \n\t"\ + "vfmacc.vv v21, v9, v1 \n\t"\ + "fld ft6, (t2) \n\t"\ + "vfmacc.vv v22, v9, v2 \n\t"\ + "fld ft7, (t3) \n\t"\ + "vfmacc.vv v23, v9, v3 \n\t"\ + "vfmv.v.f v12, ft4 \n\t"\ + "vfmacc.vv v24, v10, v0 \n\t"\ + "vfmv.v.f v13, ft5 \n\t"\ + "vfmacc.vv v25, v10, v1 \n\t"\ + "vfmv.v.f v14, ft6 \n\t"\ + "vfmacc.vv v26, v10, v2 \n\t"\ + "vfmv.v.f v15, ft7 \n\t"\ + "vfmacc.vv v27, v10, v3 \n\t"\ + "addi %[PB], %[PB], 4*8 \n\t"\ + "vfmacc.vv v28, v11, v0 \n\t"\ + "addi t1, t1, 4*8 \n\t"\ + "vfmacc.vv v29, v11, v1 \n\t"\ + "addi t2, t2, 4*8 \n\t"\ + "vfmacc.vv v30, v11, v2 \n\t"\ + "addi t3, t3, 4*8 \n\t"\ + "vfmacc.vv v31, v11, v3 \n\t" + +#define KERNEL8x4_M1 \ + "vfmacc.vv v16, v8, v0 \n\t"\ + "vle.v v4, (%[PA]) \n\t"\ + "addi %[PA], %[PA], 8*8 \n\t"\ + "vfmacc.vv v17, v8, v1 \n\t"\ + "vle.v v5, (t4) \n\t"\ + "addi t4, t4, 8*8 \n\t"\ + "vfmacc.vv v18, v8, v2 \n\t"\ + "vle.v v6, (t5) \n\t"\ + "addi t5, t5, 8*8 \n\t"\ + "vfmacc.vv v19, v8, v3 \n\t"\ + "vle.v v7, (t6) \n\t"\ + "addi t6, t6, 8*8 \n\t"\ + "vfmacc.vv v20, v9, v0 \n\t"\ + "fld ft4, (%[PB]) \n\t"\ + "vfmacc.vv v21, v9, v1 \n\t"\ + "fld ft5, (t1) \n\t"\ + "vfmacc.vv v22, v9, v2 \n\t"\ + "fld ft6, (t2) \n\t"\ + "vfmacc.vv v23, v9, v3 \n\t"\ + "fld ft7, (t3) \n\t"\ + "addi %[PB], %[PB], 4*8 \n\t"\ + "vfmacc.vv v24, v10, v0 \n\t"\ + "addi t1, t1, 4*8 \n\t"\ + "vfmacc.vv v25, v10, v1 \n\t"\ + "vfmv.v.f v12, ft4 \n\t"\ + "vfmacc.vv v26, v10, v2 \n\t"\ + "addi t2, t2, 4*8 \n\t"\ + 
"vfmacc.vv v27, v10, v3 \n\t"\ + "vfmv.v.f v13, ft5 \n\t"\ + "vfmacc.vv v28, v11, v0 \n\t"\ + "addi t3, t3, 4*8 \n\t"\ + "vfmacc.vv v29, v11, v1 \n\t"\ + "vfmv.v.f v14, ft6 \n\t"\ + "vfmacc.vv v30, v11, v2 \n\t"\ + "vfmacc.vv v31, v11, v3 \n\t"\ + "vfmv.v.f v15, ft7 \n\t" + +#define KERNEL8x4_M2 \ + "vfmacc.vv v16, v12, v4 \n\t"\ + "vle.v v0, (%[PA]) \n\t"\ + "addi %[PA], %[PA], 8*8 \n\t"\ + "vfmacc.vv v17, v12, v5 \n\t"\ + "vle.v v1, (t4) \n\t"\ + "addi t4, t4, 8*8 \n\t"\ + "vfmacc.vv v18, v12, v6 \n\t"\ + "vle.v v2, (t5) \n\t"\ + "addi t5, t5, 8*8 \n\t"\ + "vfmacc.vv v19, v12, v7 \n\t"\ + "vle.v v3, (t6) \n\t"\ + "addi t6, t6, 8*8 \n\t"\ + "vfmacc.vv v20, v13, v4 \n\t"\ + "fld ft0, (%[PB]) \n\t"\ + "vfmacc.vv v21, v13, v5 \n\t"\ + "fld ft1, (t1) \n\t"\ + "vfmacc.vv v22, v13, v6 \n\t"\ + "fld ft2, (t2) \n\t"\ + "vfmacc.vv v23, v13, v7 \n\t"\ + "fld ft3, (t3) \n\t"\ + "addi %[PB], %[PB], 4*8 \n\t"\ + "vfmacc.vv v24, v14, v4 \n\t"\ + "addi t1, t1, 4*8 \n\t"\ + "vfmacc.vv v25, v14, v5 \n\t"\ + "vfmv.v.f v8, ft0 \n\t"\ + "vfmacc.vv v26, v14, v6 \n\t"\ + "addi t2, t2, 4*8 \n\t"\ + "vfmacc.vv v27, v14, v7 \n\t"\ + "vfmv.v.f v9, ft1 \n\t"\ + "vfmacc.vv v28, v15, v4 \n\t"\ + "addi t3, t3, 4*8 \n\t"\ + "vfmacc.vv v29, v15, v5 \n\t"\ + "vfmv.v.f v10, ft2 \n\t"\ + "vfmacc.vv v30, v15, v6 \n\t"\ + "vfmacc.vv v31, v15, v7 \n\t"\ + "vfmv.v.f v11, ft3 \n\t" + +#define KERNEL8x4_E \ + "vfmacc.vv v16, v12, v4 \n\t"\ + "vfmacc.vv v17, v12, v5 \n\t"\ + "vfmacc.vv v18, v12, v6 \n\t"\ + "vfmacc.vv v19, v12, v7 \n\t"\ + "vfmacc.vv v20, v13, v4 \n\t"\ + "vfmacc.vv v21, v13, v5 \n\t"\ + "vfmacc.vv v22, v13, v6 \n\t"\ + "vfmacc.vv v23, v13, v7 \n\t"\ + "vfmacc.vv v24, v14, v4 \n\t"\ + "vfmacc.vv v25, v14, v5 \n\t"\ + "vfmacc.vv v26, v14, v6 \n\t"\ + "vfmacc.vv v27, v14, v7 \n\t"\ + "vfmacc.vv v28, v15, v4 \n\t"\ + "vfmacc.vv v29, v15, v5 \n\t"\ + "vfmacc.vv v30, v15, v6 \n\t"\ + "vfmacc.vv v31, v15, v7 \n\t" + + + + +int CNAME(BLASLONG bm,BLASLONG bn,BLASLONG bk,FLOAT alpha,FLOAT* 
ba,FLOAT* bb,FLOAT* C,BLASLONG ldc +#ifdef TRMMKERNEL + ,BLASLONG offset +#endif + ) +{ + BLASLONG i,j,k; + FLOAT *C0,*C1,*C2,*C3; + FLOAT *ptrba,*ptrbb; + + FLOAT loadb0,loadb1,loadb2,loadb3; + FLOAT load0,load1,load2,load3,load4,load5,load6,load7; + + FLOAT res0,res1,res2,res3; + FLOAT res4,res5,res6,res7; + FLOAT res8,res9,res10,res11; + FLOAT res12,res13,res14,res15; + + for (j=0; j 0){ + vx = VFMVVF_FLOAT(0, gvl); + vx = VFREDSUM_FLOAT(vr, vx, gvl); + dot += vx[0]; + } + //tail + if(j < n){ + gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M); + vx = VLEV_FLOAT(&x[j], gvl); + vy = VLEV_FLOAT(&y[j], gvl); + FLOAT_V_T vz = VFMVVF_FLOAT(0, gvl); + //vr = VFDOTVV_FLOAT(vx, vy, gvl); + vr = VFMACCVV_FLOAT(vz, vx, vy, gvl); + vx = VFREDSUM_FLOAT(vr, vz, gvl); + dot += vx[0]; + } + }else if(inc_y == 1){ + gvl = vsetvli(n, RVV_EFLOAT, RVV_M); + vr = VFMVVF_FLOAT(0, gvl); + unsigned int stride_x = inc_x * sizeof(FLOAT); + for(i=0,j=0; i 0){ + vx = VFMVVF_FLOAT(0, gvl); + vx = VFREDSUM_FLOAT(vr, vx, gvl); + dot += vx[0]; + } + //tail + if(j < n){ + gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M); + vx = VLSEV_FLOAT(&x[j*inc_x], stride_x, gvl); + vy = VLEV_FLOAT(&y[j], gvl); + FLOAT_V_T vz = VFMVVF_FLOAT(0, gvl); + //vr = VFDOTVV_FLOAT(vx, vy, gvl); + vr = VFMACCVV_FLOAT(vz, vx, vy, gvl); + vx = VFREDSUM_FLOAT(vr, vz, gvl); + dot += vx[0]; + } + }else if(inc_x == 1){ + gvl = vsetvli(n, RVV_EFLOAT, RVV_M); + vr = VFMVVF_FLOAT(0, gvl); + unsigned int stride_y = inc_y * sizeof(FLOAT); + for(i=0,j=0; i 0){ + vx = VFMVVF_FLOAT(0, gvl); + vx = VFREDSUM_FLOAT(vr, vx, gvl); + dot += vx[0]; + } + //tail + if(j < n){ + gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M); + vx = VLEV_FLOAT(&x[j], gvl); + vy = VLSEV_FLOAT(&y[j*inc_y], stride_y, gvl); + FLOAT_V_T vz = VFMVVF_FLOAT(0, gvl); + //vr = VFDOTVV_FLOAT(vx, vy, gvl); + vr = VFMACCVV_FLOAT(vz, vx, vy, gvl); + vx = VFREDSUM_FLOAT(vr, vz, gvl); + dot += vx[0]; + } + }else{ + gvl = vsetvli(n, RVV_EFLOAT, RVV_M); + vr = VFMVVF_FLOAT(0, gvl); + unsigned int stride_x 
= inc_x * sizeof(FLOAT); + unsigned int stride_y = inc_y * sizeof(FLOAT); + for(i=0,j=0; i 0){ + vx = VFMVVF_FLOAT(0, gvl); + vx = VFREDSUM_FLOAT(vr, vx, gvl); + dot += vx[0]; + } + //tail + if(j < n){ + gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M); + vx = VLSEV_FLOAT(&x[j*inc_x], stride_x, gvl); + vy = VLSEV_FLOAT(&y[j*inc_y], stride_y, gvl); + FLOAT_V_T vz = VFMVVF_FLOAT(0, gvl); + //vr = VFDOTVV_FLOAT(vx, vy, gvl); + vr = VFMACCVV_FLOAT(vz, vx, vy, gvl); + vx = VFREDSUM_FLOAT(vr, vz, gvl); + dot += vx[0]; + } + } + return(dot); +} + + diff --git a/kernel/riscv64/gemv_n.c b/kernel/riscv64/gemv_n.c new file mode 100644 index 000000000..ef61b245b --- /dev/null +++ b/kernel/riscv64/gemv_n.c @@ -0,0 +1,67 @@ +/*************************************************************************** +Copyright (c) 2013, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*****************************************************************************/ + + +/************************************************************************************** + * * 2013/09/14 Saar + * * BLASTEST float : OK + * * BLASTEST double : OK + * CTEST : OK + * TEST : OK + * * + * **************************************************************************************/ + + +#include "common.h" + +int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *buffer) +{ + BLASLONG i; + BLASLONG ix,iy; + BLASLONG j; + FLOAT *a_ptr; + FLOAT temp; + + ix = 0; + a_ptr = a; + + for (j=0; j + +#if defined(DOUBLE) + +#define ABS fabs + +#else + +#define ABS fabsf + +#endif + + +BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) +{ + BLASLONG i=0; + BLASLONG ix=0; + FLOAT maxf=0.0; + BLASLONG max=0; + + if (n <= 0 || inc_x <= 0) return(max); + + maxf=ABS(x[0]); + ix += inc_x; + i++; + + while(i < n) + { + if( ABS(x[ix]) > maxf ) + { + max = i; + maxf = ABS(x[ix]); + } + ix += inc_x; + i++; + } + return(max+1); +} + + diff --git a/kernel/riscv64/iamax_vector.c b/kernel/riscv64/iamax_vector.c new file mode 100644 index 000000000..3aa64afc9 --- /dev/null +++ b/kernel/riscv64/iamax_vector.c @@ -0,0 +1,191 @@ +/*************************************************************************** +Copyright (c) 2020, The OpenBLAS Project +All rights reserved. 
+Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*****************************************************************************/
+
+#include "common.h"
+#include <math.h>
+
+#if defined(DOUBLE)
+
+#define ABS fabs
+#define RVV_EFLOAT RVV_E64
+#define RVV_M RVV_M8
+#define FLOAT_V_T float64xm8_t
+#define VLEV_FLOAT vlev_float64xm8
+#define VLSEV_FLOAT vlsev_float64xm8
+#define VFREDMAXVS_FLOAT vfredmaxvs_float64xm8
+#define MASK_T e64xm8_t
+#define VMFLTVF_FLOAT vmfltvf_e64xm8_float64xm8
+#define VMFLTVV_FLOAT vmfltvv_e64xm8_float64xm8
+#define VFMVVF_FLOAT vfmvvf_float64xm8
+#define VFRSUBVF_MASK_FLOAT vfrsubvf_mask_float64xm8
+#define VFMAXVV_FLOAT vfmaxvv_float64xm8
+#define VMFGEVF_FLOAT vmfgevf_e64xm8_float64xm8
+#define VMFIRSTM vmfirstm_e64xm8
+#define UINT_V_T uint64xm8_t
+#define VIDV_MASK_UINT vidv_mask_uint64xm8
+#define VIDV_UINT vidv_uint64xm8
+#define VADDVX_MASK_UINT vaddvx_mask_uint64xm8
+#define VADDVX_UINT vaddvx_uint64xm8
+#define VMVVX_UINT vmvvx_uint64xm8
+#else
+
+#define ABS fabsf
+#define RVV_EFLOAT RVV_E32
+#define RVV_M RVV_M8
+#define FLOAT_V_T float32xm8_t
+#define VLEV_FLOAT vlev_float32xm8
+#define VLSEV_FLOAT vlsev_float32xm8
+#define VFREDMAXVS_FLOAT vfredmaxvs_float32xm8
+#define MASK_T e32xm8_t
+#define VMFLTVF_FLOAT vmfltvf_e32xm8_float32xm8
+#define VMFLTVV_FLOAT vmfltvv_e32xm8_float32xm8
+#define VFMVVF_FLOAT vfmvvf_float32xm8
+#define VFRSUBVF_MASK_FLOAT vfrsubvf_mask_float32xm8
+#define VFMAXVV_FLOAT vfmaxvv_float32xm8
+#define VMFGEVF_FLOAT vmfgevf_e32xm8_float32xm8
+#define VMFIRSTM vmfirstm_e32xm8
+#define UINT_V_T uint32xm8_t
+#define VIDV_MASK_UINT vidv_mask_uint32xm8
+#define VIDV_UINT vidv_uint32xm8
+#define VADDVX_MASK_UINT vaddvx_mask_uint32xm8
+#define VADDVX_UINT vaddvx_uint32xm8
+#define VMVVX_UINT vmvvx_uint32xm8
+#endif
+
+
+/* Vector kernel for the index of the largest absolute value in x.
+ * Reads n elements with stride inc_x and returns a 1-based index;
+ * returns 0 when n <= 0 or inc_x <= 0.
+ *
+ * Shape of the computation, as written below: whole groups of gvl
+ * elements keep a per-lane running maximum (v_max) and the element
+ * index at which each lane last improved (v_max_index); the lanes are
+ * then reduced to maxf, and a final partial group (j < n) replaces the
+ * result only when it holds a strictly larger value.
+ *
+ * Byte strides, element offsets and the result index are BLASLONG so
+ * that inc_x * sizeof(FLOAT) and gvl * inc_x are not narrowed to 32 bits.
+ * NOTE(review): assumes the VLSEV_FLOAT stride argument accepts a
+ * BLASLONG-sized value; confirm against the intrinsic prototype. */
+BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
+{
+	BLASLONG i=0, j=0;
+	FLOAT maxf=0.0;
+	BLASLONG max_index = 0;
+	if (n <= 0 || inc_x <= 0) return(max_index);
+
+	FLOAT_V_T vx, v_max;
+	UINT_V_T v_max_index;
+	MASK_T mask;
+	unsigned int gvl = 0;
+	if(inc_x == 1){
+		gvl = vsetvli(n, RVV_EFLOAT, RVV_M);
+		v_max_index = VMVVX_UINT(0, gvl);
+		/* -1 is below every absolute value, so the first group always wins */
+		v_max = VFMVVF_FLOAT(-1, gvl);
+		for(i=0,j=0; i < n/gvl; i++){
+			vx = VLEV_FLOAT(&x[j], gvl);
+			//fabs(vector)
+			mask = VMFLTVF_FLOAT(vx, 0, gvl);
+			vx = VFRSUBVF_MASK_FLOAT(vx, vx, 0, mask, gvl);
+
+			//index where element greater than v_max
+			mask = VMFLTVV_FLOAT(v_max, vx, gvl);
+			v_max_index = VIDV_MASK_UINT(v_max_index, mask, gvl);
+			v_max_index = VADDVX_MASK_UINT(v_max_index, v_max_index, j, mask, gvl);
+
+			//update v_max and start_index j
+			v_max = VFMAXVV_FLOAT(v_max, vx, gvl);
+			j += gvl;
+		}
+		/* reduce the lanes to a scalar maximum, then pick the first lane holding it */
+		vx = VFMVVF_FLOAT(0, gvl);
+		vx = VFREDMAXVS_FLOAT(v_max, vx, gvl);
+		maxf = vx[0];
+		mask = VMFGEVF_FLOAT(v_max, maxf, gvl);
+		max_index = VMFIRSTM(mask,gvl);
+		max_index = v_max_index[max_index];
+
+		if(j < n){
+			gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M);
+			vx = VLEV_FLOAT(&x[j], gvl);
+			//fabs(vector)
+			mask = VMFLTVF_FLOAT(vx, 0, gvl);
+			v_max = VFRSUBVF_MASK_FLOAT(vx, vx, 0, mask, gvl);
+
+			vx = VFMVVF_FLOAT(0, gvl);
+			vx = VFREDMAXVS_FLOAT(v_max, vx, gvl);
+			FLOAT cur_maxf = vx[0];
+			if(cur_maxf > maxf){
+				//tail index
+				v_max_index = VIDV_UINT(gvl);
+				v_max_index = VADDVX_UINT(v_max_index, j, gvl);
+
+				mask = VMFGEVF_FLOAT(v_max, cur_maxf, gvl);
+				max_index = VMFIRSTM(mask,gvl);
+				max_index = v_max_index[max_index];
+			}
+		}
+	}else{
+		gvl = vsetvli(n, RVV_EFLOAT, RVV_M);
+		/* byte stride between consecutive elements, and element offset per group */
+		BLASLONG stride_x = inc_x * sizeof(FLOAT);
+		BLASLONG idx = 0, inc_v = gvl * inc_x;
+
+		v_max_index = VMVVX_UINT(0, gvl);
+		v_max = VFMVVF_FLOAT(-1, gvl);
+		for(i=0,j=0; i < n/gvl; i++){
+			vx = VLSEV_FLOAT(&x[idx], stride_x, gvl);
+			//fabs(vector)
+			mask = VMFLTVF_FLOAT(vx, 0, gvl);
+			vx = VFRSUBVF_MASK_FLOAT(vx, vx, 0, mask, gvl);
+
+			//index where element greater than v_max
+			mask = VMFLTVV_FLOAT(v_max, vx, gvl);
+			v_max_index = VIDV_MASK_UINT(v_max_index, mask, gvl);
+			v_max_index = VADDVX_MASK_UINT(v_max_index, v_max_index, j, mask, gvl);
+
+			//update v_max and start_index j
+			v_max = VFMAXVV_FLOAT(v_max, vx, gvl);
+			j += gvl;
+			idx += inc_v;
+		}
+		vx = VFMVVF_FLOAT(0, gvl);
+		vx = VFREDMAXVS_FLOAT(v_max, vx, gvl);
+		maxf = vx[0];
+		mask = VMFGEVF_FLOAT(v_max, maxf, gvl);
+		max_index = VMFIRSTM(mask,gvl);
+		max_index = v_max_index[max_index];
+
+		if(j < n){
+			gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M);
+			vx = VLSEV_FLOAT(&x[idx], stride_x, gvl);
+			//fabs(vector)
+			mask = VMFLTVF_FLOAT(vx, 0, gvl);
+			v_max = VFRSUBVF_MASK_FLOAT(vx, vx, 0, mask, gvl);
+
+			vx = VFMVVF_FLOAT(0, gvl);
+			vx = VFREDMAXVS_FLOAT(v_max, vx, gvl);
+			FLOAT cur_maxf = vx[0];
+			if(cur_maxf > maxf){
+				//tail index
+				v_max_index = VIDV_UINT(gvl);
+				v_max_index = VADDVX_UINT(v_max_index, j, gvl);
+
+				mask = VMFGEVF_FLOAT(v_max, cur_maxf, gvl);
+				max_index = VMFIRSTM(mask,gvl);
+				max_index = v_max_index[max_index];
+			}
+		}
+	}
+	return(max_index+1);
+}
+
+
diff --git a/kernel/riscv64/iamin.c b/kernel/riscv64/iamin.c
new file mode 100644
index 000000000..155292bd5
--- /dev/null
+++ b/kernel/riscv64/iamin.c
@@ -0,0 +1,77 @@
+/***************************************************************************
+Copyright (c) 2013, The OpenBLAS Project
+All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in
+the documentation and/or other materials provided with the
+distribution.
+3. Neither the name of the OpenBLAS project nor the names of
+its contributors may be used to endorse or promote products
+derived from this software without specific prior written permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*****************************************************************************/ + +/************************************************************************************** +* 2013/09/14 Saar +* BLASTEST float : NoTest +* BLASTEST double : NoTest +* CTEST : NoTest +* TEST : NoTest +* +**************************************************************************************/ + +#include "common.h" +#include + +#if defined(DOUBLE) + +#define ABS fabs + +#else + +#define ABS fabsf + +#endif + + +BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) +{ + BLASLONG i=0; + BLASLONG ix=0; + FLOAT minf=0.0; + BLASLONG min=0; + + if (n <= 0 || inc_x <= 0) return(min); + + minf=ABS(x[0]); + ix += inc_x; + i++; + + while(i < n) + { + if( ABS(x[ix]) < ABS(minf) ) + { + min = i; + minf = ABS(x[ix]); + } + ix += inc_x; + i++; + } + return(min+1); +} + + diff --git a/kernel/riscv64/iamin_vector.c b/kernel/riscv64/iamin_vector.c new file mode 100644 index 000000000..608f19a00 --- /dev/null +++ b/kernel/riscv64/iamin_vector.c @@ -0,0 +1,192 @@ +/*************************************************************************** +Copyright (c) 2020, The OpenBLAS Project +All rights reserved. 
+Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*****************************************************************************/ + +#include "common.h" +#include +#include + +#if defined(DOUBLE) + +#define ABS fabs +#define RVV_EFLOAT RVV_E64 +#define RVV_M RVV_M8 +#define FLOAT_V_T float64xm8_t +#define VLEV_FLOAT vlev_float64xm8 +#define VLSEV_FLOAT vlsev_float64xm8 +#define VFREDMINVS_FLOAT vfredminvs_float64xm8 +#define MASK_T e64xm8_t +#define VMFLTVF_FLOAT vmfltvf_e64xm8_float64xm8 +#define VMFLTVV_FLOAT vmfltvv_e64xm8_float64xm8 +#define VFMVVF_FLOAT vfmvvf_float64xm8 +#define VFRSUBVF_MASK_FLOAT vfrsubvf_mask_float64xm8 +#define VFMINVV_FLOAT vfminvv_float64xm8 +#define VMFLEVF_FLOAT vmflevf_e64xm8_float64xm8 +#define VMFIRSTM vmfirstm_e64xm8 +#define UINT_V_T uint64xm8_t +#define VIDV_MASK_UINT vidv_mask_uint64xm8 +#define VIDV_UINT vidv_uint64xm8 +#define VADDVX_MASK_UINT vaddvx_mask_uint64xm8 +#define VADDVX_UINT vaddvx_uint64xm8 +#define VMVVX_UINT vmvvx_uint64xm8 +#else + +#define ABS fabsf +#define RVV_EFLOAT RVV_E32 +#define RVV_M RVV_M8 +#define FLOAT_V_T float32xm8_t +#define VLEV_FLOAT vlev_float32xm8 +#define VLSEV_FLOAT vlsev_float32xm8 +#define VFREDMINVS_FLOAT vfredminvs_float32xm8 +#define MASK_T e32xm8_t +#define VMFLTVF_FLOAT vmfltvf_e32xm8_float32xm8 +#define VMFLTVV_FLOAT vmfltvv_e32xm8_float32xm8 +#define VFMVVF_FLOAT vfmvvf_float32xm8 +#define VFRSUBVF_MASK_FLOAT vfrsubvf_mask_float32xm8 +#define VFMINVV_FLOAT vfminvv_float32xm8 +#define VMFLEVF_FLOAT vmflevf_e32xm8_float32xm8 +#define VMFIRSTM vmfirstm_e32xm8 +#define UINT_V_T uint32xm8_t +#define VIDV_MASK_UINT vidv_mask_uint32xm8 +#define VIDV_UINT vidv_uint32xm8 +#define VADDVX_MASK_UINT vaddvx_mask_uint32xm8 +#define VADDVX_UINT vaddvx_uint32xm8 +#define VMVVX_UINT vmvvx_uint32xm8 +#endif + + +BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) +{ + BLASLONG i=0, j=0; + FLOAT minf=FLT_MAX; + unsigned int min_index = 0; + if (n <= 0 || inc_x <= 0) return(min_index); + + FLOAT_V_T vx, v_min; + UINT_V_T v_min_index; + MASK_T mask; 
+ unsigned int gvl = 0; + if(inc_x == 1){ + gvl = vsetvli(n, RVV_EFLOAT, RVV_M); + v_min = VFMVVF_FLOAT(FLT_MAX, gvl); + v_min_index = VMVVX_UINT(0, gvl); + for(i=0,j=0; i < n/gvl; i++){ + vx = VLEV_FLOAT(&x[j], gvl); + //fabs(vector) + mask = VMFLTVF_FLOAT(vx, 0, gvl); + vx = VFRSUBVF_MASK_FLOAT(vx, vx, 0, mask, gvl); + + //index where element less than v_min + mask = VMFLTVV_FLOAT(vx, v_min, gvl); + v_min_index = VIDV_MASK_UINT(v_min_index, mask, gvl); + v_min_index = VADDVX_MASK_UINT(v_min_index, v_min_index, j, mask, gvl); + + //update v_min and start_index j + v_min = VFMINVV_FLOAT(v_min, vx, gvl); + j += gvl; + } + vx = VFMVVF_FLOAT(FLT_MAX, gvl); + vx = VFREDMINVS_FLOAT(v_min, vx, gvl); + minf = vx[0]; + mask = VMFLEVF_FLOAT(v_min, minf, gvl); + min_index = VMFIRSTM(mask,gvl); + min_index = v_min_index[min_index]; + + if(j < n){ + gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M); + vx = VLEV_FLOAT(&x[j], gvl); + //fabs(vector) + mask = VMFLTVF_FLOAT(vx, 0, gvl); + v_min = VFRSUBVF_MASK_FLOAT(vx, vx, 0, mask, gvl); + + vx = VFMVVF_FLOAT(FLT_MAX, gvl); + vx = VFREDMINVS_FLOAT(v_min, vx, gvl); + FLOAT cur_minf = vx[0]; + if(cur_minf < minf){ + //tail index + v_min_index = VIDV_UINT(gvl); + v_min_index = VADDVX_UINT(v_min_index, j, gvl); + + mask = VMFLEVF_FLOAT(v_min, cur_minf, gvl); + min_index = VMFIRSTM(mask,gvl); + min_index = v_min_index[min_index]; + } + } + }else{ + gvl = vsetvli(n, RVV_EFLOAT, RVV_M); + unsigned int stride_x = inc_x * sizeof(FLOAT); + unsigned int idx = 0, inc_v = gvl * inc_x; + + v_min = VFMVVF_FLOAT(FLT_MAX, gvl); + v_min_index = VMVVX_UINT(0, gvl); + for(i=0,j=0; i < n/gvl; i++){ + vx = VLSEV_FLOAT(&x[idx], stride_x, gvl); + //fabs(vector) + mask = VMFLTVF_FLOAT(vx, 0, gvl); + vx = VFRSUBVF_MASK_FLOAT(vx, vx, 0, mask, gvl); + + //index where element less than v_min + mask = VMFLTVV_FLOAT(vx, v_min, gvl); + v_min_index = VIDV_MASK_UINT(v_min_index, mask, gvl); + v_min_index = VADDVX_MASK_UINT(v_min_index, v_min_index, j, mask, gvl); + + //update 
v_min and start_index j + v_min = VFMINVV_FLOAT(v_min, vx, gvl); + j += gvl; + idx += inc_v; + } + vx = VFMVVF_FLOAT(FLT_MAX, gvl); + vx = VFREDMINVS_FLOAT(v_min, vx, gvl); + minf = vx[0]; + mask = VMFLEVF_FLOAT(v_min, minf, gvl); + min_index = VMFIRSTM(mask,gvl); + min_index = v_min_index[min_index]; + + if(j < n){ + gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M); + vx = VLSEV_FLOAT(&x[idx], stride_x, gvl); + //fabs(vector) + mask = VMFLTVF_FLOAT(vx, 0, gvl); + v_min = VFRSUBVF_MASK_FLOAT(vx, vx, 0, mask, gvl); + + vx = VFMVVF_FLOAT(FLT_MAX, gvl); + vx = VFREDMINVS_FLOAT(v_min, vx, gvl); + FLOAT cur_minf = vx[0]; + if(cur_minf < minf){ + //tail index + v_min_index = VIDV_UINT(gvl); + v_min_index = VADDVX_UINT(v_min_index, j, gvl); + + mask = VMFLEVF_FLOAT(v_min, cur_minf, gvl); + min_index = VMFIRSTM(mask,gvl); + min_index = v_min_index[min_index]; + } + } + } + return(min_index+1); +} + + diff --git a/kernel/riscv64/imax.c b/kernel/riscv64/imax.c new file mode 100644 index 000000000..5072dd16e --- /dev/null +++ b/kernel/riscv64/imax.c @@ -0,0 +1,69 @@ +/*************************************************************************** +Copyright (c) 2013, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. 
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*****************************************************************************/ + + +/************************************************************************************** +* 2013/09/14 Saar +* BLASTEST float : NoTest +* BLASTEST double : NoTest +* CTEST : NoTest +* TEST : NoTest +* +**************************************************************************************/ + +#include "common.h" +#include + + + +BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) +{ + BLASLONG i=0; + BLASLONG ix=0; + FLOAT maxf=0.0; + BLASLONG max=0; + + if (n <= 0 || inc_x <= 0) return(max); + + maxf=x[0]; + ix += inc_x; + i++; + + while(i < n) + { + if( x[ix] > maxf ) + { + max = i; + maxf = x[ix]; + } + ix += inc_x; + i++; + } + return(max+1); +} + + diff --git a/kernel/riscv64/imax_vector.c b/kernel/riscv64/imax_vector.c new file mode 100644 index 000000000..44af7101b --- /dev/null +++ b/kernel/riscv64/imax_vector.c @@ -0,0 +1,176 @@ +/*************************************************************************** +Copyright (c) 2020, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. 
Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*****************************************************************************/ + +#include "common.h" +#include +#include + +#if defined(DOUBLE) + +#define ABS fabs +#define RVV_EFLOAT RVV_E64 +#define RVV_M RVV_M8 +#define FLOAT_V_T float64xm8_t +#define VLEV_FLOAT vlev_float64xm8 +#define VLSEV_FLOAT vlsev_float64xm8 +#define VFREDMAXVS_FLOAT vfredmaxvs_float64xm8 +#define MASK_T e64xm8_t +#define VMFLTVV_FLOAT vmfltvv_e64xm8_float64xm8 +#define VFMVVF_FLOAT vfmvvf_float64xm8 +#define VFMAXVV_FLOAT vfmaxvv_float64xm8 +#define VMFGEVF_FLOAT vmfgevf_e64xm8_float64xm8 +#define VMFIRSTM vmfirstm_e64xm8 +#define UINT_V_T uint64xm8_t +#define VIDV_MASK_UINT vidv_mask_uint64xm8 +#define VIDV_UINT vidv_uint64xm8 +#define VADDVX_MASK_UINT vaddvx_mask_uint64xm8 +#define VADDVX_UINT vaddvx_uint64xm8 +#define VMVVX_UINT vmvvx_uint64xm8 +#else + +#define ABS fabsf +#define RVV_EFLOAT RVV_E32 +#define RVV_M RVV_M8 +#define FLOAT_V_T float32xm8_t +#define VLEV_FLOAT vlev_float32xm8 +#define VLSEV_FLOAT vlsev_float32xm8 +#define VFREDMAXVS_FLOAT vfredmaxvs_float32xm8 +#define MASK_T e32xm8_t +#define VMFLTVV_FLOAT vmfltvv_e32xm8_float32xm8 +#define VFMVVF_FLOAT vfmvvf_float32xm8 +#define VFMAXVV_FLOAT vfmaxvv_float32xm8 +#define VMFGEVF_FLOAT vmfgevf_e32xm8_float32xm8 +#define VMFIRSTM vmfirstm_e32xm8 +#define UINT_V_T uint32xm8_t +#define VIDV_MASK_UINT vidv_mask_uint32xm8 +#define VIDV_UINT vidv_uint32xm8 +#define VADDVX_MASK_UINT vaddvx_mask_uint32xm8 +#define VADDVX_UINT vaddvx_uint32xm8 +#define VMVVX_UINT vmvvx_uint32xm8 +#endif + + +BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) +{ + BLASLONG i=0, j=0; + unsigned int max_index = 0; + if (n <= 0 || inc_x <= 0) return(max_index); + FLOAT maxf=-FLT_MAX; + + FLOAT_V_T vx, v_max; + UINT_V_T v_max_index; + MASK_T mask; + unsigned int gvl = 0; + if(inc_x == 1){ + gvl = vsetvli(n, RVV_EFLOAT, RVV_M); + v_max_index = VMVVX_UINT(0, gvl); + v_max = VFMVVF_FLOAT(-FLT_MAX, gvl); + for(i=0,j=0; i < n/gvl; i++){ + vx = 
VLEV_FLOAT(&x[j], gvl); + + //index where element greater than v_max + mask = VMFLTVV_FLOAT(v_max, vx, gvl); + v_max_index = VIDV_MASK_UINT(v_max_index, mask, gvl); + v_max_index = VADDVX_MASK_UINT(v_max_index, v_max_index, j, mask, gvl); + + //update v_max and start_index j + v_max = VFMAXVV_FLOAT(v_max, vx, gvl); + j += gvl; + } + vx = VFMVVF_FLOAT(-FLT_MAX, gvl); + vx = VFREDMAXVS_FLOAT(v_max, vx, gvl); + maxf = vx[0]; + mask = VMFGEVF_FLOAT(v_max, maxf, gvl); + max_index = VMFIRSTM(mask,gvl); + max_index = v_max_index[max_index]; + + if(j < n){ + gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M); + v_max = VLEV_FLOAT(&x[j], gvl); + + vx = VFMVVF_FLOAT(-FLT_MAX, gvl); + vx = VFREDMAXVS_FLOAT(v_max, vx, gvl); + FLOAT cur_maxf = vx[0]; + if(cur_maxf > maxf){ + //tail index + v_max_index = VIDV_UINT(gvl); + v_max_index = VADDVX_UINT(v_max_index, j, gvl); + + mask = VMFGEVF_FLOAT(v_max, cur_maxf, gvl); + max_index = VMFIRSTM(mask,gvl); + max_index = v_max_index[max_index]; + } + } + }else{ + gvl = vsetvli(n, RVV_EFLOAT, RVV_M); + unsigned int stride_x = inc_x * sizeof(FLOAT); + unsigned int idx = 0, inc_v = gvl * inc_x; + + v_max = VFMVVF_FLOAT(-FLT_MAX, gvl); + v_max_index = VMVVX_UINT(0, gvl); + for(i=0,j=0; i < n/gvl; i++){ + vx = VLSEV_FLOAT(&x[idx], stride_x, gvl); + + //index where element greater than v_max + mask = VMFLTVV_FLOAT(v_max, vx, gvl); + v_max_index = VIDV_MASK_UINT(v_max_index, mask, gvl); + v_max_index = VADDVX_MASK_UINT(v_max_index, v_max_index, j, mask, gvl); + + //update v_max and start_index j + v_max = VFMAXVV_FLOAT(v_max, vx, gvl); + j += gvl; + idx += inc_v; + } + vx = VFMVVF_FLOAT(-FLT_MAX, gvl); + vx = VFREDMAXVS_FLOAT(v_max, vx, gvl); + maxf = vx[0]; + mask = VMFGEVF_FLOAT(v_max, maxf, gvl); + max_index = VMFIRSTM(mask,gvl); + max_index = v_max_index[max_index]; + + if(j < n){ + gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M); + v_max = VLSEV_FLOAT(&x[idx], stride_x, gvl); + + vx = VFMVVF_FLOAT(-FLT_MAX, gvl); + vx = VFREDMAXVS_FLOAT(v_max, vx, gvl); + FLOAT 
cur_maxf = vx[0]; + if(cur_maxf > maxf){ + //tail index + v_max_index = VIDV_UINT(gvl); + v_max_index = VADDVX_UINT(v_max_index, j, gvl); + + mask = VMFGEVF_FLOAT(v_max, cur_maxf, gvl); + max_index = VMFIRSTM(mask,gvl); + max_index = v_max_index[max_index]; + } + } + } + return(max_index+1); +} + + diff --git a/kernel/riscv64/imin.c b/kernel/riscv64/imin.c new file mode 100644 index 000000000..ffc65226e --- /dev/null +++ b/kernel/riscv64/imin.c @@ -0,0 +1,67 @@ +/*************************************************************************** +Copyright (c) 2013, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*****************************************************************************/ + + +/************************************************************************************** +* 2013/08/19 Saar +* BLASTEST float +* BLASTEST double +* +**************************************************************************************/ + +#include "common.h" +#include + + + +BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) +{ + BLASLONG i=0; + BLASLONG ix=0; + FLOAT minf=0.0; + BLASLONG min=0; + + if (n <= 0 || inc_x <= 0) return(min); + + minf=x[0]; + ix += inc_x; + i++; + + while(i < n) + { + if( x[ix] < minf ) + { + min = i; + minf = x[ix]; + } + ix += inc_x; + i++; + } + return(min+1); +} + + diff --git a/kernel/riscv64/imin_vector.c b/kernel/riscv64/imin_vector.c new file mode 100644 index 000000000..e6e0e9f9f --- /dev/null +++ b/kernel/riscv64/imin_vector.c @@ -0,0 +1,212 @@ +/*************************************************************************** +Copyright (c) 2020, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. 
Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*****************************************************************************/ + +#include "common.h" +#include +#include + +#if defined(DOUBLE) + +#define ABS fabs +#define RVV_EFLOAT RVV_E64 +#define RVV_M RVV_M8 +#define FLOAT_V_T float64xm8_t +#define VLEV_FLOAT vlev_float64xm8 +#define VLSEV_FLOAT vlsev_float64xm8 +#define VFREDMINVS_FLOAT vfredminvs_float64xm8 +#define MASK_T e64xm8_t +#define VMFLTVV_FLOAT vmfltvv_e64xm8_float64xm8 +#define VFMVVF_FLOAT vfmvvf_float64xm8 +#define VFMINVV_FLOAT vfminvv_float64xm8 +#define VMFLEVF_FLOAT vmflevf_e64xm8_float64xm8 +#define VMFIRSTM vmfirstm_e64xm8 +#define UINT_V_T uint64xm8_t +#define VIDV_MASK_UINT vidv_mask_uint64xm8 +#define VIDV_UINT vidv_uint64xm8 +#define VADDVX_MASK_UINT vaddvx_mask_uint64xm8 +#define VADDVX_UINT vaddvx_uint64xm8 +#define VMVVX_UINT vmvvx_uint64xm8 +#else + +#define ABS fabsf +#define RVV_EFLOAT RVV_E32 +#define RVV_M RVV_M8 +#define FLOAT_V_T float32xm8_t +#define VLEV_FLOAT vlev_float32xm8 +#define VLSEV_FLOAT vlsev_float32xm8 +#define VFREDMINVS_FLOAT vfredminvs_float32xm8 +#define MASK_T e32xm8_t +#define VMFLTVV_FLOAT vmfltvv_e32xm8_float32xm8 +#define VFMVVF_FLOAT vfmvvf_float32xm8 +#define VFMINVV_FLOAT vfminvv_float32xm8 +#define VMFLEVF_FLOAT vmflevf_e32xm8_float32xm8 +#define VMFIRSTM vmfirstm_e32xm8 +#define UINT_V_T uint32xm8_t +#define VIDV_MASK_UINT vidv_mask_uint32xm8 +#define VIDV_UINT vidv_uint32xm8 +#define VADDVX_MASK_UINT vaddvx_mask_uint32xm8 +#define VADDVX_UINT vaddvx_uint32xm8 +#define VMVVX_UINT vmvvx_uint32xm8 +#endif + + +BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) +{ + BLASLONG i=0, j=0; + FLOAT minf=FLT_MAX; + unsigned int min_index = 0; + if (n <= 0 || inc_x <= 0) return(min_index); + + FLOAT_V_T vx, v_min; + UINT_V_T v_min_index; + MASK_T mask; + unsigned int gvl = 0; + if(inc_x == 1){ + gvl = vsetvli(n, RVV_EFLOAT, RVV_M); + v_min = VFMVVF_FLOAT(FLT_MAX, gvl); + v_min_index = VMVVX_UINT(0, gvl); + for(i=0,j=0; i < n/gvl; i++){ + vx = 
VLEV_FLOAT(&x[j], gvl); + //index where element less than v_min + mask = VMFLTVV_FLOAT(vx, v_min, gvl); + v_min_index = VIDV_MASK_UINT(v_min_index, mask, gvl); +/* +#if defined(DOUBLE) +asm volatile( + "vor.vv v0, %1, %1 \n\t" + "vsetvli x0, %2, e64,m8 \n\t" + "vid.v %0, v0.t \n\t" + :"+v"(v_min_index) + :"v"(mask), "r"(gvl) + :"v0"); +#else +asm volatile( + "vor.vv v0, %1, %1 \n\t" + "vsetvli x0, %2, e32,m8 \n\t" + "vid.v %0, v0.t \n\t" + :"+v"(v_min_index) + :"v"(mask), "r"(gvl) + :"v0"); +#endif +*/ + v_min_index = VADDVX_MASK_UINT(v_min_index, v_min_index, j, mask, gvl); + + //update v_min and start_index j + v_min = VFMINVV_FLOAT(v_min, vx, gvl); + j += gvl; + } + vx = VFMVVF_FLOAT(FLT_MAX, gvl); + vx = VFREDMINVS_FLOAT(v_min, vx, gvl); + minf = vx[0]; + mask = VMFLEVF_FLOAT(v_min, minf, gvl); + min_index = VMFIRSTM(mask,gvl); + min_index = v_min_index[min_index]; + + if(j < n){ + gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M); + v_min = VLEV_FLOAT(&x[j], gvl); + + vx = VFMVVF_FLOAT(FLT_MAX, gvl); + vx = VFREDMINVS_FLOAT(v_min, vx, gvl); + FLOAT cur_minf = vx[0]; + if(cur_minf < minf){ + //tail index + v_min_index = VIDV_UINT(gvl); + v_min_index = VADDVX_UINT(v_min_index, j, gvl); + mask = VMFLEVF_FLOAT(v_min, cur_minf, gvl); + min_index = VMFIRSTM(mask,gvl); + min_index = v_min_index[min_index]; + } + } + }else{ + gvl = vsetvli(n, RVV_EFLOAT, RVV_M); + unsigned int stride_x = inc_x * sizeof(FLOAT); + unsigned int idx = 0, inc_v = gvl * inc_x; + + v_min = VFMVVF_FLOAT(FLT_MAX, gvl); + v_min_index = VMVVX_UINT(0, gvl); + for(i=0,j=0; i < n/gvl; i++){ + vx = VLSEV_FLOAT(&x[idx], stride_x, gvl); + + //index where element less than v_min + mask = VMFLTVV_FLOAT(vx, v_min, gvl); + v_min_index = VIDV_MASK_UINT(v_min_index, mask, gvl); +/* +#if defined(DOUBLE) +asm volatile( + "vor.vv v0, %1, %1 \n\t" + "vsetvli x0, %2, e64,m8 \n\t" + "vid.v %0, v0.t \n\t" + :"+v"(v_min_index) + :"v"(mask), "r"(gvl) + :"v0"); +#else +asm volatile( + "vor.vv v0, %1, %1 \n\t" + "vsetvli x0, %2, 
e32,m8 \n\t" + "vid.v %0, v0.t \n\t" + :"+v"(v_min_index) + :"v"(mask), "r"(gvl) + :"v0"); +#endif +*/ + + v_min_index = VADDVX_MASK_UINT(v_min_index, v_min_index, j, mask, gvl); + + //update v_min and start_index j + v_min = VFMINVV_FLOAT(v_min, vx, gvl); + j += gvl; + idx += inc_v; + } + vx = VFMVVF_FLOAT(FLT_MAX, gvl); + vx = VFREDMINVS_FLOAT(v_min, vx, gvl); + minf = vx[0]; + mask = VMFLEVF_FLOAT(v_min, minf, gvl); + min_index = VMFIRSTM(mask,gvl); + min_index = v_min_index[min_index]; + + if(j < n){ + gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M); + v_min = VLSEV_FLOAT(&x[idx], stride_x, gvl); + + vx = VFMVVF_FLOAT(FLT_MAX, gvl); + vx = VFREDMINVS_FLOAT(v_min, vx, gvl); + FLOAT cur_minf = vx[0]; + if(cur_minf < minf){ + //tail index + v_min_index = VIDV_UINT(gvl); + v_min_index = VADDVX_UINT(v_min_index, j, gvl); + mask = VMFLEVF_FLOAT(v_min, cur_minf, gvl); + min_index = VMFIRSTM(mask,gvl); + min_index = v_min_index[min_index]; + } + } + } + return(min_index+1); +} + + diff --git a/kernel/riscv64/izamax.c b/kernel/riscv64/izamax.c new file mode 100644 index 000000000..8fe33e95b --- /dev/null +++ b/kernel/riscv64/izamax.c @@ -0,0 +1,81 @@ +/*************************************************************************** +Copyright (c) 2013, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. 
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*****************************************************************************/ + +/************************************************************************************** +* 2013/09/14 Saar +* BLASTEST float : NoTest +* BLASTEST double : NoTest +* CTEST : OK +* TEST : OK +* +**************************************************************************************/ + +#include "common.h" +#include + +#if defined(DOUBLE) + +#define ABS fabs + +#else + +#define ABS fabsf + +#endif + +#define CABS1(x,i) ABS(x[i])+ABS(x[i+1]) + +BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) +{ + BLASLONG i=0; + BLASLONG ix=0; + FLOAT maxf; + BLASLONG max=0; + BLASLONG inc_x2; + + if (n <= 0 || inc_x <= 0) return(max); + + inc_x2 = 2 * inc_x; + + maxf = CABS1(x,0); + ix += inc_x2; + i++; + + while(i < n) + { + if( CABS1(x,ix) > maxf ) + { + max = i; + maxf = CABS1(x,ix); + } + ix += inc_x2; + i++; + } + return(max+1); +} + + diff --git a/kernel/riscv64/izamax_vector.c b/kernel/riscv64/izamax_vector.c new file mode 100644 index 000000000..62c95d973 --- /dev/null +++ b/kernel/riscv64/izamax_vector.c @@ -0,0 +1,246 @@ +/*************************************************************************** +Copyright (c) 2020, The OpenBLAS Project 
+All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*****************************************************************************/ + +#include "common.h" +#include + +#if defined(DOUBLE) + +#define RVV_EFLOAT RVV_E64 +#define FLOAT_V_T float64xm8_t +#define VLSEV_FLOAT vlsev_float64xm8 +#define VFREDMAXVS_FLOAT vfredmaxvs_float64xm8 +#define MASK_T e64xm8_t +#define VMFLTVF_FLOAT vmfltvf_e64xm8_float64xm8 +#define VMFLTVV_FLOAT vmfltvv_e64xm8_float64xm8 +#define VFMVVF_FLOAT vfmvvf_float64xm8 +#define VFRSUBVF_MASK_FLOAT vfrsubvf_mask_float64xm8 +#define VFMAXVV_FLOAT vfmaxvv_float64xm8 +#define VMFGEVF_FLOAT vmfgevf_e64xm8_float64xm8 +#define VMFIRSTM vmfirstm_e64xm8 +#define UINT_V_T uint64xm8_t +#define VIDV_MASK_UINT vidv_mask_uint64xm8 +#define VIDV_UINT vidv_uint64xm8 +#define VADDVX_MASK_UINT vaddvx_mask_uint64xm8 +#define VADDVX_UINT vaddvx_uint64xm8 +#define VFADDVV_FLOAT vfaddvv_float64xm8 +#define VMVVX_UINT vmvvx_uint64xm8 +#else + +#define ABS fabsf +#define RVV_EFLOAT RVV_E32 +#define FLOAT_V_T float32xm8_t +#define VLSEV_FLOAT vlsev_float32xm8 +#define VFREDMAXVS_FLOAT vfredmaxvs_float32xm8 +#define MASK_T e32xm8_t +#define VMFLTVF_FLOAT vmfltvf_e32xm8_float32xm8 +#define VMFLTVV_FLOAT vmfltvv_e32xm8_float32xm8 +#define VFMVVF_FLOAT vfmvvf_float32xm8 +#define VFRSUBVF_MASK_FLOAT vfrsubvf_mask_float32xm8 +#define VFMAXVV_FLOAT vfmaxvv_float32xm8 +#define VMFGEVF_FLOAT vmfgevf_e32xm8_float32xm8 +#define VMFIRSTM vmfirstm_e32xm8 +#define UINT_V_T uint32xm8_t +#define VIDV_MASK_UINT vidv_mask_uint32xm8 +#define VIDV_UINT vidv_uint32xm8 +#define VADDVX_MASK_UINT vaddvx_mask_uint32xm8 +#define VADDVX_UINT vaddvx_uint32xm8 +#define VFADDVV_FLOAT vfaddvv_float32xm8 +#define VMVVX_UINT vmvvx_uint32xm8 +#endif + +#define RVV_M RVV_M8 + +BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) +{ + BLASLONG i=0, j=0; + FLOAT maxf=0.0; + unsigned int max_index = 0; + if (n <= 0 || inc_x <= 0) return(max_index); + + FLOAT_V_T vx0, vx1, v_max; + UINT_V_T v_max_index; + MASK_T mask0, mask1; + unsigned int gvl = 0; + 
gvl = vsetvli(n, RVV_EFLOAT, RVV_M); + v_max_index = VMVVX_UINT(0, gvl); + v_max = VFMVVF_FLOAT(-1, gvl); + BLASLONG stride_x = inc_x * 2 * sizeof(FLOAT); + BLASLONG inc_xv = gvl * inc_x * 2; + BLASLONG ix = 0; + for(i=0,j=0; i < n/gvl; i++){ + vx0 = VLSEV_FLOAT(&x[ix], stride_x, gvl); + //fabs(vector) + mask0 = VMFLTVF_FLOAT(vx0, 0, gvl); + vx0 = VFRSUBVF_MASK_FLOAT(vx0, vx0, 0, mask0, gvl); +/* +#if defined(DOUBLE) +asm volatile( + "vor.vv v0, %1, %1\n\t" + "vsetvli x0, %3, e64,m8 \n\t" + "vfrsub.vf %0, %0, %2, v0.t \n\t" + :"+v"(vx0) + :"v"(mask0), "f"(zero), "r"(gvl) + :"v0"); +#else +asm volatile( + "vor.vv v0, %1, %1\n\t" + "vsetvli x0, %3, e32,m8 \n\t" + "vfrsub.vf %0, %0, %2, v0.t \n\t" + :"+v"(vx0) + :"v"(mask0), "f"(zero), "r"(gvl) + :"v0"); +#endif +*/ + vx1 = VLSEV_FLOAT(&x[ix+1], stride_x, gvl); + //fabs(vector) + mask1 = VMFLTVF_FLOAT(vx1, 0, gvl); + vx1 = VFRSUBVF_MASK_FLOAT(vx1, vx1, 0, mask1, gvl); +/* +#if defined(DOUBLE) +asm volatile( + "vor.vv v0, %1, %1\n\t" + "vsetvli x0, %3, e64,m8 \n\t" + "vfrsub.vf %0, %0, %2, v0.t \n\t" + :"+v"(vx1) + :"v"(mask1), "f"(zero), "r"(gvl) + :"v0"); +#else +asm volatile( + "vor.vv v0, %1, %1\n\t" + "vsetvli x0, %3, e32,m8 \n\t" + "vfrsub.vf %0, %0, %2, v0.t \n\t" + :"+v"(vx1) + :"v"(mask1), "f"(zero), "r"(gvl) + :"v0"); +#endif +*/ + vx0 = VFADDVV_FLOAT(vx0, vx1, gvl); + + //index where element greater than v_max + mask0 = VMFLTVV_FLOAT(v_max, vx0, gvl); + v_max_index = VIDV_MASK_UINT(v_max_index, mask0, gvl); +/* +#if defined(DOUBLE) +asm volatile( + "vor.vv v0, %1, %1 \n\t" + "vsetvli x0, %2, e64,m8 \n\t" + "vid.v %0, v0.t \n\t" + :"+v"(v_max_index) + :"v"(mask0), "r"(gvl) + :"v0"); +#else +asm volatile( + "vor.vv v0, %1, %1 \n\t" + "vsetvli x0, %2, e32,m8 \n\t" + "vid.v %0, v0.t \n\t" + :"+v"(v_max_index) + :"v"(mask0), "r"(gvl) + :"v0"); +#endif +*/ + v_max_index = VADDVX_MASK_UINT(v_max_index, v_max_index, j, mask0, gvl); + + //update v_max and start_index j + v_max = VFMAXVV_FLOAT(v_max, vx0, gvl); + j += 
gvl; + ix += inc_xv; + } + vx0 = VFMVVF_FLOAT(0, gvl); + vx0 = VFREDMAXVS_FLOAT(v_max, vx0, gvl); + maxf = vx0[0]; + mask0 = VMFGEVF_FLOAT(v_max, maxf, gvl); + max_index = VMFIRSTM(mask0,gvl); + max_index = v_max_index[max_index]; + + if(j < n){ + gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M); + v_max_index = VMVVX_UINT(0, gvl); + vx0 = VLSEV_FLOAT(&x[ix], stride_x, gvl); + //fabs(vector) + mask0 = VMFLTVF_FLOAT(vx0, 0, gvl); + vx0 = VFRSUBVF_MASK_FLOAT(vx0, vx0, 0, mask0, gvl); +/* +#if defined(DOUBLE) +asm volatile( + "vor.vv v0, %1, %1\n\t" + "vsetvli x0, %3, e64,m8 \n\t" + "vfrsub.vf %0, %0, %2, v0.t \n\t" + :"+v"(vx0) + :"v"(mask0), "f"(zero), "r"(gvl) + :"v0"); +#else +asm volatile( + "vor.vv v0, %1, %1\n\t" + "vsetvli x0, %3, e32,m8 \n\t" + "vfrsub.vf %0, %0, %2, v0.t \n\t" + :"+v"(vx0) + :"v"(mask0), "f"(zero), "r"(gvl) + :"v0"); +#endif +*/ + vx1 = VLSEV_FLOAT(&x[ix+1], stride_x, gvl); + //fabs(vector) + mask1 = VMFLTVF_FLOAT(vx1, 0, gvl); + vx1 = VFRSUBVF_MASK_FLOAT(vx1, vx1, 0, mask1, gvl); +/* +#if defined(DOUBLE) +asm volatile( + "vor.vv v0, %1, %1\n\t" + "vsetvli x0, %3, e64,m8 \n\t" + "vfrsub.vf %0, %0, %2, v0.t \n\t" + :"+v"(vx1) + :"v"(mask1), "f"(zero), "r"(gvl) + :"v0"); +#else +asm volatile( + "vor.vv v0, %1, %1\n\t" + "vsetvli x0, %3, e32,m8 \n\t" + "vfrsub.vf %0, %0, %2, v0.t \n\t" + :"+v"(vx1) + :"v"(mask1), "f"(zero), "r"(gvl) + :"v0"); +#endif +*/ + v_max = VFADDVV_FLOAT(vx0, vx1, gvl); + vx0 = VFMVVF_FLOAT(0, gvl); + vx0 = VFREDMAXVS_FLOAT(v_max, vx0, gvl); + FLOAT cur_maxf = vx0[0]; + if(cur_maxf > maxf){ + //tail index + v_max_index = VIDV_UINT(gvl); + v_max_index = VADDVX_UINT(v_max_index, j, gvl); + + mask0 = VMFGEVF_FLOAT(v_max, cur_maxf, gvl); + max_index = VMFIRSTM(mask0,gvl); + max_index = v_max_index[max_index]; + } + } + return(max_index+1); +} + + diff --git a/kernel/riscv64/izamin.c b/kernel/riscv64/izamin.c new file mode 100644 index 000000000..fb5a0d4cb --- /dev/null +++ b/kernel/riscv64/izamin.c @@ -0,0 +1,81 @@ 
+/*************************************************************************** +Copyright (c) 2013, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*****************************************************************************/ + +/************************************************************************************** +* 2013/09/14 Saar +* BLASTEST float : NoTest +* BLASTEST double : NoTest +* CTEST : NoTest +* TEST : NoTest +* +**************************************************************************************/ + +#include "common.h" +#include + +#if defined(DOUBLE) + +#define ABS fabs + +#else + +#define ABS fabsf + +#endif + +#define CABS1(x,i) ABS(x[i])+ABS(x[i+1]) + +BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) +{ + BLASLONG i=0; + BLASLONG ix=0; + FLOAT minf; + BLASLONG min=0; + BLASLONG inc_x2; + + if (n <= 0 || inc_x <= 0) return(min); + + inc_x2 = 2 * inc_x; + + minf = CABS1(x,0); + ix += inc_x2; + i++; + + while(i < n) + { + if( CABS1(x,ix) < minf ) + { + min = i; + minf = CABS1(x,ix); + } + ix += inc_x2; + i++; + } + return(min+1); +} + + diff --git a/kernel/riscv64/izamin_vector.c b/kernel/riscv64/izamin_vector.c new file mode 100644 index 000000000..38eccf1b5 --- /dev/null +++ b/kernel/riscv64/izamin_vector.c @@ -0,0 +1,247 @@ +/*************************************************************************** +Copyright (c) 2020, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. 
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*****************************************************************************/ + +#include "common.h" +#include +#include + +#if defined(DOUBLE) + +#define RVV_EFLOAT RVV_E64 +#define FLOAT_V_T float64xm8_t +#define VLSEV_FLOAT vlsev_float64xm8 +#define VFREDMINVS_FLOAT vfredminvs_float64xm8 +#define MASK_T e64xm8_t +#define VMFLTVF_FLOAT vmfltvf_e64xm8_float64xm8 +#define VMFLTVV_FLOAT vmfltvv_e64xm8_float64xm8 +#define VFMVVF_FLOAT vfmvvf_float64xm8 +#define VFRSUBVF_MASK_FLOAT vfrsubvf_mask_float64xm8 +#define VFMINVV_FLOAT vfminvv_float64xm8 +#define VMFLEVF_FLOAT vmflevf_e64xm8_float64xm8 +#define VMFIRSTM vmfirstm_e64xm8 +#define UINT_V_T uint64xm8_t +#define VIDV_MASK_UINT vidv_mask_uint64xm8 +#define VIDV_UINT vidv_uint64xm8 +#define VADDVX_MASK_UINT vaddvx_mask_uint64xm8 +#define VADDVX_UINT vaddvx_uint64xm8 +#define VFADDVV_FLOAT vfaddvv_float64xm8 +#define VMVVX_UINT vmvvx_uint64xm8 +#else + +#define ABS fabsf +#define RVV_EFLOAT RVV_E32 +#define FLOAT_V_T float32xm8_t +#define VLSEV_FLOAT vlsev_float32xm8 +#define VFREDMINVS_FLOAT vfredminvs_float32xm8 +#define MASK_T e32xm8_t +#define VMFLTVF_FLOAT vmfltvf_e32xm8_float32xm8 +#define VMFLTVV_FLOAT vmfltvv_e32xm8_float32xm8 +#define VFMVVF_FLOAT 
vfmvvf_float32xm8 +#define VFRSUBVF_MASK_FLOAT vfrsubvf_mask_float32xm8 +#define VFMINVV_FLOAT vfminvv_float32xm8 +#define VMFLEVF_FLOAT vmflevf_e32xm8_float32xm8 +#define VMFIRSTM vmfirstm_e32xm8 +#define UINT_V_T uint32xm8_t +#define VIDV_MASK_UINT vidv_mask_uint32xm8 +#define VIDV_UINT vidv_uint32xm8 +#define VADDVX_MASK_UINT vaddvx_mask_uint32xm8 +#define VADDVX_UINT vaddvx_uint32xm8 +#define VFADDVV_FLOAT vfaddvv_float32xm8 +#define VMVVX_UINT vmvvx_uint32xm8 +#endif + +#define RVV_M RVV_M8 + +BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) +{ + BLASLONG i=0, j=0; + FLOAT minf=FLT_MAX; + unsigned int min_index = 0; + if (n <= 0 || inc_x <= 0) return(min_index); + + FLOAT_V_T vx0, vx1, v_min; + UINT_V_T v_min_index; + MASK_T mask0, mask1; + unsigned int gvl = 0; + gvl = vsetvli(n, RVV_EFLOAT, RVV_M); + v_min_index = VMVVX_UINT(0, gvl); + v_min = VFMVVF_FLOAT(FLT_MAX, gvl); + BLASLONG stride_x = inc_x * 2 * sizeof(FLOAT); + BLASLONG inc_xv = gvl * inc_x * 2; + BLASLONG ix = 0; + for(i=0,j=0; i < n/gvl; i++){ + vx0 = VLSEV_FLOAT(&x[ix], stride_x, gvl); + //fabs(vector) + mask0 = VMFLTVF_FLOAT(vx0, 0, gvl); + vx0 = VFRSUBVF_MASK_FLOAT(vx0, vx0, 0, mask0, gvl); +/* +#if defined(DOUBLE) +asm volatile( + "vor.vv v0, %1, %1\n\t" + "vsetvli x0, %3, e64,m8 \n\t" + "vfrsub.vf %0, %0, %2, v0.t \n\t" + :"+v"(vx0) + :"v"(mask0), "f"(zero), "r"(gvl) + :"v0"); +#else +asm volatile( + "vor.vv v0, %1, %1\n\t" + "vsetvli x0, %3, e32,m8 \n\t" + "vfrsub.vf %0, %0, %2, v0.t \n\t" + :"+v"(vx0) + :"v"(mask0), "f"(zero), "r"(gvl) + :"v0"); +#endif +*/ + vx1 = VLSEV_FLOAT(&x[ix+1], stride_x, gvl); + //fabs(vector) + mask1 = VMFLTVF_FLOAT(vx1, 0, gvl); + vx1 = VFRSUBVF_MASK_FLOAT(vx1, vx1, 0, mask1, gvl); +/* +#if defined(DOUBLE) +asm volatile( + "vor.vv v0, %1, %1\n\t" + "vsetvli x0, %3, e64,m8 \n\t" + "vfrsub.vf %0, %0, %2, v0.t \n\t" + :"+v"(vx1) + :"v"(mask1), "f"(zero), "r"(gvl) + :"v0"); +#else +asm volatile( + "vor.vv v0, %1, %1\n\t" + "vsetvli x0, %3, e32,m8 \n\t" + 
"vfrsub.vf %0, %0, %2, v0.t \n\t" + :"+v"(vx1) + :"v"(mask1), "f"(zero), "r"(gvl) + :"v0"); +#endif +*/ + vx0 = VFADDVV_FLOAT(vx0, vx1, gvl); + + //index where element less than v_min + mask0 = VMFLTVV_FLOAT(vx0, v_min, gvl); + v_min_index = VIDV_MASK_UINT(v_min_index, mask0, gvl); +/* +#if defined(DOUBLE) +asm volatile( + "vor.vv v0, %1, %1 \n\t" + "vsetvli x0, %2, e64,m8 \n\t" + "vid.v %0, v0.t \n\t" + :"+v"(v_min_index) + :"v"(mask0), "r"(gvl) + :"v0"); +#else +asm volatile( + "vor.vv v0, %1, %1 \n\t" + "vsetvli x0, %2, e32,m8 \n\t" + "vid.v %0, v0.t \n\t" + :"+v"(v_min_index) + :"v"(mask0), "r"(gvl) + :"v0"); +#endif +*/ + v_min_index = VADDVX_MASK_UINT(v_min_index, v_min_index, j, mask0, gvl); + + //update v_min and start_index j + v_min = VFMINVV_FLOAT(v_min, vx0, gvl); + j += gvl; + ix += inc_xv; + } + vx0 = VFMVVF_FLOAT(FLT_MAX, gvl); + vx0 = VFREDMINVS_FLOAT(v_min, vx0, gvl); + minf = vx0[0]; + mask0 = VMFLEVF_FLOAT(v_min, minf, gvl); + min_index = VMFIRSTM(mask0,gvl); + min_index = v_min_index[min_index]; + + if(j < n){ + gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M); + v_min_index = VMVVX_UINT(0, gvl); + vx0 = VLSEV_FLOAT(&x[ix], stride_x, gvl); + //fabs(vector) + mask0 = VMFLTVF_FLOAT(vx0, 0, gvl); + vx0 = VFRSUBVF_MASK_FLOAT(vx0, vx0, 0, mask0, gvl); +/* +#if defined(DOUBLE) +asm volatile( + "vor.vv v0, %1, %1\n\t" + "vsetvli x0, %3, e64,m8 \n\t" + "vfrsub.vf %0, %0, %2, v0.t \n\t" + :"+v"(vx0) + :"v"(mask0), "f"(zero), "r"(gvl) + :"v0"); +#else +asm volatile( + "vor.vv v0, %1, %1\n\t" + "vsetvli x0, %3, e32,m8 \n\t" + "vfrsub.vf %0, %0, %2, v0.t \n\t" + :"+v"(vx0) + :"v"(mask0), "f"(zero), "r"(gvl) + :"v0"); +#endif +*/ + vx1 = VLSEV_FLOAT(&x[ix+1], stride_x, gvl); + //fabs(vector) + mask1 = VMFLTVF_FLOAT(vx1, 0, gvl); + vx1 = VFRSUBVF_MASK_FLOAT(vx1, vx1, 0, mask1, gvl); +/* +#if defined(DOUBLE) +asm volatile( + "vor.vv v0, %1, %1\n\t" + "vsetvli x0, %3, e64,m8 \n\t" + "vfrsub.vf %0, %0, %2, v0.t \n\t" + :"+v"(vx1) + :"v"(mask1), "f"(zero), "r"(gvl) + 
:"v0"); +#else +asm volatile( + "vor.vv v0, %1, %1\n\t" + "vsetvli x0, %3, e32,m8 \n\t" + "vfrsub.vf %0, %0, %2, v0.t \n\t" + :"+v"(vx1) + :"v"(mask1), "f"(zero), "r"(gvl) + :"v0"); +#endif +*/ + v_min = VFADDVV_FLOAT(vx0, vx1, gvl); + vx0 = VFMVVF_FLOAT(FLT_MAX, gvl); + vx0 = VFREDMINVS_FLOAT(v_min, vx0, gvl); + FLOAT cur_minf = vx0[0]; + if(cur_minf < minf){ + //tail index + v_min_index = VIDV_UINT(gvl); + v_min_index = VADDVX_UINT(v_min_index, j, gvl); + + mask0 = VMFLEVF_FLOAT(v_min, cur_minf, gvl); + min_index = VMFIRSTM(mask0,gvl); + min_index = v_min_index[min_index]; + } + } + return(min_index+1); +} + + diff --git a/kernel/riscv64/max.c b/kernel/riscv64/max.c new file mode 100644 index 000000000..2ad956bc0 --- /dev/null +++ b/kernel/riscv64/max.c @@ -0,0 +1,65 @@ +/*************************************************************************** +Copyright (c) 2013, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*****************************************************************************/ + +/************************************************************************************** +* 2013/09/14 Saar +* BLASTEST float : NoTest +* BLASTEST double : NoTest +* CTEST : NoTest +* TEST : NoTest +* +**************************************************************************************/ + +#include "common.h" +#include + + +FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) +{ + BLASLONG i=0; + BLASLONG ix=0; + FLOAT maxf=0.0; + + if (n <= 0 || inc_x <= 0) return(maxf); + + maxf=x[0]; + ix += inc_x; + i++; + + while(i < n) + { + if( x[ix] > maxf ) + { + maxf = x[ix]; + } + ix += inc_x; + i++; + } + return(maxf); +} + + diff --git a/kernel/riscv64/max_vector.c b/kernel/riscv64/max_vector.c new file mode 100644 index 000000000..4ef75452d --- /dev/null +++ b/kernel/riscv64/max_vector.c @@ -0,0 +1,116 @@ +/*************************************************************************** +Copyright (c) 2020, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. 
Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*****************************************************************************/ + +#include "common.h" +#include +#include +#if !defined(DOUBLE) +#define RVV_EFLOAT RVV_E32 +#define RVV_M RVV_M8 +#define FLOAT_V_T float32xm8_t +#define VLEV_FLOAT vlev_float32xm8 +#define VLSEV_FLOAT vlsev_float32xm8 +#define VFREDMAXVS_FLOAT vfredmaxvs_float32xm8 +#define VFMVVF_FLOAT vfmvvf_float32xm8 +#define VFMAXVV_FLOAT vfmaxvv_float32xm8 +#else +#define RVV_EFLOAT RVV_E64 +#define RVV_M RVV_M8 +#define FLOAT_V_T float64xm8_t +#define VLEV_FLOAT vlev_float64xm8 +#define VLSEV_FLOAT vlsev_float64xm8 +#define VFREDMAXVS_FLOAT vfredmaxvs_float64xm8 +#define VFMVVF_FLOAT vfmvvf_float64xm8 +#define VFMAXVV_FLOAT vfmaxvv_float64xm8 +#endif + +FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) +{ + BLASLONG i=0, j=0; + if (n <= 0 || inc_x <= 0) return(0.0); + FLOAT maxf=-FLT_MAX; + unsigned int gvl = 0; + FLOAT_V_T v0, v1, v_max; + + if(inc_x == 1){ + gvl = vsetvli(n, RVV_EFLOAT, RVV_M); + if(gvl <= n/2){ + v_max = VFMVVF_FLOAT(-FLT_MAX, gvl); + for(i=0,j=0; i maxf) + maxf = v0[0]; + j += gvl; + } + }else{ + gvl = vsetvli(n, RVV_EFLOAT, RVV_M); + BLASLONG stride_x = inc_x * sizeof(FLOAT); + if(gvl <= n/2){ + v_max = VFMVVF_FLOAT(-FLT_MAX, gvl); + BLASLONG idx = 0, inc_xv = inc_x * gvl; + for(i=0,j=0; i maxf) + maxf = v0[0]; + j += gvl; + } + } + return(maxf); +} + + diff --git a/kernel/riscv64/min.c b/kernel/riscv64/min.c new file mode 100644 index 000000000..2812fe397 --- /dev/null +++ b/kernel/riscv64/min.c @@ -0,0 +1,65 @@ +/*************************************************************************** +Copyright (c) 2013, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. 
Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*****************************************************************************/ + +/************************************************************************************** +* 2013/09/14 Saar +* BLASTEST float : NoTest +* BLASTEST double : NoTest +* CTEST : NoTest +* TEST : NoTest +* +**************************************************************************************/ + +#include "common.h" +#include + + +FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) +{ + BLASLONG i=0; + BLASLONG ix=0; + FLOAT minf=0.0; + + if (n <= 0 || inc_x <= 0) return(minf); + + minf=x[0]; + ix += inc_x; + i++; + + while(i < n) + { + if( x[ix] < minf ) + { + minf = x[ix]; + } + ix += inc_x; + i++; + } + return(minf); +} + + diff --git a/kernel/riscv64/min_vector.c b/kernel/riscv64/min_vector.c new file mode 100644 index 000000000..83c965bfa --- /dev/null +++ b/kernel/riscv64/min_vector.c @@ -0,0 +1,116 @@ +/*************************************************************************** +Copyright (c) 2020, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*****************************************************************************/ + +#include "common.h" +#include +#include +#if !defined(DOUBLE) +#define RVV_EFLOAT RVV_E32 +#define RVV_M RVV_M8 +#define FLOAT_V_T float32xm8_t +#define VLEV_FLOAT vlev_float32xm8 +#define VLSEV_FLOAT vlsev_float32xm8 +#define VFREDMINVS_FLOAT vfredminvs_float32xm8 +#define VFMVVF_FLOAT vfmvvf_float32xm8 +#define VFMINVV_FLOAT vfminvv_float32xm8 +#else +#define RVV_EFLOAT RVV_E64 +#define RVV_M RVV_M8 +#define FLOAT_V_T float64xm8_t +#define VLEV_FLOAT vlev_float64xm8 +#define VLSEV_FLOAT vlsev_float64xm8 +#define VFREDMINVS_FLOAT vfredminvs_float64xm8 +#define VFMVVF_FLOAT vfmvvf_float64xm8 +#define VFMINVV_FLOAT vfminvv_float64xm8 +#endif + +FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) +{ + BLASLONG i=0, j=0; + if (n <= 0 || inc_x <= 0) return(0.0); + FLOAT minf=FLT_MAX; + unsigned int gvl = 0; + FLOAT_V_T v0, v1, v_min; + + if(inc_x == 1){ + gvl = vsetvli(n, RVV_EFLOAT, RVV_M); + if(gvl <= n/2){ + v_min = VFMVVF_FLOAT(FLT_MAX, gvl); + for(i=0,j=0; i + +#if defined(DOUBLE) + +#define ABS fabs + +#else + +#define ABS fabsf + +#endif + + + +FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) +{ + BLASLONG i=0; + FLOAT scale = 0.0; + FLOAT ssq = 1.0; + FLOAT absxi = 0.0; + + + if (n <= 0 || inc_x <= 0) return(0.0); + if ( n == 1 ) return( ABS(x[0]) ); + + n *= inc_x; + while(i < n) + { + + if ( x[i] != 0.0 ) + { + absxi = ABS( x[i] ); + if ( scale < absxi ) + { + ssq = 1 
+ ssq * ( scale / absxi ) * ( scale / absxi ); + scale = absxi ; + } + else + { + ssq += ( absxi/scale ) * ( absxi/scale ); + } + + } + i += inc_x; + } + scale = scale * sqrt( ssq ); + return(scale); + +} + + diff --git a/kernel/riscv64/nrm2_vector.c b/kernel/riscv64/nrm2_vector.c new file mode 100644 index 000000000..785c0d2f8 --- /dev/null +++ b/kernel/riscv64/nrm2_vector.c @@ -0,0 +1,220 @@ +/*************************************************************************** +Copyright (c) 2020, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*****************************************************************************/ + +#include "common.h" +#if !defined(DOUBLE) +#define RVV_EFLOAT RVV_E32 +#define RVV_M RVV_M4 +#define FLOAT_V_T float32xm4_t +#define VLEV_FLOAT vlev_float32xm4 +#define VLSEV_FLOAT vlsev_float32xm4 +#define VFREDSUM_FLOAT vfredsumvs_float32xm4 +#define VFMACCVV_FLOAT vfmaccvv_float32xm4 +#define VFMVVF_FLOAT vfmvvf_float32xm4 +#define VFDOTVV_FLOAT vfdotvv_float32xm4 +#define ABS fabsf +#define MASK_T e32xm4_t +#define VFRSUBVF_MASK_FLOAT vfrsubvf_mask_float32xm4 +#define VMFGTVF_FLOAT vmfgtvf_e32xm4_float32xm4 +#define VMFIRSTM vmfirstm_e32xm4 +#define VFDIVVF_FLOAT vfdivvf_float32xm4 +#define VMFLTVF_FLOAT vmfltvf_e32xm4_float32xm4 +#define VFREDMAXVS_FLOAT vfredmaxvs_float32xm4 +#else +#define RVV_EFLOAT RVV_E64 +#define RVV_M RVV_M4 +#define FLOAT_V_T float64xm4_t +#define VLEV_FLOAT vlev_float64xm4 +#define VLSEV_FLOAT vlsev_float64xm4 +#define VFREDSUM_FLOAT vfredsumvs_float64xm4 +#define VFMACCVV_FLOAT vfmaccvv_float64xm4 +#define VFMVVF_FLOAT vfmvvf_float64xm4 +#define VFDOTVV_FLOAT vfdotvv_float64xm4 +#define ABS fabs +#define MASK_T e64xm4_t +#define VFRSUBVF_MASK_FLOAT vfrsubvf_mask_float64xm4 +#define VMFGTVF_FLOAT vmfgtvf_e64xm4_float64xm4 +#define VMFIRSTM vmfirstm_e64xm4 +#define VFDIVVF_FLOAT vfdivvf_float64xm4 +#define VMFLTVF_FLOAT vmfltvf_e64xm4_float64xm4 +#define VFREDMAXVS_FLOAT vfredmaxvs_float64xm4 +#endif + +FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) +{ + BLASLONG i=0, j=0; + + if ( n < 0 ) return(0.0); + if(n == 1) return (ABS(x[0])); + + FLOAT_V_T vr, v0, v_zero; + unsigned int gvl = 0; + FLOAT scale = 0.0, ssq = 0.0; + MASK_T mask; + BLASLONG index = 0; + if(inc_x == 1){ + gvl = vsetvli(n, RVV_EFLOAT, RVV_M); + vr = VFMVVF_FLOAT(0, gvl); + v_zero = VFMVVF_FLOAT(0, gvl); + for(i=0,j=0; i + +#define KERNEL16x4_I \ + "addi t1, %[PB], 1*4 \n\t"\ + "addi t2, %[PB], 2*4 \n\t"\ + "addi t3, %[PB], 3*4 \n\t"\ + "flw ft0, (%[PB]) \n\t"\ + "flw ft1, (t1) 
/*
 * KERNEL16x4_M1 -- inline-asm fragment (string literal, pasted into an asm
 * statement elsewhere in the file).
 *
 * Multiply-accumulate: v16..v31 += {v8,v9,v10,v11} * {v0,v1,v2,v3}
 * (vfmacc.vv; four accumulators per broadcast register).
 *
 * Interleaved with the arithmetic, it fetches the operands of the other
 * register set:
 *   - vle.v into v4..v7 from %[PA], t4, t5, t6, each pointer then advanced
 *     by 16*4;
 *   - flw into ft4..ft7 from %[PB], t1, t2, t3, each pointer then advanced
 *     by 4*4, followed by vfmv.v.f broadcasts into v12..v15.
 *
 * NOTE(review): presumably the first half of a two-step software pipeline
 * whose second half is KERNEL16x4_M2 (which consumes v4..v7 / v12..v15) --
 * confirm against the code that expands these macros.  The instruction
 * order is deliberate scheduling; do not reorder.
 */
#define KERNEL16x4_M1 \
    "vfmacc.vv v16, v8, v0 \n\t"\
    "vle.v v4, (%[PA]) \n\t"\
    "addi %[PA], %[PA], 16*4 \n\t"\
    "vfmacc.vv v17, v8, v1 \n\t"\
    "vle.v v5, (t4) \n\t"\
    "addi t4, t4, 16*4 \n\t"\
    "vfmacc.vv v18, v8, v2 \n\t"\
    "vle.v v6, (t5) \n\t"\
    "addi t5, t5, 16*4 \n\t"\
    "vfmacc.vv v19, v8, v3 \n\t"\
    "vle.v v7, (t6) \n\t"\
    "addi t6, t6, 16*4 \n\t"\
    "vfmacc.vv v20, v9, v0 \n\t"\
    "flw ft4, (%[PB]) \n\t"\
    "vfmacc.vv v21, v9, v1 \n\t"\
    "flw ft5, (t1) \n\t"\
    "vfmacc.vv v22, v9, v2 \n\t"\
    "flw ft6, (t2) \n\t"\
    "vfmacc.vv v23, v9, v3 \n\t"\
    "flw ft7, (t3) \n\t"\
    "addi %[PB], %[PB], 4*4 \n\t"\
    "vfmacc.vv v24, v10, v0 \n\t"\
    "addi t1, t1, 4*4 \n\t"\
    "vfmacc.vv v25, v10, v1 \n\t"\
    "vfmv.v.f v12, ft4 \n\t"\
    "vfmacc.vv v26, v10, v2 \n\t"\
    "addi t2, t2, 4*4 \n\t"\
    "vfmacc.vv v27, v10, v3 \n\t"\
    "vfmv.v.f v13, ft5 \n\t"\
    "vfmacc.vv v28, v11, v0 \n\t"\
    "addi t3, t3, 4*4 \n\t"\
    "vfmacc.vv v29, v11, v1 \n\t"\
    "vfmv.v.f v14, ft6 \n\t"\
    "vfmacc.vv v30, v11, v2 \n\t"\
    "vfmacc.vv v31, v11, v3 \n\t"\
    "vfmv.v.f v15, ft7 \n\t"
\n\t"\ + "vfmacc.vv v20, v13, v4 \n\t"\ + "vfmacc.vv v21, v13, v5 \n\t"\ + "vfmacc.vv v22, v13, v6 \n\t"\ + "vfmacc.vv v23, v13, v7 \n\t"\ + "vfmacc.vv v24, v14, v4 \n\t"\ + "vfmacc.vv v25, v14, v5 \n\t"\ + "vfmacc.vv v26, v14, v6 \n\t"\ + "vfmacc.vv v27, v14, v7 \n\t"\ + "vfmacc.vv v28, v15, v4 \n\t"\ + "vfmacc.vv v29, v15, v5 \n\t"\ + "vfmacc.vv v30, v15, v6 \n\t"\ + "vfmacc.vv v31, v15, v7 \n\t" + + +#define KERNEL8x4_I \ + "addi t1, %[PB], 1*4 \n\t"\ + "addi t2, %[PB], 2*4 \n\t"\ + "addi t3, %[PB], 3*4 \n\t"\ + "flw ft0, (%[PB]) \n\t"\ + "flw ft1, (t1) \n\t"\ + "flw ft2, (t2) \n\t"\ + "flw ft3, (t3) \n\t"\ + "vle.v v0, (%[PA]) \n\t"\ + "addi t4, %[PA], 4*4 \n\t"\ + "vfmv.v.f v8, ft0 \n\t"\ + "addi %[PA], %[PA], 8*4 \n\t"\ + "vle.v v1, (t4) \n\t"\ + "addi t4, t4, 8*4 \n\t"\ + "vfmv.v.f v9, ft1 \n\t"\ + "vfmv.v.f v10, ft2 \n\t"\ + "addi %[PB], %[PB], 4*4 \n\t"\ + "vle.v v4, (%[PA]) \n\t"\ + "addi %[PA], %[PA], 8*4 \n\t"\ + "vfmv.v.f v11, ft3 \n\t"\ + "vfmacc.vv v16, v8, v0 \n\t"\ + "addi t1, t1, 4*4 \n\t"\ + "vle.v v5, (t4) \n\t"\ + "addi t4, t4, 8*4 \n\t"\ + "vfmacc.vv v17, v8, v1 \n\t"\ + "addi t2, t2, 4*4 \n\t"\ + "flw ft4, (%[PB]) \n\t"\ + "addi t3, t3, 4*4 \n\t"\ + "vfmacc.vv v20, v9, v0 \n\t"\ + "flw ft5, (t1) \n\t"\ + "vfmacc.vv v21, v9, v1 \n\t"\ + "flw ft6, (t2) \n\t"\ + "vfmv.v.f v12, ft4 \n\t"\ + "flw ft7, (t3) \n\t"\ + "vfmacc.vv v24, v10, v0 \n\t"\ + "vfmv.v.f v13, ft5 \n\t"\ + "vfmacc.vv v25, v10, v1 \n\t"\ + "vfmv.v.f v14, ft6 \n\t"\ + "addi %[PB], %[PB], 4*4 \n\t"\ + "vfmv.v.f v15, ft7 \n\t"\ + "addi t1, t1, 4*4 \n\t"\ + "vfmacc.vv v28, v11, v0 \n\t"\ + "addi t2, t2, 4*4 \n\t"\ + "vfmacc.vv v29, v11, v1 \n\t"\ + "addi t3, t3, 4*4 \n\t" + + +#define KERNEL8x4_M1 \ + "vfmacc.vv v16, v8, v0 \n\t"\ + "vle.v v4, (%[PA]) \n\t"\ + "addi %[PA], %[PA], 8*4 \n\t"\ + "vfmacc.vv v17, v8, v1 \n\t"\ + "vle.v v5, (t4) \n\t"\ + "addi t4, t4, 8*4 \n\t"\ + "vfmacc.vv v20, v9, v0 \n\t"\ + "flw ft4, (%[PB]) \n\t"\ + "vfmacc.vv v21, v9, v1 \n\t"\ + "flw ft5, (t1) 
\n\t"\ + "addi %[PB], %[PB], 4*4 \n\t"\ + "flw ft6, (t2) \n\t"\ + "vfmacc.vv v24, v10, v0 \n\t"\ + "flw ft7, (t3) \n\t"\ + "addi t1, t1, 4*4 \n\t"\ + "vfmacc.vv v25, v10, v1 \n\t"\ + "vfmv.v.f v12, ft4 \n\t"\ + "addi t2, t2, 4*4 \n\t"\ + "vfmv.v.f v13, ft5 \n\t"\ + "vfmacc.vv v28, v11, v0 \n\t"\ + "addi t3, t3, 4*4 \n\t"\ + "vfmacc.vv v29, v11, v1 \n\t"\ + "vfmv.v.f v14, ft6 \n\t"\ + "vfmv.v.f v15, ft7 \n\t" + +#define KERNEL8x4_M2 \ + "vfmacc.vv v16, v12, v4 \n\t"\ + "vle.v v0, (%[PA]) \n\t"\ + "addi %[PA], %[PA], 8*4 \n\t"\ + "vfmacc.vv v17, v12, v5 \n\t"\ + "vle.v v1, (t4) \n\t"\ + "addi t4, t4, 8*4 \n\t"\ + "vfmacc.vv v20, v13, v4 \n\t"\ + "flw ft0, (%[PB]) \n\t"\ + "vfmacc.vv v21, v13, v5 \n\t"\ + "flw ft1, (t1) \n\t"\ + "addi %[PB], %[PB], 4*4 \n\t"\ + "flw ft2, (t2) \n\t"\ + "vfmacc.vv v24, v14, v4 \n\t"\ + "flw ft3, (t3) \n\t"\ + "addi t1, t1, 4*4 \n\t"\ + "vfmacc.vv v25, v14, v5 \n\t"\ + "vfmv.v.f v8, ft0 \n\t"\ + "addi t2, t2, 4*4 \n\t"\ + "vfmv.v.f v9, ft1 \n\t"\ + "vfmacc.vv v28, v15, v4 \n\t"\ + "addi t3, t3, 4*4 \n\t"\ + "vfmacc.vv v29, v15, v5 \n\t"\ + "vfmv.v.f v10, ft2 \n\t"\ + "vfmv.v.f v11, ft3 \n\t" + +#define KERNEL8x4_E \ + "vfmacc.vv v16, v12, v4 \n\t"\ + "vfmacc.vv v17, v12, v5 \n\t"\ + "vfmacc.vv v20, v13, v4 \n\t"\ + "vfmacc.vv v21, v13, v5 \n\t"\ + "vfmacc.vv v24, v14, v4 \n\t"\ + "vfmacc.vv v25, v14, v5 \n\t"\ + "vfmacc.vv v28, v15, v4 \n\t"\ + "vfmacc.vv v29, v15, v5 \n\t" + + +#define KERNEL16x2_I \ + "addi t1, %[PB], 1*4 \n\t"\ + "flw ft0, (%[PB]) \n\t"\ + "flw ft1, (t1) \n\t"\ + "vle.v v0, (%[PA]) \n\t"\ + "addi t4, %[PA], 4*4 \n\t"\ + "addi t5, %[PA], 8*4 \n\t"\ + "vfmv.v.f v8, ft0 \n\t"\ + "addi t6, %[PA], 12*4 \n\t"\ + "addi %[PA], %[PA], 16*4 \n\t"\ + "vle.v v1, (t4) \n\t"\ + "addi t4, t4, 16*4 \n\t"\ + "vfmv.v.f v9, ft1 \n\t"\ + "vle.v v2, (t5) \n\t"\ + "addi t5, t5, 16*4 \n\t"\ + "vle.v v3, (t6) \n\t"\ + "addi t6, t6, 16*4 \n\t"\ + "addi %[PB], %[PB], 2*4 \n\t"\ + "vle.v v4, (%[PA]) \n\t"\ + "addi %[PA], %[PA], 16*4 \n\t"\ + 
"vfmacc.vv v16, v8, v0 \n\t"\ + "addi t1, t1, 2*4 \n\t"\ + "vle.v v5, (t4) \n\t"\ + "addi t4, t4, 16*4 \n\t"\ + "vfmacc.vv v17, v8, v1 \n\t"\ + "vle.v v6, (t5) \n\t"\ + "addi t5, t5, 16*4 \n\t"\ + "vfmacc.vv v18, v8, v2 \n\t"\ + "vle.v v7, (t6) \n\t"\ + "addi t6, t6, 16*4 \n\t"\ + "vfmacc.vv v19, v8, v3 \n\t"\ + "flw ft4, (%[PB]) \n\t"\ + "vfmacc.vv v20, v9, v0 \n\t"\ + "flw ft5, (t1) \n\t"\ + "vfmacc.vv v21, v9, v1 \n\t"\ + "addi %[PB], %[PB], 2*4 \n\t"\ + "vfmacc.vv v22, v9, v2 \n\t"\ + "addi t1, t1, 2*4 \n\t"\ + "vfmacc.vv v23, v9, v3 \n\t"\ + "vfmv.v.f v12, ft4 \n\t"\ + "vfmv.v.f v13, ft5 \n\t" + + +#define KERNEL16x2_M1 \ + "vfmacc.vv v16, v8, v0 \n\t"\ + "vle.v v4, (%[PA]) \n\t"\ + "addi %[PA], %[PA], 16*4 \n\t"\ + "vfmacc.vv v17, v8, v1 \n\t"\ + "vle.v v5, (t4) \n\t"\ + "addi t4, t4, 16*4 \n\t"\ + "vfmacc.vv v18, v8, v2 \n\t"\ + "vle.v v6, (t5) \n\t"\ + "addi t5, t5, 16*4 \n\t"\ + "vfmacc.vv v19, v8, v3 \n\t"\ + "vle.v v7, (t6) \n\t"\ + "addi t6, t6, 16*4 \n\t"\ + "flw ft4, (%[PB]) \n\t"\ + "vfmacc.vv v20, v9, v0 \n\t"\ + "flw ft5, (t1) \n\t"\ + "vfmacc.vv v21, v9, v1 \n\t"\ + "vfmv.v.f v12, ft4 \n\t"\ + "vfmacc.vv v22, v9, v2 \n\t"\ + "addi t1, t1, 2*4 \n\t"\ + "vfmacc.vv v23, v9, v3 \n\t"\ + "addi %[PB], %[PB], 2*4 \n\t"\ + "vfmv.v.f v13, ft5 \n\t" + + +#define KERNEL16x2_M2 \ + "vfmacc.vv v16, v12, v4 \n\t"\ + "vle.v v0, (%[PA]) \n\t"\ + "addi %[PA], %[PA], 16*4 \n\t"\ + "vfmacc.vv v17, v12, v5 \n\t"\ + "vle.v v1, (t4) \n\t"\ + "addi t4, t4, 16*4 \n\t"\ + "vfmacc.vv v18, v12, v6 \n\t"\ + "vle.v v2, (t5) \n\t"\ + "addi t5, t5, 16*4 \n\t"\ + "vfmacc.vv v19, v12, v7 \n\t"\ + "vle.v v3, (t6) \n\t"\ + "addi t6, t6, 16*4 \n\t"\ + "vfmacc.vv v20, v13, v4 \n\t"\ + "flw ft0, (%[PB]) \n\t"\ + "vfmacc.vv v21, v13, v5 \n\t"\ + "flw ft1, (t1) \n\t"\ + "vfmacc.vv v22, v13, v6 \n\t"\ + "vfmv.v.f v8, ft0 \n\t"\ + "vfmacc.vv v23, v13, v7 \n\t"\ + "addi %[PB], %[PB], 2*4 \n\t"\ + "addi t1, t1, 2*4 \n\t"\ + "vfmv.v.f v9, ft1 \n\t" + + +#define KERNEL16x2_E \ + "vfmacc.vv 
v16, v12, v4 \n\t"\
+ "vfmacc.vv v17, v12, v5 \n\t"\
+ "vfmacc.vv v18, v12, v6 \n\t"\
+ "vfmacc.vv v19, v12, v7 \n\t"\
+ "vfmacc.vv v20, v13, v4 \n\t"\
+ "vfmacc.vv v21, v13, v5 \n\t"\
+ "vfmacc.vv v22, v13, v6 \n\t"\
+ "vfmacc.vv v23, v13, v7 \n\t"
+
+
+int CNAME(BLASLONG bm,BLASLONG bn,BLASLONG bk,FLOAT alpha,FLOAT* ba,FLOAT* bb,FLOAT* C,BLASLONG ldc
+#ifdef TRMMKERNEL
+ ,BLASLONG offset
+#endif
+ )
+{
+ BLASLONG i,j,k;
+ FLOAT *C0,*C1,*C2,*C3;
+ FLOAT *ptrba,*ptrbb;
+
+ FLOAT loadb0,loadb1,loadb2,loadb3;
+ FLOAT load0,load1,load2,load3,load4,load5,load6,load7;
+
+ FLOAT res0,res1,res2,res3;
+ FLOAT res4,res5,res6,res7;
+ FLOAT res8,res9,res10,res11;
+ FLOAT res12,res13,res14,res15;
+
+ for (j=0; j
+
+/*
+ * Exchanges n elements between x and y.
+ *
+ * Element k of x is x[k * inc_x] and element k of y is y[k * inc_y].
+ * Nothing is touched when n is zero or negative.  The dummy* arguments
+ * are not read.  Always returns 0.
+ */
+int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *dummy, BLASLONG dummy2)
+{
+    BLASLONG count;
+    BLASLONG pos_x = 0;
+    BLASLONG pos_y = 0;
+    FLOAT held;
+
+    if (n < 0) {
+        return(0);
+    }
+
+    for (count = 0; count < n; count++) {
+        /* three-way exchange through a scalar temporary */
+        held = x[pos_x];
+        x[pos_x] = y[pos_y];
+        y[pos_y] = held;
+
+        pos_x += inc_x;
+        pos_y += inc_y;
+    }
+
+    return(0);
+}
+
+
diff --git a/kernel/riscv64/swap_vector.c b/kernel/riscv64/swap_vector.c
new file mode 100644
index 000000000..9377bf4b9
--- /dev/null
+++ b/kernel/riscv64/swap_vector.c
@@ -0,0 +1,173 @@
+/***************************************************************************
+Copyright (c) 2020, The OpenBLAS Project
+All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in
+the documentation and/or other materials provided with the
+distribution.
+3.
Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*****************************************************************************/ + +#include "common.h" +#include +#if !defined(DOUBLE) +#define RVV_EFLOAT RVV_E32 +#define RVV_M RVV_M8 +#define FLOAT_V_T float32xm8_t +#define VLEV_FLOAT vlev_float32xm8 +#define VLSEV_FLOAT vlsev_float32xm8 +#define VSEV_FLOAT vsev_float32xm8 +#define VSSEV_FLOAT vssev_float32xm8 +#else +#define RVV_EFLOAT RVV_E64 +#define RVV_M RVV_M8 +#define FLOAT_V_T float64xm8_t +#define VLEV_FLOAT vlev_float64xm8 +#define VLSEV_FLOAT vlsev_float64xm8 +#define VSEV_FLOAT vsev_float64xm8 +#define VSSEV_FLOAT vssev_float64xm8 +#endif + +int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *dummy, BLASLONG dummy2) +{ + BLASLONG i = 0, j = 0; + BLASLONG ix = 0,iy = 0; + BLASLONG stride_x, stride_y; + FLOAT_V_T vx0, vx1, vy0, vy1; + unsigned int gvl = 0; + + if (n < 0) return(0); + if(inc_x == 1 && inc_y == 1){ + gvl = vsetvli(n, RVV_EFLOAT, RVV_M); + if(gvl <= n/2){ + for(i=0,j=0; i 0){ 
+ gvl = vsetvli(len, RVV_EFLOAT, RVV_M); + vr = VFMVVF_FLOAT(0, gvl); + for(k = 0; k < len / gvl; k++){ + va = VLEV_FLOAT(&a_ptr[i], gvl); + vy = VLEV_FLOAT(&y[i], gvl); + vy = VFMACCVF_FLOAT(vy, temp1, va, gvl); + VSEV_FLOAT(&y[i], vy, gvl); + + vx = VLEV_FLOAT(&x[i], gvl); + vr = VFMACCVV_FLOAT(vr, vx, va, gvl); + + i += gvl; + } + va = VFMVVF_FLOAT(0, gvl); + va = VFREDSUM_FLOAT(vr, va, gvl); + temp2 = va[0]; + if(i < m){ + gvl = vsetvli(m-i, RVV_EFLOAT, RVV_M); + vy = VLEV_FLOAT(&y[i], gvl); + va = VLEV_FLOAT(&a_ptr[i], gvl); + vy = VFMACCVF_FLOAT(vy, temp1, va, gvl); + VSEV_FLOAT(&y[i], vy, gvl); + + vx = VLEV_FLOAT(&x[i], gvl); + vr = VFMULVV_FLOAT(vx, va, gvl); + va = VFMVVF_FLOAT(0, gvl); + va = VFREDSUM_FLOAT(vr, va, gvl); + temp2 += va[0]; + } + } + y[j] += alpha * temp2; + a_ptr += lda; + } + }else if(inc_x == 1){ + jy = 0; + stride_y = inc_y * sizeof(FLOAT); + for (j=0; j 0){ + gvl = vsetvli(len, RVV_EFLOAT, RVV_M); + inc_yv = inc_y * gvl; + vr = VFMVVF_FLOAT(0, gvl); + for(k = 0; k < len / gvl; k++){ + va = VLEV_FLOAT(&a_ptr[i], gvl); + vy = VLSEV_FLOAT(&y[iy], stride_y, gvl); + vy = VFMACCVF_FLOAT(vy, temp1, va, gvl); + VSSEV_FLOAT(&y[iy], stride_y, vy, gvl); + + vx = VLEV_FLOAT(&x[i], gvl); + vr = VFMACCVV_FLOAT(vr, vx, va, gvl); + + i += gvl; + iy += inc_yv; + } + va = VFMVVF_FLOAT(0, gvl); + va = VFREDSUM_FLOAT(vr, va, gvl); + temp2 = va[0]; + if(i < m){ + gvl = vsetvli(m-i, RVV_EFLOAT, RVV_M); + vy = VLSEV_FLOAT(&y[iy], stride_y, gvl); + va = VLEV_FLOAT(&a_ptr[i], gvl); + vy = VFMACCVF_FLOAT(vy, temp1, va, gvl); + VSSEV_FLOAT(&y[iy], stride_y, vy, gvl); + + vx = VLEV_FLOAT(&x[i], gvl); + vr = VFMULVV_FLOAT(vx, va, gvl); + va = VFMVVF_FLOAT(0, gvl); + va = VFREDSUM_FLOAT(vr, va, gvl); + temp2 += va[0]; + } + } + y[jy] += alpha * temp2; + jy += inc_y; + a_ptr += lda; + } + }else if(inc_y == 1){ + jx = 0; + stride_x = inc_x * sizeof(FLOAT); + for (j=0; j 0){ + gvl = vsetvli(len, RVV_EFLOAT, RVV_M); + vr = VFMVVF_FLOAT(0, gvl); + inc_xv = inc_x * gvl; 
+ for(k = 0; k < len / gvl; k++){ + va = VLEV_FLOAT(&a_ptr[i], gvl); + vy = VLEV_FLOAT(&y[i], gvl); + vy = VFMACCVF_FLOAT(vy, temp1, va, gvl); + VSEV_FLOAT(&y[i], vy, gvl); + + vx = VLSEV_FLOAT(&x[ix], stride_x, gvl); + vr = VFMACCVV_FLOAT(vr, vx, va, gvl); + + i += gvl; + ix += inc_xv; + } + va = VFMVVF_FLOAT(0, gvl); + va = VFREDSUM_FLOAT(vr, va, gvl); + temp2 = va[0]; + if(i < m){ + gvl = vsetvli(m-i, RVV_EFLOAT, RVV_M); + vy = VLEV_FLOAT(&y[i], gvl); + va = VLEV_FLOAT(&a_ptr[i], gvl); + vy = VFMACCVF_FLOAT(vy, temp1, va, gvl); + VSEV_FLOAT(&y[i], vy, gvl); + + vx = VLSEV_FLOAT(&x[ix], stride_x, gvl); + vr = VFMULVV_FLOAT(vx, va, gvl); + va = VFMVVF_FLOAT(0, gvl); + va = VFREDSUM_FLOAT(vr, va, gvl); + temp2 += va[0]; + } + } + y[j] += alpha * temp2; + jx += inc_x; + a_ptr += lda; + } + }else{ + stride_x = inc_x * sizeof(FLOAT); + stride_y = inc_y * sizeof(FLOAT); + jx = 0; + jy = 0; + for (j=0; j 0){ + gvl = vsetvli(len, RVV_EFLOAT, RVV_M); + inc_xv = inc_x * gvl; + inc_yv = inc_y * gvl; + vr = VFMVVF_FLOAT(0, gvl); + for(k = 0; k < len / gvl; k++){ + va = VLEV_FLOAT(&a_ptr[i], gvl); + vy = VLSEV_FLOAT(&y[iy], stride_y, gvl); + vy = VFMACCVF_FLOAT(vy, temp1, va, gvl); + VSSEV_FLOAT(&y[iy], stride_y, vy, gvl); + + vx = VLSEV_FLOAT(&x[ix], stride_x, gvl); + vr = VFMACCVV_FLOAT(vr, vx, va, gvl); + + i += gvl; + ix += inc_xv; + iy += inc_yv; + } + va = VFMVVF_FLOAT(0, gvl); + va = VFREDSUM_FLOAT(vr, va, gvl); + temp2 = va[0]; + if(i < m){ + gvl = vsetvli(m-i, RVV_EFLOAT, RVV_M); + vy = VLSEV_FLOAT(&y[iy], stride_y, gvl); + va = VLEV_FLOAT(&a_ptr[i], gvl); + vy = VFMACCVF_FLOAT(vy, temp1, va, gvl); + VSSEV_FLOAT(&y[iy], stride_y, vy, gvl); + + vx = VLSEV_FLOAT(&x[ix], stride_x, gvl); + vr = VFMULVV_FLOAT(vx, va, gvl); + va = VFMVVF_FLOAT(0, gvl); + va = VFREDSUM_FLOAT(vr, va, gvl); + temp2 += va[0]; + } + } + y[jy] += alpha * temp2; + jx += inc_x; + jy += inc_y; + a_ptr += lda; + } + } + return(0); +} + diff --git a/kernel/riscv64/symv_U.c b/kernel/riscv64/symv_U.c 
new file mode 100644 index 000000000..b5a0c96e9 --- /dev/null +++ b/kernel/riscv64/symv_U.c @@ -0,0 +1,71 @@ +/*************************************************************************** +Copyright (c) 2013, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*****************************************************************************/ + + +#include "common.h" + +int CNAME(BLASLONG m, BLASLONG offset, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *buffer) +{ + BLASLONG i; + BLASLONG ix,iy; + BLASLONG jx,jy; + BLASLONG j; + FLOAT temp1; + FLOAT temp2; + +#if 0 + if( m != offset ) + printf("Symv_U: m=%d offset=%d\n",m,offset); +#endif + + BLASLONG m1 = m - offset; + + jx = m1 * inc_x; + jy = m1 * inc_y; + + for (j=m1; j 0){ + i = 0; + gvl = vsetvli(j, RVV_EFLOAT, RVV_M); + vr = VFMVVF_FLOAT(0, gvl); + for(k = 0; k < j / gvl; k++){ + vy = VLEV_FLOAT(&y[i], gvl); + va = VLEV_FLOAT(&a_ptr[i], gvl); + vy = VFMACCVF_FLOAT(vy, temp1, va, gvl); + VSEV_FLOAT(&y[i], vy, gvl); + + vx = VLEV_FLOAT(&x[i], gvl); + vr = VFMACCVV_FLOAT(vr, vx, va, gvl); + + i += gvl; + } + va = VFMVVF_FLOAT(0, gvl); + va = VFREDSUM_FLOAT(vr, va, gvl); + temp2 = va[0]; + if(i < j){ + gvl = vsetvli(j-i, RVV_EFLOAT, RVV_M); + vy = VLEV_FLOAT(&y[i], gvl); + va = VLEV_FLOAT(&a_ptr[i], gvl); + vy = VFMACCVF_FLOAT(vy, temp1, va, gvl); + VSEV_FLOAT(&y[i], vy, gvl); + + vx = VLEV_FLOAT(&x[i], gvl); + vr = VFMULVV_FLOAT(vx, va, gvl); + va = VFMVVF_FLOAT(0, gvl); + va = VFREDSUM_FLOAT(vr, va, gvl); + temp2 += va[0]; + } + } + y[j] += temp1 * a_ptr[j] + alpha * temp2; + a_ptr += lda; + } + }else if(inc_x == 1){ + jy = m1 * inc_y; + a_ptr += m1 * lda; + stride_y = inc_y * sizeof(FLOAT); + for (j=m1; j 0){ + iy = 0; + i = 0; + gvl = vsetvli(j, RVV_EFLOAT, RVV_M); + inc_yv = inc_y * gvl; + vr = VFMVVF_FLOAT(0, gvl); + for(k = 0; k < j / gvl; k++){ + vy = VLSEV_FLOAT(&y[iy], stride_y, gvl); + va = VLEV_FLOAT(&a_ptr[i], gvl); + vy = VFMACCVF_FLOAT(vy, temp1, va, gvl); + VSSEV_FLOAT(&y[iy], stride_y, vy, gvl); + + vx = VLEV_FLOAT(&x[i], gvl); + vr = VFMACCVV_FLOAT(vr, vx, va, gvl); + + i += gvl; + iy += inc_yv; + } + va = VFMVVF_FLOAT(0, gvl); + va = VFREDSUM_FLOAT(vr, va, gvl); + temp2 = va[0]; + if(i < j){ + gvl = 
vsetvli(j-i, RVV_EFLOAT, RVV_M); + vy = VLSEV_FLOAT(&y[iy], stride_y, gvl); + va = VLEV_FLOAT(&a_ptr[i], gvl); + vy = VFMACCVF_FLOAT(vy, temp1, va, gvl); + VSSEV_FLOAT(&y[iy], stride_y, vy, gvl); + + vx = VLEV_FLOAT(&x[i], gvl); + vr = VFMULVV_FLOAT(vx, va, gvl); + va = VFMVVF_FLOAT(0, gvl); + va = VFREDSUM_FLOAT(vr, va, gvl); + temp2 += va[0]; + } + } + y[jy] += temp1 * a_ptr[j] + alpha * temp2; + a_ptr += lda; + jy += inc_y; + } + }else if(inc_y == 1){ + jx = m1 * inc_x; + a_ptr += m1 * lda; + stride_x = inc_x * sizeof(FLOAT); + for (j=m1; j 0){ + ix = 0; + i = 0; + gvl = vsetvli(j, RVV_EFLOAT, RVV_M); + inc_xv = inc_x * gvl; + vr = VFMVVF_FLOAT(0, gvl); + for(k = 0; k < j / gvl; k++){ + vy = VLEV_FLOAT(&y[i], gvl); + va = VLEV_FLOAT(&a_ptr[i], gvl); + vy = VFMACCVF_FLOAT(vy, temp1, va, gvl); + VSEV_FLOAT(&y[i], vy, gvl); + + vx = VLSEV_FLOAT(&x[ix], stride_x, gvl); + vr = VFMACCVV_FLOAT(vr, vx, va, gvl); + + i += gvl; + ix += inc_xv; + } + va = VFMVVF_FLOAT(0, gvl); + va = VFREDSUM_FLOAT(vr, va, gvl); + temp2 = va[0]; + if(i < j){ + gvl = vsetvli(j-i, RVV_EFLOAT, RVV_M); + vy = VLEV_FLOAT(&y[i], gvl); + va = VLEV_FLOAT(&a_ptr[i], gvl); + vy = VFMACCVF_FLOAT(vy, temp1, va, gvl); + VSEV_FLOAT(&y[i], vy, gvl); + + vx = VLSEV_FLOAT(&x[ix], stride_x, gvl); + vr = VFMULVV_FLOAT(vx, va, gvl); + va = VFMVVF_FLOAT(0, gvl); + va = VFREDSUM_FLOAT(vr, va, gvl); + temp2 += va[0]; + } + } + y[j] += temp1 * a_ptr[j] + alpha * temp2; + a_ptr += lda; + jx += inc_x; + } + }else{ + jx = m1 * inc_x; + jy = m1 * inc_y; + a_ptr += m1 * lda; + stride_x = inc_x * sizeof(FLOAT); + stride_y = inc_y * sizeof(FLOAT); + for (j=m1; j 0){ + ix = 0; + iy = 0; + i = 0; + gvl = vsetvli(j, RVV_EFLOAT, RVV_M); + inc_xv = inc_x * gvl; + inc_yv = inc_y * gvl; + vr = VFMVVF_FLOAT(0, gvl); + for(k = 0; k < j / gvl; k++){ + vy = VLSEV_FLOAT(&y[iy], stride_y, gvl); + va = VLEV_FLOAT(&a_ptr[i], gvl); + vy = VFMACCVF_FLOAT(vy, temp1, va, gvl); + VSSEV_FLOAT(&y[iy], stride_y, vy, gvl); + + vx = 
VLSEV_FLOAT(&x[ix], stride_x, gvl); + vr = VFMACCVV_FLOAT(vr, vx, va, gvl); + + i += gvl; + ix += inc_xv; + iy += inc_yv; + } + va = VFMVVF_FLOAT(0, gvl); + va = VFREDSUM_FLOAT(vr, va, gvl); + temp2 = va[0]; + if(i < j){ + gvl = vsetvli(j-i, RVV_EFLOAT, RVV_M); + vy = VLSEV_FLOAT(&y[iy], stride_y, gvl); + va = VLEV_FLOAT(&a_ptr[i], gvl); + vy = VFMACCVF_FLOAT(vy, temp1, va, gvl); + VSSEV_FLOAT(&y[iy], stride_y, vy, gvl); + + vx = VLSEV_FLOAT(&x[ix], stride_x, gvl); + vr = VFMULVV_FLOAT(vx, va, gvl); + va = VFMVVF_FLOAT(0, gvl); + va = VFREDSUM_FLOAT(vr, va, gvl); + temp2 += va[0]; + } + } + y[jy] += temp1 * a_ptr[j] + alpha * temp2; + a_ptr += lda; + jx += inc_x; + jy += inc_y; + } + } + return(0); +} + diff --git a/kernel/riscv64/zamax.c b/kernel/riscv64/zamax.c new file mode 100644 index 000000000..a39bd7821 --- /dev/null +++ b/kernel/riscv64/zamax.c @@ -0,0 +1,79 @@ +/*************************************************************************** +Copyright (c) 2013, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*****************************************************************************/
+
+/**************************************************************************************
+* 2013/09/14 Saar
+* BLASTEST float : OK
+* BLASTEST double : OK
+* CTEST : NoTest
+* TEST : NoTest
+*
+**************************************************************************************/
+
+#include "common.h"
+#include <math.h>
+
+#if defined(DOUBLE)
+
+#define ABS fabs
+
+#else
+
+#define ABS fabsf
+
+#endif
+
+/*
+ * Sum of the absolute values of the two consecutive FLOATs starting at
+ * x[i] (presumably the real and imaginary parts of one complex element).
+ * Arguments and result are parenthesized so the macro is safe inside any
+ * larger expression.
+ */
+#define CABS1(x,i) (ABS((x)[(i)])+ABS((x)[(i)+1]))
+
+/*
+ * Returns the largest CABS1 value over n elements of x.
+ *
+ * n      number of elements to examine
+ * x      input array; element k starts at x[2 * k * inc_x] and occupies
+ *        two consecutive FLOATs
+ * inc_x  stride between elements, counted in elements (not FLOATs)
+ *
+ * Returns 0.0 when n <= 0 or inc_x <= 0.
+ */
+FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
+{
+    BLASLONG i;
+    BLASLONG ix;
+    BLASLONG inc_x2;
+    FLOAT maxf;
+    FLOAT absf;
+
+    if (n <= 0 || inc_x <= 0) return(0.0);
+
+    /* each element is two FLOATs wide */
+    inc_x2 = 2 * inc_x;
+
+    /* seed the running maximum with the first element */
+    maxf = CABS1(x,0);
+    ix = inc_x2;
+
+    for (i = 1; i < n; i++)
+    {
+        /* evaluate once instead of once for the test and once for the store */
+        absf = CABS1(x,ix);
+        if( absf > maxf )
+        {
+            maxf = absf;
+        }
+        ix += inc_x2;
+    }
+    return(maxf);
+}
+
+
diff --git a/kernel/riscv64/zamax_vector.c b/kernel/riscv64/zamax_vector.c
new file mode 100644
index 000000000..a6c742b14
--- /dev/null
+++ b/kernel/riscv64/zamax_vector.c
@@ -0,0 +1,104 @@
+/***************************************************************************
+Copyright (c) 2020, The OpenBLAS Project
+All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+1.
Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*****************************************************************************/ + +#include "common.h" +#include + +#if !defined(DOUBLE) +#define RVV_EFLOAT RVV_E32 +#define RVV_M RVV_M8 +#define FLOAT_V_T float32xm8_t +#define VLSEV_FLOAT vlsev_float32xm8 +#define VFREDMAXVS_FLOAT vfredmaxvs_float32xm8 +#define MASK_T e32xm8_t +#define VMFLTVF_FLOAT vmfltvf_e32xm8_float32xm8 +#define VFMVVF_FLOAT vfmvvf_float32xm8 +#define VFRSUBVF_MASK_FLOAT vfrsubvf_mask_float32xm8 +#define VFMAXVV_FLOAT vfmaxvv_float32xm8 +#define VFADDVV_FLOAT vfaddvv_float32xm8 +#else +#define RVV_EFLOAT RVV_E64 +#define RVV_M RVV_M8 +#define FLOAT_V_T float64xm8_t +#define VLSEV_FLOAT vlsev_float64xm8 +#define VFREDMAXVS_FLOAT vfredmaxvs_float64xm8 +#define MASK_T e64xm8_t +#define VMFLTVF_FLOAT vmfltvf_e64xm8_float64xm8 +#define VFMVVF_FLOAT vfmvvf_float64xm8 +#define VFRSUBVF_MASK_FLOAT vfrsubvf_mask_float64xm8 +#define VFMAXVV_FLOAT vfmaxvv_float64xm8 +#define VFADDVV_FLOAT vfaddvv_float64xm8 +#endif + +FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) +{ + BLASLONG i=0, j=0; + BLASLONG ix=0; + FLOAT maxf=0.0; + if (n <= 0 || inc_x <= 0) return(maxf); + unsigned int gvl = 0; + FLOAT_V_T v0, v1, v_max; + + MASK_T mask0, mask1; + BLASLONG stride_x = inc_x * sizeof(FLOAT) * 2; + gvl = vsetvli(n, RVV_EFLOAT, RVV_M); + v_max = VFMVVF_FLOAT(0, gvl); + BLASLONG inc_xv = inc_x * gvl * 2; + for(; i maxf) + maxf = v_max[0]; + } + return(maxf); +} diff --git a/kernel/riscv64/zamin.c b/kernel/riscv64/zamin.c new file mode 100644 index 000000000..02eab3e75 --- /dev/null +++ b/kernel/riscv64/zamin.c @@ -0,0 +1,79 @@ +/*************************************************************************** +Copyright (c) 2013, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. 
Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*****************************************************************************/
+
+/**************************************************************************************
+* 2013/09/14 Saar
+* BLASTEST float : OK
+* BLASTEST double : OK
+* CTEST : NoTest
+* TEST : NoTest
+*
+**************************************************************************************/
+
+#include "common.h"
+#include <math.h>
+
+#if defined(DOUBLE)
+
+#define ABS fabs
+
+#else
+
+#define ABS fabsf
+
+#endif
+
+/*
+ * Sum of the absolute values of the two consecutive FLOATs starting at
+ * x[i] (presumably the real and imaginary parts of one complex element).
+ * Arguments and result are parenthesized so the macro is safe inside any
+ * larger expression.
+ */
+#define CABS1(x,i) (ABS((x)[(i)])+ABS((x)[(i)+1]))
+
+/*
+ * Returns the smallest CABS1 value over n elements of x.
+ *
+ * n      number of elements to examine
+ * x      input array; element k starts at x[2 * k * inc_x] and occupies
+ *        two consecutive FLOATs
+ * inc_x  stride between elements, counted in elements (not FLOATs)
+ *
+ * Returns 0.0 when n <= 0 or inc_x <= 0.
+ */
+FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
+{
+    BLASLONG i;
+    BLASLONG ix;
+    BLASLONG inc_x2;
+    FLOAT minf;
+    FLOAT absf;
+
+    if (n <= 0 || inc_x <= 0) return(0.0);
+
+    /* each element is two FLOATs wide */
+    inc_x2 = 2 * inc_x;
+
+    /* seed the running minimum with the first element */
+    minf = CABS1(x,0);
+    ix = inc_x2;
+
+    for (i = 1; i < n; i++)
+    {
+        /* evaluate once instead of once for the test and once for the store */
+        absf = CABS1(x,ix);
+        if( absf < minf )
+        {
+            minf = absf;
+        }
+        ix += inc_x2;
+    }
+    return(minf);
+}
+
+
diff --git a/kernel/riscv64/zamin_vector.c b/kernel/riscv64/zamin_vector.c
new file mode 100644
index 000000000..44a7cf1dc
--- /dev/null
+++ b/kernel/riscv64/zamin_vector.c
@@ -0,0 +1,104 @@
+/***************************************************************************
+Copyright (c) 2020, The OpenBLAS Project
+All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in
+the documentation and/or other materials provided with the
+distribution.
+3. Neither the name of the OpenBLAS project nor the names of
+its contributors may be used to endorse or promote products
+derived from this software without specific prior written permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*****************************************************************************/ + +#include "common.h" +#include +#include + +#if !defined(DOUBLE) +#define RVV_EFLOAT RVV_E32 +#define RVV_M RVV_M8 +#define FLOAT_V_T float32xm8_t +#define VLSEV_FLOAT vlsev_float32xm8 +#define VFREDMINVS_FLOAT vfredminvs_float32xm8 +#define MASK_T e32xm8_t +#define VMFLTVF_FLOAT vmfltvf_e32xm8_float32xm8 +#define VFMVVF_FLOAT vfmvvf_float32xm8 +#define VFRSUBVF_MASK_FLOAT vfrsubvf_mask_float32xm8 +#define VFMINVV_FLOAT vfminvv_float32xm8 +#define VFADDVV_FLOAT vfaddvv_float32xm8 +#else +#define RVV_EFLOAT RVV_E64 +#define RVV_M RVV_M8 +#define FLOAT_V_T float64xm8_t +#define VLSEV_FLOAT vlsev_float64xm8 +#define VFREDMINVS_FLOAT vfredminvs_float64xm8 +#define MASK_T e64xm8_t +#define VMFLTVF_FLOAT vmfltvf_e64xm8_float64xm8 +#define VFMVVF_FLOAT vfmvvf_float64xm8 +#define VFRSUBVF_MASK_FLOAT vfrsubvf_mask_float64xm8 +#define VFMINVV_FLOAT vfminvv_float64xm8 +#define VFADDVV_FLOAT vfaddvv_float64xm8 +#endif + +FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) +{ + BLASLONG i=0, j=0; + BLASLONG ix=0; + if (n <= 0 || inc_x <= 0) return(0.0); + FLOAT minf=FLT_MAX; + unsigned int gvl = 0; + FLOAT_V_T v0, v1, v_min; + MASK_T mask0, mask1; 
+ BLASLONG stride_x = inc_x * sizeof(FLOAT) * 2;
+ gvl = vsetvli(n, RVV_EFLOAT, RVV_M);
+ v_min = VFMVVF_FLOAT(FLT_MAX, gvl);
+ BLASLONG inc_xv = inc_x * gvl * 2;
+ for(; i
+
+#if defined(DOUBLE)
+
+#define ABS fabs
+
+#else
+
+#define ABS fabsf
+
+#endif
+
+/*
+ * Sum of the absolute values of the two consecutive FLOATs starting at
+ * x[i] (presumably the real and imaginary parts of one complex element).
+ * Arguments and result are parenthesized so the macro is safe inside any
+ * larger expression.
+ */
+#define CABS1(x,i) (ABS((x)[(i)])+ABS((x)[(i)+1]))
+
+/*
+ * Returns the sum of CABS1 over n elements of x, accumulated in index
+ * order.
+ *
+ * n      number of elements to sum
+ * x      input array; element k starts at x[2 * k * inc_x] and occupies
+ *        two consecutive FLOATs
+ * inc_x  stride between elements, counted in elements (not FLOATs)
+ *
+ * Returns 0.0 when n <= 0 or inc_x <= 0.
+ */
+FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
+{
+    BLASLONG i;
+    BLASLONG inc_x2;
+    BLASLONG end;
+    FLOAT sumf = 0.0;
+
+    if (n <= 0 || inc_x <= 0) return(sumf);
+
+    /* each element is two FLOATs wide */
+    inc_x2 = 2 * inc_x;
+
+    /* one past the FLOAT index of the last element's first component */
+    end = n * inc_x2;
+
+    for (i = 0; i < end; i += inc_x2)
+    {
+        sumf += CABS1(x,i);
+    }
+    return(sumf);
+}
+
+
diff --git a/kernel/riscv64/zasum_vector.c b/kernel/riscv64/zasum_vector.c
new file mode 100644
index 000000000..d9fa88971
--- /dev/null
+++ b/kernel/riscv64/zasum_vector.c
@@ -0,0 +1,136 @@
+/***************************************************************************
+Copyright (c) 2020, The OpenBLAS Project
+All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in
+the documentation and/or other materials provided with the
+distribution.
+3. Neither the name of the OpenBLAS project nor the names of
+its contributors may be used to endorse or promote products
+derived from this software without specific prior written permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED.
IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*****************************************************************************/ + +#include "common.h" +#include + +#if !defined(DOUBLE) +#define RVV_EFLOAT RVV_E32 +#define RVV_M RVV_M8 +#define FLOAT_V_T float32xm8_t +#define VLEV_FLOAT vlev_float32xm8 +#define VLSEV_FLOAT vlsev_float32xm8 +#define VFREDSUMVS_FLOAT vfredsumvs_float32xm8 +#define MASK_T e32xm8_t +#define VMFLTVF_FLOAT vmfltvf_e32xm8_float32xm8 +#define VFMVVF_FLOAT vfmvvf_float32xm8 +#define VFRSUBVF_MASK_FLOAT vfrsubvf_mask_float32xm8 +#define VFADDVV_FLOAT vfaddvv_float32xm8 +#else +#define RVV_EFLOAT RVV_E64 +#define RVV_M RVV_M8 +#define FLOAT_V_T float64xm8_t +#define VLEV_FLOAT vlev_float64xm8 +#define VLSEV_FLOAT vlsev_float64xm8 +#define VFREDSUMVS_FLOAT vfredsumvs_float64xm8 +#define MASK_T e64xm8_t +#define VMFLTVF_FLOAT vmfltvf_e64xm8_float64xm8 +#define VFMVVF_FLOAT vfmvvf_float64xm8 +#define VFRSUBVF_MASK_FLOAT vfrsubvf_mask_float64xm8 +#define VFADDVV_FLOAT vfaddvv_float64xm8 +#endif +FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) +{ + BLASLONG i=0, j=0; + BLASLONG ix=0; + FLOAT asumf=0.0; + if (n <= 0 || inc_x <= 0) return(asumf); + unsigned int gvl = 0; + FLOAT_V_T v0, v1, v_zero,v_sum; + + MASK_T mask0, mask1; + if(inc_x == 1){ + BLASLONG n2 = n * 2; + gvl = vsetvli(n2, RVV_EFLOAT, RVV_M); + v_zero = VFMVVF_FLOAT(0, gvl); + if(gvl <= n2/2){ + v_sum = VFMVVF_FLOAT(0, gvl); + for(i=0,j=0; i 0){ + gvl = vsetvli(len, RVV_EFLOAT, RVV_M); + inc_xv = incx * gvl * 2; + 
inc_yv = incy * gvl * 2; + inc_av = gvl * 2; + vr0 = VFMVVF_FLOAT(0, gvl); + vr1 = VFMVVF_FLOAT(0, gvl); + for(k = 0; k < len / gvl; k++){ + va0 = VLSEV_FLOAT(&a_ptr[ia], stride_a, gvl); + va1 = VLSEV_FLOAT(&a_ptr[ia+1], stride_a, gvl); + vy0 = VLSEV_FLOAT(&y[iy], stride_y, gvl); + vy1 = VLSEV_FLOAT(&y[iy+1], stride_y, gvl); +#ifndef HEMVREV + vy0 = VFMACCVF_FLOAT(vy0, temp_r1, va0, gvl); + vy0 = VFNMSACVF_FLOAT(vy0, temp_i1, va1, gvl); + vy1 = VFMACCVF_FLOAT(vy1, temp_r1, va1, gvl); + vy1 = VFMACCVF_FLOAT(vy1, temp_i1, va0, gvl); +#else + vy0 = VFMACCVF_FLOAT(vy0, temp_r1, va0, gvl); + vy0 = VFMACCVF_FLOAT(vy0, temp_i1, va1, gvl); + vy1 = VFNMSACVF_FLOAT(vy1, temp_r1, va1, gvl); + vy1 = VFMACCVF_FLOAT(vy1, temp_i1, va0, gvl); +#endif + VSSEV_FLOAT(&y[iy], stride_y, vy0, gvl); + VSSEV_FLOAT(&y[iy+1], stride_y, vy1, gvl); + + vx0 = VLSEV_FLOAT(&x[ix], stride_x, gvl); + vx1 = VLSEV_FLOAT(&x[ix+1], stride_x, gvl); +#ifndef HEMVREV + vr0 = VFMACCVV_FLOAT(vr0, vx0, va0, gvl); + vr0 = VFMACCVV_FLOAT(vr0, vx1, va1, gvl); + vr1 = VFMACCVV_FLOAT(vr1, vx1, va0, gvl); + vr1 = VFNMSACVV_FLOAT(vr1, vx0, va1, gvl); +#else + vr0 = VFMACCVV_FLOAT(vr0, vx0, va0, gvl); + vr0 = VFNMSACVV_FLOAT(vr0, vx1, va1, gvl); + vr1 = VFMACCVV_FLOAT(vr1, vx1, va0, gvl); + vr1 = VFMACCVV_FLOAT(vr1, vx0, va1, gvl); + +#endif + i += gvl; + ix += inc_xv; + iy += inc_yv; + ia += inc_av; + } + va0 = VFMVVF_FLOAT(0, gvl); + vx0 = VFREDSUM_FLOAT(vr0, va0, gvl); + temp_r2 = vx0[0]; + vx1 = VFREDSUM_FLOAT(vr1, va0, gvl); + temp_i2 = vx1[0]; + if(i < m){ + gvl = vsetvli(m-i, RVV_EFLOAT, RVV_M); + va0 = VLSEV_FLOAT(&a_ptr[ia], stride_a, gvl); + va1 = VLSEV_FLOAT(&a_ptr[ia+1], stride_a, gvl); + vy0 = VLSEV_FLOAT(&y[iy], stride_y, gvl); + vy1 = VLSEV_FLOAT(&y[iy+1], stride_y, gvl); +#ifndef HEMVREV + vy0 = VFMACCVF_FLOAT(vy0, temp_r1, va0, gvl); + vy0 = VFNMSACVF_FLOAT(vy0, temp_i1, va1, gvl); + vy1 = VFMACCVF_FLOAT(vy1, temp_r1, va1, gvl); + vy1 = VFMACCVF_FLOAT(vy1, temp_i1, va0, gvl); +#else + vy0 = 
VFMACCVF_FLOAT(vy0, temp_r1, va0, gvl); + vy0 = VFMACCVF_FLOAT(vy0, temp_i1, va1, gvl); + vy1 = VFNMSACVF_FLOAT(vy1, temp_r1, va1, gvl); + vy1 = VFMACCVF_FLOAT(vy1, temp_i1, va0, gvl); +#endif + VSSEV_FLOAT(&y[iy], stride_y, vy0, gvl); + VSSEV_FLOAT(&y[iy+1], stride_y, vy1, gvl); + + vx0 = VLSEV_FLOAT(&x[ix], stride_x, gvl); + vx1 = VLSEV_FLOAT(&x[ix+1], stride_x, gvl); +#ifndef HEMVREV + vr0 = VFMULVV_FLOAT(vx0, va0, gvl); + vr0 = VFMACCVV_FLOAT(vr0, vx1, va1, gvl); + vr1 = VFMULVV_FLOAT(vx1, va0, gvl); + vr1 = VFNMSACVV_FLOAT(vr1, vx0, va1, gvl); +#else + vr0 = VFMULVV_FLOAT(vx0, va0, gvl); + vr0 = VFNMSACVV_FLOAT(vr0, vx1, va1, gvl); + vr1 = VFMULVV_FLOAT(vx1, va0, gvl); + vr1 = VFMACCVV_FLOAT(vr1, vx0, va1, gvl); +#endif + + va0 = VFMVVF_FLOAT(0, gvl); + vx0 = VFREDSUM_FLOAT(vr0, va0, gvl); + temp_r2 += vx0[0]; + vx1 = VFREDSUM_FLOAT(vr1, va0, gvl); + temp_i2 += vx1[0]; + } + } + y[jy] += alpha_r * temp_r2 - alpha_i * temp_i2; + y[jy+1] += alpha_r * temp_i2 + alpha_i * temp_r2; + jx += inc_x2; + jy += inc_y2; + ja += 2; + a_ptr += lda2; + } + return(0); +} diff --git a/kernel/riscv64/zhemv_UV_vector.c b/kernel/riscv64/zhemv_UV_vector.c new file mode 100644 index 000000000..6fe12c76c --- /dev/null +++ b/kernel/riscv64/zhemv_UV_vector.c @@ -0,0 +1,192 @@ +/*************************************************************************** +Copyright (c) 2013, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. 
Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*****************************************************************************/ + +#include "common.h" +#if !defined(DOUBLE) +#define RVV_EFLOAT RVV_E32 +#define RVV_M RVV_M4 +#define FLOAT_V_T float32xm4_t +#define VLSEV_FLOAT vlsev_float32xm4 +#define VSSEV_FLOAT vssev_float32xm4 +#define VFREDSUM_FLOAT vfredsumvs_float32xm4 +#define VFMACCVV_FLOAT vfmaccvv_float32xm4 +#define VFMACCVF_FLOAT vfmaccvf_float32xm4 +#define VFMVVF_FLOAT vfmvvf_float32xm4 +#define VFMULVV_FLOAT vfmulvv_float32xm4 +#define VFNMSACVF_FLOAT vfnmsacvf_float32xm4 +#define VFNMSACVV_FLOAT vfnmsacvv_float32xm4 +#else +#define RVV_EFLOAT RVV_E64 +#define RVV_M RVV_M4 +#define FLOAT_V_T float64xm4_t +#define VLSEV_FLOAT vlsev_float64xm4 +#define VSSEV_FLOAT vssev_float64xm4 +#define VFREDSUM_FLOAT vfredsumvs_float64xm4 +#define VFMACCVV_FLOAT vfmaccvv_float64xm4 +#define VFMACCVF_FLOAT vfmaccvf_float64xm4 +#define VFMVVF_FLOAT vfmvvf_float64xm4 +#define VFMULVV_FLOAT vfmulvv_float64xm4 +#define VFNMSACVF_FLOAT vfnmsacvf_float64xm4 +#define VFNMSACVV_FLOAT 
vfnmsacvv_float64xm4 +#endif + +int CNAME(BLASLONG m, BLASLONG offset, FLOAT alpha_r, FLOAT alpha_i, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG incx, FLOAT *y, BLASLONG incy, FLOAT *buffer){ + BLASLONG i, j, k; + BLASLONG ix, iy, ia; + BLASLONG jx, jy, ja; + FLOAT temp_r1, temp_i1; + FLOAT temp_r2, temp_i2; + FLOAT *a_ptr = a; + unsigned int gvl = 0; + + + FLOAT_V_T va0, va1, vx0, vx1, vy0, vy1, vr0, vr1; + BLASLONG stride_x, stride_y, stride_a, inc_xv, inc_yv, inc_av, lda2; + + BLASLONG inc_x2 = incx * 2; + BLASLONG inc_y2 = incy * 2; + stride_x = inc_x2 * sizeof(FLOAT); + stride_y = inc_y2 * sizeof(FLOAT); + stride_a = 2 * sizeof(FLOAT); + lda2 = lda * 2; + + BLASLONG m1 = m - offset; + a_ptr = a + m1 * lda2; + jx = m1 * inc_x2; + jy = m1 * inc_y2; + ja = m1 * 2; + for(j = m1; j < m; j++){ + temp_r1 = alpha_r * x[jx] - alpha_i * x[jx+1];; + temp_i1 = alpha_r * x[jx+1] + alpha_i * x[jx]; + temp_r2 = 0; + temp_i2 = 0; + ix = 0; + iy = 0; + ia = 0; + i = 0; + if(j > 0){ + gvl = vsetvli(j, RVV_EFLOAT, RVV_M); + inc_xv = incx * gvl * 2; + inc_yv = incy * gvl * 2; + inc_av = gvl * 2; + vr0 = VFMVVF_FLOAT(0, gvl); + vr1 = VFMVVF_FLOAT(0, gvl); + for(k = 0; k < j / gvl; k++){ + va0 = VLSEV_FLOAT(&a_ptr[ia], stride_a, gvl); + va1 = VLSEV_FLOAT(&a_ptr[ia+1], stride_a, gvl); + vy0 = VLSEV_FLOAT(&y[iy], stride_y, gvl); + vy1 = VLSEV_FLOAT(&y[iy+1], stride_y, gvl); +#ifndef HEMVREV + vy0 = VFMACCVF_FLOAT(vy0, temp_r1, va0, gvl); + vy0 = VFNMSACVF_FLOAT(vy0, temp_i1, va1, gvl); + vy1 = VFMACCVF_FLOAT(vy1, temp_r1, va1, gvl); + vy1 = VFMACCVF_FLOAT(vy1, temp_i1, va0, gvl); +#else + vy0 = VFMACCVF_FLOAT(vy0, temp_r1, va0, gvl); + vy0 = VFMACCVF_FLOAT(vy0, temp_i1, va1, gvl); + vy1 = VFNMSACVF_FLOAT(vy1, temp_r1, va1, gvl); + vy1 = VFMACCVF_FLOAT(vy1, temp_i1, va0, gvl); +#endif + VSSEV_FLOAT(&y[iy], stride_y, vy0, gvl); + VSSEV_FLOAT(&y[iy+1], stride_y, vy1, gvl); + + vx0 = VLSEV_FLOAT(&x[ix], stride_x, gvl); + vx1 = VLSEV_FLOAT(&x[ix+1], stride_x, gvl); +#ifndef HEMVREV + vr0 = 
VFMACCVV_FLOAT(vr0, vx0, va0, gvl); + vr0 = VFMACCVV_FLOAT(vr0, vx1, va1, gvl); + vr1 = VFMACCVV_FLOAT(vr1, vx1, va0, gvl); + vr1 = VFNMSACVV_FLOAT(vr1, vx0, va1, gvl); +#else + vr0 = VFMACCVV_FLOAT(vr0, vx0, va0, gvl); + vr0 = VFNMSACVV_FLOAT(vr0, vx1, va1, gvl); + vr1 = VFMACCVV_FLOAT(vr1, vx1, va0, gvl); + vr1 = VFMACCVV_FLOAT(vr1, vx0, va1, gvl); + +#endif + i += gvl; + ix += inc_xv; + iy += inc_yv; + ia += inc_av; + } + va0 = VFMVVF_FLOAT(0, gvl); + vx0 = VFREDSUM_FLOAT(vr0, va0, gvl); + temp_r2 = vx0[0]; + vx1 = VFREDSUM_FLOAT(vr1, va0, gvl); + temp_i2 = vx1[0]; + if(i < j){ + gvl = vsetvli(j-i, RVV_EFLOAT, RVV_M); + va0 = VLSEV_FLOAT(&a_ptr[ia], stride_a, gvl); + va1 = VLSEV_FLOAT(&a_ptr[ia+1], stride_a, gvl); + vy0 = VLSEV_FLOAT(&y[iy], stride_y, gvl); + vy1 = VLSEV_FLOAT(&y[iy+1], stride_y, gvl); +#ifndef HEMVREV + vy0 = VFMACCVF_FLOAT(vy0, temp_r1, va0, gvl); + vy0 = VFNMSACVF_FLOAT(vy0, temp_i1, va1, gvl); + vy1 = VFMACCVF_FLOAT(vy1, temp_r1, va1, gvl); + vy1 = VFMACCVF_FLOAT(vy1, temp_i1, va0, gvl); +#else + vy0 = VFMACCVF_FLOAT(vy0, temp_r1, va0, gvl); + vy0 = VFMACCVF_FLOAT(vy0, temp_i1, va1, gvl); + vy1 = VFNMSACVF_FLOAT(vy1, temp_r1, va1, gvl); + vy1 = VFMACCVF_FLOAT(vy1, temp_i1, va0, gvl); +#endif + VSSEV_FLOAT(&y[iy], stride_y, vy0, gvl); + VSSEV_FLOAT(&y[iy+1], stride_y, vy1, gvl); + + vx0 = VLSEV_FLOAT(&x[ix], stride_x, gvl); + vx1 = VLSEV_FLOAT(&x[ix+1], stride_x, gvl); +#ifndef HEMVREV + vr0 = VFMULVV_FLOAT(vx0, va0, gvl); + vr0 = VFMACCVV_FLOAT(vr0, vx1, va1, gvl); + vr1 = VFMULVV_FLOAT(vx1, va0, gvl); + vr1 = VFNMSACVV_FLOAT(vr1, vx0, va1, gvl); +#else + vr0 = VFMULVV_FLOAT(vx0, va0, gvl); + vr0 = VFNMSACVV_FLOAT(vr0, vx1, va1, gvl); + vr1 = VFMULVV_FLOAT(vx1, va0, gvl); + vr1 = VFMACCVV_FLOAT(vr1, vx0, va1, gvl); +#endif + + va0 = VFMVVF_FLOAT(0, gvl); + vx0 = VFREDSUM_FLOAT(vr0, va0, gvl); + temp_r2 += vx0[0]; + vx1 = VFREDSUM_FLOAT(vr1, va0, gvl); + temp_i2 += vx1[0]; + } + } + y[jy] += temp_r1 * a_ptr[ja]; + y[jy+1] += temp_i1 * 
a_ptr[ja]; + y[jy] += alpha_r * temp_r2 - alpha_i * temp_i2; + y[jy+1] += alpha_r * temp_i2 + alpha_i * temp_r2; + jx += inc_x2; + jy += inc_y2; + ja += 2; + a_ptr += lda2; + } + return(0); +} diff --git a/kernel/riscv64/znrm2.c b/kernel/riscv64/znrm2.c new file mode 100644 index 000000000..fc1c8b54a --- /dev/null +++ b/kernel/riscv64/znrm2.c @@ -0,0 +1,106 @@ +/*************************************************************************** +Copyright (c) 2013, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*****************************************************************************/ + +/************************************************************************************** +* 2013/09/13 Saar +* BLASTEST float : OK +* BLASTEST double : OK +* CTEST : OK +* TEST : OK +* +**************************************************************************************/ + +#include "common.h" +#include + +#if defined(DOUBLE) + +#define ABS fabs + +#else + +#define ABS fabsf + +#endif + + + +FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) +{ + BLASLONG i=0; + FLOAT scale = 0.0; + FLOAT ssq = 1.0; + BLASLONG inc_x2; + FLOAT temp; + + if (n <= 0 || inc_x <= 0) return(0.0); + + inc_x2 = 2 * inc_x; + + n *= inc_x2; + while(i < n) + { + + if ( x[i] != 0.0 ) + { + temp = ABS( x[i] ); + if ( scale < temp ) + { + ssq = 1 + ssq * ( scale / temp ) * ( scale / temp ); + scale = temp ; + } + else + { + ssq += ( temp / scale ) * ( temp / scale ); + } + + } + + if ( x[i+1] != 0.0 ) + { + temp = ABS( x[i+1] ); + if ( scale < temp ) + { + ssq = 1 + ssq * ( scale / temp ) * ( scale / temp ); + scale = temp ; + } + else + { + ssq += ( temp / scale ) * ( temp / scale ); + } + + } + + + i += inc_x2; + } + scale = scale * sqrt( ssq ); + return(scale); + +} + + diff --git a/kernel/riscv64/znrm2_vector.c b/kernel/riscv64/znrm2_vector.c new file mode 100644 index 000000000..b0ebfa5f4 --- /dev/null +++ b/kernel/riscv64/znrm2_vector.c @@ -0,0 +1,278 @@ +/*************************************************************************** +Copyright (c) 2020, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. 
Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*****************************************************************************/ + +#include "common.h" +#if !defined(DOUBLE) +#define RVV_EFLOAT RVV_E32 +#define RVV_M RVV_M4 +#define FLOAT_V_T float32xm4_t +#define VLEV_FLOAT vlev_float32xm4 +#define VLSEV_FLOAT vlsev_float32xm4 +#define VFREDSUM_FLOAT vfredsumvs_float32xm4 +#define VFMACCVV_FLOAT vfmaccvv_float32xm4 +#define VFMVVF_FLOAT vfmvvf_float32xm4 +#define VFDOTVV_FLOAT vfdotvv_float32xm4 +#define ABS fabsf +#define MASK_T e32xm4_t +#define VFRSUBVF_MASK_FLOAT vfrsubvf_mask_float32xm4 +#define VMFGTVF_FLOAT vmfgtvf_e32xm4_float32xm4 +#define VMFIRSTM vmfirstm_e32xm4 +#define VFDIVVF_FLOAT vfdivvf_float32xm4 +#define VMFLTVF_FLOAT vmfltvf_e32xm4_float32xm4 +#define VFREDMAXVS_FLOAT vfredmaxvs_float32xm4 +#else +#define RVV_EFLOAT RVV_E64 +#define RVV_M RVV_M4 +#define FLOAT_V_T float64xm4_t +#define VLEV_FLOAT vlev_float64xm4 +#define VLSEV_FLOAT vlsev_float64xm4 +#define VFREDSUM_FLOAT vfredsumvs_float64xm4 +#define VFMACCVV_FLOAT vfmaccvv_float64xm4 +#define VFMVVF_FLOAT vfmvvf_float64xm4 +#define VFDOTVV_FLOAT vfdotvv_float64xm4 +#define ABS fabs +#define MASK_T e64xm4_t +#define VFRSUBVF_MASK_FLOAT vfrsubvf_mask_float64xm4 +#define VMFGTVF_FLOAT vmfgtvf_e64xm4_float64xm4 +#define VMFIRSTM vmfirstm_e64xm4 +#define VFDIVVF_FLOAT vfdivvf_float64xm4 +#define VMFLTVF_FLOAT vmfltvf_e64xm4_float64xm4 +#define VFREDMAXVS_FLOAT vfredmaxvs_float64xm4 +#endif + +FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) +{ + BLASLONG i=0, j=0; + + if ( n < 0 ) return(0.0); +// if(n == 1) return (ABS(x[0])); + + FLOAT_V_T vr, v0, v_zero; + unsigned int gvl = 0; + FLOAT scale = 0.0, ssq = 0.0; + MASK_T mask; + BLASLONG index = 0; + if(inc_x == 1){ + BLASLONG n2 = n * 2; + gvl = vsetvli(n2, RVV_EFLOAT, RVV_M); + vr = VFMVVF_FLOAT(0, gvl); + v_zero = VFMVVF_FLOAT(0, gvl); + for(i=0,j=0; i + +int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3, FLOAT dummy4, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, 
FLOAT *dummy, BLASLONG dummy2) +{ + BLASLONG i=0; + BLASLONG ix=0,iy=0; + FLOAT temp[2]; + BLASLONG inc_x2; + BLASLONG inc_y2; + + if ( n < 0 ) return(0); + + inc_x2 = 2 * inc_x; + inc_y2 = 2 * inc_y; + + while(i < n) + { + + temp[0] = x[ix] ; + temp[1] = x[ix+1] ; + x[ix] = y[iy] ; + x[ix+1] = y[iy+1] ; + y[iy] = temp[0] ; + y[iy+1] = temp[1] ; + + ix += inc_x2 ; + iy += inc_y2 ; + i++ ; + + } + return(0); + +} + + diff --git a/kernel/riscv64/zswap_vector.c b/kernel/riscv64/zswap_vector.c new file mode 100644 index 000000000..b655a968c --- /dev/null +++ b/kernel/riscv64/zswap_vector.c @@ -0,0 +1,117 @@ +/*************************************************************************** +Copyright (c) 2020, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*****************************************************************************/ + +#include "common.h" +#include +#if !defined(DOUBLE) +#define RVV_EFLOAT RVV_E32 +#define RVV_M RVV_M8 +#define FLOAT_V_T float32xm8_t +#define VLEV_FLOAT vlev_float32xm8 +#define VLSEV_FLOAT vlsev_float32xm8 +#define VSEV_FLOAT vsev_float32xm8 +#define VSSEV_FLOAT vssev_float32xm8 +#else +#define RVV_EFLOAT RVV_E64 +#define RVV_M RVV_M8 +#define FLOAT_V_T float64xm8_t +#define VLEV_FLOAT vlev_float64xm8 +#define VLSEV_FLOAT vlsev_float64xm8 +#define VSEV_FLOAT vsev_float64xm8 +#define VSSEV_FLOAT vssev_float64xm8 +#endif + +int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3, FLOAT dummy4, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *dummy, BLASLONG dummy2) +{ + BLASLONG i = 0, j = 0; + BLASLONG ix = 0,iy = 0; + BLASLONG stride_x, stride_y; + FLOAT_V_T vx0, vx1, vy0, vy1; + unsigned int gvl = 0; + + if (n < 0) return(0); + if(inc_x == 1 && inc_y == 1){ + gvl = vsetvli(n, RVV_EFLOAT, RVV_M); + BLASLONG n2 = n * 2; + if(gvl <= n2/2){ + for(i=0,j=0; i Date: Sun, 22 Nov 2020 16:02:19 +0800 Subject: [PATCH 9/9] Update doc for C910. 
--- README.md | 7 +++++++ TargetList.txt | 2 ++ 2 files changed, 9 insertions(+) diff --git a/README.md b/README.md index ca034e747..267df5358 100644 --- a/README.md +++ b/README.md @@ -172,6 +172,13 @@ Please read `GotoBLAS_01Readme.txt` for older CPU models already supported by th - **Z13**: Optimized Level-3 BLAS and Level-1,2 - **Z14**: Optimized Level-3 BLAS and (single precision) Level-1,2 +#### RISC-V + +- **C910V**: Optimized Level-3 BLAS (real) and Level-1,2 using the RISC-V Vector extension 0.7.1. + ```sh + make HOSTCC=gcc TARGET=C910V CC=riscv64-unknown-linux-gnu-gcc FC=riscv64-unknown-linux-gnu-gfortran + ``` + ### Support for multiple targets in a single library OpenBLAS can be built for multiple targets with runtime detection of the target cpu by specifiying `DYNAMIC_ARCH=1` in Makefile.rule, on the gmake command line or as `-DDYNAMIC_ARCH=TRUE` in cmake. diff --git a/TargetList.txt b/TargetList.txt index 86177ebca..d19964916 100644 --- a/TargetList.txt +++ b/TargetList.txt @@ -107,3 +107,5 @@ Z14 10.RISC-V 64: RISCV64_GENERIC +C910V +