Add the support for RISC-V Vector.
Change-Id: Iae7800a32f5af3903c330882cdf6f292d885f266
This commit is contained in:
parent
756802df61
commit
ef8e7d0279
|
@ -41,6 +41,10 @@ ifeq ($(TARGET), I6500)
|
|||
TARGET_FLAGS = -mips64r6
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET), C910V)
|
||||
TARGET_FLAGS = -march=rv64gcvxthead -mabi=lp64v
|
||||
endif
|
||||
|
||||
all: getarch_2nd
|
||||
./getarch_2nd 0 >> $(TARGET_MAKE)
|
||||
./getarch_2nd 1 >> $(TARGET_CONF)
|
||||
|
|
|
@ -0,0 +1,4 @@
|
|||
ifeq ($(CORE), C910V)
|
||||
CCOMMON_OPT += -march=rv64gcvxthead -mabi=lp64v
|
||||
FCOMMON_OPT += -march=rv64gcvxthead -mabi=lp64v -static
|
||||
endif
|
|
@ -724,7 +724,10 @@ endif
|
|||
endif
|
||||
endif
|
||||
|
||||
|
||||
ifeq ($(ARCH), riscv64)
|
||||
NO_BINARY_MODE = 1
|
||||
BINARY_DEFINED = 1
|
||||
endif
|
||||
|
||||
|
||||
#
|
||||
|
|
|
@ -104,3 +104,6 @@ VORTEX
|
|||
ZARCH_GENERIC
|
||||
Z13
|
||||
Z14
|
||||
|
||||
10.RISC-V 64:
|
||||
RISCV64_GENERIC
|
||||
|
|
6
c_check
6
c_check
|
@ -92,6 +92,7 @@ $architecture = ia64 if ($data =~ /ARCH_IA64/);
|
|||
$architecture = arm if ($data =~ /ARCH_ARM/);
|
||||
$architecture = arm64 if ($data =~ /ARCH_ARM64/);
|
||||
$architecture = zarch if ($data =~ /ARCH_ZARCH/);
|
||||
$architecture = riscv64 if ($data =~ /ARCH_RISCV64/);
|
||||
|
||||
$defined = 0;
|
||||
|
||||
|
@ -136,6 +137,11 @@ if (($architecture eq "x86") && ($os ne Darwin) && ($os ne SunOS)) {
|
|||
$binary =32;
|
||||
}
|
||||
|
||||
if ($architecture eq "riscv64") {
|
||||
$defined = 1;
|
||||
$binary = 64;
|
||||
}
|
||||
|
||||
if ($compiler eq "PGI") {
|
||||
$compiler_name .= " -tp p7" if ($binary eq "32");
|
||||
$compiler_name .= " -tp p7-64" if ($binary eq "64");
|
||||
|
|
4
common.h
4
common.h
|
@ -437,6 +437,10 @@ please https://github.com/xianyi/OpenBLAS/issues/246
|
|||
#include "common_mips.h"
|
||||
#endif
|
||||
|
||||
#ifdef ARCH_RISCV64
|
||||
#include "common_riscv64.h"
|
||||
#endif
|
||||
|
||||
#ifdef ARCH_MIPS64
|
||||
#include "common_mips64.h"
|
||||
#endif
|
||||
|
|
|
@ -0,0 +1,98 @@
|
|||
/*****************************************************************************
|
||||
Copyright (c) 2011-2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written
|
||||
permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************************/
|
||||
|
||||
/*********************************************************************/
|
||||
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
||||
/* All rights reserved. */
|
||||
/* */
|
||||
/* Redistribution and use in source and binary forms, with or */
|
||||
/* without modification, are permitted provided that the following */
|
||||
/* conditions are met: */
|
||||
/* */
|
||||
/* 1. Redistributions of source code must retain the above */
|
||||
/* copyright notice, this list of conditions and the following */
|
||||
/* disclaimer. */
|
||||
/* */
|
||||
/* 2. Redistributions in binary form must reproduce the above */
|
||||
/* copyright notice, this list of conditions and the following */
|
||||
/* disclaimer in the documentation and/or other materials */
|
||||
/* provided with the distribution. */
|
||||
/* */
|
||||
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
|
||||
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
|
||||
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
|
||||
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
|
||||
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
|
||||
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
|
||||
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
|
||||
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
|
||||
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
|
||||
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
|
||||
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
|
||||
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
|
||||
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
||||
/* POSSIBILITY OF SUCH DAMAGE. */
|
||||
/* */
|
||||
/* The views and conclusions contained in the software and */
|
||||
/* documentation are those of the authors and should not be */
|
||||
/* interpreted as representing official policies, either expressed */
|
||||
/* or implied, of The University of Texas at Austin. */
|
||||
/*********************************************************************/
|
||||
|
||||
#ifndef COMMON_RISCV64
|
||||
#define COMMON_RISCV64
|
||||
|
||||
#define MB __sync_synchronize()
|
||||
#define WMB __sync_synchronize()
|
||||
#define RMB __sync_synchronize()
|
||||
|
||||
#define INLINE inline
|
||||
|
||||
#ifndef ASSEMBLER
|
||||
|
||||
|
||||
static inline int blas_quickdivide(blasint x, blasint y){
|
||||
return x / y;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
#define BUFFER_SIZE ( 32 << 20)
|
||||
#define SEEK_ADDRESS
|
||||
|
||||
#if defined(C910V)
|
||||
#include <riscv-vector.h>
|
||||
#endif
|
||||
|
||||
#endif
|
|
@ -0,0 +1,113 @@
|
|||
/*****************************************************************************
|
||||
Copyright (c) 2011-2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written
|
||||
permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
**********************************************************************************/
|
||||
|
||||
|
||||
/*********************************************************************/
|
||||
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
||||
/* All rights reserved. */
|
||||
/* */
|
||||
/* Redistribution and use in source and binary forms, with or */
|
||||
/* without modification, are permitted provided that the following */
|
||||
/* conditions are met: */
|
||||
/* */
|
||||
/* 1. Redistributions of source code must retain the above */
|
||||
/* copyright notice, this list of conditions and the following */
|
||||
/* disclaimer. */
|
||||
/* */
|
||||
/* 2. Redistributions in binary form must reproduce the above */
|
||||
/* copyright notice, this list of conditions and the following */
|
||||
/* disclaimer in the documentation and/or other materials */
|
||||
/* provided with the distribution. */
|
||||
/* */
|
||||
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
|
||||
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
|
||||
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
|
||||
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
|
||||
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
|
||||
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
|
||||
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
|
||||
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
|
||||
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
|
||||
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
|
||||
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
|
||||
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
|
||||
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
||||
/* POSSIBILITY OF SUCH DAMAGE. */
|
||||
/* */
|
||||
/* The views and conclusions contained in the software and */
|
||||
/* documentation are those of the authors and should not be */
|
||||
/* interpreted as representing official policies, either expressed */
|
||||
/* or implied, of The University of Texas at Austin. */
|
||||
/*********************************************************************/
|
||||
|
||||
#define CPU_UNKNOWN 0
|
||||
#define CPU_C910V 1
|
||||
|
||||
static char *cpuname[] = {
|
||||
"UNKOWN",
|
||||
"C910V"
|
||||
};
|
||||
|
||||
int detect(void){
|
||||
return CPU_UNKNOWN;
|
||||
}
|
||||
|
||||
char *get_corename(void){
|
||||
return cpuname[detect()];
|
||||
}
|
||||
|
||||
void get_architecture(void){
|
||||
printf("RISCV64");
|
||||
}
|
||||
|
||||
void get_subarchitecture(void){
|
||||
}
|
||||
|
||||
void get_subdirname(void){
|
||||
printf("riscv64");
|
||||
}
|
||||
|
||||
void get_cpuconfig(void){
|
||||
printf("#define UNKNOWN\n");
|
||||
printf("#define L1_DATA_SIZE 65536\n");
|
||||
printf("#define L1_DATA_LINESIZE 32\n");
|
||||
printf("#define L2_SIZE 512488\n");
|
||||
printf("#define L2_LINESIZE 32\n");
|
||||
printf("#define DTB_DEFAULT_ENTRIES 64\n");
|
||||
printf("#define DTB_SIZE 4096\n");
|
||||
printf("#define L2_ASSOCIATIVE 4\n");
|
||||
}
|
||||
|
||||
void get_libname(void){
|
||||
printf("riscv64\n");
|
||||
}
|
4
ctest.c
4
ctest.c
|
@ -153,6 +153,10 @@ ARCH_ARM
|
|||
ARCH_ARM64
|
||||
#endif
|
||||
|
||||
#if defined(__riscv)
|
||||
ARCH_RISCV64
|
||||
#endif
|
||||
|
||||
#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L)
|
||||
HAVE_C11
|
||||
#endif
|
||||
|
|
33
getarch.c
33
getarch.c
|
@ -981,6 +981,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#else
|
||||
#endif
|
||||
|
||||
#ifdef FORCE_RISCV64_GENERIC
|
||||
#define FORCE
|
||||
#define ARCHITECTURE "RISCV64"
|
||||
#define SUBARCHITECTURE "RISCV64_GENERIC"
|
||||
#define SUBDIRNAME "riscv64"
|
||||
#define ARCHCONFIG "-DRISCV64_GENERIC " \
|
||||
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=32 " \
|
||||
"-DL2_SIZE=1048576 -DL2_LINESIZE=32 " \
|
||||
"-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 "
|
||||
#define LIBNAME "riscv64_generic"
|
||||
#define CORENAME "RISCV64_GENERIC"
|
||||
#else
|
||||
#endif
|
||||
|
||||
#ifdef FORCE_CORTEXA15
|
||||
#define FORCE
|
||||
#define ARCHITECTURE "ARM"
|
||||
|
@ -1252,6 +1266,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#define CORENAME "Z14"
|
||||
#endif
|
||||
|
||||
#ifdef FORCE_C910V
|
||||
#define FORCE
|
||||
#define ARCHITECTURE "RISCV64"
|
||||
#define SUBARCHITECTURE "C910V"
|
||||
#define SUBDIRNAME "riscv64"
|
||||
#define ARCHCONFIG "-DC910V " \
|
||||
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=32 " \
|
||||
"-DL2_SIZE=1048576 -DL2_LINESIZE=32 " \
|
||||
"-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 "
|
||||
#define LIBNAME "c910v"
|
||||
#define CORENAME "C910V"
|
||||
#else
|
||||
#endif
|
||||
|
||||
|
||||
#ifndef FORCE
|
||||
|
||||
#ifdef USER_TARGET
|
||||
|
@ -1306,6 +1335,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#define OPENBLAS_SUPPORTED
|
||||
#endif
|
||||
|
||||
#ifdef __riscv
|
||||
#include "cpuid_riscv64.c"
|
||||
#endif
|
||||
|
||||
#ifdef __arm__
|
||||
#include "cpuid_arm.c"
|
||||
#define OPENBLAS_SUPPORTED
|
||||
|
|
|
@ -25,6 +25,10 @@ ifeq ($(ARCH), arm64)
|
|||
USE_TRMM = 1
|
||||
endif
|
||||
|
||||
ifeq ($(ARCH), riscv64)
|
||||
USE_TRMM = 1
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET), LOONGSON3B)
|
||||
USE_TRMM = 1
|
||||
endif
|
||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,30 @@
|
|||
ifndef SCABS_KERNEL
|
||||
SCABS_KERNEL = ../generic/cabs.c
|
||||
endif
|
||||
|
||||
ifndef DCABS_KERNEL
|
||||
DCABS_KERNEL = ../generic/cabs.c
|
||||
endif
|
||||
|
||||
ifndef QCABS_KERNEL
|
||||
QCABS_KERNEL = ../generic/cabs.c
|
||||
endif
|
||||
|
||||
ifndef LSAME_KERNEL
|
||||
LSAME_KERNEL = ../generic/lsame.c
|
||||
endif
|
||||
|
||||
ifndef SGEMM_BETA
|
||||
SGEMM_BETA = ../generic/gemm_beta.c
|
||||
endif
|
||||
ifndef DGEMM_BETA
|
||||
DGEMM_BETA = ../generic/gemm_beta.c
|
||||
endif
|
||||
ifndef CGEMM_BETA
|
||||
CGEMM_BETA = ../generic/zgemm_beta.c
|
||||
endif
|
||||
ifndef ZGEMM_BETA
|
||||
ZGEMM_BETA = ../generic/zgemm_beta.c
|
||||
endif
|
||||
|
||||
|
|
@ -0,0 +1,190 @@
|
|||
SAMAXKERNEL = amax_vector.c
|
||||
DAMAXKERNEL = amax_vector.c
|
||||
CAMAXKERNEL = zamax_vector.c
|
||||
ZAMAXKERNEL = zamax_vector.c
|
||||
|
||||
SAMINKERNEL = amin_vector.c
|
||||
DAMINKERNEL = amin_vector.c
|
||||
CAMINKERNEL = zamin_vector.c
|
||||
ZAMINKERNEL = zamin_vector.c
|
||||
|
||||
SMAXKERNEL = max_vector.c
|
||||
DMAXKERNEL = max_vector.c
|
||||
|
||||
SMINKERNEL = min_vector.c
|
||||
DMINKERNEL = min_vector.c
|
||||
|
||||
ISAMAXKERNEL = iamax_vector.c
|
||||
IDAMAXKERNEL = iamax_vector.c
|
||||
ICAMAXKERNEL = izamax_vector.c
|
||||
IZAMAXKERNEL = izamax_vector.c
|
||||
|
||||
ISAMINKERNEL = iamin_vector.c
|
||||
IDAMINKERNEL = iamin_vector.c
|
||||
ICAMINKERNEL = izamin_vector.c
|
||||
IZAMINKERNEL = izamin_vector.c
|
||||
|
||||
ISMAXKERNEL = imax_vector.c
|
||||
IDMAXKERNEL = imax_vector.c
|
||||
|
||||
ISMINKERNEL = imin_vector.c
|
||||
IDMINKERNEL = imin_vector.c
|
||||
|
||||
SASUMKERNEL = asum_vector.c
|
||||
DASUMKERNEL = asum_vector.c
|
||||
CASUMKERNEL = zasum_vector.c
|
||||
ZASUMKERNEL = zasum_vector.c
|
||||
|
||||
SSUMKERNEL = ../arm/sum.c
|
||||
DSUMKERNEL = ../arm/sum.c
|
||||
CSUMKERNEL = ../arm/zsum.c
|
||||
ZSUMKERNEL = ../arm/zsum.c
|
||||
|
||||
SAXPYKERNEL = axpy_vector.c
|
||||
DAXPYKERNEL = axpy_vector.c
|
||||
CAXPYKERNEL = zaxpy_vector.c
|
||||
ZAXPYKERNEL = zaxpy_vector.c
|
||||
|
||||
SAXPBYKERNEL = axpby_vector.c
|
||||
DAXPBYKERNEL = axpby_vector.c
|
||||
CAXPBYKERNEL = zaxpby_vector.c
|
||||
ZAXPBYKERNEL = zaxpby_vector.c
|
||||
|
||||
SCOPYKERNEL = copy_vector.c
|
||||
DCOPYKERNEL = copy_vector.c
|
||||
CCOPYKERNEL = zcopy_vector.c
|
||||
ZCOPYKERNEL = zcopy_vector.c
|
||||
|
||||
SDOTKERNEL = dot_vector.c
|
||||
DDOTKERNEL = dot_vector.c
|
||||
CDOTKERNEL = zdot_vector.c
|
||||
ZDOTKERNEL = zdot_vector.c
|
||||
|
||||
SNRM2KERNEL = nrm2_vector.c
|
||||
DNRM2KERNEL = nrm2_vector.c
|
||||
CNRM2KERNEL = znrm2_vector.c
|
||||
ZNRM2KERNEL = znrm2_vector.c
|
||||
|
||||
SROTKERNEL = rot_vector.c
|
||||
DROTKERNEL = rot_vector.c
|
||||
CROTKERNEL = zrot_vector.c
|
||||
ZROTKERNEL = zrot_vector.c
|
||||
|
||||
SSCALKERNEL = scal_vector.c
|
||||
DSCALKERNEL = scal_vector.c
|
||||
CSCALKERNEL = zscal_vector.c
|
||||
ZSCALKERNEL = zscal_vector.c
|
||||
|
||||
SSWAPKERNEL = swap_vector.c
|
||||
DSWAPKERNEL = swap_vector.c
|
||||
CSWAPKERNEL = zswap_vector.c
|
||||
ZSWAPKERNEL = zswap_vector.c
|
||||
|
||||
SGEMVNKERNEL = gemv_n_vector.c
|
||||
DGEMVNKERNEL = gemv_n_vector.c
|
||||
CGEMVNKERNEL = zgemv_n_vector.c
|
||||
ZGEMVNKERNEL = zgemv_n_vector.c
|
||||
|
||||
SGEMVTKERNEL = gemv_t_vector.c
|
||||
DGEMVTKERNEL = gemv_t_vector.c
|
||||
CGEMVTKERNEL = zgemv_t_vector.c
|
||||
ZGEMVTKERNEL = zgemv_t_vector.c
|
||||
|
||||
STRMMKERNEL = ../generic/trmmkernel_16x4.c
|
||||
DTRMMKERNEL = ../generic/trmmkernel_8x4.c
|
||||
CTRMMKERNEL = ../generic/ztrmmkernel_2x2.c
|
||||
ZTRMMKERNEL = ../generic/ztrmmkernel_2x2.c
|
||||
|
||||
SGEMMKERNEL = sgemm_kernel_16x4_c910v.c
|
||||
ifneq ($(SGEMM_UNROLL_M), $(SGEMM_UNROLL_N))
|
||||
SGEMMINCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_M).c
|
||||
SGEMMITCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_M).c
|
||||
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||
endif
|
||||
SGEMMONCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_N).c
|
||||
SGEMMOTCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_N).c
|
||||
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
DGEMMKERNEL = dgemm_kernel_8x4_c910v.c
|
||||
ifneq ($(DGEMM_UNROLL_M), $(DGEMM_UNROLL_N))
|
||||
DGEMMINCOPY = ../generic/gemm_ncopy_$(DGEMM_UNROLL_M).c
|
||||
DGEMMITCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_M).c
|
||||
DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||
endif
|
||||
DGEMMONCOPY = ../generic/gemm_ncopy_$(DGEMM_UNROLL_N).c
|
||||
DGEMMOTCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_N).c
|
||||
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
CGEMMKERNEL = ../generic/zgemmkernel_2x2.c
|
||||
CGEMMONCOPY = ../generic/zgemm_ncopy_2.c
|
||||
CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
|
||||
CGEMMONCOPYOBJ = cgemm_oncopy.o
|
||||
CGEMMOTCOPYOBJ = cgemm_otcopy.o
|
||||
|
||||
ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c
|
||||
ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c
|
||||
ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
|
||||
ZGEMMONCOPYOBJ = zgemm_oncopy.o
|
||||
ZGEMMOTCOPYOBJ = zgemm_otcopy.o
|
||||
|
||||
STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||
STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||
STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||
|
||||
DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||
DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||
DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||
DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||
|
||||
CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||
CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||
CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||
CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||
|
||||
ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||
ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||
ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||
ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||
|
||||
SSYMV_U_KERNEL = symv_U_vector.c
|
||||
SSYMV_L_KERNEL = symv_L_vector.c
|
||||
DSYMV_U_KERNEL = symv_U_vector.c
|
||||
DSYMV_L_KERNEL = symv_L_vector.c
|
||||
CSYMV_U_KERNEL = ../generic/zsymv_k.c
|
||||
CSYMV_L_KERNEL = ../generic/zsymv_k.c
|
||||
ZSYMV_U_KERNEL = ../generic/zsymv_k.c
|
||||
ZSYMV_L_KERNEL = ../generic/zsymv_k.c
|
||||
|
||||
CHEMV_L_KERNEL = zhemv_LM_vector.c
|
||||
CHEMV_M_KERNEL = zhemv_LM_vector.c
|
||||
CHEMV_U_KERNEL = zhemv_UV_vector.c
|
||||
CHEMV_V_KERNEL = zhemv_UV_vector.c
|
||||
ZHEMV_L_KERNEL = zhemv_LM_vector.c
|
||||
ZHEMV_M_KERNEL = zhemv_LM_vector.c
|
||||
ZHEMV_U_KERNEL = zhemv_UV_vector.c
|
||||
ZHEMV_V_KERNEL = zhemv_UV_vector.c
|
||||
|
||||
|
||||
LSAME_KERNEL = ../generic/lsame.c
|
||||
|
||||
SCABS_KERNEL = ../generic/cabs.c
|
||||
DCABS_KERNEL = ../generic/cabs.c
|
||||
QCABS_KERNEL = ../generic/cabs.c
|
||||
|
||||
ifndef SGEMM_BETA
|
||||
SGEMM_BETA = ../generic/gemm_beta.c
|
||||
endif
|
||||
ifndef DGEMM_BETA
|
||||
DGEMM_BETA = ../generic/gemm_beta.c
|
||||
endif
|
||||
ifndef CGEMM_BETA
|
||||
CGEMM_BETA = ../generic/zgemm_beta.c
|
||||
endif
|
||||
ifndef ZGEMM_BETA
|
||||
ZGEMM_BETA = ../generic/zgemm_beta.c
|
||||
endif
|
|
@ -0,0 +1,164 @@
|
|||
SAMAXKERNEL = ../riscv64/amax.c
|
||||
DAMAXKERNEL = ../riscv64/amax.c
|
||||
CAMAXKERNEL = ../riscv64/zamax.c
|
||||
ZAMAXKERNEL = ../riscv64/zamax.c
|
||||
|
||||
SAMINKERNEL = ../riscv64/amin.c
|
||||
DAMINKERNEL = ../riscv64/amin.c
|
||||
CAMINKERNEL = ../riscv64/zamin.c
|
||||
ZAMINKERNEL = ../riscv64/zamin.c
|
||||
|
||||
SMAXKERNEL = ../riscv64/max.c
|
||||
DMAXKERNEL = ../riscv64/max.c
|
||||
|
||||
SMINKERNEL = ../riscv64/min.c
|
||||
DMINKERNEL = ../riscv64/min.c
|
||||
|
||||
ISAMAXKERNEL = ../riscv64/iamax.c
|
||||
IDAMAXKERNEL = ../riscv64/iamax.c
|
||||
ICAMAXKERNEL = ../riscv64/izamax.c
|
||||
IZAMAXKERNEL = ../riscv64/izamax.c
|
||||
|
||||
ISAMINKERNEL = ../riscv64/iamin.c
|
||||
IDAMINKERNEL = ../riscv64/iamin.c
|
||||
ICAMINKERNEL = ../riscv64/izamin.c
|
||||
IZAMINKERNEL = ../riscv64/izamin.c
|
||||
|
||||
ISMAXKERNEL = ../riscv64/imax.c
|
||||
IDMAXKERNEL = ../riscv64/imax.c
|
||||
|
||||
ISMINKERNEL = ../riscv64/imin.c
|
||||
IDMINKERNEL = ../riscv64/imin.c
|
||||
|
||||
SASUMKERNEL = ../riscv64/asum.c
|
||||
DASUMKERNEL = ../riscv64/asum.c
|
||||
CASUMKERNEL = ../riscv64/zasum.c
|
||||
ZASUMKERNEL = ../riscv64/zasum.c
|
||||
|
||||
SSUMKERNEL = ../arm/sum.c
|
||||
DSUMKERNEL = ../arm/sum.c
|
||||
CSUMKERNEL = ../arm/zsum.c
|
||||
ZSUMKERNEL = ../arm/zsum.c
|
||||
|
||||
SAXPYKERNEL = ../riscv64/axpy.c
|
||||
DAXPYKERNEL = ../riscv64/axpy.c
|
||||
CAXPYKERNEL = ../riscv64/zaxpy.c
|
||||
ZAXPYKERNEL = ../riscv64/zaxpy.c
|
||||
|
||||
SCOPYKERNEL = ../riscv64/copy.c
|
||||
DCOPYKERNEL = ../riscv64/copy.c
|
||||
CCOPYKERNEL = ../riscv64/zcopy.c
|
||||
ZCOPYKERNEL = ../riscv64/zcopy.c
|
||||
|
||||
SDOTKERNEL = ../riscv64/dot.c
|
||||
DDOTKERNEL = ../riscv64/dot.c
|
||||
CDOTKERNEL = ../riscv64/zdot.c
|
||||
ZDOTKERNEL = ../riscv64/zdot.c
|
||||
|
||||
SNRM2KERNEL = ../riscv64/nrm2.c
|
||||
DNRM2KERNEL = ../riscv64/nrm2.c
|
||||
CNRM2KERNEL = ../riscv64/znrm2.c
|
||||
ZNRM2KERNEL = ../riscv64/znrm2.c
|
||||
|
||||
SROTKERNEL = ../riscv64/rot.c
|
||||
DROTKERNEL = ../riscv64/rot.c
|
||||
CROTKERNEL = ../riscv64/zrot.c
|
||||
ZROTKERNEL = ../riscv64/zrot.c
|
||||
|
||||
SSCALKERNEL = ../riscv64/scal.c
|
||||
DSCALKERNEL = ../riscv64/scal.c
|
||||
CSCALKERNEL = ../riscv64/zscal.c
|
||||
ZSCALKERNEL = ../riscv64/zscal.c
|
||||
|
||||
SSWAPKERNEL = ../riscv64/swap.c
|
||||
DSWAPKERNEL = ../riscv64/swap.c
|
||||
CSWAPKERNEL = ../riscv64/zswap.c
|
||||
ZSWAPKERNEL = ../riscv64/zswap.c
|
||||
|
||||
SGEMVNKERNEL = ../riscv64/gemv_n.c
|
||||
DGEMVNKERNEL = ../riscv64/gemv_n.c
|
||||
CGEMVNKERNEL = ../riscv64/zgemv_n.c
|
||||
ZGEMVNKERNEL = ../riscv64/zgemv_n.c
|
||||
|
||||
SGEMVTKERNEL = ../riscv64/gemv_t.c
|
||||
DGEMVTKERNEL = ../riscv64/gemv_t.c
|
||||
CGEMVTKERNEL = ../riscv64/zgemv_t.c
|
||||
ZGEMVTKERNEL = ../riscv64/zgemv_t.c
|
||||
|
||||
STRMMKERNEL = ../generic/trmmkernel_2x2.c
|
||||
DTRMMKERNEL = ../generic/trmmkernel_2x2.c
|
||||
CTRMMKERNEL = ../generic/ztrmmkernel_2x2.c
|
||||
ZTRMMKERNEL = ../generic/ztrmmkernel_2x2.c
|
||||
|
||||
SGEMMKERNEL = ../generic/gemmkernel_2x2.c
|
||||
SGEMMONCOPY = ../generic/gemm_ncopy_2.c
|
||||
SGEMMOTCOPY = ../generic/gemm_tcopy_2.c
|
||||
SGEMMONCOPYOBJ = sgemm_oncopy.o
|
||||
SGEMMOTCOPYOBJ = sgemm_otcopy.o
|
||||
|
||||
DGEMMKERNEL = ../generic/gemmkernel_2x2.c
|
||||
DGEMMONCOPY = ../generic/gemm_ncopy_2.c
|
||||
DGEMMOTCOPY = ../generic/gemm_tcopy_2.c
|
||||
DGEMMONCOPYOBJ = dgemm_oncopy.o
|
||||
DGEMMOTCOPYOBJ = dgemm_otcopy.o
|
||||
|
||||
CGEMMKERNEL = ../generic/zgemmkernel_2x2.c
|
||||
CGEMMONCOPY = ../generic/zgemm_ncopy_2.c
|
||||
CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
|
||||
CGEMMONCOPYOBJ = cgemm_oncopy.o
|
||||
CGEMMOTCOPYOBJ = cgemm_otcopy.o
|
||||
|
||||
ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c
|
||||
ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c
|
||||
ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
|
||||
ZGEMMONCOPYOBJ = zgemm_oncopy.o
|
||||
ZGEMMOTCOPYOBJ = zgemm_otcopy.o
|
||||
|
||||
STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||
STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||
STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||
|
||||
DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||
DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||
DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||
DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||
|
||||
CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||
CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||
CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||
CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||
|
||||
ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||
ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||
ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||
ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||
|
||||
SSYMV_U_KERNEL = ../generic/symv_k.c
|
||||
SSYMV_L_KERNEL = ../generic/symv_k.c
|
||||
DSYMV_U_KERNEL = ../generic/symv_k.c
|
||||
DSYMV_L_KERNEL = ../generic/symv_k.c
|
||||
CSYMV_U_KERNEL = ../generic/zsymv_k.c
|
||||
CSYMV_L_KERNEL = ../generic/zsymv_k.c
|
||||
ZSYMV_U_KERNEL = ../generic/zsymv_k.c
|
||||
ZSYMV_L_KERNEL = ../generic/zsymv_k.c
|
||||
|
||||
|
||||
LSAME_KERNEL = ../generic/lsame.c
|
||||
|
||||
SCABS_KERNEL = ../generic/cabs.c
|
||||
DCABS_KERNEL = ../generic/cabs.c
|
||||
QCABS_KERNEL = ../generic/cabs.c
|
||||
|
||||
ifndef SGEMM_BETA
|
||||
SGEMM_BETA = ../generic/gemm_beta.c
|
||||
endif
|
||||
ifndef DGEMM_BETA
|
||||
DGEMM_BETA = ../generic/gemm_beta.c
|
||||
endif
|
||||
ifndef CGEMM_BETA
|
||||
CGEMM_BETA = ../generic/zgemm_beta.c
|
||||
endif
|
||||
ifndef ZGEMM_BETA
|
||||
ZGEMM_BETA = ../generic/zgemm_beta.c
|
||||
endif
|
|
@ -0,0 +1,75 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2013, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
/**************************************************************************************
|
||||
* 2013/09/14 Saar
|
||||
* BLASTEST float : OK
|
||||
* BLASTEST double : OK
|
||||
* CTEST : NoTest
|
||||
* TEST : NoTest
|
||||
*
|
||||
**************************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
#include <math.h>
|
||||
|
||||
#if defined(DOUBLE)
|
||||
|
||||
#define ABS fabs
|
||||
|
||||
#else
|
||||
|
||||
#define ABS fabsf
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
|
||||
{
|
||||
BLASLONG i=0;
|
||||
BLASLONG ix=0;
|
||||
FLOAT maxf=0.0;
|
||||
|
||||
if (n <= 0 || inc_x <= 0) return(maxf);
|
||||
|
||||
maxf=ABS(x[0]);
|
||||
ix += inc_x;
|
||||
i++;
|
||||
|
||||
while(i < n)
|
||||
{
|
||||
if( ABS(x[ix]) > maxf )
|
||||
{
|
||||
maxf = ABS(x[ix]);
|
||||
}
|
||||
ix += inc_x;
|
||||
i++;
|
||||
}
|
||||
return(maxf);
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,245 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2020, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
#include <math.h>
|
||||
|
||||
#if !defined(DOUBLE)
|
||||
#define RVV_EFLOAT RVV_E32
|
||||
#define RVV_M RVV_M8
|
||||
#define FLOAT_V_T float32xm8_t
|
||||
#define VLEV_FLOAT vlev_float32xm8
|
||||
#define VLSEV_FLOAT vlsev_float32xm8
|
||||
#define VFREDMAXVS_FLOAT vfredmaxvs_float32xm8
|
||||
#define MASK_T e32xm8_t
|
||||
#define VMFLTVF_FLOAT vmfltvf_e32xm8_float32xm8
|
||||
#define VFMVVF_FLOAT vfmvvf_float32xm8
|
||||
#define VFRSUBVF_MASK_FLOAT vfrsubvf_mask_float32xm8
|
||||
#define VFMAXVV_FLOAT vfmaxvv_float32xm8
|
||||
#else
|
||||
#define RVV_EFLOAT RVV_E64
|
||||
#define RVV_M RVV_M8
|
||||
#define FLOAT_V_T float64xm8_t
|
||||
#define VLEV_FLOAT vlev_float64xm8
|
||||
#define VLSEV_FLOAT vlsev_float64xm8
|
||||
#define VFREDMAXVS_FLOAT vfredmaxvs_float64xm8
|
||||
#define MASK_T e64xm8_t
|
||||
#define VMFLTVF_FLOAT vmfltvf_e64xm8_float64xm8
|
||||
#define VFMVVF_FLOAT vfmvvf_float64xm8
|
||||
#define VFRSUBVF_MASK_FLOAT vfrsubvf_mask_float64xm8
|
||||
#define VFMAXVV_FLOAT vfmaxvv_float64xm8
|
||||
#endif
|
||||
|
||||
FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
|
||||
{
|
||||
BLASLONG i=0, j=0;
|
||||
BLASLONG ix=0;
|
||||
FLOAT maxf=0.0;
|
||||
if (n <= 0 || inc_x <= 0) return(maxf);
|
||||
unsigned int gvl = 0;
|
||||
FLOAT_V_T v0, v1, v_max;
|
||||
|
||||
MASK_T mask0, mask1;
|
||||
FLOAT zero = 0.0;
|
||||
if(inc_x == 1){
|
||||
gvl = vsetvli(n, RVV_EFLOAT, RVV_M);
|
||||
if(gvl <= n/2){
|
||||
v_max = VFMVVF_FLOAT(0, gvl);
|
||||
for(i=0,j=0; i<n/(gvl*2); i++){
|
||||
v0 = VLEV_FLOAT(&x[j], gvl);
|
||||
mask0 = VMFLTVF_FLOAT(v0, 0, gvl);
|
||||
//v0 = VFRSUBVF_MASK_FLOAT(v0, 0, mask0, gvl);
|
||||
#if defined(DOUBLE)
|
||||
asm volatile(
|
||||
"vor.vv v0, %1, %1\n\t"
|
||||
"vsetvli x0, %3, e64,m8 \n\t"
|
||||
"vfrsub.vf %0, %0, %2, v0.t \n\t"
|
||||
:"+v"(v0)
|
||||
:"v"(mask0), "f"(zero), "r"(gvl)
|
||||
:"v0");
|
||||
#else
|
||||
asm volatile(
|
||||
"vor.vv v0, %1, %1\n\t"
|
||||
"vsetvli x0, %3, e32,m8 \n\t"
|
||||
"vfrsub.vf %0, %0, %2, v0.t \n\t"
|
||||
:"+v"(v0)
|
||||
:"v"(mask0), "f"(zero), "r"(gvl)
|
||||
:"v0");
|
||||
#endif
|
||||
|
||||
v_max = VFMAXVV_FLOAT(v_max, v0, gvl);
|
||||
|
||||
v1 = VLEV_FLOAT(&x[j+gvl], gvl);
|
||||
mask1 = VMFLTVF_FLOAT(v1, 0, gvl);
|
||||
//v1 = VFRSUBVF_MASK_FLOAT(v1, 0, mask1, gvl);
|
||||
#if defined(DOUBLE)
|
||||
asm volatile(
|
||||
"vor.vv v0, %1, %1\n\t"
|
||||
"vsetvli x0, %3, e64,m8 \n\t"
|
||||
"vfrsub.vf %0, %0, %2, v0.t \n\t"
|
||||
:"+v"(v1)
|
||||
:"v"(mask1), "f"(zero), "r"(gvl)
|
||||
:"v0");
|
||||
#else
|
||||
asm volatile(
|
||||
"vor.vv v0, %1, %1\n\t"
|
||||
"vsetvli x0, %3, e32,m8 \n\t"
|
||||
"vfrsub.vf %0, %0, %2, v0.t \n\t"
|
||||
:"+v"(v1)
|
||||
:"v"(mask1), "f"(zero), "r"(gvl)
|
||||
:"v0");
|
||||
#endif
|
||||
|
||||
v_max = VFMAXVV_FLOAT(v_max, v1, gvl);
|
||||
j += gvl*2;
|
||||
}
|
||||
v0 = VFMVVF_FLOAT(0, gvl);
|
||||
v0 = VFREDMAXVS_FLOAT(v_max, v0, gvl);
|
||||
maxf = v0[0];
|
||||
}
|
||||
for(;j<n;){
|
||||
gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M);
|
||||
v0 = VLEV_FLOAT(&x[j], gvl);
|
||||
mask0 = VMFLTVF_FLOAT(v0, 0, gvl);
|
||||
//v0 = VFRSUBVF_MASK_FLOAT(v0, 0, mask0, gvl);
|
||||
#if defined(DOUBLE)
|
||||
asm volatile(
|
||||
"vor.vv v0, %1, %1\n\t"
|
||||
"vsetvli x0, %3, e64,m8 \n\t"
|
||||
"vfrsub.vf %0, %0, %2, v0.t \n\t"
|
||||
:"+v"(v0)
|
||||
:"v"(mask0), "f"(zero), "r"(gvl)
|
||||
:"v0");
|
||||
#else
|
||||
asm volatile(
|
||||
"vor.vv v0, %1, %1\n\t"
|
||||
"vsetvli x0, %3, e32,m8 \n\t"
|
||||
"vfrsub.vf %0, %0, %2, v0.t \n\t"
|
||||
:"+v"(v0)
|
||||
:"v"(mask0), "f"(zero), "r"(gvl)
|
||||
:"v0");
|
||||
#endif
|
||||
|
||||
v1 = VFMVVF_FLOAT(0, gvl);
|
||||
v0 = VFREDMAXVS_FLOAT(v0, v1, gvl);
|
||||
if(v0[0] > maxf)
|
||||
maxf = v0[0];
|
||||
j += gvl;
|
||||
}
|
||||
}else{
|
||||
gvl = vsetvli(n, RVV_EFLOAT, RVV_M);
|
||||
BLASLONG stride_x = inc_x * sizeof(FLOAT);
|
||||
if(gvl <= n/2){
|
||||
BLASLONG inc_xv = inc_x * gvl;
|
||||
v_max = VFMVVF_FLOAT(0, gvl);
|
||||
for(i=0,j=0; i<n/(gvl*2); i++){
|
||||
v0 = VLSEV_FLOAT(&x[ix], stride_x, gvl);
|
||||
mask0 = VMFLTVF_FLOAT(v0, 0, gvl);
|
||||
//v0 = VFRSUBVF_MASK_FLOAT(v0, 0, mask0, gvl);
|
||||
#if defined(DOUBLE)
|
||||
asm volatile(
|
||||
"vor.vv v0, %1, %1\n\t"
|
||||
"vsetvli x0, %3, e64,m8 \n\t"
|
||||
"vfrsub.vf %0, %0, %2, v0.t \n\t"
|
||||
:"+v"(v0)
|
||||
:"v"(mask0), "f"(zero), "r"(gvl)
|
||||
:"v0");
|
||||
#else
|
||||
asm volatile(
|
||||
"vor.vv v0, %1, %1\n\t"
|
||||
"vsetvli x0, %3, e32,m8 \n\t"
|
||||
"vfrsub.vf %0, %0, %2, v0.t \n\t"
|
||||
:"+v"(v0)
|
||||
:"v"(mask0), "f"(zero), "r"(gvl)
|
||||
:"v0");
|
||||
#endif
|
||||
|
||||
v_max = VFMAXVV_FLOAT(v_max, v0, gvl);
|
||||
|
||||
v1 = VLSEV_FLOAT(&x[ix+inc_xv], stride_x, gvl);
|
||||
mask1 = VMFLTVF_FLOAT(v1, 0, gvl);
|
||||
//v1 = VFRSUBVF_MASK_FLOAT(v1, 0, mask1, gvl);
|
||||
#if defined(DOUBLE)
|
||||
asm volatile(
|
||||
"vor.vv v0, %1, %1\n\t"
|
||||
"vsetvli x0, %3, e64,m8 \n\t"
|
||||
"vfrsub.vf %0, %0, %2, v0.t \n\t"
|
||||
:"+v"(v1)
|
||||
:"v"(mask1), "f"(zero), "r"(gvl)
|
||||
:"v0");
|
||||
#else
|
||||
asm volatile(
|
||||
"vor.vv v0, %1, %1\n\t"
|
||||
"vsetvli x0, %3, e32,m8 \n\t"
|
||||
"vfrsub.vf %0, %0, %2, v0.t \n\t"
|
||||
:"+v"(v1)
|
||||
:"v"(mask1), "f"(zero), "r"(gvl)
|
||||
:"v0");
|
||||
#endif
|
||||
|
||||
v_max = VFMAXVV_FLOAT(v_max, v1, gvl);
|
||||
j += gvl*2;
|
||||
ix += inc_xv*2;
|
||||
}
|
||||
v0 = VFMVVF_FLOAT(0, gvl);
|
||||
v0 = VFREDMAXVS_FLOAT(v_max, v0, gvl);
|
||||
maxf = v0[0];
|
||||
}
|
||||
for(;j<n;){
|
||||
gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M);
|
||||
v0 = VLSEV_FLOAT(&x[j*inc_x], stride_x, gvl);
|
||||
mask0 = VMFLTVF_FLOAT(v0, 0, gvl);
|
||||
//v0 = VFRSUBVF_MASK_FLOAT(v0, 0, mask0, gvl);
|
||||
#if defined(DOUBLE)
|
||||
asm volatile(
|
||||
"vor.vv v0, %1, %1\n\t"
|
||||
"vsetvli x0, %3, e64,m8 \n\t"
|
||||
"vfrsub.vf %0, %0, %2, v0.t \n\t"
|
||||
:"+v"(v0)
|
||||
:"v"(mask0), "f"(zero), "r"(gvl)
|
||||
:"v0");
|
||||
#else
|
||||
asm volatile(
|
||||
"vor.vv v0, %1, %1\n\t"
|
||||
"vsetvli x0, %3, e32,m8 \n\t"
|
||||
"vfrsub.vf %0, %0, %2, v0.t \n\t"
|
||||
:"+v"(v0)
|
||||
:"v"(mask0), "f"(zero), "r"(gvl)
|
||||
:"v0");
|
||||
#endif
|
||||
|
||||
v1 = VFMVVF_FLOAT(0, gvl);
|
||||
v0 = VFREDMAXVS_FLOAT(v0, v1, gvl);
|
||||
if(v0[0] > maxf)
|
||||
maxf = v0[0];
|
||||
j += gvl;
|
||||
}
|
||||
}
|
||||
return(maxf);
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,75 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2013, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
/**************************************************************************************
|
||||
* 2013/09/14 Saar
|
||||
* BLASTEST float : OK
|
||||
* BLASTEST double : OK
|
||||
* CTEST : NoTest
|
||||
* TEST : NoTest
|
||||
*
|
||||
**************************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
#include <math.h>
|
||||
|
||||
#if defined(DOUBLE)
|
||||
|
||||
#define ABS fabs
|
||||
|
||||
#else
|
||||
|
||||
#define ABS fabsf
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
|
||||
{
|
||||
BLASLONG i=0;
|
||||
BLASLONG ix=0;
|
||||
FLOAT minf=0.0;
|
||||
|
||||
if (n <= 0 || inc_x <= 0) return(minf);
|
||||
|
||||
minf=ABS(x[0]);
|
||||
ix += inc_x;
|
||||
i++;
|
||||
|
||||
while(i < n)
|
||||
{
|
||||
if( ABS(x[ix]) < minf )
|
||||
{
|
||||
minf = ABS(x[ix]);
|
||||
}
|
||||
ix += inc_x;
|
||||
i++;
|
||||
}
|
||||
return(minf);
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,241 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2020, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
#include <math.h>
|
||||
#include <float.h>
|
||||
|
||||
#if !defined(DOUBLE)
|
||||
#define RVV_EFLOAT RVV_E32
|
||||
#define RVV_M RVV_M8
|
||||
#define FLOAT_V_T float32xm8_t
|
||||
#define VLEV_FLOAT vlev_float32xm8
|
||||
#define VLSEV_FLOAT vlsev_float32xm8
|
||||
#define VFREDMINVS_FLOAT vfredminvs_float32xm8
|
||||
#define MASK_T e32xm8_t
|
||||
#define VMFLTVF_FLOAT vmfltvf_e32xm8_float32xm8
|
||||
#define VFMVVF_FLOAT vfmvvf_float32xm8
|
||||
#define VFRSUBVF_MASK_FLOAT vfrsubvf_mask_float32xm8
|
||||
#define VFMINVV_FLOAT vfminvv_float32xm8
|
||||
#else
|
||||
#define RVV_EFLOAT RVV_E64
|
||||
#define RVV_M RVV_M8
|
||||
#define FLOAT_V_T float64xm8_t
|
||||
#define VLEV_FLOAT vlev_float64xm8
|
||||
#define VLSEV_FLOAT vlsev_float64xm8
|
||||
#define VFREDMINVS_FLOAT vfredminvs_float64xm8
|
||||
#define MASK_T e64xm8_t
|
||||
#define VMFLTVF_FLOAT vmfltvf_e64xm8_float64xm8
|
||||
#define VFMVVF_FLOAT vfmvvf_float64xm8
|
||||
#define VFRSUBVF_MASK_FLOAT vfrsubvf_mask_float64xm8
|
||||
#define VFMINVV_FLOAT vfminvv_float64xm8
|
||||
#endif
|
||||
|
||||
FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
|
||||
{
|
||||
BLASLONG i=0, j=0;
|
||||
if (n <= 0 || inc_x <= 0) return(0.0);
|
||||
FLOAT minf=FLT_MAX;
|
||||
unsigned int gvl = 0;
|
||||
FLOAT_V_T v0, v1, v_min;
|
||||
|
||||
MASK_T mask0, mask1;
|
||||
FLOAT zero = 0.0;
|
||||
if(inc_x == 1){
|
||||
gvl = vsetvli(n, RVV_EFLOAT, RVV_M);
|
||||
if(gvl <= n/2){
|
||||
v_min = VFMVVF_FLOAT(FLT_MAX, gvl);
|
||||
for(i=0,j=0; i<n/(gvl*2); i++){
|
||||
v0 = VLEV_FLOAT(&x[j], gvl);
|
||||
mask0 = VMFLTVF_FLOAT(v0, 0, gvl);
|
||||
//v0 = VFRSUBVF_MASK_FLOAT(v0, 0, mask0, gvl);
|
||||
#if defined(DOUBLE)
|
||||
asm volatile(
|
||||
"vor.vv v0, %1, %1\n\t"
|
||||
"vsetvli x0, %3, e64,m8 \n\t"
|
||||
"vfrsub.vf %0, %0, %2, v0.t \n\t"
|
||||
:"+v"(v0)
|
||||
:"v"(mask0), "f"(zero), "r"(gvl)
|
||||
:"v0");
|
||||
#else
|
||||
asm volatile(
|
||||
"vor.vv v0, %1, %1\n\t"
|
||||
"vsetvli x0, %3, e32,m8 \n\t"
|
||||
"vfrsub.vf %0, %0, %2, v0.t \n\t"
|
||||
:"+v"(v0)
|
||||
:"v"(mask0), "f"(zero), "r"(gvl)
|
||||
:"v0");
|
||||
#endif
|
||||
v_min = VFMINVV_FLOAT(v_min, v0, gvl);
|
||||
|
||||
v1 = VLEV_FLOAT(&x[j+gvl], gvl);
|
||||
mask1 = VMFLTVF_FLOAT(v1, 0, gvl);
|
||||
//v1 = VFRSUBVF_MASK_FLOAT(v1, 0, mask1, gvl);
|
||||
#if defined(DOUBLE)
|
||||
asm volatile(
|
||||
"vor.vv v0, %1, %1\n\t"
|
||||
"vsetvli x0, %3, e64,m8 \n\t"
|
||||
"vfrsub.vf %0, %0, %2, v0.t \n\t"
|
||||
:"+v"(v1)
|
||||
:"v"(mask1), "f"(zero), "r"(gvl)
|
||||
:"v0");
|
||||
#else
|
||||
asm volatile(
|
||||
"vor.vv v0, %1, %1\n\t"
|
||||
"vsetvli x0, %3, e32,m8 \n\t"
|
||||
"vfrsub.vf %0, %0, %2, v0.t \n\t"
|
||||
:"+v"(v1)
|
||||
:"v"(mask1), "f"(zero), "r"(gvl)
|
||||
:"v0");
|
||||
#endif
|
||||
|
||||
v_min = VFMINVV_FLOAT(v_min, v1, gvl);
|
||||
j += gvl*2;
|
||||
}
|
||||
v1 = VFMVVF_FLOAT(FLT_MAX, gvl);
|
||||
v0 = VFREDMINVS_FLOAT(v_min, v1, gvl);
|
||||
minf = v0[0];
|
||||
}
|
||||
for(;j<n;){
|
||||
gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M);
|
||||
v0 = VLEV_FLOAT(&x[j], gvl);
|
||||
mask0 = VMFLTVF_FLOAT(v0, 0, gvl);
|
||||
//v0 = VFRSUBVF_MASK_FLOAT(v0, 0, mask0, gvl);
|
||||
#if defined(DOUBLE)
|
||||
asm volatile(
|
||||
"vor.vv v0, %1, %1\n\t"
|
||||
"vsetvli x0, %3, e64,m8 \n\t"
|
||||
"vfrsub.vf %0, %0, %2, v0.t \n\t"
|
||||
:"+v"(v0)
|
||||
:"v"(mask0), "f"(zero), "r"(gvl)
|
||||
:"v0");
|
||||
#else
|
||||
asm volatile(
|
||||
"vor.vv v0, %1, %1\n\t"
|
||||
"vsetvli x0, %3, e32,m8 \n\t"
|
||||
"vfrsub.vf %0, %0, %2, v0.t \n\t"
|
||||
:"+v"(v0)
|
||||
:"v"(mask0), "f"(zero), "r"(gvl)
|
||||
:"v0");
|
||||
#endif
|
||||
v1 = VFMVVF_FLOAT(FLT_MAX, gvl);
|
||||
v0 = VFREDMINVS_FLOAT(v0, v1, gvl);
|
||||
if(v0[0] < minf)
|
||||
minf = v0[0];
|
||||
j += gvl;
|
||||
}
|
||||
}else{
|
||||
gvl = vsetvli(n, RVV_EFLOAT, RVV_M);
|
||||
BLASLONG stride_x = inc_x * sizeof(FLOAT);
|
||||
if(gvl <= n/2){
|
||||
BLASLONG idx = 0, inc_xv = inc_x * gvl;
|
||||
v_min = VFMVVF_FLOAT(FLT_MAX, gvl);
|
||||
for(i=0,j=0; i<n/(gvl*2); i++){
|
||||
v0 = VLSEV_FLOAT(&x[idx], stride_x, gvl);
|
||||
mask0 = VMFLTVF_FLOAT(v0, 0, gvl);
|
||||
//v0 = VFRSUBVF_MASK_FLOAT(v0, 0, mask0, gvl);
|
||||
#if defined(DOUBLE)
|
||||
asm volatile(
|
||||
"vor.vv v0, %1, %1\n\t"
|
||||
"vsetvli x0, %3, e64,m8 \n\t"
|
||||
"vfrsub.vf %0, %0, %2, v0.t \n\t"
|
||||
:"+v"(v0)
|
||||
:"v"(mask0), "f"(zero), "r"(gvl)
|
||||
:"v0");
|
||||
#else
|
||||
asm volatile(
|
||||
"vor.vv v0, %1, %1\n\t"
|
||||
"vsetvli x0, %3, e32,m8 \n\t"
|
||||
"vfrsub.vf %0, %0, %2, v0.t \n\t"
|
||||
:"+v"(v0)
|
||||
:"v"(mask0), "f"(zero), "r"(gvl)
|
||||
:"v0");
|
||||
#endif
|
||||
v_min = VFMINVV_FLOAT(v_min, v0, gvl);
|
||||
|
||||
v1 = VLSEV_FLOAT(&x[idx+inc_xv], stride_x, gvl);
|
||||
mask1 = VMFLTVF_FLOAT(v1, 0, gvl);
|
||||
//v1 = VFRSUBVF_MASK_FLOAT(v1, 0, mask1, gvl);
|
||||
#if defined(DOUBLE)
|
||||
asm volatile(
|
||||
"vor.vv v0, %1, %1\n\t"
|
||||
"vsetvli x0, %3, e64,m8 \n\t"
|
||||
"vfrsub.vf %0, %0, %2, v0.t \n\t"
|
||||
:"+v"(v1)
|
||||
:"v"(mask1), "f"(zero), "r"(gvl)
|
||||
:"v0");
|
||||
#else
|
||||
asm volatile(
|
||||
"vor.vv v0, %1, %1\n\t"
|
||||
"vsetvli x0, %3, e32,m8 \n\t"
|
||||
"vfrsub.vf %0, %0, %2, v0.t \n\t"
|
||||
:"+v"(v1)
|
||||
:"v"(mask1), "f"(zero), "r"(gvl)
|
||||
:"v0");
|
||||
#endif
|
||||
|
||||
v_min = VFMINVV_FLOAT(v_min, v1, gvl);
|
||||
j += gvl*2;
|
||||
idx += inc_xv*2;
|
||||
}
|
||||
v1 = VFMVVF_FLOAT(FLT_MAX, gvl);
|
||||
v0 = VFREDMINVS_FLOAT(v_min, v1, gvl);
|
||||
minf = v0[0];
|
||||
}
|
||||
for(;j<n;){
|
||||
gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M);
|
||||
v0 = VLSEV_FLOAT(&x[j*inc_x], stride_x, gvl);
|
||||
mask0 = VMFLTVF_FLOAT(v0, 0, gvl);
|
||||
//v0 = VFRSUBVF_MASK_FLOAT(v0, 0, mask0, gvl);
|
||||
#if defined(DOUBLE)
|
||||
asm volatile(
|
||||
"vor.vv v0, %1, %1\n\t"
|
||||
"vsetvli x0, %3, e64,m8 \n\t"
|
||||
"vfrsub.vf %0, %0, %2, v0.t \n\t"
|
||||
:"+v"(v0)
|
||||
:"v"(mask0), "f"(zero), "r"(gvl)
|
||||
:"v0");
|
||||
#else
|
||||
asm volatile(
|
||||
"vor.vv v0, %1, %1\n\t"
|
||||
"vsetvli x0, %3, e32,m8 \n\t"
|
||||
"vfrsub.vf %0, %0, %2, v0.t \n\t"
|
||||
:"+v"(v0)
|
||||
:"v"(mask0), "f"(zero), "r"(gvl)
|
||||
:"v0");
|
||||
#endif
|
||||
v1 = VFMVVF_FLOAT(FLT_MAX, gvl);
|
||||
v0 = VFREDMINVS_FLOAT(v0, v1, gvl);
|
||||
if(v0[0] < minf)
|
||||
minf = v0[0];
|
||||
j += gvl;
|
||||
}
|
||||
}
|
||||
return(minf);
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,67 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2013, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
/**************************************************************************************
|
||||
* 2013/09/14 Saar
|
||||
* BLASTEST float : OK
|
||||
* BLASTEST double : OK
|
||||
* CTEST : OK
|
||||
* TEST : OK
|
||||
*
|
||||
**************************************************************************************/
|
||||
|
||||
|
||||
#include "common.h"
|
||||
#include <math.h>
|
||||
|
||||
#if defined(DOUBLE)
|
||||
|
||||
#define ABS fabs
|
||||
|
||||
#else
|
||||
|
||||
#define ABS fabsf
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
|
||||
{
|
||||
BLASLONG i=0;
|
||||
FLOAT sumf = 0.0;
|
||||
if (n <= 0 || inc_x <= 0) return(sumf);
|
||||
|
||||
n *= inc_x;
|
||||
while(i < n)
|
||||
{
|
||||
sumf += ABS(x[i]);
|
||||
i += inc_x;
|
||||
}
|
||||
return(sumf);
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,131 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2020, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
#include <math.h>
|
||||
|
||||
#if !defined(DOUBLE)
|
||||
#define RVV_EFLOAT RVV_E32
|
||||
#define RVV_M RVV_M8
|
||||
#define FLOAT_V_T float32xm8_t
|
||||
#define VLEV_FLOAT vlev_float32xm8
|
||||
#define VLSEV_FLOAT vlsev_float32xm8
|
||||
#define VFREDSUMVS_FLOAT vfredsumvs_float32xm8
|
||||
#define MASK_T e32xm8_t
|
||||
#define VMFLTVF_FLOAT vmfltvf_e32xm8_float32xm8
|
||||
#define VFMVVF_FLOAT vfmvvf_float32xm8
|
||||
#define VFRSUBVF_MASK_FLOAT vfrsubvf_mask_float32xm8
|
||||
#define VFADDVV_FLOAT vfaddvv_float32xm8
|
||||
#else
|
||||
#define RVV_EFLOAT RVV_E64
|
||||
#define RVV_M RVV_M8
|
||||
#define FLOAT_V_T float64xm8_t
|
||||
#define VLEV_FLOAT vlev_float64xm8
|
||||
#define VLSEV_FLOAT vlsev_float64xm8
|
||||
#define VFREDSUMVS_FLOAT vfredsumvs_float64xm8
|
||||
#define MASK_T e64xm8_t
|
||||
#define VMFLTVF_FLOAT vmfltvf_e64xm8_float64xm8
|
||||
#define VFMVVF_FLOAT vfmvvf_float64xm8
|
||||
#define VFRSUBVF_MASK_FLOAT vfrsubvf_mask_float64xm8
|
||||
#define VFADDVV_FLOAT vfaddvv_float64xm8
|
||||
#endif
|
||||
FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
|
||||
{
|
||||
BLASLONG i=0, j=0;
|
||||
BLASLONG ix=0;
|
||||
FLOAT asumf=0.0;
|
||||
if (n <= 0 || inc_x <= 0) return(asumf);
|
||||
unsigned int gvl = 0;
|
||||
FLOAT_V_T v0, v1, v_zero,v_sum;
|
||||
|
||||
MASK_T mask0, mask1;
|
||||
if(inc_x == 1){
|
||||
gvl = vsetvli(n, RVV_EFLOAT, RVV_M);
|
||||
v_zero = VFMVVF_FLOAT(0, gvl);
|
||||
if(gvl <= n/2){
|
||||
v_sum = VFMVVF_FLOAT(0, gvl);
|
||||
for(i=0,j=0; i<n/(gvl*2); i++){
|
||||
v0 = VLEV_FLOAT(&x[j], gvl);
|
||||
mask0 = VMFLTVF_FLOAT(v0, 0, gvl);
|
||||
v0 = VFRSUBVF_MASK_FLOAT(v0, v0, 0, mask0, gvl);
|
||||
v_sum = VFADDVV_FLOAT(v_sum, v0, gvl);
|
||||
|
||||
v1 = VLEV_FLOAT(&x[j+gvl], gvl);
|
||||
mask1 = VMFLTVF_FLOAT(v1, 0, gvl);
|
||||
v1 = VFRSUBVF_MASK_FLOAT(v1, v1, 0, mask1, gvl);
|
||||
v_sum = VFADDVV_FLOAT(v_sum, v1, gvl);
|
||||
j += gvl * 2;
|
||||
}
|
||||
v0 = VFREDSUMVS_FLOAT(v_sum, v_zero, gvl);
|
||||
asumf += v0[0];
|
||||
}
|
||||
for(;j<n;){
|
||||
gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M);
|
||||
v0 = VLEV_FLOAT(&x[j], gvl);
|
||||
mask0 = VMFLTVF_FLOAT(v0, 0, gvl);
|
||||
v0 = VFRSUBVF_MASK_FLOAT(v0, v0, 0, mask0, gvl);
|
||||
v0 = VFREDSUMVS_FLOAT(v0, v_zero, gvl);
|
||||
asumf += v0[0];
|
||||
j += gvl;
|
||||
}
|
||||
}else{
|
||||
gvl = vsetvli(n, RVV_EFLOAT, RVV_M);
|
||||
unsigned int stride_x = inc_x * sizeof(FLOAT);
|
||||
v_zero = VFMVVF_FLOAT(0, gvl);
|
||||
if(gvl <= n/2){
|
||||
v_sum = VFMVVF_FLOAT(0, gvl);
|
||||
BLASLONG inc_xv = inc_x * gvl;
|
||||
for(i=0,j=0; i<n/(gvl*2); i++){
|
||||
v0 = VLSEV_FLOAT(&x[ix], stride_x, gvl);
|
||||
mask0 = VMFLTVF_FLOAT(v0, 0, gvl);
|
||||
v0 = VFRSUBVF_MASK_FLOAT(v0, v0, 0, mask0, gvl);
|
||||
v_sum = VFADDVV_FLOAT(v_sum, v0, gvl);
|
||||
|
||||
v1 = VLSEV_FLOAT(&x[ix+inc_xv], stride_x, gvl);
|
||||
mask1 = VMFLTVF_FLOAT(v1, 0, gvl);
|
||||
v1 = VFRSUBVF_MASK_FLOAT(v1, v1, 0, mask1, gvl);
|
||||
v_sum = VFADDVV_FLOAT(v_sum, v1, gvl);
|
||||
j += gvl * 2;
|
||||
inc_xv += inc_xv * 2;
|
||||
}
|
||||
v0 = VFREDSUMVS_FLOAT(v_sum, v_zero, gvl);
|
||||
asumf += v0[0];
|
||||
}
|
||||
for(;j<n;){
|
||||
gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M);
|
||||
v0 = VLSEV_FLOAT(&x[j*inc_x], stride_x, gvl);
|
||||
mask0 = VMFLTVF_FLOAT(v0, 0, gvl);
|
||||
v0 = VFRSUBVF_MASK_FLOAT(v0, v0, 0, mask0, gvl);
|
||||
v0 = VFREDSUMVS_FLOAT(v0, v_zero, gvl);
|
||||
asumf += v0[0];
|
||||
j += gvl;
|
||||
}
|
||||
}
|
||||
return(asumf);
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,96 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2013, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
|
||||
#include "common.h"
|
||||
|
||||
int CNAME(BLASLONG n, FLOAT alpha, FLOAT *x, BLASLONG inc_x, FLOAT beta, FLOAT *y, BLASLONG inc_y)
|
||||
{
|
||||
BLASLONG i=0;
|
||||
BLASLONG ix,iy;
|
||||
|
||||
if ( n < 0 ) return(0);
|
||||
|
||||
ix = 0;
|
||||
iy = 0;
|
||||
|
||||
if ( beta == 0.0 )
|
||||
{
|
||||
|
||||
if ( alpha == 0.0 )
|
||||
{
|
||||
while(i < n)
|
||||
{
|
||||
y[iy] = 0.0 ;
|
||||
iy += inc_y ;
|
||||
i++ ;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
while(i < n)
|
||||
{
|
||||
y[iy] = alpha * x[ix] ;
|
||||
ix += inc_x ;
|
||||
iy += inc_y ;
|
||||
i++ ;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
|
||||
if ( alpha == 0.0 )
|
||||
{
|
||||
while(i < n)
|
||||
{
|
||||
y[iy] = beta * y[iy] ;
|
||||
iy += inc_y ;
|
||||
i++ ;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
while(i < n)
|
||||
{
|
||||
y[iy] = alpha * x[ix] + beta * y[iy] ;
|
||||
ix += inc_x ;
|
||||
iy += inc_y ;
|
||||
i++ ;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return(0);
|
||||
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,378 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2020, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
|
||||
#if !defined(DOUBLE)
|
||||
#define RVV_EFLOAT RVV_E32
|
||||
#define RVV_M RVV_M4
|
||||
#define FLOAT_V_T float32xm4_t
|
||||
#define VLEV_FLOAT vlev_float32xm4
|
||||
#define VLSEV_FLOAT vlsev_float32xm4
|
||||
#define VSEV_FLOAT vsev_float32xm4
|
||||
#define VSSEV_FLOAT vssev_float32xm4
|
||||
#define VFMACCVF_FLOAT vfmaccvf_float32xm4
|
||||
#define VFMVVF_FLOAT vfmvvf_float32xm4
|
||||
#define VFMULVF_FLOAT vfmulvf_float32xm4
|
||||
#else
|
||||
#define RVV_EFLOAT RVV_E64
|
||||
#define RVV_M RVV_M4
|
||||
#define FLOAT_V_T float64xm4_t
|
||||
#define VLEV_FLOAT vlev_float64xm4
|
||||
#define VLSEV_FLOAT vlsev_float64xm4
|
||||
#define VSEV_FLOAT vsev_float64xm4
|
||||
#define VSSEV_FLOAT vssev_float64xm4
|
||||
#define VFMACCVF_FLOAT vfmaccvf_float64xm4
|
||||
#define VFMVVF_FLOAT vfmvvf_float64xm4
|
||||
#define VFMULVF_FLOAT vfmulvf_float64xm4
|
||||
#endif
|
||||
|
||||
int CNAME(BLASLONG n, FLOAT alpha, FLOAT *x, BLASLONG inc_x, FLOAT beta, FLOAT *y, BLASLONG inc_y)
|
||||
{
|
||||
if (n < 0) return(0);
|
||||
|
||||
BLASLONG i=0, j=0;
|
||||
unsigned int gvl = 0;
|
||||
FLOAT_V_T vx0, vx1;
|
||||
FLOAT_V_T vy0, vy1;
|
||||
|
||||
BLASLONG stride_x, stride_y, ix = 0, iy = 0;
|
||||
|
||||
if(beta == 0.0){
|
||||
if(alpha == 0.0){//alpha == 0 && beta == 0
|
||||
if(inc_y == 1){
|
||||
gvl = vsetvli(n, RVV_EFLOAT, RVV_M);
|
||||
if(gvl <= n/2){
|
||||
vy0 = VFMVVF_FLOAT(0.0, gvl);
|
||||
for(i=0,j=0;i<n/(gvl*2);i++){
|
||||
VSEV_FLOAT(&y[j], vy0, gvl);
|
||||
VSEV_FLOAT(&y[j+gvl], vy0, gvl);
|
||||
j += gvl * 2;
|
||||
}
|
||||
}
|
||||
for(;j<n;){
|
||||
gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M);
|
||||
vy0 = VFMVVF_FLOAT(0.0, gvl);
|
||||
VSEV_FLOAT(&y[j], vy0, gvl);
|
||||
j += gvl;
|
||||
}
|
||||
}else{
|
||||
gvl = vsetvli(n, RVV_EFLOAT, RVV_M);
|
||||
stride_y = inc_y * sizeof(FLOAT);
|
||||
if(gvl <= n/2){
|
||||
vy0 = VFMVVF_FLOAT(0.0, gvl);
|
||||
BLASLONG inc_yv = inc_y * gvl;
|
||||
for(i=0,j=0;i<n/(gvl*2);i++){
|
||||
VSSEV_FLOAT(&y[iy], stride_y, vy0, gvl);
|
||||
VSSEV_FLOAT(&y[iy+inc_yv], stride_y, vy0, gvl);
|
||||
j += gvl * 2;
|
||||
iy += inc_yv * 2;
|
||||
}
|
||||
}
|
||||
for(;j<n;){
|
||||
gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M);
|
||||
vy0 = VFMVVF_FLOAT(0.0, gvl);
|
||||
VSSEV_FLOAT(&y[j*inc_y], stride_y, vy0, gvl);
|
||||
j += gvl;
|
||||
}
|
||||
}
|
||||
|
||||
}else{//alpha != 0 && beta == 0, y = ax
|
||||
if(inc_x == 1 && inc_y == 1){
|
||||
gvl = vsetvli(n, RVV_EFLOAT, RVV_M);
|
||||
if(gvl <= n/2){
|
||||
for(i=0,j=0;i<n/(2*gvl);i++){
|
||||
vx0 = VLEV_FLOAT(&x[j], gvl);
|
||||
vy0 = VFMULVF_FLOAT(vx0, alpha, gvl);
|
||||
VSEV_FLOAT(&y[j], vy0, gvl);
|
||||
|
||||
vx1 = VLEV_FLOAT(&x[j+gvl], gvl);
|
||||
vy1 = VFMULVF_FLOAT(vx1, alpha, gvl);
|
||||
VSEV_FLOAT(&y[j+gvl], vy1, gvl);
|
||||
j += gvl * 2;
|
||||
}
|
||||
}
|
||||
for(;j<n;){
|
||||
gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M);
|
||||
vx0 = VLEV_FLOAT(&x[j], gvl);
|
||||
vy0 = VFMULVF_FLOAT(vx0, alpha, gvl);
|
||||
VSEV_FLOAT(&y[j], vy0, gvl);
|
||||
j += gvl;
|
||||
}
|
||||
}else if(inc_y == 1){
|
||||
gvl = vsetvli(n, RVV_EFLOAT, RVV_M);
|
||||
stride_x = inc_x * sizeof(FLOAT);
|
||||
if(gvl <= n/2){
|
||||
BLASLONG inc_xv = inc_x * gvl;
|
||||
for(i=0,j=0;i<n/(2*gvl);i++){
|
||||
vx0 = VLSEV_FLOAT(&x[ix], stride_x, gvl);
|
||||
vy0 = VFMULVF_FLOAT(vx0, alpha, gvl);
|
||||
VSEV_FLOAT(&y[j], vy0, gvl);
|
||||
|
||||
vx1 = VLSEV_FLOAT(&x[ix+inc_xv], stride_x, gvl);
|
||||
vy1 = VFMULVF_FLOAT(vx1, alpha, gvl);
|
||||
VSEV_FLOAT(&y[j+gvl], vy1, gvl);
|
||||
j += gvl * 2;
|
||||
ix += inc_xv * 2;
|
||||
}
|
||||
}
|
||||
for(;j<n;){
|
||||
gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M);
|
||||
vx0 = VLSEV_FLOAT(&x[j*inc_x], stride_x, gvl);
|
||||
vy0 = VFMULVF_FLOAT(vx0, alpha, gvl);
|
||||
VSEV_FLOAT(&y[j], vy0, gvl);
|
||||
j += gvl;
|
||||
}
|
||||
}else if(inc_x == 1){
|
||||
gvl = vsetvli(n, RVV_EFLOAT, RVV_M);
|
||||
stride_y = inc_y * sizeof(FLOAT);
|
||||
if(gvl <= n/2){
|
||||
BLASLONG inc_yv = inc_y * gvl;
|
||||
for(i=0,j=0;i<n/(2*gvl);i++){
|
||||
vx0 = VLEV_FLOAT(&x[j], gvl);
|
||||
vy0 = VFMULVF_FLOAT(vx0, alpha, gvl);
|
||||
VSSEV_FLOAT(&y[iy], stride_y, vy0, gvl);
|
||||
|
||||
vx1 = VLEV_FLOAT(&x[j+gvl], gvl);
|
||||
vy1 = VFMULVF_FLOAT(vx1, alpha, gvl);
|
||||
VSSEV_FLOAT(&y[iy+inc_yv], stride_y, vy1, gvl);
|
||||
j += gvl * 2;
|
||||
iy += inc_yv * 2;
|
||||
}
|
||||
}
|
||||
for(;j<n;){
|
||||
gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M);
|
||||
vx0 = VLEV_FLOAT(&x[j], gvl);
|
||||
vy0 = VFMULVF_FLOAT(vx0, alpha, gvl);
|
||||
VSSEV_FLOAT(&y[j*inc_y], stride_y, vy0, gvl);
|
||||
j += gvl;
|
||||
}
|
||||
}else{//inc_x !=1 && inc_y != 1
|
||||
gvl = vsetvli(n, RVV_EFLOAT, RVV_M);
|
||||
stride_x = inc_x * sizeof(FLOAT);
|
||||
stride_y = inc_y * sizeof(FLOAT);
|
||||
if(gvl <= n/2){
|
||||
BLASLONG inc_xv = inc_x * gvl;
|
||||
BLASLONG inc_yv = inc_y * gvl;
|
||||
for(i=0,j=0;i<n/(2*gvl);i++){
|
||||
vx0 = VLSEV_FLOAT(&x[ix], stride_x, gvl);
|
||||
vy0 = VFMULVF_FLOAT(vx0, alpha, gvl);
|
||||
VSSEV_FLOAT(&y[iy], stride_y, vy0, gvl);
|
||||
|
||||
vx1 = VLSEV_FLOAT(&x[ix+inc_xv], stride_x, gvl);
|
||||
vy1 = VFMULVF_FLOAT(vx1, alpha, gvl);
|
||||
VSSEV_FLOAT(&y[iy+inc_yv], stride_y, vy1, gvl);
|
||||
j += gvl * 2;
|
||||
ix += inc_xv * 2;
|
||||
iy += inc_yv * 2;
|
||||
}
|
||||
}
|
||||
for(;j<n;){
|
||||
gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M);
|
||||
vx0 = VLSEV_FLOAT(&x[j*inc_x], stride_x, gvl);
|
||||
vy0 = VFMULVF_FLOAT(vx0, alpha, gvl);
|
||||
VSSEV_FLOAT(&y[j*inc_y], stride_y, vy0, gvl);
|
||||
j += gvl;
|
||||
}
|
||||
}
|
||||
}
|
||||
}else{//beta != 0
|
||||
if(alpha == 0.0){//alpha == 0 && beta != 0; y = by
|
||||
if(inc_y == 1){
|
||||
gvl = vsetvli(n, RVV_EFLOAT, RVV_M);
|
||||
if(gvl <= n/2){
|
||||
for(i=0,j=0;i<n/(2*gvl);i++){
|
||||
vy0 = VLEV_FLOAT(&y[j], gvl);
|
||||
vy0 = VFMULVF_FLOAT(vy0, beta, gvl);
|
||||
VSEV_FLOAT(&y[j], vy0, gvl);
|
||||
|
||||
vy1 = VLEV_FLOAT(&y[j+gvl], gvl);
|
||||
vy1 = VFMULVF_FLOAT(vy1, beta, gvl);
|
||||
VSEV_FLOAT(&y[j+gvl], vy1, gvl);
|
||||
j += gvl * 2;
|
||||
}
|
||||
}
|
||||
for(;j<n;){
|
||||
gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M);
|
||||
vy0 = VLEV_FLOAT(&y[j], gvl);
|
||||
vy0 = VFMULVF_FLOAT(vy0, beta, gvl);
|
||||
VSEV_FLOAT(&y[j], vy0, gvl);
|
||||
j += gvl;
|
||||
}
|
||||
}else{
|
||||
gvl = vsetvli(n, RVV_EFLOAT, RVV_M);
|
||||
stride_y = inc_y * sizeof(FLOAT);
|
||||
if(gvl <= n/2){
|
||||
BLASLONG inc_yv = inc_y * gvl;
|
||||
for(i=0,j=0;i<n/(2*gvl);i++){
|
||||
vy0 = VLSEV_FLOAT(&y[iy], stride_y, gvl);
|
||||
vy0 = VFMULVF_FLOAT(vy0, beta, gvl);
|
||||
VSSEV_FLOAT(&y[iy], stride_y, vy0, gvl);
|
||||
|
||||
vy1 = VLSEV_FLOAT(&y[iy+inc_yv], stride_y, gvl);
|
||||
vy1 = VFMULVF_FLOAT(vy1, beta, gvl);
|
||||
VSSEV_FLOAT(&y[iy+inc_yv], stride_y, vy1, gvl);
|
||||
j += gvl * 2;
|
||||
iy += inc_yv * 2;
|
||||
}
|
||||
}
|
||||
for(;j<n;){
|
||||
gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M);
|
||||
vy0 = VLSEV_FLOAT(&y[j*inc_y], stride_y, gvl);
|
||||
vy0 = VFMULVF_FLOAT(vy0, beta, gvl);
|
||||
VSSEV_FLOAT(&y[j*inc_y], stride_y, vy0, gvl);
|
||||
j += gvl;
|
||||
}
|
||||
}
|
||||
|
||||
}else{//alpha != 0 && beta != 0; y = ax + by
|
||||
if(inc_x == 1 && inc_y == 1){
|
||||
gvl = vsetvli(n, RVV_EFLOAT, RVV_M);
|
||||
if(gvl <= n/2){
|
||||
for(i=0,j=0;i<n/(2*gvl);i++){
|
||||
vx0 = VLEV_FLOAT(&x[j], gvl);
|
||||
vx0 = VFMULVF_FLOAT(vx0, alpha, gvl);
|
||||
vy0 = VLEV_FLOAT(&y[j], gvl);
|
||||
vy0 = VFMACCVF_FLOAT(vx0, beta, vy0, gvl);
|
||||
VSEV_FLOAT(&y[j], vy0, gvl);
|
||||
|
||||
vx1 = VLEV_FLOAT(&x[j+gvl], gvl);
|
||||
vx1 = VFMULVF_FLOAT(vx1, alpha, gvl);
|
||||
vy1 = VLEV_FLOAT(&y[j+gvl], gvl);
|
||||
vy1 = VFMACCVF_FLOAT(vx1, beta, vy1,gvl);
|
||||
VSEV_FLOAT(&y[j+gvl], vy1, gvl);
|
||||
j += gvl * 2;
|
||||
}
|
||||
}
|
||||
for(;j<n;){
|
||||
gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M);
|
||||
vx0 = VLEV_FLOAT(&x[j], gvl);
|
||||
vx0 = VFMULVF_FLOAT(vx0, alpha, gvl);
|
||||
vy0 = VLEV_FLOAT(&y[j], gvl);
|
||||
vy0 = VFMACCVF_FLOAT(vx0, beta, vy0, gvl);
|
||||
VSEV_FLOAT(&y[j], vy0, gvl);
|
||||
j += gvl;
|
||||
}
|
||||
}else if(inc_y == 1){
|
||||
gvl = vsetvli(n, RVV_EFLOAT, RVV_M);
|
||||
stride_x = inc_x * sizeof(FLOAT);
|
||||
if(gvl <= n/2){
|
||||
BLASLONG inc_xv = inc_x * gvl;
|
||||
for(i=0,j=0;i<n/(2*gvl);i++){
|
||||
vx0 = VLSEV_FLOAT(&x[ix], stride_x, gvl);
|
||||
vx0 = VFMULVF_FLOAT(vx0, alpha, gvl);
|
||||
vy0 = VLEV_FLOAT(&y[j], gvl);
|
||||
vy0 = VFMACCVF_FLOAT(vx0, beta, vy0, gvl);
|
||||
VSEV_FLOAT(&y[j], vy0, gvl);
|
||||
|
||||
vx1 = VLSEV_FLOAT(&x[ix+inc_xv], stride_x, gvl);
|
||||
vx1 = VFMULVF_FLOAT(vx1, alpha, gvl);
|
||||
vy1 = VLEV_FLOAT(&y[j+gvl], gvl);
|
||||
vy1 = VFMACCVF_FLOAT(vx1, beta, vy1, gvl);
|
||||
VSEV_FLOAT(&y[j+gvl], vy1, gvl);
|
||||
j += gvl * 2;
|
||||
ix += inc_xv * 2;
|
||||
}
|
||||
}
|
||||
for(;j<n;){
|
||||
gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M);
|
||||
vx0 = VLSEV_FLOAT(&x[j*inc_x], stride_x, gvl);
|
||||
vx0 = VFMULVF_FLOAT(vx0, alpha, gvl);
|
||||
vy0 = VLEV_FLOAT(&y[j], gvl);
|
||||
vy0 = VFMACCVF_FLOAT(vx0, beta, vy0, gvl);
|
||||
VSEV_FLOAT(&y[j], vy0, gvl);
|
||||
j += gvl;
|
||||
}
|
||||
}else if(inc_x == 1){
|
||||
gvl = vsetvli(n, RVV_EFLOAT, RVV_M);
|
||||
stride_y = inc_y * sizeof(FLOAT);
|
||||
if(gvl <= n/2){
|
||||
BLASLONG inc_yv = inc_y * gvl;
|
||||
for(i=0,j=0;i<n/(2*gvl);i++){
|
||||
vx0 = VLEV_FLOAT(&x[j], gvl);
|
||||
vx0 = VFMULVF_FLOAT(vx0, alpha, gvl);
|
||||
vy0 = VLSEV_FLOAT(&y[iy], stride_y, gvl);
|
||||
vy0 = VFMACCVF_FLOAT(vx0, beta, vy0, gvl);
|
||||
VSSEV_FLOAT(&y[iy], stride_y, vy0, gvl);
|
||||
|
||||
vx1 = VLEV_FLOAT(&x[j+gvl], gvl);
|
||||
vx1 = VFMULVF_FLOAT(vx1, alpha, gvl);
|
||||
vy1 = VLSEV_FLOAT(&y[iy+inc_yv], stride_y, gvl);
|
||||
vy1 = VFMACCVF_FLOAT(vx1, beta, vy1, gvl);
|
||||
VSSEV_FLOAT(&y[iy+inc_yv], stride_y, vy1, gvl);
|
||||
j += gvl * 2;
|
||||
iy += inc_yv * 2;
|
||||
}
|
||||
}
|
||||
for(;j<n;){
|
||||
gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M);
|
||||
vx0 = VLEV_FLOAT(&x[j], gvl);
|
||||
vx0 = VFMULVF_FLOAT(vx0, alpha, gvl);
|
||||
vy0 = VLSEV_FLOAT(&y[j*inc_y], stride_y, gvl);
|
||||
vy0 = VFMACCVF_FLOAT(vx0, beta, vy0, gvl);
|
||||
VSSEV_FLOAT(&y[j*inc_y], stride_y, vy0, gvl);
|
||||
j += gvl;
|
||||
}
|
||||
}else{//inc_x != 1 && inc_y != 1
|
||||
gvl = vsetvli(n, RVV_EFLOAT, RVV_M);
|
||||
stride_x = inc_x * sizeof(FLOAT);
|
||||
stride_y = inc_y * sizeof(FLOAT);
|
||||
if(gvl <= n/2){
|
||||
BLASLONG inc_xv = inc_x * gvl;
|
||||
BLASLONG inc_yv = inc_y * gvl;
|
||||
for(i=0,j=0;i<n/(2*gvl);i++){
|
||||
vx0 = VLSEV_FLOAT(&x[ix], stride_x, gvl);
|
||||
vx0 = VFMULVF_FLOAT(vx0, alpha, gvl);
|
||||
vy0 = VLSEV_FLOAT(&y[iy], stride_y, gvl);
|
||||
vy0 = VFMACCVF_FLOAT(vx0, beta, vy0, gvl);
|
||||
VSSEV_FLOAT(&y[iy], stride_y, vy0, gvl);
|
||||
|
||||
vx1 = VLSEV_FLOAT(&x[ix+inc_xv], stride_x, gvl);
|
||||
vx1 = VFMULVF_FLOAT(vx1, alpha, gvl);
|
||||
vy1 = VLSEV_FLOAT(&y[iy+inc_yv], stride_y, gvl);
|
||||
vy1 = VFMACCVF_FLOAT(vx1, beta, vy1, gvl);
|
||||
VSSEV_FLOAT(&y[iy+inc_yv], stride_y, vy1, gvl);
|
||||
j += gvl * 2;
|
||||
ix += inc_xv * 2;
|
||||
iy += inc_yv * 2;
|
||||
}
|
||||
}
|
||||
for(;j<n;){
|
||||
gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M);
|
||||
vx0 = VLSEV_FLOAT(&x[j*inc_x], stride_x, gvl);
|
||||
vx0 = VFMULVF_FLOAT(vx0, alpha, gvl);
|
||||
vy0 = VLSEV_FLOAT(&y[j*inc_y], stride_y, gvl);
|
||||
vy0 = VFMACCVF_FLOAT(vx0, beta, vy0, gvl);
|
||||
VSSEV_FLOAT(&y[j*inc_y], stride_y, vy0, gvl);
|
||||
j += gvl;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return(0);
|
||||
}
|
||||
|
|
@ -0,0 +1,64 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2013, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
/**************************************************************************************
|
||||
* 2013/09/14 Saar
|
||||
* BLASTEST float : OK
|
||||
* BLASTEST double : OK
|
||||
* CTEST : OK
|
||||
* TEST : OK
|
||||
*
|
||||
**************************************************************************************/
|
||||
|
||||
|
||||
#include "common.h"
|
||||
|
||||
int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *dummy, BLASLONG dummy2)
|
||||
{
|
||||
BLASLONG i=0;
|
||||
BLASLONG ix,iy;
|
||||
|
||||
if ( n < 0 ) return(0);
|
||||
if ( da == 0.0 ) return(0);
|
||||
|
||||
ix = 0;
|
||||
iy = 0;
|
||||
|
||||
while(i < n)
|
||||
{
|
||||
|
||||
y[iy] += da * x[ix] ;
|
||||
ix += inc_x ;
|
||||
iy += inc_y ;
|
||||
i++ ;
|
||||
|
||||
}
|
||||
return(0);
|
||||
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,179 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2020, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
|
||||
#if !defined(DOUBLE)
|
||||
#define RVV_EFLOAT RVV_E32
|
||||
#define RVV_M RVV_M4
|
||||
#define FLOAT_V_T float32xm4_t
|
||||
#define VLEV_FLOAT vlev_float32xm4
|
||||
#define VLSEV_FLOAT vlsev_float32xm4
|
||||
#define VSEV_FLOAT vsev_float32xm4
|
||||
#define VSSEV_FLOAT vssev_float32xm4
|
||||
#define VFMACCVF_FLOAT vfmaccvf_float32xm4
|
||||
#else
|
||||
#define RVV_EFLOAT RVV_E64
|
||||
#define RVV_M RVV_M4
|
||||
#define FLOAT_V_T float64xm4_t
|
||||
#define VLEV_FLOAT vlev_float64xm4
|
||||
#define VLSEV_FLOAT vlsev_float64xm4
|
||||
#define VSEV_FLOAT vsev_float64xm4
|
||||
#define VSSEV_FLOAT vssev_float64xm4
|
||||
#define VFMACCVF_FLOAT vfmaccvf_float64xm4
|
||||
#endif
|
||||
|
||||
int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *dummy, BLASLONG dummy2)
|
||||
{
|
||||
BLASLONG i=0, j=0, jx=0, jy=0;
|
||||
unsigned int gvl = 0;
|
||||
FLOAT_V_T vx0, vx1;
|
||||
FLOAT_V_T vy0, vy1;
|
||||
BLASLONG stride_x, stride_y;
|
||||
|
||||
if (n < 0) return(0);
|
||||
if (da == 0.0) return(0);
|
||||
|
||||
if (inc_x == 1 && inc_y == 1) {
|
||||
|
||||
gvl = vsetvli(n, RVV_EFLOAT, RVV_M);
|
||||
|
||||
if (gvl <= n/2) {
|
||||
for (i = 0, j=0; i < n/(2*gvl); i++, j+=2*gvl) {
|
||||
vx0 = VLEV_FLOAT(&x[j], gvl);
|
||||
vy0 = VLEV_FLOAT(&y[j], gvl);
|
||||
vy0 = VFMACCVF_FLOAT(vy0, da, vx0, gvl);
|
||||
VSEV_FLOAT(&y[j], vy0, gvl);
|
||||
|
||||
vx1 = VLEV_FLOAT(&x[j+gvl], gvl);
|
||||
vy1 = VLEV_FLOAT(&y[j+gvl], gvl);
|
||||
vy1 = VFMACCVF_FLOAT(vy1, da, vx1, gvl);
|
||||
VSEV_FLOAT(&y[j+gvl], vy1, gvl);
|
||||
}
|
||||
}
|
||||
//tail
|
||||
for (; j < n; ) {
|
||||
gvl = vsetvli(n - j, RVV_EFLOAT, RVV_M);
|
||||
vx0 = VLEV_FLOAT(&x[j], gvl);
|
||||
vy0 = VLEV_FLOAT(&y[j], gvl);
|
||||
vy0 = VFMACCVF_FLOAT(vy0, da, vx0, gvl);
|
||||
VSEV_FLOAT(&y[j], vy0, gvl);
|
||||
|
||||
j += gvl;
|
||||
}
|
||||
}else if (inc_y == 1) {
|
||||
stride_x = inc_x * sizeof(FLOAT);
|
||||
gvl = vsetvli(n, RVV_EFLOAT, RVV_M);
|
||||
if(gvl <= n/2){
|
||||
BLASLONG inc_xv = inc_x * gvl;
|
||||
for(i=0,j=0; i<n/(2*gvl); i++){
|
||||
vx0 = VLSEV_FLOAT(&x[jx], stride_x, gvl);
|
||||
vy0 = VLEV_FLOAT(&y[j], gvl);
|
||||
vy0 = VFMACCVF_FLOAT(vy0, da, vx0, gvl);
|
||||
VSEV_FLOAT(&y[j], vy0, gvl);
|
||||
|
||||
vx1 = VLSEV_FLOAT(&x[jx+inc_xv], stride_x, gvl);
|
||||
vy1 = VLEV_FLOAT(&y[j+gvl], gvl);
|
||||
vy1 = VFMACCVF_FLOAT(vy1, da, vx1, gvl);
|
||||
VSEV_FLOAT(&y[j+gvl], vy1, gvl);
|
||||
|
||||
j += gvl * 2;
|
||||
jx += inc_xv * 2;
|
||||
}
|
||||
}
|
||||
for (; j<n; ) {
|
||||
gvl = vsetvli(n - j, RVV_EFLOAT, RVV_M);
|
||||
vx0 = VLSEV_FLOAT(&x[j*inc_x], stride_x, gvl);
|
||||
vy0 = VLEV_FLOAT(&y[j], gvl);
|
||||
vy0 = VFMACCVF_FLOAT(vy0, da, vx0, gvl);
|
||||
VSEV_FLOAT(&y[j], vy0, gvl);
|
||||
j += gvl;
|
||||
}
|
||||
}else if(inc_x == 1){
|
||||
stride_y = inc_y * sizeof(FLOAT);
|
||||
gvl = vsetvli(n, RVV_EFLOAT, RVV_M);
|
||||
if(gvl <= n/2){
|
||||
BLASLONG inc_yv = inc_y * gvl;
|
||||
for(i=0,j=0; i<n/(2*gvl); i++){
|
||||
vx0 = VLEV_FLOAT(&x[j], gvl);
|
||||
vy0 = VLSEV_FLOAT(&y[jy], stride_y, gvl);
|
||||
vy0 = VFMACCVF_FLOAT(vy0, da, vx0, gvl);
|
||||
VSSEV_FLOAT(&y[jy], stride_y, vy0, gvl);
|
||||
|
||||
vx1 = VLEV_FLOAT(&x[j+gvl], gvl);
|
||||
vy1 = VLSEV_FLOAT(&y[jy+inc_yv], stride_y, gvl);
|
||||
vy1 = VFMACCVF_FLOAT(vy1, da, vx1, gvl);
|
||||
VSSEV_FLOAT(&y[jy+inc_yv], stride_y, vy1, gvl);
|
||||
|
||||
j += gvl * 2;
|
||||
jy += inc_yv * 2;
|
||||
}
|
||||
}
|
||||
for (; j<n; ) {
|
||||
gvl = vsetvli(n - j, RVV_EFLOAT, RVV_M);
|
||||
vx0 = VLEV_FLOAT(&x[j], gvl);
|
||||
vy0 = VLSEV_FLOAT(&y[j*inc_y], stride_y, gvl);
|
||||
vy0 = VFMACCVF_FLOAT(vy0, da, vx0, gvl);
|
||||
VSSEV_FLOAT(&y[j*inc_y], stride_y, vy0, gvl);
|
||||
j += gvl;
|
||||
}
|
||||
}else{
|
||||
stride_x = inc_x * sizeof(FLOAT);
|
||||
stride_y = inc_y * sizeof(FLOAT);
|
||||
gvl = vsetvli(n, RVV_EFLOAT, RVV_M);
|
||||
if(gvl <= n/2){
|
||||
BLASLONG inc_xv = inc_x * gvl;
|
||||
BLASLONG inc_yv = inc_y * gvl;
|
||||
for(i=0,j=0; i<n/(2*gvl); i++){
|
||||
vx0 = VLSEV_FLOAT(&x[jx], stride_x, gvl);
|
||||
vy0 = VLSEV_FLOAT(&y[jy], stride_y, gvl);
|
||||
vy0 = VFMACCVF_FLOAT(vy0, da, vx0, gvl);
|
||||
VSSEV_FLOAT(&y[jy], stride_y, vy0, gvl);
|
||||
|
||||
vx1 = VLSEV_FLOAT(&x[jx+inc_xv], stride_x, gvl);
|
||||
vy1 = VLSEV_FLOAT(&y[jy+inc_yv], stride_y, gvl);
|
||||
vy1 = VFMACCVF_FLOAT(vy1, da, vx1, gvl);
|
||||
VSSEV_FLOAT(&y[jy+inc_yv], stride_y, vy1, gvl);
|
||||
|
||||
j += gvl * 2;
|
||||
jx += inc_xv * 2;
|
||||
jy += inc_yv * 2;
|
||||
}
|
||||
}
|
||||
for (; j<n; ) {
|
||||
gvl = vsetvli(n - j, RVV_EFLOAT, RVV_M);
|
||||
vx0 = VLSEV_FLOAT(&x[j*inc_x], stride_x, gvl);
|
||||
vy0 = VLSEV_FLOAT(&y[j*inc_y], stride_y, gvl);
|
||||
vy0 = VFMACCVF_FLOAT(vy0, da, vx0, gvl);
|
||||
VSSEV_FLOAT(&y[j*inc_y], stride_y, vy0, gvl);
|
||||
j += gvl;
|
||||
}
|
||||
}
|
||||
return(0);
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,59 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2013, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
/**************************************************************************************
|
||||
* 2013/09/14 Saar
|
||||
* BLASTEST float : OK
|
||||
* BLASTEST double : OK
|
||||
* CTEST : OK
|
||||
* TEST : OK
|
||||
*
|
||||
**************************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
|
||||
int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
|
||||
{
|
||||
BLASLONG i=0;
|
||||
BLASLONG ix=0,iy=0;
|
||||
|
||||
if ( n < 0 ) return(0);
|
||||
|
||||
while(i < n)
|
||||
{
|
||||
|
||||
y[iy] = x[ix] ;
|
||||
ix += inc_x ;
|
||||
iy += inc_y ;
|
||||
i++ ;
|
||||
|
||||
}
|
||||
return(0);
|
||||
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,148 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2020, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
#include "common.h"
|
||||
#if !defined(DOUBLE)
|
||||
#define RVV_EFLOAT RVV_E32
|
||||
#define RVV_M RVV_M8
|
||||
#define FLOAT_V_T float32xm8_t
|
||||
#define VLEV_FLOAT vlev_float32xm8
|
||||
#define VLSEV_FLOAT vlsev_float32xm8
|
||||
#define VSEV_FLOAT vsev_float32xm8
|
||||
#define VSSEV_FLOAT vssev_float32xm8
|
||||
#else
|
||||
#define RVV_EFLOAT RVV_E64
|
||||
#define RVV_M RVV_M8
|
||||
#define FLOAT_V_T float64xm8_t
|
||||
#define VLEV_FLOAT vlev_float64xm8
|
||||
#define VLSEV_FLOAT vlsev_float64xm8
|
||||
#define VSEV_FLOAT vsev_float64xm8
|
||||
#define VSSEV_FLOAT vssev_float64xm8
|
||||
#endif
|
||||
|
||||
int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
|
||||
{
|
||||
BLASLONG i=0, j=0;
|
||||
BLASLONG ix=0,iy=0;
|
||||
if(n < 0) return(0);
|
||||
|
||||
BLASLONG stride_x, stride_y;
|
||||
FLOAT_V_T v0, v1, v2, v3;
|
||||
unsigned int gvl = 0;
|
||||
|
||||
if(inc_x == 1 && inc_y == 1){
|
||||
memcpy(&y[0], &x[0], n*sizeof(FLOAT));
|
||||
}else if (inc_y == 1){
|
||||
gvl = vsetvli(n, RVV_EFLOAT, RVV_M);
|
||||
stride_x = inc_x * sizeof(FLOAT);
|
||||
if(gvl <= n/4){
|
||||
BLASLONG inc_xv = inc_x * gvl;
|
||||
BLASLONG gvl3 = gvl * 3;
|
||||
BLASLONG inc_xv3 = inc_xv * 3;
|
||||
for(i=0,j=0; i<n/(4*gvl); i++){
|
||||
v0 = VLSEV_FLOAT(&x[ix], stride_x, gvl);
|
||||
VSEV_FLOAT(&y[j], v0, gvl);
|
||||
v1 = VLSEV_FLOAT(&x[ix+inc_xv], stride_x, gvl);
|
||||
VSEV_FLOAT(&y[j+gvl], v1, gvl);
|
||||
|
||||
v2 = VLSEV_FLOAT(&x[ix+inc_xv*2], stride_x, gvl);
|
||||
VSEV_FLOAT(&y[j+gvl*2], v2, gvl);
|
||||
v3 = VLSEV_FLOAT(&x[ix+inc_xv3], stride_x, gvl);
|
||||
VSEV_FLOAT(&y[j+gvl3], v3, gvl);
|
||||
j += gvl * 4;
|
||||
ix += inc_xv * 4;
|
||||
}
|
||||
}
|
||||
for(;j<n;){
|
||||
gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M);
|
||||
v0 = VLSEV_FLOAT(&x[j*inc_x], stride_x, gvl);
|
||||
VSEV_FLOAT(&y[j], v0, gvl);
|
||||
j += gvl;
|
||||
}
|
||||
}else if(inc_x == 1){
|
||||
gvl = vsetvli(n, RVV_EFLOAT, RVV_M);
|
||||
stride_y = inc_y * sizeof(FLOAT);
|
||||
if(gvl <= n/4){
|
||||
BLASLONG inc_yv = inc_y * gvl;
|
||||
BLASLONG inc_yv3 = inc_yv * 3;
|
||||
BLASLONG gvl3 = gvl * 3;
|
||||
for(i=0,j=0; i<n/(4*gvl); i++){
|
||||
v0 = VLEV_FLOAT(&x[j], gvl);
|
||||
VSSEV_FLOAT(&y[iy], stride_y, v0, gvl);
|
||||
v1 = VLEV_FLOAT(&x[j+gvl], gvl);
|
||||
VSSEV_FLOAT(&y[iy+inc_yv], stride_y, v1, gvl);
|
||||
|
||||
v2 = VLEV_FLOAT(&x[j+gvl*2], gvl);
|
||||
VSSEV_FLOAT(&y[iy+inc_yv*2], stride_y, v2, gvl);
|
||||
v3 = VLEV_FLOAT(&x[j+gvl3], gvl);
|
||||
VSSEV_FLOAT(&y[iy+inc_yv3], stride_y, v3, gvl);
|
||||
j += gvl * 4;
|
||||
iy += inc_yv * 4;
|
||||
}
|
||||
}
|
||||
for(;j<n;){
|
||||
gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M);
|
||||
v0 = VLEV_FLOAT(&x[j], gvl);
|
||||
VSSEV_FLOAT(&y[j*inc_y], stride_y, v0, gvl);
|
||||
j += gvl;
|
||||
}
|
||||
|
||||
}else{
|
||||
gvl = vsetvli(n, RVV_EFLOAT, RVV_M);
|
||||
stride_x = inc_x * sizeof(FLOAT);
|
||||
stride_y = inc_y * sizeof(FLOAT);
|
||||
if(gvl <= n/4){
|
||||
BLASLONG inc_xv = inc_x * gvl;
|
||||
BLASLONG inc_yv = inc_y * gvl;
|
||||
BLASLONG inc_xv3 = inc_xv * 3;
|
||||
BLASLONG inc_yv3 = inc_yv * 3;
|
||||
for(i=0,j=0; i<n/(4*gvl); i++){
|
||||
v0 = VLSEV_FLOAT(&x[ix], stride_x, gvl);
|
||||
VSSEV_FLOAT(&y[iy], stride_y, v0, gvl);
|
||||
v1 = VLSEV_FLOAT(&x[ix+inc_xv], stride_x, gvl);
|
||||
VSSEV_FLOAT(&y[iy+inc_yv], stride_y, v1, gvl);
|
||||
|
||||
v2 = VLSEV_FLOAT(&x[ix+inc_xv*2], stride_x, gvl);
|
||||
VSSEV_FLOAT(&y[iy+inc_yv*2], stride_y, v2, gvl);
|
||||
v3 = VLSEV_FLOAT(&x[ix+inc_xv3], stride_x, gvl);
|
||||
VSSEV_FLOAT(&y[iy+inc_yv3], stride_y, v3, gvl);
|
||||
|
||||
j += gvl * 4;
|
||||
ix += inc_xv * 4;
|
||||
iy += inc_yv * 4;
|
||||
}
|
||||
}
|
||||
for(;j<n;){
|
||||
gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M);
|
||||
v0 = VLSEV_FLOAT(&x[j*inc_x], stride_x, gvl);
|
||||
VSSEV_FLOAT(&y[j*inc_y], stride_y, v0, gvl);
|
||||
j += gvl;
|
||||
}
|
||||
}
|
||||
return(0);
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,977 @@
|
|||
#include "common.h"
|
||||
#include <riscv-vector.h>
|
||||
|
||||
#define KERNEL8x4_I \
|
||||
"addi t1, %[PB], 1*8 \n\t"\
|
||||
"addi t2, %[PB], 2*8 \n\t"\
|
||||
"addi t3, %[PB], 3*8 \n\t"\
|
||||
"fld ft0, (%[PB]) \n\t"\
|
||||
"fld ft1, (t1) \n\t"\
|
||||
"fld ft2, (t2) \n\t"\
|
||||
"fld ft3, (t3) \n\t"\
|
||||
"vle.v v0, (%[PA]) \n\t"\
|
||||
"addi t4, %[PA], 2*8 \n\t"\
|
||||
"addi t5, %[PA], 4*8 \n\t"\
|
||||
"vfmv.v.f v8, ft0 \n\t"\
|
||||
"addi t6, %[PA], 6*8 \n\t"\
|
||||
"addi %[PA], %[PA], 8*8 \n\t"\
|
||||
"vle.v v1, (t4) \n\t"\
|
||||
"addi t4, t4, 8*8 \n\t"\
|
||||
"vfmv.v.f v9, ft1 \n\t"\
|
||||
"vle.v v2, (t5) \n\t"\
|
||||
"addi t5, t5, 8*8 \n\t"\
|
||||
"vle.v v3, (t6) \n\t"\
|
||||
"addi t6, t6, 8*8 \n\t"\
|
||||
"vfmv.v.f v10, ft2 \n\t"\
|
||||
"addi %[PB], %[PB], 4*8 \n\t"\
|
||||
"vle.v v4, (%[PA]) \n\t"\
|
||||
"addi %[PA], %[PA], 8*8 \n\t"\
|
||||
"vfmv.v.f v11, ft3 \n\t"\
|
||||
"vfmacc.vv v16, v8, v0 \n\t"\
|
||||
"addi t1, t1, 4*8 \n\t"\
|
||||
"vle.v v5, (t4) \n\t"\
|
||||
"addi t4, t4, 8*8 \n\t"\
|
||||
"vfmacc.vv v17, v8, v1 \n\t"\
|
||||
"addi t2, t2, 4*8 \n\t"\
|
||||
"vle.v v6, (t5) \n\t"\
|
||||
"addi t5, t5, 8*8 \n\t"\
|
||||
"vfmacc.vv v18, v8, v2 \n\t"\
|
||||
"addi t3, t3, 4*8 \n\t"\
|
||||
"vle.v v7, (t6) \n\t"\
|
||||
"addi t6, t6, 8*8 \n\t"\
|
||||
"vfmacc.vv v19, v8, v3 \n\t"\
|
||||
"fld ft4, (%[PB]) \n\t"\
|
||||
"vfmacc.vv v20, v9, v0 \n\t"\
|
||||
"fld ft5, (t1) \n\t"\
|
||||
"vfmacc.vv v21, v9, v1 \n\t"\
|
||||
"fld ft6, (t2) \n\t"\
|
||||
"vfmacc.vv v22, v9, v2 \n\t"\
|
||||
"fld ft7, (t3) \n\t"\
|
||||
"vfmacc.vv v23, v9, v3 \n\t"\
|
||||
"vfmv.v.f v12, ft4 \n\t"\
|
||||
"vfmacc.vv v24, v10, v0 \n\t"\
|
||||
"vfmv.v.f v13, ft5 \n\t"\
|
||||
"vfmacc.vv v25, v10, v1 \n\t"\
|
||||
"vfmv.v.f v14, ft6 \n\t"\
|
||||
"vfmacc.vv v26, v10, v2 \n\t"\
|
||||
"vfmv.v.f v15, ft7 \n\t"\
|
||||
"vfmacc.vv v27, v10, v3 \n\t"\
|
||||
"addi %[PB], %[PB], 4*8 \n\t"\
|
||||
"vfmacc.vv v28, v11, v0 \n\t"\
|
||||
"addi t1, t1, 4*8 \n\t"\
|
||||
"vfmacc.vv v29, v11, v1 \n\t"\
|
||||
"addi t2, t2, 4*8 \n\t"\
|
||||
"vfmacc.vv v30, v11, v2 \n\t"\
|
||||
"addi t3, t3, 4*8 \n\t"\
|
||||
"vfmacc.vv v31, v11, v3 \n\t"
|
||||
|
||||
#define KERNEL8x4_M1 \
|
||||
"vfmacc.vv v16, v8, v0 \n\t"\
|
||||
"vle.v v4, (%[PA]) \n\t"\
|
||||
"addi %[PA], %[PA], 8*8 \n\t"\
|
||||
"vfmacc.vv v17, v8, v1 \n\t"\
|
||||
"vle.v v5, (t4) \n\t"\
|
||||
"addi t4, t4, 8*8 \n\t"\
|
||||
"vfmacc.vv v18, v8, v2 \n\t"\
|
||||
"vle.v v6, (t5) \n\t"\
|
||||
"addi t5, t5, 8*8 \n\t"\
|
||||
"vfmacc.vv v19, v8, v3 \n\t"\
|
||||
"vle.v v7, (t6) \n\t"\
|
||||
"addi t6, t6, 8*8 \n\t"\
|
||||
"vfmacc.vv v20, v9, v0 \n\t"\
|
||||
"fld ft4, (%[PB]) \n\t"\
|
||||
"vfmacc.vv v21, v9, v1 \n\t"\
|
||||
"fld ft5, (t1) \n\t"\
|
||||
"vfmacc.vv v22, v9, v2 \n\t"\
|
||||
"fld ft6, (t2) \n\t"\
|
||||
"vfmacc.vv v23, v9, v3 \n\t"\
|
||||
"fld ft7, (t3) \n\t"\
|
||||
"addi %[PB], %[PB], 4*8 \n\t"\
|
||||
"vfmacc.vv v24, v10, v0 \n\t"\
|
||||
"addi t1, t1, 4*8 \n\t"\
|
||||
"vfmacc.vv v25, v10, v1 \n\t"\
|
||||
"vfmv.v.f v12, ft4 \n\t"\
|
||||
"vfmacc.vv v26, v10, v2 \n\t"\
|
||||
"addi t2, t2, 4*8 \n\t"\
|
||||
"vfmacc.vv v27, v10, v3 \n\t"\
|
||||
"vfmv.v.f v13, ft5 \n\t"\
|
||||
"vfmacc.vv v28, v11, v0 \n\t"\
|
||||
"addi t3, t3, 4*8 \n\t"\
|
||||
"vfmacc.vv v29, v11, v1 \n\t"\
|
||||
"vfmv.v.f v14, ft6 \n\t"\
|
||||
"vfmacc.vv v30, v11, v2 \n\t"\
|
||||
"vfmacc.vv v31, v11, v3 \n\t"\
|
||||
"vfmv.v.f v15, ft7 \n\t"
|
||||
|
||||
#define KERNEL8x4_M2 \
|
||||
"vfmacc.vv v16, v12, v4 \n\t"\
|
||||
"vle.v v0, (%[PA]) \n\t"\
|
||||
"addi %[PA], %[PA], 8*8 \n\t"\
|
||||
"vfmacc.vv v17, v12, v5 \n\t"\
|
||||
"vle.v v1, (t4) \n\t"\
|
||||
"addi t4, t4, 8*8 \n\t"\
|
||||
"vfmacc.vv v18, v12, v6 \n\t"\
|
||||
"vle.v v2, (t5) \n\t"\
|
||||
"addi t5, t5, 8*8 \n\t"\
|
||||
"vfmacc.vv v19, v12, v7 \n\t"\
|
||||
"vle.v v3, (t6) \n\t"\
|
||||
"addi t6, t6, 8*8 \n\t"\
|
||||
"vfmacc.vv v20, v13, v4 \n\t"\
|
||||
"fld ft0, (%[PB]) \n\t"\
|
||||
"vfmacc.vv v21, v13, v5 \n\t"\
|
||||
"fld ft1, (t1) \n\t"\
|
||||
"vfmacc.vv v22, v13, v6 \n\t"\
|
||||
"fld ft2, (t2) \n\t"\
|
||||
"vfmacc.vv v23, v13, v7 \n\t"\
|
||||
"fld ft3, (t3) \n\t"\
|
||||
"addi %[PB], %[PB], 4*8 \n\t"\
|
||||
"vfmacc.vv v24, v14, v4 \n\t"\
|
||||
"addi t1, t1, 4*8 \n\t"\
|
||||
"vfmacc.vv v25, v14, v5 \n\t"\
|
||||
"vfmv.v.f v8, ft0 \n\t"\
|
||||
"vfmacc.vv v26, v14, v6 \n\t"\
|
||||
"addi t2, t2, 4*8 \n\t"\
|
||||
"vfmacc.vv v27, v14, v7 \n\t"\
|
||||
"vfmv.v.f v9, ft1 \n\t"\
|
||||
"vfmacc.vv v28, v15, v4 \n\t"\
|
||||
"addi t3, t3, 4*8 \n\t"\
|
||||
"vfmacc.vv v29, v15, v5 \n\t"\
|
||||
"vfmv.v.f v10, ft2 \n\t"\
|
||||
"vfmacc.vv v30, v15, v6 \n\t"\
|
||||
"vfmacc.vv v31, v15, v7 \n\t"\
|
||||
"vfmv.v.f v11, ft3 \n\t"
|
||||
|
||||
#define KERNEL8x4_E \
|
||||
"vfmacc.vv v16, v12, v4 \n\t"\
|
||||
"vfmacc.vv v17, v12, v5 \n\t"\
|
||||
"vfmacc.vv v18, v12, v6 \n\t"\
|
||||
"vfmacc.vv v19, v12, v7 \n\t"\
|
||||
"vfmacc.vv v20, v13, v4 \n\t"\
|
||||
"vfmacc.vv v21, v13, v5 \n\t"\
|
||||
"vfmacc.vv v22, v13, v6 \n\t"\
|
||||
"vfmacc.vv v23, v13, v7 \n\t"\
|
||||
"vfmacc.vv v24, v14, v4 \n\t"\
|
||||
"vfmacc.vv v25, v14, v5 \n\t"\
|
||||
"vfmacc.vv v26, v14, v6 \n\t"\
|
||||
"vfmacc.vv v27, v14, v7 \n\t"\
|
||||
"vfmacc.vv v28, v15, v4 \n\t"\
|
||||
"vfmacc.vv v29, v15, v5 \n\t"\
|
||||
"vfmacc.vv v30, v15, v6 \n\t"\
|
||||
"vfmacc.vv v31, v15, v7 \n\t"
|
||||
|
||||
|
||||
|
||||
|
||||
int CNAME(BLASLONG bm,BLASLONG bn,BLASLONG bk,FLOAT alpha,FLOAT* ba,FLOAT* bb,FLOAT* C,BLASLONG ldc
|
||||
#ifdef TRMMKERNEL
|
||||
,BLASLONG offset
|
||||
#endif
|
||||
)
|
||||
{
|
||||
BLASLONG i,j,k;
|
||||
FLOAT *C0,*C1,*C2,*C3;
|
||||
FLOAT *ptrba,*ptrbb;
|
||||
|
||||
FLOAT loadb0,loadb1,loadb2,loadb3;
|
||||
FLOAT load0,load1,load2,load3,load4,load5,load6,load7;
|
||||
|
||||
FLOAT res0,res1,res2,res3;
|
||||
FLOAT res4,res5,res6,res7;
|
||||
FLOAT res8,res9,res10,res11;
|
||||
FLOAT res12,res13,res14,res15;
|
||||
|
||||
for (j=0; j<bn/4; j+=1){
|
||||
C0 = C;
|
||||
C1 = C0+ldc;
|
||||
C2 = C1+ldc;
|
||||
C3 = C2+ldc;
|
||||
|
||||
ptrba = ba;
|
||||
for(i=0; i<bm/8; i+=1){
|
||||
ptrbb = bb;
|
||||
//t0 for k
|
||||
//ft0-ft3,ft4-ft7,v8-v15 for B, t1-t3 for PB1-3
|
||||
//v0-v3,v4-v7 for A, t4-t6 for PA1-3
|
||||
//v16-v31 for temp C
|
||||
|
||||
asm volatile(
|
||||
"vsetvli zero, zero, e64,m1 \n\t"
|
||||
"fmv.w.x ft11, zero \n\t"
|
||||
"mv t0, %[BK] \n\t"
|
||||
|
||||
"vfmv.v.f v16, ft11 \n\t"
|
||||
"vfmv.v.f v17, ft11 \n\t"
|
||||
"vfmv.v.f v18, ft11 \n\t"
|
||||
"vfmv.v.f v19, ft11 \n\t"
|
||||
|
||||
"vfmv.v.f v20, ft11 \n\t"
|
||||
"vfmv.v.f v21, ft11 \n\t"
|
||||
"vfmv.v.f v22, ft11 \n\t"
|
||||
"vfmv.v.f v23, ft11 \n\t"
|
||||
|
||||
"vfmv.v.f v24, ft11 \n\t"
|
||||
"vfmv.v.f v25, ft11 \n\t"
|
||||
"vfmv.v.f v26, ft11 \n\t"
|
||||
"vfmv.v.f v27, ft11 \n\t"
|
||||
|
||||
"vfmv.v.f v28, ft11 \n\t"
|
||||
"vfmv.v.f v29, ft11 \n\t"
|
||||
"vfmv.v.f v30, ft11 \n\t"
|
||||
"vfmv.v.f v31, ft11 \n\t"
|
||||
//unloop 8
|
||||
"srli t0, %[BK], 3 \n\t"
|
||||
"blez t0, M8x4_TAIL \n\t"
|
||||
|
||||
//preloop
|
||||
KERNEL8x4_I
|
||||
KERNEL8x4_M2
|
||||
KERNEL8x4_M1
|
||||
KERNEL8x4_M2
|
||||
"addi t0, t0, -1 \n\t"
|
||||
"blez t0, M8x4_MAINLOOP_TAIL \n\t"
|
||||
".align 4 \n\t"
|
||||
"M8x4_MAINLOOP: \n\t"
|
||||
KERNEL8x4_M1
|
||||
KERNEL8x4_M2
|
||||
KERNEL8x4_M1
|
||||
KERNEL8x4_M2
|
||||
KERNEL8x4_M1
|
||||
KERNEL8x4_M2
|
||||
KERNEL8x4_M1
|
||||
KERNEL8x4_M2
|
||||
"addi t0, t0, -1 \n\t"
|
||||
"bgtz t0, M8x4_MAINLOOP \n\t"
|
||||
|
||||
"M8x4_MAINLOOP_TAIL: \n\t"
|
||||
KERNEL8x4_M1
|
||||
KERNEL8x4_M2
|
||||
KERNEL8x4_M1
|
||||
KERNEL8x4_E
|
||||
|
||||
//tail
|
||||
"M8x4_TAIL: \n\t"
|
||||
"andi t0, %[BK], 7 \n\t"
|
||||
"blez t0, M8x4_SAVERESULT \n\t"
|
||||
|
||||
"addi t4, %[PA], 2*8 \n\t"
|
||||
"addi t5, %[PA], 4*8 \n\t"
|
||||
"addi t6, %[PA], 6*8 \n\t"
|
||||
"addi t1, %[PB], 1*8 \n\t"
|
||||
"addi t2, %[PB], 2*8 \n\t"
|
||||
"addi t3, %[PB], 3*8 \n\t"
|
||||
|
||||
".align 4 \n\t"
|
||||
"M8x4_TAILLOOP: \n\t"
|
||||
"fld ft0, (%[PB]) \n\t"
|
||||
"addi %[PB], %[PB], 4*8 \n\t"
|
||||
"vle.v v0, (%[PA]) \n\t"
|
||||
"add %[PA], %[PA], 8*8 \n\t"
|
||||
"vle.v v1, (t4) \n\t"
|
||||
"addi t4, t4, 8*8 \n\t"
|
||||
|
||||
"vfmv.v.f v8, ft0 \n\t"
|
||||
"fld ft1, (t1) \n\t"
|
||||
"addi t1, t1, 4*8 \n\t"
|
||||
"vle.v v2, (t5) \n\t"
|
||||
"addi t5, t5, 8*8 \n\t"
|
||||
"vle.v v3, (t6) \n\t"
|
||||
"addi t6, t6, 8*8 \n\t"
|
||||
|
||||
"vfmacc.vv v16, v8, v0 \n\t"
|
||||
"fld ft2, (t2) \n\t"
|
||||
"addi t2, t2, 4*8 \n\t"
|
||||
"vfmacc.vv v17, v8, v1 \n\t"
|
||||
"vfmacc.vv v18, v8, v2 \n\t"
|
||||
"vfmv.v.f v9, ft1 \n\t"
|
||||
"vfmacc.vv v19, v8, v3 \n\t"
|
||||
|
||||
|
||||
"vfmacc.vv v20, v9, v0 \n\t"
|
||||
"fld ft3, (t3) \n\t"
|
||||
"addi t3, t3, 4*8 \n\t"
|
||||
"vfmacc.vv v21, v9, v1 \n\t"
|
||||
"vfmacc.vv v22, v9, v2 \n\t"
|
||||
"vfmv.v.f v10, ft2 \n\t"
|
||||
"vfmacc.vv v23, v9, v3 \n\t"
|
||||
|
||||
"vfmv.v.f v11, ft3 \n\t"
|
||||
"vfmacc.vv v24, v10, v0 \n\t"
|
||||
"vfmacc.vv v25, v10, v1 \n\t"
|
||||
"vfmacc.vv v26, v10, v2 \n\t"
|
||||
"vfmacc.vv v27, v10, v3 \n\t"
|
||||
|
||||
"vfmacc.vv v28, v11, v0 \n\t"
|
||||
"vfmacc.vv v29, v11, v1 \n\t"
|
||||
"vfmacc.vv v30, v11, v2 \n\t"
|
||||
"vfmacc.vv v31, v11, v3 \n\t"
|
||||
|
||||
"addi t0, t0, -1 \n\t"
|
||||
"bgtz t0, M8x4_TAILLOOP \n\t"
|
||||
|
||||
//Save result
|
||||
//load C
|
||||
"M8x4_SAVERESULT: \n\t"
|
||||
//use v8 to store alpha
|
||||
"vfmv.v.f v8, %[ALPHA] \n\t"
|
||||
"vle.v v0, (%[C0]) \n\t"
|
||||
"addi t4, %[C0], 2*8 \n\t"
|
||||
"vle.v v1, (%[C1]) \n\t"
|
||||
"addi t5, %[C1], 2*8 \n\t"
|
||||
"vle.v v2, (%[C2]) \n\t"
|
||||
"addi t6, %[C2], 2*8 \n\t"
|
||||
"vle.v v3, (%[C3]) \n\t"
|
||||
"addi t3, %[C3], 2*8 \n\t"
|
||||
|
||||
//Multiply Alpha
|
||||
"vfmacc.vv v0, v8, v16 \n\t"
|
||||
"vle.v v4, (t4) \n\t"
|
||||
"vfmacc.vv v1, v8, v20 \n\t"
|
||||
"vle.v v5, (t5) \n\t"
|
||||
"vfmacc.vv v2, v8, v24 \n\t"
|
||||
"vle.v v6, (t6) \n\t"
|
||||
"vfmacc.vv v3, v8, v28 \n\t"
|
||||
"vle.v v7, (t3) \n\t"
|
||||
|
||||
"vfmacc.vv v4, v8, v17 \n\t"
|
||||
"vse.v v0, (%[C0]) \n\t"
|
||||
"add %[C0], %[C0], 4*8 \n\t"
|
||||
"vfmacc.vv v5, v8, v21 \n\t"
|
||||
"vse.v v1, (%[C1]) \n\t"
|
||||
"add %[C1], %[C1], 4*8 \n\t"
|
||||
|
||||
"vfmacc.vv v6, v8, v25 \n\t"
|
||||
"vse.v v2, (%[C2]) \n\t"
|
||||
"add %[C2], %[C2], 4*8 \n\t"
|
||||
|
||||
"vfmacc.vv v7, v8, v29 \n\t"
|
||||
"vse.v v3, (%[C3]) \n\t"
|
||||
"add %[C3], %[C3], 4*8 \n\t"
|
||||
|
||||
"vle.v v0, (%[C0]) \n\t"
|
||||
"vse.v v4, (t4) \n\t"
|
||||
"add t4, t4, 4*8 \n\t"
|
||||
|
||||
"vle.v v1, (%[C1]) \n\t"
|
||||
"vse.v v5, (t5) \n\t"
|
||||
"add t5, t5, 4*8 \n\t"
|
||||
|
||||
"vle.v v2, (%[C2]) \n\t"
|
||||
"vse.v v6, (t6) \n\t"
|
||||
"add t6, t6, 4*8 \n\t"
|
||||
|
||||
"vle.v v3, (%[C3]) \n\t"
|
||||
"vse.v v7, (t3) \n\t"
|
||||
"add t3, t3, 4*8 \n\t"
|
||||
|
||||
|
||||
"vfmacc.vv v0, v8, v18 \n\t"
|
||||
"vle.v v4, (t4) \n\t"
|
||||
"vfmacc.vv v1, v8, v22 \n\t"
|
||||
"vle.v v5, (t5) \n\t"
|
||||
"vfmacc.vv v2, v8, v26 \n\t"
|
||||
"vle.v v6, (t6) \n\t"
|
||||
"vfmacc.vv v3, v8, v30 \n\t"
|
||||
"vle.v v7, (t3) \n\t"
|
||||
|
||||
"vfmacc.vv v4, v8, v19 \n\t"
|
||||
"vse.v v0, (%[C0]) \n\t"
|
||||
"add %[C0], %[C0], 4*8 \n\t"
|
||||
|
||||
"vfmacc.vv v5, v8, v23 \n\t"
|
||||
"vse.v v1, (%[C1]) \n\t"
|
||||
"add %[C1], %[C1], 4*8 \n\t"
|
||||
|
||||
"vfmacc.vv v6, v8, v27 \n\t"
|
||||
"vse.v v2, (%[C2]) \n\t"
|
||||
"add %[C2], %[C2], 4*8 \n\t"
|
||||
|
||||
"vfmacc.vv v7, v8, v31 \n\t"
|
||||
"vse.v v3, (%[C3]) \n\t"
|
||||
"add %[C3], %[C3], 4*8 \n\t"
|
||||
|
||||
"vse.v v4, (t4) \n\t"
|
||||
"vse.v v5, (t5) \n\t"
|
||||
"vse.v v6, (t6) \n\t"
|
||||
"vse.v v7, (t3) \n\t"
|
||||
"M8x4_END: \n\t"
|
||||
|
||||
:[C0]"+r"(C0),[C1]"+r"(C1),[C2]"+r"(C2),[C3]"+r"(C3),
|
||||
[PA]"+r"(ptrba), [PB]"+r"(ptrbb)
|
||||
:[ALPHA]"f"(alpha), [BK]"r"(bk)
|
||||
:"cc", "t0", "t4","t5","t6","t3","t1","t2",
|
||||
"ft11", "ft0", "ft1", "ft2","ft3","ft4", "ft5", "ft6","ft7",
|
||||
"v0", "v1", "v2", "v3","v4", "v5", "v6", "v7",
|
||||
"v8", "v9", "v10", "v11","v12", "v13", "v14", "v15",
|
||||
"v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
|
||||
"v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31");
|
||||
}
|
||||
if(bm&4){
|
||||
ptrbb = bb;
|
||||
res0 = 0;
|
||||
res1 = 0;
|
||||
res2 = 0;
|
||||
res3 = 0;
|
||||
res4 = 0;
|
||||
res5 = 0;
|
||||
res6 = 0;
|
||||
res7 = 0;
|
||||
res8 = 0;
|
||||
res9 = 0;
|
||||
res10 = 0;
|
||||
res11 = 0;
|
||||
res12 = 0;
|
||||
res13 = 0;
|
||||
res14 = 0;
|
||||
res15 = 0;
|
||||
|
||||
for(k=0; k<bk; k+=1){
|
||||
loadb0 = ptrbb[0];
|
||||
loadb1 = ptrbb[1];
|
||||
|
||||
load0 = ptrba[0];
|
||||
load1 = ptrba[1];
|
||||
load2 = ptrba[2];
|
||||
load3 = ptrba[3];
|
||||
|
||||
res0 = res0 + load0 * loadb0;
|
||||
res1 = res1 + load1 * loadb0;
|
||||
res2 = res2 + load2 * loadb0;
|
||||
res3 = res3 + load3 * loadb0;
|
||||
|
||||
res4 = res4 + load0 * loadb1;
|
||||
res5 = res5 + load1 * loadb1;
|
||||
res6 = res6 + load2 * loadb1;
|
||||
res7 = res7 + load3 * loadb1;
|
||||
|
||||
loadb2 = ptrbb[2];
|
||||
loadb3 = ptrbb[3];
|
||||
|
||||
res8 = res8 + load0 * loadb2;
|
||||
res9 = res9 + load1 * loadb2;
|
||||
res10 = res10 + load2 * loadb2;
|
||||
res11 = res11 + load3 * loadb2;
|
||||
|
||||
res12 = res12 + load0 * loadb3;
|
||||
res13 = res13 + load1 * loadb3;
|
||||
res14 = res14 + load2 * loadb3;
|
||||
res15 = res15 + load3 * loadb3;
|
||||
|
||||
ptrba += 4;
|
||||
ptrbb += 4;
|
||||
}
|
||||
|
||||
res0 = res0 * alpha;
|
||||
res1 = res1 * alpha;
|
||||
res2 = res2 * alpha;
|
||||
res3 = res3 * alpha;
|
||||
res4 = res4 * alpha;
|
||||
res5 = res5 * alpha;
|
||||
res6 = res6 * alpha;
|
||||
res7 = res7 * alpha;
|
||||
|
||||
res8 = res8 * alpha;
|
||||
res9 = res9 * alpha;
|
||||
res10 = res10 * alpha;
|
||||
res11 = res11 * alpha;
|
||||
res12 = res12 * alpha;
|
||||
res13 = res13 * alpha;
|
||||
res14 = res14 * alpha;
|
||||
res15 = res15 * alpha;
|
||||
|
||||
C0[0] += res0;
|
||||
C0[1] += res1;
|
||||
C0[2] += res2;
|
||||
C0[3] += res3;
|
||||
|
||||
C1[0] += res4;
|
||||
C1[1] += res5;
|
||||
C1[2] += res6;
|
||||
C1[3] += res7;
|
||||
|
||||
C2[0] += res8;
|
||||
C2[1] += res9;
|
||||
C2[2] += res10;
|
||||
C2[3] += res11;
|
||||
|
||||
C3[0] += res12;
|
||||
C3[1] += res13;
|
||||
C3[2] += res14;
|
||||
C3[3] += res15;
|
||||
|
||||
C0 += 4;
|
||||
C1 += 4;
|
||||
C2 += 4;
|
||||
C3 += 4;
|
||||
}
|
||||
if(bm&2){
|
||||
ptrbb = bb;
|
||||
|
||||
res0 = 0;
|
||||
res1 = 0;
|
||||
|
||||
res4 = 0;
|
||||
res5 = 0;
|
||||
|
||||
res8 = 0;
|
||||
res9 = 0;
|
||||
|
||||
res12 = 0;
|
||||
res13 = 0;
|
||||
|
||||
for(k=0; k<bk; k+=1){
|
||||
loadb0 = ptrbb[0];
|
||||
loadb1 = ptrbb[1];
|
||||
|
||||
load0 = ptrba[0];
|
||||
load1 = ptrba[1];
|
||||
|
||||
res0 = res0 + load0 * loadb0;
|
||||
res1 = res1 + load1 * loadb0;
|
||||
|
||||
res4 = res4 + load0 * loadb1;
|
||||
res5 = res5 + load1 * loadb1;
|
||||
|
||||
loadb2 = ptrbb[2];
|
||||
loadb3 = ptrbb[3];
|
||||
|
||||
res8 = res8 + load0 * loadb2;
|
||||
res9 = res9 + load1 * loadb2;
|
||||
|
||||
res12 = res12 + load0 * loadb3;
|
||||
res13 = res13 + load1 * loadb3;
|
||||
|
||||
ptrba += 2;
|
||||
ptrbb += 4;
|
||||
}
|
||||
|
||||
res0 = res0 * alpha;
|
||||
res1 = res1 * alpha;
|
||||
|
||||
res4 = res4 * alpha;
|
||||
res5 = res5 * alpha;
|
||||
|
||||
res8 = res8 * alpha;
|
||||
res9 = res9 * alpha;
|
||||
|
||||
res12 = res12 * alpha;
|
||||
res13 = res13 * alpha;
|
||||
|
||||
C0[0] += res0;
|
||||
C0[1] += res1;
|
||||
|
||||
C1[0] += res4;
|
||||
C1[1] += res5;
|
||||
|
||||
C2[0] += res8;
|
||||
C2[1] += res9;
|
||||
|
||||
C3[0] += res12;
|
||||
C3[1] += res13;
|
||||
|
||||
C0 += 2;
|
||||
C1 += 2;
|
||||
C2 += 2;
|
||||
C3 += 2;
|
||||
}
|
||||
if(bm&1){
|
||||
ptrbb = bb;
|
||||
|
||||
res0 = 0;
|
||||
|
||||
res4 = 0;
|
||||
|
||||
res8 = 0;
|
||||
|
||||
res12 = 0;
|
||||
|
||||
for(k=0; k<bk; k+=1){
|
||||
loadb0 = ptrbb[0];
|
||||
loadb1 = ptrbb[1];
|
||||
|
||||
load0 = ptrba[0];
|
||||
|
||||
res0 = res0 + load0 * loadb0;
|
||||
|
||||
res4 = res4 + load0 * loadb1;
|
||||
|
||||
loadb2 = ptrbb[2];
|
||||
loadb3 = ptrbb[3];
|
||||
|
||||
res8 = res8 + load0 * loadb2;
|
||||
|
||||
res12 = res12 + load0 * loadb3;
|
||||
|
||||
ptrba += 1;
|
||||
ptrbb += 4;
|
||||
}
|
||||
|
||||
res0 = res0 * alpha;
|
||||
|
||||
res4 = res4 * alpha;
|
||||
|
||||
res8 = res8 * alpha;
|
||||
|
||||
res12 = res12 * alpha;
|
||||
|
||||
C0[0] += res0;
|
||||
C1[0] += res4;
|
||||
C2[0] += res8;
|
||||
C3[0] += res12;
|
||||
|
||||
C0 += 1;
|
||||
C1 += 1;
|
||||
C2 += 1;
|
||||
C3 += 1;
|
||||
}
|
||||
|
||||
k = bk<<2;
|
||||
bb = bb+k;
|
||||
i = ldc<<2;
|
||||
C = C+i;
|
||||
}
|
||||
|
||||
if(bn&2){
|
||||
C0 = C;
|
||||
C1 = C0+ldc;
|
||||
|
||||
ptrba = ba;
|
||||
for(i=0; i<bm/8; i+=1){
|
||||
ptrbb = bb;
|
||||
res0 = 0;
|
||||
res1 = 0;
|
||||
res2 = 0;
|
||||
res3 = 0;
|
||||
res4 = 0;
|
||||
res5 = 0;
|
||||
res6 = 0;
|
||||
res7 = 0;
|
||||
res8 = 0;
|
||||
res9 = 0;
|
||||
res10 = 0;
|
||||
res11 = 0;
|
||||
res12 = 0;
|
||||
res13 = 0;
|
||||
res14 = 0;
|
||||
res15 = 0;
|
||||
|
||||
for(k=0; k<bk; k+=1){
|
||||
loadb0 = ptrbb[0];
|
||||
loadb1 = ptrbb[1];
|
||||
|
||||
load0 = ptrba[0];
|
||||
load1 = ptrba[1];
|
||||
load2 = ptrba[2];
|
||||
load3 = ptrba[3];
|
||||
load4 = ptrba[4];
|
||||
load5 = ptrba[5];
|
||||
load6 = ptrba[6];
|
||||
load7 = ptrba[7];
|
||||
|
||||
res0 = res0 + load0 * loadb0;
|
||||
res1 = res1 + load1 * loadb0;
|
||||
res2 = res2 + load2 * loadb0;
|
||||
res3 = res3 + load3 * loadb0;
|
||||
|
||||
res4 = res4 + load4 * loadb0;
|
||||
res5 = res5 + load5 * loadb0;
|
||||
res6 = res6 + load6 * loadb0;
|
||||
res7 = res7 + load7 * loadb0;
|
||||
|
||||
res8 = res8 + load0 * loadb1;
|
||||
res9 = res9 + load1 * loadb1;
|
||||
res10 = res10 + load2 * loadb1;
|
||||
res11 = res11 + load3 * loadb1;
|
||||
|
||||
res12 = res12 + load4 * loadb1;
|
||||
res13 = res13 + load5 * loadb1;
|
||||
res14 = res14 + load6 * loadb1;
|
||||
res15 = res15 + load7 * loadb1;
|
||||
|
||||
ptrba += 8;
|
||||
ptrbb += 2;
|
||||
}
|
||||
|
||||
res0 = res0 * alpha;
|
||||
res1 = res1 * alpha;
|
||||
res2 = res2 * alpha;
|
||||
res3 = res3 * alpha;
|
||||
res4 = res4 * alpha;
|
||||
res5 = res5 * alpha;
|
||||
res6 = res6 * alpha;
|
||||
res7 = res7 * alpha;
|
||||
|
||||
res8 = res8 * alpha;
|
||||
res9 = res9 * alpha;
|
||||
res10 = res10 * alpha;
|
||||
res11 = res11 * alpha;
|
||||
res12 = res12 * alpha;
|
||||
res13 = res13 * alpha;
|
||||
res14 = res14 * alpha;
|
||||
res15 = res15 * alpha;
|
||||
|
||||
C0[0] += res0;
|
||||
C0[1] += res1;
|
||||
C0[2] += res2;
|
||||
C0[3] += res3;
|
||||
C0[4] += res4;
|
||||
C0[5] += res5;
|
||||
C0[6] += res6;
|
||||
C0[7] += res7;
|
||||
|
||||
C1[0] += res8;
|
||||
C1[1] += res9;
|
||||
C1[2] += res10;
|
||||
C1[3] += res11;
|
||||
C1[4] += res12;
|
||||
C1[5] += res13;
|
||||
C1[6] += res14;
|
||||
C1[7] += res15;
|
||||
|
||||
C0 += 8;
|
||||
C1 += 8;
|
||||
}
|
||||
if(bm&4){
|
||||
ptrbb = bb;
|
||||
res0 = 0;
|
||||
res1 = 0;
|
||||
res2 = 0;
|
||||
res3 = 0;
|
||||
|
||||
res8 = 0;
|
||||
res9 = 0;
|
||||
res10 = 0;
|
||||
res11 = 0;
|
||||
|
||||
for(k=0; k<bk; k+=1){
|
||||
loadb0 = ptrbb[0];
|
||||
loadb1 = ptrbb[1];
|
||||
|
||||
load0 = ptrba[0];
|
||||
load1 = ptrba[1];
|
||||
load2 = ptrba[2];
|
||||
load3 = ptrba[3];
|
||||
|
||||
res0 = res0 + load0 * loadb0;
|
||||
res1 = res1 + load1 * loadb0;
|
||||
res2 = res2 + load2 * loadb0;
|
||||
res3 = res3 + load3 * loadb0;
|
||||
|
||||
res8 = res8 + load0 * loadb1;
|
||||
res9 = res9 + load1 * loadb1;
|
||||
res10 = res10 + load2 * loadb1;
|
||||
res11 = res11 + load3 * loadb1;
|
||||
|
||||
ptrba += 4;
|
||||
ptrbb += 2;
|
||||
}
|
||||
|
||||
res0 = res0 * alpha;
|
||||
res1 = res1 * alpha;
|
||||
res2 = res2 * alpha;
|
||||
res3 = res3 * alpha;
|
||||
|
||||
res8 = res8 * alpha;
|
||||
res9 = res9 * alpha;
|
||||
res10 = res10 * alpha;
|
||||
res11 = res11 * alpha;
|
||||
|
||||
C0[0] += res0;
|
||||
C0[1] += res1;
|
||||
C0[2] += res2;
|
||||
C0[3] += res3;
|
||||
|
||||
C1[0] += res8;
|
||||
C1[1] += res9;
|
||||
C1[2] += res10;
|
||||
C1[3] += res11;
|
||||
|
||||
C0 += 4;
|
||||
C1 += 4;
|
||||
}
|
||||
if(bm&2){
|
||||
ptrbb = bb;
|
||||
res0 = 0;
|
||||
res1 = 0;
|
||||
|
||||
res8 = 0;
|
||||
res9 = 0;
|
||||
|
||||
for(k=0; k<bk; k+=1){
|
||||
loadb0 = ptrbb[0];
|
||||
loadb1 = ptrbb[1];
|
||||
|
||||
load0 = ptrba[0];
|
||||
load1 = ptrba[1];
|
||||
|
||||
res0 = res0 + load0 * loadb0;
|
||||
res1 = res1 + load1 * loadb0;
|
||||
|
||||
res8 = res8 + load0 * loadb1;
|
||||
res9 = res9 + load1 * loadb1;
|
||||
|
||||
ptrba += 2;
|
||||
ptrbb += 2;
|
||||
}
|
||||
|
||||
res0 = res0 * alpha;
|
||||
res1 = res1 * alpha;
|
||||
|
||||
res8 = res8 * alpha;
|
||||
res9 = res9 * alpha;
|
||||
|
||||
C0[0] += res0;
|
||||
C0[1] += res1;
|
||||
|
||||
C1[0] += res8;
|
||||
C1[1] += res9;
|
||||
|
||||
C0 += 2;
|
||||
C1 += 2;
|
||||
}
|
||||
if(bm&1){
|
||||
ptrbb = bb;
|
||||
res0 = 0;
|
||||
res8 = 0;
|
||||
for(k=0; k<bk; k+=1){
|
||||
loadb0 = ptrbb[0];
|
||||
loadb1 = ptrbb[1];
|
||||
load0 = ptrba[0];
|
||||
|
||||
res0 = res0 + load0 * loadb0;
|
||||
res8 = res8 + load0 * loadb1;
|
||||
ptrba += 1;
|
||||
ptrbb += 2;
|
||||
}
|
||||
|
||||
res0 = res0 * alpha;
|
||||
res8 = res8 * alpha;
|
||||
|
||||
C0[0] += res0;
|
||||
C1[0] += res8;
|
||||
|
||||
C0 += 1;
|
||||
C1 += 1;
|
||||
}
|
||||
k = bk<<1;
|
||||
bb = bb+k;
|
||||
i = ldc<<1;
|
||||
C = C+i;
|
||||
}
|
||||
|
||||
if (bn&1){
|
||||
C0 = C;
|
||||
ptrba = ba;
|
||||
for(i=0; i<bm/8; i+=1){
|
||||
ptrbb = bb;
|
||||
res0 = 0;
|
||||
res1 = 0;
|
||||
res2 = 0;
|
||||
res3 = 0;
|
||||
res4 = 0;
|
||||
res5 = 0;
|
||||
res6 = 0;
|
||||
res7 = 0;
|
||||
|
||||
for(k=0; k<bk; k+=1){
|
||||
loadb0 = ptrbb[0];
|
||||
res0 = res0 + ptrba[0] * loadb0;
|
||||
res1 = res1 + ptrba[1] * loadb0;
|
||||
res2 = res2 + ptrba[2] * loadb0;
|
||||
res3 = res3 + ptrba[3] * loadb0;
|
||||
|
||||
res4 = res4 + ptrba[4] * loadb0;
|
||||
res5 = res5 + ptrba[5] * loadb0;
|
||||
res6 = res6 + ptrba[6] * loadb0;
|
||||
res7 = res7 + ptrba[7] * loadb0;
|
||||
|
||||
ptrba += 8;
|
||||
ptrbb += 1;
|
||||
}
|
||||
res0 = res0 * alpha;
|
||||
res1 = res1 * alpha;
|
||||
res2 = res2 * alpha;
|
||||
res3 = res3 * alpha;
|
||||
res4 = res4 * alpha;
|
||||
res5 = res5 * alpha;
|
||||
res6 = res6 * alpha;
|
||||
res7 = res7 * alpha;
|
||||
|
||||
C0[0] += res0;
|
||||
C0[1] += res1;
|
||||
C0[2] += res2;
|
||||
C0[3] += res3;
|
||||
C0[4] += res4;
|
||||
C0[5] += res5;
|
||||
C0[6] += res6;
|
||||
C0[7] += res7;
|
||||
|
||||
C0 += 8;
|
||||
}
|
||||
if(bm&4){
|
||||
ptrbb = bb;
|
||||
res0 = 0;
|
||||
res1 = 0;
|
||||
res2 = 0;
|
||||
res3 = 0;
|
||||
for(k=0; k<bk; k+=1){
|
||||
loadb0 = ptrbb[0];
|
||||
res0 = res0 + ptrba[0] * loadb0;
|
||||
res1 = res1 + ptrba[1] * loadb0;
|
||||
res2 = res2 + ptrba[2] * loadb0;
|
||||
res3 = res3 + ptrba[3] * loadb0;
|
||||
|
||||
ptrba += 4;
|
||||
ptrbb += 1;
|
||||
}
|
||||
res0 = res0 * alpha;
|
||||
res1 = res1 * alpha;
|
||||
res2 = res2 * alpha;
|
||||
res3 = res3 * alpha;
|
||||
|
||||
C0[0] += res0;
|
||||
C0[1] += res1;
|
||||
C0[2] += res2;
|
||||
C0[3] += res3;
|
||||
|
||||
C0 += 4;
|
||||
}
|
||||
if(bm&2){
|
||||
ptrbb = bb;
|
||||
res0 = 0;
|
||||
res1 = 0;
|
||||
for(k=0; k<bk; k+=1){
|
||||
loadb0 = ptrbb[0];
|
||||
res0 = res0 + ptrba[0] * loadb0;
|
||||
res1 = res1 + ptrba[1] * loadb0;
|
||||
|
||||
ptrba += 2;
|
||||
ptrbb += 1;
|
||||
}
|
||||
res0 = res0 * alpha;
|
||||
res1 = res1 * alpha;
|
||||
|
||||
C0[0] += res0;
|
||||
C0[1] += res1;
|
||||
|
||||
C0 += 2;
|
||||
}
|
||||
if(bm&1){
|
||||
ptrbb = bb;
|
||||
res0 = 0;
|
||||
for(k=0; k<bk; k+=1){
|
||||
loadb0 = ptrbb[0];
|
||||
res0 = res0 + ptrba[0] * loadb0;
|
||||
ptrba += 1;
|
||||
ptrbb += 1;
|
||||
}
|
||||
res0 = res0 * alpha;
|
||||
C0[0] += res0;
|
||||
C0 += 1;
|
||||
}
|
||||
|
||||
k = bk;
|
||||
bb = bb+k;
|
||||
C = C+ldc;
|
||||
}
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,64 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2013, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
/**************************************************************************************
|
||||
* 2013/09/14 Saar
|
||||
* BLASTEST float : OK
|
||||
* BLASTEST double : OK
|
||||
* CTEST : OK
|
||||
* TEST : OK
|
||||
*
|
||||
**************************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
|
||||
#if defined(DSDOT)
|
||||
double CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
|
||||
#else
|
||||
FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
|
||||
#endif
|
||||
{
|
||||
BLASLONG i=0;
|
||||
BLASLONG ix=0,iy=0;
|
||||
double dot = 0.0 ;
|
||||
|
||||
if ( n < 0 ) return(dot);
|
||||
|
||||
while(i < n)
|
||||
{
|
||||
|
||||
dot += y[iy] * x[ix] ;
|
||||
ix += inc_x ;
|
||||
iy += inc_y ;
|
||||
i++ ;
|
||||
|
||||
}
|
||||
return(dot);
|
||||
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,172 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2020, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
#if !defined(DOUBLE)
|
||||
#define RVV_EFLOAT RVV_E32
|
||||
#define RVV_M RVV_M4
|
||||
#define FLOAT_V_T float32xm4_t
|
||||
#define VLEV_FLOAT vlev_float32xm4
|
||||
#define VLSEV_FLOAT vlsev_float32xm4
|
||||
#define VFREDSUM_FLOAT vfredsumvs_float32xm4
|
||||
#define VFMACCVV_FLOAT vfmaccvv_float32xm4
|
||||
#define VFMVVF_FLOAT vfmvvf_float32xm4
|
||||
#define VFDOTVV_FLOAT vfdotvv_float32xm4
|
||||
#else
|
||||
#define RVV_EFLOAT RVV_E64
|
||||
#define RVV_M RVV_M4
|
||||
#define FLOAT_V_T float64xm4_t
|
||||
#define VLEV_FLOAT vlev_float64xm4
|
||||
#define VLSEV_FLOAT vlsev_float64xm4
|
||||
#define VFREDSUM_FLOAT vfredsumvs_float64xm4
|
||||
#define VFMACCVV_FLOAT vfmaccvv_float64xm4
|
||||
#define VFMVVF_FLOAT vfmvvf_float64xm4
|
||||
#define VFDOTVV_FLOAT vfdotvv_float64xm4
|
||||
#endif
|
||||
|
||||
#if defined(DSDOT)
|
||||
double CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
|
||||
#else
|
||||
FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
|
||||
#endif
|
||||
{
|
||||
BLASLONG i=0, j=0;
|
||||
double dot = 0.0 ;
|
||||
|
||||
if ( n < 0 ) return(dot);
|
||||
|
||||
FLOAT_V_T vr, vx, vy;
|
||||
unsigned int gvl = 0;
|
||||
if(inc_x == 1 && inc_y == 1){
|
||||
gvl = vsetvli(n, RVV_EFLOAT, RVV_M);
|
||||
vr = VFMVVF_FLOAT(0, gvl);
|
||||
for(i=0,j=0; i<n/gvl; i++){
|
||||
vx = VLEV_FLOAT(&x[j], gvl);
|
||||
vy = VLEV_FLOAT(&y[j], gvl);
|
||||
vr = VFMACCVV_FLOAT(vr, vx, vy, gvl);
|
||||
j += gvl;
|
||||
}
|
||||
if(j > 0){
|
||||
vx = VFMVVF_FLOAT(0, gvl);
|
||||
vx = VFREDSUM_FLOAT(vr, vx, gvl);
|
||||
dot += vx[0];
|
||||
}
|
||||
//tail
|
||||
if(j < n){
|
||||
gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M);
|
||||
vx = VLEV_FLOAT(&x[j], gvl);
|
||||
vy = VLEV_FLOAT(&y[j], gvl);
|
||||
FLOAT_V_T vz = VFMVVF_FLOAT(0, gvl);
|
||||
//vr = VFDOTVV_FLOAT(vx, vy, gvl);
|
||||
vr = VFMACCVV_FLOAT(vz, vx, vy, gvl);
|
||||
vx = VFREDSUM_FLOAT(vr, vz, gvl);
|
||||
dot += vx[0];
|
||||
}
|
||||
}else if(inc_y == 1){
|
||||
gvl = vsetvli(n, RVV_EFLOAT, RVV_M);
|
||||
vr = VFMVVF_FLOAT(0, gvl);
|
||||
unsigned int stride_x = inc_x * sizeof(FLOAT);
|
||||
for(i=0,j=0; i<n/gvl; i++){
|
||||
vx = VLSEV_FLOAT(&x[j*inc_x], stride_x, gvl);
|
||||
vy = VLEV_FLOAT(&y[j], gvl);
|
||||
vr = VFMACCVV_FLOAT(vr, vx, vy, gvl);
|
||||
j += gvl;
|
||||
}
|
||||
if(j > 0){
|
||||
vx = VFMVVF_FLOAT(0, gvl);
|
||||
vx = VFREDSUM_FLOAT(vr, vx, gvl);
|
||||
dot += vx[0];
|
||||
}
|
||||
//tail
|
||||
if(j < n){
|
||||
gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M);
|
||||
vx = VLSEV_FLOAT(&x[j*inc_x], stride_x, gvl);
|
||||
vy = VLEV_FLOAT(&y[j], gvl);
|
||||
FLOAT_V_T vz = VFMVVF_FLOAT(0, gvl);
|
||||
//vr = VFDOTVV_FLOAT(vx, vy, gvl);
|
||||
vr = VFMACCVV_FLOAT(vz, vx, vy, gvl);
|
||||
vx = VFREDSUM_FLOAT(vr, vz, gvl);
|
||||
dot += vx[0];
|
||||
}
|
||||
}else if(inc_x == 1){
|
||||
gvl = vsetvli(n, RVV_EFLOAT, RVV_M);
|
||||
vr = VFMVVF_FLOAT(0, gvl);
|
||||
unsigned int stride_y = inc_y * sizeof(FLOAT);
|
||||
for(i=0,j=0; i<n/gvl; i++){
|
||||
vx = VLEV_FLOAT(&x[j], gvl);
|
||||
vy = VLSEV_FLOAT(&y[j*inc_y], stride_y, gvl);
|
||||
vr = VFMACCVV_FLOAT(vr, vx, vy, gvl);
|
||||
j += gvl;
|
||||
}
|
||||
if(j > 0){
|
||||
vx = VFMVVF_FLOAT(0, gvl);
|
||||
vx = VFREDSUM_FLOAT(vr, vx, gvl);
|
||||
dot += vx[0];
|
||||
}
|
||||
//tail
|
||||
if(j < n){
|
||||
gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M);
|
||||
vx = VLEV_FLOAT(&x[j], gvl);
|
||||
vy = VLSEV_FLOAT(&y[j*inc_y], stride_y, gvl);
|
||||
FLOAT_V_T vz = VFMVVF_FLOAT(0, gvl);
|
||||
//vr = VFDOTVV_FLOAT(vx, vy, gvl);
|
||||
vr = VFMACCVV_FLOAT(vz, vx, vy, gvl);
|
||||
vx = VFREDSUM_FLOAT(vr, vz, gvl);
|
||||
dot += vx[0];
|
||||
}
|
||||
}else{
|
||||
gvl = vsetvli(n, RVV_EFLOAT, RVV_M);
|
||||
vr = VFMVVF_FLOAT(0, gvl);
|
||||
unsigned int stride_x = inc_x * sizeof(FLOAT);
|
||||
unsigned int stride_y = inc_y * sizeof(FLOAT);
|
||||
for(i=0,j=0; i<n/gvl; i++){
|
||||
vx = VLSEV_FLOAT(&x[j*inc_x], stride_x, gvl);
|
||||
vy = VLSEV_FLOAT(&y[j*inc_y], stride_y, gvl);
|
||||
vr = VFMACCVV_FLOAT(vr, vx, vy, gvl);
|
||||
j += gvl;
|
||||
}
|
||||
if(j > 0){
|
||||
vx = VFMVVF_FLOAT(0, gvl);
|
||||
vx = VFREDSUM_FLOAT(vr, vx, gvl);
|
||||
dot += vx[0];
|
||||
}
|
||||
//tail
|
||||
if(j < n){
|
||||
gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M);
|
||||
vx = VLSEV_FLOAT(&x[j*inc_x], stride_x, gvl);
|
||||
vy = VLSEV_FLOAT(&y[j*inc_y], stride_y, gvl);
|
||||
FLOAT_V_T vz = VFMVVF_FLOAT(0, gvl);
|
||||
//vr = VFDOTVV_FLOAT(vx, vy, gvl);
|
||||
vr = VFMACCVV_FLOAT(vz, vx, vy, gvl);
|
||||
vx = VFREDSUM_FLOAT(vr, vz, gvl);
|
||||
dot += vx[0];
|
||||
}
|
||||
}
|
||||
return(dot);
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,67 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2013, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
|
||||
/**************************************************************************************
|
||||
* * 2013/09/14 Saar
|
||||
* * BLASTEST float : OK
|
||||
* * BLASTEST double : OK
|
||||
* CTEST : OK
|
||||
* TEST : OK
|
||||
* *
|
||||
* **************************************************************************************/
|
||||
|
||||
|
||||
#include "common.h"
|
||||
|
||||
int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *buffer)
|
||||
{
|
||||
BLASLONG i;
|
||||
BLASLONG ix,iy;
|
||||
BLASLONG j;
|
||||
FLOAT *a_ptr;
|
||||
FLOAT temp;
|
||||
|
||||
ix = 0;
|
||||
a_ptr = a;
|
||||
|
||||
for (j=0; j<n; j++)
|
||||
{
|
||||
temp = alpha * x[ix];
|
||||
iy = 0;
|
||||
for (i=0; i<m; i++)
|
||||
{
|
||||
y[iy] += temp * a_ptr[i];
|
||||
iy += inc_y;
|
||||
}
|
||||
a_ptr += lda;
|
||||
ix += inc_x;
|
||||
}
|
||||
return(0);
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,146 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2020, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
#if !defined(DOUBLE)
|
||||
#define RVV_EFLOAT RVV_E32
|
||||
#define RVV_M RVV_M4
|
||||
#define FLOAT_V_T float32xm4_t
|
||||
#define VLEV_FLOAT vlev_float32xm4
|
||||
#define VLSEV_FLOAT vlsev_float32xm4
|
||||
#define VSEV_FLOAT vsev_float32xm4
|
||||
#define VSSEV_FLOAT vssev_float32xm4
|
||||
#define VFMACCVF_FLOAT vfmaccvf_float32xm4
|
||||
#else
|
||||
#define RVV_EFLOAT RVV_E64
|
||||
#define RVV_M RVV_M4
|
||||
#define FLOAT_V_T float64xm4_t
|
||||
#define VLEV_FLOAT vlev_float64xm4
|
||||
#define VLSEV_FLOAT vlsev_float64xm4
|
||||
#define VSEV_FLOAT vsev_float64xm4
|
||||
#define VSSEV_FLOAT vssev_float64xm4
|
||||
#define VFMACCVF_FLOAT vfmaccvf_float64xm4
|
||||
#endif
|
||||
|
||||
int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *buffer)
|
||||
{
|
||||
BLASLONG i = 0, j = 0, k = 0;
|
||||
BLASLONG ix = 0, iy = 0;
|
||||
|
||||
if(n < 0) return(0);
|
||||
FLOAT *a_ptr = a;
|
||||
FLOAT temp = 0.0;
|
||||
FLOAT_V_T va0, va1, vy0, vy1;
|
||||
unsigned int gvl = 0;
|
||||
if(inc_y == 1){
|
||||
gvl = vsetvli(m, RVV_EFLOAT, RVV_M);
|
||||
if(gvl <= m/2){
|
||||
for(k=0,j=0; k<m/(2*gvl); k++){
|
||||
a_ptr = a;
|
||||
ix = 0;
|
||||
vy0 = VLEV_FLOAT(&y[j], gvl);
|
||||
vy1 = VLEV_FLOAT(&y[j+gvl], gvl);
|
||||
for(i = 0; i < n; i++){
|
||||
temp = alpha * x[ix];
|
||||
va0 = VLEV_FLOAT(&a_ptr[j], gvl);
|
||||
vy0 = VFMACCVF_FLOAT(vy0, temp, va0, gvl);
|
||||
|
||||
va1 = VLEV_FLOAT(&a_ptr[j+gvl], gvl);
|
||||
vy1 = VFMACCVF_FLOAT(vy1, temp, va1, gvl);
|
||||
a_ptr += lda;
|
||||
ix += inc_x;
|
||||
}
|
||||
VSEV_FLOAT(&y[j], vy0, gvl);
|
||||
VSEV_FLOAT(&y[j+gvl], vy1, gvl);
|
||||
j += gvl * 2;
|
||||
}
|
||||
}
|
||||
//tail
|
||||
for(;j < m;){
|
||||
gvl = vsetvli(m-j, RVV_EFLOAT, RVV_M);
|
||||
a_ptr = a;
|
||||
ix = 0;
|
||||
vy0 = VLEV_FLOAT(&y[j], gvl);
|
||||
for(i = 0; i < n; i++){
|
||||
temp = alpha * x[ix];
|
||||
va0 = VLEV_FLOAT(&a_ptr[j], gvl);
|
||||
vy0 = VFMACCVF_FLOAT(vy0, temp, va0, gvl);
|
||||
|
||||
a_ptr += lda;
|
||||
ix += inc_x;
|
||||
}
|
||||
VSEV_FLOAT(&y[j], vy0, gvl);
|
||||
j += gvl;
|
||||
}
|
||||
}else{
|
||||
BLASLONG stride_y = inc_y * sizeof(FLOAT);
|
||||
gvl = vsetvli(m, RVV_EFLOAT, RVV_M);
|
||||
if(gvl <= m/2){
|
||||
BLASLONG inc_yv = inc_y * gvl;
|
||||
for(k=0,j=0; k<m/(2*gvl); k++){
|
||||
a_ptr = a;
|
||||
ix = 0;
|
||||
vy0 = VLSEV_FLOAT(&y[iy], stride_y, gvl);
|
||||
vy1 = VLSEV_FLOAT(&y[iy+inc_yv], stride_y, gvl);
|
||||
for(i = 0; i < n; i++){
|
||||
temp = alpha * x[ix];
|
||||
va0 = VLEV_FLOAT(&a_ptr[j], gvl);
|
||||
vy0 = VFMACCVF_FLOAT(vy0, temp, va0, gvl);
|
||||
|
||||
va1 = VLEV_FLOAT(&a_ptr[j+gvl], gvl);
|
||||
vy1 = VFMACCVF_FLOAT(vy1, temp, va1, gvl);
|
||||
a_ptr += lda;
|
||||
ix += inc_x;
|
||||
}
|
||||
VSSEV_FLOAT(&y[iy], stride_y, vy0, gvl);
|
||||
VSSEV_FLOAT(&y[iy+inc_yv], stride_y, vy1, gvl);
|
||||
j += gvl * 2;
|
||||
iy += inc_yv * 2;
|
||||
}
|
||||
}
|
||||
//tail
|
||||
for(;j < m;){
|
||||
gvl = vsetvli(m-j, RVV_EFLOAT, RVV_M);
|
||||
a_ptr = a;
|
||||
ix = 0;
|
||||
vy0 = VLSEV_FLOAT(&y[j*inc_y], stride_y, gvl);
|
||||
for(i = 0; i < n; i++){
|
||||
temp = alpha * x[ix];
|
||||
va0 = VLEV_FLOAT(&a_ptr[j], gvl);
|
||||
vy0 = VFMACCVF_FLOAT(vy0, temp, va0, gvl);
|
||||
|
||||
a_ptr += lda;
|
||||
ix += inc_x;
|
||||
}
|
||||
VSSEV_FLOAT(&y[j*inc_y], stride_y, vy0, gvl);
|
||||
j += gvl;
|
||||
}
|
||||
}
|
||||
return(0);
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,68 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2013, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
/**************************************************************************************
|
||||
* * 2013/09/14 Saar
|
||||
* * BLASTEST float : OK
|
||||
* * BLASTEST double : OK
|
||||
* CTEST : OK
|
||||
* TEST : OK
|
||||
* *
|
||||
* **************************************************************************************/
|
||||
|
||||
|
||||
#include "common.h"
|
||||
|
||||
int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *buffer)
|
||||
{
|
||||
BLASLONG i;
|
||||
BLASLONG ix,iy;
|
||||
BLASLONG j;
|
||||
FLOAT *a_ptr;
|
||||
FLOAT temp;
|
||||
|
||||
iy = 0;
|
||||
a_ptr = a;
|
||||
|
||||
for (j=0; j<n; j++)
|
||||
{
|
||||
temp = 0.0;
|
||||
ix = 0;
|
||||
for (i=0; i<m; i++)
|
||||
{
|
||||
temp += a_ptr[i] * x[ix];
|
||||
ix += inc_x;
|
||||
}
|
||||
y[iy] += alpha * temp;
|
||||
iy += inc_y;
|
||||
a_ptr += lda;
|
||||
}
|
||||
return(0);
|
||||
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,126 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2013, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
#if !defined(DOUBLE)
|
||||
#define RVV_EFLOAT RVV_E32
|
||||
#define RVV_M RVV_M4
|
||||
#define FLOAT_V_T float32xm4_t
|
||||
#define VLEV_FLOAT vlev_float32xm4
|
||||
#define VLSEV_FLOAT vlsev_float32xm4
|
||||
#define VFREDSUM_FLOAT vfredsumvs_float32xm4
|
||||
#define VFMACCVV_FLOAT vfmaccvv_float32xm4
|
||||
#define VFMVVF_FLOAT vfmvvf_float32xm4
|
||||
#define VFDOTVV_FLOAT vfdotvv_float32xm4
|
||||
#define VFMULVV_FLOAT vfmulvv_float32xm4
|
||||
#else
|
||||
#define RVV_EFLOAT RVV_E64
|
||||
#define RVV_M RVV_M4
|
||||
#define FLOAT_V_T float64xm4_t
|
||||
#define VLEV_FLOAT vlev_float64xm4
|
||||
#define VLSEV_FLOAT vlsev_float64xm4
|
||||
#define VFREDSUM_FLOAT vfredsumvs_float64xm4
|
||||
#define VFMACCVV_FLOAT vfmaccvv_float64xm4
|
||||
#define VFMVVF_FLOAT vfmvvf_float64xm4
|
||||
#define VFDOTVV_FLOAT vfdotvv_float64xm4
|
||||
#define VFMULVV_FLOAT vfmulvv_float64xm4
|
||||
#endif
|
||||
|
||||
int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *buffer)
|
||||
{
|
||||
BLASLONG i = 0, j = 0, k = 0;
|
||||
BLASLONG ix = 0, iy = 0;
|
||||
FLOAT *a_ptr = a;
|
||||
FLOAT temp;
|
||||
|
||||
FLOAT_V_T va, vr, vx;
|
||||
unsigned int gvl = 0;
|
||||
if(inc_x == 1){
|
||||
for(i = 0; i < n; i++){
|
||||
gvl = vsetvli(m, RVV_EFLOAT, RVV_M);
|
||||
j = 0;
|
||||
vr = VFMVVF_FLOAT(0, gvl);
|
||||
for(k = 0; k < m/gvl; k++){
|
||||
va = VLEV_FLOAT(&a_ptr[j], gvl);
|
||||
vx = VLEV_FLOAT(&x[j], gvl);
|
||||
vr = VFMACCVV_FLOAT(vr, va, vx, gvl);
|
||||
j += gvl;
|
||||
}
|
||||
va = VFMVVF_FLOAT(0, gvl);
|
||||
va = VFREDSUM_FLOAT(vr, va, gvl);
|
||||
temp = va[0];
|
||||
if(j < m){
|
||||
gvl = vsetvli(m-j, RVV_EFLOAT, RVV_M);
|
||||
va = VLEV_FLOAT(&a_ptr[j], gvl);
|
||||
vx = VLEV_FLOAT(&x[j], gvl);
|
||||
vr = VFMULVV_FLOAT(va, vx, gvl);
|
||||
|
||||
va = VFMVVF_FLOAT(0, gvl);
|
||||
va = VFREDSUM_FLOAT(vr, va, gvl);
|
||||
temp += va[0];
|
||||
}
|
||||
y[iy] += alpha * temp;
|
||||
iy += inc_y;
|
||||
a_ptr += lda;
|
||||
}
|
||||
}else{
|
||||
gvl = vsetvli(m, RVV_EFLOAT, RVV_M);
|
||||
BLASLONG stride_x = inc_x * sizeof(FLOAT);
|
||||
BLASLONG inc_xv = inc_x * gvl;
|
||||
for(i = 0; i < n; i++){
|
||||
gvl = vsetvli(m, RVV_EFLOAT, RVV_M);
|
||||
j = 0;
|
||||
ix = 0;
|
||||
vr = VFMVVF_FLOAT(0, gvl);
|
||||
for(k = 0; k < m/gvl; k++){
|
||||
va = VLEV_FLOAT(&a_ptr[j], gvl);
|
||||
vx = VLSEV_FLOAT(&x[ix], stride_x, gvl);
|
||||
vr = VFMACCVV_FLOAT(vr, va, vx, gvl);
|
||||
j += gvl;
|
||||
ix += inc_xv;
|
||||
}
|
||||
va = VFMVVF_FLOAT(0, gvl);
|
||||
va = VFREDSUM_FLOAT(vr, va, gvl);
|
||||
temp = va[0];
|
||||
if(j < m){
|
||||
gvl = vsetvli(m-j, RVV_EFLOAT, RVV_M);
|
||||
va = VLEV_FLOAT(&a_ptr[j], gvl);
|
||||
vx = VLSEV_FLOAT(&x[ix], stride_x, gvl);
|
||||
vr = VFMULVV_FLOAT(va, vx, gvl);
|
||||
|
||||
va = VFMVVF_FLOAT(0, gvl);
|
||||
va = VFREDSUM_FLOAT(vr, va, gvl);
|
||||
temp += va[0];
|
||||
}
|
||||
y[iy] += alpha * temp;
|
||||
iy += inc_y;
|
||||
a_ptr += lda;
|
||||
}
|
||||
}
|
||||
return(0);
|
||||
}
|
||||
|
|
@ -0,0 +1,77 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2013, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
/**************************************************************************************
|
||||
* 2013/09/14 Saar
|
||||
* BLASTEST float : NoTest
|
||||
* BLASTEST double : NoTest
|
||||
* CTEST : OK
|
||||
* TEST : OK
|
||||
*
|
||||
**************************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
#include <math.h>
|
||||
|
||||
#if defined(DOUBLE)
|
||||
|
||||
#define ABS fabs
|
||||
|
||||
#else
|
||||
|
||||
#define ABS fabsf
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
|
||||
{
|
||||
BLASLONG i=0;
|
||||
BLASLONG ix=0;
|
||||
FLOAT maxf=0.0;
|
||||
BLASLONG max=0;
|
||||
|
||||
if (n <= 0 || inc_x <= 0) return(max);
|
||||
|
||||
maxf=ABS(x[0]);
|
||||
ix += inc_x;
|
||||
i++;
|
||||
|
||||
while(i < n)
|
||||
{
|
||||
if( ABS(x[ix]) > maxf )
|
||||
{
|
||||
max = i;
|
||||
maxf = ABS(x[ix]);
|
||||
}
|
||||
ix += inc_x;
|
||||
i++;
|
||||
}
|
||||
return(max+1);
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,191 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2020, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
#include <math.h>
|
||||
|
||||
#if defined(DOUBLE)
|
||||
|
||||
#define ABS fabs
|
||||
#define RVV_EFLOAT RVV_E64
|
||||
#define RVV_M RVV_M8
|
||||
#define FLOAT_V_T float64xm8_t
|
||||
#define VLEV_FLOAT vlev_float64xm8
|
||||
#define VLSEV_FLOAT vlsev_float64xm8
|
||||
#define VFREDMAXVS_FLOAT vfredmaxvs_float64xm8
|
||||
#define MASK_T e64xm8_t
|
||||
#define VMFLTVF_FLOAT vmfltvf_e64xm8_float64xm8
|
||||
#define VMFLTVV_FLOAT vmfltvv_e64xm8_float64xm8
|
||||
#define VFMVVF_FLOAT vfmvvf_float64xm8
|
||||
#define VFRSUBVF_MASK_FLOAT vfrsubvf_mask_float64xm8
|
||||
#define VFMAXVV_FLOAT vfmaxvv_float64xm8
|
||||
#define VMFGEVF_FLOAT vmfgevf_e64xm8_float64xm8
|
||||
#define VMFIRSTM vmfirstm_e64xm8
|
||||
#define UINT_V_T uint64xm8_t
|
||||
#define VIDV_MASK_UINT vidv_mask_uint64xm8
|
||||
#define VIDV_UINT vidv_uint64xm8
|
||||
#define VADDVX_MASK_UINT vaddvx_mask_uint64xm8
|
||||
#define VADDVX_UINT vaddvx_uint64xm8
|
||||
#define VMVVX_UINT vmvvx_uint64xm8
|
||||
#else
|
||||
|
||||
#define ABS fabsf
|
||||
#define RVV_EFLOAT RVV_E32
|
||||
#define RVV_M RVV_M8
|
||||
#define FLOAT_V_T float32xm8_t
|
||||
#define VLEV_FLOAT vlev_float32xm8
|
||||
#define VLSEV_FLOAT vlsev_float32xm8
|
||||
#define VFREDMAXVS_FLOAT vfredmaxvs_float32xm8
|
||||
#define MASK_T e32xm8_t
|
||||
#define VMFLTVF_FLOAT vmfltvf_e32xm8_float32xm8
|
||||
#define VMFLTVV_FLOAT vmfltvv_e32xm8_float32xm8
|
||||
#define VFMVVF_FLOAT vfmvvf_float32xm8
|
||||
#define VFRSUBVF_MASK_FLOAT vfrsubvf_mask_float32xm8
|
||||
#define VFMAXVV_FLOAT vfmaxvv_float32xm8
|
||||
#define VMFGEVF_FLOAT vmfgevf_e32xm8_float32xm8
|
||||
#define VMFIRSTM vmfirstm_e32xm8
|
||||
#define UINT_V_T uint32xm8_t
|
||||
#define VIDV_MASK_UINT vidv_mask_uint32xm8
|
||||
#define VIDV_UINT vidv_uint32xm8
|
||||
#define VADDVX_MASK_UINT vaddvx_mask_uint32xm8
|
||||
#define VADDVX_UINT vaddvx_uint32xm8
|
||||
#define VMVVX_UINT vmvvx_uint32xm8
|
||||
#endif
|
||||
|
||||
|
||||
BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
|
||||
{
|
||||
BLASLONG i=0, j=0;
|
||||
FLOAT maxf=0.0;
|
||||
unsigned int max_index = 0;
|
||||
if (n <= 0 || inc_x <= 0) return(max_index);
|
||||
|
||||
FLOAT_V_T vx, v_max;
|
||||
UINT_V_T v_max_index;
|
||||
MASK_T mask;
|
||||
unsigned int gvl = 0;
|
||||
if(inc_x == 1){
|
||||
gvl = vsetvli(n, RVV_EFLOAT, RVV_M);
|
||||
v_max_index = VMVVX_UINT(0, gvl);
|
||||
v_max = VFMVVF_FLOAT(-1, gvl);
|
||||
for(i=0,j=0; i < n/gvl; i++){
|
||||
vx = VLEV_FLOAT(&x[j], gvl);
|
||||
//fabs(vector)
|
||||
mask = VMFLTVF_FLOAT(vx, 0, gvl);
|
||||
vx = VFRSUBVF_MASK_FLOAT(vx, vx, 0, mask, gvl);
|
||||
|
||||
//index where element greater than v_max
|
||||
mask = VMFLTVV_FLOAT(v_max, vx, gvl);
|
||||
v_max_index = VIDV_MASK_UINT(v_max_index, mask, gvl);
|
||||
v_max_index = VADDVX_MASK_UINT(v_max_index, v_max_index, j, mask, gvl);
|
||||
|
||||
//update v_max and start_index j
|
||||
v_max = VFMAXVV_FLOAT(v_max, vx, gvl);
|
||||
j += gvl;
|
||||
}
|
||||
vx = VFMVVF_FLOAT(0, gvl);
|
||||
vx = VFREDMAXVS_FLOAT(v_max, vx, gvl);
|
||||
maxf = vx[0];
|
||||
mask = VMFGEVF_FLOAT(v_max, maxf, gvl);
|
||||
max_index = VMFIRSTM(mask,gvl);
|
||||
max_index = v_max_index[max_index];
|
||||
|
||||
if(j < n){
|
||||
gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M);
|
||||
vx = VLEV_FLOAT(&x[j], gvl);
|
||||
//fabs(vector)
|
||||
mask = VMFLTVF_FLOAT(vx, 0, gvl);
|
||||
v_max = VFRSUBVF_MASK_FLOAT(vx, vx, 0, mask, gvl);
|
||||
|
||||
vx = VFMVVF_FLOAT(0, gvl);
|
||||
vx = VFREDMAXVS_FLOAT(v_max, vx, gvl);
|
||||
FLOAT cur_maxf = vx[0];
|
||||
if(cur_maxf > maxf){
|
||||
//tail index
|
||||
v_max_index = VIDV_UINT(gvl);
|
||||
v_max_index = VADDVX_UINT(v_max_index, j, gvl);
|
||||
|
||||
mask = VMFGEVF_FLOAT(v_max, cur_maxf, gvl);
|
||||
max_index = VMFIRSTM(mask,gvl);
|
||||
max_index = v_max_index[max_index];
|
||||
}
|
||||
}
|
||||
}else{
|
||||
gvl = vsetvli(n, RVV_EFLOAT, RVV_M);
|
||||
unsigned int stride_x = inc_x * sizeof(FLOAT);
|
||||
unsigned int idx = 0, inc_v = gvl * inc_x;
|
||||
|
||||
v_max_index = VMVVX_UINT(0, gvl);
|
||||
v_max = VFMVVF_FLOAT(-1, gvl);
|
||||
for(i=0,j=0; i < n/gvl; i++){
|
||||
vx = VLSEV_FLOAT(&x[idx], stride_x, gvl);
|
||||
//fabs(vector)
|
||||
mask = VMFLTVF_FLOAT(vx, 0, gvl);
|
||||
vx = VFRSUBVF_MASK_FLOAT(vx, vx, 0, mask, gvl);
|
||||
|
||||
//index where element greater than v_max
|
||||
mask = VMFLTVV_FLOAT(v_max, vx, gvl);
|
||||
v_max_index = VIDV_MASK_UINT(v_max_index, mask, gvl);
|
||||
v_max_index = VADDVX_MASK_UINT(v_max_index, v_max_index, j, mask, gvl);
|
||||
|
||||
//update v_max and start_index j
|
||||
v_max = VFMAXVV_FLOAT(v_max, vx, gvl);
|
||||
j += gvl;
|
||||
idx += inc_v;
|
||||
}
|
||||
vx = VFMVVF_FLOAT(0, gvl);
|
||||
vx = VFREDMAXVS_FLOAT(v_max, vx, gvl);
|
||||
maxf = vx[0];
|
||||
mask = VMFGEVF_FLOAT(v_max, maxf, gvl);
|
||||
max_index = VMFIRSTM(mask,gvl);
|
||||
max_index = v_max_index[max_index];
|
||||
|
||||
if(j < n){
|
||||
gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M);
|
||||
vx = VLSEV_FLOAT(&x[idx], stride_x, gvl);
|
||||
//fabs(vector)
|
||||
mask = VMFLTVF_FLOAT(vx, 0, gvl);
|
||||
v_max = VFRSUBVF_MASK_FLOAT(vx, vx, 0, mask, gvl);
|
||||
|
||||
vx = VFMVVF_FLOAT(0, gvl);
|
||||
vx = VFREDMAXVS_FLOAT(v_max, vx, gvl);
|
||||
FLOAT cur_maxf = vx[0];
|
||||
if(cur_maxf > maxf){
|
||||
//tail index
|
||||
v_max_index = VIDV_UINT(gvl);
|
||||
v_max_index = VADDVX_UINT(v_max_index, j, gvl);
|
||||
|
||||
mask = VMFGEVF_FLOAT(v_max, cur_maxf, gvl);
|
||||
max_index = VMFIRSTM(mask,gvl);
|
||||
max_index = v_max_index[max_index];
|
||||
}
|
||||
}
|
||||
}
|
||||
return(max_index+1);
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,77 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2013, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
/**************************************************************************************
|
||||
* 2013/09/14 Saar
|
||||
* BLASTEST float : NoTest
|
||||
* BLASTEST double : NoTest
|
||||
* CTEST : NoTest
|
||||
* TEST : NoTest
|
||||
*
|
||||
**************************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
#include <math.h>
|
||||
|
||||
#if defined(DOUBLE)
|
||||
|
||||
#define ABS fabs
|
||||
|
||||
#else
|
||||
|
||||
#define ABS fabsf
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
|
||||
{
|
||||
BLASLONG i=0;
|
||||
BLASLONG ix=0;
|
||||
FLOAT minf=0.0;
|
||||
BLASLONG min=0;
|
||||
|
||||
if (n <= 0 || inc_x <= 0) return(min);
|
||||
|
||||
minf=ABS(x[0]);
|
||||
ix += inc_x;
|
||||
i++;
|
||||
|
||||
while(i < n)
|
||||
{
|
||||
if( ABS(x[ix]) < ABS(minf) )
|
||||
{
|
||||
min = i;
|
||||
minf = ABS(x[ix]);
|
||||
}
|
||||
ix += inc_x;
|
||||
i++;
|
||||
}
|
||||
return(min+1);
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,192 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2020, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
#include <math.h>
|
||||
#include <float.h>
|
||||
|
||||
#if defined(DOUBLE)
|
||||
|
||||
#define ABS fabs
|
||||
#define RVV_EFLOAT RVV_E64
|
||||
#define RVV_M RVV_M8
|
||||
#define FLOAT_V_T float64xm8_t
|
||||
#define VLEV_FLOAT vlev_float64xm8
|
||||
#define VLSEV_FLOAT vlsev_float64xm8
|
||||
#define VFREDMINVS_FLOAT vfredminvs_float64xm8
|
||||
#define MASK_T e64xm8_t
|
||||
#define VMFLTVF_FLOAT vmfltvf_e64xm8_float64xm8
|
||||
#define VMFLTVV_FLOAT vmfltvv_e64xm8_float64xm8
|
||||
#define VFMVVF_FLOAT vfmvvf_float64xm8
|
||||
#define VFRSUBVF_MASK_FLOAT vfrsubvf_mask_float64xm8
|
||||
#define VFMINVV_FLOAT vfminvv_float64xm8
|
||||
#define VMFLEVF_FLOAT vmflevf_e64xm8_float64xm8
|
||||
#define VMFIRSTM vmfirstm_e64xm8
|
||||
#define UINT_V_T uint64xm8_t
|
||||
#define VIDV_MASK_UINT vidv_mask_uint64xm8
|
||||
#define VIDV_UINT vidv_uint64xm8
|
||||
#define VADDVX_MASK_UINT vaddvx_mask_uint64xm8
|
||||
#define VADDVX_UINT vaddvx_uint64xm8
|
||||
#define VMVVX_UINT vmvvx_uint64xm8
|
||||
#else
|
||||
|
||||
#define ABS fabsf
|
||||
#define RVV_EFLOAT RVV_E32
|
||||
#define RVV_M RVV_M8
|
||||
#define FLOAT_V_T float32xm8_t
|
||||
#define VLEV_FLOAT vlev_float32xm8
|
||||
#define VLSEV_FLOAT vlsev_float32xm8
|
||||
#define VFREDMINVS_FLOAT vfredminvs_float32xm8
|
||||
#define MASK_T e32xm8_t
|
||||
#define VMFLTVF_FLOAT vmfltvf_e32xm8_float32xm8
|
||||
#define VMFLTVV_FLOAT vmfltvv_e32xm8_float32xm8
|
||||
#define VFMVVF_FLOAT vfmvvf_float32xm8
|
||||
#define VFRSUBVF_MASK_FLOAT vfrsubvf_mask_float32xm8
|
||||
#define VFMINVV_FLOAT vfminvv_float32xm8
|
||||
#define VMFLEVF_FLOAT vmflevf_e32xm8_float32xm8
|
||||
#define VMFIRSTM vmfirstm_e32xm8
|
||||
#define UINT_V_T uint32xm8_t
|
||||
#define VIDV_MASK_UINT vidv_mask_uint32xm8
|
||||
#define VIDV_UINT vidv_uint32xm8
|
||||
#define VADDVX_MASK_UINT vaddvx_mask_uint32xm8
|
||||
#define VADDVX_UINT vaddvx_uint32xm8
|
||||
#define VMVVX_UINT vmvvx_uint32xm8
|
||||
#endif
|
||||
|
||||
|
||||
BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
|
||||
{
|
||||
BLASLONG i=0, j=0;
|
||||
FLOAT minf=FLT_MAX;
|
||||
unsigned int min_index = 0;
|
||||
if (n <= 0 || inc_x <= 0) return(min_index);
|
||||
|
||||
FLOAT_V_T vx, v_min;
|
||||
UINT_V_T v_min_index;
|
||||
MASK_T mask;
|
||||
unsigned int gvl = 0;
|
||||
if(inc_x == 1){
|
||||
gvl = vsetvli(n, RVV_EFLOAT, RVV_M);
|
||||
v_min = VFMVVF_FLOAT(FLT_MAX, gvl);
|
||||
v_min_index = VMVVX_UINT(0, gvl);
|
||||
for(i=0,j=0; i < n/gvl; i++){
|
||||
vx = VLEV_FLOAT(&x[j], gvl);
|
||||
//fabs(vector)
|
||||
mask = VMFLTVF_FLOAT(vx, 0, gvl);
|
||||
vx = VFRSUBVF_MASK_FLOAT(vx, vx, 0, mask, gvl);
|
||||
|
||||
//index where element less than v_min
|
||||
mask = VMFLTVV_FLOAT(vx, v_min, gvl);
|
||||
v_min_index = VIDV_MASK_UINT(v_min_index, mask, gvl);
|
||||
v_min_index = VADDVX_MASK_UINT(v_min_index, v_min_index, j, mask, gvl);
|
||||
|
||||
//update v_min and start_index j
|
||||
v_min = VFMINVV_FLOAT(v_min, vx, gvl);
|
||||
j += gvl;
|
||||
}
|
||||
vx = VFMVVF_FLOAT(FLT_MAX, gvl);
|
||||
vx = VFREDMINVS_FLOAT(v_min, vx, gvl);
|
||||
minf = vx[0];
|
||||
mask = VMFLEVF_FLOAT(v_min, minf, gvl);
|
||||
min_index = VMFIRSTM(mask,gvl);
|
||||
min_index = v_min_index[min_index];
|
||||
|
||||
if(j < n){
|
||||
gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M);
|
||||
vx = VLEV_FLOAT(&x[j], gvl);
|
||||
//fabs(vector)
|
||||
mask = VMFLTVF_FLOAT(vx, 0, gvl);
|
||||
v_min = VFRSUBVF_MASK_FLOAT(vx, vx, 0, mask, gvl);
|
||||
|
||||
vx = VFMVVF_FLOAT(FLT_MAX, gvl);
|
||||
vx = VFREDMINVS_FLOAT(v_min, vx, gvl);
|
||||
FLOAT cur_minf = vx[0];
|
||||
if(cur_minf < minf){
|
||||
//tail index
|
||||
v_min_index = VIDV_UINT(gvl);
|
||||
v_min_index = VADDVX_UINT(v_min_index, j, gvl);
|
||||
|
||||
mask = VMFLEVF_FLOAT(v_min, cur_minf, gvl);
|
||||
min_index = VMFIRSTM(mask,gvl);
|
||||
min_index = v_min_index[min_index];
|
||||
}
|
||||
}
|
||||
}else{
|
||||
gvl = vsetvli(n, RVV_EFLOAT, RVV_M);
|
||||
unsigned int stride_x = inc_x * sizeof(FLOAT);
|
||||
unsigned int idx = 0, inc_v = gvl * inc_x;
|
||||
|
||||
v_min = VFMVVF_FLOAT(FLT_MAX, gvl);
|
||||
v_min_index = VMVVX_UINT(0, gvl);
|
||||
for(i=0,j=0; i < n/gvl; i++){
|
||||
vx = VLSEV_FLOAT(&x[idx], stride_x, gvl);
|
||||
//fabs(vector)
|
||||
mask = VMFLTVF_FLOAT(vx, 0, gvl);
|
||||
vx = VFRSUBVF_MASK_FLOAT(vx, vx, 0, mask, gvl);
|
||||
|
||||
//index where element less than v_min
|
||||
mask = VMFLTVV_FLOAT(vx, v_min, gvl);
|
||||
v_min_index = VIDV_MASK_UINT(v_min_index, mask, gvl);
|
||||
v_min_index = VADDVX_MASK_UINT(v_min_index, v_min_index, j, mask, gvl);
|
||||
|
||||
//update v_min and start_index j
|
||||
v_min = VFMINVV_FLOAT(v_min, vx, gvl);
|
||||
j += gvl;
|
||||
idx += inc_v;
|
||||
}
|
||||
vx = VFMVVF_FLOAT(FLT_MAX, gvl);
|
||||
vx = VFREDMINVS_FLOAT(v_min, vx, gvl);
|
||||
minf = vx[0];
|
||||
mask = VMFLEVF_FLOAT(v_min, minf, gvl);
|
||||
min_index = VMFIRSTM(mask,gvl);
|
||||
min_index = v_min_index[min_index];
|
||||
|
||||
if(j < n){
|
||||
gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M);
|
||||
vx = VLSEV_FLOAT(&x[idx], stride_x, gvl);
|
||||
//fabs(vector)
|
||||
mask = VMFLTVF_FLOAT(vx, 0, gvl);
|
||||
v_min = VFRSUBVF_MASK_FLOAT(vx, vx, 0, mask, gvl);
|
||||
|
||||
vx = VFMVVF_FLOAT(FLT_MAX, gvl);
|
||||
vx = VFREDMINVS_FLOAT(v_min, vx, gvl);
|
||||
FLOAT cur_minf = vx[0];
|
||||
if(cur_minf < minf){
|
||||
//tail index
|
||||
v_min_index = VIDV_UINT(gvl);
|
||||
v_min_index = VADDVX_UINT(v_min_index, j, gvl);
|
||||
|
||||
mask = VMFLEVF_FLOAT(v_min, cur_minf, gvl);
|
||||
min_index = VMFIRSTM(mask,gvl);
|
||||
min_index = v_min_index[min_index];
|
||||
}
|
||||
}
|
||||
}
|
||||
return(min_index+1);
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,69 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2013, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
|
||||
/**************************************************************************************
|
||||
* 2013/09/14 Saar
|
||||
* BLASTEST float : NoTest
|
||||
* BLASTEST double : NoTest
|
||||
* CTEST : NoTest
|
||||
* TEST : NoTest
|
||||
*
|
||||
**************************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
#include <math.h>
|
||||
|
||||
|
||||
|
||||
BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
|
||||
{
|
||||
BLASLONG i=0;
|
||||
BLASLONG ix=0;
|
||||
FLOAT maxf=0.0;
|
||||
BLASLONG max=0;
|
||||
|
||||
if (n <= 0 || inc_x <= 0) return(max);
|
||||
|
||||
maxf=x[0];
|
||||
ix += inc_x;
|
||||
i++;
|
||||
|
||||
while(i < n)
|
||||
{
|
||||
if( x[ix] > maxf )
|
||||
{
|
||||
max = i;
|
||||
maxf = x[ix];
|
||||
}
|
||||
ix += inc_x;
|
||||
i++;
|
||||
}
|
||||
return(max+1);
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,176 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2020, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
#include <math.h>
|
||||
#include <float.h>
|
||||
|
||||
#if defined(DOUBLE)
|
||||
|
||||
#define ABS fabs
|
||||
#define RVV_EFLOAT RVV_E64
|
||||
#define RVV_M RVV_M8
|
||||
#define FLOAT_V_T float64xm8_t
|
||||
#define VLEV_FLOAT vlev_float64xm8
|
||||
#define VLSEV_FLOAT vlsev_float64xm8
|
||||
#define VFREDMAXVS_FLOAT vfredmaxvs_float64xm8
|
||||
#define MASK_T e64xm8_t
|
||||
#define VMFLTVV_FLOAT vmfltvv_e64xm8_float64xm8
|
||||
#define VFMVVF_FLOAT vfmvvf_float64xm8
|
||||
#define VFMAXVV_FLOAT vfmaxvv_float64xm8
|
||||
#define VMFGEVF_FLOAT vmfgevf_e64xm8_float64xm8
|
||||
#define VMFIRSTM vmfirstm_e64xm8
|
||||
#define UINT_V_T uint64xm8_t
|
||||
#define VIDV_MASK_UINT vidv_mask_uint64xm8
|
||||
#define VIDV_UINT vidv_uint64xm8
|
||||
#define VADDVX_MASK_UINT vaddvx_mask_uint64xm8
|
||||
#define VADDVX_UINT vaddvx_uint64xm8
|
||||
#define VMVVX_UINT vmvvx_uint64xm8
|
||||
#else
|
||||
|
||||
#define ABS fabsf
|
||||
#define RVV_EFLOAT RVV_E32
|
||||
#define RVV_M RVV_M8
|
||||
#define FLOAT_V_T float32xm8_t
|
||||
#define VLEV_FLOAT vlev_float32xm8
|
||||
#define VLSEV_FLOAT vlsev_float32xm8
|
||||
#define VFREDMAXVS_FLOAT vfredmaxvs_float32xm8
|
||||
#define MASK_T e32xm8_t
|
||||
#define VMFLTVV_FLOAT vmfltvv_e32xm8_float32xm8
|
||||
#define VFMVVF_FLOAT vfmvvf_float32xm8
|
||||
#define VFMAXVV_FLOAT vfmaxvv_float32xm8
|
||||
#define VMFGEVF_FLOAT vmfgevf_e32xm8_float32xm8
|
||||
#define VMFIRSTM vmfirstm_e32xm8
|
||||
#define UINT_V_T uint32xm8_t
|
||||
#define VIDV_MASK_UINT vidv_mask_uint32xm8
|
||||
#define VIDV_UINT vidv_uint32xm8
|
||||
#define VADDVX_MASK_UINT vaddvx_mask_uint32xm8
|
||||
#define VADDVX_UINT vaddvx_uint32xm8
|
||||
#define VMVVX_UINT vmvvx_uint32xm8
|
||||
#endif
|
||||
|
||||
|
||||
BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
|
||||
{
|
||||
BLASLONG i=0, j=0;
|
||||
unsigned int max_index = 0;
|
||||
if (n <= 0 || inc_x <= 0) return(max_index);
|
||||
FLOAT maxf=-FLT_MAX;
|
||||
|
||||
FLOAT_V_T vx, v_max;
|
||||
UINT_V_T v_max_index;
|
||||
MASK_T mask;
|
||||
unsigned int gvl = 0;
|
||||
if(inc_x == 1){
|
||||
gvl = vsetvli(n, RVV_EFLOAT, RVV_M);
|
||||
v_max_index = VMVVX_UINT(0, gvl);
|
||||
v_max = VFMVVF_FLOAT(-FLT_MAX, gvl);
|
||||
for(i=0,j=0; i < n/gvl; i++){
|
||||
vx = VLEV_FLOAT(&x[j], gvl);
|
||||
|
||||
//index where element greater than v_max
|
||||
mask = VMFLTVV_FLOAT(v_max, vx, gvl);
|
||||
v_max_index = VIDV_MASK_UINT(v_max_index, mask, gvl);
|
||||
v_max_index = VADDVX_MASK_UINT(v_max_index, v_max_index, j, mask, gvl);
|
||||
|
||||
//update v_max and start_index j
|
||||
v_max = VFMAXVV_FLOAT(v_max, vx, gvl);
|
||||
j += gvl;
|
||||
}
|
||||
vx = VFMVVF_FLOAT(-FLT_MAX, gvl);
|
||||
vx = VFREDMAXVS_FLOAT(v_max, vx, gvl);
|
||||
maxf = vx[0];
|
||||
mask = VMFGEVF_FLOAT(v_max, maxf, gvl);
|
||||
max_index = VMFIRSTM(mask,gvl);
|
||||
max_index = v_max_index[max_index];
|
||||
|
||||
if(j < n){
|
||||
gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M);
|
||||
v_max = VLEV_FLOAT(&x[j], gvl);
|
||||
|
||||
vx = VFMVVF_FLOAT(-FLT_MAX, gvl);
|
||||
vx = VFREDMAXVS_FLOAT(v_max, vx, gvl);
|
||||
FLOAT cur_maxf = vx[0];
|
||||
if(cur_maxf > maxf){
|
||||
//tail index
|
||||
v_max_index = VIDV_UINT(gvl);
|
||||
v_max_index = VADDVX_UINT(v_max_index, j, gvl);
|
||||
|
||||
mask = VMFGEVF_FLOAT(v_max, cur_maxf, gvl);
|
||||
max_index = VMFIRSTM(mask,gvl);
|
||||
max_index = v_max_index[max_index];
|
||||
}
|
||||
}
|
||||
}else{
|
||||
gvl = vsetvli(n, RVV_EFLOAT, RVV_M);
|
||||
unsigned int stride_x = inc_x * sizeof(FLOAT);
|
||||
unsigned int idx = 0, inc_v = gvl * inc_x;
|
||||
|
||||
v_max = VFMVVF_FLOAT(-FLT_MAX, gvl);
|
||||
v_max_index = VMVVX_UINT(0, gvl);
|
||||
for(i=0,j=0; i < n/gvl; i++){
|
||||
vx = VLSEV_FLOAT(&x[idx], stride_x, gvl);
|
||||
|
||||
//index where element greater than v_max
|
||||
mask = VMFLTVV_FLOAT(v_max, vx, gvl);
|
||||
v_max_index = VIDV_MASK_UINT(v_max_index, mask, gvl);
|
||||
v_max_index = VADDVX_MASK_UINT(v_max_index, v_max_index, j, mask, gvl);
|
||||
|
||||
//update v_max and start_index j
|
||||
v_max = VFMAXVV_FLOAT(v_max, vx, gvl);
|
||||
j += gvl;
|
||||
idx += inc_v;
|
||||
}
|
||||
vx = VFMVVF_FLOAT(-FLT_MAX, gvl);
|
||||
vx = VFREDMAXVS_FLOAT(v_max, vx, gvl);
|
||||
maxf = vx[0];
|
||||
mask = VMFGEVF_FLOAT(v_max, maxf, gvl);
|
||||
max_index = VMFIRSTM(mask,gvl);
|
||||
max_index = v_max_index[max_index];
|
||||
|
||||
if(j < n){
|
||||
gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M);
|
||||
v_max = VLSEV_FLOAT(&x[idx], stride_x, gvl);
|
||||
|
||||
vx = VFMVVF_FLOAT(-FLT_MAX, gvl);
|
||||
vx = VFREDMAXVS_FLOAT(v_max, vx, gvl);
|
||||
FLOAT cur_maxf = vx[0];
|
||||
if(cur_maxf > maxf){
|
||||
//tail index
|
||||
v_max_index = VIDV_UINT(gvl);
|
||||
v_max_index = VADDVX_UINT(v_max_index, j, gvl);
|
||||
|
||||
mask = VMFGEVF_FLOAT(v_max, cur_maxf, gvl);
|
||||
max_index = VMFIRSTM(mask,gvl);
|
||||
max_index = v_max_index[max_index];
|
||||
}
|
||||
}
|
||||
}
|
||||
return(max_index+1);
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,67 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2013, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
|
||||
/**************************************************************************************
|
||||
* 2013/08/19 Saar
|
||||
* BLASTEST float
|
||||
* BLASTEST double
|
||||
*
|
||||
**************************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
#include <math.h>
|
||||
|
||||
|
||||
|
||||
BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
|
||||
{
|
||||
BLASLONG i=0;
|
||||
BLASLONG ix=0;
|
||||
FLOAT minf=0.0;
|
||||
BLASLONG min=0;
|
||||
|
||||
if (n <= 0 || inc_x <= 0) return(min);
|
||||
|
||||
minf=x[0];
|
||||
ix += inc_x;
|
||||
i++;
|
||||
|
||||
while(i < n)
|
||||
{
|
||||
if( x[ix] < minf )
|
||||
{
|
||||
min = i;
|
||||
minf = x[ix];
|
||||
}
|
||||
ix += inc_x;
|
||||
i++;
|
||||
}
|
||||
return(min+1);
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,212 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2020, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
#include <math.h>
|
||||
#include <float.h>
|
||||
|
||||
#if defined(DOUBLE)
|
||||
|
||||
#define ABS fabs
|
||||
#define RVV_EFLOAT RVV_E64
|
||||
#define RVV_M RVV_M8
|
||||
#define FLOAT_V_T float64xm8_t
|
||||
#define VLEV_FLOAT vlev_float64xm8
|
||||
#define VLSEV_FLOAT vlsev_float64xm8
|
||||
#define VFREDMINVS_FLOAT vfredminvs_float64xm8
|
||||
#define MASK_T e64xm8_t
|
||||
#define VMFLTVV_FLOAT vmfltvv_e64xm8_float64xm8
|
||||
#define VFMVVF_FLOAT vfmvvf_float64xm8
|
||||
#define VFMINVV_FLOAT vfminvv_float64xm8
|
||||
#define VMFLEVF_FLOAT vmflevf_e64xm8_float64xm8
|
||||
#define VMFIRSTM vmfirstm_e64xm8
|
||||
#define UINT_V_T uint64xm8_t
|
||||
#define VIDV_MASK_UINT vidv_mask_uint64xm8
|
||||
#define VIDV_UINT vidv_uint64xm8
|
||||
#define VADDVX_MASK_UINT vaddvx_mask_uint64xm8
|
||||
#define VADDVX_UINT vaddvx_uint64xm8
|
||||
#define VMVVX_UINT vmvvx_uint64xm8
|
||||
#else
|
||||
|
||||
#define ABS fabsf
|
||||
#define RVV_EFLOAT RVV_E32
|
||||
#define RVV_M RVV_M8
|
||||
#define FLOAT_V_T float32xm8_t
|
||||
#define VLEV_FLOAT vlev_float32xm8
|
||||
#define VLSEV_FLOAT vlsev_float32xm8
|
||||
#define VFREDMINVS_FLOAT vfredminvs_float32xm8
|
||||
#define MASK_T e32xm8_t
|
||||
#define VMFLTVV_FLOAT vmfltvv_e32xm8_float32xm8
|
||||
#define VFMVVF_FLOAT vfmvvf_float32xm8
|
||||
#define VFMINVV_FLOAT vfminvv_float32xm8
|
||||
#define VMFLEVF_FLOAT vmflevf_e32xm8_float32xm8
|
||||
#define VMFIRSTM vmfirstm_e32xm8
|
||||
#define UINT_V_T uint32xm8_t
|
||||
#define VIDV_MASK_UINT vidv_mask_uint32xm8
|
||||
#define VIDV_UINT vidv_uint32xm8
|
||||
#define VADDVX_MASK_UINT vaddvx_mask_uint32xm8
|
||||
#define VADDVX_UINT vaddvx_uint32xm8
|
||||
#define VMVVX_UINT vmvvx_uint32xm8
|
||||
#endif
|
||||
|
||||
|
||||
BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
|
||||
{
|
||||
BLASLONG i=0, j=0;
|
||||
FLOAT minf=FLT_MAX;
|
||||
unsigned int min_index = 0;
|
||||
if (n <= 0 || inc_x <= 0) return(min_index);
|
||||
|
||||
FLOAT_V_T vx, v_min;
|
||||
UINT_V_T v_min_index;
|
||||
MASK_T mask;
|
||||
unsigned int gvl = 0;
|
||||
if(inc_x == 1){
|
||||
gvl = vsetvli(n, RVV_EFLOAT, RVV_M);
|
||||
v_min = VFMVVF_FLOAT(FLT_MAX, gvl);
|
||||
v_min_index = VMVVX_UINT(0, gvl);
|
||||
for(i=0,j=0; i < n/gvl; i++){
|
||||
vx = VLEV_FLOAT(&x[j], gvl);
|
||||
//index where element less than v_min
|
||||
mask = VMFLTVV_FLOAT(vx, v_min, gvl);
|
||||
v_min_index = VIDV_MASK_UINT(v_min_index, mask, gvl);
|
||||
/*
|
||||
#if defined(DOUBLE)
|
||||
asm volatile(
|
||||
"vor.vv v0, %1, %1 \n\t"
|
||||
"vsetvli x0, %2, e64,m8 \n\t"
|
||||
"vid.v %0, v0.t \n\t"
|
||||
:"+v"(v_min_index)
|
||||
:"v"(mask), "r"(gvl)
|
||||
:"v0");
|
||||
#else
|
||||
asm volatile(
|
||||
"vor.vv v0, %1, %1 \n\t"
|
||||
"vsetvli x0, %2, e32,m8 \n\t"
|
||||
"vid.v %0, v0.t \n\t"
|
||||
:"+v"(v_min_index)
|
||||
:"v"(mask), "r"(gvl)
|
||||
:"v0");
|
||||
#endif
|
||||
*/
|
||||
v_min_index = VADDVX_MASK_UINT(v_min_index, v_min_index, j, mask, gvl);
|
||||
|
||||
//update v_min and start_index j
|
||||
v_min = VFMINVV_FLOAT(v_min, vx, gvl);
|
||||
j += gvl;
|
||||
}
|
||||
vx = VFMVVF_FLOAT(FLT_MAX, gvl);
|
||||
vx = VFREDMINVS_FLOAT(v_min, vx, gvl);
|
||||
minf = vx[0];
|
||||
mask = VMFLEVF_FLOAT(v_min, minf, gvl);
|
||||
min_index = VMFIRSTM(mask,gvl);
|
||||
min_index = v_min_index[min_index];
|
||||
|
||||
if(j < n){
|
||||
gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M);
|
||||
v_min = VLEV_FLOAT(&x[j], gvl);
|
||||
|
||||
vx = VFMVVF_FLOAT(FLT_MAX, gvl);
|
||||
vx = VFREDMINVS_FLOAT(v_min, vx, gvl);
|
||||
FLOAT cur_minf = vx[0];
|
||||
if(cur_minf < minf){
|
||||
//tail index
|
||||
v_min_index = VIDV_UINT(gvl);
|
||||
v_min_index = VADDVX_UINT(v_min_index, j, gvl);
|
||||
mask = VMFLEVF_FLOAT(v_min, cur_minf, gvl);
|
||||
min_index = VMFIRSTM(mask,gvl);
|
||||
min_index = v_min_index[min_index];
|
||||
}
|
||||
}
|
||||
}else{
|
||||
gvl = vsetvli(n, RVV_EFLOAT, RVV_M);
|
||||
unsigned int stride_x = inc_x * sizeof(FLOAT);
|
||||
unsigned int idx = 0, inc_v = gvl * inc_x;
|
||||
|
||||
v_min = VFMVVF_FLOAT(FLT_MAX, gvl);
|
||||
v_min_index = VMVVX_UINT(0, gvl);
|
||||
for(i=0,j=0; i < n/gvl; i++){
|
||||
vx = VLSEV_FLOAT(&x[idx], stride_x, gvl);
|
||||
|
||||
//index where element less than v_min
|
||||
mask = VMFLTVV_FLOAT(vx, v_min, gvl);
|
||||
v_min_index = VIDV_MASK_UINT(v_min_index, mask, gvl);
|
||||
/*
|
||||
#if defined(DOUBLE)
|
||||
asm volatile(
|
||||
"vor.vv v0, %1, %1 \n\t"
|
||||
"vsetvli x0, %2, e64,m8 \n\t"
|
||||
"vid.v %0, v0.t \n\t"
|
||||
:"+v"(v_min_index)
|
||||
:"v"(mask), "r"(gvl)
|
||||
:"v0");
|
||||
#else
|
||||
asm volatile(
|
||||
"vor.vv v0, %1, %1 \n\t"
|
||||
"vsetvli x0, %2, e32,m8 \n\t"
|
||||
"vid.v %0, v0.t \n\t"
|
||||
:"+v"(v_min_index)
|
||||
:"v"(mask), "r"(gvl)
|
||||
:"v0");
|
||||
#endif
|
||||
*/
|
||||
|
||||
v_min_index = VADDVX_MASK_UINT(v_min_index, v_min_index, j, mask, gvl);
|
||||
|
||||
//update v_min and start_index j
|
||||
v_min = VFMINVV_FLOAT(v_min, vx, gvl);
|
||||
j += gvl;
|
||||
idx += inc_v;
|
||||
}
|
||||
vx = VFMVVF_FLOAT(FLT_MAX, gvl);
|
||||
vx = VFREDMINVS_FLOAT(v_min, vx, gvl);
|
||||
minf = vx[0];
|
||||
mask = VMFLEVF_FLOAT(v_min, minf, gvl);
|
||||
min_index = VMFIRSTM(mask,gvl);
|
||||
min_index = v_min_index[min_index];
|
||||
|
||||
if(j < n){
|
||||
gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M);
|
||||
v_min = VLSEV_FLOAT(&x[idx], stride_x, gvl);
|
||||
|
||||
vx = VFMVVF_FLOAT(FLT_MAX, gvl);
|
||||
vx = VFREDMINVS_FLOAT(v_min, vx, gvl);
|
||||
FLOAT cur_minf = vx[0];
|
||||
if(cur_minf < minf){
|
||||
//tail index
|
||||
v_min_index = VIDV_UINT(gvl);
|
||||
v_min_index = VADDVX_UINT(v_min_index, j, gvl);
|
||||
mask = VMFLEVF_FLOAT(v_min, cur_minf, gvl);
|
||||
min_index = VMFIRSTM(mask,gvl);
|
||||
min_index = v_min_index[min_index];
|
||||
}
|
||||
}
|
||||
}
|
||||
return(min_index+1);
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,81 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2013, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
/**************************************************************************************
|
||||
* 2013/09/14 Saar
|
||||
* BLASTEST float : NoTest
|
||||
* BLASTEST double : NoTest
|
||||
* CTEST : OK
|
||||
* TEST : OK
|
||||
*
|
||||
**************************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
#include <math.h>
|
||||
|
||||
#if defined(DOUBLE)
|
||||
|
||||
#define ABS fabs
|
||||
|
||||
#else
|
||||
|
||||
#define ABS fabsf
|
||||
|
||||
#endif
|
||||
|
||||
#define CABS1(x,i) ABS(x[i])+ABS(x[i+1])
|
||||
|
||||
BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
|
||||
{
|
||||
BLASLONG i=0;
|
||||
BLASLONG ix=0;
|
||||
FLOAT maxf;
|
||||
BLASLONG max=0;
|
||||
BLASLONG inc_x2;
|
||||
|
||||
if (n <= 0 || inc_x <= 0) return(max);
|
||||
|
||||
inc_x2 = 2 * inc_x;
|
||||
|
||||
maxf = CABS1(x,0);
|
||||
ix += inc_x2;
|
||||
i++;
|
||||
|
||||
while(i < n)
|
||||
{
|
||||
if( CABS1(x,ix) > maxf )
|
||||
{
|
||||
max = i;
|
||||
maxf = CABS1(x,ix);
|
||||
}
|
||||
ix += inc_x2;
|
||||
i++;
|
||||
}
|
||||
return(max+1);
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,246 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2020, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
#include <math.h>
|
||||
|
||||
#if defined(DOUBLE)
|
||||
|
||||
#define RVV_EFLOAT RVV_E64
|
||||
#define FLOAT_V_T float64xm8_t
|
||||
#define VLSEV_FLOAT vlsev_float64xm8
|
||||
#define VFREDMAXVS_FLOAT vfredmaxvs_float64xm8
|
||||
#define MASK_T e64xm8_t
|
||||
#define VMFLTVF_FLOAT vmfltvf_e64xm8_float64xm8
|
||||
#define VMFLTVV_FLOAT vmfltvv_e64xm8_float64xm8
|
||||
#define VFMVVF_FLOAT vfmvvf_float64xm8
|
||||
#define VFRSUBVF_MASK_FLOAT vfrsubvf_mask_float64xm8
|
||||
#define VFMAXVV_FLOAT vfmaxvv_float64xm8
|
||||
#define VMFGEVF_FLOAT vmfgevf_e64xm8_float64xm8
|
||||
#define VMFIRSTM vmfirstm_e64xm8
|
||||
#define UINT_V_T uint64xm8_t
|
||||
#define VIDV_MASK_UINT vidv_mask_uint64xm8
|
||||
#define VIDV_UINT vidv_uint64xm8
|
||||
#define VADDVX_MASK_UINT vaddvx_mask_uint64xm8
|
||||
#define VADDVX_UINT vaddvx_uint64xm8
|
||||
#define VFADDVV_FLOAT vfaddvv_float64xm8
|
||||
#define VMVVX_UINT vmvvx_uint64xm8
|
||||
#else
|
||||
|
||||
#define ABS fabsf
|
||||
#define RVV_EFLOAT RVV_E32
|
||||
#define FLOAT_V_T float32xm8_t
|
||||
#define VLSEV_FLOAT vlsev_float32xm8
|
||||
#define VFREDMAXVS_FLOAT vfredmaxvs_float32xm8
|
||||
#define MASK_T e32xm8_t
|
||||
#define VMFLTVF_FLOAT vmfltvf_e32xm8_float32xm8
|
||||
#define VMFLTVV_FLOAT vmfltvv_e32xm8_float32xm8
|
||||
#define VFMVVF_FLOAT vfmvvf_float32xm8
|
||||
#define VFRSUBVF_MASK_FLOAT vfrsubvf_mask_float32xm8
|
||||
#define VFMAXVV_FLOAT vfmaxvv_float32xm8
|
||||
#define VMFGEVF_FLOAT vmfgevf_e32xm8_float32xm8
|
||||
#define VMFIRSTM vmfirstm_e32xm8
|
||||
#define UINT_V_T uint32xm8_t
|
||||
#define VIDV_MASK_UINT vidv_mask_uint32xm8
|
||||
#define VIDV_UINT vidv_uint32xm8
|
||||
#define VADDVX_MASK_UINT vaddvx_mask_uint32xm8
|
||||
#define VADDVX_UINT vaddvx_uint32xm8
|
||||
#define VFADDVV_FLOAT vfaddvv_float32xm8
|
||||
#define VMVVX_UINT vmvvx_uint32xm8
|
||||
#endif
|
||||
|
||||
#define RVV_M RVV_M8
|
||||
|
||||
BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
|
||||
{
|
||||
BLASLONG i=0, j=0;
|
||||
FLOAT maxf=0.0;
|
||||
unsigned int max_index = 0;
|
||||
if (n <= 0 || inc_x <= 0) return(max_index);
|
||||
|
||||
FLOAT_V_T vx0, vx1, v_max;
|
||||
UINT_V_T v_max_index;
|
||||
MASK_T mask0, mask1;
|
||||
unsigned int gvl = 0;
|
||||
gvl = vsetvli(n, RVV_EFLOAT, RVV_M);
|
||||
v_max_index = VMVVX_UINT(0, gvl);
|
||||
v_max = VFMVVF_FLOAT(-1, gvl);
|
||||
BLASLONG stride_x = inc_x * 2 * sizeof(FLOAT);
|
||||
BLASLONG inc_xv = gvl * inc_x * 2;
|
||||
BLASLONG ix = 0;
|
||||
for(i=0,j=0; i < n/gvl; i++){
|
||||
vx0 = VLSEV_FLOAT(&x[ix], stride_x, gvl);
|
||||
//fabs(vector)
|
||||
mask0 = VMFLTVF_FLOAT(vx0, 0, gvl);
|
||||
vx0 = VFRSUBVF_MASK_FLOAT(vx0, vx0, 0, mask0, gvl);
|
||||
/*
|
||||
#if defined(DOUBLE)
|
||||
asm volatile(
|
||||
"vor.vv v0, %1, %1\n\t"
|
||||
"vsetvli x0, %3, e64,m8 \n\t"
|
||||
"vfrsub.vf %0, %0, %2, v0.t \n\t"
|
||||
:"+v"(vx0)
|
||||
:"v"(mask0), "f"(zero), "r"(gvl)
|
||||
:"v0");
|
||||
#else
|
||||
asm volatile(
|
||||
"vor.vv v0, %1, %1\n\t"
|
||||
"vsetvli x0, %3, e32,m8 \n\t"
|
||||
"vfrsub.vf %0, %0, %2, v0.t \n\t"
|
||||
:"+v"(vx0)
|
||||
:"v"(mask0), "f"(zero), "r"(gvl)
|
||||
:"v0");
|
||||
#endif
|
||||
*/
|
||||
vx1 = VLSEV_FLOAT(&x[ix+1], stride_x, gvl);
|
||||
//fabs(vector)
|
||||
mask1 = VMFLTVF_FLOAT(vx1, 0, gvl);
|
||||
vx1 = VFRSUBVF_MASK_FLOAT(vx1, vx1, 0, mask1, gvl);
|
||||
/*
|
||||
#if defined(DOUBLE)
|
||||
asm volatile(
|
||||
"vor.vv v0, %1, %1\n\t"
|
||||
"vsetvli x0, %3, e64,m8 \n\t"
|
||||
"vfrsub.vf %0, %0, %2, v0.t \n\t"
|
||||
:"+v"(vx1)
|
||||
:"v"(mask1), "f"(zero), "r"(gvl)
|
||||
:"v0");
|
||||
#else
|
||||
asm volatile(
|
||||
"vor.vv v0, %1, %1\n\t"
|
||||
"vsetvli x0, %3, e32,m8 \n\t"
|
||||
"vfrsub.vf %0, %0, %2, v0.t \n\t"
|
||||
:"+v"(vx1)
|
||||
:"v"(mask1), "f"(zero), "r"(gvl)
|
||||
:"v0");
|
||||
#endif
|
||||
*/
|
||||
vx0 = VFADDVV_FLOAT(vx0, vx1, gvl);
|
||||
|
||||
//index where element greater than v_max
|
||||
mask0 = VMFLTVV_FLOAT(v_max, vx0, gvl);
|
||||
v_max_index = VIDV_MASK_UINT(v_max_index, mask0, gvl);
|
||||
/*
|
||||
#if defined(DOUBLE)
|
||||
asm volatile(
|
||||
"vor.vv v0, %1, %1 \n\t"
|
||||
"vsetvli x0, %2, e64,m8 \n\t"
|
||||
"vid.v %0, v0.t \n\t"
|
||||
:"+v"(v_max_index)
|
||||
:"v"(mask0), "r"(gvl)
|
||||
:"v0");
|
||||
#else
|
||||
asm volatile(
|
||||
"vor.vv v0, %1, %1 \n\t"
|
||||
"vsetvli x0, %2, e32,m8 \n\t"
|
||||
"vid.v %0, v0.t \n\t"
|
||||
:"+v"(v_max_index)
|
||||
:"v"(mask0), "r"(gvl)
|
||||
:"v0");
|
||||
#endif
|
||||
*/
|
||||
v_max_index = VADDVX_MASK_UINT(v_max_index, v_max_index, j, mask0, gvl);
|
||||
|
||||
//update v_max and start_index j
|
||||
v_max = VFMAXVV_FLOAT(v_max, vx0, gvl);
|
||||
j += gvl;
|
||||
ix += inc_xv;
|
||||
}
|
||||
vx0 = VFMVVF_FLOAT(0, gvl);
|
||||
vx0 = VFREDMAXVS_FLOAT(v_max, vx0, gvl);
|
||||
maxf = vx0[0];
|
||||
mask0 = VMFGEVF_FLOAT(v_max, maxf, gvl);
|
||||
max_index = VMFIRSTM(mask0,gvl);
|
||||
max_index = v_max_index[max_index];
|
||||
|
||||
if(j < n){
|
||||
gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M);
|
||||
v_max_index = VMVVX_UINT(0, gvl);
|
||||
vx0 = VLSEV_FLOAT(&x[ix], stride_x, gvl);
|
||||
//fabs(vector)
|
||||
mask0 = VMFLTVF_FLOAT(vx0, 0, gvl);
|
||||
vx0 = VFRSUBVF_MASK_FLOAT(vx0, vx0, 0, mask0, gvl);
|
||||
/*
|
||||
#if defined(DOUBLE)
|
||||
asm volatile(
|
||||
"vor.vv v0, %1, %1\n\t"
|
||||
"vsetvli x0, %3, e64,m8 \n\t"
|
||||
"vfrsub.vf %0, %0, %2, v0.t \n\t"
|
||||
:"+v"(vx0)
|
||||
:"v"(mask0), "f"(zero), "r"(gvl)
|
||||
:"v0");
|
||||
#else
|
||||
asm volatile(
|
||||
"vor.vv v0, %1, %1\n\t"
|
||||
"vsetvli x0, %3, e32,m8 \n\t"
|
||||
"vfrsub.vf %0, %0, %2, v0.t \n\t"
|
||||
:"+v"(vx0)
|
||||
:"v"(mask0), "f"(zero), "r"(gvl)
|
||||
:"v0");
|
||||
#endif
|
||||
*/
|
||||
vx1 = VLSEV_FLOAT(&x[ix+1], stride_x, gvl);
|
||||
//fabs(vector)
|
||||
mask1 = VMFLTVF_FLOAT(vx1, 0, gvl);
|
||||
vx1 = VFRSUBVF_MASK_FLOAT(vx1, vx1, 0, mask1, gvl);
|
||||
/*
|
||||
#if defined(DOUBLE)
|
||||
asm volatile(
|
||||
"vor.vv v0, %1, %1\n\t"
|
||||
"vsetvli x0, %3, e64,m8 \n\t"
|
||||
"vfrsub.vf %0, %0, %2, v0.t \n\t"
|
||||
:"+v"(vx1)
|
||||
:"v"(mask1), "f"(zero), "r"(gvl)
|
||||
:"v0");
|
||||
#else
|
||||
asm volatile(
|
||||
"vor.vv v0, %1, %1\n\t"
|
||||
"vsetvli x0, %3, e32,m8 \n\t"
|
||||
"vfrsub.vf %0, %0, %2, v0.t \n\t"
|
||||
:"+v"(vx1)
|
||||
:"v"(mask1), "f"(zero), "r"(gvl)
|
||||
:"v0");
|
||||
#endif
|
||||
*/
|
||||
v_max = VFADDVV_FLOAT(vx0, vx1, gvl);
|
||||
vx0 = VFMVVF_FLOAT(0, gvl);
|
||||
vx0 = VFREDMAXVS_FLOAT(v_max, vx0, gvl);
|
||||
FLOAT cur_maxf = vx0[0];
|
||||
if(cur_maxf > maxf){
|
||||
//tail index
|
||||
v_max_index = VIDV_UINT(gvl);
|
||||
v_max_index = VADDVX_UINT(v_max_index, j, gvl);
|
||||
|
||||
mask0 = VMFGEVF_FLOAT(v_max, cur_maxf, gvl);
|
||||
max_index = VMFIRSTM(mask0,gvl);
|
||||
max_index = v_max_index[max_index];
|
||||
}
|
||||
}
|
||||
return(max_index+1);
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,81 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2013, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
/**************************************************************************************
|
||||
* 2013/09/14 Saar
|
||||
* BLASTEST float : NoTest
|
||||
* BLASTEST double : NoTest
|
||||
* CTEST : NoTest
|
||||
* TEST : NoTest
|
||||
*
|
||||
**************************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
#include <math.h>
|
||||
|
||||
#if defined(DOUBLE)
|
||||
|
||||
#define ABS fabs
|
||||
|
||||
#else
|
||||
|
||||
#define ABS fabsf
|
||||
|
||||
#endif
|
||||
|
||||
#define CABS1(x,i) ABS(x[i])+ABS(x[i+1])
|
||||
|
||||
BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
|
||||
{
|
||||
BLASLONG i=0;
|
||||
BLASLONG ix=0;
|
||||
FLOAT minf;
|
||||
BLASLONG min=0;
|
||||
BLASLONG inc_x2;
|
||||
|
||||
if (n <= 0 || inc_x <= 0) return(min);
|
||||
|
||||
inc_x2 = 2 * inc_x;
|
||||
|
||||
minf = CABS1(x,0);
|
||||
ix += inc_x2;
|
||||
i++;
|
||||
|
||||
while(i < n)
|
||||
{
|
||||
if( CABS1(x,ix) < minf )
|
||||
{
|
||||
min = i;
|
||||
minf = CABS1(x,ix);
|
||||
}
|
||||
ix += inc_x2;
|
||||
i++;
|
||||
}
|
||||
return(min+1);
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,247 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2020, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
#include <math.h>
|
||||
#include <float.h>
|
||||
|
||||
#if defined(DOUBLE)
|
||||
|
||||
#define RVV_EFLOAT RVV_E64
|
||||
#define FLOAT_V_T float64xm8_t
|
||||
#define VLSEV_FLOAT vlsev_float64xm8
|
||||
#define VFREDMINVS_FLOAT vfredminvs_float64xm8
|
||||
#define MASK_T e64xm8_t
|
||||
#define VMFLTVF_FLOAT vmfltvf_e64xm8_float64xm8
|
||||
#define VMFLTVV_FLOAT vmfltvv_e64xm8_float64xm8
|
||||
#define VFMVVF_FLOAT vfmvvf_float64xm8
|
||||
#define VFRSUBVF_MASK_FLOAT vfrsubvf_mask_float64xm8
|
||||
#define VFMINVV_FLOAT vfminvv_float64xm8
|
||||
#define VMFLEVF_FLOAT vmflevf_e64xm8_float64xm8
|
||||
#define VMFIRSTM vmfirstm_e64xm8
|
||||
#define UINT_V_T uint64xm8_t
|
||||
#define VIDV_MASK_UINT vidv_mask_uint64xm8
|
||||
#define VIDV_UINT vidv_uint64xm8
|
||||
#define VADDVX_MASK_UINT vaddvx_mask_uint64xm8
|
||||
#define VADDVX_UINT vaddvx_uint64xm8
|
||||
#define VFADDVV_FLOAT vfaddvv_float64xm8
|
||||
#define VMVVX_UINT vmvvx_uint64xm8
|
||||
#else
|
||||
|
||||
#define ABS fabsf
|
||||
#define RVV_EFLOAT RVV_E32
|
||||
#define FLOAT_V_T float32xm8_t
|
||||
#define VLSEV_FLOAT vlsev_float32xm8
|
||||
#define VFREDMINVS_FLOAT vfredminvs_float32xm8
|
||||
#define MASK_T e32xm8_t
|
||||
#define VMFLTVF_FLOAT vmfltvf_e32xm8_float32xm8
|
||||
#define VMFLTVV_FLOAT vmfltvv_e32xm8_float32xm8
|
||||
#define VFMVVF_FLOAT vfmvvf_float32xm8
|
||||
#define VFRSUBVF_MASK_FLOAT vfrsubvf_mask_float32xm8
|
||||
#define VFMINVV_FLOAT vfminvv_float32xm8
|
||||
#define VMFLEVF_FLOAT vmflevf_e32xm8_float32xm8
|
||||
#define VMFIRSTM vmfirstm_e32xm8
|
||||
#define UINT_V_T uint32xm8_t
|
||||
#define VIDV_MASK_UINT vidv_mask_uint32xm8
|
||||
#define VIDV_UINT vidv_uint32xm8
|
||||
#define VADDVX_MASK_UINT vaddvx_mask_uint32xm8
|
||||
#define VADDVX_UINT vaddvx_uint32xm8
|
||||
#define VFADDVV_FLOAT vfaddvv_float32xm8
|
||||
#define VMVVX_UINT vmvvx_uint32xm8
|
||||
#endif
|
||||
|
||||
#define RVV_M RVV_M8
|
||||
|
||||
BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
|
||||
{
|
||||
BLASLONG i=0, j=0;
|
||||
FLOAT minf=FLT_MAX;
|
||||
unsigned int min_index = 0;
|
||||
if (n <= 0 || inc_x <= 0) return(min_index);
|
||||
|
||||
FLOAT_V_T vx0, vx1, v_min;
|
||||
UINT_V_T v_min_index;
|
||||
MASK_T mask0, mask1;
|
||||
unsigned int gvl = 0;
|
||||
gvl = vsetvli(n, RVV_EFLOAT, RVV_M);
|
||||
v_min_index = VMVVX_UINT(0, gvl);
|
||||
v_min = VFMVVF_FLOAT(FLT_MAX, gvl);
|
||||
BLASLONG stride_x = inc_x * 2 * sizeof(FLOAT);
|
||||
BLASLONG inc_xv = gvl * inc_x * 2;
|
||||
BLASLONG ix = 0;
|
||||
for(i=0,j=0; i < n/gvl; i++){
|
||||
vx0 = VLSEV_FLOAT(&x[ix], stride_x, gvl);
|
||||
//fabs(vector)
|
||||
mask0 = VMFLTVF_FLOAT(vx0, 0, gvl);
|
||||
vx0 = VFRSUBVF_MASK_FLOAT(vx0, vx0, 0, mask0, gvl);
|
||||
/*
|
||||
#if defined(DOUBLE)
|
||||
asm volatile(
|
||||
"vor.vv v0, %1, %1\n\t"
|
||||
"vsetvli x0, %3, e64,m8 \n\t"
|
||||
"vfrsub.vf %0, %0, %2, v0.t \n\t"
|
||||
:"+v"(vx0)
|
||||
:"v"(mask0), "f"(zero), "r"(gvl)
|
||||
:"v0");
|
||||
#else
|
||||
asm volatile(
|
||||
"vor.vv v0, %1, %1\n\t"
|
||||
"vsetvli x0, %3, e32,m8 \n\t"
|
||||
"vfrsub.vf %0, %0, %2, v0.t \n\t"
|
||||
:"+v"(vx0)
|
||||
:"v"(mask0), "f"(zero), "r"(gvl)
|
||||
:"v0");
|
||||
#endif
|
||||
*/
|
||||
vx1 = VLSEV_FLOAT(&x[ix+1], stride_x, gvl);
|
||||
//fabs(vector)
|
||||
mask1 = VMFLTVF_FLOAT(vx1, 0, gvl);
|
||||
vx1 = VFRSUBVF_MASK_FLOAT(vx1, vx1, 0, mask1, gvl);
|
||||
/*
|
||||
#if defined(DOUBLE)
|
||||
asm volatile(
|
||||
"vor.vv v0, %1, %1\n\t"
|
||||
"vsetvli x0, %3, e64,m8 \n\t"
|
||||
"vfrsub.vf %0, %0, %2, v0.t \n\t"
|
||||
:"+v"(vx1)
|
||||
:"v"(mask1), "f"(zero), "r"(gvl)
|
||||
:"v0");
|
||||
#else
|
||||
asm volatile(
|
||||
"vor.vv v0, %1, %1\n\t"
|
||||
"vsetvli x0, %3, e32,m8 \n\t"
|
||||
"vfrsub.vf %0, %0, %2, v0.t \n\t"
|
||||
:"+v"(vx1)
|
||||
:"v"(mask1), "f"(zero), "r"(gvl)
|
||||
:"v0");
|
||||
#endif
|
||||
*/
|
||||
vx0 = VFADDVV_FLOAT(vx0, vx1, gvl);
|
||||
|
||||
//index where element less than v_min
|
||||
mask0 = VMFLTVV_FLOAT(vx0, v_min, gvl);
|
||||
v_min_index = VIDV_MASK_UINT(v_min_index, mask0, gvl);
|
||||
/*
|
||||
#if defined(DOUBLE)
|
||||
asm volatile(
|
||||
"vor.vv v0, %1, %1 \n\t"
|
||||
"vsetvli x0, %2, e64,m8 \n\t"
|
||||
"vid.v %0, v0.t \n\t"
|
||||
:"+v"(v_min_index)
|
||||
:"v"(mask0), "r"(gvl)
|
||||
:"v0");
|
||||
#else
|
||||
asm volatile(
|
||||
"vor.vv v0, %1, %1 \n\t"
|
||||
"vsetvli x0, %2, e32,m8 \n\t"
|
||||
"vid.v %0, v0.t \n\t"
|
||||
:"+v"(v_min_index)
|
||||
:"v"(mask0), "r"(gvl)
|
||||
:"v0");
|
||||
#endif
|
||||
*/
|
||||
v_min_index = VADDVX_MASK_UINT(v_min_index, v_min_index, j, mask0, gvl);
|
||||
|
||||
//update v_min and start_index j
|
||||
v_min = VFMINVV_FLOAT(v_min, vx0, gvl);
|
||||
j += gvl;
|
||||
ix += inc_xv;
|
||||
}
|
||||
vx0 = VFMVVF_FLOAT(FLT_MAX, gvl);
|
||||
vx0 = VFREDMINVS_FLOAT(v_min, vx0, gvl);
|
||||
minf = vx0[0];
|
||||
mask0 = VMFLEVF_FLOAT(v_min, minf, gvl);
|
||||
min_index = VMFIRSTM(mask0,gvl);
|
||||
min_index = v_min_index[min_index];
|
||||
|
||||
if(j < n){
|
||||
gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M);
|
||||
v_min_index = VMVVX_UINT(0, gvl);
|
||||
vx0 = VLSEV_FLOAT(&x[ix], stride_x, gvl);
|
||||
//fabs(vector)
|
||||
mask0 = VMFLTVF_FLOAT(vx0, 0, gvl);
|
||||
vx0 = VFRSUBVF_MASK_FLOAT(vx0, vx0, 0, mask0, gvl);
|
||||
/*
|
||||
#if defined(DOUBLE)
|
||||
asm volatile(
|
||||
"vor.vv v0, %1, %1\n\t"
|
||||
"vsetvli x0, %3, e64,m8 \n\t"
|
||||
"vfrsub.vf %0, %0, %2, v0.t \n\t"
|
||||
:"+v"(vx0)
|
||||
:"v"(mask0), "f"(zero), "r"(gvl)
|
||||
:"v0");
|
||||
#else
|
||||
asm volatile(
|
||||
"vor.vv v0, %1, %1\n\t"
|
||||
"vsetvli x0, %3, e32,m8 \n\t"
|
||||
"vfrsub.vf %0, %0, %2, v0.t \n\t"
|
||||
:"+v"(vx0)
|
||||
:"v"(mask0), "f"(zero), "r"(gvl)
|
||||
:"v0");
|
||||
#endif
|
||||
*/
|
||||
vx1 = VLSEV_FLOAT(&x[ix+1], stride_x, gvl);
|
||||
//fabs(vector)
|
||||
mask1 = VMFLTVF_FLOAT(vx1, 0, gvl);
|
||||
vx1 = VFRSUBVF_MASK_FLOAT(vx1, vx1, 0, mask1, gvl);
|
||||
/*
|
||||
#if defined(DOUBLE)
|
||||
asm volatile(
|
||||
"vor.vv v0, %1, %1\n\t"
|
||||
"vsetvli x0, %3, e64,m8 \n\t"
|
||||
"vfrsub.vf %0, %0, %2, v0.t \n\t"
|
||||
:"+v"(vx1)
|
||||
:"v"(mask1), "f"(zero), "r"(gvl)
|
||||
:"v0");
|
||||
#else
|
||||
asm volatile(
|
||||
"vor.vv v0, %1, %1\n\t"
|
||||
"vsetvli x0, %3, e32,m8 \n\t"
|
||||
"vfrsub.vf %0, %0, %2, v0.t \n\t"
|
||||
:"+v"(vx1)
|
||||
:"v"(mask1), "f"(zero), "r"(gvl)
|
||||
:"v0");
|
||||
#endif
|
||||
*/
|
||||
v_min = VFADDVV_FLOAT(vx0, vx1, gvl);
|
||||
vx0 = VFMVVF_FLOAT(FLT_MAX, gvl);
|
||||
vx0 = VFREDMINVS_FLOAT(v_min, vx0, gvl);
|
||||
FLOAT cur_minf = vx0[0];
|
||||
if(cur_minf < minf){
|
||||
//tail index
|
||||
v_min_index = VIDV_UINT(gvl);
|
||||
v_min_index = VADDVX_UINT(v_min_index, j, gvl);
|
||||
|
||||
mask0 = VMFLEVF_FLOAT(v_min, cur_minf, gvl);
|
||||
min_index = VMFIRSTM(mask0,gvl);
|
||||
min_index = v_min_index[min_index];
|
||||
}
|
||||
}
|
||||
return(min_index+1);
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,65 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2013, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
/**************************************************************************************
|
||||
* 2013/09/14 Saar
|
||||
* BLASTEST float : NoTest
|
||||
* BLASTEST double : NoTest
|
||||
* CTEST : NoTest
|
||||
* TEST : NoTest
|
||||
*
|
||||
**************************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
#include <math.h>
|
||||
|
||||
|
||||
FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
|
||||
{
|
||||
BLASLONG i=0;
|
||||
BLASLONG ix=0;
|
||||
FLOAT maxf=0.0;
|
||||
|
||||
if (n <= 0 || inc_x <= 0) return(maxf);
|
||||
|
||||
maxf=x[0];
|
||||
ix += inc_x;
|
||||
i++;
|
||||
|
||||
while(i < n)
|
||||
{
|
||||
if( x[ix] > maxf )
|
||||
{
|
||||
maxf = x[ix];
|
||||
}
|
||||
ix += inc_x;
|
||||
i++;
|
||||
}
|
||||
return(maxf);
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,116 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2020, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
#include <math.h>
|
||||
#include <float.h>
|
||||
#if !defined(DOUBLE)
|
||||
#define RVV_EFLOAT RVV_E32
|
||||
#define RVV_M RVV_M8
|
||||
#define FLOAT_V_T float32xm8_t
|
||||
#define VLEV_FLOAT vlev_float32xm8
|
||||
#define VLSEV_FLOAT vlsev_float32xm8
|
||||
#define VFREDMAXVS_FLOAT vfredmaxvs_float32xm8
|
||||
#define VFMVVF_FLOAT vfmvvf_float32xm8
|
||||
#define VFMAXVV_FLOAT vfmaxvv_float32xm8
|
||||
#else
|
||||
#define RVV_EFLOAT RVV_E64
|
||||
#define RVV_M RVV_M8
|
||||
#define FLOAT_V_T float64xm8_t
|
||||
#define VLEV_FLOAT vlev_float64xm8
|
||||
#define VLSEV_FLOAT vlsev_float64xm8
|
||||
#define VFREDMAXVS_FLOAT vfredmaxvs_float64xm8
|
||||
#define VFMVVF_FLOAT vfmvvf_float64xm8
|
||||
#define VFMAXVV_FLOAT vfmaxvv_float64xm8
|
||||
#endif
|
||||
|
||||
FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
|
||||
{
|
||||
BLASLONG i=0, j=0;
|
||||
if (n <= 0 || inc_x <= 0) return(0.0);
|
||||
FLOAT maxf=-FLT_MAX;
|
||||
unsigned int gvl = 0;
|
||||
FLOAT_V_T v0, v1, v_max;
|
||||
|
||||
if(inc_x == 1){
|
||||
gvl = vsetvli(n, RVV_EFLOAT, RVV_M);
|
||||
if(gvl <= n/2){
|
||||
v_max = VFMVVF_FLOAT(-FLT_MAX, gvl);
|
||||
for(i=0,j=0; i<n/(gvl*2); i++){
|
||||
v0 = VLEV_FLOAT(&x[j], gvl);
|
||||
v_max = VFMAXVV_FLOAT(v_max, v0, gvl);
|
||||
|
||||
v1 = VLEV_FLOAT(&x[j+gvl], gvl);
|
||||
v_max = VFMAXVV_FLOAT(v_max, v1, gvl);
|
||||
j += gvl * 2;
|
||||
}
|
||||
v1 = VFMVVF_FLOAT(-FLT_MAX, gvl);
|
||||
v0 = VFREDMAXVS_FLOAT(v_max, v1, gvl);
|
||||
maxf = v0[0];
|
||||
}
|
||||
for(;j<n;){
|
||||
gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M);
|
||||
v0 = VLEV_FLOAT(&x[j], gvl);
|
||||
v1 = VFMVVF_FLOAT(-FLT_MAX, gvl);
|
||||
v0 = VFREDMAXVS_FLOAT(v0, v1, gvl);
|
||||
if(v0[0] > maxf)
|
||||
maxf = v0[0];
|
||||
j += gvl;
|
||||
}
|
||||
}else{
|
||||
gvl = vsetvli(n, RVV_EFLOAT, RVV_M);
|
||||
BLASLONG stride_x = inc_x * sizeof(FLOAT);
|
||||
if(gvl <= n/2){
|
||||
v_max = VFMVVF_FLOAT(-FLT_MAX, gvl);
|
||||
BLASLONG idx = 0, inc_xv = inc_x * gvl;
|
||||
for(i=0,j=0; i<n/(gvl*2); i++){
|
||||
v0 = VLSEV_FLOAT(&x[idx], stride_x, gvl);
|
||||
v_max = VFMAXVV_FLOAT(v_max, v0, gvl);
|
||||
|
||||
v1 = VLSEV_FLOAT(&x[idx+inc_xv], stride_x, gvl);
|
||||
v_max = VFMAXVV_FLOAT(v_max, v1, gvl);
|
||||
j += gvl * 2;
|
||||
idx += inc_xv * 2;
|
||||
}
|
||||
v1 = VFMVVF_FLOAT(-FLT_MAX, gvl);
|
||||
v0 = VFREDMAXVS_FLOAT(v_max, v1, gvl);
|
||||
maxf = v0[0];
|
||||
}
|
||||
for(;j<n;){
|
||||
gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M);
|
||||
v0 = VLSEV_FLOAT(&x[j*inc_x], stride_x, gvl);
|
||||
v1 = VFMVVF_FLOAT(-FLT_MAX, gvl);
|
||||
v0 = VFREDMAXVS_FLOAT(v0, v1, gvl);
|
||||
if(v0[0] > maxf)
|
||||
maxf = v0[0];
|
||||
j += gvl;
|
||||
}
|
||||
}
|
||||
return(maxf);
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,65 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2013, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
/**************************************************************************************
|
||||
* 2013/09/14 Saar
|
||||
* BLASTEST float : NoTest
|
||||
* BLASTEST double : NoTest
|
||||
* CTEST : NoTest
|
||||
* TEST : NoTest
|
||||
*
|
||||
**************************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
#include <math.h>
|
||||
|
||||
|
||||
FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
|
||||
{
|
||||
BLASLONG i=0;
|
||||
BLASLONG ix=0;
|
||||
FLOAT minf=0.0;
|
||||
|
||||
if (n <= 0 || inc_x <= 0) return(minf);
|
||||
|
||||
minf=x[0];
|
||||
ix += inc_x;
|
||||
i++;
|
||||
|
||||
while(i < n)
|
||||
{
|
||||
if( x[ix] < minf )
|
||||
{
|
||||
minf = x[ix];
|
||||
}
|
||||
ix += inc_x;
|
||||
i++;
|
||||
}
|
||||
return(minf);
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,116 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2020, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
#include <math.h>
|
||||
#include <float.h>
|
||||
#if !defined(DOUBLE)
|
||||
#define RVV_EFLOAT RVV_E32
|
||||
#define RVV_M RVV_M8
|
||||
#define FLOAT_V_T float32xm8_t
|
||||
#define VLEV_FLOAT vlev_float32xm8
|
||||
#define VLSEV_FLOAT vlsev_float32xm8
|
||||
#define VFREDMINVS_FLOAT vfredminvs_float32xm8
|
||||
#define VFMVVF_FLOAT vfmvvf_float32xm8
|
||||
#define VFMINVV_FLOAT vfminvv_float32xm8
|
||||
#else
|
||||
#define RVV_EFLOAT RVV_E64
|
||||
#define RVV_M RVV_M8
|
||||
#define FLOAT_V_T float64xm8_t
|
||||
#define VLEV_FLOAT vlev_float64xm8
|
||||
#define VLSEV_FLOAT vlsev_float64xm8
|
||||
#define VFREDMINVS_FLOAT vfredminvs_float64xm8
|
||||
#define VFMVVF_FLOAT vfmvvf_float64xm8
|
||||
#define VFMINVV_FLOAT vfminvv_float64xm8
|
||||
#endif
|
||||
|
||||
FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
|
||||
{
|
||||
BLASLONG i=0, j=0;
|
||||
if (n <= 0 || inc_x <= 0) return(0.0);
|
||||
FLOAT minf=FLT_MAX;
|
||||
unsigned int gvl = 0;
|
||||
FLOAT_V_T v0, v1, v_min;
|
||||
|
||||
if(inc_x == 1){
|
||||
gvl = vsetvli(n, RVV_EFLOAT, RVV_M);
|
||||
if(gvl <= n/2){
|
||||
v_min = VFMVVF_FLOAT(FLT_MAX, gvl);
|
||||
for(i=0,j=0; i<n/(gvl*2); i++){
|
||||
v0 = VLEV_FLOAT(&x[j], gvl);
|
||||
v_min = VFMINVV_FLOAT(v_min, v0, gvl);
|
||||
|
||||
v1 = VLEV_FLOAT(&x[j+gvl], gvl);
|
||||
v_min = VFMINVV_FLOAT(v_min, v1, gvl);
|
||||
j += gvl * 2;
|
||||
}
|
||||
v1 = VFMVVF_FLOAT(FLT_MAX, gvl);
|
||||
v0 = VFREDMINVS_FLOAT(v_min, v1, gvl);
|
||||
minf = v0[0];
|
||||
}
|
||||
for(;j<n;){
|
||||
gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M);
|
||||
v0 = VLEV_FLOAT(&x[j], gvl);
|
||||
v1 = VFMVVF_FLOAT(FLT_MAX, gvl);
|
||||
v0 = VFREDMINVS_FLOAT(v0, v1, gvl);
|
||||
if(v0[0] < minf)
|
||||
minf = v0[0];
|
||||
j += gvl;
|
||||
}
|
||||
}else{
|
||||
gvl = vsetvli(n, RVV_EFLOAT, RVV_M);
|
||||
BLASLONG stride_x = inc_x * sizeof(FLOAT);
|
||||
if(gvl <= n/2){
|
||||
v_min = VFMVVF_FLOAT(FLT_MAX, gvl);
|
||||
BLASLONG idx = 0, inc_xv = inc_x * gvl;
|
||||
for(i=0,j=0; i<n/(gvl*2); i++){
|
||||
v0 = VLSEV_FLOAT(&x[idx], stride_x, gvl);
|
||||
v_min = VFMINVV_FLOAT(v_min, v0, gvl);
|
||||
|
||||
v1 = VLSEV_FLOAT(&x[idx+inc_xv], stride_x, gvl);
|
||||
v_min = VFMINVV_FLOAT(v_min, v1, gvl);
|
||||
j += gvl * 2;
|
||||
idx += inc_xv * 2;
|
||||
}
|
||||
v1 = VFMVVF_FLOAT(FLT_MAX, gvl);
|
||||
v0 = VFREDMINVS_FLOAT(v_min, v1, gvl);
|
||||
minf = v0[0];
|
||||
}
|
||||
for(;j<n;){
|
||||
gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M);
|
||||
v0 = VLSEV_FLOAT(&x[j*inc_x], stride_x, gvl);
|
||||
v1 = VFMVVF_FLOAT(FLT_MAX, gvl);
|
||||
v0 = VFREDMINVS_FLOAT(v0, v1, gvl);
|
||||
if(v0[0] < minf)
|
||||
minf = v0[0];
|
||||
j += gvl;
|
||||
}
|
||||
}
|
||||
return(minf);
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,88 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2013, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
/**************************************************************************************
|
||||
* 2013/09/13 Saar
|
||||
* BLASTEST float : OK
|
||||
* BLASTEST double : OK
|
||||
* CTEST : OK
|
||||
* TEST : OK
|
||||
*
|
||||
**************************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
#include <math.h>
|
||||
|
||||
#if defined(DOUBLE)
|
||||
|
||||
#define ABS fabs
|
||||
|
||||
#else
|
||||
|
||||
#define ABS fabsf
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
|
||||
{
|
||||
BLASLONG i=0;
|
||||
FLOAT scale = 0.0;
|
||||
FLOAT ssq = 1.0;
|
||||
FLOAT absxi = 0.0;
|
||||
|
||||
|
||||
if (n <= 0 || inc_x <= 0) return(0.0);
|
||||
if ( n == 1 ) return( ABS(x[0]) );
|
||||
|
||||
n *= inc_x;
|
||||
while(i < n)
|
||||
{
|
||||
|
||||
if ( x[i] != 0.0 )
|
||||
{
|
||||
absxi = ABS( x[i] );
|
||||
if ( scale < absxi )
|
||||
{
|
||||
ssq = 1 + ssq * ( scale / absxi ) * ( scale / absxi );
|
||||
scale = absxi ;
|
||||
}
|
||||
else
|
||||
{
|
||||
ssq += ( absxi/scale ) * ( absxi/scale );
|
||||
}
|
||||
|
||||
}
|
||||
i += inc_x;
|
||||
}
|
||||
scale = scale * sqrt( ssq );
|
||||
return(scale);
|
||||
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,220 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2020, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
#if !defined(DOUBLE)
|
||||
#define RVV_EFLOAT RVV_E32
|
||||
#define RVV_M RVV_M4
|
||||
#define FLOAT_V_T float32xm4_t
|
||||
#define VLEV_FLOAT vlev_float32xm4
|
||||
#define VLSEV_FLOAT vlsev_float32xm4
|
||||
#define VFREDSUM_FLOAT vfredsumvs_float32xm4
|
||||
#define VFMACCVV_FLOAT vfmaccvv_float32xm4
|
||||
#define VFMVVF_FLOAT vfmvvf_float32xm4
|
||||
#define VFDOTVV_FLOAT vfdotvv_float32xm4
|
||||
#define ABS fabsf
|
||||
#define MASK_T e32xm4_t
|
||||
#define VFRSUBVF_MASK_FLOAT vfrsubvf_mask_float32xm4
|
||||
#define VMFGTVF_FLOAT vmfgtvf_e32xm4_float32xm4
|
||||
#define VMFIRSTM vmfirstm_e32xm4
|
||||
#define VFDIVVF_FLOAT vfdivvf_float32xm4
|
||||
#define VMFLTVF_FLOAT vmfltvf_e32xm4_float32xm4
|
||||
#define VFREDMAXVS_FLOAT vfredmaxvs_float32xm4
|
||||
#else
|
||||
#define RVV_EFLOAT RVV_E64
|
||||
#define RVV_M RVV_M4
|
||||
#define FLOAT_V_T float64xm4_t
|
||||
#define VLEV_FLOAT vlev_float64xm4
|
||||
#define VLSEV_FLOAT vlsev_float64xm4
|
||||
#define VFREDSUM_FLOAT vfredsumvs_float64xm4
|
||||
#define VFMACCVV_FLOAT vfmaccvv_float64xm4
|
||||
#define VFMVVF_FLOAT vfmvvf_float64xm4
|
||||
#define VFDOTVV_FLOAT vfdotvv_float64xm4
|
||||
#define ABS fabs
|
||||
#define MASK_T e64xm4_t
|
||||
#define VFRSUBVF_MASK_FLOAT vfrsubvf_mask_float64xm4
|
||||
#define VMFGTVF_FLOAT vmfgtvf_e64xm4_float64xm4
|
||||
#define VMFIRSTM vmfirstm_e64xm4
|
||||
#define VFDIVVF_FLOAT vfdivvf_float64xm4
|
||||
#define VMFLTVF_FLOAT vmfltvf_e64xm4_float64xm4
|
||||
#define VFREDMAXVS_FLOAT vfredmaxvs_float64xm4
|
||||
#endif
|
||||
|
||||
FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
|
||||
{
|
||||
BLASLONG i=0, j=0;
|
||||
|
||||
if ( n < 0 ) return(0.0);
|
||||
if(n == 1) return (ABS(x[0]));
|
||||
|
||||
FLOAT_V_T vr, v0, v_zero;
|
||||
unsigned int gvl = 0;
|
||||
FLOAT scale = 0.0, ssq = 0.0;
|
||||
MASK_T mask;
|
||||
BLASLONG index = 0;
|
||||
if(inc_x == 1){
|
||||
gvl = vsetvli(n, RVV_EFLOAT, RVV_M);
|
||||
vr = VFMVVF_FLOAT(0, gvl);
|
||||
v_zero = VFMVVF_FLOAT(0, gvl);
|
||||
for(i=0,j=0; i<n/gvl; i++){
|
||||
v0 = VLEV_FLOAT(&x[j], gvl);
|
||||
//fabs(vector)
|
||||
mask = VMFLTVF_FLOAT(v0, 0, gvl);
|
||||
v0 = VFRSUBVF_MASK_FLOAT(v0, v0, 0, mask, gvl);
|
||||
//if scale change
|
||||
mask = VMFGTVF_FLOAT(v0, scale, gvl);
|
||||
index = VMFIRSTM(mask, gvl);
|
||||
if(index == -1){//no elements greater than scale
|
||||
if(scale != 0.0){
|
||||
v0 = VFDIVVF_FLOAT(v0, scale, gvl);
|
||||
vr = VFMACCVV_FLOAT(vr, v0, v0, gvl);
|
||||
}
|
||||
}else{//found greater element
|
||||
//ssq in vector vr: vr[0]
|
||||
vr = VFREDSUM_FLOAT(vr, v_zero, gvl);
|
||||
//total ssq before current vector
|
||||
ssq += vr[0];
|
||||
//find max
|
||||
vr = VFREDMAXVS_FLOAT(v0, v_zero, gvl);
|
||||
//update ssq before max_index
|
||||
ssq = ssq * (scale/vr[0])*(scale/vr[0]);
|
||||
//update scale
|
||||
scale = vr[0];
|
||||
//ssq in vector vr
|
||||
v0 = VFDIVVF_FLOAT(v0, scale, gvl);
|
||||
vr = VFMACCVV_FLOAT(v_zero, v0, v0, gvl);
|
||||
}
|
||||
j += gvl;
|
||||
}
|
||||
//ssq in vector vr: vr[0]
|
||||
vr = VFREDSUM_FLOAT(vr, v_zero, gvl);
|
||||
//total ssq now
|
||||
ssq += vr[0];
|
||||
|
||||
//tail
|
||||
if(j < n){
|
||||
gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M);
|
||||
v0 = VLEV_FLOAT(&x[j], gvl);
|
||||
//fabs(vector)
|
||||
mask = VMFLTVF_FLOAT(v0, 0, gvl);
|
||||
v0 = VFRSUBVF_MASK_FLOAT(v0, v0, 0, mask, gvl);
|
||||
//if scale change
|
||||
mask = VMFGTVF_FLOAT(v0, scale, gvl);
|
||||
index = VMFIRSTM(mask, gvl);
|
||||
if(index == -1){//no elements greater than scale
|
||||
if(scale != 0.0)
|
||||
v0 = VFDIVVF_FLOAT(v0, scale, gvl);
|
||||
}else{//found greater element
|
||||
//find max
|
||||
vr = VFREDMAXVS_FLOAT(v0, v_zero, gvl);
|
||||
//update ssq before max_index
|
||||
ssq = ssq * (scale/vr[0])*(scale/vr[0]);
|
||||
//update scale
|
||||
scale = vr[0];
|
||||
v0 = VFDIVVF_FLOAT(v0, scale, gvl);
|
||||
}
|
||||
vr = VFMACCVV_FLOAT(v_zero, v0, v0, gvl);
|
||||
//ssq in vector vr: vr[0]
|
||||
vr = VFREDSUM_FLOAT(vr, v_zero, gvl);
|
||||
//total ssq now
|
||||
ssq += vr[0];
|
||||
}
|
||||
}else{
|
||||
gvl = vsetvli(n, RVV_EFLOAT, RVV_M);
|
||||
vr = VFMVVF_FLOAT(0, gvl);
|
||||
v_zero = VFMVVF_FLOAT(0, gvl);
|
||||
unsigned int stride_x = inc_x * sizeof(FLOAT);
|
||||
int idx = 0, inc_v = inc_x * gvl;
|
||||
for(i=0,j=0; i<n/gvl; i++){
|
||||
v0 = VLSEV_FLOAT(&x[idx], stride_x, gvl);
|
||||
//fabs(vector)
|
||||
mask = VMFLTVF_FLOAT(v0, 0, gvl);
|
||||
v0 = VFRSUBVF_MASK_FLOAT(v0, v0, 0, mask, gvl);
|
||||
//if scale change
|
||||
mask = VMFGTVF_FLOAT(v0, scale, gvl);
|
||||
index = VMFIRSTM(mask, gvl);
|
||||
if(index == -1){//no elements greater than scale
|
||||
if(scale != 0.0){
|
||||
v0 = VFDIVVF_FLOAT(v0, scale, gvl);
|
||||
vr = VFMACCVV_FLOAT(vr, v0, v0, gvl);
|
||||
}
|
||||
}else{//found greater element
|
||||
//ssq in vector vr: vr[0]
|
||||
vr = VFREDSUM_FLOAT(vr, v_zero, gvl);
|
||||
//total ssq before current vector
|
||||
ssq += vr[0];
|
||||
//find max
|
||||
vr = VFREDMAXVS_FLOAT(v0, v_zero, gvl);
|
||||
//update ssq before max_index
|
||||
ssq = ssq * (scale/vr[0])*(scale/vr[0]);
|
||||
//update scale
|
||||
scale = vr[0];
|
||||
//ssq in vector vr
|
||||
v0 = VFDIVVF_FLOAT(v0, scale, gvl);
|
||||
vr = VFMACCVV_FLOAT(v_zero, v0, v0, gvl);
|
||||
}
|
||||
j += gvl;
|
||||
idx += inc_v;
|
||||
}
|
||||
//ssq in vector vr: vr[0]
|
||||
vr = VFREDSUM_FLOAT(vr, v_zero, gvl);
|
||||
//total ssq now
|
||||
ssq += vr[0];
|
||||
|
||||
//tail
|
||||
if(j < n){
|
||||
gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M);
|
||||
v0 = VLSEV_FLOAT(&x[idx], stride_x, gvl);
|
||||
//fabs(vector)
|
||||
mask = VMFLTVF_FLOAT(v0, 0, gvl);
|
||||
v0 = VFRSUBVF_MASK_FLOAT(v0, v0, 0, mask, gvl);
|
||||
//if scale change
|
||||
mask = VMFGTVF_FLOAT(v0, scale, gvl);
|
||||
index = VMFIRSTM(mask, gvl);
|
||||
if(index == -1){//no elements greater than scale
|
||||
if(scale != 0.0)
|
||||
v0 = VFDIVVF_FLOAT(v0, scale, gvl);
|
||||
}else{//found greater element
|
||||
//find max
|
||||
vr = VFREDMAXVS_FLOAT(v0, v_zero, gvl);
|
||||
//update ssq before max_index
|
||||
ssq = ssq * (scale/vr[0])*(scale/vr[0]);
|
||||
//update scale
|
||||
scale = vr[0];
|
||||
v0 = VFDIVVF_FLOAT(v0, scale, gvl);
|
||||
}
|
||||
vr = VFMACCVV_FLOAT(v_zero, v0, v0, gvl);
|
||||
//ssq in vector vr: vr[0]
|
||||
vr = VFREDSUM_FLOAT(vr, v_zero, gvl);
|
||||
//total ssq now
|
||||
ssq += vr[0];
|
||||
}
|
||||
}
|
||||
return(scale * sqrt(ssq));
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,128 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2020, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
#if !defined(DOUBLE)
|
||||
#define RVV_EFLOAT RVV_E32
|
||||
#define RVV_M RVV_M8
|
||||
#define FLOAT_V_T float32xm8_t
|
||||
#define VLEV_FLOAT vlev_float32xm8
|
||||
#define VLSEV_FLOAT vlsev_float32xm8
|
||||
#define VFREDSUM_FLOAT vfredsumvs_float32xm8
|
||||
#define VFMACCVV_FLOAT vfmaccvv_float32xm8
|
||||
#define VFMVVF_FLOAT vfmvvf_float32xm8
|
||||
#define VFDOTVV_FLOAT vfdotvv_float32xm8
|
||||
#define ABS fabsf
|
||||
#else
|
||||
#define RVV_EFLOAT RVV_E64
|
||||
#define RVV_M RVV_M8
|
||||
#define FLOAT_V_T float64xm8_t
|
||||
#define VLEV_FLOAT vlev_float64xm8
|
||||
#define VLSEV_FLOAT vlsev_float64xm8
|
||||
#define VFREDSUM_FLOAT vfredsumvs_float64xm8
|
||||
#define VFMACCVV_FLOAT vfmaccvv_float64xm8
|
||||
#define VFMVVF_FLOAT vfmvvf_float64xm8
|
||||
#define VFDOTVV_FLOAT vfdotvv_float64xm8
|
||||
#define ABS fabs
|
||||
#endif
|
||||
|
||||
FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
|
||||
{
|
||||
BLASLONG i=0, j=0;
|
||||
double len = 0.0 ;
|
||||
|
||||
if ( n < 0 ) return(0.0);
|
||||
if(n == 1) return (ABS(x[0]));
|
||||
|
||||
FLOAT_V_T vr, v0, v1;
|
||||
unsigned int gvl = 0;
|
||||
if(inc_x == 1){
|
||||
gvl = vsetvli(n, RVV_EFLOAT, RVV_M);
|
||||
if(gvl < n/2){
|
||||
vr = VFMVVF_FLOAT(0, gvl);
|
||||
for(i=0,j=0; i<n/(2*gvl); i++){
|
||||
v0 = VLEV_FLOAT(&x[j], gvl);
|
||||
vr = VFMACCVV_FLOAT(vr, v0, v0, gvl);
|
||||
j += gvl;
|
||||
|
||||
v1 = VLEV_FLOAT(&x[j], gvl);
|
||||
vr = VFMACCVV_FLOAT(vr, v1, v1, gvl);
|
||||
j += gvl;
|
||||
}
|
||||
v0 = VFMVVF_FLOAT(0, gvl);
|
||||
v0 = VFREDSUM_FLOAT(vr, v0, gvl);
|
||||
len += v0[0];
|
||||
}
|
||||
//tail
|
||||
for(;j < n;){
|
||||
gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M);
|
||||
v0 = VLEV_FLOAT(&x[j], gvl);
|
||||
//v1 = 0
|
||||
v1 = VFMVVF_FLOAT(0, gvl);
|
||||
//vr = VFDOTVV_FLOAT(v0, v0, gvl);
|
||||
vr = VFMACCVV_FLOAT(v1, v0, v0, gvl);
|
||||
v0 = VFREDSUM_FLOAT(vr, v1, gvl);
|
||||
len += v0[0];
|
||||
|
||||
j += gvl;
|
||||
}
|
||||
}else{
|
||||
gvl = vsetvli(n, RVV_EFLOAT, RVV_M);
|
||||
unsigned int stride_x = inc_x * sizeof(FLOAT);
|
||||
if(gvl < n/2){
|
||||
vr = VFMVVF_FLOAT(0, gvl);
|
||||
for(i=0,j=0; i<n/(2*gvl); i++){
|
||||
v0 = VLSEV_FLOAT(&x[j*inc_x], stride_x, gvl);
|
||||
vr = VFMACCVV_FLOAT(vr, v0, v0, gvl);
|
||||
j += gvl;
|
||||
|
||||
v1 = VLSEV_FLOAT(&x[j*inc_x], stride_x, gvl);
|
||||
vr = VFMACCVV_FLOAT(vr, v1, v1, gvl);
|
||||
j += gvl;
|
||||
}
|
||||
v0 = VFMVVF_FLOAT(0, gvl);
|
||||
v0 = VFREDSUM_FLOAT(vr, v0, gvl);
|
||||
len += v0[0];
|
||||
}
|
||||
//tail
|
||||
for(;j < n;){
|
||||
gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M);
|
||||
v0 = VLSEV_FLOAT(&x[j*inc_x], stride_x, gvl);
|
||||
//v1 = 0
|
||||
v1 = VFMVVF_FLOAT(0, gvl);
|
||||
//vr = VFDOTVV_FLOAT(v0, v0, gvl);
|
||||
vr = VFMACCVV_FLOAT(v1, v0, v0, gvl);
|
||||
v0 = VFREDSUM_FLOAT(vr, v1, gvl);
|
||||
len += v0[0];
|
||||
|
||||
j += gvl;
|
||||
}
|
||||
}
|
||||
return(sqrt(len));
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,90 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2013, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
|
||||
/*****************************************************
|
||||
* 2014/06/09 Saar
|
||||
*
|
||||
* Order ColMajor
|
||||
* No Trans
|
||||
*
|
||||
******************************************************/
|
||||
|
||||
int CNAME(BLASLONG rows, BLASLONG cols, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG ldb)
|
||||
{
|
||||
BLASLONG i,j;
|
||||
FLOAT *aptr,*bptr;
|
||||
|
||||
if ( rows <= 0 ) return(0);
|
||||
if ( cols <= 0 ) return(0);
|
||||
|
||||
aptr = a;
|
||||
bptr = b;
|
||||
|
||||
if ( alpha == 0.0 )
|
||||
{
|
||||
for ( i=0; i<cols ; i++ )
|
||||
{
|
||||
for(j=0; j<rows; j++)
|
||||
{
|
||||
bptr[j] = 0.0;
|
||||
}
|
||||
bptr += ldb;
|
||||
}
|
||||
return(0);
|
||||
}
|
||||
|
||||
if ( alpha == 1.0 )
|
||||
{
|
||||
for ( i=0; i<cols ; i++ )
|
||||
{
|
||||
for(j=0; j<rows; j++)
|
||||
{
|
||||
bptr[j] = aptr[j];
|
||||
}
|
||||
aptr += lda;
|
||||
bptr += ldb;
|
||||
}
|
||||
return(0);
|
||||
}
|
||||
|
||||
for ( i=0; i<cols ; i++ )
|
||||
{
|
||||
for(j=0; j<rows; j++)
|
||||
{
|
||||
bptr[j] = alpha * aptr[j];
|
||||
}
|
||||
aptr += lda;
|
||||
bptr += ldb;
|
||||
}
|
||||
|
||||
return(0);
|
||||
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,89 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2013, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
|
||||
/*****************************************************
|
||||
* 2014/06/09 Saar
|
||||
*
|
||||
* Order ColMajor
|
||||
* Trans
|
||||
*
|
||||
******************************************************/
|
||||
|
||||
int CNAME(BLASLONG rows, BLASLONG cols, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG ldb)
|
||||
{
|
||||
BLASLONG i,j;
|
||||
FLOAT *aptr,*bptr;
|
||||
|
||||
if ( rows <= 0 ) return(0);
|
||||
if ( cols <= 0 ) return(0);
|
||||
|
||||
aptr = a;
|
||||
|
||||
if ( alpha == 0.0 )
|
||||
{
|
||||
for ( i=0; i<cols ; i++ )
|
||||
{
|
||||
bptr = &b[i];
|
||||
for(j=0; j<rows; j++)
|
||||
{
|
||||
bptr[j*ldb] = 0.0;
|
||||
}
|
||||
}
|
||||
return(0);
|
||||
}
|
||||
|
||||
if ( alpha == 1.0 )
|
||||
{
|
||||
for ( i=0; i<cols ; i++ )
|
||||
{
|
||||
bptr = &b[i];
|
||||
for(j=0; j<rows; j++)
|
||||
{
|
||||
bptr[j*ldb] = aptr[j];
|
||||
}
|
||||
aptr += lda;
|
||||
}
|
||||
return(0);
|
||||
}
|
||||
|
||||
for ( i=0; i<cols ; i++ )
|
||||
{
|
||||
bptr = &b[i];
|
||||
for(j=0; j<rows; j++)
|
||||
{
|
||||
bptr[j*ldb] = alpha * aptr[j];
|
||||
}
|
||||
aptr += lda;
|
||||
}
|
||||
|
||||
return(0);
|
||||
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,90 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2013, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
|
||||
/*****************************************************
|
||||
* 2014/06/09 Saar
|
||||
*
|
||||
* Order rowMajor
|
||||
* No Trans
|
||||
*
|
||||
******************************************************/
|
||||
|
||||
int CNAME(BLASLONG rows, BLASLONG cols, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG ldb)
|
||||
{
|
||||
BLASLONG i,j;
|
||||
FLOAT *aptr,*bptr;
|
||||
|
||||
if ( rows <= 0 ) return(0);
|
||||
if ( cols <= 0 ) return(0);
|
||||
|
||||
aptr = a;
|
||||
bptr = b;
|
||||
|
||||
if ( alpha == 0.0 )
|
||||
{
|
||||
for ( i=0; i<rows ; i++ )
|
||||
{
|
||||
for(j=0; j<cols; j++)
|
||||
{
|
||||
bptr[j] = 0.0;
|
||||
}
|
||||
bptr += ldb;
|
||||
}
|
||||
return(0);
|
||||
}
|
||||
|
||||
if ( alpha == 1.0 )
|
||||
{
|
||||
for ( i=0; i<rows ; i++ )
|
||||
{
|
||||
for(j=0; j<cols; j++)
|
||||
{
|
||||
bptr[j] = aptr[j];
|
||||
}
|
||||
aptr += lda;
|
||||
bptr += ldb;
|
||||
}
|
||||
return(0);
|
||||
}
|
||||
|
||||
for ( i=0; i<rows ; i++ )
|
||||
{
|
||||
for(j=0; j<cols; j++)
|
||||
{
|
||||
bptr[j] = alpha * aptr[j];
|
||||
}
|
||||
aptr += lda;
|
||||
bptr += ldb;
|
||||
}
|
||||
|
||||
return(0);
|
||||
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,62 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2013, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
|
||||
/*****************************************************
|
||||
* 2014/06/09 Saar
|
||||
*
|
||||
* Order rowMajor
|
||||
* Trans
|
||||
*
|
||||
******************************************************/
|
||||
|
||||
int CNAME(BLASLONG rows, BLASLONG cols, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG ldb)
|
||||
{
|
||||
BLASLONG i,j;
|
||||
FLOAT *aptr,*bptr;
|
||||
|
||||
if ( rows <= 0 ) return(0);
|
||||
if ( cols <= 0 ) return(0);
|
||||
|
||||
aptr = a;
|
||||
|
||||
for ( i=0; i<rows ; i++ )
|
||||
{
|
||||
bptr = &b[i];
|
||||
for(j=0; j<cols; j++)
|
||||
{
|
||||
bptr[j*ldb] = alpha * aptr[j];
|
||||
}
|
||||
aptr += lda;
|
||||
}
|
||||
|
||||
return(0);
|
||||
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,62 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2013, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
/**************************************************************************************
|
||||
* 2013/09/14 Saar
|
||||
* BLASTEST float : OK
|
||||
* BLASTEST double : OK
|
||||
* CTEST : OK
|
||||
* TEST : OK
|
||||
*
|
||||
**************************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
|
||||
int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT c, FLOAT s)
|
||||
{
|
||||
BLASLONG i=0;
|
||||
BLASLONG ix=0,iy=0;
|
||||
FLOAT temp;
|
||||
|
||||
if ( n <= 0 ) return(0);
|
||||
|
||||
while(i < n)
|
||||
{
|
||||
temp = c*x[ix] + s*y[iy] ;
|
||||
y[iy] = c*y[iy] - s*x[ix] ;
|
||||
x[ix] = temp ;
|
||||
|
||||
ix += inc_x ;
|
||||
iy += inc_y ;
|
||||
i++ ;
|
||||
|
||||
}
|
||||
return(0);
|
||||
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,196 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2020, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
|
||||
#if !defined(DOUBLE)
|
||||
#define RVV_EFLOAT RVV_E32
|
||||
#define RVV_M RVV_M4
|
||||
#define FLOAT_V_T float32xm4_t
|
||||
#define VLEV_FLOAT vlev_float32xm4
|
||||
#define VLSEV_FLOAT vlsev_float32xm4
|
||||
#define VSEV_FLOAT vsev_float32xm4
|
||||
#define VSSEV_FLOAT vssev_float32xm4
|
||||
#define VFMACCVF_FLOAT vfmaccvf_float32xm4
|
||||
#define VFMULVF_FLOAT vfmulvf_float32xm4
|
||||
#define VFMSACVF_FLOAT vfmsacvf_float32xm4
|
||||
#else
|
||||
#define RVV_EFLOAT RVV_E64
|
||||
#define RVV_M RVV_M4
|
||||
#define FLOAT_V_T float64xm4_t
|
||||
#define VLEV_FLOAT vlev_float64xm4
|
||||
#define VLSEV_FLOAT vlsev_float64xm4
|
||||
#define VSEV_FLOAT vsev_float64xm4
|
||||
#define VSSEV_FLOAT vssev_float64xm4
|
||||
#define VFMACCVF_FLOAT vfmaccvf_float64xm4
|
||||
#define VFMULVF_FLOAT vfmulvf_float64xm4
|
||||
#define VFMSACVF_FLOAT vfmsacvf_float64xm4
|
||||
#endif
|
||||
|
||||
int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT c, FLOAT s)
|
||||
{
|
||||
BLASLONG i=0, j=0;
|
||||
BLASLONG ix=0,iy=0;
|
||||
|
||||
if(n <= 0) return(0);
|
||||
unsigned int gvl = 0;
|
||||
FLOAT_V_T v0, v1, vx, vy;
|
||||
|
||||
if(inc_x == 1 && inc_y == 1){
|
||||
gvl = vsetvli(n, RVV_EFLOAT, RVV_M);
|
||||
for(i=0,j=0; i<n/gvl; i++){
|
||||
vx = VLEV_FLOAT(&x[j], gvl);
|
||||
vy = VLEV_FLOAT(&y[j], gvl);
|
||||
|
||||
v0 = VFMULVF_FLOAT(vx, c, gvl);
|
||||
v0 = VFMACCVF_FLOAT(v0, s, vy, gvl);
|
||||
VSEV_FLOAT(&x[j], v0, gvl);
|
||||
|
||||
v1 = VFMULVF_FLOAT(vx, s, gvl);
|
||||
v1 = VFMSACVF_FLOAT(v1, c, vy, gvl);
|
||||
VSEV_FLOAT(&y[j], v1, gvl);
|
||||
|
||||
j += gvl;
|
||||
}
|
||||
if(j<n){
|
||||
gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M);
|
||||
vx = VLEV_FLOAT(&x[j], gvl);
|
||||
vy = VLEV_FLOAT(&y[j], gvl);
|
||||
|
||||
v0 = VFMULVF_FLOAT(vx, c, gvl);
|
||||
v0 = VFMACCVF_FLOAT(v0, s, vy, gvl);
|
||||
VSEV_FLOAT(&x[j], v0, gvl);
|
||||
|
||||
v1 = VFMULVF_FLOAT(vx, s, gvl);
|
||||
v1 = VFMSACVF_FLOAT(v1, c, vy, gvl);
|
||||
VSEV_FLOAT(&y[j], v1, gvl);
|
||||
}
|
||||
}else if(inc_y == 1){
|
||||
gvl = vsetvli(n, RVV_EFLOAT, RVV_M);
|
||||
BLASLONG stride_x = inc_x * sizeof(FLOAT);
|
||||
BLASLONG inc_xv = inc_x * gvl;
|
||||
for(i=0,j=0; i<n/gvl; i++){
|
||||
vx = VLSEV_FLOAT(&x[ix], stride_x, gvl);
|
||||
vy = VLEV_FLOAT(&y[j], gvl);
|
||||
|
||||
v0 = VFMULVF_FLOAT(vx, c, gvl);
|
||||
v0 = VFMACCVF_FLOAT(v0, s, vy, gvl);
|
||||
VSSEV_FLOAT(&x[ix], stride_x, v0, gvl);
|
||||
|
||||
v1 = VFMULVF_FLOAT(vx, s, gvl);
|
||||
v1 = VFMSACVF_FLOAT(v1, c, vy, gvl);
|
||||
VSEV_FLOAT(&y[j], v1, gvl);
|
||||
|
||||
j += gvl;
|
||||
ix += inc_xv;
|
||||
}
|
||||
if(j<n){
|
||||
gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M);
|
||||
vx = VLSEV_FLOAT(&x[j*inc_x], stride_x, gvl);
|
||||
vy = VLEV_FLOAT(&y[j], gvl);
|
||||
|
||||
v0 = VFMULVF_FLOAT(vx, c, gvl);
|
||||
v0 = VFMACCVF_FLOAT(v0, s, vy, gvl);
|
||||
VSSEV_FLOAT(&x[j*inc_x], stride_x, v0, gvl);
|
||||
|
||||
v1 = VFMULVF_FLOAT(vx, s, gvl);
|
||||
v1 = VFMSACVF_FLOAT(v1, c, vy, gvl);
|
||||
VSEV_FLOAT(&y[j], v1, gvl);
|
||||
}
|
||||
}else if(inc_x == 1){
|
||||
gvl = vsetvli(n, RVV_EFLOAT, RVV_M);
|
||||
BLASLONG stride_y = inc_y * sizeof(FLOAT);
|
||||
BLASLONG inc_yv = inc_y * gvl;
|
||||
for(i=0,j=0; i<n/gvl; i++){
|
||||
vx = VLEV_FLOAT(&x[j], gvl);
|
||||
vy = VLSEV_FLOAT(&y[iy], stride_y, gvl);
|
||||
|
||||
v0 = VFMULVF_FLOAT(vx, c, gvl);
|
||||
v0 = VFMACCVF_FLOAT(v0, s, vy, gvl);
|
||||
VSEV_FLOAT(&x[j], v0, gvl);
|
||||
|
||||
v1 = VFMULVF_FLOAT(vx, s, gvl);
|
||||
v1 = VFMSACVF_FLOAT(v1, c, vy, gvl);
|
||||
VSSEV_FLOAT(&y[iy], stride_y, v1, gvl);
|
||||
|
||||
j += gvl;
|
||||
iy += inc_yv;
|
||||
}
|
||||
if(j<n){
|
||||
gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M);
|
||||
vx = VLEV_FLOAT(&x[j], gvl);
|
||||
vy = VLSEV_FLOAT(&y[j*inc_y],stride_y, gvl);
|
||||
|
||||
v0 = VFMULVF_FLOAT(vx, c, gvl);
|
||||
v0 = VFMACCVF_FLOAT(v0, s, vy, gvl);
|
||||
VSEV_FLOAT(&x[j], v0, gvl);
|
||||
|
||||
v1 = VFMULVF_FLOAT(vx, s, gvl);
|
||||
v1 = VFMSACVF_FLOAT(v1, c, vy, gvl);
|
||||
VSSEV_FLOAT(&y[j*inc_y], stride_y, v1, gvl);
|
||||
}
|
||||
}else{
|
||||
gvl = vsetvli(n, RVV_EFLOAT, RVV_M);
|
||||
BLASLONG stride_x = inc_x * sizeof(FLOAT);
|
||||
BLASLONG stride_y = inc_y * sizeof(FLOAT);
|
||||
BLASLONG inc_xv = inc_x * gvl;
|
||||
BLASLONG inc_yv = inc_y * gvl;
|
||||
for(i=0,j=0; i<n/gvl; i++){
|
||||
vx = VLSEV_FLOAT(&x[ix], stride_x, gvl);
|
||||
vy = VLSEV_FLOAT(&y[iy], stride_y, gvl);
|
||||
|
||||
v0 = VFMULVF_FLOAT(vx, c, gvl);
|
||||
v0 = VFMACCVF_FLOAT(v0, s, vy, gvl);
|
||||
VSSEV_FLOAT(&x[ix], stride_x, v0, gvl);
|
||||
|
||||
v1 = VFMULVF_FLOAT(vx, s, gvl);
|
||||
v1 = VFMSACVF_FLOAT(v1, c, vy, gvl);
|
||||
VSSEV_FLOAT(&y[iy], stride_y, v1, gvl);
|
||||
|
||||
j += gvl;
|
||||
ix += inc_xv;
|
||||
iy += inc_yv;
|
||||
}
|
||||
if(j<n){
|
||||
gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M);
|
||||
vx = VLSEV_FLOAT(&x[j*inc_x],stride_x, gvl);
|
||||
vy = VLSEV_FLOAT(&y[j*inc_y],stride_y, gvl);
|
||||
|
||||
v0 = VFMULVF_FLOAT(vx, c, gvl);
|
||||
v0 = VFMACCVF_FLOAT(v0, s, vy, gvl);
|
||||
VSSEV_FLOAT(&x[j*inc_x], stride_x, v0, gvl);
|
||||
|
||||
v1 = VFMULVF_FLOAT(vx, s, gvl);
|
||||
v1 = VFMSACVF_FLOAT(v1, c, vy, gvl);
|
||||
VSSEV_FLOAT(&y[j*inc_y], stride_y, v1, gvl);
|
||||
}
|
||||
}
|
||||
return(0);
|
||||
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,63 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2013, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
/**************************************************************************************
|
||||
* 2013/09/14 Saar
|
||||
* BLASTEST float : OK
|
||||
* BLASTEST double : OK
|
||||
* CTEST : OK
|
||||
* TEST : OK
|
||||
*
|
||||
**************************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
|
||||
int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *dummy, BLASLONG dummy2)
|
||||
{
|
||||
BLASLONG i=0,j=0;
|
||||
|
||||
if ( (n <= 0) || (inc_x <= 0))
|
||||
return(0);
|
||||
|
||||
|
||||
while(j < n)
|
||||
{
|
||||
|
||||
if ( da == 0.0 )
|
||||
x[i]=0.0;
|
||||
else
|
||||
x[i] = da * x[i] ;
|
||||
|
||||
i += inc_x ;
|
||||
j++;
|
||||
|
||||
}
|
||||
return 0;
|
||||
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,133 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2020, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
#if !defined(DOUBLE)
|
||||
#define RVV_EFLOAT RVV_E32
|
||||
#define RVV_M RVV_M8
|
||||
#define FLOAT_V_T float32xm8_t
|
||||
#define VLEV_FLOAT vlev_float32xm8
|
||||
#define VLSEV_FLOAT vlsev_float32xm8
|
||||
#define VSEV_FLOAT vsev_float32xm8
|
||||
#define VSSEV_FLOAT vssev_float32xm8
|
||||
#define VFMULVF_FLOAT vfmulvf_float32xm8
|
||||
#define VFMVVF_FLOAT vfmvvf_float32xm8
|
||||
#else
|
||||
#define RVV_EFLOAT RVV_E64
|
||||
#define RVV_M RVV_M8
|
||||
#define FLOAT_V_T float64xm8_t
|
||||
#define VLEV_FLOAT vlev_float64xm8
|
||||
#define VLSEV_FLOAT vlsev_float64xm8
|
||||
#define VSEV_FLOAT vsev_float64xm8
|
||||
#define VSSEV_FLOAT vssev_float64xm8
|
||||
#define VFMULVF_FLOAT vfmulvf_float64xm8
|
||||
#define VFMVVF_FLOAT vfmvvf_float64xm8
|
||||
#endif
|
||||
|
||||
int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *dummy, BLASLONG dummy2)
|
||||
{
|
||||
BLASLONG i=0,j=0;
|
||||
|
||||
if ( (n <= 0) || (inc_x <= 0))
|
||||
return(0);
|
||||
|
||||
FLOAT_V_T v0, v1;
|
||||
unsigned int gvl = 0;
|
||||
if(inc_x == 1){
|
||||
if(da == 0.0){
|
||||
memset(&x[0], 0, n * sizeof(FLOAT));
|
||||
}else{
|
||||
gvl = vsetvli(n, RVV_EFLOAT, RVV_M);
|
||||
if(gvl <= n / 2){
|
||||
for(i = 0, j = 0; i < n/(2*gvl); i++, j+=2*gvl){
|
||||
v0 = VLEV_FLOAT(&x[j], gvl);
|
||||
v0 = VFMULVF_FLOAT(v0, da,gvl);
|
||||
VSEV_FLOAT(&x[j], v0, gvl);
|
||||
|
||||
v1 = VLEV_FLOAT(&x[j+gvl], gvl);
|
||||
v1 = VFMULVF_FLOAT(v1, da, gvl);
|
||||
VSEV_FLOAT(&x[j+gvl], v1, gvl);
|
||||
}
|
||||
}
|
||||
//tail
|
||||
for(; j <n; ){
|
||||
gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M);
|
||||
v0 = VLEV_FLOAT(&x[j], gvl);
|
||||
v0 = VFMULVF_FLOAT(v0, da, gvl);
|
||||
VSEV_FLOAT(&x[j], v0, gvl);
|
||||
j += gvl;
|
||||
}
|
||||
}
|
||||
}else{
|
||||
if(da == 0.0){
|
||||
gvl = vsetvli(n, RVV_EFLOAT, RVV_M);
|
||||
if(gvl <= n / 2){
|
||||
v0 = VFMVVF_FLOAT(0, gvl);
|
||||
for(i = 0, j = 0; i < n/(2*gvl); i++, j+=2*gvl){
|
||||
VSEV_FLOAT(&x[j], v0, gvl);
|
||||
VSEV_FLOAT(&x[j+gvl], v0, gvl);
|
||||
}
|
||||
}
|
||||
//tail
|
||||
for(; j <n; ){
|
||||
gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M);
|
||||
v0 = VFMVVF_FLOAT(0, gvl);
|
||||
VSEV_FLOAT(&x[j], v0, gvl);
|
||||
j += gvl;
|
||||
}
|
||||
}else{
|
||||
gvl = vsetvli(n, RVV_EFLOAT, RVV_M);
|
||||
BLASLONG stride_x = inc_x * sizeof(FLOAT);
|
||||
BLASLONG ix = 0;
|
||||
if(gvl < n / 2){
|
||||
BLASLONG inc_xv = gvl * inc_x;
|
||||
for(i = 0, j = 0; i < n/(2*gvl); i++, j+=2*gvl){
|
||||
v0 = VLSEV_FLOAT(&x[ix], stride_x, gvl);
|
||||
v0 = VFMULVF_FLOAT(v0, da,gvl);
|
||||
VSSEV_FLOAT(&x[ix], stride_x, v0, gvl);
|
||||
|
||||
v1 = VLSEV_FLOAT(&x[ix+inc_xv], stride_x, gvl);
|
||||
v1 = VFMULVF_FLOAT(v1, da, gvl);
|
||||
VSSEV_FLOAT(&x[ix+inc_xv], stride_x, v1, gvl);
|
||||
ix += inc_xv * 2;
|
||||
}
|
||||
}
|
||||
//tail
|
||||
for(; j <n; ){
|
||||
gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M);
|
||||
v0 = VLSEV_FLOAT(&x[ix], stride_x, gvl);
|
||||
v0 = VFMULVF_FLOAT(v0, da, gvl);
|
||||
VSSEV_FLOAT(&x[ix], stride_x, v0, gvl);
|
||||
j += gvl;
|
||||
ix += inc_x * gvl;
|
||||
}
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,62 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2013, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
/**************************************************************************************
|
||||
* 2013/08/20 Saar
|
||||
* BLASTEST float OK
|
||||
* BLASTEST double OK
|
||||
*
|
||||
**************************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
#include <stdio.h>
|
||||
|
||||
int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *dummy, BLASLONG dummy2)
|
||||
{
|
||||
BLASLONG i=0;
|
||||
BLASLONG ix=0,iy=0;
|
||||
FLOAT temp;
|
||||
|
||||
if ( n < 0 ) return(0);
|
||||
|
||||
while(i < n)
|
||||
{
|
||||
|
||||
temp = x[ix] ;
|
||||
x[ix] = y[iy] ;
|
||||
y[iy] = temp ;
|
||||
|
||||
ix += inc_x ;
|
||||
iy += inc_y ;
|
||||
i++ ;
|
||||
|
||||
}
|
||||
return(0);
|
||||
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,173 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2020, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
#include <stdio.h>
|
||||
#if !defined(DOUBLE)
|
||||
#define RVV_EFLOAT RVV_E32
|
||||
#define RVV_M RVV_M8
|
||||
#define FLOAT_V_T float32xm8_t
|
||||
#define VLEV_FLOAT vlev_float32xm8
|
||||
#define VLSEV_FLOAT vlsev_float32xm8
|
||||
#define VSEV_FLOAT vsev_float32xm8
|
||||
#define VSSEV_FLOAT vssev_float32xm8
|
||||
#else
|
||||
#define RVV_EFLOAT RVV_E64
|
||||
#define RVV_M RVV_M8
|
||||
#define FLOAT_V_T float64xm8_t
|
||||
#define VLEV_FLOAT vlev_float64xm8
|
||||
#define VLSEV_FLOAT vlsev_float64xm8
|
||||
#define VSEV_FLOAT vsev_float64xm8
|
||||
#define VSSEV_FLOAT vssev_float64xm8
|
||||
#endif
|
||||
|
||||
int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *dummy, BLASLONG dummy2)
|
||||
{
|
||||
BLASLONG i = 0, j = 0;
|
||||
BLASLONG ix = 0,iy = 0;
|
||||
BLASLONG stride_x, stride_y;
|
||||
FLOAT_V_T vx0, vx1, vy0, vy1;
|
||||
unsigned int gvl = 0;
|
||||
|
||||
if (n < 0) return(0);
|
||||
if(inc_x == 1 && inc_y == 1){
|
||||
gvl = vsetvli(n, RVV_EFLOAT, RVV_M);
|
||||
if(gvl <= n/2){
|
||||
for(i=0,j=0; i<n/(2*gvl); i++){
|
||||
vx0 = VLEV_FLOAT(&x[j], gvl);
|
||||
vy0 = VLEV_FLOAT(&y[j], gvl);
|
||||
VSEV_FLOAT(&x[j], vy0, gvl);
|
||||
VSEV_FLOAT(&y[j], vx0, gvl);
|
||||
|
||||
vx1 = VLEV_FLOAT(&x[j+gvl], gvl);
|
||||
vy1 = VLEV_FLOAT(&y[j+gvl], gvl);
|
||||
VSEV_FLOAT(&x[j+gvl], vy1, gvl);
|
||||
VSEV_FLOAT(&y[j+gvl], vx1, gvl);
|
||||
j+=gvl * 2;
|
||||
}
|
||||
}
|
||||
for(;j<n;){
|
||||
gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M);
|
||||
vx0 = VLEV_FLOAT(&x[j], gvl);
|
||||
vy0 = VLEV_FLOAT(&y[j], gvl);
|
||||
VSEV_FLOAT(&x[j], vy0, gvl);
|
||||
VSEV_FLOAT(&y[j], vx0, gvl);
|
||||
j+=gvl;
|
||||
}
|
||||
}else if (inc_y == 1){
|
||||
gvl = vsetvli(n, RVV_EFLOAT, RVV_M);
|
||||
stride_x = inc_x * sizeof(FLOAT);
|
||||
if(gvl <= n/2){
|
||||
BLASLONG inc_xv = inc_x * gvl;
|
||||
for(i=0,j=0; i<n/(2*gvl); i++){
|
||||
vx0 = VLSEV_FLOAT(&x[ix], stride_x, gvl);
|
||||
vy0 = VLEV_FLOAT(&y[j], gvl);
|
||||
VSSEV_FLOAT(&x[ix], stride_x, vy0, gvl);
|
||||
VSEV_FLOAT(&y[j], vx0, gvl);
|
||||
|
||||
vx1 = VLSEV_FLOAT(&x[ix+inc_xv], stride_x, gvl);
|
||||
vy1 = VLEV_FLOAT(&y[j+gvl], gvl);
|
||||
VSSEV_FLOAT(&x[ix+inc_xv], stride_x, vy1, gvl);
|
||||
VSEV_FLOAT(&y[j+gvl], vx1, gvl);
|
||||
j += gvl * 2;
|
||||
ix += inc_xv * 2;
|
||||
}
|
||||
}
|
||||
for(;j<n;){
|
||||
gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M);
|
||||
vx0 = VLSEV_FLOAT(&x[ix], stride_x, gvl);
|
||||
vy0 = VLEV_FLOAT(&y[j], gvl);
|
||||
VSSEV_FLOAT(&x[ix], stride_x, vy0, gvl);
|
||||
VSEV_FLOAT(&y[j], vx0, gvl);
|
||||
j += gvl;
|
||||
ix += inc_x * gvl;
|
||||
}
|
||||
}else if(inc_x == 1){
|
||||
gvl = vsetvli(n, RVV_EFLOAT, RVV_M);
|
||||
stride_y = inc_y * sizeof(FLOAT);
|
||||
if(gvl <= n/2){
|
||||
BLASLONG inc_yv = inc_y * gvl;
|
||||
for(i=0,j=0; i<n/(2*gvl); i++){
|
||||
vx0 = VLEV_FLOAT(&x[j], gvl);
|
||||
vy0 = VLSEV_FLOAT(&y[iy], stride_y, gvl);
|
||||
VSEV_FLOAT(&x[j], vy0, gvl);
|
||||
VSSEV_FLOAT(&y[iy], stride_y, vx0, gvl);
|
||||
|
||||
vx1 = VLEV_FLOAT(&x[j+gvl], gvl);
|
||||
vy1 = VLSEV_FLOAT(&y[iy+inc_yv], stride_y, gvl);
|
||||
VSEV_FLOAT(&x[j+gvl], vy1, gvl);
|
||||
VSSEV_FLOAT(&y[iy+inc_yv], stride_y, vx1, gvl);
|
||||
j += gvl * 2;
|
||||
iy += inc_yv * 2;
|
||||
}
|
||||
}
|
||||
for(;j<n;){
|
||||
gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M);
|
||||
vx0 = VLEV_FLOAT(&x[j], gvl);
|
||||
vy0 = VLSEV_FLOAT(&y[iy], stride_y, gvl);
|
||||
VSEV_FLOAT(&x[j], vy0, gvl);
|
||||
VSSEV_FLOAT(&y[iy], stride_y, vx0, gvl);
|
||||
j += gvl;
|
||||
iy += inc_y * gvl;
|
||||
}
|
||||
}else{
|
||||
gvl = vsetvli(n, RVV_EFLOAT, RVV_M);
|
||||
stride_x = inc_x * sizeof(FLOAT);
|
||||
stride_y = inc_y * sizeof(FLOAT);
|
||||
if(gvl <= n/2){
|
||||
BLASLONG inc_xv = inc_x * gvl;
|
||||
BLASLONG inc_yv = inc_y * gvl;
|
||||
for(i=0,j=0; i<n/(2*gvl); i++){
|
||||
vx0 = VLSEV_FLOAT(&x[ix], stride_x, gvl);
|
||||
vy0 = VLSEV_FLOAT(&y[iy], stride_y, gvl);
|
||||
VSSEV_FLOAT(&x[ix], stride_x, vy0, gvl);
|
||||
VSSEV_FLOAT(&y[iy], stride_y, vx0, gvl);
|
||||
|
||||
vx1 = VLSEV_FLOAT(&x[ix+inc_xv], stride_x, gvl);
|
||||
vy1 = VLSEV_FLOAT(&y[iy+inc_yv], stride_y, gvl);
|
||||
VSSEV_FLOAT(&x[ix+inc_xv], stride_x, vy1, gvl);
|
||||
VSSEV_FLOAT(&y[iy+inc_yv], stride_y, vx1, gvl);
|
||||
j += gvl * 2;
|
||||
ix += inc_xv * 2;
|
||||
iy += inc_yv * 2;
|
||||
}
|
||||
}
|
||||
for(;j<n;){
|
||||
gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M);
|
||||
vx0 = VLSEV_FLOAT(&x[ix], stride_x, gvl);
|
||||
vy0 = VLSEV_FLOAT(&y[iy], stride_y, gvl);
|
||||
VSSEV_FLOAT(&x[ix], stride_x, vy0, gvl);
|
||||
VSSEV_FLOAT(&y[iy], stride_y, vx0, gvl);
|
||||
j += gvl;
|
||||
ix += inc_x * gvl;
|
||||
iy += inc_y * gvl;
|
||||
}
|
||||
}
|
||||
return(0);
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,70 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2013, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
|
||||
#include "common.h"
|
||||
|
||||
int CNAME(BLASLONG m, BLASLONG offset, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *buffer)
|
||||
{
|
||||
BLASLONG i;
|
||||
BLASLONG ix,iy;
|
||||
BLASLONG jx,jy;
|
||||
BLASLONG j;
|
||||
FLOAT temp1;
|
||||
FLOAT temp2;
|
||||
|
||||
#if 0
|
||||
if ( m != offset )
|
||||
printf("Symv_L: m=%d offset=%d\n",m,offset);
|
||||
#endif
|
||||
|
||||
jx = 0;
|
||||
jy = 0;
|
||||
|
||||
for (j=0; j<offset; j++)
|
||||
{
|
||||
temp1 = alpha * x[jx];
|
||||
temp2 = 0.0;
|
||||
y[jy] += temp1 * a[j*lda+j];
|
||||
iy = jy;
|
||||
ix = jx;
|
||||
for (i=j+1; i<m; i++)
|
||||
{
|
||||
ix += inc_x;
|
||||
iy += inc_y;
|
||||
y[iy] += temp1 * a[j*lda+i];
|
||||
temp2 += a[j*lda+i] * x[ix];
|
||||
|
||||
}
|
||||
y[jy] += alpha * temp2;
|
||||
jx += inc_x;
|
||||
jy += inc_y;
|
||||
}
|
||||
return(0);
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,265 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2020, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
#if !defined(DOUBLE)
|
||||
#define RVV_EFLOAT RVV_E32
|
||||
#define RVV_M RVV_M4
|
||||
#define FLOAT_V_T float32xm4_t
|
||||
#define VLEV_FLOAT vlev_float32xm4
|
||||
#define VLSEV_FLOAT vlsev_float32xm4
|
||||
#define VSEV_FLOAT vsev_float32xm4
|
||||
#define VSSEV_FLOAT vssev_float32xm4
|
||||
#define VFREDSUM_FLOAT vfredsumvs_float32xm4
|
||||
#define VFMACCVV_FLOAT vfmaccvv_float32xm4
|
||||
#define VFMACCVF_FLOAT vfmaccvf_float32xm4
|
||||
#define VFMVVF_FLOAT vfmvvf_float32xm4
|
||||
#define VFMULVV_FLOAT vfmulvv_float32xm4
|
||||
#else
|
||||
#define RVV_EFLOAT RVV_E64
|
||||
#define RVV_M RVV_M4
|
||||
#define FLOAT_V_T float64xm4_t
|
||||
#define VLEV_FLOAT vlev_float64xm4
|
||||
#define VLSEV_FLOAT vlsev_float64xm4
|
||||
#define VSEV_FLOAT vsev_float64xm4
|
||||
#define VSSEV_FLOAT vssev_float64xm4
|
||||
#define VFREDSUM_FLOAT vfredsumvs_float64xm4
|
||||
#define VFMACCVV_FLOAT vfmaccvv_float64xm4
|
||||
#define VFMACCVF_FLOAT vfmaccvf_float64xm4
|
||||
#define VFMVVF_FLOAT vfmvvf_float64xm4
|
||||
#define VFMULVV_FLOAT vfmulvv_float64xm4
|
||||
#endif
|
||||
|
||||
int CNAME(BLASLONG m, BLASLONG offset, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *buffer)
|
||||
{
|
||||
BLASLONG i, j, k;
|
||||
BLASLONG ix,iy;
|
||||
BLASLONG jx,jy;
|
||||
FLOAT temp1;
|
||||
FLOAT temp2;
|
||||
FLOAT *a_ptr = a;
|
||||
unsigned int gvl = 0;
|
||||
|
||||
FLOAT_V_T va, vx, vy, vr;
|
||||
BLASLONG stride_x, stride_y, inc_xv, inc_yv, len;
|
||||
|
||||
if(inc_x == 1 && inc_y == 1){
|
||||
for (j=0; j<offset; j++)
|
||||
{
|
||||
temp1 = alpha * x[j];
|
||||
temp2 = 0.0;
|
||||
y[j] += temp1 * a_ptr[j];
|
||||
i = j + 1;
|
||||
len = m - i;
|
||||
if(len > 0){
|
||||
gvl = vsetvli(len, RVV_EFLOAT, RVV_M);
|
||||
vr = VFMVVF_FLOAT(0, gvl);
|
||||
for(k = 0; k < len / gvl; k++){
|
||||
va = VLEV_FLOAT(&a_ptr[i], gvl);
|
||||
vy = VLEV_FLOAT(&y[i], gvl);
|
||||
vy = VFMACCVF_FLOAT(vy, temp1, va, gvl);
|
||||
VSEV_FLOAT(&y[i], vy, gvl);
|
||||
|
||||
vx = VLEV_FLOAT(&x[i], gvl);
|
||||
vr = VFMACCVV_FLOAT(vr, vx, va, gvl);
|
||||
|
||||
i += gvl;
|
||||
}
|
||||
va = VFMVVF_FLOAT(0, gvl);
|
||||
va = VFREDSUM_FLOAT(vr, va, gvl);
|
||||
temp2 = va[0];
|
||||
if(i < m){
|
||||
gvl = vsetvli(m-i, RVV_EFLOAT, RVV_M);
|
||||
vy = VLEV_FLOAT(&y[i], gvl);
|
||||
va = VLEV_FLOAT(&a_ptr[i], gvl);
|
||||
vy = VFMACCVF_FLOAT(vy, temp1, va, gvl);
|
||||
VSEV_FLOAT(&y[i], vy, gvl);
|
||||
|
||||
vx = VLEV_FLOAT(&x[i], gvl);
|
||||
vr = VFMULVV_FLOAT(vx, va, gvl);
|
||||
va = VFMVVF_FLOAT(0, gvl);
|
||||
va = VFREDSUM_FLOAT(vr, va, gvl);
|
||||
temp2 += va[0];
|
||||
}
|
||||
}
|
||||
y[j] += alpha * temp2;
|
||||
a_ptr += lda;
|
||||
}
|
||||
}else if(inc_x == 1){
|
||||
jy = 0;
|
||||
stride_y = inc_y * sizeof(FLOAT);
|
||||
for (j=0; j<offset; j++)
|
||||
{
|
||||
temp1 = alpha * x[j];
|
||||
temp2 = 0.0;
|
||||
y[jy] += temp1 * a_ptr[j];
|
||||
iy = jy + inc_y;
|
||||
i = j + 1;
|
||||
len = m - i;
|
||||
if(len > 0){
|
||||
gvl = vsetvli(len, RVV_EFLOAT, RVV_M);
|
||||
inc_yv = inc_y * gvl;
|
||||
vr = VFMVVF_FLOAT(0, gvl);
|
||||
for(k = 0; k < len / gvl; k++){
|
||||
va = VLEV_FLOAT(&a_ptr[i], gvl);
|
||||
vy = VLSEV_FLOAT(&y[iy], stride_y, gvl);
|
||||
vy = VFMACCVF_FLOAT(vy, temp1, va, gvl);
|
||||
VSSEV_FLOAT(&y[iy], stride_y, vy, gvl);
|
||||
|
||||
vx = VLEV_FLOAT(&x[i], gvl);
|
||||
vr = VFMACCVV_FLOAT(vr, vx, va, gvl);
|
||||
|
||||
i += gvl;
|
||||
iy += inc_yv;
|
||||
}
|
||||
va = VFMVVF_FLOAT(0, gvl);
|
||||
va = VFREDSUM_FLOAT(vr, va, gvl);
|
||||
temp2 = va[0];
|
||||
if(i < m){
|
||||
gvl = vsetvli(m-i, RVV_EFLOAT, RVV_M);
|
||||
vy = VLSEV_FLOAT(&y[iy], stride_y, gvl);
|
||||
va = VLEV_FLOAT(&a_ptr[i], gvl);
|
||||
vy = VFMACCVF_FLOAT(vy, temp1, va, gvl);
|
||||
VSSEV_FLOAT(&y[iy], stride_y, vy, gvl);
|
||||
|
||||
vx = VLEV_FLOAT(&x[i], gvl);
|
||||
vr = VFMULVV_FLOAT(vx, va, gvl);
|
||||
va = VFMVVF_FLOAT(0, gvl);
|
||||
va = VFREDSUM_FLOAT(vr, va, gvl);
|
||||
temp2 += va[0];
|
||||
}
|
||||
}
|
||||
y[jy] += alpha * temp2;
|
||||
jy += inc_y;
|
||||
a_ptr += lda;
|
||||
}
|
||||
}else if(inc_y == 1){
|
||||
jx = 0;
|
||||
stride_x = inc_x * sizeof(FLOAT);
|
||||
for (j=0; j<offset; j++)
|
||||
{
|
||||
temp1 = alpha * x[jx];
|
||||
temp2 = 0.0;
|
||||
y[j] += temp1 * a_ptr[j];
|
||||
ix = jx + inc_x;
|
||||
i = j + 1;
|
||||
len = m - i;
|
||||
if(len > 0){
|
||||
gvl = vsetvli(len, RVV_EFLOAT, RVV_M);
|
||||
vr = VFMVVF_FLOAT(0, gvl);
|
||||
inc_xv = inc_x * gvl;
|
||||
for(k = 0; k < len / gvl; k++){
|
||||
va = VLEV_FLOAT(&a_ptr[i], gvl);
|
||||
vy = VLEV_FLOAT(&y[i], gvl);
|
||||
vy = VFMACCVF_FLOAT(vy, temp1, va, gvl);
|
||||
VSEV_FLOAT(&y[i], vy, gvl);
|
||||
|
||||
vx = VLSEV_FLOAT(&x[ix], stride_x, gvl);
|
||||
vr = VFMACCVV_FLOAT(vr, vx, va, gvl);
|
||||
|
||||
i += gvl;
|
||||
ix += inc_xv;
|
||||
}
|
||||
va = VFMVVF_FLOAT(0, gvl);
|
||||
va = VFREDSUM_FLOAT(vr, va, gvl);
|
||||
temp2 = va[0];
|
||||
if(i < m){
|
||||
gvl = vsetvli(m-i, RVV_EFLOAT, RVV_M);
|
||||
vy = VLEV_FLOAT(&y[i], gvl);
|
||||
va = VLEV_FLOAT(&a_ptr[i], gvl);
|
||||
vy = VFMACCVF_FLOAT(vy, temp1, va, gvl);
|
||||
VSEV_FLOAT(&y[i], vy, gvl);
|
||||
|
||||
vx = VLSEV_FLOAT(&x[ix], stride_x, gvl);
|
||||
vr = VFMULVV_FLOAT(vx, va, gvl);
|
||||
va = VFMVVF_FLOAT(0, gvl);
|
||||
va = VFREDSUM_FLOAT(vr, va, gvl);
|
||||
temp2 += va[0];
|
||||
}
|
||||
}
|
||||
y[j] += alpha * temp2;
|
||||
jx += inc_x;
|
||||
a_ptr += lda;
|
||||
}
|
||||
}else{
|
||||
stride_x = inc_x * sizeof(FLOAT);
|
||||
stride_y = inc_y * sizeof(FLOAT);
|
||||
jx = 0;
|
||||
jy = 0;
|
||||
for (j=0; j<offset; j++)
|
||||
{
|
||||
temp1 = alpha * x[jx];
|
||||
temp2 = 0.0;
|
||||
y[jy] += temp1 * a_ptr[j];
|
||||
ix = jx + inc_x;
|
||||
iy = jy + inc_y;
|
||||
i = j + 1;
|
||||
len = m - i;
|
||||
if(len > 0){
|
||||
gvl = vsetvli(len, RVV_EFLOAT, RVV_M);
|
||||
inc_xv = inc_x * gvl;
|
||||
inc_yv = inc_y * gvl;
|
||||
vr = VFMVVF_FLOAT(0, gvl);
|
||||
for(k = 0; k < len / gvl; k++){
|
||||
va = VLEV_FLOAT(&a_ptr[i], gvl);
|
||||
vy = VLSEV_FLOAT(&y[iy], stride_y, gvl);
|
||||
vy = VFMACCVF_FLOAT(vy, temp1, va, gvl);
|
||||
VSSEV_FLOAT(&y[iy], stride_y, vy, gvl);
|
||||
|
||||
vx = VLSEV_FLOAT(&x[ix], stride_x, gvl);
|
||||
vr = VFMACCVV_FLOAT(vr, vx, va, gvl);
|
||||
|
||||
i += gvl;
|
||||
ix += inc_xv;
|
||||
iy += inc_yv;
|
||||
}
|
||||
va = VFMVVF_FLOAT(0, gvl);
|
||||
va = VFREDSUM_FLOAT(vr, va, gvl);
|
||||
temp2 = va[0];
|
||||
if(i < m){
|
||||
gvl = vsetvli(m-i, RVV_EFLOAT, RVV_M);
|
||||
vy = VLSEV_FLOAT(&y[iy], stride_y, gvl);
|
||||
va = VLEV_FLOAT(&a_ptr[i], gvl);
|
||||
vy = VFMACCVF_FLOAT(vy, temp1, va, gvl);
|
||||
VSSEV_FLOAT(&y[iy], stride_y, vy, gvl);
|
||||
|
||||
vx = VLSEV_FLOAT(&x[ix], stride_x, gvl);
|
||||
vr = VFMULVV_FLOAT(vx, va, gvl);
|
||||
va = VFMVVF_FLOAT(0, gvl);
|
||||
va = VFREDSUM_FLOAT(vr, va, gvl);
|
||||
temp2 += va[0];
|
||||
}
|
||||
}
|
||||
y[jy] += alpha * temp2;
|
||||
jx += inc_x;
|
||||
jy += inc_y;
|
||||
a_ptr += lda;
|
||||
}
|
||||
}
|
||||
return(0);
|
||||
}
|
||||
|
|
@ -0,0 +1,71 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2013, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
|
||||
#include "common.h"
|
||||
|
||||
int CNAME(BLASLONG m, BLASLONG offset, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *buffer)
|
||||
{
|
||||
BLASLONG i;
|
||||
BLASLONG ix,iy;
|
||||
BLASLONG jx,jy;
|
||||
BLASLONG j;
|
||||
FLOAT temp1;
|
||||
FLOAT temp2;
|
||||
|
||||
#if 0
|
||||
if( m != offset )
|
||||
printf("Symv_U: m=%d offset=%d\n",m,offset);
|
||||
#endif
|
||||
|
||||
BLASLONG m1 = m - offset;
|
||||
|
||||
jx = m1 * inc_x;
|
||||
jy = m1 * inc_y;
|
||||
|
||||
for (j=m1; j<m; j++)
|
||||
{
|
||||
temp1 = alpha * x[jx];
|
||||
temp2 = 0.0;
|
||||
iy = 0;
|
||||
ix = 0;
|
||||
for (i=0; i<j; i++)
|
||||
{
|
||||
y[iy] += temp1 * a[j*lda+i];
|
||||
temp2 += a[j*lda+i] * x[ix];
|
||||
ix += inc_x;
|
||||
iy += inc_y;
|
||||
|
||||
}
|
||||
y[jy] += temp1 * a[j*lda+j] + alpha * temp2;
|
||||
jx += inc_x;
|
||||
jy += inc_y;
|
||||
}
|
||||
return(0);
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,264 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2020, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
#if !defined(DOUBLE)
|
||||
#define RVV_EFLOAT RVV_E32
|
||||
#define RVV_M RVV_M4
|
||||
#define FLOAT_V_T float32xm4_t
|
||||
#define VLEV_FLOAT vlev_float32xm4
|
||||
#define VLSEV_FLOAT vlsev_float32xm4
|
||||
#define VSEV_FLOAT vsev_float32xm4
|
||||
#define VSSEV_FLOAT vssev_float32xm4
|
||||
#define VFREDSUM_FLOAT vfredsumvs_float32xm4
|
||||
#define VFMACCVV_FLOAT vfmaccvv_float32xm4
|
||||
#define VFMACCVF_FLOAT vfmaccvf_float32xm4
|
||||
#define VFMVVF_FLOAT vfmvvf_float32xm4
|
||||
#define VFDOTVV_FLOAT vfdotvv_float32xm4
|
||||
#define VFMULVV_FLOAT vfmulvv_float32xm4
|
||||
#else
|
||||
#define RVV_EFLOAT RVV_E64
|
||||
#define RVV_M RVV_M4
|
||||
#define FLOAT_V_T float64xm4_t
|
||||
#define VLEV_FLOAT vlev_float64xm4
|
||||
#define VLSEV_FLOAT vlsev_float64xm4
|
||||
#define VSEV_FLOAT vsev_float64xm4
|
||||
#define VSSEV_FLOAT vssev_float64xm4
|
||||
#define VFREDSUM_FLOAT vfredsumvs_float64xm4
|
||||
#define VFMACCVV_FLOAT vfmaccvv_float64xm4
|
||||
#define VFMACCVF_FLOAT vfmaccvf_float64xm4
|
||||
#define VFMVVF_FLOAT vfmvvf_float64xm4
|
||||
#define VFDOTVV_FLOAT vfdotvv_float64xm4
|
||||
#define VFMULVV_FLOAT vfmulvv_float64xm4
|
||||
#endif
|
||||
|
||||
int CNAME(BLASLONG m, BLASLONG offset, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *buffer)
|
||||
{
|
||||
BLASLONG i, j, k;
|
||||
BLASLONG ix,iy;
|
||||
BLASLONG jx,jy;
|
||||
FLOAT temp1;
|
||||
FLOAT temp2;
|
||||
FLOAT *a_ptr = a;
|
||||
unsigned int gvl = 0;
|
||||
|
||||
FLOAT_V_T va, vx, vy, vr;
|
||||
BLASLONG stride_x, stride_y, inc_xv, inc_yv;
|
||||
|
||||
BLASLONG m1 = m - offset;
|
||||
if(inc_x == 1 && inc_y == 1){
|
||||
a_ptr += m1 * lda;
|
||||
for (j=m1; j<m; j++)
|
||||
{
|
||||
temp1 = alpha * x[j];
|
||||
temp2 = 0.0;
|
||||
if(j > 0){
|
||||
i = 0;
|
||||
gvl = vsetvli(j, RVV_EFLOAT, RVV_M);
|
||||
vr = VFMVVF_FLOAT(0, gvl);
|
||||
for(k = 0; k < j / gvl; k++){
|
||||
vy = VLEV_FLOAT(&y[i], gvl);
|
||||
va = VLEV_FLOAT(&a_ptr[i], gvl);
|
||||
vy = VFMACCVF_FLOAT(vy, temp1, va, gvl);
|
||||
VSEV_FLOAT(&y[i], vy, gvl);
|
||||
|
||||
vx = VLEV_FLOAT(&x[i], gvl);
|
||||
vr = VFMACCVV_FLOAT(vr, vx, va, gvl);
|
||||
|
||||
i += gvl;
|
||||
}
|
||||
va = VFMVVF_FLOAT(0, gvl);
|
||||
va = VFREDSUM_FLOAT(vr, va, gvl);
|
||||
temp2 = va[0];
|
||||
if(i < j){
|
||||
gvl = vsetvli(j-i, RVV_EFLOAT, RVV_M);
|
||||
vy = VLEV_FLOAT(&y[i], gvl);
|
||||
va = VLEV_FLOAT(&a_ptr[i], gvl);
|
||||
vy = VFMACCVF_FLOAT(vy, temp1, va, gvl);
|
||||
VSEV_FLOAT(&y[i], vy, gvl);
|
||||
|
||||
vx = VLEV_FLOAT(&x[i], gvl);
|
||||
vr = VFMULVV_FLOAT(vx, va, gvl);
|
||||
va = VFMVVF_FLOAT(0, gvl);
|
||||
va = VFREDSUM_FLOAT(vr, va, gvl);
|
||||
temp2 += va[0];
|
||||
}
|
||||
}
|
||||
y[j] += temp1 * a_ptr[j] + alpha * temp2;
|
||||
a_ptr += lda;
|
||||
}
|
||||
}else if(inc_x == 1){
|
||||
jy = m1 * inc_y;
|
||||
a_ptr += m1 * lda;
|
||||
stride_y = inc_y * sizeof(FLOAT);
|
||||
for (j=m1; j<m; j++)
|
||||
{
|
||||
temp1 = alpha * x[j];
|
||||
temp2 = 0.0;
|
||||
if(j > 0){
|
||||
iy = 0;
|
||||
i = 0;
|
||||
gvl = vsetvli(j, RVV_EFLOAT, RVV_M);
|
||||
inc_yv = inc_y * gvl;
|
||||
vr = VFMVVF_FLOAT(0, gvl);
|
||||
for(k = 0; k < j / gvl; k++){
|
||||
vy = VLSEV_FLOAT(&y[iy], stride_y, gvl);
|
||||
va = VLEV_FLOAT(&a_ptr[i], gvl);
|
||||
vy = VFMACCVF_FLOAT(vy, temp1, va, gvl);
|
||||
VSSEV_FLOAT(&y[iy], stride_y, vy, gvl);
|
||||
|
||||
vx = VLEV_FLOAT(&x[i], gvl);
|
||||
vr = VFMACCVV_FLOAT(vr, vx, va, gvl);
|
||||
|
||||
i += gvl;
|
||||
iy += inc_yv;
|
||||
}
|
||||
va = VFMVVF_FLOAT(0, gvl);
|
||||
va = VFREDSUM_FLOAT(vr, va, gvl);
|
||||
temp2 = va[0];
|
||||
if(i < j){
|
||||
gvl = vsetvli(j-i, RVV_EFLOAT, RVV_M);
|
||||
vy = VLSEV_FLOAT(&y[iy], stride_y, gvl);
|
||||
va = VLEV_FLOAT(&a_ptr[i], gvl);
|
||||
vy = VFMACCVF_FLOAT(vy, temp1, va, gvl);
|
||||
VSSEV_FLOAT(&y[iy], stride_y, vy, gvl);
|
||||
|
||||
vx = VLEV_FLOAT(&x[i], gvl);
|
||||
vr = VFMULVV_FLOAT(vx, va, gvl);
|
||||
va = VFMVVF_FLOAT(0, gvl);
|
||||
va = VFREDSUM_FLOAT(vr, va, gvl);
|
||||
temp2 += va[0];
|
||||
}
|
||||
}
|
||||
y[jy] += temp1 * a_ptr[j] + alpha * temp2;
|
||||
a_ptr += lda;
|
||||
jy += inc_y;
|
||||
}
|
||||
}else if(inc_y == 1){
|
||||
jx = m1 * inc_x;
|
||||
a_ptr += m1 * lda;
|
||||
stride_x = inc_x * sizeof(FLOAT);
|
||||
for (j=m1; j<m; j++)
|
||||
{
|
||||
temp1 = alpha * x[jx];
|
||||
temp2 = 0.0;
|
||||
if(j > 0){
|
||||
ix = 0;
|
||||
i = 0;
|
||||
gvl = vsetvli(j, RVV_EFLOAT, RVV_M);
|
||||
inc_xv = inc_x * gvl;
|
||||
vr = VFMVVF_FLOAT(0, gvl);
|
||||
for(k = 0; k < j / gvl; k++){
|
||||
vy = VLEV_FLOAT(&y[i], gvl);
|
||||
va = VLEV_FLOAT(&a_ptr[i], gvl);
|
||||
vy = VFMACCVF_FLOAT(vy, temp1, va, gvl);
|
||||
VSEV_FLOAT(&y[i], vy, gvl);
|
||||
|
||||
vx = VLSEV_FLOAT(&x[ix], stride_x, gvl);
|
||||
vr = VFMACCVV_FLOAT(vr, vx, va, gvl);
|
||||
|
||||
i += gvl;
|
||||
ix += inc_xv;
|
||||
}
|
||||
va = VFMVVF_FLOAT(0, gvl);
|
||||
va = VFREDSUM_FLOAT(vr, va, gvl);
|
||||
temp2 = va[0];
|
||||
if(i < j){
|
||||
gvl = vsetvli(j-i, RVV_EFLOAT, RVV_M);
|
||||
vy = VLEV_FLOAT(&y[i], gvl);
|
||||
va = VLEV_FLOAT(&a_ptr[i], gvl);
|
||||
vy = VFMACCVF_FLOAT(vy, temp1, va, gvl);
|
||||
VSEV_FLOAT(&y[i], vy, gvl);
|
||||
|
||||
vx = VLSEV_FLOAT(&x[ix], stride_x, gvl);
|
||||
vr = VFMULVV_FLOAT(vx, va, gvl);
|
||||
va = VFMVVF_FLOAT(0, gvl);
|
||||
va = VFREDSUM_FLOAT(vr, va, gvl);
|
||||
temp2 += va[0];
|
||||
}
|
||||
}
|
||||
y[j] += temp1 * a_ptr[j] + alpha * temp2;
|
||||
a_ptr += lda;
|
||||
jx += inc_x;
|
||||
}
|
||||
}else{
|
||||
jx = m1 * inc_x;
|
||||
jy = m1 * inc_y;
|
||||
a_ptr += m1 * lda;
|
||||
stride_x = inc_x * sizeof(FLOAT);
|
||||
stride_y = inc_y * sizeof(FLOAT);
|
||||
for (j=m1; j<m; j++)
|
||||
{
|
||||
temp1 = alpha * x[jx];
|
||||
temp2 = 0.0;
|
||||
if(j > 0){
|
||||
ix = 0;
|
||||
iy = 0;
|
||||
i = 0;
|
||||
gvl = vsetvli(j, RVV_EFLOAT, RVV_M);
|
||||
inc_xv = inc_x * gvl;
|
||||
inc_yv = inc_y * gvl;
|
||||
vr = VFMVVF_FLOAT(0, gvl);
|
||||
for(k = 0; k < j / gvl; k++){
|
||||
vy = VLSEV_FLOAT(&y[iy], stride_y, gvl);
|
||||
va = VLEV_FLOAT(&a_ptr[i], gvl);
|
||||
vy = VFMACCVF_FLOAT(vy, temp1, va, gvl);
|
||||
VSSEV_FLOAT(&y[iy], stride_y, vy, gvl);
|
||||
|
||||
vx = VLSEV_FLOAT(&x[ix], stride_x, gvl);
|
||||
vr = VFMACCVV_FLOAT(vr, vx, va, gvl);
|
||||
|
||||
i += gvl;
|
||||
ix += inc_xv;
|
||||
iy += inc_yv;
|
||||
}
|
||||
va = VFMVVF_FLOAT(0, gvl);
|
||||
va = VFREDSUM_FLOAT(vr, va, gvl);
|
||||
temp2 = va[0];
|
||||
if(i < j){
|
||||
gvl = vsetvli(j-i, RVV_EFLOAT, RVV_M);
|
||||
vy = VLSEV_FLOAT(&y[iy], stride_y, gvl);
|
||||
va = VLEV_FLOAT(&a_ptr[i], gvl);
|
||||
vy = VFMACCVF_FLOAT(vy, temp1, va, gvl);
|
||||
VSSEV_FLOAT(&y[iy], stride_y, vy, gvl);
|
||||
|
||||
vx = VLSEV_FLOAT(&x[ix], stride_x, gvl);
|
||||
vr = VFMULVV_FLOAT(vx, va, gvl);
|
||||
va = VFMVVF_FLOAT(0, gvl);
|
||||
va = VFREDSUM_FLOAT(vr, va, gvl);
|
||||
temp2 += va[0];
|
||||
}
|
||||
}
|
||||
y[jy] += temp1 * a_ptr[j] + alpha * temp2;
|
||||
a_ptr += lda;
|
||||
jx += inc_x;
|
||||
jy += inc_y;
|
||||
}
|
||||
}
|
||||
return(0);
|
||||
}
|
||||
|
|
@ -0,0 +1,79 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2013, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
/**************************************************************************************
|
||||
* 2013/09/14 Saar
|
||||
* BLASTEST float : OK
|
||||
* BLASTEST double : OK
|
||||
* CTEST : NoTest
|
||||
* TEST : NoTest
|
||||
*
|
||||
**************************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
#include <math.h>
|
||||
|
||||
#if defined(DOUBLE)
|
||||
|
||||
#define ABS fabs
|
||||
|
||||
#else
|
||||
|
||||
#define ABS fabsf
|
||||
|
||||
#endif
|
||||
|
||||
#define CABS1(x,i) ABS(x[i])+ABS(x[i+1])
|
||||
|
||||
FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
|
||||
{
|
||||
BLASLONG i=0;
|
||||
BLASLONG ix=0;
|
||||
FLOAT maxf;
|
||||
BLASLONG inc_x2;
|
||||
|
||||
if (n <= 0 || inc_x <= 0) return(0.0);
|
||||
|
||||
inc_x2 = 2 * inc_x;
|
||||
|
||||
maxf = CABS1(x,0);
|
||||
ix += inc_x2;
|
||||
i++;
|
||||
|
||||
while(i < n)
|
||||
{
|
||||
if( CABS1(x,ix) > maxf )
|
||||
{
|
||||
maxf = CABS1(x,ix);
|
||||
}
|
||||
ix += inc_x2;
|
||||
i++;
|
||||
}
|
||||
return(maxf);
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,104 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2020, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
#include <math.h>
|
||||
|
||||
#if !defined(DOUBLE)
|
||||
#define RVV_EFLOAT RVV_E32
|
||||
#define RVV_M RVV_M8
|
||||
#define FLOAT_V_T float32xm8_t
|
||||
#define VLSEV_FLOAT vlsev_float32xm8
|
||||
#define VFREDMAXVS_FLOAT vfredmaxvs_float32xm8
|
||||
#define MASK_T e32xm8_t
|
||||
#define VMFLTVF_FLOAT vmfltvf_e32xm8_float32xm8
|
||||
#define VFMVVF_FLOAT vfmvvf_float32xm8
|
||||
#define VFRSUBVF_MASK_FLOAT vfrsubvf_mask_float32xm8
|
||||
#define VFMAXVV_FLOAT vfmaxvv_float32xm8
|
||||
#define VFADDVV_FLOAT vfaddvv_float32xm8
|
||||
#else
|
||||
#define RVV_EFLOAT RVV_E64
|
||||
#define RVV_M RVV_M8
|
||||
#define FLOAT_V_T float64xm8_t
|
||||
#define VLSEV_FLOAT vlsev_float64xm8
|
||||
#define VFREDMAXVS_FLOAT vfredmaxvs_float64xm8
|
||||
#define MASK_T e64xm8_t
|
||||
#define VMFLTVF_FLOAT vmfltvf_e64xm8_float64xm8
|
||||
#define VFMVVF_FLOAT vfmvvf_float64xm8
|
||||
#define VFRSUBVF_MASK_FLOAT vfrsubvf_mask_float64xm8
|
||||
#define VFMAXVV_FLOAT vfmaxvv_float64xm8
|
||||
#define VFADDVV_FLOAT vfaddvv_float64xm8
|
||||
#endif
|
||||
|
||||
FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
|
||||
{
|
||||
BLASLONG i=0, j=0;
|
||||
BLASLONG ix=0;
|
||||
FLOAT maxf=0.0;
|
||||
if (n <= 0 || inc_x <= 0) return(maxf);
|
||||
unsigned int gvl = 0;
|
||||
FLOAT_V_T v0, v1, v_max;
|
||||
|
||||
MASK_T mask0, mask1;
|
||||
BLASLONG stride_x = inc_x * sizeof(FLOAT) * 2;
|
||||
gvl = vsetvli(n, RVV_EFLOAT, RVV_M);
|
||||
v_max = VFMVVF_FLOAT(0, gvl);
|
||||
BLASLONG inc_xv = inc_x * gvl * 2;
|
||||
for(; i<n/gvl; i++){
|
||||
v0 = VLSEV_FLOAT(&x[ix], stride_x, gvl);
|
||||
v1 = VLSEV_FLOAT(&x[ix+1], stride_x, gvl);
|
||||
mask0 = VMFLTVF_FLOAT(v0, 0, gvl);
|
||||
v0 = VFRSUBVF_MASK_FLOAT(v0, v0, 0, mask0, gvl);
|
||||
mask1 = VMFLTVF_FLOAT(v1, 0, gvl);
|
||||
v1 = VFRSUBVF_MASK_FLOAT(v1, v1, 0, mask1, gvl);
|
||||
|
||||
v0 = VFADDVV_FLOAT(v0, v1, gvl);
|
||||
v_max = VFMAXVV_FLOAT(v_max, v0, gvl);
|
||||
|
||||
j += gvl;
|
||||
ix += inc_xv;
|
||||
}
|
||||
v0 = VFMVVF_FLOAT(0, gvl);
|
||||
v_max = VFREDMAXVS_FLOAT(v_max, v0, gvl);
|
||||
maxf = v_max[0];
|
||||
|
||||
if(j<n){
|
||||
gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M);
|
||||
v0 = VLSEV_FLOAT(&x[ix], stride_x, gvl);
|
||||
v1 = VLSEV_FLOAT(&x[ix+1], stride_x, gvl);
|
||||
mask0 = VMFLTVF_FLOAT(v0, 0, gvl);
|
||||
v0 = VFRSUBVF_MASK_FLOAT(v0, v0, 0, mask0, gvl);
|
||||
mask1 = VMFLTVF_FLOAT(v1, 0, gvl);
|
||||
v1 = VFRSUBVF_MASK_FLOAT(v1, v1, 0, mask1, gvl);
|
||||
v1 = VFADDVV_FLOAT(v0, v1, gvl);
|
||||
v0 = VFMVVF_FLOAT(0, gvl);
|
||||
v_max = VFREDMAXVS_FLOAT(v1, v0, gvl);
|
||||
if(v_max[0] > maxf)
|
||||
maxf = v_max[0];
|
||||
}
|
||||
return(maxf);
|
||||
}
|
|
@ -0,0 +1,79 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2013, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
/**************************************************************************************
|
||||
* 2013/09/14 Saar
|
||||
* BLASTEST float : OK
|
||||
* BLASTEST double : OK
|
||||
* CTEST : NoTest
|
||||
* TEST : NoTest
|
||||
*
|
||||
**************************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
#include <math.h>
|
||||
|
||||
#if defined(DOUBLE)
|
||||
|
||||
#define ABS fabs
|
||||
|
||||
#else
|
||||
|
||||
#define ABS fabsf
|
||||
|
||||
#endif
|
||||
|
||||
#define CABS1(x,i) ABS(x[i])+ABS(x[i+1])
|
||||
|
||||
FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
|
||||
{
|
||||
BLASLONG i=0;
|
||||
BLASLONG ix=0;
|
||||
FLOAT minf;
|
||||
BLASLONG inc_x2;
|
||||
|
||||
if (n <= 0 || inc_x <= 0) return(0.0);
|
||||
|
||||
inc_x2 = 2 * inc_x;
|
||||
|
||||
minf = CABS1(x,0);
|
||||
ix += inc_x2;
|
||||
i++;
|
||||
|
||||
while(i < n)
|
||||
{
|
||||
if( CABS1(x,ix) < minf )
|
||||
{
|
||||
minf = CABS1(x,ix);
|
||||
}
|
||||
ix += inc_x2;
|
||||
i++;
|
||||
}
|
||||
return(minf);
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,104 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2020, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
#include <math.h>
|
||||
#include <float.h>
|
||||
|
||||
#if !defined(DOUBLE)
|
||||
#define RVV_EFLOAT RVV_E32
|
||||
#define RVV_M RVV_M8
|
||||
#define FLOAT_V_T float32xm8_t
|
||||
#define VLSEV_FLOAT vlsev_float32xm8
|
||||
#define VFREDMINVS_FLOAT vfredminvs_float32xm8
|
||||
#define MASK_T e32xm8_t
|
||||
#define VMFLTVF_FLOAT vmfltvf_e32xm8_float32xm8
|
||||
#define VFMVVF_FLOAT vfmvvf_float32xm8
|
||||
#define VFRSUBVF_MASK_FLOAT vfrsubvf_mask_float32xm8
|
||||
#define VFMINVV_FLOAT vfminvv_float32xm8
|
||||
#define VFADDVV_FLOAT vfaddvv_float32xm8
|
||||
#else
|
||||
#define RVV_EFLOAT RVV_E64
|
||||
#define RVV_M RVV_M8
|
||||
#define FLOAT_V_T float64xm8_t
|
||||
#define VLSEV_FLOAT vlsev_float64xm8
|
||||
#define VFREDMINVS_FLOAT vfredminvs_float64xm8
|
||||
#define MASK_T e64xm8_t
|
||||
#define VMFLTVF_FLOAT vmfltvf_e64xm8_float64xm8
|
||||
#define VFMVVF_FLOAT vfmvvf_float64xm8
|
||||
#define VFRSUBVF_MASK_FLOAT vfrsubvf_mask_float64xm8
|
||||
#define VFMINVV_FLOAT vfminvv_float64xm8
|
||||
#define VFADDVV_FLOAT vfaddvv_float64xm8
|
||||
#endif
|
||||
|
||||
FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
|
||||
{
|
||||
BLASLONG i=0, j=0;
|
||||
BLASLONG ix=0;
|
||||
if (n <= 0 || inc_x <= 0) return(0.0);
|
||||
FLOAT minf=FLT_MAX;
|
||||
unsigned int gvl = 0;
|
||||
FLOAT_V_T v0, v1, v_min;
|
||||
MASK_T mask0, mask1;
|
||||
BLASLONG stride_x = inc_x * sizeof(FLOAT) * 2;
|
||||
gvl = vsetvli(n, RVV_EFLOAT, RVV_M);
|
||||
v_min = VFMVVF_FLOAT(FLT_MAX, gvl);
|
||||
BLASLONG inc_xv = inc_x * gvl * 2;
|
||||
for(; i<n/gvl; i++){
|
||||
v0 = VLSEV_FLOAT(&x[ix], stride_x, gvl);
|
||||
v1 = VLSEV_FLOAT(&x[ix+1], stride_x, gvl);
|
||||
mask0 = VMFLTVF_FLOAT(v0, 0, gvl);
|
||||
v0 = VFRSUBVF_MASK_FLOAT(v0, v0, 0, mask0, gvl);
|
||||
mask1 = VMFLTVF_FLOAT(v1, 0, gvl);
|
||||
v1 = VFRSUBVF_MASK_FLOAT(v1, v1, 0, mask1, gvl);
|
||||
|
||||
v0 = VFADDVV_FLOAT(v0, v1, gvl);
|
||||
v_min = VFMINVV_FLOAT(v_min, v0, gvl);
|
||||
|
||||
j += gvl;
|
||||
ix += inc_xv;
|
||||
}
|
||||
v0 = VFMVVF_FLOAT(FLT_MAX, gvl);
|
||||
v_min = VFREDMINVS_FLOAT(v_min, v0, gvl);
|
||||
minf = v_min[0];
|
||||
|
||||
if(j<n){
|
||||
gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M);
|
||||
v0 = VLSEV_FLOAT(&x[ix], stride_x, gvl);
|
||||
v1 = VLSEV_FLOAT(&x[ix+1], stride_x, gvl);
|
||||
mask0 = VMFLTVF_FLOAT(v0, 0, gvl);
|
||||
v0 = VFRSUBVF_MASK_FLOAT(v0, v0, 0, mask0, gvl);
|
||||
mask1 = VMFLTVF_FLOAT(v1, 0, gvl);
|
||||
v1 = VFRSUBVF_MASK_FLOAT(v1, v1, 0, mask1, gvl);
|
||||
v1 = VFADDVV_FLOAT(v0, v1, gvl);
|
||||
v0 = VFMVVF_FLOAT(FLT_MAX, gvl);
|
||||
v_min = VFREDMINVS_FLOAT(v1, v0, gvl);
|
||||
if(v_min[0] < minf)
|
||||
minf = v_min[0];
|
||||
}
|
||||
return(minf);
|
||||
}
|
|
@ -0,0 +1,72 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2013, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
/**************************************************************************************
|
||||
* 2013/09/14 Saar
|
||||
* BLASTEST float : OK
|
||||
* BLASTEST double : OK
|
||||
* CTEST : OK
|
||||
* TEST : OK
|
||||
*
|
||||
**************************************************************************************/
|
||||
|
||||
|
||||
#include "common.h"
|
||||
#include <math.h>
|
||||
|
||||
#if defined(DOUBLE)
|
||||
|
||||
#define ABS fabs
|
||||
|
||||
#else
|
||||
|
||||
#define ABS fabsf
|
||||
|
||||
#endif
|
||||
|
||||
#define CABS1(x,i) ABS(x[i])+ABS(x[i+1])
|
||||
|
||||
FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
|
||||
{
|
||||
BLASLONG i=0;
|
||||
FLOAT sumf = 0.0;
|
||||
BLASLONG inc_x2;
|
||||
|
||||
if (n <= 0 || inc_x <= 0) return(sumf);
|
||||
|
||||
inc_x2 = 2 * inc_x;
|
||||
|
||||
n *= inc_x2;
|
||||
while(i < n)
|
||||
{
|
||||
sumf += CABS1(x,i);
|
||||
i += inc_x2;
|
||||
}
|
||||
return(sumf);
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,136 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2020, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
#include <math.h>
|
||||
|
||||
#if !defined(DOUBLE)
|
||||
#define RVV_EFLOAT RVV_E32
|
||||
#define RVV_M RVV_M8
|
||||
#define FLOAT_V_T float32xm8_t
|
||||
#define VLEV_FLOAT vlev_float32xm8
|
||||
#define VLSEV_FLOAT vlsev_float32xm8
|
||||
#define VFREDSUMVS_FLOAT vfredsumvs_float32xm8
|
||||
#define MASK_T e32xm8_t
|
||||
#define VMFLTVF_FLOAT vmfltvf_e32xm8_float32xm8
|
||||
#define VFMVVF_FLOAT vfmvvf_float32xm8
|
||||
#define VFRSUBVF_MASK_FLOAT vfrsubvf_mask_float32xm8
|
||||
#define VFADDVV_FLOAT vfaddvv_float32xm8
|
||||
#else
|
||||
#define RVV_EFLOAT RVV_E64
|
||||
#define RVV_M RVV_M8
|
||||
#define FLOAT_V_T float64xm8_t
|
||||
#define VLEV_FLOAT vlev_float64xm8
|
||||
#define VLSEV_FLOAT vlsev_float64xm8
|
||||
#define VFREDSUMVS_FLOAT vfredsumvs_float64xm8
|
||||
#define MASK_T e64xm8_t
|
||||
#define VMFLTVF_FLOAT vmfltvf_e64xm8_float64xm8
|
||||
#define VFMVVF_FLOAT vfmvvf_float64xm8
|
||||
#define VFRSUBVF_MASK_FLOAT vfrsubvf_mask_float64xm8
|
||||
#define VFADDVV_FLOAT vfaddvv_float64xm8
|
||||
#endif
|
||||
FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
|
||||
{
|
||||
BLASLONG i=0, j=0;
|
||||
BLASLONG ix=0;
|
||||
FLOAT asumf=0.0;
|
||||
if (n <= 0 || inc_x <= 0) return(asumf);
|
||||
unsigned int gvl = 0;
|
||||
FLOAT_V_T v0, v1, v_zero,v_sum;
|
||||
|
||||
MASK_T mask0, mask1;
|
||||
if(inc_x == 1){
|
||||
BLASLONG n2 = n * 2;
|
||||
gvl = vsetvli(n2, RVV_EFLOAT, RVV_M);
|
||||
v_zero = VFMVVF_FLOAT(0, gvl);
|
||||
if(gvl <= n2/2){
|
||||
v_sum = VFMVVF_FLOAT(0, gvl);
|
||||
for(i=0,j=0; i<n2/(gvl*2); i++){
|
||||
v0 = VLEV_FLOAT(&x[j], gvl);
|
||||
mask0 = VMFLTVF_FLOAT(v0, 0, gvl);
|
||||
v0 = VFRSUBVF_MASK_FLOAT(v0, v0, 0, mask0, gvl);
|
||||
v_sum = VFADDVV_FLOAT(v_sum, v0, gvl);
|
||||
|
||||
v1 = VLEV_FLOAT(&x[j+gvl], gvl);
|
||||
mask1 = VMFLTVF_FLOAT(v1, 0, gvl);
|
||||
v1 = VFRSUBVF_MASK_FLOAT(v1, v1, 0, mask1, gvl);
|
||||
v_sum = VFADDVV_FLOAT(v_sum, v1, gvl);
|
||||
j += gvl * 2;
|
||||
}
|
||||
v0 = VFREDSUMVS_FLOAT(v_sum, v_zero, gvl);
|
||||
asumf += v0[0];
|
||||
}
|
||||
for(;j<n2;){
|
||||
gvl = vsetvli(n2-j, RVV_EFLOAT, RVV_M);
|
||||
v0 = VLEV_FLOAT(&x[j], gvl);
|
||||
mask0 = VMFLTVF_FLOAT(v0, 0, gvl);
|
||||
v0 = VFRSUBVF_MASK_FLOAT(v0, v0, 0, mask0, gvl);
|
||||
v0 = VFREDSUMVS_FLOAT(v0, v_zero, gvl);
|
||||
asumf += v0[0];
|
||||
j += gvl;
|
||||
}
|
||||
}else{
|
||||
gvl = vsetvli(n, RVV_EFLOAT, RVV_M);
|
||||
unsigned int stride_x = inc_x * sizeof(FLOAT) * 2;
|
||||
v_zero = VFMVVF_FLOAT(0, gvl);
|
||||
|
||||
BLASLONG inc_xv = inc_x * 2 * gvl;
|
||||
v_sum = VFMVVF_FLOAT(0, gvl);
|
||||
for(i=0,j=0; i<n/gvl; i++){
|
||||
v0 = VLSEV_FLOAT(&x[ix], stride_x, gvl);
|
||||
mask0 = VMFLTVF_FLOAT(v0, 0, gvl);
|
||||
v0 = VFRSUBVF_MASK_FLOAT(v0, v0, 0, mask0, gvl);
|
||||
v_sum = VFADDVV_FLOAT(v_sum, v0, gvl);
|
||||
|
||||
v1 = VLSEV_FLOAT(&x[ix+1], stride_x, gvl);
|
||||
mask1 = VMFLTVF_FLOAT(v1, 0, gvl);
|
||||
v1 = VFRSUBVF_MASK_FLOAT(v1, v1, 0, mask1, gvl);
|
||||
v_sum = VFADDVV_FLOAT(v_sum, v1, gvl);
|
||||
|
||||
j += gvl;
|
||||
ix += inc_xv;
|
||||
}
|
||||
v0 = VFREDSUMVS_FLOAT(v_sum, v_zero, gvl);
|
||||
asumf += v0[0];
|
||||
if(j<n){
|
||||
gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M);
|
||||
v0 = VLSEV_FLOAT(&x[ix], stride_x, gvl);
|
||||
mask0 = VMFLTVF_FLOAT(v0, 0, gvl);
|
||||
v0 = VFRSUBVF_MASK_FLOAT(v0, v0, 0, mask0, gvl);
|
||||
v1 = VLSEV_FLOAT(&x[ix+1], stride_x, gvl);
|
||||
|
||||
mask1 = VMFLTVF_FLOAT(v1, 0, gvl);
|
||||
v1 = VFRSUBVF_MASK_FLOAT(v1, v1, 0, mask1, gvl);
|
||||
v_sum = VFADDVV_FLOAT(v0, v1, gvl);
|
||||
v_sum = VFREDSUMVS_FLOAT(v_sum, v_zero, gvl);
|
||||
asumf += v_sum[0];
|
||||
}
|
||||
}
|
||||
return(asumf);
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,118 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2013, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
/***************************************************************************
|
||||
* 2014/06/07 Saar
|
||||
*
|
||||
***************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
|
||||
int CNAME(BLASLONG n, FLOAT alpha_r, FLOAT alpha_i, FLOAT *x, BLASLONG inc_x, FLOAT beta_r, FLOAT beta_i,FLOAT *y, BLASLONG inc_y)
|
||||
{
|
||||
BLASLONG i=0;
|
||||
BLASLONG ix,iy;
|
||||
FLOAT temp;
|
||||
BLASLONG inc_x2, inc_y2;
|
||||
|
||||
if ( n <= 0 ) return(0);
|
||||
|
||||
ix = 0;
|
||||
iy = 0;
|
||||
|
||||
inc_x2 = 2 * inc_x;
|
||||
inc_y2 = 2 * inc_y;
|
||||
|
||||
if ( beta_r == 0.0 && beta_i == 0.0)
|
||||
{
|
||||
if ( alpha_r == 0.0 && alpha_i == 0.0 )
|
||||
{
|
||||
|
||||
while(i < n)
|
||||
{
|
||||
y[iy] = 0.0 ;
|
||||
y[iy+1] = 0.0 ;
|
||||
iy += inc_y2 ;
|
||||
i++ ;
|
||||
}
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
|
||||
while(i < n)
|
||||
{
|
||||
y[iy] = ( alpha_r * x[ix] - alpha_i * x[ix+1] ) ;
|
||||
y[iy+1] = ( alpha_r * x[ix+1] + alpha_i * x[ix] ) ;
|
||||
ix += inc_x2 ;
|
||||
iy += inc_y2 ;
|
||||
i++ ;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
if ( alpha_r == 0.0 && alpha_i == 0.0 )
|
||||
{
|
||||
|
||||
while(i < n)
|
||||
{
|
||||
temp = ( beta_r * y[iy] - beta_i * y[iy+1] ) ;
|
||||
y[iy+1] = ( beta_r * y[iy+1] + beta_i * y[iy] ) ;
|
||||
y[iy] = temp;
|
||||
iy += inc_y2 ;
|
||||
i++ ;
|
||||
}
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
|
||||
while(i < n)
|
||||
{
|
||||
temp = ( alpha_r * x[ix] - alpha_i * x[ix+1] ) + ( beta_r * y[iy] - beta_i * y[iy+1] ) ;
|
||||
y[iy+1] = ( alpha_r * x[ix+1] + alpha_i * x[ix] ) + ( beta_r * y[iy+1] + beta_i * y[iy] ) ;
|
||||
y[iy] = temp;
|
||||
ix += inc_x2 ;
|
||||
iy += inc_y2 ;
|
||||
i++ ;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
||||
return(0);
|
||||
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,197 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2020, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
|
||||
#if !defined(DOUBLE)
|
||||
#define RVV_EFLOAT RVV_E32
|
||||
#define RVV_M RVV_M4
|
||||
#define FLOAT_V_T float32xm4_t
|
||||
#define VLSEV_FLOAT vlsev_float32xm4
|
||||
#define VSSEV_FLOAT vssev_float32xm4
|
||||
#define VFMACCVF_FLOAT vfmaccvf_float32xm4
|
||||
#define VFMVVF_FLOAT vfmvvf_float32xm4
|
||||
#define VFMULVF_FLOAT vfmulvf_float32xm4
|
||||
#define VFMSACVF_FLOAT vfmsacvf_float32xm4
|
||||
#define VFNMSACVF_FLOAT vfnmsacvf_float32xm4
|
||||
#else
|
||||
#define RVV_EFLOAT RVV_E64
|
||||
#define RVV_M RVV_M4
|
||||
#define FLOAT_V_T float64xm4_t
|
||||
#define VLSEV_FLOAT vlsev_float64xm4
|
||||
#define VSSEV_FLOAT vssev_float64xm4
|
||||
#define VFMACCVF_FLOAT vfmaccvf_float64xm4
|
||||
#define VFMVVF_FLOAT vfmvvf_float64xm4
|
||||
#define VFMULVF_FLOAT vfmulvf_float64xm4
|
||||
#define VFMSACVF_FLOAT vfmsacvf_float64xm4
|
||||
#define VFNMSACVF_FLOAT vfnmsacvf_float64xm4
|
||||
#endif
|
||||
|
||||
int CNAME(BLASLONG n, FLOAT alpha_r, FLOAT alpha_i, FLOAT *x, BLASLONG inc_x, FLOAT beta_r, FLOAT beta_i, FLOAT *y, BLASLONG inc_y)
|
||||
{
|
||||
if (n <= 0) return(0);
|
||||
|
||||
BLASLONG i=0, j=0;
|
||||
unsigned int gvl = 0;
|
||||
FLOAT_V_T vx0, vx1;
|
||||
FLOAT_V_T vy0, vy1;
|
||||
|
||||
BLASLONG stride_x, stride_y, ix = 0, iy = 0;
|
||||
stride_x = inc_x * 2 * sizeof(FLOAT);
|
||||
stride_y = inc_y * 2 * sizeof(FLOAT);
|
||||
|
||||
if(beta_r == 0.0 && beta_i == 0.0){
|
||||
if(alpha_r == 0.0 && alpha_i == 0.0){
|
||||
if(inc_y == 1){
|
||||
memset(&y[0], 0, 2 * n * sizeof(FLOAT));
|
||||
}else{
|
||||
gvl = vsetvli(n, RVV_EFLOAT, RVV_M);
|
||||
if(gvl <= n/2){
|
||||
vy0 = VFMVVF_FLOAT(0.0, gvl);
|
||||
BLASLONG inc_yv = inc_y * gvl * 2;
|
||||
for(i=0,j=0;i<n/(gvl*2);i++){
|
||||
VSSEV_FLOAT(&y[iy], stride_y, vy0, gvl);
|
||||
VSSEV_FLOAT(&y[iy+1], stride_y, vy0, gvl);
|
||||
VSSEV_FLOAT(&y[iy+inc_yv], stride_y, vy0, gvl);
|
||||
VSSEV_FLOAT(&y[iy+1+inc_yv], stride_y, vy0, gvl);
|
||||
j += gvl * 2;
|
||||
iy += inc_yv * 2;
|
||||
}
|
||||
}
|
||||
for(;j<n;){
|
||||
gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M);
|
||||
vy0 = VFMVVF_FLOAT(0.0, gvl);
|
||||
VSSEV_FLOAT(&y[iy], stride_y, vy0, gvl);
|
||||
VSSEV_FLOAT(&y[iy+1], stride_y, vy0, gvl);
|
||||
j += gvl;
|
||||
iy += inc_y * gvl * 2;
|
||||
}
|
||||
}
|
||||
}else{
|
||||
gvl = vsetvli(n, RVV_EFLOAT, RVV_M);
|
||||
BLASLONG inc_xv = inc_x * gvl * 2;
|
||||
BLASLONG inc_yv = inc_y * gvl * 2;
|
||||
for(i=0,j=0; i<n/gvl; i++){
|
||||
vx0 = VLSEV_FLOAT(&x[ix], stride_x, gvl);
|
||||
vx1 = VLSEV_FLOAT(&x[ix+1], stride_x, gvl);
|
||||
vy0 = VFMULVF_FLOAT(vx1, alpha_i, gvl);
|
||||
vy0 = VFMSACVF_FLOAT(vy0, alpha_r, vx0, gvl);
|
||||
VSSEV_FLOAT(&y[iy], stride_y, vy0, gvl);
|
||||
vy1 = VFMULVF_FLOAT(vx1, alpha_r, gvl);
|
||||
vy1 = VFMACCVF_FLOAT(vy1, alpha_i, vx0, gvl);
|
||||
VSSEV_FLOAT(&y[iy+1], stride_y, vy1, gvl);
|
||||
|
||||
j += gvl;
|
||||
ix += inc_xv;
|
||||
iy += inc_yv;
|
||||
}
|
||||
if(j<n){
|
||||
gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M);
|
||||
vx0 = VLSEV_FLOAT(&x[ix], stride_x, gvl);
|
||||
vx1 = VLSEV_FLOAT(&x[ix+1], stride_x, gvl);
|
||||
vy0 = VFMULVF_FLOAT(vx1, alpha_i, gvl);
|
||||
vy0 = VFMSACVF_FLOAT(vy0, alpha_r, vx0, gvl);
|
||||
VSSEV_FLOAT(&y[iy], stride_y, vy0, gvl);
|
||||
vy1 = VFMULVF_FLOAT(vx1, alpha_r, gvl);
|
||||
vy1 = VFMACCVF_FLOAT(vy1, alpha_i, vx0, gvl);
|
||||
VSSEV_FLOAT(&y[iy+1], stride_y, vy1, gvl);
|
||||
}
|
||||
}
|
||||
}else{
|
||||
FLOAT_V_T v0, v1;
|
||||
if(alpha_r == 0.0 && alpha_i == 0.0){
|
||||
gvl = vsetvli(n, RVV_EFLOAT, RVV_M);
|
||||
BLASLONG inc_yv = inc_y * gvl * 2;
|
||||
for(i=0,j=0;i<n/gvl;i++){
|
||||
vy0 = VLSEV_FLOAT(&y[iy], stride_y, gvl);
|
||||
vy1 = VLSEV_FLOAT(&y[iy+1], stride_y, gvl);
|
||||
v0 = VFMULVF_FLOAT(vy1, beta_i, gvl);
|
||||
v0 = VFMSACVF_FLOAT(v0, beta_r, vy0, gvl);
|
||||
VSSEV_FLOAT(&y[iy], stride_y, v0, gvl);
|
||||
v1 = VFMULVF_FLOAT(vy1, beta_r, gvl);
|
||||
v1 = VFMACCVF_FLOAT(v1, beta_i, vy0, gvl);
|
||||
VSSEV_FLOAT(&y[iy+1], stride_y, v1, gvl);
|
||||
j += gvl;
|
||||
iy += inc_yv;
|
||||
}
|
||||
if(j<n){
|
||||
gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M);
|
||||
vy0 = VLSEV_FLOAT(&y[iy], stride_y, gvl);
|
||||
vy1 = VLSEV_FLOAT(&y[iy+1], stride_y, gvl);
|
||||
v0 = VFMULVF_FLOAT(vy1, beta_i, gvl);
|
||||
v0 = VFMSACVF_FLOAT(v0, beta_r, vy0, gvl);
|
||||
VSSEV_FLOAT(&y[iy], stride_y, v0, gvl);
|
||||
v1 = VFMULVF_FLOAT(vy1, beta_r, gvl);
|
||||
v1 = VFMACCVF_FLOAT(v1, beta_i, vy0, gvl);
|
||||
VSSEV_FLOAT(&y[iy+1], stride_y, v1, gvl);
|
||||
}
|
||||
}else{
|
||||
gvl = vsetvli(n, RVV_EFLOAT, RVV_M);
|
||||
BLASLONG inc_xv = inc_x * gvl * 2;
|
||||
BLASLONG inc_yv = inc_y * gvl * 2;
|
||||
for(i=0,j=0; i<n/gvl; i++){
|
||||
vx0 = VLSEV_FLOAT(&x[ix], stride_x, gvl);
|
||||
vx1 = VLSEV_FLOAT(&x[ix+1], stride_x, gvl);
|
||||
vy0 = VLSEV_FLOAT(&y[iy], stride_y, gvl);
|
||||
vy1 = VLSEV_FLOAT(&y[iy+1], stride_y, gvl);
|
||||
v0 = VFMULVF_FLOAT(vx0, alpha_r, gvl);
|
||||
v0 = VFNMSACVF_FLOAT(v0, alpha_i, vx1, gvl);
|
||||
v0 = VFMACCVF_FLOAT(v0, beta_r, vy0, gvl);
|
||||
v0 = VFNMSACVF_FLOAT(v0, beta_i, vy1, gvl);
|
||||
VSSEV_FLOAT(&y[iy], stride_y, v0, gvl);
|
||||
v1 = VFMULVF_FLOAT(vx1, alpha_r, gvl);
|
||||
v1 = VFMACCVF_FLOAT(v1, alpha_i, vx0, gvl);
|
||||
v1 = VFMACCVF_FLOAT(v1, beta_r, vy1, gvl);
|
||||
v1 = VFMACCVF_FLOAT(v1, beta_i, vy0, gvl);
|
||||
VSSEV_FLOAT(&y[iy+1], stride_y, v1, gvl);
|
||||
|
||||
j += gvl;
|
||||
ix += inc_xv;
|
||||
iy += inc_yv;
|
||||
}
|
||||
if(j<n){
|
||||
gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M);
|
||||
vx0 = VLSEV_FLOAT(&x[ix], stride_x, gvl);
|
||||
vx1 = VLSEV_FLOAT(&x[ix+1], stride_x, gvl);
|
||||
vy0 = VLSEV_FLOAT(&y[iy], stride_y, gvl);
|
||||
vy1 = VLSEV_FLOAT(&y[iy+1], stride_y, gvl);
|
||||
v0 = VFMULVF_FLOAT(vx0, alpha_r, gvl);
|
||||
v0 = VFNMSACVF_FLOAT(v0, alpha_i, vx1, gvl);
|
||||
v0 = VFMACCVF_FLOAT(v0, beta_r, vy0, gvl);
|
||||
v0 = VFNMSACVF_FLOAT(v0, beta_i, vy1, gvl);
|
||||
VSSEV_FLOAT(&y[iy], stride_y, v0, gvl);
|
||||
v1 = VFMULVF_FLOAT(vx1, alpha_r, gvl);
|
||||
v1 = VFMACCVF_FLOAT(v1, alpha_i, vx0, gvl);
|
||||
v1 = VFMACCVF_FLOAT(v1, beta_r, vy1, gvl);
|
||||
v1 = VFMACCVF_FLOAT(v1, beta_i, vy0, gvl);
|
||||
VSSEV_FLOAT(&y[iy+1], stride_y, v1, gvl);
|
||||
}
|
||||
}
|
||||
}
|
||||
return(0);
|
||||
}
|
||||
|
|
@ -0,0 +1,74 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2013, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
/**************************************************************************************
|
||||
* 2013/09/15 Saar
|
||||
* BLASTEST float : OK
|
||||
* BLASTEST double : OK
|
||||
* CTEST : OK
|
||||
* TEST : OK
|
||||
*
|
||||
**************************************************************************************/
|
||||
|
||||
|
||||
#include "common.h"
|
||||
|
||||
int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *dummy, BLASLONG dummy2)
|
||||
{
|
||||
BLASLONG i=0;
|
||||
BLASLONG ix,iy;
|
||||
BLASLONG inc_x2;
|
||||
BLASLONG inc_y2;
|
||||
|
||||
if ( n < 0 ) return(0);
|
||||
if ( da_r == 0.0 && da_i == 0.0 ) return(0);
|
||||
|
||||
ix = 0;
|
||||
iy = 0;
|
||||
|
||||
inc_x2 = 2 * inc_x;
|
||||
inc_y2 = 2 * inc_y;
|
||||
|
||||
while(i < n)
|
||||
{
|
||||
#if !defined(CONJ)
|
||||
y[iy] += ( da_r * x[ix] - da_i * x[ix+1] ) ;
|
||||
y[iy+1] += ( da_r * x[ix+1] + da_i * x[ix] ) ;
|
||||
#else
|
||||
y[iy] += ( da_r * x[ix] + da_i * x[ix+1] ) ;
|
||||
y[iy+1] -= ( da_r * x[ix+1] - da_i * x[ix] ) ;
|
||||
#endif
|
||||
ix += inc_x2 ;
|
||||
iy += inc_y2 ;
|
||||
i++ ;
|
||||
|
||||
}
|
||||
return(0);
|
||||
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,107 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2013, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
|
||||
#if !defined(DOUBLE)
|
||||
#define RVV_EFLOAT RVV_E32
|
||||
#define RVV_M RVV_M4
|
||||
#define FLOAT_V_T float32xm4_t
|
||||
#define VLSEV_FLOAT vlsev_float32xm4
|
||||
#define VSSEV_FLOAT vssev_float32xm4
|
||||
#define VFMACCVF_FLOAT vfmaccvf_float32xm4
|
||||
#define VFNMSACVF_FLOAT vfnmsacvf_float32xm4
|
||||
#else
|
||||
#define RVV_EFLOAT RVV_E64
|
||||
#define RVV_M RVV_M4
|
||||
#define FLOAT_V_T float64xm4_t
|
||||
#define VLSEV_FLOAT vlsev_float64xm4
|
||||
#define VSSEV_FLOAT vssev_float64xm4
|
||||
#define VFMACCVF_FLOAT vfmaccvf_float64xm4
|
||||
#define VFNMSACVF_FLOAT vfnmsacvf_float64xm4
|
||||
#endif
|
||||
|
||||
int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *dummy, BLASLONG dummy2)
|
||||
{
|
||||
BLASLONG i = 0, j = 0;
|
||||
BLASLONG ix = 0,iy = 0;
|
||||
if(n < 0) return(0);
|
||||
if(da_r == 0.0 && da_i == 0.0) return(0);
|
||||
unsigned int gvl = 0;
|
||||
BLASLONG stride_x = inc_x * 2 * sizeof(FLOAT);
|
||||
BLASLONG stride_y = inc_y * 2 * sizeof(FLOAT);
|
||||
|
||||
FLOAT_V_T vx0, vx1, vy0, vy1;
|
||||
gvl = vsetvli(n, RVV_EFLOAT, RVV_M);
|
||||
BLASLONG inc_xv = inc_x * 2 * gvl;
|
||||
BLASLONG inc_yv = inc_y * 2 * gvl;
|
||||
for(i=0,j=0; i < n/gvl; i++){
|
||||
vx0 = VLSEV_FLOAT(&x[ix], stride_x, gvl);
|
||||
vx1 = VLSEV_FLOAT(&x[ix+1], stride_x, gvl);
|
||||
vy0 = VLSEV_FLOAT(&y[iy], stride_y, gvl);
|
||||
vy1 = VLSEV_FLOAT(&y[iy+1], stride_y, gvl);
|
||||
#if !defined(CONJ)
|
||||
vy0 = VFMACCVF_FLOAT(vy0, da_r, vx0, gvl);
|
||||
vy0 = VFNMSACVF_FLOAT(vy0, da_i, vx1, gvl);
|
||||
vy1 = VFMACCVF_FLOAT(vy1, da_r, vx1, gvl);
|
||||
vy1 = VFMACCVF_FLOAT(vy1, da_i, vx0, gvl);
|
||||
#else
|
||||
vy0 = VFMACCVF_FLOAT(vy0, da_r, vx0, gvl);
|
||||
vy0 = VFMACCVF_FLOAT(vy0, da_i, vx1, gvl);
|
||||
vy1 = VFNMSACVF_FLOAT(vy1, da_r, vx1, gvl);
|
||||
vy1 = VFMACCVF_FLOAT(vy1, da_i, vx0, gvl);
|
||||
#endif
|
||||
VSSEV_FLOAT(&y[iy], stride_y, vy0, gvl);
|
||||
VSSEV_FLOAT(&y[iy+1], stride_y, vy1, gvl);
|
||||
j += gvl;
|
||||
ix += inc_xv;
|
||||
iy += inc_yv;
|
||||
}
|
||||
if(j < n){
|
||||
gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M);
|
||||
vx0 = VLSEV_FLOAT(&x[ix], stride_x, gvl);
|
||||
vx1 = VLSEV_FLOAT(&x[ix+1], stride_x, gvl);
|
||||
vy0 = VLSEV_FLOAT(&y[iy], stride_y, gvl);
|
||||
vy1 = VLSEV_FLOAT(&y[iy+1], stride_y, gvl);
|
||||
#if !defined(CONJ)
|
||||
vy0 = VFMACCVF_FLOAT(vy0, da_r, vx0, gvl);
|
||||
vy0 = VFNMSACVF_FLOAT(vy0, da_i, vx1, gvl);
|
||||
vy1 = VFMACCVF_FLOAT(vy1, da_r, vx1, gvl);
|
||||
vy1 = VFMACCVF_FLOAT(vy1, da_i, vx0, gvl);
|
||||
#else
|
||||
vy0 = VFMACCVF_FLOAT(vy0, da_r, vx0, gvl);
|
||||
vy0 = VFMACCVF_FLOAT(vy0, da_i, vx1, gvl);
|
||||
vy1 = VFNMSACVF_FLOAT(vy1, da_r, vx1, gvl);
|
||||
vy1 = VFMACCVF_FLOAT(vy1, da_i, vx0, gvl);
|
||||
#endif
|
||||
VSSEV_FLOAT(&y[iy], stride_y, vy0, gvl);
|
||||
VSSEV_FLOAT(&y[iy+1], stride_y, vy1, gvl);
|
||||
}
|
||||
return(0);
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,65 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2013, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
/**************************************************************************************
|
||||
* 2013/09/14 Saar
|
||||
* BLASTEST float : OK
|
||||
* BLASTEST double : OK
|
||||
* CTEST : OK
|
||||
* TEST : OK
|
||||
*
|
||||
**************************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
|
||||
int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
|
||||
{
|
||||
BLASLONG i=0;
|
||||
BLASLONG ix=0,iy=0;
|
||||
BLASLONG inc_x2;
|
||||
BLASLONG inc_y2;
|
||||
|
||||
if ( n < 0 ) return(0);
|
||||
|
||||
inc_x2 = 2 * inc_x;
|
||||
inc_y2 = 2 * inc_y;
|
||||
|
||||
while(i < n)
|
||||
{
|
||||
|
||||
y[iy] = x[ix] ;
|
||||
y[iy+1] = x[ix+1] ;
|
||||
ix += inc_x2;
|
||||
iy += inc_y2;
|
||||
i++ ;
|
||||
|
||||
}
|
||||
return(0);
|
||||
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,92 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2013, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
#if !defined(DOUBLE)
|
||||
#define RVV_EFLOAT RVV_E32
|
||||
#define RVV_M RVV_M4
|
||||
#define FLOAT_V_T float32xm4_t
|
||||
#define VLSEV_FLOAT vlsev_float32xm4
|
||||
#define VSSEV_FLOAT vssev_float32xm4
|
||||
#else
|
||||
#define RVV_EFLOAT RVV_E64
|
||||
#define RVV_M RVV_M4
|
||||
#define FLOAT_V_T float64xm4_t
|
||||
#define VLSEV_FLOAT vlsev_float64xm4
|
||||
#define VSSEV_FLOAT vssev_float64xm4
|
||||
#endif
|
||||
|
||||
|
||||
int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
|
||||
{
|
||||
BLASLONG i = 0, j = 0;
|
||||
BLASLONG ix = 0,iy = 0;
|
||||
if(n < 0) return(0);
|
||||
|
||||
unsigned int gvl = 0;
|
||||
if(inc_x == 1 && inc_y == 1){
|
||||
memcpy(&y[0], &x[0], n * 2 * sizeof(FLOAT));
|
||||
}else{
|
||||
FLOAT_V_T vx0, vx1, vx2, vx3;
|
||||
gvl = vsetvli(n, RVV_EFLOAT, RVV_M);
|
||||
BLASLONG stride_x = inc_x * 2 * sizeof(FLOAT);
|
||||
BLASLONG stride_y = inc_y * 2 * sizeof(FLOAT);
|
||||
if(gvl <= n/2){
|
||||
BLASLONG inc_xv = inc_x * gvl * 2;
|
||||
BLASLONG inc_yv = inc_y * gvl * 2;
|
||||
for(i=0,j=0; i < n/(2*gvl); i++){
|
||||
vx0 = VLSEV_FLOAT(&x[ix], stride_x, gvl);
|
||||
vx1 = VLSEV_FLOAT(&x[ix+1], stride_x, gvl);
|
||||
VSSEV_FLOAT(&y[iy], stride_y, vx0, gvl);
|
||||
VSSEV_FLOAT(&y[iy+1], stride_y, vx1, gvl);
|
||||
|
||||
vx2 = VLSEV_FLOAT(&x[ix+inc_xv], stride_x, gvl);
|
||||
vx3 = VLSEV_FLOAT(&x[ix+1+inc_xv], stride_x, gvl);
|
||||
VSSEV_FLOAT(&y[iy+inc_yv], stride_y, vx2, gvl);
|
||||
VSSEV_FLOAT(&y[iy+1+inc_yv], stride_y, vx3, gvl);
|
||||
|
||||
j += gvl * 2;
|
||||
ix += inc_xv * 2;
|
||||
iy += inc_yv * 2;
|
||||
}
|
||||
}
|
||||
for(;j<n;){
|
||||
gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M);
|
||||
vx0 = VLSEV_FLOAT(&x[ix], stride_x, gvl);
|
||||
vx1 = VLSEV_FLOAT(&x[ix+1], stride_x, gvl);
|
||||
VSSEV_FLOAT(&y[iy], stride_y, vx0, gvl);
|
||||
VSSEV_FLOAT(&y[iy+1], stride_y, vx1, gvl);
|
||||
|
||||
j += gvl;
|
||||
ix += inc_x * 2 * gvl;
|
||||
iy += inc_y * 2 * gvl;
|
||||
}
|
||||
}
|
||||
return(0);
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,80 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2013, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
/**************************************************************************************
|
||||
* 2013/09/14 Saar
|
||||
* BLASTEST float : FAIL
|
||||
* BLASTEST double : FAIL
|
||||
* CTEST : OK
|
||||
* TEST : OK
|
||||
*
|
||||
**************************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
|
||||
OPENBLAS_COMPLEX_FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
|
||||
|
||||
{
|
||||
BLASLONG i=0;
|
||||
BLASLONG ix=0,iy=0;
|
||||
FLOAT dot[2];
|
||||
OPENBLAS_COMPLEX_FLOAT result;
|
||||
BLASLONG inc_x2;
|
||||
BLASLONG inc_y2;
|
||||
|
||||
dot[0]=0.0;
|
||||
dot[1]=0.0;
|
||||
|
||||
CREAL(result) = 0.0 ;
|
||||
CIMAG(result) = 0.0 ;
|
||||
|
||||
if ( n < 1 ) return(result);
|
||||
|
||||
inc_x2 = 2 * inc_x ;
|
||||
inc_y2 = 2 * inc_y ;
|
||||
|
||||
while(i < n)
|
||||
{
|
||||
#if !defined(CONJ)
|
||||
dot[0] += ( x[ix] * y[iy] - x[ix+1] * y[iy+1] ) ;
|
||||
dot[1] += ( x[ix+1] * y[iy] + x[ix] * y[iy+1] ) ;
|
||||
#else
|
||||
dot[0] += ( x[ix] * y[iy] + x[ix+1] * y[iy+1] ) ;
|
||||
dot[1] -= ( x[ix+1] * y[iy] - x[ix] * y[iy+1] ) ;
|
||||
#endif
|
||||
ix += inc_x2 ;
|
||||
iy += inc_y2 ;
|
||||
i++ ;
|
||||
|
||||
}
|
||||
CREAL(result) = dot[0];
|
||||
CIMAG(result) = dot[1];
|
||||
return(result);
|
||||
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,135 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2020, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
#if !defined(DOUBLE)
|
||||
#define RVV_EFLOAT RVV_E32
|
||||
#define RVV_M RVV_M4
|
||||
#define FLOAT_V_T float32xm4_t
|
||||
#define VLEV_FLOAT vlev_float32xm4
|
||||
#define VLSEV_FLOAT vlsev_float32xm4
|
||||
#define VFREDSUM_FLOAT vfredsumvs_float32xm4
|
||||
#define VFMACCVV_FLOAT vfmaccvv_float32xm4
|
||||
#define VFMVVF_FLOAT vfmvvf_float32xm4
|
||||
#define VFDOTVV_FLOAT vfdotvv_float32xm4
|
||||
#define VFMULVV_FLOAT vfmulvv_float32xm4
|
||||
#define VFMSACVV_FLOAT vfmsacvv_float32xm4
|
||||
#define VFNMSACVV_FLOAT vfnmsacvv_float32xm4
|
||||
#else
|
||||
#define RVV_EFLOAT RVV_E64
|
||||
#define RVV_M RVV_M4
|
||||
#define FLOAT_V_T float64xm4_t
|
||||
#define VLEV_FLOAT vlev_float64xm4
|
||||
#define VLSEV_FLOAT vlsev_float64xm4
|
||||
#define VFREDSUM_FLOAT vfredsumvs_float64xm4
|
||||
#define VFMACCVV_FLOAT vfmaccvv_float64xm4
|
||||
#define VFMVVF_FLOAT vfmvvf_float64xm4
|
||||
#define VFDOTVV_FLOAT vfdotvv_float64xm4
|
||||
#define VFMULVV_FLOAT vfmulvv_float64xm4
|
||||
#define VFMSACVV_FLOAT vfmsacvv_float64xm4
|
||||
#define VFNMSACVV_FLOAT vfnmsacvv_float64xm4
|
||||
#endif
|
||||
|
||||
OPENBLAS_COMPLEX_FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
|
||||
{
|
||||
BLASLONG i=0, j=0;
|
||||
BLASLONG ix=0,iy=0;
|
||||
FLOAT dot[2];
|
||||
OPENBLAS_COMPLEX_FLOAT result;
|
||||
|
||||
dot[0]=0.0;
|
||||
dot[1]=0.0;
|
||||
|
||||
CREAL(result) = 0.0;
|
||||
CIMAG(result) = 0.0;
|
||||
|
||||
if ( n < 1 ) return(result);
|
||||
|
||||
unsigned int gvl = 0;
|
||||
|
||||
FLOAT_V_T vr0, vr1, vx0, vx1, vy0, vy1;
|
||||
gvl = vsetvli(n, RVV_EFLOAT, RVV_M);
|
||||
vr0 = VFMVVF_FLOAT(0, gvl);
|
||||
vr1 = VFMVVF_FLOAT(0, gvl);
|
||||
BLASLONG stride_x = inc_x * 2 * sizeof(FLOAT);
|
||||
BLASLONG stride_y = inc_y * 2 * sizeof(FLOAT);
|
||||
BLASLONG inc_xv = inc_x * 2 * gvl;
|
||||
BLASLONG inc_yv = inc_y * 2 * gvl;
|
||||
|
||||
for(i=0,j=0; i<n/gvl; i++){
|
||||
vx0 = VLSEV_FLOAT(&x[ix], stride_x, gvl);
|
||||
vx1 = VLSEV_FLOAT(&x[ix+1], stride_x, gvl);
|
||||
vy0 = VLSEV_FLOAT(&y[iy], stride_y, gvl);
|
||||
vy1 = VLSEV_FLOAT(&y[iy+1], stride_y, gvl);
|
||||
|
||||
vr0 = VFMACCVV_FLOAT(vr0, vx0, vy0, gvl);
|
||||
vr1 = VFMACCVV_FLOAT(vr1, vx0, vy1, gvl);
|
||||
#if !defined(CONJ)
|
||||
vr0 = VFNMSACVV_FLOAT(vr0, vx1, vy1, gvl);
|
||||
vr1 = VFMACCVV_FLOAT(vr1, vx1, vy0, gvl);
|
||||
#else
|
||||
vr0 = VFMACCVV_FLOAT(vr0, vx1, vy1, gvl);
|
||||
vr1 = VFNMSACVV_FLOAT(vr1, vx1, vy0, gvl);
|
||||
#endif
|
||||
j += gvl;
|
||||
ix += inc_xv;
|
||||
iy += inc_yv;
|
||||
}
|
||||
vx0 = VFMVVF_FLOAT(0, gvl);
|
||||
vr0 = VFREDSUM_FLOAT(vr0, vx0, gvl);
|
||||
dot[0] += vr0[0];
|
||||
vr1 = VFREDSUM_FLOAT(vr1, vx0, gvl);
|
||||
dot[1] += vr1[0];
|
||||
//tail
|
||||
if(j < n){
|
||||
gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M);
|
||||
vx0 = VLSEV_FLOAT(&x[ix], stride_x, gvl);
|
||||
vx1 = VLSEV_FLOAT(&x[ix+1], stride_x, gvl);
|
||||
vy0 = VLSEV_FLOAT(&y[iy], stride_y, gvl);
|
||||
vy1 = VLSEV_FLOAT(&y[iy+1], stride_y, gvl);
|
||||
|
||||
#if !defined(CONJ)
|
||||
vr0 = VFMULVV_FLOAT(vx1, vy1, gvl);
|
||||
vr0 = VFMSACVV_FLOAT(vr0, vx0, vy0, gvl);
|
||||
vr1 = VFMULVV_FLOAT(vx0, vy1, gvl);
|
||||
vr1 = VFMACCVV_FLOAT(vr1, vx1, vy0, gvl);
|
||||
#else
|
||||
vr0 = VFMULVV_FLOAT(vx0, vy0, gvl);
|
||||
vr0 = VFMACCVV_FLOAT(vr0, vx1, vy1, gvl);
|
||||
vr1 = VFMULVV_FLOAT(vx1, vy0, gvl);
|
||||
vr1 = VFMSACVV_FLOAT(vr1, vx0, vy1, gvl);
|
||||
#endif
|
||||
vx0 = VFMVVF_FLOAT(0, gvl);
|
||||
vr0 = VFREDSUM_FLOAT(vr0, vx0, gvl);
|
||||
dot[0] += vr0[0];
|
||||
vr1 = VFREDSUM_FLOAT(vr1, vx0, gvl);
|
||||
dot[1] += vr1[0];
|
||||
}
|
||||
CREAL(result) = dot[0];
|
||||
CIMAG(result) = dot[1];
|
||||
return(result);
|
||||
}
|
|
@ -0,0 +1,157 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2013, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
/**************************************************************************************
|
||||
* * 2013/11/23 Saar
|
||||
* * BLASTEST float : OK
|
||||
* * BLASTEST double : OK
|
||||
* CTEST : OK
|
||||
* TEST : OK
|
||||
* *
|
||||
* **************************************************************************************/
|
||||
|
||||
|
||||
#include "common.h"
|
||||
|
||||
int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha_r, FLOAT alpha_i, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *buffer)
|
||||
{
|
||||
BLASLONG i;
|
||||
BLASLONG ix,iy;
|
||||
BLASLONG j;
|
||||
FLOAT *a_ptr;
|
||||
FLOAT temp_r,temp_i;
|
||||
BLASLONG inc_x2,inc_y2;
|
||||
BLASLONG lda2;
|
||||
BLASLONG i2;
|
||||
|
||||
lda2 = 2*lda;
|
||||
|
||||
ix = 0;
|
||||
a_ptr = a;
|
||||
|
||||
if ( inc_x == 1 && inc_y == 1 )
|
||||
{
|
||||
|
||||
for (j=0; j<n; j++)
|
||||
{
|
||||
|
||||
#if !defined(XCONJ)
|
||||
temp_r = alpha_r * x[ix] - alpha_i * x[ix+1];
|
||||
temp_i = alpha_r * x[ix+1] + alpha_i * x[ix];
|
||||
#else
|
||||
temp_r = alpha_r * x[ix] + alpha_i * x[ix+1];
|
||||
temp_i = alpha_r * x[ix+1] - alpha_i * x[ix];
|
||||
#endif
|
||||
iy = 0;
|
||||
i2=0;
|
||||
|
||||
for (i=0; i<m; i++)
|
||||
{
|
||||
#if !defined(CONJ)
|
||||
|
||||
#if !defined(XCONJ)
|
||||
y[iy] += temp_r * a_ptr[i2] - temp_i * a_ptr[i2+1];
|
||||
y[iy+1] += temp_r * a_ptr[i2+1] + temp_i * a_ptr[i2];
|
||||
#else
|
||||
y[iy] += temp_r * a_ptr[i2] + temp_i * a_ptr[i2+1];
|
||||
y[iy+1] += temp_r * a_ptr[i2+1] - temp_i * a_ptr[i2];
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
||||
#if !defined(XCONJ)
|
||||
y[iy] += temp_r * a_ptr[i2] + temp_i * a_ptr[i2+1];
|
||||
y[iy+1] -= temp_r * a_ptr[i2+1] - temp_i * a_ptr[i2];
|
||||
#else
|
||||
y[iy] += temp_r * a_ptr[i2] - temp_i * a_ptr[i2+1];
|
||||
y[iy+1] -= temp_r * a_ptr[i2+1] + temp_i * a_ptr[i2];
|
||||
#endif
|
||||
|
||||
#endif
|
||||
i2 += 2;
|
||||
iy += 2;
|
||||
}
|
||||
a_ptr += lda2;
|
||||
ix += 2;
|
||||
}
|
||||
|
||||
return(0);
|
||||
|
||||
}
|
||||
|
||||
|
||||
inc_x2 = 2 * inc_x;
|
||||
inc_y2 = 2 * inc_y;
|
||||
|
||||
for (j=0; j<n; j++)
|
||||
{
|
||||
|
||||
#if !defined(XCONJ)
|
||||
temp_r = alpha_r * x[ix] - alpha_i * x[ix+1];
|
||||
temp_i = alpha_r * x[ix+1] + alpha_i * x[ix];
|
||||
#else
|
||||
temp_r = alpha_r * x[ix] + alpha_i * x[ix+1];
|
||||
temp_i = alpha_r * x[ix+1] - alpha_i * x[ix];
|
||||
#endif
|
||||
iy = 0;
|
||||
i2=0;
|
||||
|
||||
for (i=0; i<m; i++)
|
||||
{
|
||||
#if !defined(CONJ)
|
||||
|
||||
#if !defined(XCONJ)
|
||||
y[iy] += temp_r * a_ptr[i2] - temp_i * a_ptr[i2+1];
|
||||
y[iy+1] += temp_r * a_ptr[i2+1] + temp_i * a_ptr[i2];
|
||||
#else
|
||||
y[iy] += temp_r * a_ptr[i2] + temp_i * a_ptr[i2+1];
|
||||
y[iy+1] += temp_r * a_ptr[i2+1] - temp_i * a_ptr[i2];
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
||||
#if !defined(XCONJ)
|
||||
y[iy] += temp_r * a_ptr[i2] + temp_i * a_ptr[i2+1];
|
||||
y[iy+1] -= temp_r * a_ptr[i2+1] - temp_i * a_ptr[i2];
|
||||
#else
|
||||
y[iy] += temp_r * a_ptr[i2] - temp_i * a_ptr[i2+1];
|
||||
y[iy+1] -= temp_r * a_ptr[i2+1] + temp_i * a_ptr[i2];
|
||||
#endif
|
||||
|
||||
#endif
|
||||
i2 += 2;
|
||||
iy += inc_y2;
|
||||
}
|
||||
a_ptr += lda2;
|
||||
ix += inc_x2;
|
||||
}
|
||||
|
||||
|
||||
return(0);
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,175 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2020, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
#if !defined(DOUBLE)
|
||||
#define RVV_EFLOAT RVV_E32
|
||||
#define RVV_M RVV_M4
|
||||
#define FLOAT_V_T float32xm4_t
|
||||
#define VLEV_FLOAT vlev_float32xm4
|
||||
#define VLSEV_FLOAT vlsev_float32xm4
|
||||
#define VSEV_FLOAT vsev_float32xm4
|
||||
#define VSSEV_FLOAT vssev_float32xm4
|
||||
#define VFMACCVF_FLOAT vfmaccvf_float32xm4
|
||||
#define VFNMSACVF_FLOAT vfnmsacvf_float32xm4
|
||||
#else
|
||||
#define RVV_EFLOAT RVV_E64
|
||||
#define RVV_M RVV_M4
|
||||
#define FLOAT_V_T float64xm4_t
|
||||
#define VLEV_FLOAT vlev_float64xm4
|
||||
#define VLSEV_FLOAT vlsev_float64xm4
|
||||
#define VSEV_FLOAT vsev_float64xm4
|
||||
#define VSSEV_FLOAT vssev_float64xm4
|
||||
#define VFMACCVF_FLOAT vfmaccvf_float64xm4
|
||||
#define VFNMSACVF_FLOAT vfnmsacvf_float64xm4
|
||||
#endif
|
||||
|
||||
int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha_r, FLOAT alpha_i, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *buffer)
|
||||
{
|
||||
BLASLONG i = 0, j = 0, k = 0;
|
||||
BLASLONG ix = 0, iy = 0;
|
||||
FLOAT *a_ptr = a;
|
||||
FLOAT temp_r = 0.0, temp_i = 0.0;
|
||||
FLOAT_V_T va0, va1, vy0, vy1;
|
||||
unsigned int gvl = 0;
|
||||
BLASLONG stride_a = sizeof(FLOAT) * 2;
|
||||
BLASLONG stride_y = inc_y * sizeof(FLOAT) * 2;
|
||||
gvl = vsetvli(m, RVV_EFLOAT, RVV_M);
|
||||
BLASLONG inc_yv = inc_y * gvl * 2;
|
||||
BLASLONG inc_x2 = inc_x * 2;
|
||||
BLASLONG lda2 = lda * 2;
|
||||
for(k=0,j=0; k<m/gvl; k++){
|
||||
a_ptr = a;
|
||||
ix = 0;
|
||||
vy0 = VLSEV_FLOAT(&y[iy], stride_y, gvl);
|
||||
vy1 = VLSEV_FLOAT(&y[iy+1], stride_y, gvl);
|
||||
for(i = 0; i < n; i++){
|
||||
#if !defined(XCONJ)
|
||||
temp_r = alpha_r * x[ix] - alpha_i * x[ix+1];
|
||||
temp_i = alpha_r * x[ix+1] + alpha_i * x[ix];
|
||||
#else
|
||||
temp_r = alpha_r * x[ix] + alpha_i * x[ix+1];
|
||||
temp_i = alpha_r * x[ix+1] - alpha_i * x[ix];
|
||||
#endif
|
||||
|
||||
va0 = VLSEV_FLOAT(&a_ptr[j], stride_a, gvl);
|
||||
va1 = VLSEV_FLOAT(&a_ptr[j+1], stride_a, gvl);
|
||||
#if !defined(CONJ)
|
||||
#if !defined(XCONJ)
|
||||
vy0 = VFMACCVF_FLOAT(vy0, temp_r, va0, gvl);
|
||||
vy0 = VFNMSACVF_FLOAT(vy0, temp_i, va1, gvl);
|
||||
vy1 = VFMACCVF_FLOAT(vy1, temp_r, va1, gvl);
|
||||
vy1 = VFMACCVF_FLOAT(vy1, temp_i, va0, gvl);
|
||||
#else
|
||||
|
||||
vy0 = VFMACCVF_FLOAT(vy0, temp_r, va0, gvl);
|
||||
vy0 = VFMACCVF_FLOAT(vy0, temp_i, va1, gvl);
|
||||
vy1 = VFMACCVF_FLOAT(vy1, temp_r, va1, gvl);
|
||||
vy1 = VFNMSACVF_FLOAT(vy1, temp_i, va0, gvl);
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
||||
#if !defined(XCONJ)
|
||||
vy0 = VFMACCVF_FLOAT(vy0, temp_r, va0, gvl);
|
||||
vy0 = VFMACCVF_FLOAT(vy0, temp_i, va1, gvl);
|
||||
vy1 = VFNMSACVF_FLOAT(vy1, temp_r, va1, gvl);
|
||||
vy1 = VFMACCVF_FLOAT(vy1, temp_i, va0, gvl);
|
||||
#else
|
||||
vy0 = VFMACCVF_FLOAT(vy0, temp_r, va0, gvl);
|
||||
vy0 = VFNMSACVF_FLOAT(vy0, temp_i, va1, gvl);
|
||||
vy1 = VFNMSACVF_FLOAT(vy1, temp_r, va1, gvl);
|
||||
vy1 = VFNMSACVF_FLOAT(vy1, temp_i, va0, gvl);
|
||||
#endif
|
||||
|
||||
#endif
|
||||
a_ptr += lda2;
|
||||
ix += inc_x2;
|
||||
}
|
||||
VSSEV_FLOAT(&y[iy], stride_y, vy0, gvl);
|
||||
VSSEV_FLOAT(&y[iy+1], stride_y, vy1, gvl);
|
||||
j += gvl * 2;
|
||||
iy += inc_yv;
|
||||
}
|
||||
//tail
|
||||
if(j/2 < m){
|
||||
gvl = vsetvli(m-j/2, RVV_EFLOAT, RVV_M);
|
||||
a_ptr = a;
|
||||
ix = 0;
|
||||
vy0 = VLSEV_FLOAT(&y[iy], stride_y, gvl);
|
||||
vy1 = VLSEV_FLOAT(&y[iy+1], stride_y, gvl);
|
||||
for(i = 0; i < n; i++){
|
||||
#if !defined(XCONJ)
|
||||
temp_r = alpha_r * x[ix] - alpha_i * x[ix+1];
|
||||
temp_i = alpha_r * x[ix+1] + alpha_i * x[ix];
|
||||
#else
|
||||
temp_r = alpha_r * x[ix] + alpha_i * x[ix+1];
|
||||
temp_i = alpha_r * x[ix+1] - alpha_i * x[ix];
|
||||
#endif
|
||||
|
||||
va0 = VLSEV_FLOAT(&a_ptr[j], stride_a, gvl);
|
||||
va1 = VLSEV_FLOAT(&a_ptr[j+1], stride_a, gvl);
|
||||
#if !defined(CONJ)
|
||||
|
||||
#if !defined(XCONJ)
|
||||
vy0 = VFMACCVF_FLOAT(vy0, temp_r, va0, gvl);
|
||||
vy0 = VFNMSACVF_FLOAT(vy0, temp_i, va1, gvl);
|
||||
vy1 = VFMACCVF_FLOAT(vy1, temp_r, va1, gvl);
|
||||
vy1 = VFMACCVF_FLOAT(vy1, temp_i, va0, gvl);
|
||||
#else
|
||||
|
||||
vy0 = VFMACCVF_FLOAT(vy0, temp_r, va0, gvl);
|
||||
vy0 = VFMACCVF_FLOAT(vy0, temp_i, va1, gvl);
|
||||
vy1 = VFMACCVF_FLOAT(vy1, temp_r, va1, gvl);
|
||||
vy1 = VFNMSACVF_FLOAT(vy1, temp_i, va0, gvl);
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
||||
#if !defined(XCONJ)
|
||||
vy0 = VFMACCVF_FLOAT(vy0, temp_r, va0, gvl);
|
||||
vy0 = VFMACCVF_FLOAT(vy0, temp_i, va1, gvl);
|
||||
vy1 = VFNMSACVF_FLOAT(vy1, temp_r, va1, gvl);
|
||||
vy1 = VFMACCVF_FLOAT(vy1, temp_i, va0, gvl);
|
||||
#else
|
||||
vy0 = VFMACCVF_FLOAT(vy0, temp_r, va0, gvl);
|
||||
vy0 = VFNMSACVF_FLOAT(vy0, temp_i, va1, gvl);
|
||||
vy1 = VFNMSACVF_FLOAT(vy1, temp_r, va1, gvl);
|
||||
vy1 = VFNMSACVF_FLOAT(vy1, temp_i, va0, gvl);
|
||||
#endif
|
||||
|
||||
#endif
|
||||
a_ptr += lda2;
|
||||
ix += inc_x2;
|
||||
}
|
||||
VSSEV_FLOAT(&y[iy], stride_y, vy0, gvl);
|
||||
VSSEV_FLOAT(&y[iy+1], stride_y, vy1, gvl);
|
||||
}
|
||||
return(0);
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,140 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2013, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
/**************************************************************************************
|
||||
* * 2013/11/23 Saar
|
||||
* * BLASTEST float : OK
|
||||
* * BLASTEST double : OK
|
||||
* CTEST : OK
|
||||
* TEST : OK
|
||||
* *
|
||||
* **************************************************************************************/
|
||||
|
||||
|
||||
#include "common.h"
|
||||
|
||||
int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha_r, FLOAT alpha_i, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *buffer)
|
||||
{
|
||||
BLASLONG i;
|
||||
BLASLONG ix,iy;
|
||||
BLASLONG j;
|
||||
FLOAT *a_ptr;
|
||||
FLOAT temp_r,temp_i;
|
||||
BLASLONG inc_x2,inc_y2;
|
||||
BLASLONG lda2;
|
||||
BLASLONG i2;
|
||||
|
||||
lda2 = 2*lda;
|
||||
|
||||
iy = 0;
|
||||
a_ptr = a;
|
||||
|
||||
if ( inc_x == 1 && inc_y == 1 )
|
||||
{
|
||||
|
||||
for (j=0; j<n; j++)
|
||||
{
|
||||
temp_r = 0.0;
|
||||
temp_i = 0.0;
|
||||
ix = 0;
|
||||
i2=0;
|
||||
|
||||
for (i=0; i<m; i++)
|
||||
{
|
||||
|
||||
#if ( !defined(CONJ) && !defined(XCONJ) ) || ( defined(CONJ) && defined(XCONJ) )
|
||||
temp_r += a_ptr[i2] * x[ix] - a_ptr[i2+1] * x[ix+1];
|
||||
temp_i += a_ptr[i2] * x[ix+1] + a_ptr[i2+1] * x[ix];
|
||||
#else
|
||||
temp_r += a_ptr[i2] * x[ix] + a_ptr[i2+1] * x[ix+1];
|
||||
temp_i += a_ptr[i2] * x[ix+1] - a_ptr[i2+1] * x[ix];
|
||||
#endif
|
||||
|
||||
i2 += 2;
|
||||
ix += 2;
|
||||
}
|
||||
|
||||
#if !defined(XCONJ)
|
||||
y[iy] += alpha_r * temp_r - alpha_i * temp_i;
|
||||
y[iy+1] += alpha_r * temp_i + alpha_i * temp_r;
|
||||
#else
|
||||
y[iy] += alpha_r * temp_r + alpha_i * temp_i;
|
||||
y[iy+1] -= alpha_r * temp_i - alpha_i * temp_r;
|
||||
#endif
|
||||
|
||||
a_ptr += lda2;
|
||||
iy += 2;
|
||||
}
|
||||
|
||||
return(0);
|
||||
|
||||
}
|
||||
|
||||
|
||||
inc_x2 = 2 * inc_x;
|
||||
inc_y2 = 2 * inc_y;
|
||||
|
||||
for (j=0; j<n; j++)
|
||||
{
|
||||
temp_r = 0.0;
|
||||
temp_i = 0.0;
|
||||
ix = 0;
|
||||
i2=0;
|
||||
|
||||
for (i=0; i<m; i++)
|
||||
{
|
||||
|
||||
#if ( !defined(CONJ) && !defined(XCONJ) ) || ( defined(CONJ) && defined(XCONJ) )
|
||||
temp_r += a_ptr[i2] * x[ix] - a_ptr[i2+1] * x[ix+1];
|
||||
temp_i += a_ptr[i2] * x[ix+1] + a_ptr[i2+1] * x[ix];
|
||||
#else
|
||||
temp_r += a_ptr[i2] * x[ix] + a_ptr[i2+1] * x[ix+1];
|
||||
temp_i += a_ptr[i2] * x[ix+1] - a_ptr[i2+1] * x[ix];
|
||||
#endif
|
||||
|
||||
i2 += 2;
|
||||
ix += inc_x2;
|
||||
}
|
||||
|
||||
#if !defined(XCONJ)
|
||||
y[iy] += alpha_r * temp_r - alpha_i * temp_i;
|
||||
y[iy+1] += alpha_r * temp_i + alpha_i * temp_r;
|
||||
#else
|
||||
y[iy] += alpha_r * temp_r + alpha_i * temp_i;
|
||||
y[iy+1] -= alpha_r * temp_i - alpha_i * temp_r;
|
||||
#endif
|
||||
|
||||
a_ptr += lda2;
|
||||
iy += inc_y2;
|
||||
}
|
||||
|
||||
return(0);
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,134 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2013, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
#if !defined(DOUBLE)
|
||||
#define RVV_EFLOAT RVV_E32
|
||||
#define RVV_M RVV_M4
|
||||
#define FLOAT_V_T float32xm4_t
|
||||
#define VLSEV_FLOAT vlsev_float32xm4
|
||||
#define VFREDSUM_FLOAT vfredsumvs_float32xm4
|
||||
#define VFMACCVV_FLOAT vfmaccvv_float32xm4
|
||||
#define VFNMSACVV_FLOAT vfnmsacvv_float32xm4
|
||||
#define VFMVVF_FLOAT vfmvvf_float32xm4
|
||||
#define VFMULVV_FLOAT vfmulvv_float32xm4
|
||||
#else
|
||||
#define RVV_EFLOAT RVV_E64
|
||||
#define RVV_M RVV_M4
|
||||
#define FLOAT_V_T float64xm4_t
|
||||
#define VLSEV_FLOAT vlsev_float64xm4
|
||||
#define VFREDSUM_FLOAT vfredsumvs_float64xm4
|
||||
#define VFMACCVV_FLOAT vfmaccvv_float64xm4
|
||||
#define VFNMSACVV_FLOAT vfnmsacvv_float64xm4
|
||||
#define VFMVVF_FLOAT vfmvvf_float64xm4
|
||||
#define VFMULVV_FLOAT vfmulvv_float64xm4
|
||||
#endif
|
||||
|
||||
int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha_r, FLOAT alpha_i, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *buffer)
|
||||
{
|
||||
BLASLONG i = 0, j = 0, k = 0;
|
||||
BLASLONG ix = 0, iy = 0;
|
||||
FLOAT *a_ptr = a;
|
||||
FLOAT temp_r, temp_i;
|
||||
|
||||
FLOAT_V_T va0, va1, vx0, vx1, vr, vi;
|
||||
unsigned int gvl = 0;
|
||||
BLASLONG stride_x = inc_x * sizeof(FLOAT) * 2;
|
||||
BLASLONG stride_a = sizeof(FLOAT) * 2;
|
||||
gvl = vsetvli(m, RVV_EFLOAT, RVV_M);
|
||||
BLASLONG inc_xv = inc_x * gvl * 2;
|
||||
BLASLONG inc_av = gvl * 2;
|
||||
BLASLONG inc_y2 = inc_y * 2;
|
||||
BLASLONG lda2 = lda * 2;
|
||||
for(i = 0; i < n; i++){
|
||||
gvl = vsetvli(m, RVV_EFLOAT, RVV_M);
|
||||
j = 0;
|
||||
ix = 0;
|
||||
vr = VFMVVF_FLOAT(0, gvl);
|
||||
vi = VFMVVF_FLOAT(0, gvl);
|
||||
for(k = 0; k < m/gvl; k++){
|
||||
va0 = VLSEV_FLOAT(&a_ptr[j], stride_a, gvl);
|
||||
va1 = VLSEV_FLOAT(&a_ptr[j+1], stride_a, gvl);
|
||||
vx0 = VLSEV_FLOAT(&x[ix], stride_x, gvl);
|
||||
vx1 = VLSEV_FLOAT(&x[ix+1], stride_x, gvl);
|
||||
#if ( !defined(CONJ) && !defined(XCONJ) ) || ( defined(CONJ) && defined(XCONJ) )
|
||||
vr = VFMACCVV_FLOAT(vr, va0, vx0, gvl);
|
||||
vr = VFNMSACVV_FLOAT(vr, va1, vx1, gvl);
|
||||
vi = VFMACCVV_FLOAT(vi, va0, vx1, gvl);
|
||||
vi = VFMACCVV_FLOAT(vi, va1, vx0, gvl);
|
||||
#else
|
||||
vr = VFMACCVV_FLOAT(vr, va0, vx0, gvl);
|
||||
vr = VFMACCVV_FLOAT(vr, va1, vx1, gvl);
|
||||
vi = VFMACCVV_FLOAT(vi, va0, vx1, gvl);
|
||||
vi = VFNMSACVV_FLOAT(vi, va1, vx0, gvl);
|
||||
|
||||
#endif
|
||||
j += inc_av;
|
||||
ix += inc_xv;
|
||||
}
|
||||
va0 = VFMVVF_FLOAT(0, gvl);
|
||||
vx0 = VFREDSUM_FLOAT(vr, va0, gvl);
|
||||
temp_r = vx0[0];
|
||||
vx1 = VFREDSUM_FLOAT(vi, va0, gvl);
|
||||
temp_i = vx1[0];
|
||||
if(j/2 < m){
|
||||
gvl = vsetvli(m-j/2, RVV_EFLOAT, RVV_M);
|
||||
va0 = VLSEV_FLOAT(&a_ptr[j], stride_a, gvl);
|
||||
va1 = VLSEV_FLOAT(&a_ptr[j+1], stride_a, gvl);
|
||||
vx0 = VLSEV_FLOAT(&x[ix], stride_x, gvl);
|
||||
vx1 = VLSEV_FLOAT(&x[ix+1], stride_x, gvl);
|
||||
#if ( !defined(CONJ) && !defined(XCONJ) ) || ( defined(CONJ) && defined(XCONJ) )
|
||||
vr = VFMULVV_FLOAT(va0, vx0, gvl);
|
||||
vr = VFNMSACVV_FLOAT(vr, va1, vx1, gvl);
|
||||
vi = VFMULVV_FLOAT(va0, vx1, gvl);
|
||||
vi = VFMACCVV_FLOAT(vi, va1, vx0, gvl);
|
||||
#else
|
||||
vr = VFMULVV_FLOAT(va0, vx0, gvl);
|
||||
vr = VFMACCVV_FLOAT(vr, va1, vx1, gvl);
|
||||
vi = VFMULVV_FLOAT(va0, vx1, gvl);
|
||||
vi = VFNMSACVV_FLOAT(vi, va1, vx0, gvl);
|
||||
|
||||
#endif
|
||||
va0 = VFMVVF_FLOAT(0, gvl);
|
||||
vx0 = VFREDSUM_FLOAT(vr, va0, gvl);
|
||||
temp_r += vx0[0];
|
||||
vx1 = VFREDSUM_FLOAT(vi, va0, gvl);
|
||||
temp_i += vx1[0];
|
||||
}
|
||||
#if !defined(XCONJ)
|
||||
y[iy] += alpha_r * temp_r - alpha_i * temp_i;
|
||||
y[iy+1] += alpha_r * temp_i + alpha_i * temp_r;
|
||||
#else
|
||||
y[iy] += alpha_r * temp_r + alpha_i * temp_i;
|
||||
y[iy+1] -= alpha_r * temp_i - alpha_i * temp_r;
|
||||
#endif
|
||||
iy += inc_y2;
|
||||
a_ptr += lda2;
|
||||
}
|
||||
return(0);
|
||||
}
|
||||
|
|
@ -0,0 +1,191 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2013, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
#if !defined(DOUBLE)
|
||||
#define RVV_EFLOAT RVV_E32
|
||||
#define RVV_M RVV_M4
|
||||
#define FLOAT_V_T float32xm4_t
|
||||
#define VLSEV_FLOAT vlsev_float32xm4
|
||||
#define VSSEV_FLOAT vssev_float32xm4
|
||||
#define VFREDSUM_FLOAT vfredsumvs_float32xm4
|
||||
#define VFMACCVV_FLOAT vfmaccvv_float32xm4
|
||||
#define VFMACCVF_FLOAT vfmaccvf_float32xm4
|
||||
#define VFMVVF_FLOAT vfmvvf_float32xm4
|
||||
#define VFMULVV_FLOAT vfmulvv_float32xm4
|
||||
#define VFNMSACVF_FLOAT vfnmsacvf_float32xm4
|
||||
#define VFNMSACVV_FLOAT vfnmsacvv_float32xm4
|
||||
#else
|
||||
#define RVV_EFLOAT RVV_E64
|
||||
#define RVV_M RVV_M4
|
||||
#define FLOAT_V_T float64xm4_t
|
||||
#define VLSEV_FLOAT vlsev_float64xm4
|
||||
#define VSSEV_FLOAT vssev_float64xm4
|
||||
#define VFREDSUM_FLOAT vfredsumvs_float64xm4
|
||||
#define VFMACCVV_FLOAT vfmaccvv_float64xm4
|
||||
#define VFMACCVF_FLOAT vfmaccvf_float64xm4
|
||||
#define VFMVVF_FLOAT vfmvvf_float64xm4
|
||||
#define VFMULVV_FLOAT vfmulvv_float64xm4
|
||||
#define VFNMSACVF_FLOAT vfnmsacvf_float64xm4
|
||||
#define VFNMSACVV_FLOAT vfnmsacvv_float64xm4
|
||||
#endif
|
||||
|
||||
int CNAME(BLASLONG m, BLASLONG offset, FLOAT alpha_r, FLOAT alpha_i, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG incx, FLOAT *y, BLASLONG incy, FLOAT *buffer){
|
||||
BLASLONG i, j, k;
|
||||
BLASLONG ix, iy, ia;
|
||||
BLASLONG jx, jy, ja;
|
||||
FLOAT temp_r1, temp_i1;
|
||||
FLOAT temp_r2, temp_i2;
|
||||
FLOAT *a_ptr = a;
|
||||
unsigned int gvl = 0;
|
||||
|
||||
|
||||
FLOAT_V_T va0, va1, vx0, vx1, vy0, vy1, vr0, vr1;
|
||||
BLASLONG stride_x, stride_y, stride_a, inc_xv, inc_yv, inc_av, len, lda2;
|
||||
|
||||
BLASLONG inc_x2 = incx * 2;
|
||||
BLASLONG inc_y2 = incy * 2;
|
||||
stride_x = inc_x2 * sizeof(FLOAT);
|
||||
stride_y = inc_y2 * sizeof(FLOAT);
|
||||
stride_a = 2 * sizeof(FLOAT);
|
||||
lda2 = lda * 2;
|
||||
|
||||
jx = 0;
|
||||
jy = 0;
|
||||
ja = 0;
|
||||
for(j = 0; j < offset; j++){
|
||||
temp_r1 = alpha_r * x[jx] - alpha_i * x[jx+1];;
|
||||
temp_i1 = alpha_r * x[jx+1] + alpha_i * x[jx];
|
||||
temp_r2 = 0;
|
||||
temp_i2 = 0;
|
||||
y[jy] += temp_r1 * a_ptr[ja];
|
||||
y[jy+1] += temp_i1 * a_ptr[ja];
|
||||
ix = jx + inc_x2;
|
||||
iy = jy + inc_y2;
|
||||
ia = ja + 2;
|
||||
i = j + 1;
|
||||
len = m - i;
|
||||
if(len > 0){
|
||||
gvl = vsetvli(len, RVV_EFLOAT, RVV_M);
|
||||
inc_xv = incx * gvl * 2;
|
||||
inc_yv = incy * gvl * 2;
|
||||
inc_av = gvl * 2;
|
||||
vr0 = VFMVVF_FLOAT(0, gvl);
|
||||
vr1 = VFMVVF_FLOAT(0, gvl);
|
||||
for(k = 0; k < len / gvl; k++){
|
||||
va0 = VLSEV_FLOAT(&a_ptr[ia], stride_a, gvl);
|
||||
va1 = VLSEV_FLOAT(&a_ptr[ia+1], stride_a, gvl);
|
||||
vy0 = VLSEV_FLOAT(&y[iy], stride_y, gvl);
|
||||
vy1 = VLSEV_FLOAT(&y[iy+1], stride_y, gvl);
|
||||
#ifndef HEMVREV
|
||||
vy0 = VFMACCVF_FLOAT(vy0, temp_r1, va0, gvl);
|
||||
vy0 = VFNMSACVF_FLOAT(vy0, temp_i1, va1, gvl);
|
||||
vy1 = VFMACCVF_FLOAT(vy1, temp_r1, va1, gvl);
|
||||
vy1 = VFMACCVF_FLOAT(vy1, temp_i1, va0, gvl);
|
||||
#else
|
||||
vy0 = VFMACCVF_FLOAT(vy0, temp_r1, va0, gvl);
|
||||
vy0 = VFMACCVF_FLOAT(vy0, temp_i1, va1, gvl);
|
||||
vy1 = VFNMSACVF_FLOAT(vy1, temp_r1, va1, gvl);
|
||||
vy1 = VFMACCVF_FLOAT(vy1, temp_i1, va0, gvl);
|
||||
#endif
|
||||
VSSEV_FLOAT(&y[iy], stride_y, vy0, gvl);
|
||||
VSSEV_FLOAT(&y[iy+1], stride_y, vy1, gvl);
|
||||
|
||||
vx0 = VLSEV_FLOAT(&x[ix], stride_x, gvl);
|
||||
vx1 = VLSEV_FLOAT(&x[ix+1], stride_x, gvl);
|
||||
#ifndef HEMVREV
|
||||
vr0 = VFMACCVV_FLOAT(vr0, vx0, va0, gvl);
|
||||
vr0 = VFMACCVV_FLOAT(vr0, vx1, va1, gvl);
|
||||
vr1 = VFMACCVV_FLOAT(vr1, vx1, va0, gvl);
|
||||
vr1 = VFNMSACVV_FLOAT(vr1, vx0, va1, gvl);
|
||||
#else
|
||||
vr0 = VFMACCVV_FLOAT(vr0, vx0, va0, gvl);
|
||||
vr0 = VFNMSACVV_FLOAT(vr0, vx1, va1, gvl);
|
||||
vr1 = VFMACCVV_FLOAT(vr1, vx1, va0, gvl);
|
||||
vr1 = VFMACCVV_FLOAT(vr1, vx0, va1, gvl);
|
||||
|
||||
#endif
|
||||
i += gvl;
|
||||
ix += inc_xv;
|
||||
iy += inc_yv;
|
||||
ia += inc_av;
|
||||
}
|
||||
va0 = VFMVVF_FLOAT(0, gvl);
|
||||
vx0 = VFREDSUM_FLOAT(vr0, va0, gvl);
|
||||
temp_r2 = vx0[0];
|
||||
vx1 = VFREDSUM_FLOAT(vr1, va0, gvl);
|
||||
temp_i2 = vx1[0];
|
||||
if(i < m){
|
||||
gvl = vsetvli(m-i, RVV_EFLOAT, RVV_M);
|
||||
va0 = VLSEV_FLOAT(&a_ptr[ia], stride_a, gvl);
|
||||
va1 = VLSEV_FLOAT(&a_ptr[ia+1], stride_a, gvl);
|
||||
vy0 = VLSEV_FLOAT(&y[iy], stride_y, gvl);
|
||||
vy1 = VLSEV_FLOAT(&y[iy+1], stride_y, gvl);
|
||||
#ifndef HEMVREV
|
||||
vy0 = VFMACCVF_FLOAT(vy0, temp_r1, va0, gvl);
|
||||
vy0 = VFNMSACVF_FLOAT(vy0, temp_i1, va1, gvl);
|
||||
vy1 = VFMACCVF_FLOAT(vy1, temp_r1, va1, gvl);
|
||||
vy1 = VFMACCVF_FLOAT(vy1, temp_i1, va0, gvl);
|
||||
#else
|
||||
vy0 = VFMACCVF_FLOAT(vy0, temp_r1, va0, gvl);
|
||||
vy0 = VFMACCVF_FLOAT(vy0, temp_i1, va1, gvl);
|
||||
vy1 = VFNMSACVF_FLOAT(vy1, temp_r1, va1, gvl);
|
||||
vy1 = VFMACCVF_FLOAT(vy1, temp_i1, va0, gvl);
|
||||
#endif
|
||||
VSSEV_FLOAT(&y[iy], stride_y, vy0, gvl);
|
||||
VSSEV_FLOAT(&y[iy+1], stride_y, vy1, gvl);
|
||||
|
||||
vx0 = VLSEV_FLOAT(&x[ix], stride_x, gvl);
|
||||
vx1 = VLSEV_FLOAT(&x[ix+1], stride_x, gvl);
|
||||
#ifndef HEMVREV
|
||||
vr0 = VFMULVV_FLOAT(vx0, va0, gvl);
|
||||
vr0 = VFMACCVV_FLOAT(vr0, vx1, va1, gvl);
|
||||
vr1 = VFMULVV_FLOAT(vx1, va0, gvl);
|
||||
vr1 = VFNMSACVV_FLOAT(vr1, vx0, va1, gvl);
|
||||
#else
|
||||
vr0 = VFMULVV_FLOAT(vx0, va0, gvl);
|
||||
vr0 = VFNMSACVV_FLOAT(vr0, vx1, va1, gvl);
|
||||
vr1 = VFMULVV_FLOAT(vx1, va0, gvl);
|
||||
vr1 = VFMACCVV_FLOAT(vr1, vx0, va1, gvl);
|
||||
#endif
|
||||
|
||||
va0 = VFMVVF_FLOAT(0, gvl);
|
||||
vx0 = VFREDSUM_FLOAT(vr0, va0, gvl);
|
||||
temp_r2 += vx0[0];
|
||||
vx1 = VFREDSUM_FLOAT(vr1, va0, gvl);
|
||||
temp_i2 += vx1[0];
|
||||
}
|
||||
}
|
||||
y[jy] += alpha_r * temp_r2 - alpha_i * temp_i2;
|
||||
y[jy+1] += alpha_r * temp_i2 + alpha_i * temp_r2;
|
||||
jx += inc_x2;
|
||||
jy += inc_y2;
|
||||
ja += 2;
|
||||
a_ptr += lda2;
|
||||
}
|
||||
return(0);
|
||||
}
|
|
@ -0,0 +1,192 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2013, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
#if !defined(DOUBLE)
|
||||
#define RVV_EFLOAT RVV_E32
|
||||
#define RVV_M RVV_M4
|
||||
#define FLOAT_V_T float32xm4_t
|
||||
#define VLSEV_FLOAT vlsev_float32xm4
|
||||
#define VSSEV_FLOAT vssev_float32xm4
|
||||
#define VFREDSUM_FLOAT vfredsumvs_float32xm4
|
||||
#define VFMACCVV_FLOAT vfmaccvv_float32xm4
|
||||
#define VFMACCVF_FLOAT vfmaccvf_float32xm4
|
||||
#define VFMVVF_FLOAT vfmvvf_float32xm4
|
||||
#define VFMULVV_FLOAT vfmulvv_float32xm4
|
||||
#define VFNMSACVF_FLOAT vfnmsacvf_float32xm4
|
||||
#define VFNMSACVV_FLOAT vfnmsacvv_float32xm4
|
||||
#else
|
||||
#define RVV_EFLOAT RVV_E64
|
||||
#define RVV_M RVV_M4
|
||||
#define FLOAT_V_T float64xm4_t
|
||||
#define VLSEV_FLOAT vlsev_float64xm4
|
||||
#define VSSEV_FLOAT vssev_float64xm4
|
||||
#define VFREDSUM_FLOAT vfredsumvs_float64xm4
|
||||
#define VFMACCVV_FLOAT vfmaccvv_float64xm4
|
||||
#define VFMACCVF_FLOAT vfmaccvf_float64xm4
|
||||
#define VFMVVF_FLOAT vfmvvf_float64xm4
|
||||
#define VFMULVV_FLOAT vfmulvv_float64xm4
|
||||
#define VFNMSACVF_FLOAT vfnmsacvf_float64xm4
|
||||
#define VFNMSACVV_FLOAT vfnmsacvv_float64xm4
|
||||
#endif
|
||||
|
||||
int CNAME(BLASLONG m, BLASLONG offset, FLOAT alpha_r, FLOAT alpha_i, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG incx, FLOAT *y, BLASLONG incy, FLOAT *buffer){
|
||||
BLASLONG i, j, k;
|
||||
BLASLONG ix, iy, ia;
|
||||
BLASLONG jx, jy, ja;
|
||||
FLOAT temp_r1, temp_i1;
|
||||
FLOAT temp_r2, temp_i2;
|
||||
FLOAT *a_ptr = a;
|
||||
unsigned int gvl = 0;
|
||||
|
||||
|
||||
FLOAT_V_T va0, va1, vx0, vx1, vy0, vy1, vr0, vr1;
|
||||
BLASLONG stride_x, stride_y, stride_a, inc_xv, inc_yv, inc_av, lda2;
|
||||
|
||||
BLASLONG inc_x2 = incx * 2;
|
||||
BLASLONG inc_y2 = incy * 2;
|
||||
stride_x = inc_x2 * sizeof(FLOAT);
|
||||
stride_y = inc_y2 * sizeof(FLOAT);
|
||||
stride_a = 2 * sizeof(FLOAT);
|
||||
lda2 = lda * 2;
|
||||
|
||||
BLASLONG m1 = m - offset;
|
||||
a_ptr = a + m1 * lda2;
|
||||
jx = m1 * inc_x2;
|
||||
jy = m1 * inc_y2;
|
||||
ja = m1 * 2;
|
||||
for(j = m1; j < m; j++){
|
||||
temp_r1 = alpha_r * x[jx] - alpha_i * x[jx+1];;
|
||||
temp_i1 = alpha_r * x[jx+1] + alpha_i * x[jx];
|
||||
temp_r2 = 0;
|
||||
temp_i2 = 0;
|
||||
ix = 0;
|
||||
iy = 0;
|
||||
ia = 0;
|
||||
i = 0;
|
||||
if(j > 0){
|
||||
gvl = vsetvli(j, RVV_EFLOAT, RVV_M);
|
||||
inc_xv = incx * gvl * 2;
|
||||
inc_yv = incy * gvl * 2;
|
||||
inc_av = gvl * 2;
|
||||
vr0 = VFMVVF_FLOAT(0, gvl);
|
||||
vr1 = VFMVVF_FLOAT(0, gvl);
|
||||
for(k = 0; k < j / gvl; k++){
|
||||
va0 = VLSEV_FLOAT(&a_ptr[ia], stride_a, gvl);
|
||||
va1 = VLSEV_FLOAT(&a_ptr[ia+1], stride_a, gvl);
|
||||
vy0 = VLSEV_FLOAT(&y[iy], stride_y, gvl);
|
||||
vy1 = VLSEV_FLOAT(&y[iy+1], stride_y, gvl);
|
||||
#ifndef HEMVREV
|
||||
vy0 = VFMACCVF_FLOAT(vy0, temp_r1, va0, gvl);
|
||||
vy0 = VFNMSACVF_FLOAT(vy0, temp_i1, va1, gvl);
|
||||
vy1 = VFMACCVF_FLOAT(vy1, temp_r1, va1, gvl);
|
||||
vy1 = VFMACCVF_FLOAT(vy1, temp_i1, va0, gvl);
|
||||
#else
|
||||
vy0 = VFMACCVF_FLOAT(vy0, temp_r1, va0, gvl);
|
||||
vy0 = VFMACCVF_FLOAT(vy0, temp_i1, va1, gvl);
|
||||
vy1 = VFNMSACVF_FLOAT(vy1, temp_r1, va1, gvl);
|
||||
vy1 = VFMACCVF_FLOAT(vy1, temp_i1, va0, gvl);
|
||||
#endif
|
||||
VSSEV_FLOAT(&y[iy], stride_y, vy0, gvl);
|
||||
VSSEV_FLOAT(&y[iy+1], stride_y, vy1, gvl);
|
||||
|
||||
vx0 = VLSEV_FLOAT(&x[ix], stride_x, gvl);
|
||||
vx1 = VLSEV_FLOAT(&x[ix+1], stride_x, gvl);
|
||||
#ifndef HEMVREV
|
||||
vr0 = VFMACCVV_FLOAT(vr0, vx0, va0, gvl);
|
||||
vr0 = VFMACCVV_FLOAT(vr0, vx1, va1, gvl);
|
||||
vr1 = VFMACCVV_FLOAT(vr1, vx1, va0, gvl);
|
||||
vr1 = VFNMSACVV_FLOAT(vr1, vx0, va1, gvl);
|
||||
#else
|
||||
vr0 = VFMACCVV_FLOAT(vr0, vx0, va0, gvl);
|
||||
vr0 = VFNMSACVV_FLOAT(vr0, vx1, va1, gvl);
|
||||
vr1 = VFMACCVV_FLOAT(vr1, vx1, va0, gvl);
|
||||
vr1 = VFMACCVV_FLOAT(vr1, vx0, va1, gvl);
|
||||
|
||||
#endif
|
||||
i += gvl;
|
||||
ix += inc_xv;
|
||||
iy += inc_yv;
|
||||
ia += inc_av;
|
||||
}
|
||||
va0 = VFMVVF_FLOAT(0, gvl);
|
||||
vx0 = VFREDSUM_FLOAT(vr0, va0, gvl);
|
||||
temp_r2 = vx0[0];
|
||||
vx1 = VFREDSUM_FLOAT(vr1, va0, gvl);
|
||||
temp_i2 = vx1[0];
|
||||
if(i < j){
|
||||
gvl = vsetvli(j-i, RVV_EFLOAT, RVV_M);
|
||||
va0 = VLSEV_FLOAT(&a_ptr[ia], stride_a, gvl);
|
||||
va1 = VLSEV_FLOAT(&a_ptr[ia+1], stride_a, gvl);
|
||||
vy0 = VLSEV_FLOAT(&y[iy], stride_y, gvl);
|
||||
vy1 = VLSEV_FLOAT(&y[iy+1], stride_y, gvl);
|
||||
#ifndef HEMVREV
|
||||
vy0 = VFMACCVF_FLOAT(vy0, temp_r1, va0, gvl);
|
||||
vy0 = VFNMSACVF_FLOAT(vy0, temp_i1, va1, gvl);
|
||||
vy1 = VFMACCVF_FLOAT(vy1, temp_r1, va1, gvl);
|
||||
vy1 = VFMACCVF_FLOAT(vy1, temp_i1, va0, gvl);
|
||||
#else
|
||||
vy0 = VFMACCVF_FLOAT(vy0, temp_r1, va0, gvl);
|
||||
vy0 = VFMACCVF_FLOAT(vy0, temp_i1, va1, gvl);
|
||||
vy1 = VFNMSACVF_FLOAT(vy1, temp_r1, va1, gvl);
|
||||
vy1 = VFMACCVF_FLOAT(vy1, temp_i1, va0, gvl);
|
||||
#endif
|
||||
VSSEV_FLOAT(&y[iy], stride_y, vy0, gvl);
|
||||
VSSEV_FLOAT(&y[iy+1], stride_y, vy1, gvl);
|
||||
|
||||
vx0 = VLSEV_FLOAT(&x[ix], stride_x, gvl);
|
||||
vx1 = VLSEV_FLOAT(&x[ix+1], stride_x, gvl);
|
||||
#ifndef HEMVREV
|
||||
vr0 = VFMULVV_FLOAT(vx0, va0, gvl);
|
||||
vr0 = VFMACCVV_FLOAT(vr0, vx1, va1, gvl);
|
||||
vr1 = VFMULVV_FLOAT(vx1, va0, gvl);
|
||||
vr1 = VFNMSACVV_FLOAT(vr1, vx0, va1, gvl);
|
||||
#else
|
||||
vr0 = VFMULVV_FLOAT(vx0, va0, gvl);
|
||||
vr0 = VFNMSACVV_FLOAT(vr0, vx1, va1, gvl);
|
||||
vr1 = VFMULVV_FLOAT(vx1, va0, gvl);
|
||||
vr1 = VFMACCVV_FLOAT(vr1, vx0, va1, gvl);
|
||||
#endif
|
||||
|
||||
va0 = VFMVVF_FLOAT(0, gvl);
|
||||
vx0 = VFREDSUM_FLOAT(vr0, va0, gvl);
|
||||
temp_r2 += vx0[0];
|
||||
vx1 = VFREDSUM_FLOAT(vr1, va0, gvl);
|
||||
temp_i2 += vx1[0];
|
||||
}
|
||||
}
|
||||
y[jy] += temp_r1 * a_ptr[ja];
|
||||
y[jy+1] += temp_i1 * a_ptr[ja];
|
||||
y[jy] += alpha_r * temp_r2 - alpha_i * temp_i2;
|
||||
y[jy+1] += alpha_r * temp_i2 + alpha_i * temp_r2;
|
||||
jx += inc_x2;
|
||||
jy += inc_y2;
|
||||
ja += 2;
|
||||
a_ptr += lda2;
|
||||
}
|
||||
return(0);
|
||||
}
|
|
@ -0,0 +1,106 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2013, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
/**************************************************************************************
|
||||
* 2013/09/13 Saar
|
||||
* BLASTEST float : OK
|
||||
* BLASTEST double : OK
|
||||
* CTEST : OK
|
||||
* TEST : OK
|
||||
*
|
||||
**************************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
#include <math.h>
|
||||
|
||||
#if defined(DOUBLE)
|
||||
|
||||
#define ABS fabs
|
||||
|
||||
#else
|
||||
|
||||
#define ABS fabsf
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
|
||||
{
|
||||
BLASLONG i=0;
|
||||
FLOAT scale = 0.0;
|
||||
FLOAT ssq = 1.0;
|
||||
BLASLONG inc_x2;
|
||||
FLOAT temp;
|
||||
|
||||
if (n <= 0 || inc_x <= 0) return(0.0);
|
||||
|
||||
inc_x2 = 2 * inc_x;
|
||||
|
||||
n *= inc_x2;
|
||||
while(i < n)
|
||||
{
|
||||
|
||||
if ( x[i] != 0.0 )
|
||||
{
|
||||
temp = ABS( x[i] );
|
||||
if ( scale < temp )
|
||||
{
|
||||
ssq = 1 + ssq * ( scale / temp ) * ( scale / temp );
|
||||
scale = temp ;
|
||||
}
|
||||
else
|
||||
{
|
||||
ssq += ( temp / scale ) * ( temp / scale );
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
if ( x[i+1] != 0.0 )
|
||||
{
|
||||
temp = ABS( x[i+1] );
|
||||
if ( scale < temp )
|
||||
{
|
||||
ssq = 1 + ssq * ( scale / temp ) * ( scale / temp );
|
||||
scale = temp ;
|
||||
}
|
||||
else
|
||||
{
|
||||
ssq += ( temp / scale ) * ( temp / scale );
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
i += inc_x2;
|
||||
}
|
||||
scale = scale * sqrt( ssq );
|
||||
return(scale);
|
||||
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,278 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2020, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
#if !defined(DOUBLE)
|
||||
#define RVV_EFLOAT RVV_E32
|
||||
#define RVV_M RVV_M4
|
||||
#define FLOAT_V_T float32xm4_t
|
||||
#define VLEV_FLOAT vlev_float32xm4
|
||||
#define VLSEV_FLOAT vlsev_float32xm4
|
||||
#define VFREDSUM_FLOAT vfredsumvs_float32xm4
|
||||
#define VFMACCVV_FLOAT vfmaccvv_float32xm4
|
||||
#define VFMVVF_FLOAT vfmvvf_float32xm4
|
||||
#define VFDOTVV_FLOAT vfdotvv_float32xm4
|
||||
#define ABS fabsf
|
||||
#define MASK_T e32xm4_t
|
||||
#define VFRSUBVF_MASK_FLOAT vfrsubvf_mask_float32xm4
|
||||
#define VMFGTVF_FLOAT vmfgtvf_e32xm4_float32xm4
|
||||
#define VMFIRSTM vmfirstm_e32xm4
|
||||
#define VFDIVVF_FLOAT vfdivvf_float32xm4
|
||||
#define VMFLTVF_FLOAT vmfltvf_e32xm4_float32xm4
|
||||
#define VFREDMAXVS_FLOAT vfredmaxvs_float32xm4
|
||||
#else
|
||||
#define RVV_EFLOAT RVV_E64
|
||||
#define RVV_M RVV_M4
|
||||
#define FLOAT_V_T float64xm4_t
|
||||
#define VLEV_FLOAT vlev_float64xm4
|
||||
#define VLSEV_FLOAT vlsev_float64xm4
|
||||
#define VFREDSUM_FLOAT vfredsumvs_float64xm4
|
||||
#define VFMACCVV_FLOAT vfmaccvv_float64xm4
|
||||
#define VFMVVF_FLOAT vfmvvf_float64xm4
|
||||
#define VFDOTVV_FLOAT vfdotvv_float64xm4
|
||||
#define ABS fabs
|
||||
#define MASK_T e64xm4_t
|
||||
#define VFRSUBVF_MASK_FLOAT vfrsubvf_mask_float64xm4
|
||||
#define VMFGTVF_FLOAT vmfgtvf_e64xm4_float64xm4
|
||||
#define VMFIRSTM vmfirstm_e64xm4
|
||||
#define VFDIVVF_FLOAT vfdivvf_float64xm4
|
||||
#define VMFLTVF_FLOAT vmfltvf_e64xm4_float64xm4
|
||||
#define VFREDMAXVS_FLOAT vfredmaxvs_float64xm4
|
||||
#endif
|
||||
|
||||
FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
|
||||
{
|
||||
BLASLONG i=0, j=0;
|
||||
|
||||
if ( n < 0 ) return(0.0);
|
||||
// if(n == 1) return (ABS(x[0]));
|
||||
|
||||
FLOAT_V_T vr, v0, v_zero;
|
||||
unsigned int gvl = 0;
|
||||
FLOAT scale = 0.0, ssq = 0.0;
|
||||
MASK_T mask;
|
||||
BLASLONG index = 0;
|
||||
if(inc_x == 1){
|
||||
BLASLONG n2 = n * 2;
|
||||
gvl = vsetvli(n2, RVV_EFLOAT, RVV_M);
|
||||
vr = VFMVVF_FLOAT(0, gvl);
|
||||
v_zero = VFMVVF_FLOAT(0, gvl);
|
||||
for(i=0,j=0; i<n2/gvl; i++){
|
||||
v0 = VLEV_FLOAT(&x[j], gvl);
|
||||
//fabs(vector)
|
||||
mask = VMFLTVF_FLOAT(v0, 0, gvl);
|
||||
v0 = VFRSUBVF_MASK_FLOAT(v0, v0, 0, mask, gvl);
|
||||
//if scale change
|
||||
mask = VMFGTVF_FLOAT(v0, scale, gvl);
|
||||
index = VMFIRSTM(mask, gvl);
|
||||
if(index == -1){//no elements greater than scale
|
||||
if(scale != 0.0){
|
||||
v0 = VFDIVVF_FLOAT(v0, scale, gvl);
|
||||
vr = VFMACCVV_FLOAT(vr, v0, v0, gvl);
|
||||
}
|
||||
}else{//found greater element
|
||||
//ssq in vector vr: vr[0]
|
||||
vr = VFREDSUM_FLOAT(vr, v_zero, gvl);
|
||||
//total ssq before current vector
|
||||
ssq += vr[0];
|
||||
//find max
|
||||
vr = VFREDMAXVS_FLOAT(v0, v_zero, gvl);
|
||||
//update ssq before max_index
|
||||
ssq = ssq * (scale/vr[0])*(scale/vr[0]);
|
||||
//update scale
|
||||
scale = vr[0];
|
||||
//ssq in vector vr
|
||||
v0 = VFDIVVF_FLOAT(v0, scale, gvl);
|
||||
vr = VFMACCVV_FLOAT(v_zero, v0, v0, gvl);
|
||||
}
|
||||
j += gvl;
|
||||
}
|
||||
//ssq in vector vr: vr[0]
|
||||
vr = VFREDSUM_FLOAT(vr, v_zero, gvl);
|
||||
//total ssq now
|
||||
ssq += vr[0];
|
||||
|
||||
//tail
|
||||
if(j < n2){
|
||||
gvl = vsetvli(n2-j, RVV_EFLOAT, RVV_M);
|
||||
v0 = VLEV_FLOAT(&x[j], gvl);
|
||||
//fabs(vector)
|
||||
mask = VMFLTVF_FLOAT(v0, 0, gvl);
|
||||
v0 = VFRSUBVF_MASK_FLOAT(v0, v0, 0, mask, gvl);
|
||||
//if scale change
|
||||
mask = VMFGTVF_FLOAT(v0, scale, gvl);
|
||||
index = VMFIRSTM(mask, gvl);
|
||||
if(index == -1){//no elements greater than scale
|
||||
if(scale != 0.0)
|
||||
v0 = VFDIVVF_FLOAT(v0, scale, gvl);
|
||||
}else{//found greater element
|
||||
//find max
|
||||
vr = VFREDMAXVS_FLOAT(v0, v_zero, gvl);
|
||||
//update ssq before max_index
|
||||
ssq = ssq * (scale/vr[0])*(scale/vr[0]);
|
||||
//update scale
|
||||
scale = vr[0];
|
||||
v0 = VFDIVVF_FLOAT(v0, scale, gvl);
|
||||
}
|
||||
vr = VFMACCVV_FLOAT(v_zero, v0, v0, gvl);
|
||||
//ssq in vector vr: vr[0]
|
||||
vr = VFREDSUM_FLOAT(vr, v_zero, gvl);
|
||||
//total ssq now
|
||||
ssq += vr[0];
|
||||
}
|
||||
}else{
|
||||
gvl = vsetvli(n, RVV_EFLOAT, RVV_M);
|
||||
vr = VFMVVF_FLOAT(0, gvl);
|
||||
v_zero = VFMVVF_FLOAT(0, gvl);
|
||||
unsigned int stride_x = inc_x * sizeof(FLOAT) * 2;
|
||||
int idx = 0, inc_v = inc_x * gvl * 2;
|
||||
for(i=0,j=0; i<n/gvl; i++){
|
||||
v0 = VLSEV_FLOAT(&x[idx], stride_x, gvl);
|
||||
//fabs(vector)
|
||||
mask = VMFLTVF_FLOAT(v0, 0, gvl);
|
||||
v0 = VFRSUBVF_MASK_FLOAT(v0, v0, 0, mask, gvl);
|
||||
//if scale change
|
||||
mask = VMFGTVF_FLOAT(v0, scale, gvl);
|
||||
index = VMFIRSTM(mask, gvl);
|
||||
if(index == -1){//no elements greater than scale
|
||||
if(scale != 0.0){
|
||||
v0 = VFDIVVF_FLOAT(v0, scale, gvl);
|
||||
vr = VFMACCVV_FLOAT(vr, v0, v0, gvl);
|
||||
}
|
||||
}else{//found greater element
|
||||
//ssq in vector vr: vr[0]
|
||||
vr = VFREDSUM_FLOAT(vr, v_zero, gvl);
|
||||
//total ssq before current vector
|
||||
ssq += vr[0];
|
||||
//find max
|
||||
vr = VFREDMAXVS_FLOAT(v0, v_zero, gvl);
|
||||
//update ssq before max_index
|
||||
ssq = ssq * (scale/vr[0])*(scale/vr[0]);
|
||||
//update scale
|
||||
scale = vr[0];
|
||||
//ssq in vector vr
|
||||
v0 = VFDIVVF_FLOAT(v0, scale, gvl);
|
||||
vr = VFMACCVV_FLOAT(v_zero, v0, v0, gvl);
|
||||
}
|
||||
|
||||
v0 = VLSEV_FLOAT(&x[idx+1], stride_x, gvl);
|
||||
//fabs(vector)
|
||||
mask = VMFLTVF_FLOAT(v0, 0, gvl);
|
||||
v0 = VFRSUBVF_MASK_FLOAT(v0, v0, 0, mask, gvl);
|
||||
//if scale change
|
||||
mask = VMFGTVF_FLOAT(v0, scale, gvl);
|
||||
index = VMFIRSTM(mask, gvl);
|
||||
if(index == -1){//no elements greater than scale
|
||||
if(scale != 0.0){
|
||||
v0 = VFDIVVF_FLOAT(v0, scale, gvl);
|
||||
vr = VFMACCVV_FLOAT(vr, v0, v0, gvl);
|
||||
}
|
||||
}else{//found greater element
|
||||
//ssq in vector vr: vr[0]
|
||||
vr = VFREDSUM_FLOAT(vr, v_zero, gvl);
|
||||
//total ssq before current vector
|
||||
ssq += vr[0];
|
||||
//find max
|
||||
vr = VFREDMAXVS_FLOAT(v0, v_zero, gvl);
|
||||
//update ssq before max_index
|
||||
ssq = ssq * (scale/vr[0])*(scale/vr[0]);
|
||||
//update scale
|
||||
scale = vr[0];
|
||||
//ssq in vector vr
|
||||
v0 = VFDIVVF_FLOAT(v0, scale, gvl);
|
||||
vr = VFMACCVV_FLOAT(v_zero, v0, v0, gvl);
|
||||
}
|
||||
j += gvl;
|
||||
idx += inc_v;
|
||||
}
|
||||
//ssq in vector vr: vr[0]
|
||||
vr = VFREDSUM_FLOAT(vr, v_zero, gvl);
|
||||
//total ssq now
|
||||
ssq += vr[0];
|
||||
|
||||
//tail
|
||||
if(j < n){
|
||||
gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M);
|
||||
v0 = VLSEV_FLOAT(&x[idx], stride_x, gvl);
|
||||
//fabs(vector)
|
||||
mask = VMFLTVF_FLOAT(v0, 0, gvl);
|
||||
v0 = VFRSUBVF_MASK_FLOAT(v0, v0, 0, mask, gvl);
|
||||
//if scale change
|
||||
mask = VMFGTVF_FLOAT(v0, scale, gvl);
|
||||
index = VMFIRSTM(mask, gvl);
|
||||
if(index == -1){//no elements greater than scale
|
||||
if(scale != 0.0){
|
||||
v0 = VFDIVVF_FLOAT(v0, scale, gvl);
|
||||
vr = VFMACCVV_FLOAT(v_zero, v0, v0, gvl);
|
||||
}
|
||||
}else{//found greater element
|
||||
//find max
|
||||
vr = VFREDMAXVS_FLOAT(v0, v_zero, gvl);
|
||||
//update ssq before max_index
|
||||
ssq = ssq * (scale/vr[0])*(scale/vr[0]);
|
||||
//update scale
|
||||
scale = vr[0];
|
||||
v0 = VFDIVVF_FLOAT(v0, scale, gvl);
|
||||
vr = VFMACCVV_FLOAT(v_zero, v0, v0, gvl);
|
||||
}
|
||||
|
||||
v0 = VLSEV_FLOAT(&x[idx+1], stride_x, gvl);
|
||||
//fabs(vector)
|
||||
mask = VMFLTVF_FLOAT(v0, 0, gvl);
|
||||
v0 = VFRSUBVF_MASK_FLOAT(v0, v0, 0, mask, gvl);
|
||||
//if scale change
|
||||
mask = VMFGTVF_FLOAT(v0, scale, gvl);
|
||||
index = VMFIRSTM(mask, gvl);
|
||||
if(index == -1){//no elements greater than scale
|
||||
if(scale != 0.0){
|
||||
v0 = VFDIVVF_FLOAT(v0, scale, gvl);
|
||||
vr = VFMACCVV_FLOAT(vr, v0, v0, gvl);
|
||||
}
|
||||
}else{//found greater element
|
||||
//ssq in vector vr: vr[0]
|
||||
vr = VFREDSUM_FLOAT(vr, v_zero, gvl);
|
||||
//total ssq before current vector
|
||||
ssq += vr[0];
|
||||
//find max
|
||||
vr = VFREDMAXVS_FLOAT(v0, v_zero, gvl);
|
||||
//update ssq before max_index
|
||||
ssq = ssq * (scale/vr[0])*(scale/vr[0]);
|
||||
//update scale
|
||||
scale = vr[0];
|
||||
v0 = VFDIVVF_FLOAT(v0, scale, gvl);
|
||||
vr = VFMACCVV_FLOAT(v_zero, v0, v0, gvl);
|
||||
}
|
||||
//ssq in vector vr: vr[0]
|
||||
vr = VFREDSUM_FLOAT(vr, v_zero, gvl);
|
||||
//total ssq now
|
||||
ssq += vr[0];
|
||||
}
|
||||
}
|
||||
return(scale * sqrt(ssq));
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,70 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2013, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
|
||||
/*****************************************************
|
||||
* 2014/06/09 Saar
|
||||
*
|
||||
* Order ColMajor
|
||||
* No Trans
|
||||
*
|
||||
******************************************************/
|
||||
|
||||
int CNAME(BLASLONG rows, BLASLONG cols, FLOAT alpha_r, FLOAT alpha_i, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG ldb)
|
||||
{
|
||||
BLASLONG i,j,ia;
|
||||
FLOAT *aptr,*bptr;
|
||||
|
||||
if ( rows <= 0 ) return(0);
|
||||
if ( cols <= 0 ) return(0);
|
||||
|
||||
aptr = a;
|
||||
bptr = b;
|
||||
|
||||
lda *= 2;
|
||||
ldb *= 2;
|
||||
|
||||
for ( i=0; i<cols ; i++ )
|
||||
{
|
||||
ia = 0;
|
||||
|
||||
for(j=0; j<rows; j++)
|
||||
{
|
||||
bptr[ia] = alpha_r * aptr[ia] - alpha_i * aptr[ia+1];
|
||||
bptr[ia+1] = alpha_r * aptr[ia+1] + alpha_i * aptr[ia];
|
||||
ia+=2;
|
||||
}
|
||||
aptr += lda;
|
||||
bptr += ldb;
|
||||
}
|
||||
|
||||
return(0);
|
||||
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,69 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2013, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
|
||||
/*****************************************************
|
||||
* 2014/06/09 Saar
|
||||
*
|
||||
* Order ColMajor
|
||||
* No Trans, conjugate
|
||||
*
|
||||
******************************************************/
|
||||
|
||||
int CNAME(BLASLONG rows, BLASLONG cols, FLOAT alpha_r, FLOAT alpha_i, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG ldb)
|
||||
{
|
||||
BLASLONG i,j,ia;
|
||||
FLOAT *aptr,*bptr;
|
||||
|
||||
if ( rows <= 0 ) return(0);
|
||||
if ( cols <= 0 ) return(0);
|
||||
|
||||
aptr = a;
|
||||
bptr = b;
|
||||
lda *= 2;
|
||||
ldb *= 2;
|
||||
|
||||
for ( i=0; i<cols ; i++ )
|
||||
{
|
||||
ia = 0;
|
||||
|
||||
for(j=0; j<rows; j++)
|
||||
{
|
||||
bptr[ia] = alpha_r * aptr[ia] + alpha_i * aptr[ia+1];
|
||||
bptr[ia+1] = - alpha_r * aptr[ia+1] + alpha_i * aptr[ia];
|
||||
ia += 2;
|
||||
}
|
||||
aptr += lda;
|
||||
bptr += ldb;
|
||||
}
|
||||
|
||||
return(0);
|
||||
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,71 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2013, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
|
||||
/*****************************************************
|
||||
* 2014/06/09 Saar
|
||||
*
|
||||
* Order ColMajor
|
||||
* Trans
|
||||
*
|
||||
******************************************************/
|
||||
|
||||
int CNAME(BLASLONG rows, BLASLONG cols, FLOAT alpha_r, FLOAT alpha_i, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG ldb)
|
||||
{
|
||||
BLASLONG i,j,ia,ib;
|
||||
FLOAT *aptr,*bptr;
|
||||
|
||||
if ( rows <= 0 ) return(0);
|
||||
if ( cols <= 0 ) return(0);
|
||||
|
||||
aptr = a;
|
||||
|
||||
lda *= 2;
|
||||
ldb *= 2;
|
||||
ib = 0;
|
||||
for ( i=0; i<cols ; i++ )
|
||||
{
|
||||
bptr = &b[ib];
|
||||
ia = 0;
|
||||
|
||||
for(j=0; j<rows; j++)
|
||||
{
|
||||
bptr[0] = alpha_r * aptr[ia] - alpha_i * aptr[ia+1];
|
||||
bptr[1] = alpha_r * aptr[ia+1] + alpha_i * aptr[ia];
|
||||
ia += 2;
|
||||
bptr += ldb;
|
||||
}
|
||||
aptr += lda;
|
||||
ib += 2;
|
||||
}
|
||||
|
||||
return(0);
|
||||
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,71 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2013, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
|
||||
/*****************************************************
|
||||
* 2014/06/09 Saar
|
||||
*
|
||||
* Order ColMajor
|
||||
* Trans, conjugate
|
||||
*
|
||||
******************************************************/
|
||||
|
||||
int CNAME(BLASLONG rows, BLASLONG cols, FLOAT alpha_r, FLOAT alpha_i, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG ldb)
|
||||
{
|
||||
BLASLONG i,j,ia,ib;
|
||||
FLOAT *aptr,*bptr;
|
||||
|
||||
if ( rows <= 0 ) return(0);
|
||||
if ( cols <= 0 ) return(0);
|
||||
|
||||
aptr = a;
|
||||
|
||||
lda *= 2;
|
||||
ldb *= 2;
|
||||
ib = 0;
|
||||
for ( i=0; i<cols ; i++ )
|
||||
{
|
||||
bptr = &b[ib];
|
||||
ia = 0;
|
||||
|
||||
for(j=0; j<rows; j++)
|
||||
{
|
||||
bptr[0] = alpha_r * aptr[ia] + alpha_i * aptr[ia+1];
|
||||
bptr[1] = - alpha_r * aptr[ia+1] + alpha_i * aptr[ia];
|
||||
ia += 2;
|
||||
bptr += ldb;
|
||||
}
|
||||
aptr += lda;
|
||||
ib += 2;
|
||||
}
|
||||
|
||||
return(0);
|
||||
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,70 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2013, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
|
||||
/*****************************************************
|
||||
* 2014/06/09 Saar
|
||||
*
|
||||
* Order rowMajor
|
||||
* No Trans
|
||||
*
|
||||
******************************************************/
|
||||
|
||||
int CNAME(BLASLONG rows, BLASLONG cols, FLOAT alpha_r, FLOAT alpha_i, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG ldb)
|
||||
{
|
||||
BLASLONG i,j,ia;
|
||||
FLOAT *aptr,*bptr;
|
||||
|
||||
if ( rows <= 0 ) return(0);
|
||||
if ( cols <= 0 ) return(0);
|
||||
|
||||
aptr = a;
|
||||
bptr = b;
|
||||
|
||||
lda *=2;
|
||||
ldb *=2;
|
||||
|
||||
for ( i=0; i<rows ; i++ )
|
||||
{
|
||||
ia = 0;
|
||||
|
||||
for(j=0; j<cols; j++)
|
||||
{
|
||||
bptr[ia] = alpha_r * aptr[ia] - alpha_i * aptr[ia+1];
|
||||
bptr[ia+1] = alpha_r * aptr[ia+1] + alpha_i * aptr[ia];
|
||||
ia += 2;
|
||||
}
|
||||
aptr += lda;
|
||||
bptr += ldb;
|
||||
}
|
||||
|
||||
return(0);
|
||||
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,69 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2013, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
|
||||
/*****************************************************
|
||||
* 2014/06/09 Saar
|
||||
*
|
||||
* Order rowMajor
|
||||
* No Trans , conjugate
|
||||
*
|
||||
******************************************************/
|
||||
|
||||
int CNAME(BLASLONG rows, BLASLONG cols, FLOAT alpha_r, FLOAT alpha_i, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG ldb)
|
||||
{
|
||||
BLASLONG i,j,ia;
|
||||
FLOAT *aptr,*bptr;
|
||||
|
||||
if ( rows <= 0 ) return(0);
|
||||
if ( cols <= 0 ) return(0);
|
||||
|
||||
aptr = a;
|
||||
bptr = b;
|
||||
|
||||
lda *=2;
|
||||
ldb *=2;
|
||||
|
||||
for ( i=0; i<rows ; i++ )
|
||||
{
|
||||
ia = 0;
|
||||
for(j=0; j<cols; j++)
|
||||
{
|
||||
bptr[ia] = alpha_r * aptr[ia] + alpha_i * aptr[ia+1];
|
||||
bptr[ia+1] = - alpha_r * aptr[ia+1] + alpha_i * aptr[ia];
|
||||
ia += 2;
|
||||
}
|
||||
aptr += lda;
|
||||
bptr += ldb;
|
||||
}
|
||||
|
||||
return(0);
|
||||
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,72 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2013, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
|
||||
/*****************************************************
|
||||
* 2014/06/09 Saar
|
||||
*
|
||||
* Order rowMajor
|
||||
* Trans
|
||||
*
|
||||
******************************************************/
|
||||
|
||||
int CNAME(BLASLONG rows, BLASLONG cols, FLOAT alpha_r, FLOAT alpha_i, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG ldb)
|
||||
{
|
||||
BLASLONG i,j,ia,ib;
|
||||
FLOAT *aptr,*bptr;
|
||||
|
||||
if ( rows <= 0 ) return(0);
|
||||
if ( cols <= 0 ) return(0);
|
||||
|
||||
aptr = a;
|
||||
|
||||
lda *= 2;
|
||||
ldb *= 2;
|
||||
ib = 0;
|
||||
|
||||
for ( i=0; i<rows ; i++ )
|
||||
{
|
||||
bptr = &b[ib];
|
||||
ia = 0;
|
||||
|
||||
for(j=0; j<cols; j++)
|
||||
{
|
||||
bptr[0] = alpha_r * aptr[ia] - alpha_i * aptr[ia+1];
|
||||
bptr[1] = alpha_r * aptr[ia+1] + alpha_i * aptr[ia];
|
||||
ia += 2;
|
||||
bptr += ldb;
|
||||
}
|
||||
aptr += lda;
|
||||
ib += 2;
|
||||
}
|
||||
|
||||
return(0);
|
||||
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,72 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2013, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
|
||||
/*****************************************************
|
||||
* 2014/06/09 Saar
|
||||
*
|
||||
* Order rowMajor
|
||||
* Trans, conjugate
|
||||
*
|
||||
******************************************************/
|
||||
|
||||
int CNAME(BLASLONG rows, BLASLONG cols, FLOAT alpha_r, FLOAT alpha_i, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG ldb)
|
||||
{
|
||||
BLASLONG i,j,ia,ib;
|
||||
FLOAT *aptr,*bptr;
|
||||
|
||||
if ( rows <= 0 ) return(0);
|
||||
if ( cols <= 0 ) return(0);
|
||||
|
||||
aptr = a;
|
||||
|
||||
lda *= 2;
|
||||
ldb *= 2;
|
||||
ib = 0;
|
||||
|
||||
for ( i=0; i<rows ; i++ )
|
||||
{
|
||||
bptr = &b[ib];
|
||||
ia = 0;
|
||||
|
||||
for(j=0; j<cols; j++)
|
||||
{
|
||||
bptr[0] = alpha_r * aptr[ia] + alpha_i * aptr[ia+1];
|
||||
bptr[1] = - alpha_r * aptr[ia+1] + alpha_i * aptr[ia];
|
||||
ia += 2;
|
||||
bptr += ldb;
|
||||
}
|
||||
aptr += lda;
|
||||
ib += 2;
|
||||
}
|
||||
|
||||
return(0);
|
||||
|
||||
}
|
||||
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue