From 5729088994b42077c7aaa4b2e713a6e3bf79b312 Mon Sep 17 00:00:00 2001 From: wernsaar Date: Mon, 16 Sep 2013 14:41:37 +0200 Subject: [PATCH] Initial checkin of port for ARM --- Makefile.arm | 3 + Makefile.system | 88 ++++++++++++++++-- c_check | 5 +- common.h | 21 +++++ common_arm.h | 167 +++++++++++++++++++++++++++++++++ ctest.c | 16 ++++ getarch.c | 15 +++ kernel/Makefile.L3 | 198 +++------------------------------------- kernel/arm/KERNEL | 46 ++++++++++ kernel/arm/KERNEL.ARMV7 | 137 +++++++++++++++++++++++++++ kernel/arm/Makefile | 2 + kernel/arm/amax.c | 65 +++++++++++++ kernel/arm/amin.c | 65 +++++++++++++ kernel/arm/asum.c | 59 ++++++++++++ kernel/arm/axpy.c | 56 ++++++++++++ kernel/arm/copy.c | 51 +++++++++++ kernel/arm/dot.c | 56 ++++++++++++ kernel/arm/gemv_n.c | 58 ++++++++++++ kernel/arm/gemv_t.c | 59 ++++++++++++ kernel/arm/iamax.c | 67 ++++++++++++++ kernel/arm/iamin.c | 67 ++++++++++++++ kernel/arm/imax.c | 58 ++++++++++++ kernel/arm/imin.c | 56 ++++++++++++ kernel/arm/izamax.c | 73 +++++++++++++++ kernel/arm/izamin.c | 73 +++++++++++++++ kernel/arm/max.c | 55 +++++++++++ kernel/arm/min.c | 55 +++++++++++ kernel/arm/nrm2.c | 80 ++++++++++++++++ kernel/arm/rot.c | 54 +++++++++++ kernel/arm/scal.c | 50 ++++++++++ kernel/arm/swap.c | 54 +++++++++++ kernel/arm/zamax.c | 73 +++++++++++++++ kernel/arm/zamin.c | 73 +++++++++++++++ kernel/arm/zasum.c | 63 +++++++++++++ kernel/arm/zaxpy.c | 64 +++++++++++++ kernel/arm/zcopy.c | 55 +++++++++++ kernel/arm/zdot.c | 70 ++++++++++++++ kernel/arm/zgemv_n.c | 117 ++++++++++++++++++++++++ kernel/arm/zgemv_t.c | 123 +++++++++++++++++++++++++ kernel/arm/znrm2.c | 98 ++++++++++++++++++++ kernel/arm/zrot.c | 60 ++++++++++++ kernel/arm/zscal.c | 56 ++++++++++++ kernel/arm/zswap.c | 62 +++++++++++++ param.h | 44 +++++++++ 44 files changed, 2671 insertions(+), 196 deletions(-) create mode 100644 Makefile.arm create mode 100644 common_arm.h create mode 100644 kernel/arm/KERNEL create mode 100644 kernel/arm/KERNEL.ARMV7 create 
mode 100644 kernel/arm/Makefile create mode 100644 kernel/arm/amax.c create mode 100644 kernel/arm/amin.c create mode 100644 kernel/arm/asum.c create mode 100644 kernel/arm/axpy.c create mode 100644 kernel/arm/copy.c create mode 100644 kernel/arm/dot.c create mode 100644 kernel/arm/gemv_n.c create mode 100644 kernel/arm/gemv_t.c create mode 100644 kernel/arm/iamax.c create mode 100644 kernel/arm/iamin.c create mode 100644 kernel/arm/imax.c create mode 100644 kernel/arm/imin.c create mode 100644 kernel/arm/izamax.c create mode 100644 kernel/arm/izamin.c create mode 100644 kernel/arm/max.c create mode 100644 kernel/arm/min.c create mode 100644 kernel/arm/nrm2.c create mode 100644 kernel/arm/rot.c create mode 100644 kernel/arm/scal.c create mode 100644 kernel/arm/swap.c create mode 100644 kernel/arm/zamax.c create mode 100644 kernel/arm/zamin.c create mode 100644 kernel/arm/zasum.c create mode 100644 kernel/arm/zaxpy.c create mode 100644 kernel/arm/zcopy.c create mode 100644 kernel/arm/zdot.c create mode 100644 kernel/arm/zgemv_n.c create mode 100644 kernel/arm/zgemv_t.c create mode 100644 kernel/arm/znrm2.c create mode 100644 kernel/arm/zrot.c create mode 100644 kernel/arm/zscal.c create mode 100644 kernel/arm/zswap.c diff --git a/Makefile.arm b/Makefile.arm new file mode 100644 index 000000000..05ea9c679 --- /dev/null +++ b/Makefile.arm @@ -0,0 +1,3 @@ +ifdef BINARY64 +else +endif diff --git a/Makefile.system b/Makefile.system index 5c5801361..e5358f65b 100644 --- a/Makefile.system +++ b/Makefile.system @@ -77,6 +77,11 @@ ifndef NO_PARALLEL_MAKE NO_PARALLEL_MAKE=0 endif GETARCH_FLAGS += -DNO_PARALLEL_MAKE=$(NO_PARALLEL_MAKE) + +ifeq ($(HOSTCC), loongcc) +GETARCH_FLAGS += -static +endif + # This operation is expensive, so execution should be once. 
ifndef GOTOBLAS_MAKEFILE export GOTOBLAS_MAKEFILE = 1 @@ -184,8 +189,15 @@ ifeq ($(GCCMINORVERSIONGTEQ7), 1) CCOMMON_OPT += -DMS_ABI endif endif - endif + +# Ensure the correct stack alignment on Win32 +# http://permalink.gmane.org/gmane.comp.lib.openblas.general/97 +ifeq ($(ARCH), x86) +CCOMMON_OPT += -mincoming-stack-boundary=2 +FCOMMON_OPT += -mincoming-stack-boundary=2 +endif + endif ifeq ($(OSNAME), Interix) @@ -240,7 +252,8 @@ NO_BINARY_MODE = 1 endif ifndef NO_EXPRECISION ifeq ($(F_COMPILER), GFORTRAN) -ifeq ($(C_COMPILER), GCC) +# ifeq logical or. GCC or LSB +ifeq ($(C_COMPILER), $(filter $(C_COMPILER),GCC LSB)) EXPRECISION = 1 CCOMMON_OPT += -DEXPRECISION -m128bit-long-double FCOMMON_OPT += -m128bit-long-double @@ -257,7 +270,8 @@ endif ifeq ($(ARCH), x86_64) ifndef NO_EXPRECISION ifeq ($(F_COMPILER), GFORTRAN) -ifeq ($(C_COMPILER), GCC) +# ifeq logical or. GCC or LSB +ifeq ($(C_COMPILER), $(filter $(C_COMPILER),GCC LSB)) EXPRECISION = 1 CCOMMON_OPT += -DEXPRECISION -m128bit-long-double FCOMMON_OPT += -m128bit-long-double @@ -276,7 +290,8 @@ CCOMMON_OPT += -wd981 endif ifeq ($(USE_OPENMP), 1) -ifeq ($(C_COMPILER), GCC) +# ifeq logical or. 
GCC or LSB +ifeq ($(C_COMPILER), $(filter $(C_COMPILER),GCC LSB)) CCOMMON_OPT += -fopenmp endif @@ -309,14 +324,16 @@ ifeq ($(ARCH), x86) DYNAMIC_CORE = KATMAI COPPERMINE NORTHWOOD PRESCOTT BANIAS \ CORE2 PENRYN DUNNINGTON NEHALEM ATHLON OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO ifneq ($(NO_AVX), 1) -DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER +DYNAMIC_CORE += SANDYBRIDGE +#BULLDOZER PILEDRIVER endif endif ifeq ($(ARCH), x86_64) DYNAMIC_CORE = PRESCOTT CORE2 PENRYN DUNNINGTON NEHALEM OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO ifneq ($(NO_AVX), 1) -DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER +DYNAMIC_CORE += SANDYBRIDGE +#BULLDOZER PILEDRIVER endif endif @@ -346,19 +363,24 @@ NO_BINARY_MODE = 1 BINARY_DEFINED = 1 endif +ifeq ($(ARCH), arm) +NO_BINARY_MODE = 1 +BINARY_DEFINED = 1 +endif # # C Compiler dependent settings # -# ifeq logical or. GCC or CLANG + +# ifeq logical or. GCC or CLANG or LSB # http://stackoverflow.com/questions/7656425/makefile-ifeq-logical-or -ifeq ($(C_COMPILER), $(filter $(C_COMPILER),GCC CLANG)) +ifeq ($(C_COMPILER), $(filter $(C_COMPILER),GCC CLANG LSB)) CCOMMON_OPT += -Wall COMMON_PROF += -fno-inline NO_UNINITIALIZED_WARN = -Wno-uninitialized ifeq ($(QUIET_MAKE), 1) -CCOMMON_OPT += $(NO_UNINITIALIZED_WARN) +CCOMMON_OPT += $(NO_UNINITIALIZED_WARN) -Wno-unused endif ifdef NO_BINARY_MODE @@ -445,9 +467,12 @@ endif ifeq ($(F_COMPILER), GFORTRAN) CCOMMON_OPT += -DF_INTERFACE_GFORT FCOMMON_OPT += -Wall +#Don't include -lgfortran, when NO_LAPACK=1 or lsbcc ifneq ($(NO_LAPACK), 1) +ifneq ($(C_COMPILER), LSB) EXTRALIB += -lgfortran endif +endif ifdef NO_BINARY_MODE ifeq ($(ARCH), mips64) ifdef BINARY64 @@ -554,11 +579,28 @@ ifdef INTERFACE64 FCOMMON_OPT += -i8 endif endif + +ifeq ($(ARCH), mips64) +ifndef BINARY64 +FCOMMON_OPT += -n32 +else +FCOMMON_OPT += -n64 +endif +ifeq ($(CORE), LOONGSON3A) +FCOMMON_OPT += -loongson3 -static +endif + +ifeq ($(CORE), LOONGSON3B) +FCOMMON_OPT += -loongson3 -static +endif + +else ifndef 
BINARY64 FCOMMON_OPT += -m32 else FCOMMON_OPT += -m64 endif +endif ifdef USE_OPENMP FEXTRALIB += -lstdc++ @@ -567,12 +609,30 @@ endif endif ifeq ($(C_COMPILER), OPEN64) + +ifeq ($(ARCH), mips64) +ifndef BINARY64 +CCOMMON_OPT += -n32 +else +CCOMMON_OPT += -n64 +endif +ifeq ($(CORE), LOONGSON3A) +CCOMMON_OPT += -loongson3 -static +endif + +ifeq ($(CORE), LOONGSON3B) +CCOMMON_OPT += -loongson3 -static +endif + +else + ifndef BINARY64 CCOMMON_OPT += -m32 else CCOMMON_OPT += -m64 endif endif +endif ifeq ($(C_COMPILER), SUN) CCOMMON_OPT += -w @@ -781,6 +841,15 @@ override FFLAGS += $(COMMON_OPT) $(FCOMMON_OPT) override FPFLAGS += $(COMMON_OPT) $(FCOMMON_OPT) $(COMMON_PROF) #MAKEOVERRIDES = +LAPACK_CFLAGS = $(CFLAGS) +LAPACK_CFLAGS += -DHAVE_LAPACK_CONFIG_H +ifdef INTERFACE64 +LAPACK_CFLAGS += -DLAPACK_ILP64 +endif +ifeq ($(C_COMPILER), LSB) +LAPACK_CFLAGS += -DLAPACK_COMPLEX_STRUCTURE +endif + ifndef SUFFIX SUFFIX = o endif @@ -832,6 +901,7 @@ export CC export FC export BU export FU +export NEED2UNDERSCORES export USE_THREAD export NUM_THREADS export NUM_CORES diff --git a/c_check b/c_check index acd8df5f4..c1cdd59c4 100644 --- a/c_check +++ b/c_check @@ -33,6 +33,7 @@ if ($ARGV[0] =~ /(.*)(-[.\d]+)/) { } $compiler = ""; +$compiler = LSB if ($data =~ /COMPILER_LSB/); $compiler = CLANG if ($data =~ /COMPILER_CLANG/); $compiler = PGI if ($data =~ /COMPILER_PGI/); $compiler = PATHSCALE if ($data =~ /COMPILER_PATHSCALE/); @@ -62,6 +63,7 @@ $architecture = mips64 if ($data =~ /ARCH_MIPS64/); $architecture = alpha if ($data =~ /ARCH_ALPHA/); $architecture = sparc if ($data =~ /ARCH_SPARC/); $architecture = ia64 if ($data =~ /ARCH_IA64/); +$architecture = arm if ($data =~ /ARCH_ARM/); $defined = 0; @@ -122,7 +124,7 @@ if ($compiler eq "CLANG") { $openmp = "-fopenmp"; } -if ($compiler eq "GCC") { +if ($compiler eq "GCC" || $compiler eq "LSB") { $openmp = "-fopenmp"; } @@ -148,6 +150,7 @@ $architecture = mips64 if ($data =~ /ARCH_MIPS64/); $architecture = alpha if ($data =~ 
/ARCH_ALPHA/); $architecture = sparc if ($data =~ /ARCH_SPARC/); $architecture = ia64 if ($data =~ /ARCH_IA64/); +$architecture = arm if ($data =~ /ARCH_ARM/); $binformat = bin32; $binformat = bin64 if ($data =~ /BINARY_64/); diff --git a/common.h b/common.h index d46a5230a..418ed25f5 100644 --- a/common.h +++ b/common.h @@ -314,6 +314,23 @@ typedef int blasint; #define YIELDING sched_yield() #endif +/*** +To alloc job_t on heap or stack. +please see https://github.com/xianyi/OpenBLAS/issues/246 +***/ +#if defined(OS_WINDOWS) +#define GETRF_MEM_ALLOC_THRESHOLD 32 +#define BLAS3_MEM_ALLOC_THRESHOLD 32 +#endif + +#ifndef GETRF_MEM_ALLOC_THRESHOLD +#define GETRF_MEM_ALLOC_THRESHOLD 80 +#endif + +#ifndef BLAS3_MEM_ALLOC_THRESHOLD +#define BLAS3_MEM_ALLOC_THRESHOLD 160 +#endif + #ifdef QUAD_PRECISION #include "common_quad.h" #endif @@ -346,6 +363,10 @@ typedef int blasint; #include "common_mips64.h" #endif +#ifdef ARCH_ARM +#include "common_arm.h" +#endif + #ifdef OS_LINUX #include "common_linux.h" #endif diff --git a/common_arm.h b/common_arm.h new file mode 100644 index 000000000..1617b2e4c --- /dev/null +++ b/common_arm.h @@ -0,0 +1,167 @@ +/***************************************************************************** +Copyright (c) 2011, Lab of Parallel Software and Computational Science,ICSAS +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + 3. 
Neither the name of the ISCAS nor the names of its contributors may + be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +**********************************************************************************/ + +/*********************************************************************/ +/* Copyright 2009, 2010 The University of Texas at Austin. */ +/* All rights reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* 1. Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* */ +/* 2. Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. 
*/ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ +/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ +/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ +/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ +/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/* POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/* The views and conclusions contained in the software and */ +/* documentation are those of the authors and should not be */ +/* interpreted as representing official policies, either expressed */ +/* or implied, of The University of Texas at Austin. 
*/
+/*********************************************************************/
+
+#ifndef COMMON_ARM
+#define COMMON_ARM
+
+#define MB
+#define WMB
+
+#define INLINE inline
+
+#define RETURN_BY_COMPLEX
+
+#ifndef ASSEMBLER
+
+static void INLINE blas_lock(volatile unsigned long *address){
+/* NOTE(review): the whole body below is commented out, so this function does nothing and takes no lock. The disabled asm uses ll/sc/sync mnemonics -- presumably carried over from another port; confirm before enabling. */
+// long int ret, val = 1;
+/*
+  do {
+    while (*address) {YIELDING;};
+
+    __asm__ __volatile__(
+      "1: ll %0, %3\n"
+      " ori %2, %0, 1\n"
+      " sc %2, %1\n"
+      " beqz %2, 1b\n"
+      " andi %2, %0, 1\n"
+      " sync\n"
+      : "=&r" (val), "=m" (address), "=&r" (ret)
+      : "m" (address)
+      : "memory");
+
+  } while (ret);
+*/
+}
+
+static inline unsigned int rpcc(void){
+  unsigned long ret=0;
+  /* Stub: no counter is read here, so the returned value is always 0. */
+  return ret;
+}
+/* Plain integer division x / y; y is not checked against zero here. */
+static inline int blas_quickdivide(blasint x, blasint y){
+  return x / y;
+}
+
+#if defined(DOUBLE)
+#define GET_IMAGE(res) __asm__ __volatile__("vstr.f64 d1, %0" : "=m"(res) : : "memory")
+#else
+#define GET_IMAGE(res) __asm__ __volatile__("vstr.f32 s1, %0" : "=m"(res) : : "memory")
+#endif
+
+#define GET_IMAGE_CANCEL
+
+#endif
+
+
+#ifndef F_INTERFACE
+#define REALNAME ASMNAME
+#else
+#define REALNAME ASMFNAME
+#endif
+
+#if defined(ASSEMBLER) && !defined(NEEDPARAM)
+
+#define PROLOGUE \
+ .text ;\
+ .set arm ;\
+ .align 5 ;\
+ .globl REALNAME ;\
+ .ent REALNAME ;\
+ .type REALNAME, @function ;\
+REALNAME:
+
+#define EPILOGUE \
+ .end REALNAME
+
+#define PROFCODE
+
+#endif
+
+
+#define SEEK_ADDRESS
+
+#ifndef PAGESIZE
+#define PAGESIZE ( 4 << 10)
+#endif
+#define HUGE_PAGESIZE ( 4 << 20)
+
+#define BUFFER_SIZE (16 << 20)
+
+
+#define BASE_ADDRESS (START_ADDRESS - BUFFER_SIZE * MAX_CPU_NUMBER)
+
+#ifndef MAP_ANONYMOUS
+#define MAP_ANONYMOUS MAP_ANON
+#endif
+
+#endif
diff --git a/ctest.c b/ctest.c
index 624cad159..184416339 100644
--- a/ctest.c
+++ b/ctest.c
@@ -1,3 +1,13 @@
+//LSB (Linux Standard Base) compiler
+//only lsbcc (the C compiler) is supported; compiling as C++ hits the #error below
+#if defined (__LSB_VERSION__)
+#if !defined (__cplusplus)
+COMPILER_LSB
+#else
+#error "OpenBLAS only supports lsbcc."
+#endif +#endif + #if defined(__clang__) COMPILER_CLANG #endif @@ -114,3 +124,9 @@ ARCH_IA64 #if defined(__LP64) || defined(__LP64__) || defined(__ptr64) || defined(__x86_64__) || defined(__amd64__) || defined(__64BIT__) BINARY_64 #endif + +#if defined(__ARM_ARCH) || defined(__ARM_ARCH_7A__) +ARCH_ARM +#endif + + diff --git a/getarch.c b/getarch.c index 3ffda6244..3264a76f6 100644 --- a/getarch.c +++ b/getarch.c @@ -679,6 +679,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define CORENAME "generic" #endif +#ifdef FORCE_ARMV7 +#define FORCE +#define ARCHITECTURE "ARM" +#define SUBARCHITECTURE "ARMV7" +#define SUBDIRNAME "arm" +#define ARCHCONFIG "-DARMV7 " \ + "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \ + "-DL2_SIZE=512488 -DL2_LINESIZE=32 " \ + "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " +#define LIBNAME "armv7" +#define CORENAME "ARMV7" +#else +#endif + + #ifndef FORCE #if defined(__powerpc__) || defined(__powerpc) || defined(powerpc) || \ diff --git a/kernel/Makefile.L3 b/kernel/Makefile.L3 index f8152ac50..f543cd08d 100644 --- a/kernel/Makefile.L3 +++ b/kernel/Makefile.L3 @@ -14,6 +14,16 @@ ifeq ($(ARCH), MIPS) USE_GEMM3M = 1 endif +ifeq ($(ARCH), arm) +USE_TRMM = 1 +endif + +ifeq ($(TARGET), LOONGSON3B) +USE_TRMM = 1 +endif + + + SKERNELOBJS += \ sgemm_kernel$(TSUFFIX).$(SUFFIX) \ $(SGEMMINCOPYOBJ) $(SGEMMITCOPYOBJ) \ @@ -498,7 +508,8 @@ $(KDIR)xgemm_kernel_r$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(XGEMMKERNEL) $(XGEMMD $(KDIR)xgemm_kernel_b$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(XGEMMKERNEL) $(XGEMMDEPEND) $(CC) $(CFLAGS) -c -DXDOUBLE -DCOMPLEX -DCC $< -o $@ -ifeq ($(TARGET), LOONGSON3B) + +ifdef USE_TRMM $(KDIR)strmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(STRMMKERNEL) $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -DLEFT -UTRANSA $< -o $@ @@ -582,24 +593,6 @@ $(KDIR)ztrmm_kernel_RR$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL) $(KDIR)ztrmm_kernel_RC$(TSUFFIX).$(SUFFIX) : 
$(KERNELDIR)/$(ZTRMMKERNEL) $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC $< -o $@ - -else - -ifdef STRMMKERNEL - -$(KDIR)strmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(STRMMKERNEL) - $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -DLEFT -UTRANSA $< -o $@ - -$(KDIR)strmm_kernel_LT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(STRMMKERNEL) - $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -DLEFT -DTRANSA $< -o $@ - -$(KDIR)strmm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(STRMMKERNEL) - $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -UTRANSA $< -o $@ - -$(KDIR)strmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(STRMMKERNEL) - $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o $@ - - else $(KDIR)strmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEMMKERNEL) $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -DLEFT -UTRANSA $< -o $@ @@ -613,79 +606,17 @@ $(KDIR)strmm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEMMKERNEL) $(KDIR)strmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEMMKERNEL) $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o $@ -endif - -ifdef DTRMMKERNEL - -ifdef DTRMMKERNEL_LN -$(KDIR)dtrmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DTRMMKERNEL_LN) - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -DLEFT -UTRANSA $< -o $@ -else -$(KDIR)dtrmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DTRMMKERNEL) - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -DLEFT -UTRANSA $< -o $@ -endif - -ifdef DTRMMKERNEL_LT -$(KDIR)dtrmm_kernel_LT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DTRMMKERNEL_LT) - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -DLEFT -DTRANSA $< -o $@ -else -$(KDIR)dtrmm_kernel_LT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DTRMMKERNEL) - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -DLEFT -DTRANSA $< -o $@ -endif - -ifdef DTRMMKERNEL_RN -$(KDIR)dtrmm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DTRMMKERNEL_RN) - $(CC) $(CFLAGS) 
-c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -ULEFT -UTRANSA $< -o $@ -else -$(KDIR)dtrmm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DTRMMKERNEL) - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -ULEFT -UTRANSA $< -o $@ -endif - -ifdef DTRMMKERNEL_RT -$(KDIR)dtrmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DTRMMKERNEL_RT) - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o $@ -else -$(KDIR)dtrmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DTRMMKERNEL) - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o $@ -endif - -else - -ifdef DTRMMKERNEL_LN -$(KDIR)dtrmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DGEMMKERNEL_LN) - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -DLEFT -UTRANSA $< -o $@ -else $(KDIR)dtrmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DGEMMKERNEL) $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -DLEFT -UTRANSA $< -o $@ -endif -ifdef DTRMMKERNEL_LT -$(KDIR)dtrmm_kernel_LT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DGEMMKERNEL_LT) - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -DLEFT -DTRANSA $< -o $@ -else $(KDIR)dtrmm_kernel_LT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DGEMMKERNEL) $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -DLEFT -DTRANSA $< -o $@ -endif -ifdef DTRMMKERNEL_RN -$(KDIR)dtrmm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DGEMMKERNEL_RN) - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -ULEFT -UTRANSA $< -o $@ -else $(KDIR)dtrmm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DGEMMKERNEL) $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -ULEFT -UTRANSA $< -o $@ -endif -ifdef DTRMMKERNEL_RT -$(KDIR)dtrmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DGEMMKERNEL_RT) - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o $@ -else $(KDIR)dtrmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DGEMMKERNEL) $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o $@ -endif - -endif - -ifdef QTRMMKERNEL 
$(KDIR)qtrmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(QGEMMKERNEL) $(CC) $(CFLAGS) -c -DTRMMKERNEL -DXDOUBLE -UCOMPLEX -DLEFT -UTRANSA $< -o $@ @@ -699,50 +630,6 @@ $(KDIR)qtrmm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(QGEMMKERNEL) $(KDIR)qtrmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(QGEMMKERNEL) $(CC) $(CFLAGS) -c -DTRMMKERNEL -DXDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o $@ -else - -$(KDIR)qtrmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(QGEMMKERNEL) - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DXDOUBLE -UCOMPLEX -DLEFT -UTRANSA $< -o $@ - -$(KDIR)qtrmm_kernel_LT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(QGEMMKERNEL) - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DXDOUBLE -UCOMPLEX -DLEFT -DTRANSA $< -o $@ - -$(KDIR)qtrmm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(QGEMMKERNEL) - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DXDOUBLE -UCOMPLEX -ULEFT -UTRANSA $< -o $@ - -$(KDIR)qtrmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(QGEMMKERNEL) - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DXDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o $@ - -endif - -ifdef CTRMMKERNEL - -$(KDIR)ctrmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL) - $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN $< -o $@ - -$(KDIR)ctrmm_kernel_LT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL) - $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -DTRANSA -UCONJ -DNN $< -o $@ - -$(KDIR)ctrmm_kernel_LR$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL) - $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -UTRANSA -DCONJ -DCN $< -o $@ - -$(KDIR)ctrmm_kernel_LC$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL) - $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -DTRANSA -DCONJ -DCN $< -o $@ - -$(KDIR)ctrmm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL) - $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -UTRANSA -UCONJ -DNN $< -o $@ - -$(KDIR)ctrmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL) - $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE 
-DCOMPLEX -ULEFT -DTRANSA -UCONJ -DNN $< -o $@ - -$(KDIR)ctrmm_kernel_RR$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL) - $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -UTRANSA -DCONJ -DNC $< -o $@ - -$(KDIR)ctrmm_kernel_RC$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL) - $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC $< -o $@ - -else - $(KDIR)ctrmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMMKERNEL) $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN $< -o $@ @@ -767,37 +654,6 @@ $(KDIR)ctrmm_kernel_RR$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMMKERNEL) $(KDIR)ctrmm_kernel_RC$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMMKERNEL) $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC $< -o $@ -endif - -ifdef ZTRMMKERNEL - -$(KDIR)ztrmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL) - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN $< -o $@ - -$(KDIR)ztrmm_kernel_LT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL) - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -UCONJ -DNN $< -o $@ - -$(KDIR)ztrmm_kernel_LR$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL) - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -DCONJ -DCN $< -o $@ - -$(KDIR)ztrmm_kernel_LC$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL) - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -DCONJ -DCN $< -o $@ - -$(KDIR)ztrmm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL) - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -UCONJ -DNN $< -o $@ - -$(KDIR)ztrmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL) - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -UCONJ -DNN $< -o $@ - -$(KDIR)ztrmm_kernel_RR$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL) - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -DCONJ -DNC $< -o $@ - 
-$(KDIR)ztrmm_kernel_RC$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL) - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC $< -o $@ - - -else - $(KDIR)ztrmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMMKERNEL) $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN $< -o $@ @@ -821,37 +677,10 @@ $(KDIR)ztrmm_kernel_RR$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMMKERNEL) $(KDIR)ztrmm_kernel_RC$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMMKERNEL) $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC $< -o $@ - -endif endif -ifdef XTRMMKERNEL -$(KDIR)xtrmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(XTRMMKERNEL) - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DXDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN $< -o $@ -$(KDIR)xtrmm_kernel_LT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(XTRMMKERNEL) - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DXDOUBLE -DCOMPLEX -DLEFT -DTRANSA -UCONJ -DNN $< -o $@ - -$(KDIR)xtrmm_kernel_LR$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(XTRMMKERNEL) - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DXDOUBLE -DCOMPLEX -DLEFT -UTRANSA -DCONJ -DCN $< -o $@ - -$(KDIR)xtrmm_kernel_LC$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(XTRMMKERNEL) - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DXDOUBLE -DCOMPLEX -DLEFT -DTRANSA -DCONJ -DCN $< -o $@ - -$(KDIR)xtrmm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(XTRMMKERNEL) - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DXDOUBLE -DCOMPLEX -ULEFT -UTRANSA -UCONJ -DNN $< -o $@ - -$(KDIR)xtrmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(XTRMMKERNEL) - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DXDOUBLE -DCOMPLEX -ULEFT -DTRANSA -UCONJ -DNN $< -o $@ - -$(KDIR)xtrmm_kernel_RR$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(XTRMMKERNEL) - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DXDOUBLE -DCOMPLEX -ULEFT -UTRANSA -DCONJ -DNC $< -o $@ - -$(KDIR)xtrmm_kernel_RC$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(XTRMMKERNEL) - $(CC) $(CFLAGS) -c -DTRMMKERNEL -DXDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC $< -o $@ - -else 
$(KDIR)xtrmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(XGEMMKERNEL) $(CC) $(CFLAGS) -c -DTRMMKERNEL -DXDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN $< -o $@ @@ -877,9 +706,6 @@ $(KDIR)xtrmm_kernel_RR$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(XGEMMKERNEL) $(KDIR)xtrmm_kernel_RC$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(XGEMMKERNEL) $(CC) $(CFLAGS) -c -DTRMMKERNEL -DXDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC $< -o $@ -endif - - $(KDIR)cgemm3m_kernel$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMM3MKERNEL) $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DNN $< -o $@ diff --git a/kernel/arm/KERNEL b/kernel/arm/KERNEL new file mode 100644 index 000000000..aeccfbf4c --- /dev/null +++ b/kernel/arm/KERNEL @@ -0,0 +1,46 @@ +ifndef SNRM2KERNEL +SNRM2KERNEL = nrm2.c +endif + +ifndef DNRM2KERNEL +DNRM2KERNEL = nrm2.c +endif + +ifndef CNRM2KERNEL +CNRM2KERNEL = znrm2.c +endif + +ifndef ZNRM2KERNEL +ZNRM2KERNEL = znrm2.c +endif + +ifndef SCABS_KERNEL +SCABS_KERNEL = ../generic/cabs.c +endif + +ifndef DCABS_KERNEL +DCABS_KERNEL = ../generic/cabs.c +endif + +ifndef QCABS_KERNEL +QCABS_KERNEL = ../generic/cabs.c +endif + +ifndef LSAME_KERNEL +LSAME_KERNEL = ../generic/lsame.c +endif + +ifndef SGEMM_BETA +SGEMM_BETA = ../generic/gemm_beta.c +endif +ifndef DGEMM_BETA +DGEMM_BETA = ../generic/gemm_beta.c +endif +ifndef CGEMM_BETA +CGEMM_BETA = ../generic/zgemm_beta.c +endif +ifndef ZGEMM_BETA +ZGEMM_BETA = ../generic/zgemm_beta.c +endif + + diff --git a/kernel/arm/KERNEL.ARMV7 b/kernel/arm/KERNEL.ARMV7 new file mode 100644 index 000000000..bf6db20ca --- /dev/null +++ b/kernel/arm/KERNEL.ARMV7 @@ -0,0 +1,137 @@ +SAMAXKERNEL = ../arm/amax.c +DAMAXKERNEL = ../arm/amax.c +CAMAXKERNEL = ../arm/zamax.c +ZAMAXKERNEL = ../arm/zamax.c + +SAMINKERNEL = ../arm/amin.c +DAMINKERNEL = ../arm/amin.c +CAMINKERNEL = ../arm/zamin.c +ZAMINKERNEL = ../arm/zamin.c + +SMAXKERNEL = ../arm/max.c +DMAXKERNEL = ../arm/max.c + +SMINKERNEL = ../arm/min.c +DMINKERNEL = ../arm/min.c + +ISAMAXKERNEL = ../arm/iamax.c +IDAMAXKERNEL 
= ../arm/iamax.c +ICAMAXKERNEL = ../arm/izamax.c +IZAMAXKERNEL = ../arm/izamax.c + +ISAMINKERNEL = ../arm/iamin.c +IDAMINKERNEL = ../arm/iamin.c +ICAMINKERNEL = ../arm/izamin.c +IZAMINKERNEL = ../arm/izamin.c + +ISMAXKERNEL = ../arm/imax.c +IDMAXKERNEL = ../arm/imax.c + +ISMINKERNEL = ../arm/imin.c +IDMINKERNEL = ../arm/imin.c + +SSWAPKERNEL = ../arm/swap.c +DSWAPKERNEL = ../arm/swap.c +CSWAPKERNEL = ../arm/zswap.c +ZSWAPKERNEL = ../arm/zswap.c + +SASUMKERNEL = ../arm/asum.c +DASUMKERNEL = ../arm/asum.c +CASUMKERNEL = ../arm/zasum.c +ZASUMKERNEL = ../arm/zasum.c + +SAXPYKERNEL = ../arm/axpy.c +DAXPYKERNEL = ../arm/axpy.c +CAXPYKERNEL = ../arm/zaxpy.c +ZAXPYKERNEL = ../arm/zaxpy.c + +SCOPYKERNEL = ../arm/copy.c +DCOPYKERNEL = ../arm/copy.c +CCOPYKERNEL = ../arm/zcopy.c +ZCOPYKERNEL = ../arm/zcopy.c + +SDOTKERNEL = ../arm/dot.c +DDOTKERNEL = ../arm/dot.c +CDOTKERNEL = ../arm/zdot.c +ZDOTKERNEL = ../arm/zdot.c + +SNRM2KERNEL = ../arm/nrm2.c +DNRM2KERNEL = ../arm/nrm2.c +CNRM2KERNEL = ../arm/znrm2.c +ZNRM2KERNEL = ../arm/znrm2.c + +SROTKERNEL = ../arm/rot.c +DROTKERNEL = ../arm/rot.c +CROTKERNEL = ../arm/zrot.c +ZROTKERNEL = ../arm/zrot.c + +SSCALKERNEL = ../arm/scal.c +DSCALKERNEL = ../arm/scal.c +CSCALKERNEL = ../arm/zscal.c +ZSCALKERNEL = ../arm/zscal.c + +SGEMVNKERNEL = gemv_n.c +DGEMVNKERNEL = gemv_n.c +CGEMVNKERNEL = zgemv_n.c +ZGEMVNKERNEL = zgemv_n.c + +SGEMVTKERNEL = gemv_t.c +DGEMVTKERNEL = gemv_t.c +CGEMVTKERNEL = zgemv_t.c +ZGEMVTKERNEL = zgemv_t.c + +STRMMKERNEL = ../generic/trmmkernel_2x2.c +DTRMMKERNEL = ../generic/trmmkernel_2x2.c +CTRMMKERNEL = ../generic/ztrmmkernel_2x2.c +ZTRMMKERNEL = ../generic/ztrmmkernel_2x2.c + +SGEMMKERNEL = ../generic/gemmkernel_2x2.c +SGEMMONCOPY = ../generic/gemm_ncopy_2.c +SGEMMOTCOPY = ../generic/gemm_tcopy_2.c +SGEMMONCOPYOBJ = sgemm_oncopy.o +SGEMMOTCOPYOBJ = sgemm_otcopy.o + +DGEMMKERNEL = ../generic/gemmkernel_2x2.c +DGEMMONCOPY = ../generic/gemm_ncopy_2.c +DGEMMOTCOPY = ../generic/gemm_tcopy_2.c +DGEMMONCOPYOBJ = 
dgemm_oncopy.o +DGEMMOTCOPYOBJ = dgemm_otcopy.o + +CGEMMKERNEL = ../generic/zgemmkernel_2x2.c +CGEMMONCOPY = ../generic/zgemm_ncopy_2.c +CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c +CGEMMONCOPYOBJ = cgemm_oncopy.o +CGEMMOTCOPYOBJ = cgemm_otcopy.o + +ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c +ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c +ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c +ZGEMMONCOPYOBJ = zgemm_oncopy.o +ZGEMMOTCOPYOBJ = zgemm_otcopy.o + +STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c +STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c +STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c +STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c + +DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c +DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c +DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c +DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c + +CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c +CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c +CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c +CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c + +ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c +ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c +ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c +ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c + +CGEMM3MKERNEL = zgemm3m_kernel_8x4_core2.S +ZGEMM3MKERNEL = zgemm3m_kernel_4x4_core2.S + + + + diff --git a/kernel/arm/Makefile b/kernel/arm/Makefile new file mode 100644 index 000000000..efae70d7b --- /dev/null +++ b/kernel/arm/Makefile @@ -0,0 +1,2 @@ +clean :: + diff --git a/kernel/arm/amax.c b/kernel/arm/amax.c new file mode 100644 index 000000000..922100054 --- /dev/null +++ b/kernel/arm/amax.c @@ -0,0 +1,65 @@ +/************************************************************************************* +* +* Copyright (C) 2012-2013 Werner Saar (wernsaar@googlemail.com) +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) 
any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program. If not, see http://www.gnu.org/licenses/. +* +**************************************************************************************/ + +/************************************************************************************** +* 2013/09/14 Saar +* BLASTEST float : OK +* BLASTEST double : OK +* CTEST : NoTest +* TEST : NoTest +* +**************************************************************************************/ + +#include "common.h" +#include + +#if defined(DOUBLE) + +#define ABS fabs + +#else + +#define ABS fabsf + +#endif + + +FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) +{ + BLASLONG i=0; + BLASLONG ix=0; + FLOAT maxf=0.0; + + if (n < 0 || inc_x < 1 ) return(maxf); + + maxf=ABS(x[0]); + + while(i < n) + { + if( ABS(x[ix]) > ABS(maxf) ) + { + maxf = ABS(x[ix]); + } + ix += inc_x; + i++; + } + return(maxf); +} + + diff --git a/kernel/arm/amin.c b/kernel/arm/amin.c new file mode 100644 index 000000000..51be1d276 --- /dev/null +++ b/kernel/arm/amin.c @@ -0,0 +1,65 @@ +/************************************************************************************* +* +* Copyright (C) 2012-2013 Werner Saar (wernsaar@googlemail.com) +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program. If not, see http://www.gnu.org/licenses/. +* +**************************************************************************************/ + +/************************************************************************************** +* 2013/09/14 Saar +* BLASTEST float : OK +* BLASTEST double : OK +* CTEST : NoTest +* TEST : NoTest +* +**************************************************************************************/ + +#include "common.h" +#include + +#if defined(DOUBLE) + +#define ABS fabs + +#else + +#define ABS fabsf + +#endif + + +FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) +{ + BLASLONG i=0; + BLASLONG ix=0; + FLOAT minf=0.0; + + if (n < 0 || inc_x < 1 ) return(minf); + + minf=ABS(x[0]); + + while(i < n) + { + if( ABS(x[ix]) < ABS(minf) ) + { + minf = ABS(x[ix]); + } + ix += inc_x; + i++; + } + return(minf); +} + + diff --git a/kernel/arm/asum.c b/kernel/arm/asum.c new file mode 100644 index 000000000..1f3b2334a --- /dev/null +++ b/kernel/arm/asum.c @@ -0,0 +1,59 @@ +/************************************************************************************* +* +* Copyright (C) 2012-2013 Werner Saar (wernsaar@googlemail.com) +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program. If not, see http://www.gnu.org/licenses/. 
+* +**************************************************************************************/ + +/************************************************************************************** +* 2013/09/14 Saar +* BLASTEST float : OK +* BLASTEST double : OK +* CTEST : OK +* TEST : OK +* +**************************************************************************************/ + + +#include "common.h" +#include + +#if defined(DOUBLE) + +#define ABS fabs + +#else + +#define ABS fabsf + +#endif + + +FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) +{ + BLASLONG i=0; + FLOAT sumf = 0.0; + if (n < 0 || inc_x < 1 ) return(sumf); + + n *= inc_x; + while(i < n) + { + sumf += ABS(x[i]); + i += inc_x; + } + return(sumf); +} + + diff --git a/kernel/arm/axpy.c b/kernel/arm/axpy.c new file mode 100644 index 000000000..76586190d --- /dev/null +++ b/kernel/arm/axpy.c @@ -0,0 +1,56 @@ +/************************************************************************************* +* +* Copyright (C) 2012-2013 Werner Saar (wernsaar@googlemail.com) +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program. If not, see http://www.gnu.org/licenses/. 
+* +**************************************************************************************/ + +/************************************************************************************** +* 2013/09/14 Saar +* BLASTEST float : OK +* BLASTEST double : OK +* CTEST : OK +* TEST : OK +* +**************************************************************************************/ + + +#include "common.h" + +int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *dummy, BLASLONG dummy2) +{ + BLASLONG i=0; + BLASLONG ix,iy; + + if ( n < 0 ) return(0); + if ( da == 0.0 ) return(0); + + ix = 0; + iy = 0; + + while(i < n) + { + + y[iy] += da * x[ix] ; + ix += inc_x ; + iy += inc_y ; + i++ ; + + } + return(0); + +} + + diff --git a/kernel/arm/copy.c b/kernel/arm/copy.c new file mode 100644 index 000000000..2a1daabbc --- /dev/null +++ b/kernel/arm/copy.c @@ -0,0 +1,51 @@ +/************************************************************************************* +* +* Copyright (C) 2012-2013 Werner Saar (wernsaar@googlemail.com) +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program. If not, see http://www.gnu.org/licenses/. 
+* +**************************************************************************************/ + +/************************************************************************************** +* 2013/09/14 Saar +* BLASTEST float : OK +* BLASTEST double : OK +* CTEST : OK +* TEST : OK +* +**************************************************************************************/ + +#include "common.h" + +int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) +{ + BLASLONG i=0; + BLASLONG ix=0,iy=0; + + if ( n < 0 ) return(0); + + while(i < n) + { + + y[iy] = x[ix] ; + ix += inc_x ; + iy += inc_y ; + i++ ; + + } + return(0); + +} + + diff --git a/kernel/arm/dot.c b/kernel/arm/dot.c new file mode 100644 index 000000000..66d2b94a7 --- /dev/null +++ b/kernel/arm/dot.c @@ -0,0 +1,56 @@ +/************************************************************************************* +* +* Copyright (C) 2012-2013 Werner Saar (wernsaar@googlemail.com) +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program. If not, see http://www.gnu.org/licenses/. 
+* +**************************************************************************************/ + +/************************************************************************************** +* 2013/09/14 Saar +* BLASTEST float : OK +* BLASTEST double : OK +* CTEST : OK +* TEST : OK +* +**************************************************************************************/ + +#include "common.h" + +#if defined(DSDOT) +double CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) +#else +FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) +#endif +{ + BLASLONG i=0; + BLASLONG ix=0,iy=0; + double dot = 0.0 ; + + if ( n < 0 ) return(dot); + + while(i < n) + { + + dot += y[iy] * x[ix] ; + ix += inc_x ; + iy += inc_y ; + i++ ; + + } + return(dot); + +} + + diff --git a/kernel/arm/gemv_n.c b/kernel/arm/gemv_n.c new file mode 100644 index 000000000..aa023ed58 --- /dev/null +++ b/kernel/arm/gemv_n.c @@ -0,0 +1,58 @@ +/************************************************************************************* + * * + * * Copyright (C) 2012-2013 Werner Saar (wernsaar@googlemail.com) + * * + * * This program is free software: you can redistribute it and/or modify + * * it under the terms of the GNU General Public License as published by + * * the Free Software Foundation, either version 3 of the License, or + * * (at your option) any later version. + * * + * * This program is distributed in the hope that it will be useful, + * * but WITHOUT ANY WARRANTY; without even the implied warranty of + * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * * GNU General Public License for more details. + * * + * * You should have received a copy of the GNU General Public License + * * along with this program. If not, see http://www.gnu.org/licenses/. 
+ * * + * **************************************************************************************/ + +/************************************************************************************** + * * 2013/09/14 Saar + * * BLASTEST float : OK + * * BLASTEST double : OK + * CTEST : OK + * TEST : OK + * * + * **************************************************************************************/ + + +#include "common.h" + +int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *buffer) +{ + BLASLONG i; + BLASLONG ix,iy; + BLASLONG j; + FLOAT *a_ptr; + FLOAT temp; + + ix = 0; + a_ptr = a; + + for (j=0; j + +#if defined(DOUBLE) + +#define ABS fabs + +#else + +#define ABS fabsf + +#endif + + +BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) +{ + BLASLONG i=0; + BLASLONG ix=0; + FLOAT maxf=0.0; + BLASLONG max=0; + + if (n < 0 || inc_x < 1 ) return(max); + + maxf=ABS(x[0]); + + while(i < n) + { + if( ABS(x[ix]) > ABS(maxf) ) + { + max = i; + maxf = ABS(x[ix]); + } + ix += inc_x; + i++; + } + return(max+1); +} + + diff --git a/kernel/arm/iamin.c b/kernel/arm/iamin.c new file mode 100644 index 000000000..4f2f37b64 --- /dev/null +++ b/kernel/arm/iamin.c @@ -0,0 +1,67 @@ +/************************************************************************************* +* +* Copyright (C) 2012-2013 Werner Saar (wernsaar@googlemail.com) +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. 
+* +* You should have received a copy of the GNU General Public License +* along with this program. If not, see http://www.gnu.org/licenses/. +* +**************************************************************************************/ + +/************************************************************************************** +* 2013/09/14 Saar +* BLASTEST float : NoTest +* BLASTEST double : NoTest +* CTEST : NoTest +* TEST : NoTest +* +**************************************************************************************/ + +#include "common.h" +#include + +#if defined(DOUBLE) + +#define ABS fabs + +#else + +#define ABS fabsf + +#endif + + +BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) +{ + BLASLONG i=0; + BLASLONG ix=0; + FLOAT minf=0.0; + BLASLONG min=0; + + if (n < 0 || inc_x < 1 ) return(min); + + minf=ABS(x[0]); + + while(i < n) + { + if( ABS(x[ix]) < ABS(minf) ) + { + min = i; + minf = ABS(x[ix]); + } + ix += inc_x; + i++; + } + return(min+1); +} + + diff --git a/kernel/arm/imax.c b/kernel/arm/imax.c new file mode 100644 index 000000000..cc580eb7c --- /dev/null +++ b/kernel/arm/imax.c @@ -0,0 +1,58 @@ +/************************************************************************************* +* +* Copyright (C) 2012-2013 Werner Saar (wernsaar@googlemail.com) +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program. If not, see http://www.gnu.org/licenses/. 
+* +**************************************************************************************/ + +/************************************************************************************** +* 2013/09/14 Saar +* BLASTEST float : NoTest +* BLASTEST double : NoTest +* CTEST : NoTest +* TEST : NoTest +* +**************************************************************************************/ + +#include "common.h" +#include + + + +BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) +{ + BLASLONG i=0; + BLASLONG ix=0; + FLOAT maxf=0.0; + BLASLONG max=0; + + if (n < 0 || inc_x < 1 ) return(max); + + maxf=x[0]; + + while(i < n) + { + if( x[ix] > maxf ) + { + max = i; + maxf = x[ix]; + } + ix += inc_x; + i++; + } + return(max+1); +} + + diff --git a/kernel/arm/imin.c b/kernel/arm/imin.c new file mode 100644 index 000000000..bea123040 --- /dev/null +++ b/kernel/arm/imin.c @@ -0,0 +1,56 @@ +/************************************************************************************* +* +* Copyright (C) 2012-2013 Werner Saar (wernsaar@googlemail.com) +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program. If not, see http://www.gnu.org/licenses/. 
+* +**************************************************************************************/ + +/************************************************************************************** +* 2013/08/19 Saar +* BLASTEST float +* BLASTEST double +* +**************************************************************************************/ + +#include "common.h" +#include + + + +BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) +{ + BLASLONG i=0; + BLASLONG ix=0; + FLOAT minf=0.0; + BLASLONG min=0; + + if (n < 0 || inc_x < 1 ) return(min); + + minf=x[0]; + + while(i < n) + { + if( x[ix] > minf ) + { + min = i; + minf = x[ix]; + } + ix += inc_x; + i++; + } + return(min+1); +} + + diff --git a/kernel/arm/izamax.c b/kernel/arm/izamax.c new file mode 100644 index 000000000..f88043bb8 --- /dev/null +++ b/kernel/arm/izamax.c @@ -0,0 +1,73 @@ +/************************************************************************************* +* +* Copyright (C) 2012-2013 Werner Saar (wernsaar@googlemail.com) +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program. If not, see http://www.gnu.org/licenses/. 
+* +**************************************************************************************/ + +/************************************************************************************** +* 2013/09/14 Saar +* BLASTEST float : NoTest +* BLASTEST double : NoTest +* CTEST : OK +* TEST : OK +* +**************************************************************************************/ + +#include "common.h" +#include + +#if defined(DOUBLE) + +#define ABS fabs + +#else + +#define ABS fabsf + +#endif + +#define CABS1(x,i) ABS(x[i])+ABS(x[i+1]) + +BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) +{ + BLASLONG i=0; + BLASLONG ix=0; + FLOAT maxf[2]; + BLASLONG max=0; + BLASLONG inc_x2; + + if (n < 0 || inc_x < 1 ) return(max); + + inc_x2 = 2 * inc_x; + + maxf[0] = ABS(x[ix]); + maxf[1] = ABS(x[ix+1]); + + while(i < n) + { + if( CABS1(x,ix) > CABS1(maxf,0) ) + { + max = i; + maxf[0] = ABS(x[ix]); + maxf[1] = ABS(x[ix+1]); + } + ix += inc_x2; + i++; + } + return(max+1); +} + + diff --git a/kernel/arm/izamin.c b/kernel/arm/izamin.c new file mode 100644 index 000000000..cd99fa43e --- /dev/null +++ b/kernel/arm/izamin.c @@ -0,0 +1,73 @@ +/************************************************************************************* +* +* Copyright (C) 2012-2013 Werner Saar (wernsaar@googlemail.com) +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program. If not, see http://www.gnu.org/licenses/. 
+* +**************************************************************************************/ + +/************************************************************************************** +* 2013/09/14 Saar +* BLASTEST float : NoTest +* BLASTEST double : NoTest +* CTEST : NoTest +* TEST : NoTest +* +**************************************************************************************/ + +#include "common.h" +#include + +#if defined(DOUBLE) + +#define ABS fabs + +#else + +#define ABS fabsf + +#endif + +#define CABS1(x,i) ABS(x[i])+ABS(x[i+1]) + +BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) +{ + BLASLONG i=0; + BLASLONG ix=0; + FLOAT minf[2]; + BLASLONG min=0; + BLASLONG inc_x2; + + if (n < 0 || inc_x < 1 ) return(min); + + inc_x2 = 2 * inc_x; + + minf[0] = ABS(x[ix]); + minf[1] = ABS(x[ix+1]); + + while(i < n) + { + if( CABS1(x,ix) < CABS1(minf,0) ) + { + min = i; + minf[0] = ABS(x[ix]); + minf[1] = ABS(x[ix+1]); + } + ix += inc_x2; + i++; + } + return(min+1); +} + + diff --git a/kernel/arm/max.c b/kernel/arm/max.c new file mode 100644 index 000000000..1f22ea1bf --- /dev/null +++ b/kernel/arm/max.c @@ -0,0 +1,55 @@ +/************************************************************************************* +* +* Copyright (C) 2012-2013 Werner Saar (wernsaar@googlemail.com) +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program. If not, see http://www.gnu.org/licenses/. 
+* +**************************************************************************************/ + +/************************************************************************************** +* 2013/09/14 Saar +* BLASTEST float : NoTest +* BLASTEST double : NoTest +* CTEST : NoTest +* TEST : NoTest +* +**************************************************************************************/ + +#include "common.h" +#include + + +FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) +{ + BLASLONG i=0; + BLASLONG ix=0; + FLOAT maxf=0.0; + + if (n < 0 || inc_x < 1 ) return(maxf); + + maxf=x[0]; + + while(i < n) + { + if( x[ix] > maxf ) + { + maxf = x[ix]; + } + ix += inc_x; + i++; + } + return(maxf); +} + + diff --git a/kernel/arm/min.c b/kernel/arm/min.c new file mode 100644 index 000000000..6d870e146 --- /dev/null +++ b/kernel/arm/min.c @@ -0,0 +1,55 @@ +/************************************************************************************* +* +* Copyright (C) 2012-2013 Werner Saar (wernsaar@googlemail.com) +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program. If not, see http://www.gnu.org/licenses/. 
+* +**************************************************************************************/ + +/************************************************************************************** +* 2013/09/14 Saar +* BLASTEST float : NoTest +* BLASTEST double : NoTest +* CTEST : NoTest +* TEST : NoTest +* +**************************************************************************************/ + +#include "common.h" +#include + + +FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) +{ + BLASLONG i=0; + BLASLONG ix=0; + FLOAT minf=0.0; + + if (n < 0 || inc_x < 1 ) return(minf); + + minf=x[0]; + + while(i < n) + { + if( x[ix] < minf ) + { + minf = x[ix]; + } + ix += inc_x; + i++; + } + return(minf); +} + + diff --git a/kernel/arm/nrm2.c b/kernel/arm/nrm2.c new file mode 100644 index 000000000..277536508 --- /dev/null +++ b/kernel/arm/nrm2.c @@ -0,0 +1,80 @@ +/************************************************************************************* +* +* Copyright (C) 2012-2013 Werner Saar (wernsaar@googlemail.com) +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program. If not, see http://www.gnu.org/licenses/. 
+* +**************************************************************************************/ + +/************************************************************************************** +* 2013/09/13 Saar +* BLASTEST float : OK +* BLASTEST double : OK +* CTEST : OK +* TEST : OK +* +**************************************************************************************/ + +#include "common.h" +#include + +#if defined(DOUBLE) + +#define ABS fabs + +#else + +#define ABS fabsf + +#endif + + + +FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) +{ + BLASLONG i=0; + FLOAT scale = 0.0; + FLOAT ssq = 1.0; + FLOAT absxi = 0.0; + + + if (n < 0 || inc_x < 1 ) return(0.0); + if ( n == 1 ) return( ABS(x[0]) ); + + n *= inc_x; + while(i < n) + { + + if ( x[i] != 0.0 ) + { + absxi = ABS( x[i] ); + if ( scale < absxi ) + { + ssq = 1 + ssq * ( scale / absxi ) * ( scale / absxi ); + scale = absxi ; + } + else + { + ssq += ( absxi/scale ) * ( absxi/scale ); + } + + } + i += inc_x; + } + scale = scale * sqrt( ssq ); + return(scale); + +} + + diff --git a/kernel/arm/rot.c b/kernel/arm/rot.c new file mode 100644 index 000000000..edbec640d --- /dev/null +++ b/kernel/arm/rot.c @@ -0,0 +1,54 @@ +/************************************************************************************* +* +* Copyright (C) 2012-2013 Werner Saar (wernsaar@googlemail.com) +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program. If not, see http://www.gnu.org/licenses/. 
+* +**************************************************************************************/ + +/************************************************************************************** +* 2013/09/14 Saar +* BLASTEST float : OK +* BLASTEST double : OK +* CTEST : OK +* TEST : OK +* +**************************************************************************************/ + +#include "common.h" + +int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT c, FLOAT s) +{ + BLASLONG i=0; + BLASLONG ix=0,iy=0; + FLOAT temp; + + if ( n <= 0 ) return(0); + + while(i < n) + { + temp = c*x[ix] + s*y[iy] ; + y[iy] = c*y[iy] - s*x[ix] ; + x[ix] = temp ; + + ix += inc_x ; + iy += inc_y ; + i++ ; + + } + return(0); + +} + + diff --git a/kernel/arm/scal.c b/kernel/arm/scal.c new file mode 100644 index 000000000..7ac7801e3 --- /dev/null +++ b/kernel/arm/scal.c @@ -0,0 +1,50 @@ +/************************************************************************************* +* +* Copyright (C) 2012-2013 Werner Saar (wernsaar@googlemail.com) +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program. If not, see http://www.gnu.org/licenses/. 
+* +**************************************************************************************/ + +/************************************************************************************** +* 2013/09/14 Saar +* BLASTEST float : OK +* BLASTEST double : OK +* CTEST : OK +* TEST : OK +* +**************************************************************************************/ + +#include "common.h" + +int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *dummy, BLASLONG dummy2) +{ + BLASLONG i=0; + + if ( n < 0 || inc_x < 1 ) return(0); + if ( da == 1.0 ) return(0); + + n *= inc_x; + while(i < n) + { + + x[i] = da * x[i] ; + i += inc_x ; + + } + return(0); + +} + + diff --git a/kernel/arm/swap.c b/kernel/arm/swap.c new file mode 100644 index 000000000..518604a22 --- /dev/null +++ b/kernel/arm/swap.c @@ -0,0 +1,54 @@ +/************************************************************************************* +* +* Copyright (C) 2012-2013 Werner Saar (wernsaar@googlemail.com) +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program. If not, see http://www.gnu.org/licenses/. 
+* +**************************************************************************************/ + +/************************************************************************************** +* 2013/08/20 Saar +* BLASTEST float OK +* BLASTEST double OK +* +**************************************************************************************/ + +#include "common.h" +#include + +int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *dummy, BLASLONG dummy2) +{ + BLASLONG i=0; + BLASLONG ix=0,iy=0; + FLOAT temp; + + if ( n < 0 ) return(0); + + while(i < n) + { + + temp = x[ix] ; + x[ix] = y[iy] ; + y[iy] = temp ; + + ix += inc_x ; + iy += inc_y ; + i++ ; + + } + return(0); + +} + + diff --git a/kernel/arm/zamax.c b/kernel/arm/zamax.c new file mode 100644 index 000000000..c6c8c4d22 --- /dev/null +++ b/kernel/arm/zamax.c @@ -0,0 +1,73 @@ +/************************************************************************************* +* +* Copyright (C) 2012-2013 Werner Saar (wernsaar@googlemail.com) +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program. If not, see http://www.gnu.org/licenses/. 
+* +**************************************************************************************/ + +/************************************************************************************** +* 2013/09/14 Saar +* BLASTEST float : OK +* BLASTEST double : OK +* CTEST : NoTest +* TEST : NoTest +* +**************************************************************************************/ + +#include "common.h" +#include + +#if defined(DOUBLE) + +#define ABS fabs + +#else + +#define ABS fabsf + +#endif + +#define CABS1(x,i) ABS(x[i])+ABS(x[i+1]) + +FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) +{ + BLASLONG i=0; + BLASLONG ix=0; + FLOAT maxf[2]; + BLASLONG max=0; + BLASLONG inc_x2; + + if (n < 0 || inc_x < 1 ) return(0.0); + + inc_x2 = 2 * inc_x; + + maxf[0] = ABS(x[ix]); + maxf[1] = ABS(x[ix+1]); + + while(i < n) + { + if( CABS1(x,ix) > CABS1(maxf,0) ) + { + max = i; + maxf[0] = ABS(x[ix]); + maxf[1] = ABS(x[ix+1]); + } + ix += inc_x2; + i++; + } + return(CABS1(maxf,0)); +} + + diff --git a/kernel/arm/zamin.c b/kernel/arm/zamin.c new file mode 100644 index 000000000..9b76a03e0 --- /dev/null +++ b/kernel/arm/zamin.c @@ -0,0 +1,73 @@ +/************************************************************************************* +* +* Copyright (C) 2012-2013 Werner Saar (wernsaar@googlemail.com) +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program. If not, see http://www.gnu.org/licenses/. 
+* +**************************************************************************************/ + +/************************************************************************************** +* 2013/09/14 Saar +* BLASTEST float : OK +* BLASTEST double : OK +* CTEST : NoTest +* TEST : NoTest +* +**************************************************************************************/ + +#include "common.h" +#include + +#if defined(DOUBLE) + +#define ABS fabs + +#else + +#define ABS fabsf + +#endif + +#define CABS1(x,i) ABS(x[i])+ABS(x[i+1]) + +FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) +{ + BLASLONG i=0; + BLASLONG ix=0; + FLOAT minf[2]; + BLASLONG min=0; + BLASLONG inc_x2; + + if (n < 0 || inc_x < 1 ) return(0.0); + + inc_x2 = 2 * inc_x; + + minf[0] = ABS(x[ix]); + minf[1] = ABS(x[ix+1]); + + while(i < n) + { + if( CABS1(x,ix) < CABS1(minf,0) ) + { + min = i; + minf[0] = ABS(x[ix]); + minf[1] = ABS(x[ix+1]); + } + ix += inc_x2; + i++; + } + return(CABS1(minf,0)); +} + + diff --git a/kernel/arm/zasum.c b/kernel/arm/zasum.c new file mode 100644 index 000000000..6b4941dea --- /dev/null +++ b/kernel/arm/zasum.c @@ -0,0 +1,63 @@ +/************************************************************************************* +* +* Copyright (C) 2012-2013 Werner Saar (wernsaar@googlemail.com) +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program. If not, see http://www.gnu.org/licenses/. 
+* +**************************************************************************************/ + +/************************************************************************************** +* 2013/09/14 Saar +* BLASTEST float : OK +* BLASTEST double : OK +* CTEST : OK +* TEST : OK +* +**************************************************************************************/ + + +#include "common.h" +#include + +#if defined(DOUBLE) + +#define ABS fabs + +#else + +#define ABS fabsf + +#endif + +#define CABS1(x,i) ABS(x[i])+ABS(x[i+1]) + +FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) +{ + BLASLONG i=0; + FLOAT sumf = 0.0; + BLASLONG inc_x2; + if (n < 0 || inc_x < 1 ) return(sumf); + + inc_x2 = 2 * inc_x; + + n *= inc_x2; + while(i < n) + { + sumf += CABS1(x,i); + i += inc_x2; + } + return(sumf); +} + + diff --git a/kernel/arm/zaxpy.c b/kernel/arm/zaxpy.c new file mode 100644 index 000000000..51886af3f --- /dev/null +++ b/kernel/arm/zaxpy.c @@ -0,0 +1,64 @@ +/************************************************************************************* +* +* Copyright (C) 2012-2013 Werner Saar (wernsaar@googlemail.com) +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program. If not, see http://www.gnu.org/licenses/. 
+* +**************************************************************************************/ + +/************************************************************************************** +* 2013/09/15 Saar +* BLASTEST float : OK +* BLASTEST double : OK +* CTEST : OK +* TEST : OK +* +**************************************************************************************/ + + +#include "common.h" + +int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *dummy, BLASLONG dummy2) +{ + BLASLONG i=0; + BLASLONG ix,iy; + + if ( n < 0 ) return(0); + if ( da_r == 0.0 && da_i == 0.0 ) return(0); + + ix = 0; + iy = 0; + + BLASLONG inc_x2 = 2 * inc_x; + BLASLONG inc_y2 = 2 * inc_y; + + while(i < n) + { +#if !defined(CONJ) + y[iy] += ( da_r * x[ix] - da_i * x[ix+1] ) ; + y[iy+1] += ( da_r * x[ix+1] + da_i * x[ix] ) ; +#else + y[iy] += ( da_r * x[ix] + da_i * x[ix+1] ) ; + y[iy+1] -= ( da_r * x[ix+1] - da_i * x[ix] ) ; +#endif + ix += inc_x2 ; + iy += inc_y2 ; + i++ ; + + } + return(0); + +} + + diff --git a/kernel/arm/zcopy.c b/kernel/arm/zcopy.c new file mode 100644 index 000000000..9961b28b3 --- /dev/null +++ b/kernel/arm/zcopy.c @@ -0,0 +1,55 @@ +/************************************************************************************* +* +* Copyright (C) 2012-2013 Werner Saar (wernsaar@googlemail.com) +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program. 
If not, see http://www.gnu.org/licenses/. +* +**************************************************************************************/ + +/************************************************************************************** +* 2013/09/14 Saar +* BLASTEST float : OK +* BLASTEST double : OK +* CTEST : OK +* TEST : OK +* +**************************************************************************************/ + +#include "common.h" + +int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) +{ + BLASLONG i=0; + BLASLONG ix=0,iy=0; + + if ( n < 0 ) return(0); + + BLASLONG inc_x2 = 2 * inc_x; + BLASLONG inc_y2 = 2 * inc_y; + + while(i < n) + { + + y[iy] = x[ix] ; + y[iy+1] = x[ix+1] ; + ix += inc_x2; + iy += inc_y2; + i++ ; + + } + return(0); + +} + + diff --git a/kernel/arm/zdot.c b/kernel/arm/zdot.c new file mode 100644 index 000000000..33f860ccc --- /dev/null +++ b/kernel/arm/zdot.c @@ -0,0 +1,70 @@ +/************************************************************************************* +* +* Copyright (C) 2012-2013 Werner Saar (wernsaar@googlemail.com) +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program. If not, see http://www.gnu.org/licenses/. 
+* +**************************************************************************************/ + +/************************************************************************************** +* 2013/09/14 Saar +* BLASTEST float : FAIL +* BLASTEST double : FAIL +* CTEST : OK +* TEST : OK +* +**************************************************************************************/ + +#include "common.h" +#include + +FLOAT _Complex CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) +{ + BLASLONG i=0; + BLASLONG ix=0,iy=0; + FLOAT dot[2]; + FLOAT _Complex result; + + dot[0]=0.0; + dot[1]=0.0; + + __real__ result = 0.0 ; + __imag__ result = 0.0 ; + + if ( n < 0 ) return(result); + + BLASLONG inc_x2 = 2 * inc_x ; + BLASLONG inc_y2 = 2 * inc_y ; + + while(i < n) + { +#if !defined(CONJ) + dot[0] += ( x[ix] * y[iy] - x[ix+1] * y[iy+1] ) ; + dot[1] += ( x[ix+1] * y[iy] + x[ix] * y[iy+1] ) ; +#else + dot[0] += ( x[ix] * y[iy] + x[ix+1] * y[iy+1] ) ; + dot[1] -= ( x[ix+1] * y[iy] - x[ix] * y[iy+1] ) ; +#endif + ix += inc_x2 ; + iy += inc_y2 ; + i++ ; + + } + __real__ result = dot[0]; + __imag__ result = dot[1]; + return(result); + +} + + diff --git a/kernel/arm/zgemv_n.c b/kernel/arm/zgemv_n.c new file mode 100644 index 000000000..3e2c39063 --- /dev/null +++ b/kernel/arm/zgemv_n.c @@ -0,0 +1,117 @@ +/************************************************************************************* + * * + * * Copyright (C) 2012-2013 Werner Saar (wernsaar@googlemail.com) + * * + * * This program is free software: you can redistribute it and/or modify + * * it under the terms of the GNU General Public License as published by + * * the Free Software Foundation, either version 3 of the License, or + * * (at your option) any later version. + * * + * * This program is distributed in the hope that it will be useful, + * * but WITHOUT ANY WARRANTY; without even the implied warranty of + * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * * GNU General Public License for more details. + * * + * * You should have received a copy of the GNU General Public License + * * along with this program. If not, see http://www.gnu.org/licenses/. + * * + * **************************************************************************************/ + +/************************************************************************************** + * * 2013/09/15 Saar + * * BLASTEST float : OK + * * BLASTEST double : OK + * CTEST : OK + * TEST : OK + * * + * **************************************************************************************/ + + +#include "common.h" + +int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha_r, FLOAT alpha_i, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *buffer) +{ + BLASLONG i; + BLASLONG ix,iy; + BLASLONG j; + FLOAT *a_ptr; + FLOAT temp_r,temp_i; + BLASLONG inc_x2,inc_y2; + BLASLONG lda2; + BLASLONG i2; + + if( alpha_r == 0.0 && alpha_i == 0.0 ) return(0); + + lda2 = 2*lda; + + inc_x2 = 2 * inc_x; + inc_y2 = 2 * inc_y; + + ix = 0; + a_ptr = a; + +#if !defined(CONJ) + for (j=0; j + +#if defined(DOUBLE) + +#define ABS fabs + +#else + +#define ABS fabsf + +#endif + + + +FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) +{ + BLASLONG i=0; + FLOAT scale = 0.0; + FLOAT ssq = 1.0; + BLASLONG inc_x2; + FLOAT temp; + + if (n < 0 || inc_x < 1 ) return(0.0); + + inc_x2 = 2 * inc_x; + + n *= inc_x2; + while(i < n) + { + + if ( x[i] != 0.0 ) + { + temp = ABS( x[i] ); + if ( scale < temp ) + { + ssq = 1 + ssq * ( scale / temp ) * ( scale / temp ); + scale = temp ; + } + else + { + ssq += ( temp / scale ) * ( temp / scale ); + } + + } + + if ( x[i+1] != 0.0 ) + { + temp = ABS( x[i+1] ); + if ( scale < temp ) + { + ssq = 1 + ssq * ( scale / temp ) * ( scale / temp ); + scale = temp ; + } + else + { + ssq += ( temp / scale ) * ( temp / scale ); + } + + } + + + i += inc_x2; + } + scale = scale * sqrt( ssq ); + return(scale); + +} + + diff --git 
a/kernel/arm/zrot.c b/kernel/arm/zrot.c new file mode 100644 index 000000000..6303c8f56 --- /dev/null +++ b/kernel/arm/zrot.c @@ -0,0 +1,60 @@ +/************************************************************************************* +* +* Copyright (C) 2012-2013 Werner Saar (wernsaar@googlemail.com) +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program. If not, see http://www.gnu.org/licenses/. +* +**************************************************************************************/ + +/************************************************************************************** +* 2013/09/14 Saar +* BLASTEST float : OK +* BLASTEST double : OK +* CTEST : OK +* TEST : OK +* +**************************************************************************************/ + +#include "common.h" + +int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT c, FLOAT s) +{ + BLASLONG i=0; + BLASLONG ix=0,iy=0; + FLOAT temp[2]; + + if ( n <= 0 ) return(0); + + BLASLONG inc_x2 = 2 * inc_x ; + BLASLONG inc_y2 = 2 * inc_y ; + + while(i < n) + { + temp[0] = c*x[ix] + s*y[iy] ; + temp[1] = c*x[ix+1] + s*y[iy+1] ; + y[iy] = c*y[iy] - s*x[ix] ; + y[iy+1] = c*y[iy+1] - s*x[ix+1] ; + x[ix] = temp[0] ; + x[ix+1] = temp[1] ; + + ix += inc_x2 ; + iy += inc_y2 ; + i++ ; + + } + return(0); + +} + + diff --git a/kernel/arm/zscal.c b/kernel/arm/zscal.c new file mode 100644 index 000000000..ca9c0f7fe --- /dev/null +++ b/kernel/arm/zscal.c @@ -0,0 
+1,56 @@ +/************************************************************************************* +* +* Copyright (C) 2012-2013 Werner Saar (wernsaar@googlemail.com) +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program. If not, see http://www.gnu.org/licenses/. +* +**************************************************************************************/ + +/************************************************************************************** +* 2013/09/14 Saar +* BLASTEST float : OK +* BLASTEST double : OK +* CTEST : OK +* TEST : OK +* +**************************************************************************************/ + +#include "common.h" + +int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r,FLOAT da_i, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *dummy, BLASLONG dummy2) +{ + BLASLONG i=0; + BLASLONG inc_x2; + BLASLONG ip = 0; + FLOAT temp; + + if ( n < 0 || inc_x < 1 ) return(0); + + inc_x2 = 2 * inc_x; + for ( i=0; i + +int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3, FLOAT dummy4, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *dummy, BLASLONG dummy2) +{ + BLASLONG i=0; + BLASLONG ix=0,iy=0; + FLOAT temp[2]; + + if ( n < 0 ) return(0); + + BLASLONG inc_x2 = 2 * inc_x; + BLASLONG inc_y2 = 2 * inc_y; + + while(i < n) + { + + temp[0] = x[ix] ; + temp[1] = x[ix+1] ; + x[ix] = y[iy] ; + x[ix+1] = y[iy+1] ; + y[iy] = temp[0] ; + y[iy+1] = temp[1] ; + + ix += 
inc_x2 ; + iy += inc_y2 ; + i++ ; + + } + return(0); + +} + + diff --git a/param.h b/param.h index 0c3df6951..ac691b829 100644 --- a/param.h +++ b/param.h @@ -1793,6 +1793,50 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define SYMV_P 16 #endif + +#ifdef ARMV7 +#define SNUMOPT 2 +#define DNUMOPT 2 + +#define GEMM_DEFAULT_OFFSET_A 0 +#define GEMM_DEFAULT_OFFSET_B 0 +#define GEMM_DEFAULT_ALIGN 0x03fffUL + +#define SGEMM_DEFAULT_UNROLL_M 2 +#define SGEMM_DEFAULT_UNROLL_N 2 + +#define DGEMM_DEFAULT_UNROLL_M 2 +#define DGEMM_DEFAULT_UNROLL_N 2 + +#define CGEMM_DEFAULT_UNROLL_M 2 +#define CGEMM_DEFAULT_UNROLL_N 2 + +#define ZGEMM_DEFAULT_UNROLL_M 2 +#define ZGEMM_DEFAULT_UNROLL_N 2 + +#define SGEMM_DEFAULT_P 64 +#define DGEMM_DEFAULT_P 24 +#define CGEMM_DEFAULT_P 24 +#define ZGEMM_DEFAULT_P 20 + +#define SGEMM_DEFAULT_Q 192 +#define DGEMM_DEFAULT_Q 128 +#define CGEMM_DEFAULT_Q 128 +#define ZGEMM_DEFAULT_Q 64 + +#define SGEMM_DEFAULT_R 512 +#define DGEMM_DEFAULT_R 512 +#define CGEMM_DEFAULT_R 512 +#define ZGEMM_DEFAULT_R 512 + +#define GEMM_OFFSET_A1 0x10000 +#define GEMM_OFFSET_B1 0x100000 + +#define SYMV_P 16 +#endif + + + #ifdef GENERIC #define SNUMOPT 2