Initial checkin of port for ARM

This commit is contained in:
wernsaar 2013-09-16 14:41:37 +02:00
parent 333c1b8431
commit 5729088994
44 changed files with 2671 additions and 196 deletions

3
Makefile.arm Normal file
View File

@ -0,0 +1,3 @@
ifdef BINARY64
else
endif

View File

@ -77,6 +77,11 @@ ifndef NO_PARALLEL_MAKE
NO_PARALLEL_MAKE=0
endif
GETARCH_FLAGS += -DNO_PARALLEL_MAKE=$(NO_PARALLEL_MAKE)
ifeq ($(HOSTCC), loongcc)
GETARCH_FLAGS += -static
endif
# This operation is expensive, so execution should be once.
ifndef GOTOBLAS_MAKEFILE
export GOTOBLAS_MAKEFILE = 1
@ -184,8 +189,15 @@ ifeq ($(GCCMINORVERSIONGTEQ7), 1)
CCOMMON_OPT += -DMS_ABI
endif
endif
endif
# Ensure the correct stack alignment on Win32
# http://permalink.gmane.org/gmane.comp.lib.openblas.general/97
ifeq ($(ARCH), x86)
CCOMMON_OPT += -mincoming-stack-boundary=2
FCOMMON_OPT += -mincoming-stack-boundary=2
endif
endif
ifeq ($(OSNAME), Interix)
@ -240,7 +252,8 @@ NO_BINARY_MODE = 1
endif
ifndef NO_EXPRECISION
ifeq ($(F_COMPILER), GFORTRAN)
ifeq ($(C_COMPILER), GCC)
# ifeq logical or. GCC or LSB
ifeq ($(C_COMPILER), $(filter $(C_COMPILER),GCC LSB))
EXPRECISION = 1
CCOMMON_OPT += -DEXPRECISION -m128bit-long-double
FCOMMON_OPT += -m128bit-long-double
@ -257,7 +270,8 @@ endif
ifeq ($(ARCH), x86_64)
ifndef NO_EXPRECISION
ifeq ($(F_COMPILER), GFORTRAN)
ifeq ($(C_COMPILER), GCC)
# ifeq logical or. GCC or LSB
ifeq ($(C_COMPILER), $(filter $(C_COMPILER),GCC LSB))
EXPRECISION = 1
CCOMMON_OPT += -DEXPRECISION -m128bit-long-double
FCOMMON_OPT += -m128bit-long-double
@ -276,7 +290,8 @@ CCOMMON_OPT += -wd981
endif
ifeq ($(USE_OPENMP), 1)
ifeq ($(C_COMPILER), GCC)
# ifeq logical or. GCC or LSB
ifeq ($(C_COMPILER), $(filter $(C_COMPILER),GCC LSB))
CCOMMON_OPT += -fopenmp
endif
@ -309,14 +324,16 @@ ifeq ($(ARCH), x86)
DYNAMIC_CORE = KATMAI COPPERMINE NORTHWOOD PRESCOTT BANIAS \
CORE2 PENRYN DUNNINGTON NEHALEM ATHLON OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO
ifneq ($(NO_AVX), 1)
DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER
DYNAMIC_CORE += SANDYBRIDGE
#BULLDOZER PILEDRIVER
endif
endif
ifeq ($(ARCH), x86_64)
DYNAMIC_CORE = PRESCOTT CORE2 PENRYN DUNNINGTON NEHALEM OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO
ifneq ($(NO_AVX), 1)
DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER
DYNAMIC_CORE += SANDYBRIDGE
#BULLDOZER PILEDRIVER
endif
endif
@ -346,19 +363,24 @@ NO_BINARY_MODE = 1
BINARY_DEFINED = 1
endif
ifeq ($(ARCH), arm)
NO_BINARY_MODE = 1
BINARY_DEFINED = 1
endif
#
# C Compiler dependent settings
#
# ifeq logical or. GCC or CLANG
# ifeq logical or. GCC or CLANG or LSB
# http://stackoverflow.com/questions/7656425/makefile-ifeq-logical-or
ifeq ($(C_COMPILER), $(filter $(C_COMPILER),GCC CLANG))
ifeq ($(C_COMPILER), $(filter $(C_COMPILER),GCC CLANG LSB))
CCOMMON_OPT += -Wall
COMMON_PROF += -fno-inline
NO_UNINITIALIZED_WARN = -Wno-uninitialized
ifeq ($(QUIET_MAKE), 1)
CCOMMON_OPT += $(NO_UNINITIALIZED_WARN)
CCOMMON_OPT += $(NO_UNINITIALIZED_WARN) -Wno-unused
endif
ifdef NO_BINARY_MODE
@ -445,9 +467,12 @@ endif
ifeq ($(F_COMPILER), GFORTRAN)
CCOMMON_OPT += -DF_INTERFACE_GFORT
FCOMMON_OPT += -Wall
#Don't include -lgfortran, when NO_LAPACK=1 or lsbcc
ifneq ($(NO_LAPACK), 1)
ifneq ($(C_COMPILER), LSB)
EXTRALIB += -lgfortran
endif
endif
ifdef NO_BINARY_MODE
ifeq ($(ARCH), mips64)
ifdef BINARY64
@ -554,11 +579,28 @@ ifdef INTERFACE64
FCOMMON_OPT += -i8
endif
endif
ifeq ($(ARCH), mips64)
ifndef BINARY64
FCOMMON_OPT += -n32
else
FCOMMON_OPT += -n64
endif
ifeq ($(CORE), LOONGSON3A)
FCOMMON_OPT += -loongson3 -static
endif
ifeq ($(CORE), LOONGSON3B)
FCOMMON_OPT += -loongson3 -static
endif
else
ifndef BINARY64
FCOMMON_OPT += -m32
else
FCOMMON_OPT += -m64
endif
endif
ifdef USE_OPENMP
FEXTRALIB += -lstdc++
@ -567,12 +609,30 @@ endif
endif
ifeq ($(C_COMPILER), OPEN64)
ifeq ($(ARCH), mips64)
ifndef BINARY64
CCOMMON_OPT += -n32
else
CCOMMON_OPT += -n64
endif
ifeq ($(CORE), LOONGSON3A)
CCOMMON_OPT += -loongson3 -static
endif
ifeq ($(CORE), LOONGSON3B)
CCOMMON_OPT += -loongson3 -static
endif
else
ifndef BINARY64
CCOMMON_OPT += -m32
else
CCOMMON_OPT += -m64
endif
endif
endif
ifeq ($(C_COMPILER), SUN)
CCOMMON_OPT += -w
@ -781,6 +841,15 @@ override FFLAGS += $(COMMON_OPT) $(FCOMMON_OPT)
override FPFLAGS += $(COMMON_OPT) $(FCOMMON_OPT) $(COMMON_PROF)
#MAKEOVERRIDES =
LAPACK_CFLAGS = $(CFLAGS)
LAPACK_CFLAGS += -DHAVE_LAPACK_CONFIG_H
ifdef INTERFACE64
LAPACK_CFLAGS += -DLAPACK_ILP64
endif
ifeq ($(C_COMPILER), LSB)
LAPACK_CFLAGS += -DLAPACK_COMPLEX_STRUCTURE
endif
ifndef SUFFIX
SUFFIX = o
endif
@ -832,6 +901,7 @@ export CC
export FC
export BU
export FU
export NEED2UNDERSCORES
export USE_THREAD
export NUM_THREADS
export NUM_CORES

View File

@ -33,6 +33,7 @@ if ($ARGV[0] =~ /(.*)(-[.\d]+)/) {
}
$compiler = "";
$compiler = LSB if ($data =~ /COMPILER_LSB/);
$compiler = CLANG if ($data =~ /COMPILER_CLANG/);
$compiler = PGI if ($data =~ /COMPILER_PGI/);
$compiler = PATHSCALE if ($data =~ /COMPILER_PATHSCALE/);
@ -62,6 +63,7 @@ $architecture = mips64 if ($data =~ /ARCH_MIPS64/);
$architecture = alpha if ($data =~ /ARCH_ALPHA/);
$architecture = sparc if ($data =~ /ARCH_SPARC/);
$architecture = ia64 if ($data =~ /ARCH_IA64/);
$architecture = arm if ($data =~ /ARCH_ARM/);
$defined = 0;
@ -122,7 +124,7 @@ if ($compiler eq "CLANG") {
$openmp = "-fopenmp";
}
if ($compiler eq "GCC") {
if ($compiler eq "GCC" || $compiler eq "LSB") {
$openmp = "-fopenmp";
}
@ -148,6 +150,7 @@ $architecture = mips64 if ($data =~ /ARCH_MIPS64/);
$architecture = alpha if ($data =~ /ARCH_ALPHA/);
$architecture = sparc if ($data =~ /ARCH_SPARC/);
$architecture = ia64 if ($data =~ /ARCH_IA64/);
$architecture = arm if ($data =~ /ARCH_ARM/);
$binformat = bin32;
$binformat = bin64 if ($data =~ /BINARY_64/);

View File

@ -314,6 +314,23 @@ typedef int blasint;
#define YIELDING sched_yield()
#endif
/***
To alloc job_t on heap or statck.
please https://github.com/xianyi/OpenBLAS/issues/246
***/
#if defined(OS_WINDOWS)
#define GETRF_MEM_ALLOC_THRESHOLD 32
#define BLAS3_MEM_ALLOC_THRESHOLD 32
#endif
#ifndef GETRF_MEM_ALLOC_THRESHOLD
#define GETRF_MEM_ALLOC_THRESHOLD 80
#endif
#ifndef BLAS3_MEM_ALLOC_THRESHOLD
#define BLAS3_MEM_ALLOC_THRESHOLD 160
#endif
#ifdef QUAD_PRECISION
#include "common_quad.h"
#endif
@ -346,6 +363,10 @@ typedef int blasint;
#include "common_mips64.h"
#endif
#ifdef ARCH_ARM
#include "common_arm.h"
#endif
#ifdef OS_LINUX
#include "common_linux.h"
#endif

167
common_arm.h Normal file
View File

@ -0,0 +1,167 @@
/*****************************************************************************
Copyright (c) 2011, Lab of Parallel Software and Computational Science,ICSAS
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the ISCAS nor the names of its contributors may
be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************************/
/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin. */
/* All rights reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the following */
/* conditions are met: */
/* */
/* 1. Redistributions of source code must retain the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer. */
/* */
/* 2. Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/* POSSIBILITY OF SUCH DAMAGE. */
/* */
/* The views and conclusions contained in the software and */
/* documentation are those of the authors and should not be */
/* interpreted as representing official policies, either expressed */
/* or implied, of The University of Texas at Austin. */
/*********************************************************************/
#ifndef COMMON_ARM
#define COMMON_ARM
#define MB
#define WMB
#define INLINE inline
#define RETURN_BY_COMPLEX
#ifndef ASSEMBLER
static void INLINE blas_lock(volatile unsigned long *address){
// long int ret, val = 1;
/*
do {
while (*address) {YIELDING;};
__asm__ __volatile__(
"1: ll %0, %3\n"
" ori %2, %0, 1\n"
" sc %2, %1\n"
" beqz %2, 1b\n"
" andi %2, %0, 1\n"
" sync\n"
: "=&r" (val), "=m" (address), "=&r" (ret)
: "m" (address)
: "memory");
} while (ret);
*/
}
static inline unsigned int rpcc(void){
unsigned long ret=0;
return ret;
}
static inline int blas_quickdivide(blasint x, blasint y){
return x / y;
}
#if defined(DOUBLE)
#define GET_IMAGE(res) __asm__ __volatile__("vstr.f64 d1, %0" : "=m"(res) : : "memory")
#else
#define GET_IMAGE(res) __asm__ __volatile__("vstr.f32 s1, %0" : "=m"(res) : : "memory")
#endif
#define GET_IMAGE_CANCEL
#endif
#ifndef F_INTERFACE
#define REALNAME ASMNAME
#else
#define REALNAME ASMFNAME
#endif
#if defined(ASSEMBLER) && !defined(NEEDPARAM)
#define PROLOGUE \
.text ;\
.set arm ;\
.align 5 ;\
.globl REALNAME ;\
.ent REALNAME ;\
.type REALNAME, @function ;\
REALNAME:
#define EPILOGUE \
.end REALNAME
#define PROFCODE
#endif
#define SEEK_ADDRESS
#ifndef PAGESIZE
#define PAGESIZE ( 4 << 10)
#endif
#define HUGE_PAGESIZE ( 4 << 20)
#define BUFFER_SIZE (16 << 20)
#define BASE_ADDRESS (START_ADDRESS - BUFFER_SIZE * MAX_CPU_NUMBER)
#ifndef MAP_ANONYMOUS
#define MAP_ANONYMOUS MAP_ANON
#endif
#endif

16
ctest.c
View File

@ -1,3 +1,13 @@
//LSB (Linux Standard Base) compiler
//only support lsbc++
#if defined (__LSB_VERSION__)
#if !defined (__cplusplus)
COMPILER_LSB
#else
#error "OpenBLAS only supports lsbcc."
#endif
#endif
#if defined(__clang__)
COMPILER_CLANG
#endif
@ -114,3 +124,9 @@ ARCH_IA64
#if defined(__LP64) || defined(__LP64__) || defined(__ptr64) || defined(__x86_64__) || defined(__amd64__) || defined(__64BIT__)
BINARY_64
#endif
#if defined(__ARM_ARCH) || defined(__ARM_ARCH_7A__)
ARCH_ARM
#endif

View File

@ -679,6 +679,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define CORENAME "generic"
#endif
#ifdef FORCE_ARMV7
#define FORCE
#define ARCHITECTURE "ARM"
#define SUBARCHITECTURE "ARMV7"
#define SUBDIRNAME "arm"
#define ARCHCONFIG "-DARMV7 " \
"-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \
"-DL2_SIZE=512488 -DL2_LINESIZE=32 " \
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 "
#define LIBNAME "armv7"
#define CORENAME "ARMV7"
#else
#endif
#ifndef FORCE
#if defined(__powerpc__) || defined(__powerpc) || defined(powerpc) || \

View File

@ -14,6 +14,16 @@ ifeq ($(ARCH), MIPS)
USE_GEMM3M = 1
endif
ifeq ($(ARCH), arm)
USE_TRMM = 1
endif
ifeq ($(TARGET), LOONGSON3B)
USE_TRMM = 1
endif
SKERNELOBJS += \
sgemm_kernel$(TSUFFIX).$(SUFFIX) \
$(SGEMMINCOPYOBJ) $(SGEMMITCOPYOBJ) \
@ -498,7 +508,8 @@ $(KDIR)xgemm_kernel_r$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(XGEMMKERNEL) $(XGEMMD
$(KDIR)xgemm_kernel_b$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(XGEMMKERNEL) $(XGEMMDEPEND)
$(CC) $(CFLAGS) -c -DXDOUBLE -DCOMPLEX -DCC $< -o $@
ifeq ($(TARGET), LOONGSON3B)
ifdef USE_TRMM
$(KDIR)strmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(STRMMKERNEL)
$(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -DLEFT -UTRANSA $< -o $@
@ -582,24 +593,6 @@ $(KDIR)ztrmm_kernel_RR$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL)
$(KDIR)ztrmm_kernel_RC$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL)
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC $< -o $@
else
ifdef STRMMKERNEL
$(KDIR)strmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(STRMMKERNEL)
$(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -DLEFT -UTRANSA $< -o $@
$(KDIR)strmm_kernel_LT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(STRMMKERNEL)
$(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -DLEFT -DTRANSA $< -o $@
$(KDIR)strmm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(STRMMKERNEL)
$(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -UTRANSA $< -o $@
$(KDIR)strmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(STRMMKERNEL)
$(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o $@
else
$(KDIR)strmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEMMKERNEL)
$(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -DLEFT -UTRANSA $< -o $@
@ -613,79 +606,17 @@ $(KDIR)strmm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEMMKERNEL)
$(KDIR)strmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEMMKERNEL)
$(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o $@
endif
ifdef DTRMMKERNEL
ifdef DTRMMKERNEL_LN
$(KDIR)dtrmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DTRMMKERNEL_LN)
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -DLEFT -UTRANSA $< -o $@
else
$(KDIR)dtrmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DTRMMKERNEL)
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -DLEFT -UTRANSA $< -o $@
endif
ifdef DTRMMKERNEL_LT
$(KDIR)dtrmm_kernel_LT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DTRMMKERNEL_LT)
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -DLEFT -DTRANSA $< -o $@
else
$(KDIR)dtrmm_kernel_LT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DTRMMKERNEL)
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -DLEFT -DTRANSA $< -o $@
endif
ifdef DTRMMKERNEL_RN
$(KDIR)dtrmm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DTRMMKERNEL_RN)
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -ULEFT -UTRANSA $< -o $@
else
$(KDIR)dtrmm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DTRMMKERNEL)
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -ULEFT -UTRANSA $< -o $@
endif
ifdef DTRMMKERNEL_RT
$(KDIR)dtrmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DTRMMKERNEL_RT)
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o $@
else
$(KDIR)dtrmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DTRMMKERNEL)
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o $@
endif
else
ifdef DTRMMKERNEL_LN
$(KDIR)dtrmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DGEMMKERNEL_LN)
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -DLEFT -UTRANSA $< -o $@
else
$(KDIR)dtrmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DGEMMKERNEL)
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -DLEFT -UTRANSA $< -o $@
endif
ifdef DTRMMKERNEL_LT
$(KDIR)dtrmm_kernel_LT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DGEMMKERNEL_LT)
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -DLEFT -DTRANSA $< -o $@
else
$(KDIR)dtrmm_kernel_LT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DGEMMKERNEL)
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -DLEFT -DTRANSA $< -o $@
endif
ifdef DTRMMKERNEL_RN
$(KDIR)dtrmm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DGEMMKERNEL_RN)
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -ULEFT -UTRANSA $< -o $@
else
$(KDIR)dtrmm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DGEMMKERNEL)
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -ULEFT -UTRANSA $< -o $@
endif
ifdef DTRMMKERNEL_RT
$(KDIR)dtrmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DGEMMKERNEL_RT)
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o $@
else
$(KDIR)dtrmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DGEMMKERNEL)
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o $@
endif
endif
ifdef QTRMMKERNEL
$(KDIR)qtrmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(QGEMMKERNEL)
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DXDOUBLE -UCOMPLEX -DLEFT -UTRANSA $< -o $@
@ -699,50 +630,6 @@ $(KDIR)qtrmm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(QGEMMKERNEL)
$(KDIR)qtrmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(QGEMMKERNEL)
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DXDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o $@
else
$(KDIR)qtrmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(QGEMMKERNEL)
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DXDOUBLE -UCOMPLEX -DLEFT -UTRANSA $< -o $@
$(KDIR)qtrmm_kernel_LT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(QGEMMKERNEL)
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DXDOUBLE -UCOMPLEX -DLEFT -DTRANSA $< -o $@
$(KDIR)qtrmm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(QGEMMKERNEL)
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DXDOUBLE -UCOMPLEX -ULEFT -UTRANSA $< -o $@
$(KDIR)qtrmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(QGEMMKERNEL)
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DXDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o $@
endif
ifdef CTRMMKERNEL
$(KDIR)ctrmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL)
$(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN $< -o $@
$(KDIR)ctrmm_kernel_LT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL)
$(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -DTRANSA -UCONJ -DNN $< -o $@
$(KDIR)ctrmm_kernel_LR$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL)
$(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -UTRANSA -DCONJ -DCN $< -o $@
$(KDIR)ctrmm_kernel_LC$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL)
$(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -DTRANSA -DCONJ -DCN $< -o $@
$(KDIR)ctrmm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL)
$(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -UTRANSA -UCONJ -DNN $< -o $@
$(KDIR)ctrmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL)
$(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -DTRANSA -UCONJ -DNN $< -o $@
$(KDIR)ctrmm_kernel_RR$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL)
$(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -UTRANSA -DCONJ -DNC $< -o $@
$(KDIR)ctrmm_kernel_RC$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL)
$(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC $< -o $@
else
$(KDIR)ctrmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMMKERNEL)
$(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN $< -o $@
@ -767,37 +654,6 @@ $(KDIR)ctrmm_kernel_RR$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMMKERNEL)
$(KDIR)ctrmm_kernel_RC$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMMKERNEL)
$(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC $< -o $@
endif
ifdef ZTRMMKERNEL
$(KDIR)ztrmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL)
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN $< -o $@
$(KDIR)ztrmm_kernel_LT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL)
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -UCONJ -DNN $< -o $@
$(KDIR)ztrmm_kernel_LR$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL)
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -DCONJ -DCN $< -o $@
$(KDIR)ztrmm_kernel_LC$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL)
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -DCONJ -DCN $< -o $@
$(KDIR)ztrmm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL)
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -UCONJ -DNN $< -o $@
$(KDIR)ztrmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL)
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -UCONJ -DNN $< -o $@
$(KDIR)ztrmm_kernel_RR$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL)
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -DCONJ -DNC $< -o $@
$(KDIR)ztrmm_kernel_RC$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL)
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC $< -o $@
else
$(KDIR)ztrmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMMKERNEL)
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN $< -o $@
@ -821,37 +677,10 @@ $(KDIR)ztrmm_kernel_RR$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMMKERNEL)
$(KDIR)ztrmm_kernel_RC$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMMKERNEL)
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC $< -o $@
endif
endif
ifdef XTRMMKERNEL
$(KDIR)xtrmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(XTRMMKERNEL)
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DXDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN $< -o $@
$(KDIR)xtrmm_kernel_LT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(XTRMMKERNEL)
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DXDOUBLE -DCOMPLEX -DLEFT -DTRANSA -UCONJ -DNN $< -o $@
$(KDIR)xtrmm_kernel_LR$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(XTRMMKERNEL)
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DXDOUBLE -DCOMPLEX -DLEFT -UTRANSA -DCONJ -DCN $< -o $@
$(KDIR)xtrmm_kernel_LC$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(XTRMMKERNEL)
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DXDOUBLE -DCOMPLEX -DLEFT -DTRANSA -DCONJ -DCN $< -o $@
$(KDIR)xtrmm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(XTRMMKERNEL)
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DXDOUBLE -DCOMPLEX -ULEFT -UTRANSA -UCONJ -DNN $< -o $@
$(KDIR)xtrmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(XTRMMKERNEL)
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DXDOUBLE -DCOMPLEX -ULEFT -DTRANSA -UCONJ -DNN $< -o $@
$(KDIR)xtrmm_kernel_RR$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(XTRMMKERNEL)
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DXDOUBLE -DCOMPLEX -ULEFT -UTRANSA -DCONJ -DNC $< -o $@
$(KDIR)xtrmm_kernel_RC$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(XTRMMKERNEL)
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DXDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC $< -o $@
else
$(KDIR)xtrmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(XGEMMKERNEL)
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DXDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN $< -o $@
@ -877,9 +706,6 @@ $(KDIR)xtrmm_kernel_RR$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(XGEMMKERNEL)
$(KDIR)xtrmm_kernel_RC$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(XGEMMKERNEL)
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DXDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC $< -o $@
endif
$(KDIR)cgemm3m_kernel$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMM3MKERNEL)
$(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DNN $< -o $@

46
kernel/arm/KERNEL Normal file
View File

@ -0,0 +1,46 @@
ifndef SNRM2KERNEL
SNRM2KERNEL = nrm2.c
endif
ifndef DNRM2KERNEL
DNRM2KERNEL = nrm2.c
endif
ifndef CNRM2KERNEL
CNRM2KERNEL = znrm2.c
endif
ifndef ZNRM2KERNEL
ZNRM2KERNEL = znrm2.c
endif
ifndef SCABS_KERNEL
SCABS_KERNEL = ../generic/cabs.c
endif
ifndef DCABS_KERNEL
DCABS_KERNEL = ../generic/cabs.c
endif
ifndef QCABS_KERNEL
QCABS_KERNEL = ../generic/cabs.c
endif
ifndef LSAME_KERNEL
LSAME_KERNEL = ../generic/lsame.c
endif
ifndef SGEMM_BETA
SGEMM_BETA = ../generic/gemm_beta.c
endif
ifndef DGEMM_BETA
DGEMM_BETA = ../generic/gemm_beta.c
endif
ifndef CGEMM_BETA
CGEMM_BETA = ../generic/zgemm_beta.c
endif
ifndef ZGEMM_BETA
ZGEMM_BETA = ../generic/zgemm_beta.c
endif

137
kernel/arm/KERNEL.ARMV7 Normal file
View File

@ -0,0 +1,137 @@
SAMAXKERNEL = ../arm/amax.c
DAMAXKERNEL = ../arm/amax.c
CAMAXKERNEL = ../arm/zamax.c
ZAMAXKERNEL = ../arm/zamax.c
SAMINKERNEL = ../arm/amin.c
DAMINKERNEL = ../arm/amin.c
CAMINKERNEL = ../arm/zamin.c
ZAMINKERNEL = ../arm/zamin.c
SMAXKERNEL = ../arm/max.c
DMAXKERNEL = ../arm/max.c
SMINKERNEL = ../arm/min.c
DMINKERNEL = ../arm/min.c
ISAMAXKERNEL = ../arm/iamax.c
IDAMAXKERNEL = ../arm/iamax.c
ICAMAXKERNEL = ../arm/izamax.c
IZAMAXKERNEL = ../arm/izamax.c
ISAMINKERNEL = ../arm/iamin.c
IDAMINKERNEL = ../arm/iamin.c
ICAMINKERNEL = ../arm/izamin.c
IZAMINKERNEL = ../arm/izamin.c
ISMAXKERNEL = ../arm/imax.c
IDMAXKERNEL = ../arm/imax.c
ISMINKERNEL = ../arm/imin.c
IDMINKERNEL = ../arm/imin.c
SSWAPKERNEL = ../arm/swap.c
DSWAPKERNEL = ../arm/swap.c
CSWAPKERNEL = ../arm/zswap.c
ZSWAPKERNEL = ../arm/zswap.c
SASUMKERNEL = ../arm/asum.c
DASUMKERNEL = ../arm/asum.c
CASUMKERNEL = ../arm/zasum.c
ZASUMKERNEL = ../arm/zasum.c
SAXPYKERNEL = ../arm/axpy.c
DAXPYKERNEL = ../arm/axpy.c
CAXPYKERNEL = ../arm/zaxpy.c
ZAXPYKERNEL = ../arm/zaxpy.c
SCOPYKERNEL = ../arm/copy.c
DCOPYKERNEL = ../arm/copy.c
CCOPYKERNEL = ../arm/zcopy.c
ZCOPYKERNEL = ../arm/zcopy.c
SDOTKERNEL = ../arm/dot.c
DDOTKERNEL = ../arm/dot.c
CDOTKERNEL = ../arm/zdot.c
ZDOTKERNEL = ../arm/zdot.c
SNRM2KERNEL = ../arm/nrm2.c
DNRM2KERNEL = ../arm/nrm2.c
CNRM2KERNEL = ../arm/znrm2.c
ZNRM2KERNEL = ../arm/znrm2.c
SROTKERNEL = ../arm/rot.c
DROTKERNEL = ../arm/rot.c
CROTKERNEL = ../arm/zrot.c
ZROTKERNEL = ../arm/zrot.c
SSCALKERNEL = ../arm/scal.c
DSCALKERNEL = ../arm/scal.c
CSCALKERNEL = ../arm/zscal.c
ZSCALKERNEL = ../arm/zscal.c
SGEMVNKERNEL = gemv_n.c
DGEMVNKERNEL = gemv_n.c
CGEMVNKERNEL = zgemv_n.c
ZGEMVNKERNEL = zgemv_n.c
SGEMVTKERNEL = gemv_t.c
DGEMVTKERNEL = gemv_t.c
CGEMVTKERNEL = zgemv_t.c
ZGEMVTKERNEL = zgemv_t.c
STRMMKERNEL = ../generic/trmmkernel_2x2.c
DTRMMKERNEL = ../generic/trmmkernel_2x2.c
CTRMMKERNEL = ../generic/ztrmmkernel_2x2.c
ZTRMMKERNEL = ../generic/ztrmmkernel_2x2.c
SGEMMKERNEL = ../generic/gemmkernel_2x2.c
SGEMMONCOPY = ../generic/gemm_ncopy_2.c
SGEMMOTCOPY = ../generic/gemm_tcopy_2.c
SGEMMONCOPYOBJ = sgemm_oncopy.o
SGEMMOTCOPYOBJ = sgemm_otcopy.o
DGEMMKERNEL = ../generic/gemmkernel_2x2.c
DGEMMONCOPY = ../generic/gemm_ncopy_2.c
DGEMMOTCOPY = ../generic/gemm_tcopy_2.c
DGEMMONCOPYOBJ = dgemm_oncopy.o
DGEMMOTCOPYOBJ = dgemm_otcopy.o
CGEMMKERNEL = ../generic/zgemmkernel_2x2.c
CGEMMONCOPY = ../generic/zgemm_ncopy_2.c
CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
CGEMMONCOPYOBJ = cgemm_oncopy.o
CGEMMOTCOPYOBJ = cgemm_otcopy.o
ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c
ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c
ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
ZGEMMONCOPYOBJ = zgemm_oncopy.o
ZGEMMOTCOPYOBJ = zgemm_otcopy.o
STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
CGEMM3MKERNEL = zgemm3m_kernel_8x4_core2.S
ZGEMM3MKERNEL = zgemm3m_kernel_4x4_core2.S

2
kernel/arm/Makefile Normal file
View File

@ -0,0 +1,2 @@
clean ::

65
kernel/arm/amax.c Normal file
View File

@ -0,0 +1,65 @@
/*************************************************************************************
*
* Copyright (C) 2012-2013 Werner Saar (wernsaar@googlemail.com)
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*
**************************************************************************************/
/**************************************************************************************
* 2013/09/14 Saar
* BLASTEST float : OK
* BLASTEST double : OK
* CTEST : NoTest
* TEST : NoTest
*
**************************************************************************************/
#include "common.h"
#include <math.h>
#if defined(DOUBLE)
#define ABS fabs
#else
#define ABS fabsf
#endif
FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
{
BLASLONG i=0;
BLASLONG ix=0;
FLOAT maxf=0.0;
if (n < 0 || inc_x < 1 ) return(maxf);
maxf=ABS(x[0]);
while(i < n)
{
if( ABS(x[ix]) > ABS(maxf) )
{
maxf = ABS(x[ix]);
}
ix += inc_x;
i++;
}
return(maxf);
}

65
kernel/arm/amin.c Normal file
View File

@ -0,0 +1,65 @@
/*************************************************************************************
*
* Copyright (C) 2012-2013 Werner Saar (wernsaar@googlemail.com)
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*
**************************************************************************************/
/**************************************************************************************
* 2013/09/14 Saar
* BLASTEST float : OK
* BLASTEST double : OK
* CTEST : NoTest
* TEST : NoTest
*
**************************************************************************************/
#include "common.h"
#include <math.h>
#if defined(DOUBLE)
#define ABS fabs
#else
#define ABS fabsf
#endif
FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
{
BLASLONG i=0;
BLASLONG ix=0;
FLOAT minf=0.0;
if (n < 0 || inc_x < 1 ) return(minf);
minf=ABS(x[0]);
while(i < n)
{
if( ABS(x[ix]) < ABS(minf) )
{
minf = ABS(x[ix]);
}
ix += inc_x;
i++;
}
return(minf);
}

59
kernel/arm/asum.c Normal file
View File

@ -0,0 +1,59 @@
/*************************************************************************************
*
* Copyright (C) 2012-2013 Werner Saar (wernsaar@googlemail.com)
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*
**************************************************************************************/
/**************************************************************************************
* 2013/09/14 Saar
* BLASTEST float : OK
* BLASTEST double : OK
* CTEST : OK
* TEST : OK
*
**************************************************************************************/
#include "common.h"
#include <math.h>
#if defined(DOUBLE)
#define ABS fabs
#else
#define ABS fabsf
#endif
FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
{
BLASLONG i=0;
FLOAT sumf = 0.0;
if (n < 0 || inc_x < 1 ) return(sumf);
n *= inc_x;
while(i < n)
{
sumf += ABS(x[i]);
i += inc_x;
}
return(sumf);
}

56
kernel/arm/axpy.c Normal file
View File

@ -0,0 +1,56 @@
/*************************************************************************************
*
* Copyright (C) 2012-2013 Werner Saar (wernsaar@googlemail.com)
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*
**************************************************************************************/
/**************************************************************************************
* 2013/09/14 Saar
* BLASTEST float : OK
* BLASTEST double : OK
* CTEST : OK
* TEST : OK
*
**************************************************************************************/
#include "common.h"
int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *dummy, BLASLONG dummy2)
{
BLASLONG i=0;
BLASLONG ix,iy;
if ( n < 0 ) return(0);
if ( da == 0.0 ) return(0);
ix = 0;
iy = 0;
while(i < n)
{
y[iy] += da * x[ix] ;
ix += inc_x ;
iy += inc_y ;
i++ ;
}
return(0);
}

51
kernel/arm/copy.c Normal file
View File

@ -0,0 +1,51 @@
/*************************************************************************************
*
* Copyright (C) 2012-2013 Werner Saar (wernsaar@googlemail.com)
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*
**************************************************************************************/
/**************************************************************************************
* 2013/09/14 Saar
* BLASTEST float : OK
* BLASTEST double : OK
* CTEST : OK
* TEST : OK
*
**************************************************************************************/
#include "common.h"
int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
{
BLASLONG i=0;
BLASLONG ix=0,iy=0;
if ( n < 0 ) return(0);
while(i < n)
{
y[iy] = x[ix] ;
ix += inc_x ;
iy += inc_y ;
i++ ;
}
return(0);
}

56
kernel/arm/dot.c Normal file
View File

@ -0,0 +1,56 @@
/*************************************************************************************
*
* Copyright (C) 2012-2013 Werner Saar (wernsaar@googlemail.com)
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*
**************************************************************************************/
/**************************************************************************************
* 2013/09/14 Saar
* BLASTEST float : OK
* BLASTEST double : OK
* CTEST : OK
* TEST : OK
*
**************************************************************************************/
#include "common.h"
#if defined(DSDOT)
double CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
#else
FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
#endif
{
BLASLONG i=0;
BLASLONG ix=0,iy=0;
double dot = 0.0 ;
if ( n < 0 ) return(dot);
while(i < n)
{
dot += y[iy] * x[ix] ;
ix += inc_x ;
iy += inc_y ;
i++ ;
}
return(dot);
}

58
kernel/arm/gemv_n.c Normal file
View File

@ -0,0 +1,58 @@
/*************************************************************************************
* *
* * Copyright (C) 2012-2013 Werner Saar (wernsaar@googlemail.com)
* *
* * This program is free software: you can redistribute it and/or modify
* * it under the terms of the GNU General Public License as published by
* * the Free Software Foundation, either version 3 of the License, or
* * (at your option) any later version.
* *
* * This program is distributed in the hope that it will be useful,
* * but WITHOUT ANY WARRANTY; without even the implied warranty of
* * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* * GNU General Public License for more details.
* *
* * You should have received a copy of the GNU General Public License
* * along with this program. If not, see http://www.gnu.org/licenses/.
* *
* **************************************************************************************/
/**************************************************************************************
* * 2013/09/14 Saar
* * BLASTEST float : OK
* * BLASTEST double : OK
* CTEST : OK
* TEST : OK
* *
* **************************************************************************************/
#include "common.h"
int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *buffer)
{
BLASLONG i;
BLASLONG ix,iy;
BLASLONG j;
FLOAT *a_ptr;
FLOAT temp;
ix = 0;
a_ptr = a;
for (j=0; j<n; j++)
{
temp = alpha * x[ix];
iy = 0;
for (i=0; i<m; i++)
{
y[iy] += temp * a_ptr[i];
iy += inc_y;
}
a_ptr += lda;
ix += inc_x;
}
}

59
kernel/arm/gemv_t.c Normal file
View File

@ -0,0 +1,59 @@
/*************************************************************************************
* *
* * Copyright (C) 2012-2013 Werner Saar (wernsaar@googlemail.com)
* *
* * This program is free software: you can redistribute it and/or modify
* * it under the terms of the GNU General Public License as published by
* * the Free Software Foundation, either version 3 of the License, or
* * (at your option) any later version.
* *
* * This program is distributed in the hope that it will be useful,
* * but WITHOUT ANY WARRANTY; without even the implied warranty of
* * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* * GNU General Public License for more details.
* *
* * You should have received a copy of the GNU General Public License
* * along with this program. If not, see http://www.gnu.org/licenses/.
* *
* **************************************************************************************/
/**************************************************************************************
* * 2013/09/14 Saar
* * BLASTEST float : OK
* * BLASTEST double : OK
* CTEST : OK
* TEST : OK
* *
* **************************************************************************************/
#include "common.h"
int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *buffer)
{
BLASLONG i;
BLASLONG ix,iy;
BLASLONG j;
FLOAT *a_ptr;
FLOAT temp;
iy = 0;
a_ptr = a;
for (j=0; j<n; j++)
{
temp = 0.0;
ix = 0;
for (i=0; i<m; i++)
{
temp += a_ptr[i] * x[ix];
ix += inc_x;
}
y[iy] += alpha * temp;
iy += inc_y;
a_ptr += lda;
}
}

67
kernel/arm/iamax.c Normal file
View File

@ -0,0 +1,67 @@
/*************************************************************************************
*
* Copyright (C) 2012-2013 Werner Saar (wernsaar@googlemail.com)
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*
**************************************************************************************/
/**************************************************************************************
* 2013/09/14 Saar
* BLASTEST float : NoTest
* BLASTEST double : NoTest
* CTEST : OK
* TEST : OK
*
**************************************************************************************/
#include "common.h"
#include <math.h>
#if defined(DOUBLE)
#define ABS fabs
#else
#define ABS fabsf
#endif
BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
{
BLASLONG i=0;
BLASLONG ix=0;
FLOAT maxf=0.0;
BLASLONG max=0;
if (n < 0 || inc_x < 1 ) return(max);
maxf=ABS(x[0]);
while(i < n)
{
if( ABS(x[ix]) > ABS(maxf) )
{
max = i;
maxf = ABS(x[ix]);
}
ix += inc_x;
i++;
}
return(max+1);
}

67
kernel/arm/iamin.c Normal file
View File

@ -0,0 +1,67 @@
/*************************************************************************************
*
* Copyright (C) 2012-2013 Werner Saar (wernsaar@googlemail.com)
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*
**************************************************************************************/
/**************************************************************************************
* 2013/09/14 Saar
* BLASTEST float : NoTest
* BLASTEST double : NoTest
* CTEST : NoTest
* TEST : NoTest
*
**************************************************************************************/
#include "common.h"
#include <math.h>
#if defined(DOUBLE)
#define ABS fabs
#else
#define ABS fabsf
#endif
BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
{
BLASLONG i=0;
BLASLONG ix=0;
FLOAT minf=0.0;
BLASLONG min=0;
if (n < 0 || inc_x < 1 ) return(min);
minf=ABS(x[0]);
while(i < n)
{
if( ABS(x[ix]) < ABS(minf) )
{
min = i;
minf = ABS(x[ix]);
}
ix += inc_x;
i++;
}
return(min+1);
}

58
kernel/arm/imax.c Normal file
View File

@ -0,0 +1,58 @@
/*************************************************************************************
*
* Copyright (C) 2012-2013 Werner Saar (wernsaar@googlemail.com)
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*
**************************************************************************************/
/**************************************************************************************
* 2013/09/14 Saar
* BLASTEST float : NoTest
* BLASTEST double : NoTest
* CTEST : NoTest
* TEST : NoTest
*
**************************************************************************************/
#include "common.h"
#include <math.h>
BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
{
BLASLONG i=0;
BLASLONG ix=0;
FLOAT maxf=0.0;
BLASLONG max=0;
if (n < 0 || inc_x < 1 ) return(max);
maxf=x[0];
while(i < n)
{
if( x[ix] > maxf )
{
max = i;
maxf = x[ix];
}
ix += inc_x;
i++;
}
return(max+1);
}

56
kernel/arm/imin.c Normal file
View File

@ -0,0 +1,56 @@
/*************************************************************************************
*
* Copyright (C) 2012-2013 Werner Saar (wernsaar@googlemail.com)
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*
**************************************************************************************/
/**************************************************************************************
* 2013/08/19 Saar
* BLASTEST float
* BLASTEST double
*
**************************************************************************************/
#include "common.h"
#include <math.h>
BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
{
BLASLONG i=0;
BLASLONG ix=0;
FLOAT minf=0.0;
BLASLONG min=0;
if (n < 0 || inc_x < 1 ) return(min);
minf=x[0];
while(i < n)
{
if( x[ix] > minf )
{
min = i;
minf = x[ix];
}
ix += inc_x;
i++;
}
return(min+1);
}

73
kernel/arm/izamax.c Normal file
View File

@ -0,0 +1,73 @@
/*************************************************************************************
*
* Copyright (C) 2012-2013 Werner Saar (wernsaar@googlemail.com)
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*
**************************************************************************************/
/**************************************************************************************
* 2013/09/14 Saar
* BLASTEST float : NoTest
* BLASTEST double : NoTest
* CTEST : OK
* TEST : OK
*
**************************************************************************************/
#include "common.h"
#include <math.h>
#if defined(DOUBLE)
#define ABS fabs
#else
#define ABS fabsf
#endif
#define CABS1(x,i) ABS(x[i])+ABS(x[i+1])
BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
{
BLASLONG i=0;
BLASLONG ix=0;
FLOAT maxf[2];
BLASLONG max=0;
BLASLONG inc_x2;
if (n < 0 || inc_x < 1 ) return(max);
inc_x2 = 2 * inc_x;
maxf[0] = ABS(x[ix]);
maxf[1] = ABS(x[ix+1]);
while(i < n)
{
if( CABS1(x,ix) > CABS1(maxf,0) )
{
max = i;
maxf[0] = ABS(x[ix]);
maxf[1] = ABS(x[ix+1]);
}
ix += inc_x2;
i++;
}
return(max+1);
}

73
kernel/arm/izamin.c Normal file
View File

@ -0,0 +1,73 @@
/*************************************************************************************
*
* Copyright (C) 2012-2013 Werner Saar (wernsaar@googlemail.com)
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*
**************************************************************************************/
/**************************************************************************************
* 2013/09/14 Saar
* BLASTEST float : NoTest
* BLASTEST double : NoTest
* CTEST : NoTest
* TEST : NoTest
*
**************************************************************************************/
#include "common.h"
#include <math.h>
#if defined(DOUBLE)
#define ABS fabs
#else
#define ABS fabsf
#endif
#define CABS1(x,i) ABS(x[i])+ABS(x[i+1])
BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
{
BLASLONG i=0;
BLASLONG ix=0;
FLOAT minf[2];
BLASLONG min=0;
BLASLONG inc_x2;
if (n < 0 || inc_x < 1 ) return(min);
inc_x2 = 2 * inc_x;
minf[0] = ABS(x[ix]);
minf[1] = ABS(x[ix+1]);
while(i < n)
{
if( CABS1(x,ix) < CABS1(minf,0) )
{
min = i;
minf[0] = ABS(x[ix]);
minf[1] = ABS(x[ix+1]);
}
ix += inc_x2;
i++;
}
return(min+1);
}

55
kernel/arm/max.c Normal file
View File

@ -0,0 +1,55 @@
/*************************************************************************************
*
* Copyright (C) 2012-2013 Werner Saar (wernsaar@googlemail.com)
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*
**************************************************************************************/
/**************************************************************************************
* 2013/09/14 Saar
* BLASTEST float : NoTest
* BLASTEST double : NoTest
* CTEST : NoTest
* TEST : NoTest
*
**************************************************************************************/
#include "common.h"
#include <math.h>
FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
{
BLASLONG i=0;
BLASLONG ix=0;
FLOAT maxf=0.0;
if (n < 0 || inc_x < 1 ) return(maxf);
maxf=x[0];
while(i < n)
{
if( x[ix] > maxf )
{
maxf = x[ix];
}
ix += inc_x;
i++;
}
return(maxf);
}

55
kernel/arm/min.c Normal file
View File

@ -0,0 +1,55 @@
/*************************************************************************************
*
* Copyright (C) 2012-2013 Werner Saar (wernsaar@googlemail.com)
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*
**************************************************************************************/
/**************************************************************************************
* 2013/09/14 Saar
* BLASTEST float : NoTest
* BLASTEST double : NoTest
* CTEST : NoTest
* TEST : NoTest
*
**************************************************************************************/
#include "common.h"
#include <math.h>
FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
{
BLASLONG i=0;
BLASLONG ix=0;
FLOAT minf=0.0;
if (n < 0 || inc_x < 1 ) return(minf);
minf=x[0];
while(i < n)
{
if( x[ix] < minf )
{
minf = x[ix];
}
ix += inc_x;
i++;
}
return(minf);
}

80
kernel/arm/nrm2.c Normal file
View File

@ -0,0 +1,80 @@
/*************************************************************************************
*
* Copyright (C) 2012-2013 Werner Saar (wernsaar@googlemail.com)
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*
**************************************************************************************/
/**************************************************************************************
* 2013/09/13 Saar
* BLASTEST float : OK
* BLASTEST double : OK
* CTEST : OK
* TEST : OK
*
**************************************************************************************/
#include "common.h"
#include <math.h>
#if defined(DOUBLE)
#define ABS fabs
#else
#define ABS fabsf
#endif
FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
{
BLASLONG i=0;
FLOAT scale = 0.0;
FLOAT ssq = 1.0;
FLOAT absxi = 0.0;
if (n < 0 || inc_x < 1 ) return(0.0);
if ( n == 1 ) return( ABS(x[0]) );
n *= inc_x;
while(i < n)
{
if ( x[i] != 0.0 )
{
absxi = ABS( x[i] );
if ( scale < absxi )
{
ssq = 1 + ssq * ( scale / absxi ) * ( scale / absxi );
scale = absxi ;
}
else
{
ssq += ( absxi/scale ) * ( absxi/scale );
}
}
i += inc_x;
}
scale = scale * sqrt( ssq );
return(scale);
}

54
kernel/arm/rot.c Normal file
View File

@ -0,0 +1,54 @@
/*************************************************************************************
*
* Copyright (C) 2012-2013 Werner Saar (wernsaar@googlemail.com)
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*
**************************************************************************************/
/**************************************************************************************
* 2013/09/14 Saar
* BLASTEST float : OK
* BLASTEST double : OK
* CTEST : OK
* TEST : OK
*
**************************************************************************************/
#include "common.h"
int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT c, FLOAT s)
{
BLASLONG i=0;
BLASLONG ix=0,iy=0;
FLOAT temp;
if ( n <= 0 ) return(0);
while(i < n)
{
temp = c*x[ix] + s*y[iy] ;
y[iy] = c*y[iy] - s*x[ix] ;
x[ix] = temp ;
ix += inc_x ;
iy += inc_y ;
i++ ;
}
return(0);
}

50
kernel/arm/scal.c Normal file
View File

@ -0,0 +1,50 @@
/*************************************************************************************
*
* Copyright (C) 2012-2013 Werner Saar (wernsaar@googlemail.com)
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*
**************************************************************************************/
/**************************************************************************************
* 2013/09/14 Saar
* BLASTEST float : OK
* BLASTEST double : OK
* CTEST : OK
* TEST : OK
*
**************************************************************************************/
#include "common.h"
int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *dummy, BLASLONG dummy2)
{
BLASLONG i=0;
if ( n < 0 || inc_x < 1 ) return(0);
if ( da == 1.0 ) return(0);
n *= inc_x;
while(i < n)
{
x[i] = da * x[i] ;
i += inc_x ;
}
return(0);
}

54
kernel/arm/swap.c Normal file
View File

@ -0,0 +1,54 @@
/*************************************************************************************
*
* Copyright (C) 2012-2013 Werner Saar (wernsaar@googlemail.com)
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*
**************************************************************************************/
/**************************************************************************************
* 2013/08/20 Saar
* BLASTEST float OK
* BLASTEST double OK
*
**************************************************************************************/
#include "common.h"
#include <stdio.h>
int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *dummy, BLASLONG dummy2)
{
BLASLONG i=0;
BLASLONG ix=0,iy=0;
FLOAT temp;
if ( n < 0 ) return(0);
while(i < n)
{
temp = x[ix] ;
x[ix] = y[iy] ;
y[iy] = temp ;
ix += inc_x ;
iy += inc_y ;
i++ ;
}
return(0);
}

73
kernel/arm/zamax.c Normal file
View File

@ -0,0 +1,73 @@
/*************************************************************************************
*
* Copyright (C) 2012-2013 Werner Saar (wernsaar@googlemail.com)
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*
**************************************************************************************/
/**************************************************************************************
* 2013/09/14 Saar
* BLASTEST float : OK
* BLASTEST double : OK
* CTEST : NoTest
* TEST : NoTest
*
**************************************************************************************/
#include "common.h"
#include <math.h>
#if defined(DOUBLE)
#define ABS fabs
#else
#define ABS fabsf
#endif
#define CABS1(x,i) ABS(x[i])+ABS(x[i+1])
FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
{
BLASLONG i=0;
BLASLONG ix=0;
FLOAT maxf[2];
BLASLONG max=0;
BLASLONG inc_x2;
if (n < 0 || inc_x < 1 ) return(0.0);
inc_x2 = 2 * inc_x;
maxf[0] = ABS(x[ix]);
maxf[1] = ABS(x[ix+1]);
while(i < n)
{
if( CABS1(x,ix) > CABS1(maxf,0) )
{
max = i;
maxf[0] = ABS(x[ix]);
maxf[1] = ABS(x[ix+1]);
}
ix += inc_x2;
i++;
}
return(CABS1(maxf,0));
}

73
kernel/arm/zamin.c Normal file
View File

@ -0,0 +1,73 @@
/*************************************************************************************
*
* Copyright (C) 2012-2013 Werner Saar (wernsaar@googlemail.com)
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*
**************************************************************************************/
/**************************************************************************************
* 2013/09/14 Saar
* BLASTEST float : OK
* BLASTEST double : OK
* CTEST : NoTest
* TEST : NoTest
*
**************************************************************************************/
#include "common.h"
#include <math.h>
#if defined(DOUBLE)
#define ABS fabs
#else
#define ABS fabsf
#endif
#define CABS1(x,i) ABS(x[i])+ABS(x[i+1])
FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
{
BLASLONG i=0;
BLASLONG ix=0;
FLOAT minf[2];
BLASLONG min=0;
BLASLONG inc_x2;
if (n < 0 || inc_x < 1 ) return(0.0);
inc_x2 = 2 * inc_x;
minf[0] = ABS(x[ix]);
minf[1] = ABS(x[ix+1]);
while(i < n)
{
if( CABS1(x,ix) < CABS1(minf,0) )
{
min = i;
minf[0] = ABS(x[ix]);
minf[1] = ABS(x[ix+1]);
}
ix += inc_x2;
i++;
}
return(CABS1(minf,0));
}

63
kernel/arm/zasum.c Normal file
View File

@ -0,0 +1,63 @@
/*************************************************************************************
*
* Copyright (C) 2012-2013 Werner Saar (wernsaar@googlemail.com)
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*
**************************************************************************************/
/**************************************************************************************
* 2013/09/14 Saar
* BLASTEST float : OK
* BLASTEST double : OK
* CTEST : OK
* TEST : OK
*
**************************************************************************************/
#include "common.h"
#include <math.h>
#if defined(DOUBLE)
#define ABS fabs
#else
#define ABS fabsf
#endif
#define CABS1(x,i) ABS(x[i])+ABS(x[i+1])
FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
{
BLASLONG i=0;
FLOAT sumf = 0.0;
BLASLONG inc_x2;
if (n < 0 || inc_x < 1 ) return(sumf);
inc_x2 = 2 * inc_x;
n *= inc_x2;
while(i < n)
{
sumf += CABS1(x,i);
i += inc_x2;
}
return(sumf);
}

64
kernel/arm/zaxpy.c Normal file
View File

@ -0,0 +1,64 @@
/*************************************************************************************
*
* Copyright (C) 2012-2013 Werner Saar (wernsaar@googlemail.com)
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*
**************************************************************************************/
/**************************************************************************************
* 2013/09/15 Saar
* BLASTEST float : OK
* BLASTEST double : OK
* CTEST : OK
* TEST : OK
*
**************************************************************************************/
#include "common.h"
int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *dummy, BLASLONG dummy2)
{
BLASLONG i=0;
BLASLONG ix,iy;
if ( n < 0 ) return(0);
if ( da_r == 0.0 && da_i == 0.0 ) return(0);
ix = 0;
iy = 0;
BLASLONG inc_x2 = 2 * inc_x;
BLASLONG inc_y2 = 2 * inc_y;
while(i < n)
{
#if !defined(CONJ)
y[iy] += ( da_r * x[ix] - da_i * x[ix+1] ) ;
y[iy+1] += ( da_r * x[ix+1] + da_i * x[ix] ) ;
#else
y[iy] += ( da_r * x[ix] + da_i * x[ix+1] ) ;
y[iy+1] -= ( da_r * x[ix+1] - da_i * x[ix] ) ;
#endif
ix += inc_x2 ;
iy += inc_y2 ;
i++ ;
}
return(0);
}

55
kernel/arm/zcopy.c Normal file
View File

@ -0,0 +1,55 @@
/*************************************************************************************
*
* Copyright (C) 2012-2013 Werner Saar (wernsaar@googlemail.com)
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*
**************************************************************************************/
/**************************************************************************************
* 2013/09/14 Saar
* BLASTEST float : OK
* BLASTEST double : OK
* CTEST : OK
* TEST : OK
*
**************************************************************************************/
#include "common.h"
int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
{
BLASLONG i=0;
BLASLONG ix=0,iy=0;
if ( n < 0 ) return(0);
BLASLONG inc_x2 = 2 * inc_x;
BLASLONG inc_y2 = 2 * inc_y;
while(i < n)
{
y[iy] = x[ix] ;
y[iy+1] = x[ix+1] ;
ix += inc_x2;
iy += inc_y2;
i++ ;
}
return(0);
}

70
kernel/arm/zdot.c Normal file
View File

@ -0,0 +1,70 @@
/*************************************************************************************
*
* Copyright (C) 2012-2013 Werner Saar (wernsaar@googlemail.com)
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*
**************************************************************************************/
/**************************************************************************************
* 2013/09/14 Saar
* BLASTEST float : FAIL
* BLASTEST double : FAIL
* CTEST : OK
* TEST : OK
*
**************************************************************************************/
#include "common.h"
#include <complex.h>
FLOAT _Complex CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
{
BLASLONG i=0;
BLASLONG ix=0,iy=0;
FLOAT dot[2];
FLOAT _Complex result;
dot[0]=0.0;
dot[1]=0.0;
__real__ result = 0.0 ;
__imag__ result = 0.0 ;
if ( n < 0 ) return(result);
BLASLONG inc_x2 = 2 * inc_x ;
BLASLONG inc_y2 = 2 * inc_y ;
while(i < n)
{
#if !defined(CONJ)
dot[0] += ( x[ix] * y[iy] - x[ix+1] * y[iy+1] ) ;
dot[1] += ( x[ix+1] * y[iy] + x[ix] * y[iy+1] ) ;
#else
dot[0] += ( x[ix] * y[iy] + x[ix+1] * y[iy+1] ) ;
dot[1] -= ( x[ix+1] * y[iy] - x[ix] * y[iy+1] ) ;
#endif
ix += inc_x2 ;
iy += inc_y2 ;
i++ ;
}
__real__ result = dot[0];
__imag__ result = dot[1];
return(result);
}

117
kernel/arm/zgemv_n.c Normal file
View File

@ -0,0 +1,117 @@
/*************************************************************************************
* *
* * Copyright (C) 2012-2013 Werner Saar (wernsaar@googlemail.com)
* *
* * This program is free software: you can redistribute it and/or modify
* * it under the terms of the GNU General Public License as published by
* * the Free Software Foundation, either version 3 of the License, or
* * (at your option) any later version.
* *
* * This program is distributed in the hope that it will be useful,
* * but WITHOUT ANY WARRANTY; without even the implied warranty of
* * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* * GNU General Public License for more details.
* *
* * You should have received a copy of the GNU General Public License
* * along with this program. If not, see http://www.gnu.org/licenses/.
* *
* **************************************************************************************/
/**************************************************************************************
* * 2013/09/15 Saar
* * BLASTEST float : OK
* * BLASTEST double : OK
* CTEST : OK
* TEST : OK
* *
* **************************************************************************************/
#include "common.h"
int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha_r, FLOAT alpha_i, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *buffer)
{
BLASLONG i;
BLASLONG ix,iy;
BLASLONG j;
FLOAT *a_ptr;
FLOAT temp_r,temp_i;
BLASLONG inc_x2,inc_y2;
BLASLONG lda2;
BLASLONG i2;
if( alpha_r == 0.0 && alpha_i == 0.0 ) return(0);
lda2 = 2*lda;
inc_x2 = 2 * inc_x;
inc_y2 = 2 * inc_y;
ix = 0;
a_ptr = a;
#if !defined(CONJ)
for (j=0; j<n; j++)
{
#if !defined(XCONJ)
temp_r = alpha_r * x[ix] - alpha_i * x[ix+1];
temp_i = alpha_r * x[ix+1] + alpha_i * x[ix];
#else
temp_r = alpha_r * x[ix] + alpha_i * x[ix+1];
temp_i = alpha_r * x[ix+1] - alpha_i * x[ix];
#endif
iy = 0;
for (i=0; i<m; i++)
{
i2 = 2*i;
#if !defined(XCONJ)
y[iy] += temp_r * a_ptr[i2] - temp_i * a_ptr[i2+1];
y[iy+1] += temp_r * a_ptr[i2+1] + temp_i * a_ptr[i2];
#else
y[iy] += temp_r * a_ptr[i2] + temp_i * a_ptr[i2+1];
y[iy+1] += temp_r * a_ptr[i2+1] - temp_i * a_ptr[i2];
#endif
iy += inc_y2;
}
a_ptr += lda2;
ix += inc_x2;
}
#else
for (j=0; j<n; j++)
{
#if !defined(XCONJ)
temp_r = alpha_r * x[ix] - alpha_i * x[ix+1];
temp_i = alpha_r * x[ix+1] + alpha_i * x[ix];
#else
temp_r = alpha_r * x[ix] + alpha_i * x[ix+1];
temp_i = alpha_r * x[ix+1] - alpha_i * x[ix];
#endif
iy = 0;
for (i=0; i<m; i++)
{
i2 = 2*i;
#if !defined(XCONJ)
y[iy] += temp_r * a_ptr[i2] + temp_i * a_ptr[i2+1];
y[iy+1] -= temp_r * a_ptr[i2+1] - temp_i * a_ptr[i2];
#else
y[iy] += temp_r * a_ptr[i2] - temp_i * a_ptr[i2+1];
y[iy+1] -= temp_r * a_ptr[i2+1] + temp_i * a_ptr[i2];
#endif
iy += inc_y2;
}
a_ptr += lda2;
ix += inc_x2;
}
#endif
return(0);
}

123
kernel/arm/zgemv_t.c Normal file
View File

@ -0,0 +1,123 @@
/*************************************************************************************
* *
* * Copyright (C) 2012-2013 Werner Saar (wernsaar@googlemail.com)
* *
* * This program is free software: you can redistribute it and/or modify
* * it under the terms of the GNU General Public License as published by
* * the Free Software Foundation, either version 3 of the License, or
* * (at your option) any later version.
* *
* * This program is distributed in the hope that it will be useful,
* * but WITHOUT ANY WARRANTY; without even the implied warranty of
* * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* * GNU General Public License for more details.
* *
* * You should have received a copy of the GNU General Public License
* * along with this program. If not, see http://www.gnu.org/licenses/.
* *
* **************************************************************************************/
/**************************************************************************************
* * 2013/09/15 Saar
* * BLASTEST float : OK
* * BLASTEST double : OK
* CTEST : OK
* TEST : OK
* *
* **************************************************************************************/
#include "common.h"
int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha_r, FLOAT alpha_i, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *buffer)
{
BLASLONG i;
BLASLONG ix,iy;
BLASLONG j;
FLOAT *a_ptr;
FLOAT temp_r,temp_i;
BLASLONG inc_x2,inc_y2;
BLASLONG lda2;
BLASLONG i2;
if( alpha_r == 0.0 && alpha_i == 0.0 ) return(0);
lda2 = 2*lda;
inc_x2 = 2 * inc_x;
inc_y2 = 2 * inc_y;
iy = 0;
a_ptr = a;
#if !defined(CONJ)
for (j=0; j<n; j++)
{
temp_r = 0.0;
temp_i = 0.0;
ix = 0;
for (i=0; i<m; i++)
{
i2 = 2*i;
#if !defined(XCONJ)
temp_r += a_ptr[i2] * x[ix] - a_ptr[i2+1] * x[ix+1];
temp_i += a_ptr[i2] * x[ix+1] + a_ptr[i2+1] * x[ix];
#else
temp_r += a_ptr[i2] * x[ix] + a_ptr[i2+1] * x[ix+1];
temp_i += a_ptr[i2] * x[ix+1] - a_ptr[i2+1] * x[ix];
#endif
ix += inc_x2;
}
#if !defined(XCONJ)
y[iy] += alpha_r * temp_r - alpha_i * temp_i;
y[iy+1] += alpha_r * temp_i + alpha_i * temp_r;
#else
y[iy] += alpha_r * temp_r + alpha_i * temp_i;
y[iy+1] -= alpha_r * temp_i - alpha_i * temp_r;
#endif
a_ptr += lda2;
iy += inc_y2;
}
#else
for (j=0; j<n; j++)
{
temp_r = 0.0;
temp_i = 0.0;
ix = 0;
for (i=0; i<m; i++)
{
i2 = 2*i;
#if !defined(XCONJ)
temp_r += a_ptr[i2] * x[ix] + a_ptr[i2+1] * x[ix+1];
temp_i += a_ptr[i2] * x[ix+1] - a_ptr[i2+1] * x[ix];
#else
temp_r += a_ptr[i2] * x[ix] - a_ptr[i2+1] * x[ix+1];
temp_i += a_ptr[i2] * x[ix+1] + a_ptr[i2+1] * x[ix];
#endif
ix += inc_x2;
}
#if !defined(XCONJ)
y[iy] += alpha_r * temp_r - alpha_i * temp_i;
y[iy+1] += alpha_r * temp_i + alpha_i * temp_r;
#else
y[iy] += alpha_r * temp_r + alpha_i * temp_i;
y[iy+1] -= alpha_r * temp_i - alpha_i * temp_r;
#endif
a_ptr += lda2;
iy += inc_y2;
}
#endif
return(0);
}

98
kernel/arm/znrm2.c Normal file
View File

@ -0,0 +1,98 @@
/*************************************************************************************
*
* Copyright (C) 2012-2013 Werner Saar (wernsaar@googlemail.com)
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*
**************************************************************************************/
/**************************************************************************************
* 2013/09/13 Saar
* BLASTEST float : OK
* BLASTEST double : OK
* CTEST : OK
* TEST : OK
*
**************************************************************************************/
#include "common.h"
#include <math.h>
#if defined(DOUBLE)
#define ABS fabs
#else
#define ABS fabsf
#endif
FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
{
BLASLONG i=0;
FLOAT scale = 0.0;
FLOAT ssq = 1.0;
BLASLONG inc_x2;
FLOAT temp;
if (n < 0 || inc_x < 1 ) return(0.0);
inc_x2 = 2 * inc_x;
n *= inc_x2;
while(i < n)
{
if ( x[i] != 0.0 )
{
temp = ABS( x[i] );
if ( scale < temp )
{
ssq = 1 + ssq * ( scale / temp ) * ( scale / temp );
scale = temp ;
}
else
{
ssq += ( temp / scale ) * ( temp / scale );
}
}
if ( x[i+1] != 0.0 )
{
temp = ABS( x[i+1] );
if ( scale < temp )
{
ssq = 1 + ssq * ( scale / temp ) * ( scale / temp );
scale = temp ;
}
else
{
ssq += ( temp / scale ) * ( temp / scale );
}
}
i += inc_x2;
}
scale = scale * sqrt( ssq );
return(scale);
}

60
kernel/arm/zrot.c Normal file
View File

@ -0,0 +1,60 @@
/*************************************************************************************
*
* Copyright (C) 2012-2013 Werner Saar (wernsaar@googlemail.com)
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*
**************************************************************************************/
/**************************************************************************************
* 2013/09/14 Saar
* BLASTEST float : OK
* BLASTEST double : OK
* CTEST : OK
* TEST : OK
*
**************************************************************************************/
#include "common.h"
int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT c, FLOAT s)
{
BLASLONG i=0;
BLASLONG ix=0,iy=0;
FLOAT temp[2];
if ( n <= 0 ) return(0);
BLASLONG inc_x2 = 2 * inc_x ;
BLASLONG inc_y2 = 2 * inc_y ;
while(i < n)
{
temp[0] = c*x[ix] + s*y[iy] ;
temp[1] = c*x[ix+1] + s*y[iy+1] ;
y[iy] = c*y[iy] - s*x[ix] ;
y[iy+1] = c*y[iy+1] - s*x[ix+1] ;
x[ix] = temp[0] ;
x[ix+1] = temp[1] ;
ix += inc_x2 ;
iy += inc_y2 ;
i++ ;
}
return(0);
}

56
kernel/arm/zscal.c Normal file
View File

@ -0,0 +1,56 @@
/*************************************************************************************
*
* Copyright (C) 2012-2013 Werner Saar (wernsaar@googlemail.com)
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*
**************************************************************************************/
/**************************************************************************************
* 2013/09/14 Saar
* BLASTEST float : OK
* BLASTEST double : OK
* CTEST : OK
* TEST : OK
*
**************************************************************************************/
#include "common.h"
int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r,FLOAT da_i, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *dummy, BLASLONG dummy2)
{
BLASLONG i=0;
BLASLONG inc_x2;
BLASLONG ip = 0;
FLOAT temp;
if ( n < 0 || inc_x < 1 ) return(0);
inc_x2 = 2 * inc_x;
for ( i=0; i<n; i++ )
{
temp = da_r * x[ip] - da_i * x[ip+1] ;
x[ip+1] = da_r * x[ip+1] + da_i * x[ip] ;
x[ip] = temp;
ip += inc_x2;
}
return(0);
}

62
kernel/arm/zswap.c Normal file
View File

@ -0,0 +1,62 @@
/*************************************************************************************
*
* Copyright (C) 2012-2013 Werner Saar (wernsaar@googlemail.com)
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*
**************************************************************************************/
/**************************************************************************************
* 2013/09/14 Saar
* BLASTEST float : OK
* BLASTEST double : OK
* CTEST : OK
* TEST : OK
*
**************************************************************************************/
#include "common.h"
#include <stdio.h>
int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3, FLOAT dummy4, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *dummy, BLASLONG dummy2)
{
BLASLONG i=0;
BLASLONG ix=0,iy=0;
FLOAT temp[2];
if ( n < 0 ) return(0);
BLASLONG inc_x2 = 2 * inc_x;
BLASLONG inc_y2 = 2 * inc_y;
while(i < n)
{
temp[0] = x[ix] ;
temp[1] = x[ix+1] ;
x[ix] = y[iy] ;
x[ix+1] = y[iy+1] ;
y[iy] = temp[0] ;
y[iy+1] = temp[1] ;
ix += inc_x2 ;
iy += inc_y2 ;
i++ ;
}
return(0);
}

44
param.h
View File

@ -1793,6 +1793,50 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define SYMV_P 16
#endif
#ifdef ARMV7
#define SNUMOPT 2
#define DNUMOPT 2
#define GEMM_DEFAULT_OFFSET_A 0
#define GEMM_DEFAULT_OFFSET_B 0
#define GEMM_DEFAULT_ALIGN 0x03fffUL
#define SGEMM_DEFAULT_UNROLL_M 2
#define SGEMM_DEFAULT_UNROLL_N 2
#define DGEMM_DEFAULT_UNROLL_M 2
#define DGEMM_DEFAULT_UNROLL_N 2
#define CGEMM_DEFAULT_UNROLL_M 2
#define CGEMM_DEFAULT_UNROLL_N 2
#define ZGEMM_DEFAULT_UNROLL_M 2
#define ZGEMM_DEFAULT_UNROLL_N 2
#define SGEMM_DEFAULT_P 64
#define DGEMM_DEFAULT_P 24
#define CGEMM_DEFAULT_P 24
#define ZGEMM_DEFAULT_P 20
#define SGEMM_DEFAULT_Q 192
#define DGEMM_DEFAULT_Q 128
#define CGEMM_DEFAULT_Q 128
#define ZGEMM_DEFAULT_Q 64
#define SGEMM_DEFAULT_R 512
#define DGEMM_DEFAULT_R 512
#define CGEMM_DEFAULT_R 512
#define ZGEMM_DEFAULT_R 512
#define GEMM_OFFSET_A1 0x10000
#define GEMM_OFFSET_B1 0x100000
#define SYMV_P 16
#endif
#ifdef GENERIC
#define SNUMOPT 2