common files modified for ARM

This commit is contained in:
wernsaar 2013-09-28 19:10:32 +02:00
parent 70411af888
commit d13788d1b4
7 changed files with 255 additions and 48 deletions

View File

@ -12,7 +12,7 @@ VERSION = 0.2.8
# You can specify the target architecture, otherwise it's
# automatically detected.
# TARGET = PENRYN
TARGET = ARMV7
# If you want to support multiple architectures in one binary
# DYNAMIC_ARCH = 1
@ -25,20 +25,20 @@ VERSION = 0.2.8
# FC = gfortran
# You can even specify a cross compiler. In that case, please also set HOSTCC.
# CC = x86_64-w64-mingw32-gcc
# FC = x86_64-w64-mingw32-gfortran
CC = arm-linux-gnueabihf-gcc
FC = arm-linux-gnueabihf-gfortran
# If you use the cross compiler, please set this host compiler.
# HOSTCC = gcc
HOSTCC = gcc
# If you need 32bit binary, define BINARY=32, otherwise define BINARY=64
# BINARY=64
#BINARY=32
# About threaded BLAS. It will be automatically detected if you don't
# specify it.
# To force single-threaded mode, specify USE_THREAD = 0
# To force multi-threaded mode, specify USE_THREAD = 1
# USE_THREAD = 0
USE_THREAD = 0
# If you're going to use this library with OpenMP, please comment it in.
# USE_OPENMP = 1
@ -46,7 +46,7 @@ VERSION = 0.2.8
# You can define maximum number of threads. Basically it should be
# less than actual number of cores. If you don't specify one, it's
# automatically detected by the script.
# NUM_THREADS = 24
NUM_THREADS = 4
# If you don't need to generate the shared library, please comment it in.
# NO_SHARED = 1
@ -54,16 +54,12 @@ VERSION = 0.2.8
# If you don't need CBLAS interface, please comment it in.
# NO_CBLAS = 1
# If you only want CBLAS interface without installing Fortran compiler,
# please comment it in.
# ONLY_CBLAS = 1
# If you don't need LAPACK, please comment it in.
# If you set NO_LAPACK=1, the library automatically sets NO_LAPACKE=1.
# NO_LAPACK = 1
#NO_LAPACK = 1
# If you don't need LAPACKE (C Interface to LAPACK), please comment it in.
# NO_LAPACKE = 1
#NO_LAPACKE = 1
# If you want to use legacy threaded Level 3 implementation.
# USE_SIMPLE_THREADED_LEVEL3 = 1
@ -76,10 +72,10 @@ VERSION = 0.2.8
# Unfortunately most kernels won't give us a high quality buffer.
# BLAS tries to find the best region before entering main function,
# but it will consume time. If you don't like it, you can disable one.
# NO_WARMUP = 1
NO_WARMUP = 1
# If you want to disable CPU/Memory affinity on Linux.
# NO_AFFINITY = 1
NO_AFFINITY = 1
# Don't use AVX kernel on Sandy Bridge. It is compatible with old compilers
# and OS. However, the performance is low.
@ -127,13 +123,13 @@ VERSION = 0.2.8
# Common Optimization Flag;
# The default -O2 is enough.
# COMMON_OPT = -O2
COMMON_OPT = -O0 -marm -mfpu=vfpv3 -fno-omit-frame-pointer
# Profiling flags
COMMON_PROF = -pg
# Build Debug version
# DEBUG = 1
DEBUG = 1
#
# End of user configuration

View File

@ -82,19 +82,12 @@ ifeq ($(HOSTCC), loongcc)
GETARCH_FLAGS += -static
endif
#if don't use Fortran, it will only compile CBLAS.
ifeq ($(ONLY_CBLAS), 1)
NO_LAPACK = 1
else
ONLY_CBLAS = 0
endif
# This operation is expensive, so execution should be once.
ifndef GOTOBLAS_MAKEFILE
export GOTOBLAS_MAKEFILE = 1
# Generating Makefile.conf and config.h
DUMMY := $(shell $(MAKE) -C $(TOPDIR) -f Makefile.prebuild CC="$(CC)" FC="$(FC)" HOSTCC="$(HOSTCC)" CFLAGS="$(GETARCH_FLAGS)" BINARY=$(BINARY) USE_OPENMP=$(USE_OPENMP) TARGET_CORE=$(TARGET_CORE) ONLY_CBLAS=$(ONLY_CBLAS) all)
DUMMY := $(shell $(MAKE) -C $(TOPDIR) -f Makefile.prebuild CC="$(CC)" FC="$(FC)" HOSTCC="$(HOSTCC)" CFLAGS="$(GETARCH_FLAGS)" BINARY=$(BINARY) USE_OPENMP=$(USE_OPENMP) TARGET_CORE=$(TARGET_CORE) all)
ifndef TARGET_CORE
include $(TOPDIR)/Makefile.conf
@ -331,14 +324,16 @@ ifeq ($(ARCH), x86)
DYNAMIC_CORE = KATMAI COPPERMINE NORTHWOOD PRESCOTT BANIAS \
CORE2 PENRYN DUNNINGTON NEHALEM ATHLON OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO
ifneq ($(NO_AVX), 1)
DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER
DYNAMIC_CORE += SANDYBRIDGE
#BULLDOZER PILEDRIVER
endif
endif
ifeq ($(ARCH), x86_64)
DYNAMIC_CORE = PRESCOTT CORE2 PENRYN DUNNINGTON NEHALEM OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO
ifneq ($(NO_AVX), 1)
DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER
DYNAMIC_CORE += SANDYBRIDGE
#BULLDOZER PILEDRIVER
endif
endif
@ -368,6 +363,10 @@ NO_BINARY_MODE = 1
BINARY_DEFINED = 1
endif
ifeq ($(ARCH), arm)
NO_BINARY_MODE = 1
BINARY_DEFINED = 1
endif
#
# C Compiler dependent settings
#
@ -892,23 +891,6 @@ LIBZIPNAME = $(LIBNAME:.$(LIBSUFFIX)=.zip)
LIBS = $(TOPDIR)/$(LIBNAME)
LIBS_P = $(TOPDIR)/$(LIBNAME_P)
LIB_COMPONENTS = BLAS
ifneq ($(NO_CBLAS), 1)
LIB_COMPONENTS += CBLAS
endif
ifneq ($(NO_LAPACK), 1)
LIB_COMPONENTS += LAPACK
ifneq ($(NO_LAPACKE), 1)
LIB_COMPONENTS += LAPACKE
endif
endif
ifeq ($(ONLY_CBLAS), 1)
LIB_COMPONENTS = CBLAS
endif
export OSNAME
export ARCH
export CORE
@ -935,7 +917,6 @@ export USE_OPENMP
export CROSS
export CROSS_SUFFIX
export NOFORTRAN
export NO_FBLAS
export EXTRALIB
export CEXTRALIB
export FEXTRALIB

View File

@ -363,6 +363,10 @@ please https://github.com/xianyi/OpenBLAS/issues/246
#include "common_mips64.h"
#endif
#ifdef ARCH_ARM
#include "common_arm.h"
#endif
#ifdef OS_LINUX
#include "common_linux.h"
#endif
@ -574,10 +578,9 @@ typedef struct {
#include "common_level2.h"
#include "common_level3.h"
#include "common_lapack.h"
#ifdef CBLAS
# define OPENBLAS_CONST /* see comment in cblas.h */
# include "cblas.h"
/* This header file is generated from "cblas.h" (see Makefile.prebuild). */
#include "cblas_noconst.h"
#endif
#ifndef ASSEMBLER

163
common_arm.h Normal file
View File

@ -0,0 +1,163 @@
/*****************************************************************************
Copyright (c) 2011, Lab of Parallel Software and Computational Science,ICSAS
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the ISCAS nor the names of its contributors may
be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************************/
/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin. */
/* All rights reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the following */
/* conditions are met: */
/* */
/* 1. Redistributions of source code must retain the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer. */
/* */
/* 2. Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/* POSSIBILITY OF SUCH DAMAGE. */
/* */
/* The views and conclusions contained in the software and */
/* documentation are those of the authors and should not be */
/* interpreted as representing official policies, either expressed */
/* or implied, of The University of Texas at Austin. */
/*********************************************************************/
#ifndef COMMON_ARM
#define COMMON_ARM
/* Memory-barrier hooks. Both expand to nothing in this header.
   NOTE(review): with empty barriers nothing orders memory accesses around
   the places that use MB/WMB; confirm this is acceptable before building
   with threads enabled. */
#define MB
#define WMB
/* INLINE expands to the C `inline` keyword. */
#define INLINE inline
/* Value-less feature flag; presumably selects how complex results are
   returned to callers (TODO confirm against the users of this macro). */
#define RETURN_BY_COMPLEX
#ifndef ASSEMBLER
/*
 * Acquire the spin lock stored at `address`.
 *
 * The lock word is treated as 0 when free and non-zero when held. The
 * function returns only after this caller has atomically changed the
 * word from 0 to 1.
 *
 * The previous body was empty (the only code was a commented-out MIPS
 * ll/sc sequence), so no mutual exclusion was provided at all.
 *
 * __sync_lock_test_and_set is a GCC builtin: it atomically stores the
 * new value, returns the previous one and acts as an acquire barrier.
 * NOTE(review): the lock is presumably released by storing 0 to the
 * word elsewhere; confirm the release side uses a suitable barrier.
 */
static inline void blas_lock(volatile unsigned long *address){
  while (__sync_lock_test_and_set(address, 1UL)) {
    /* Lock is held by someone else: wait with plain reads so the
       atomic exchange is only retried once the word looks free. */
    while (*address) {
    }
  }
}
/* Stub for this port: takes no input and always reports 0.
   NOTE(review): presumably meant to read a cycle counter; no counter
   is read here. */
static inline unsigned int rpcc(void){
  return 0U;
}
/* Integer division helper: returns x / y using the plain C division
   operator. There is no guard against y == 0. */
static inline int blas_quickdivide(blasint x, blasint y){
  blasint quotient = x / y;
  return quotient;
}
/* GET_IMAGE(res): inline asm that stores a VFP register into the lvalue
   `res`: d1 with vstr.f64 when DOUBLE is defined, otherwise s1 with
   vstr.f32. The statement is volatile and declares a "memory" clobber.
   NOTE(review): presumably used to pick up the imaginary part of a
   complex result left in that register; confirm against the callers. */
#if defined(DOUBLE)
#define GET_IMAGE(res) __asm__ __volatile__("vstr.f64 d1, %0" : "=m"(res) : : "memory")
#else
#define GET_IMAGE(res) __asm__ __volatile__("vstr.f32 s1, %0" : "=m"(res) : : "memory")
#endif
/* GET_IMAGE_CANCEL expands to nothing in this header. */
#define GET_IMAGE_CANCEL
#endif
/* REALNAME is the symbol name used by PROLOGUE below: ASMFNAME when
   F_INTERFACE is defined, ASMNAME otherwise. Both names are defined
   outside this header. */
#ifndef F_INTERFACE
#define REALNAME ASMNAME
#else
#define REALNAME ASMFNAME
#endif
/* Helpers for assembly sources only (ASSEMBLER defined, NEEDPARAM not).
   PROLOGUE emits: .arm, a .global for REALNAME, a .func record for
   REALNAME, and then the REALNAME label itself.
   EPILOGUE and PROFCODE expand to nothing.
   NOTE(review): PROLOGUE opens a .func record but EPILOGUE emits no
   matching .endfunc; confirm the assembler in use accepts that. */
#if defined(ASSEMBLER) && !defined(NEEDPARAM)
#define PROLOGUE \
.arm ;\
.global REALNAME ;\
.func REALNAME ;\
REALNAME:
#define EPILOGUE
#define PROFCODE
#endif
/* Size and address constants for this port. */
/* Value-less flag; its consumer is outside this header. */
#define SEEK_ADDRESS
/* Page size defaults to 4 << 10 bytes (4 KiB) unless already defined. */
#ifndef PAGESIZE
#define PAGESIZE ( 4 << 10)
#endif
/* 4 << 20 bytes (4 MiB). */
#define HUGE_PAGESIZE ( 4 << 20)
/* 16 << 20 bytes (16 MiB). */
#define BUFFER_SIZE (16 << 20)
/* START_ADDRESS minus one BUFFER_SIZE per MAX_CPU_NUMBER entry; both
   START_ADDRESS and MAX_CPU_NUMBER are defined outside this header. */
#define BASE_ADDRESS (START_ADDRESS - BUFFER_SIZE * MAX_CPU_NUMBER)
/* Fall back to MAP_ANON where MAP_ANONYMOUS is not already defined. */
#ifndef MAP_ANONYMOUS
#define MAP_ANONYMOUS MAP_ANON
#endif
#endif

View File

@ -124,3 +124,9 @@ ARCH_IA64
#if defined(__LP64) || defined(__LP64__) || defined(__ptr64) || defined(__x86_64__) || defined(__amd64__) || defined(__64BIT__)
BINARY_64
#endif
/* Emit the ARCH_ARM marker when the compiler predefines __ARM_ARCH or
   __ARM_ARCH_7A__.
   NOTE(review): a toolchain that predefines neither macro is not
   matched by this test; confirm that is intended. */
#if defined(__ARM_ARCH) || defined(__ARM_ARCH_7A__)
ARCH_ARM
#endif

View File

@ -679,6 +679,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define CORENAME "generic"
#endif
/*
 * Forced ARMv7 target description, active when FORCE_ARMV7 is defined.
 * Defining FORCE makes the "#ifndef FORCE" section that follows be
 * skipped. The remaining macros are string constants naming the
 * architecture, the sub-directory, the library/core name and a list of
 * -D options (ARCHCONFIG).
 *
 * Fix: L2_SIZE was written as 512488, a digit transposition of
 * 524288 (512 * 1024 bytes). The empty "#else" branch was dropped.
 */
#ifdef FORCE_ARMV7
#define FORCE
#define ARCHITECTURE "ARM"
#define SUBARCHITECTURE "ARMV7"
#define SUBDIRNAME "arm"
#define ARCHCONFIG "-DARMV7 " \
"-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \
"-DL2_SIZE=524288 -DL2_LINESIZE=32 " \
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 "
#define LIBNAME "armv7"
#define CORENAME "ARMV7"
#endif
#ifndef FORCE
#if defined(__powerpc__) || defined(__powerpc) || defined(powerpc) || \

43
param.h
View File

@ -1793,6 +1793,49 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define SYMV_P 16
#endif
/* Parameter set selected when ARMV7 is defined. All entries are plain
   compile-time constants; their consumers are outside this hunk.
   NOTE(review): the S/D/C/Z prefixes presumably stand for the single,
   double, complex and double-complex variants; confirm. */
#ifdef ARMV7
#define SNUMOPT 2
#define DNUMOPT 2
/* GEMM buffer offsets and alignment mask. */
#define GEMM_DEFAULT_OFFSET_A 0
#define GEMM_DEFAULT_OFFSET_B 0
#define GEMM_DEFAULT_ALIGN 0x03fffUL
/* Per-variant UNROLL_M / UNROLL_N values. Only the DGEMM pair (8, 2)
   differs from the 2 x 2 used by the other three variants. */
#define SGEMM_DEFAULT_UNROLL_M 2
#define SGEMM_DEFAULT_UNROLL_N 2
#define DGEMM_DEFAULT_UNROLL_M 8
#define DGEMM_DEFAULT_UNROLL_N 2
#define CGEMM_DEFAULT_UNROLL_M 2
#define CGEMM_DEFAULT_UNROLL_N 2
#define ZGEMM_DEFAULT_UNROLL_M 2
#define ZGEMM_DEFAULT_UNROLL_N 2
/* Per-variant P, Q and R values (presumably blocking sizes; TODO confirm). */
#define SGEMM_DEFAULT_P 64
#define DGEMM_DEFAULT_P 128
#define CGEMM_DEFAULT_P 24
#define ZGEMM_DEFAULT_P 20
#define SGEMM_DEFAULT_Q 192
#define DGEMM_DEFAULT_Q 128
#define CGEMM_DEFAULT_Q 128
#define ZGEMM_DEFAULT_Q 64
#define SGEMM_DEFAULT_R 512
#define DGEMM_DEFAULT_R 2048
#define CGEMM_DEFAULT_R 512
#define ZGEMM_DEFAULT_R 512
#define SYMV_P 16
#endif
#ifdef GENERIC
#define SNUMOPT 2