common files modified for ARM
This commit is contained in:
parent
70411af888
commit
d13788d1b4
|
@ -12,7 +12,7 @@ VERSION = 0.2.8
|
||||||
|
|
||||||
# You can specify the target architecture, otherwise it's
|
# You can specify the target architecture, otherwise it's
|
||||||
# automatically detected.
|
# automatically detected.
|
||||||
# TARGET = PENRYN
|
TARGET = ARMV7
|
||||||
|
|
||||||
# If you want to support multiple architecture in one binary
|
# If you want to support multiple architecture in one binary
|
||||||
# DYNAMIC_ARCH = 1
|
# DYNAMIC_ARCH = 1
|
||||||
|
@ -25,20 +25,20 @@ VERSION = 0.2.8
|
||||||
# FC = gfortran
|
# FC = gfortran
|
||||||
|
|
||||||
# Even you can specify cross compiler. Meanwhile, please set HOSTCC.
|
# Even you can specify cross compiler. Meanwhile, please set HOSTCC.
|
||||||
# CC = x86_64-w64-mingw32-gcc
|
CC = arm-linux-gnueabihf-gcc
|
||||||
# FC = x86_64-w64-mingw32-gfortran
|
FC = arm-linux-gnueabihf-gfortran
|
||||||
|
|
||||||
# If you use the cross compiler, please set this host compiler.
|
# If you use the cross compiler, please set this host compiler.
|
||||||
# HOSTCC = gcc
|
HOSTCC = gcc
|
||||||
|
|
||||||
# If you need 32bit binary, define BINARY=32, otherwise define BINARY=64
|
# If you need 32bit binary, define BINARY=32, otherwise define BINARY=64
|
||||||
# BINARY=64
|
#BINARY=32
|
||||||
|
|
||||||
# About threaded BLAS. It will be automatically detected if you don't
|
# About threaded BLAS. It will be automatically detected if you don't
|
||||||
# specify it.
|
# specify it.
|
||||||
# For force setting for single threaded, specify USE_THREAD = 0
|
# For force setting for single threaded, specify USE_THREAD = 0
|
||||||
# For force setting for multi threaded, specify USE_THREAD = 1
|
# For force setting for multi threaded, specify USE_THREAD = 1
|
||||||
# USE_THREAD = 0
|
USE_THREAD = 0
|
||||||
|
|
||||||
# If you're going to use this library with OpenMP, please comment it in.
|
# If you're going to use this library with OpenMP, please comment it in.
|
||||||
# USE_OPENMP = 1
|
# USE_OPENMP = 1
|
||||||
|
@ -46,7 +46,7 @@ VERSION = 0.2.8
|
||||||
# You can define maximum number of threads. Basically it should be
|
# You can define maximum number of threads. Basically it should be
|
||||||
# less than actual number of cores. If you don't specify one, it's
|
# less than actual number of cores. If you don't specify one, it's
|
||||||
# automatically detected by the script.
|
# automatically detected by the script.
|
||||||
# NUM_THREADS = 24
|
NUM_THREADS = 4
|
||||||
|
|
||||||
# if you don't need generate the shared library, please comment it in.
|
# if you don't need generate the shared library, please comment it in.
|
||||||
# NO_SHARED = 1
|
# NO_SHARED = 1
|
||||||
|
@ -54,16 +54,12 @@ VERSION = 0.2.8
|
||||||
# If you don't need CBLAS interface, please comment it in.
|
# If you don't need CBLAS interface, please comment it in.
|
||||||
# NO_CBLAS = 1
|
# NO_CBLAS = 1
|
||||||
|
|
||||||
# If you only want CBLAS interface without installing Fortran compiler,
|
|
||||||
# please comment it in.
|
|
||||||
# ONLY_CBLAS = 1
|
|
||||||
|
|
||||||
# If you don't need LAPACK, please comment it in.
|
# If you don't need LAPACK, please comment it in.
|
||||||
# If you set NO_LAPACK=1, the library automatically sets NO_LAPACKE=1.
|
# If you set NO_LAPACK=1, the library automatically sets NO_LAPACKE=1.
|
||||||
# NO_LAPACK = 1
|
#NO_LAPACK = 1
|
||||||
|
|
||||||
# If you don't need LAPACKE (C Interface to LAPACK), please comment it in.
|
# If you don't need LAPACKE (C Interface to LAPACK), please comment it in.
|
||||||
# NO_LAPACKE = 1
|
#NO_LAPACKE = 1
|
||||||
|
|
||||||
# If you want to use legacy threaded Level 3 implementation.
|
# If you want to use legacy threaded Level 3 implementation.
|
||||||
# USE_SIMPLE_THREADED_LEVEL3 = 1
|
# USE_SIMPLE_THREADED_LEVEL3 = 1
|
||||||
|
@ -76,10 +72,10 @@ VERSION = 0.2.8
|
||||||
# Unfortunately most of kernel won't give us high quality buffer.
|
# Unfortunately most of kernel won't give us high quality buffer.
|
||||||
# BLAS tries to find the best region before entering main function,
|
# BLAS tries to find the best region before entering main function,
|
||||||
# but it will consume time. If you don't like it, you can disable one.
|
# but it will consume time. If you don't like it, you can disable one.
|
||||||
# NO_WARMUP = 1
|
NO_WARMUP = 1
|
||||||
|
|
||||||
# If you want to disable CPU/Memory affinity on Linux.
|
# If you want to disable CPU/Memory affinity on Linux.
|
||||||
# NO_AFFINITY = 1
|
NO_AFFINITY = 1
|
||||||
|
|
||||||
# Don't use AVX kernel on Sandy Bridge. It is compatible with old compilers
|
# Don't use AVX kernel on Sandy Bridge. It is compatible with old compilers
|
||||||
# and OS. However, the performance is low.
|
# and OS. However, the performance is low.
|
||||||
|
@ -127,13 +123,13 @@ VERSION = 0.2.8
|
||||||
|
|
||||||
# Common Optimization Flag;
|
# Common Optimization Flag;
|
||||||
# The default -O2 is enough.
|
# The default -O2 is enough.
|
||||||
# COMMON_OPT = -O2
|
COMMON_OPT = -O0 -marm -mfpu=vfpv3 -fno-omit-frame-pointer
|
||||||
|
|
||||||
# Profiling flags
|
# Profiling flags
|
||||||
COMMON_PROF = -pg
|
COMMON_PROF = -pg
|
||||||
|
|
||||||
# Build Debug version
|
# Build Debug version
|
||||||
# DEBUG = 1
|
DEBUG = 1
|
||||||
|
|
||||||
#
|
#
|
||||||
# End of user configuration
|
# End of user configuration
|
||||||
|
|
|
@ -82,19 +82,12 @@ ifeq ($(HOSTCC), loongcc)
|
||||||
GETARCH_FLAGS += -static
|
GETARCH_FLAGS += -static
|
||||||
endif
|
endif
|
||||||
|
|
||||||
#if don't use Fortran, it will only compile CBLAS.
|
|
||||||
ifeq ($(ONLY_CBLAS), 1)
|
|
||||||
NO_LAPACK = 1
|
|
||||||
else
|
|
||||||
ONLY_CBLAS = 0
|
|
||||||
endif
|
|
||||||
|
|
||||||
# This operation is expensive, so execution should be once.
|
# This operation is expensive, so execution should be once.
|
||||||
ifndef GOTOBLAS_MAKEFILE
|
ifndef GOTOBLAS_MAKEFILE
|
||||||
export GOTOBLAS_MAKEFILE = 1
|
export GOTOBLAS_MAKEFILE = 1
|
||||||
|
|
||||||
# Generating Makefile.conf and config.h
|
# Generating Makefile.conf and config.h
|
||||||
DUMMY := $(shell $(MAKE) -C $(TOPDIR) -f Makefile.prebuild CC="$(CC)" FC="$(FC)" HOSTCC="$(HOSTCC)" CFLAGS="$(GETARCH_FLAGS)" BINARY=$(BINARY) USE_OPENMP=$(USE_OPENMP) TARGET_CORE=$(TARGET_CORE) ONLY_CBLAS=$(ONLY_CBLAS) all)
|
DUMMY := $(shell $(MAKE) -C $(TOPDIR) -f Makefile.prebuild CC="$(CC)" FC="$(FC)" HOSTCC="$(HOSTCC)" CFLAGS="$(GETARCH_FLAGS)" BINARY=$(BINARY) USE_OPENMP=$(USE_OPENMP) TARGET_CORE=$(TARGET_CORE) all)
|
||||||
|
|
||||||
ifndef TARGET_CORE
|
ifndef TARGET_CORE
|
||||||
include $(TOPDIR)/Makefile.conf
|
include $(TOPDIR)/Makefile.conf
|
||||||
|
@ -331,14 +324,16 @@ ifeq ($(ARCH), x86)
|
||||||
DYNAMIC_CORE = KATMAI COPPERMINE NORTHWOOD PRESCOTT BANIAS \
|
DYNAMIC_CORE = KATMAI COPPERMINE NORTHWOOD PRESCOTT BANIAS \
|
||||||
CORE2 PENRYN DUNNINGTON NEHALEM ATHLON OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO
|
CORE2 PENRYN DUNNINGTON NEHALEM ATHLON OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO
|
||||||
ifneq ($(NO_AVX), 1)
|
ifneq ($(NO_AVX), 1)
|
||||||
DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER
|
DYNAMIC_CORE += SANDYBRIDGE
|
||||||
|
#BULLDOZER PILEDRIVER
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifeq ($(ARCH), x86_64)
|
ifeq ($(ARCH), x86_64)
|
||||||
DYNAMIC_CORE = PRESCOTT CORE2 PENRYN DUNNINGTON NEHALEM OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO
|
DYNAMIC_CORE = PRESCOTT CORE2 PENRYN DUNNINGTON NEHALEM OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO
|
||||||
ifneq ($(NO_AVX), 1)
|
ifneq ($(NO_AVX), 1)
|
||||||
DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER
|
DYNAMIC_CORE += SANDYBRIDGE
|
||||||
|
#BULLDOZER PILEDRIVER
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
@ -368,6 +363,10 @@ NO_BINARY_MODE = 1
|
||||||
BINARY_DEFINED = 1
|
BINARY_DEFINED = 1
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
ifeq ($(ARCH), arm)
|
||||||
|
NO_BINARY_MODE = 1
|
||||||
|
BINARY_DEFINED = 1
|
||||||
|
endif
|
||||||
#
|
#
|
||||||
# C Compiler dependent settings
|
# C Compiler dependent settings
|
||||||
#
|
#
|
||||||
|
@ -892,23 +891,6 @@ LIBZIPNAME = $(LIBNAME:.$(LIBSUFFIX)=.zip)
|
||||||
LIBS = $(TOPDIR)/$(LIBNAME)
|
LIBS = $(TOPDIR)/$(LIBNAME)
|
||||||
LIBS_P = $(TOPDIR)/$(LIBNAME_P)
|
LIBS_P = $(TOPDIR)/$(LIBNAME_P)
|
||||||
|
|
||||||
|
|
||||||
LIB_COMPONENTS = BLAS
|
|
||||||
ifneq ($(NO_CBLAS), 1)
|
|
||||||
LIB_COMPONENTS += CBLAS
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifneq ($(NO_LAPACK), 1)
|
|
||||||
LIB_COMPONENTS += LAPACK
|
|
||||||
ifneq ($(NO_LAPACKE), 1)
|
|
||||||
LIB_COMPONENTS += LAPACKE
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(ONLY_CBLAS), 1)
|
|
||||||
LIB_COMPONENTS = CBLAS
|
|
||||||
endif
|
|
||||||
|
|
||||||
export OSNAME
|
export OSNAME
|
||||||
export ARCH
|
export ARCH
|
||||||
export CORE
|
export CORE
|
||||||
|
@ -935,7 +917,6 @@ export USE_OPENMP
|
||||||
export CROSS
|
export CROSS
|
||||||
export CROSS_SUFFIX
|
export CROSS_SUFFIX
|
||||||
export NOFORTRAN
|
export NOFORTRAN
|
||||||
export NO_FBLAS
|
|
||||||
export EXTRALIB
|
export EXTRALIB
|
||||||
export CEXTRALIB
|
export CEXTRALIB
|
||||||
export FEXTRALIB
|
export FEXTRALIB
|
||||||
|
|
9
common.h
9
common.h
|
@ -363,6 +363,10 @@ please https://github.com/xianyi/OpenBLAS/issues/246
|
||||||
#include "common_mips64.h"
|
#include "common_mips64.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef ARCH_ARM
|
||||||
|
#include "common_arm.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef OS_LINUX
|
#ifdef OS_LINUX
|
||||||
#include "common_linux.h"
|
#include "common_linux.h"
|
||||||
#endif
|
#endif
|
||||||
|
@ -574,10 +578,9 @@ typedef struct {
|
||||||
#include "common_level2.h"
|
#include "common_level2.h"
|
||||||
#include "common_level3.h"
|
#include "common_level3.h"
|
||||||
#include "common_lapack.h"
|
#include "common_lapack.h"
|
||||||
|
|
||||||
#ifdef CBLAS
|
#ifdef CBLAS
|
||||||
# define OPENBLAS_CONST /* see comment in cblas.h */
|
/* This header file is generated from "cblas.h" (see Makefile.prebuild). */
|
||||||
# include "cblas.h"
|
#include "cblas_noconst.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifndef ASSEMBLER
|
#ifndef ASSEMBLER
|
||||||
|
|
|
@ -0,0 +1,163 @@
|
||||||
|
/*****************************************************************************
|
||||||
|
Copyright (c) 2011, Lab of Parallel Software and Computational Science,ICSAS
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
|
||||||
|
1. Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in
|
||||||
|
the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
3. Neither the name of the ISCAS nor the names of its contributors may
|
||||||
|
be used to endorse or promote products derived from this software
|
||||||
|
without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||||
|
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||||
|
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
**********************************************************************************/
|
||||||
|
|
||||||
|
/*********************************************************************/
|
||||||
|
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
||||||
|
/* All rights reserved. */
|
||||||
|
/* */
|
||||||
|
/* Redistribution and use in source and binary forms, with or */
|
||||||
|
/* without modification, are permitted provided that the following */
|
||||||
|
/* conditions are met: */
|
||||||
|
/* */
|
||||||
|
/* 1. Redistributions of source code must retain the above */
|
||||||
|
/* copyright notice, this list of conditions and the following */
|
||||||
|
/* disclaimer. */
|
||||||
|
/* */
|
||||||
|
/* 2. Redistributions in binary form must reproduce the above */
|
||||||
|
/* copyright notice, this list of conditions and the following */
|
||||||
|
/* disclaimer in the documentation and/or other materials */
|
||||||
|
/* provided with the distribution. */
|
||||||
|
/* */
|
||||||
|
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
|
||||||
|
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
|
||||||
|
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
|
||||||
|
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
|
||||||
|
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
|
||||||
|
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
|
||||||
|
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
|
||||||
|
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
|
||||||
|
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
|
||||||
|
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
|
||||||
|
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
|
||||||
|
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
|
||||||
|
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
||||||
|
/* POSSIBILITY OF SUCH DAMAGE. */
|
||||||
|
/* */
|
||||||
|
/* The views and conclusions contained in the software and */
|
||||||
|
/* documentation are those of the authors and should not be */
|
||||||
|
/* interpreted as representing official policies, either expressed */
|
||||||
|
/* or implied, of The University of Texas at Austin. */
|
||||||
|
/*********************************************************************/
|
||||||
|
|
||||||
|
#ifndef COMMON_ARM
|
||||||
|
#define COMMON_ARM
|
||||||
|
|
||||||
|
#define MB
|
||||||
|
#define WMB
|
||||||
|
|
||||||
|
#define INLINE inline
|
||||||
|
|
||||||
|
#define RETURN_BY_COMPLEX
|
||||||
|
|
||||||
|
#ifndef ASSEMBLER
|
||||||
|
|
||||||
|
/*
 * Spin until the lock word at *address has been acquired by the caller.
 *
 * address: lock word; zero is treated as "free", non-zero as "held".
 *
 * An empty body would let every caller through without any mutual
 * exclusion, so the lock is taken with the compiler's atomic test-and-set
 * builtin rather than hand-written assembly.
 * NOTE(review): assumes the matching unlock stores 0 to *address -- confirm
 * against the unlock routine used by the callers.
 */
static inline void blas_lock(volatile unsigned long *address){

  /* Atomically write 1 and fetch the old value (acquire barrier).  A
     non-zero old value means the lock was already held, so retry. */
  while (__sync_lock_test_and_set(address, 1UL)) {

    /* Wait with plain reads until the word is cleared before retrying
       the atomic exchange. */
    while (*address) {
    }
  }
}
|
||||||
|
|
||||||
|
/*
 * Placeholder implementation: always returns 0.
 *
 * The constant has the function's own return type, so no implicit
 * narrowing from a wider local takes place.
 * NOTE(review): presumably meant to read a cycle/time counter on other
 * ports -- confirm before relying on the value for timing.
 */
static inline unsigned int rpcc(void){
  return 0U;
}
|
||||||
|
|
||||||
|
/*
 * Plain integer division: returns x / y converted to int.
 * y must be non-zero (integer division by zero is undefined in C).
 */
static inline int blas_quickdivide(blasint x, blasint y){
  const blasint quotient = x / y;

  return quotient;
}
|
||||||
|
|
||||||
|
#if defined(DOUBLE)
|
||||||
|
#define GET_IMAGE(res) __asm__ __volatile__("vstr.f64 d1, %0" : "=m"(res) : : "memory")
|
||||||
|
#else
|
||||||
|
#define GET_IMAGE(res) __asm__ __volatile__("vstr.f32 s1, %0" : "=m"(res) : : "memory")
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define GET_IMAGE_CANCEL
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
#ifndef F_INTERFACE
|
||||||
|
#define REALNAME ASMNAME
|
||||||
|
#else
|
||||||
|
#define REALNAME ASMFNAME
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(ASSEMBLER) && !defined(NEEDPARAM)
|
||||||
|
|
||||||
|
#define PROLOGUE \
|
||||||
|
.arm ;\
|
||||||
|
.global REALNAME ;\
|
||||||
|
.func REALNAME ;\
|
||||||
|
REALNAME:
|
||||||
|
|
||||||
|
#define EPILOGUE
|
||||||
|
|
||||||
|
#define PROFCODE
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
#define SEEK_ADDRESS
|
||||||
|
|
||||||
|
#ifndef PAGESIZE
|
||||||
|
#define PAGESIZE ( 4 << 10)
|
||||||
|
#endif
|
||||||
|
#define HUGE_PAGESIZE ( 4 << 20)
|
||||||
|
|
||||||
|
#define BUFFER_SIZE (16 << 20)
|
||||||
|
|
||||||
|
|
||||||
|
#define BASE_ADDRESS (START_ADDRESS - BUFFER_SIZE * MAX_CPU_NUMBER)
|
||||||
|
|
||||||
|
#ifndef MAP_ANONYMOUS
|
||||||
|
#define MAP_ANONYMOUS MAP_ANON
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif
|
6
ctest.c
6
ctest.c
|
@ -124,3 +124,9 @@ ARCH_IA64
|
||||||
#if defined(__LP64) || defined(__LP64__) || defined(__ptr64) || defined(__x86_64__) || defined(__amd64__) || defined(__64BIT__)
|
#if defined(__LP64) || defined(__LP64__) || defined(__ptr64) || defined(__x86_64__) || defined(__amd64__) || defined(__64BIT__)
|
||||||
BINARY_64
|
BINARY_64
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if defined(__ARM_ARCH) || defined(__ARM_ARCH_7A__)
|
||||||
|
ARCH_ARM
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
|
15
getarch.c
15
getarch.c
|
@ -679,6 +679,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#define CORENAME "generic"
|
#define CORENAME "generic"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef FORCE_ARMV7
|
||||||
|
#define FORCE
|
||||||
|
#define ARCHITECTURE "ARM"
|
||||||
|
#define SUBARCHITECTURE "ARMV7"
|
||||||
|
#define SUBDIRNAME "arm"
|
||||||
|
/* -D definitions for the forced ARMV7 configuration.
   L2_SIZE is 512 KiB, i.e. 512 * 1024 = 524288 bytes. */
#define ARCHCONFIG   "-DARMV7 " \
       "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \
       "-DL2_SIZE=524288 -DL2_LINESIZE=32 " \
       "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 "
|
||||||
|
#define LIBNAME "armv7"
|
||||||
|
#define CORENAME "ARMV7"
|
||||||
|
#else
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
#ifndef FORCE
|
#ifndef FORCE
|
||||||
|
|
||||||
#if defined(__powerpc__) || defined(__powerpc) || defined(powerpc) || \
|
#if defined(__powerpc__) || defined(__powerpc) || defined(powerpc) || \
|
||||||
|
|
43
param.h
43
param.h
|
@ -1793,6 +1793,49 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#define SYMV_P 16
|
#define SYMV_P 16
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef ARMV7
|
||||||
|
#define SNUMOPT 2
|
||||||
|
#define DNUMOPT 2
|
||||||
|
|
||||||
|
#define GEMM_DEFAULT_OFFSET_A 0
|
||||||
|
#define GEMM_DEFAULT_OFFSET_B 0
|
||||||
|
#define GEMM_DEFAULT_ALIGN 0x03fffUL
|
||||||
|
|
||||||
|
#define SGEMM_DEFAULT_UNROLL_M 2
|
||||||
|
#define SGEMM_DEFAULT_UNROLL_N 2
|
||||||
|
|
||||||
|
#define DGEMM_DEFAULT_UNROLL_M 8
|
||||||
|
#define DGEMM_DEFAULT_UNROLL_N 2
|
||||||
|
|
||||||
|
#define CGEMM_DEFAULT_UNROLL_M 2
|
||||||
|
#define CGEMM_DEFAULT_UNROLL_N 2
|
||||||
|
|
||||||
|
#define ZGEMM_DEFAULT_UNROLL_M 2
|
||||||
|
#define ZGEMM_DEFAULT_UNROLL_N 2
|
||||||
|
|
||||||
|
#define SGEMM_DEFAULT_P 64
|
||||||
|
#define DGEMM_DEFAULT_P 128
|
||||||
|
#define CGEMM_DEFAULT_P 24
|
||||||
|
#define ZGEMM_DEFAULT_P 20
|
||||||
|
|
||||||
|
#define SGEMM_DEFAULT_Q 192
|
||||||
|
#define DGEMM_DEFAULT_Q 128
|
||||||
|
#define CGEMM_DEFAULT_Q 128
|
||||||
|
#define ZGEMM_DEFAULT_Q 64
|
||||||
|
|
||||||
|
#define SGEMM_DEFAULT_R 512
|
||||||
|
#define DGEMM_DEFAULT_R 2048
|
||||||
|
#define CGEMM_DEFAULT_R 512
|
||||||
|
#define ZGEMM_DEFAULT_R 512
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#define SYMV_P 16
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#ifdef GENERIC
|
#ifdef GENERIC
|
||||||
|
|
||||||
#define SNUMOPT 2
|
#define SNUMOPT 2
|
||||||
|
|
Loading…
Reference in New Issue