Added CPU detection and an ARMV6 target (used by the Raspberry Pi)
This commit is contained in:
parent
bf04544902
commit
410afda9b4
|
@ -1,7 +1,12 @@
|
|||
|
||||
ifeq ($(CORE), ARMV7)
|
||||
CCOMMON_OPT += -marm -mfpu=vfpv3 -mfloat-abi=hard
|
||||
FCOMMON_OPT += -marm -mfpu=vfpv3 -mfloat-abi=hard
|
||||
CCOMMON_OPT += -marm -mfpu=vfpv3 -mfloat-abi=hard -march=armv7-a
|
||||
FCOMMON_OPT += -marm -mfpu=vfpv3 -mfloat-abi=hard -march=armv7-a
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), ARMV6)
|
||||
CCOMMON_OPT += -marm -mfpu=vfp -mfloat-abi=hard -march=armv6
|
||||
FCOMMON_OPT += -marm -mfpu=vfp -mfloat-abi=hard -march=armv6
|
||||
endif
|
||||
|
||||
|
||||
|
|
|
@ -0,0 +1,262 @@
|
|||
/**************************************************************************
|
||||
Copyright (c) 2013, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include <string.h>
|
||||
|
||||
/*
 * Identifiers returned by detect(). Each value is also the index of the
 * matching entry in cpuname[], so the two lists must stay in the same order.
 */
#define CPU_UNKNOWN     0
#define CPU_ARMV6       1
#define CPU_ARMV7       2
#define CPU_CORTEXA15   3

/*
 * Core names indexed by the CPU_* identifiers above; get_corename() hands
 * these strings back to its caller.
 */
static char *cpuname[] = {
	"UNKNOWN",	/* CPU_UNKNOWN (was misspelled "UNKOWN") */
	"ARMV6",	/* CPU_ARMV6 */
	"ARMV7",	/* CPU_ARMV7 */
	"CORTEXA15"	/* CPU_CORTEXA15 */
};
|
||||
|
||||
|
||||
/*
 * Report whether the "Features" line of /proc/cpuinfo lists a given flag.
 *
 * search: flag name to look for (for example "vfp" or "neon"); compared
 *         case-sensitively against each whitespace-separated token.
 *
 * Returns 1 when the flag is listed. Returns 0 when it is not listed, when
 * search is NULL, when /proc/cpuinfo cannot be opened, when no "Features"
 * line is found, or when not built for Linux.
 */
int get_feature(char *search)
{

#if defined(linux) || defined(__linux__)
	/* '\n' is a delimiter so the last flag on the line can match too. */
	static const char delims[] = " \t\r\n";
	FILE *infile;
	char buffer[2048], *p, *t;

	if (search == NULL) return(0);

	infile = fopen("/proc/cpuinfo", "r");
	if (infile == NULL) return(0);

	p = (char *) NULL;

	while (fgets(buffer, sizeof(buffer), infile))
	{
		if (!strncmp("Features", buffer, 8))
		{
			/* Stays NULL if the line unexpectedly has no ':'. */
			p = strchr(buffer, ':');
			break;
		}
	}

	fclose(infile);

	if (p == NULL) return(0);

	/* Start right after the ':' and test every token, including the first. */
	for (t = strtok(p + 1, delims); t != NULL; t = strtok(NULL, delims))
	{
		if (!strcmp(t, search)) { return(1); }
	}

#endif
	return(0);
}
|
||||
|
||||
|
||||
int detect(void)
|
||||
{
|
||||
|
||||
#ifdef linux
|
||||
|
||||
FILE *infile;
|
||||
char buffer[512], *p;
|
||||
p = (char *) NULL ;
|
||||
|
||||
infile = fopen("/proc/cpuinfo", "r");
|
||||
|
||||
while (fgets(buffer, sizeof(buffer), infile))
|
||||
{
|
||||
|
||||
if (!strncmp("model name", buffer, 10))
|
||||
{
|
||||
p = strchr(buffer, ':') + 2;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
fclose(infile);
|
||||
|
||||
if(p != NULL)
|
||||
{
|
||||
|
||||
if (strstr(p, "ARMv7"))
|
||||
{
|
||||
if ( get_feature("vfpv4"))
|
||||
return CPU_ARMV7;
|
||||
|
||||
if ( get_feature("vfpv3"))
|
||||
return CPU_ARMV7;
|
||||
|
||||
if ( get_feature("vfp"))
|
||||
return CPU_ARMV6;
|
||||
|
||||
|
||||
}
|
||||
|
||||
if (strstr(p, "ARMv6"))
|
||||
{
|
||||
if ( get_feature("vfp"))
|
||||
return CPU_ARMV6;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
#endif
|
||||
|
||||
return CPU_UNKNOWN;
|
||||
}
|
||||
|
||||
char *get_corename(void)
|
||||
{
|
||||
return cpuname[detect()];
|
||||
}
|
||||
|
||||
/* Write the architecture family name "ARM" to stdout (no trailing newline). */
void get_architecture(void)
{
	fputs("ARM", stdout);
}
|
||||
|
||||
void get_subarchitecture(void)
|
||||
{
|
||||
int d = detect();
|
||||
switch (d)
|
||||
{
|
||||
|
||||
case CPU_ARMV7:
|
||||
printf("ARMV7");
|
||||
break;
|
||||
|
||||
case CPU_ARMV6:
|
||||
printf("ARMV6");
|
||||
break;
|
||||
|
||||
default:
|
||||
printf("UNKNOWN");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* Write the sub-directory name "arm" to stdout (no trailing newline). */
void get_subdirname(void)
{
	fputs("arm", stdout);
}
|
||||
|
||||
void get_cpuconfig(void)
|
||||
{
|
||||
|
||||
int d = detect();
|
||||
switch (d)
|
||||
{
|
||||
|
||||
case CPU_ARMV7:
|
||||
printf("#define ARMV7\n");
|
||||
printf("#define HAVE_VFP\n");
|
||||
printf("#define HAVE_VFPV3\n");
|
||||
if ( get_feature("neon")) printf("#define HAVE_NEON\n");
|
||||
if ( get_feature("vfpv4")) printf("#define HAVE_VFPV4\n");
|
||||
printf("#define L1_DATA_SIZE 65536\n");
|
||||
printf("#define L1_DATA_LINESIZE 32\n");
|
||||
printf("#define L2_SIZE 512488\n");
|
||||
printf("#define L2_LINESIZE 32\n");
|
||||
printf("#define DTB_DEFAULT_ENTRIES 64\n");
|
||||
printf("#define DTB_SIZE 4096\n");
|
||||
printf("#define L2_ASSOCIATIVE 4\n");
|
||||
break;
|
||||
|
||||
case CPU_ARMV6:
|
||||
printf("#define ARMV6\n");
|
||||
printf("#define HAVE_VFP\n");
|
||||
printf("#define L1_DATA_SIZE 65536\n");
|
||||
printf("#define L1_DATA_LINESIZE 32\n");
|
||||
printf("#define L2_SIZE 512488\n");
|
||||
printf("#define L2_LINESIZE 32\n");
|
||||
printf("#define DTB_DEFAULT_ENTRIES 64\n");
|
||||
printf("#define DTB_SIZE 4096\n");
|
||||
printf("#define L2_ASSOCIATIVE 4\n");
|
||||
break;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void get_libname(void)
|
||||
{
|
||||
|
||||
int d = detect();
|
||||
switch (d)
|
||||
{
|
||||
|
||||
case CPU_ARMV7:
|
||||
printf("armv7\n");
|
||||
break;
|
||||
|
||||
case CPU_ARMV6:
|
||||
printf("armv6\n");
|
||||
break;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
 * Write one "HAVE_<X>=1" line to stdout for each of the flags vfp, vfpv3,
 * vfpv4 and neon found on the "Features" line of /proc/cpuinfo.
 *
 * Writes nothing when /proc/cpuinfo cannot be opened, when it has no
 * "Features" line, or when not built for Linux.
 */
void get_features(void)
{

#if defined(linux) || defined(__linux__)
	/* '\n' is a delimiter so the last flag on the line is recognised too. */
	static const char delims[] = " \t\r\n";
	FILE *infile;
	char buffer[2048], *p, *t;

	infile = fopen("/proc/cpuinfo", "r");
	if (infile == NULL) return;

	p = (char *) NULL;

	while (fgets(buffer, sizeof(buffer), infile))
	{
		if (!strncmp("Features", buffer, 8))
		{
			/* Stays NULL if the line unexpectedly has no ':'. */
			p = strchr(buffer, ':');
			break;
		}
	}

	fclose(infile);

	if (p == NULL) return;

	/* Start right after the ':' and test every token, including the first. */
	for (t = strtok(p + 1, delims); t != NULL; t = strtok(NULL, delims))
	{
		if (!strcmp(t, "vfp"))   { printf("HAVE_VFP=1\n"); continue; }
		if (!strcmp(t, "vfpv3")) { printf("HAVE_VFPV3=1\n"); continue; }
		if (!strcmp(t, "vfpv4")) { printf("HAVE_VFPV4=1\n"); continue; }
		if (!strcmp(t, "neon"))  { printf("HAVE_NEON=1\n"); continue; }
	}

#endif
	return;
}
|
||||
|
||||
|
39
getarch.c
39
getarch.c
|
@ -687,23 +687,42 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#define ARCHCONFIG "-DARMV7 " \
|
||||
"-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \
|
||||
"-DL2_SIZE=512488 -DL2_LINESIZE=32 " \
|
||||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 "
|
||||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " \
|
||||
"-DHAVE_VFPV3 -DHAVE_VFP"
|
||||
#define LIBNAME "armv7"
|
||||
#define CORENAME "ARMV7"
|
||||
#else
|
||||
#endif
|
||||
|
||||
#ifdef FORCE_ARMV6
|
||||
#define FORCE
|
||||
#define ARCHITECTURE "ARM"
|
||||
#define SUBARCHITECTURE "ARMV6"
|
||||
#define SUBDIRNAME "arm"
|
||||
#define ARCHCONFIG "-DARMV6 " \
|
||||
"-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \
|
||||
"-DL2_SIZE=512488 -DL2_LINESIZE=32 " \
|
||||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " \
|
||||
"-DHAVE_VFP"
|
||||
#define LIBNAME "armv6"
|
||||
#define CORENAME "ARMV6"
|
||||
#else
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
#ifndef FORCE
|
||||
|
||||
#if defined(__powerpc__) || defined(__powerpc) || defined(powerpc) || \
|
||||
defined(__PPC__) || defined(PPC) || defined(_POWER) || defined(__POWERPC__)
|
||||
defined(__PPC__) || defined(PPC) || defined(_POWER) || defined(__POWERPC__)
|
||||
|
||||
#ifndef POWER
|
||||
#define POWER
|
||||
#endif
|
||||
#define OPENBLAS_SUPPORTED
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(__i386__) || (__x86_64__)
|
||||
#include "cpuid_x86.c"
|
||||
#define OPENBLAS_SUPPORTED
|
||||
|
@ -734,12 +753,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#define OPENBLAS_SUPPORTED
|
||||
#endif
|
||||
|
||||
#ifdef __arm__
|
||||
#include "cpuid_arm.c"
|
||||
#define OPENBLAS_SUPPORTED
|
||||
#endif
|
||||
|
||||
|
||||
#ifndef OPENBLAS_SUPPORTED
|
||||
#error "This arch/CPU is not supported by OpenBLAS."
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
||||
#endif
|
||||
|
||||
static int get_num_cores(void) {
|
||||
|
@ -788,7 +811,7 @@ int main(int argc, char *argv[]){
|
|||
#ifdef FORCE
|
||||
printf("CORE=%s\n", CORENAME);
|
||||
#else
|
||||
#if defined(__i386__) || defined(__x86_64__) || defined(POWER) || defined(__mips__)
|
||||
#if defined(__i386__) || defined(__x86_64__) || defined(POWER) || defined(__mips__) || defined(__arm__)
|
||||
printf("CORE=%s\n", get_corename());
|
||||
#endif
|
||||
#endif
|
||||
|
@ -803,6 +826,12 @@ int main(int argc, char *argv[]){
|
|||
|
||||
printf("NUM_CORES=%d\n", get_num_cores());
|
||||
|
||||
#if defined(__arm__) && !defined(FORCE)
|
||||
get_features();
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
#if defined(__i386__) || defined(__x86_64__)
|
||||
#ifndef FORCE
|
||||
get_sse();
|
||||
|
|
|
@ -0,0 +1,134 @@
|
|||
SAMAXKERNEL = amax.c
|
||||
DAMAXKERNEL = amax.c
|
||||
CAMAXKERNEL = zamax.c
|
||||
ZAMAXKERNEL = zamax.c
|
||||
|
||||
SAMINKERNEL = amin.c
|
||||
DAMINKERNEL = amin.c
|
||||
CAMINKERNEL = zamin.c
|
||||
ZAMINKERNEL = zamin.c
|
||||
|
||||
SMAXKERNEL = max.c
|
||||
DMAXKERNEL = max.c
|
||||
|
||||
SMINKERNEL = min.c
|
||||
DMINKERNEL = min.c
|
||||
|
||||
ISAMAXKERNEL = iamax.c
|
||||
IDAMAXKERNEL = iamax.c
|
||||
ICAMAXKERNEL = izamax.c
|
||||
IZAMAXKERNEL = izamax.c
|
||||
|
||||
ISAMINKERNEL = iamin.c
|
||||
IDAMINKERNEL = iamin.c
|
||||
ICAMINKERNEL = izamin.c
|
||||
IZAMINKERNEL = izamin.c
|
||||
|
||||
ISMAXKERNEL = imax.c
|
||||
IDMAXKERNEL = imax.c
|
||||
|
||||
ISMINKERNEL = imin.c
|
||||
IDMINKERNEL = imin.c
|
||||
|
||||
SASUMKERNEL = asum.c
|
||||
DASUMKERNEL = asum.c
|
||||
CASUMKERNEL = zasum.c
|
||||
ZASUMKERNEL = zasum.c
|
||||
|
||||
SAXPYKERNEL = axpy.c
|
||||
DAXPYKERNEL = axpy.c
|
||||
CAXPYKERNEL = zaxpy.c
|
||||
ZAXPYKERNEL = zaxpy.c
|
||||
|
||||
SCOPYKERNEL = copy.c
|
||||
DCOPYKERNEL = copy.c
|
||||
CCOPYKERNEL = zcopy.c
|
||||
ZCOPYKERNEL = zcopy.c
|
||||
|
||||
SDOTKERNEL = dot.c
|
||||
DDOTKERNEL = dot.c
|
||||
CDOTKERNEL = zdot.c
|
||||
ZDOTKERNEL = zdot.c
|
||||
|
||||
SNRM2KERNEL = nrm2.c
|
||||
DNRM2KERNEL = nrm2.c
|
||||
CNRM2KERNEL = znrm2.c
|
||||
ZNRM2KERNEL = znrm2.c
|
||||
|
||||
SROTKERNEL = rot.c
|
||||
DROTKERNEL = rot.c
|
||||
CROTKERNEL = zrot.c
|
||||
ZROTKERNEL = zrot.c
|
||||
|
||||
SSCALKERNEL = scal.c
|
||||
DSCALKERNEL = scal.c
|
||||
CSCALKERNEL = zscal.c
|
||||
ZSCALKERNEL = zscal.c
|
||||
|
||||
SSWAPKERNEL = swap.c
|
||||
DSWAPKERNEL = swap.c
|
||||
CSWAPKERNEL = zswap.c
|
||||
ZSWAPKERNEL = zswap.c
|
||||
|
||||
SGEMVNKERNEL = gemv_n.c
|
||||
DGEMVNKERNEL = gemv_n.c
|
||||
CGEMVNKERNEL = zgemv_n.c
|
||||
ZGEMVNKERNEL = zgemv_n.c
|
||||
|
||||
SGEMVTKERNEL = gemv_t.c
|
||||
DGEMVTKERNEL = gemv_t.c
|
||||
CGEMVTKERNEL = zgemv_t.c
|
||||
ZGEMVTKERNEL = zgemv_t.c
|
||||
|
||||
STRMMKERNEL = ../generic/trmmkernel_2x2.c
|
||||
DTRMMKERNEL = ../generic/trmmkernel_2x2.c
|
||||
CTRMMKERNEL = ../generic/ztrmmkernel_2x2.c
|
||||
ZTRMMKERNEL = ../generic/ztrmmkernel_2x2.c
|
||||
|
||||
SGEMMKERNEL = ../generic/gemmkernel_2x2.c
|
||||
SGEMMONCOPY = ../generic/gemm_ncopy_2.c
|
||||
SGEMMOTCOPY = ../generic/gemm_tcopy_2.c
|
||||
SGEMMONCOPYOBJ = sgemm_oncopy.o
|
||||
SGEMMOTCOPYOBJ = sgemm_otcopy.o
|
||||
|
||||
DGEMMKERNEL = ../generic/gemmkernel_2x2.c
|
||||
DGEMMONCOPY = ../generic/gemm_ncopy_2.c
|
||||
DGEMMOTCOPY = ../generic/gemm_tcopy_2.c
|
||||
DGEMMONCOPYOBJ = dgemm_oncopy.o
|
||||
DGEMMOTCOPYOBJ = dgemm_otcopy.o
|
||||
|
||||
CGEMMKERNEL = ../generic/zgemmkernel_2x2.c
|
||||
CGEMMONCOPY = ../generic/zgemm_ncopy_2.c
|
||||
CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
|
||||
CGEMMONCOPYOBJ = cgemm_oncopy.o
|
||||
CGEMMOTCOPYOBJ = cgemm_otcopy.o
|
||||
|
||||
ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c
|
||||
ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c
|
||||
ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
|
||||
ZGEMMONCOPYOBJ = zgemm_oncopy.o
|
||||
ZGEMMOTCOPYOBJ = zgemm_otcopy.o
|
||||
|
||||
STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||
STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||
STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||
|
||||
DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||
DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||
DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||
DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||
|
||||
CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||
CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||
CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||
CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||
|
||||
ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||
ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||
ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||
ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||
|
||||
|
||||
|
||||
|
40
param.h
40
param.h
|
@ -1831,6 +1831,46 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
|
||||
|
||||
#define SYMV_P 16
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(ARMV6)
|
||||
#define SNUMOPT 2
|
||||
#define DNUMOPT 2
|
||||
|
||||
#define GEMM_DEFAULT_OFFSET_A 0
|
||||
#define GEMM_DEFAULT_OFFSET_B 0
|
||||
#define GEMM_DEFAULT_ALIGN 0x03fffUL
|
||||
|
||||
#define SGEMM_DEFAULT_UNROLL_M 2
|
||||
#define SGEMM_DEFAULT_UNROLL_N 2
|
||||
|
||||
#define DGEMM_DEFAULT_UNROLL_M 2
|
||||
#define DGEMM_DEFAULT_UNROLL_N 2
|
||||
|
||||
#define CGEMM_DEFAULT_UNROLL_M 2
|
||||
#define CGEMM_DEFAULT_UNROLL_N 2
|
||||
|
||||
#define ZGEMM_DEFAULT_UNROLL_M 2
|
||||
#define ZGEMM_DEFAULT_UNROLL_N 2
|
||||
|
||||
#define SGEMM_DEFAULT_P 128
|
||||
#define DGEMM_DEFAULT_P 128
|
||||
#define CGEMM_DEFAULT_P 96
|
||||
#define ZGEMM_DEFAULT_P 64
|
||||
|
||||
#define SGEMM_DEFAULT_Q 240
|
||||
#define DGEMM_DEFAULT_Q 120
|
||||
#define CGEMM_DEFAULT_Q 120
|
||||
#define ZGEMM_DEFAULT_Q 120
|
||||
|
||||
#define SGEMM_DEFAULT_R 12288
|
||||
#define DGEMM_DEFAULT_R 8192
|
||||
#define CGEMM_DEFAULT_R 4096
|
||||
#define ZGEMM_DEFAULT_R 4096
|
||||
|
||||
|
||||
#define SYMV_P 16
|
||||
#endif
|
||||
|
||||
|
|
Loading…
Reference in New Issue