added cpu detection and target ARMV6, used in raspberry pi

This commit is contained in:
wernsaar 2013-11-21 20:18:51 +01:00
parent bf04544902
commit 410afda9b4
5 changed files with 477 additions and 7 deletions

View File

@ -1,7 +1,12 @@
ifeq ($(CORE), ARMV7)
CCOMMON_OPT += -marm -mfpu=vfpv3 -mfloat-abi=hard
FCOMMON_OPT += -marm -mfpu=vfpv3 -mfloat-abi=hard
CCOMMON_OPT += -marm -mfpu=vfpv3 -mfloat-abi=hard -march=armv7-a
FCOMMON_OPT += -marm -mfpu=vfpv3 -mfloat-abi=hard -march=armv7-a
endif
# ARMV6 core (the commit message names the Raspberry Pi as the user):
# ARM-state code (-marm), VFP floating point, hard-float calling convention,
# and the armv6 instruction set, for both the C and Fortran compilers.
ifeq ($(CORE), ARMV6)
CCOMMON_OPT += -marm -mfpu=vfp -mfloat-abi=hard -march=armv6
FCOMMON_OPT += -marm -mfpu=vfp -mfloat-abi=hard -march=armv6
endif

262
cpuid_arm.c Normal file
View File

@ -0,0 +1,262 @@
/**************************************************************************
Copyright (c) 2013, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
#include <string.h>
/*
 * Core identifiers returned by detect().  Each value is also the index of
 * the matching entry in cpuname[], so the two lists must stay in step.
 */
#define CPU_UNKNOWN 0
#define CPU_ARMV6 1
#define CPU_ARMV7 2
#define CPU_CORTEXA15 3

/*
 * Printable core names, indexed by the CPU_* identifiers above.
 * Entry 0 is spelled "UNKNOWN" so that it agrees with the string
 * get_subarchitecture() prints for an undetected core.
 */
static char *cpuname[] = {
    "UNKNOWN",
    "ARMV6",
    "ARMV7",
    "CORTEXA15"
};
/*
 * Report whether the "Features" line of /proc/cpuinfo lists `search`.
 *
 * search: feature name, compared with strcmp() against every
 *         whitespace-separated token of the line (e.g. "vfp", "neon").
 *
 * Returns 1 when the feature is listed.  Returns 0 when it is not listed,
 * when /proc/cpuinfo cannot be opened, when no "Features" line (or no ':'
 * on it) is found, or when the `linux` macro is not defined at compile time.
 *
 * NOTE(review): GCC predefines the bare `linux` macro only in GNU dialect
 * modes; with a strict -std=c99/-std=c11 the body below is compiled out.
 * Confirm against the flags getarch is built with.
 */
int get_feature(char *search)
{
#ifdef linux
    FILE *infile;
    char buffer[2048], *p, *t;

    p = (char *) NULL;

    infile = fopen("/proc/cpuinfo", "r");
    if (infile == NULL)
        return(0);

    while (fgets(buffer, sizeof(buffer), infile))
    {
        if (!strncmp("Features", buffer, 8))
        {
            /* Keep the raw strchr() result so a line without ':' stays NULL. */
            p = strchr(buffer, ':');
            break;
        }
    }

    fclose(infile);

    if (p == NULL)
        return(0);

    /*
     * Start right after the ':'; strtok() skips the blanks that follow.
     * The first token is tested too, and '\n' is a delimiter so the last
     * feature on the line matches even without a trailing blank.
     */
    for (t = strtok(p + 1, " \t\n"); t != NULL; t = strtok(NULL, " \t\n"))
    {
        if (!strcmp(t, search))
            return(1);
    }
#endif
    return(0);
}
/*
 * Classify the running CPU from /proc/cpuinfo.
 *
 * The "model name" line selects the architecture family and get_feature()
 * refines it:
 *   - "ARMv7" with vfpv4 or vfpv3      -> CPU_ARMV7
 *   - "ARMv7" with only plain vfp      -> CPU_ARMV6
 *   - "ARMv6" with vfp                 -> CPU_ARMV6
 *
 * Returns CPU_UNKNOWN in every other case: /proc/cpuinfo cannot be opened,
 * there is no usable "model name" line, no rule above matches, or the
 * `linux` macro is not defined at compile time.
 */
int detect(void)
{
#ifdef linux
    FILE *infile;
    char buffer[512], *p;

    p = (char *) NULL;

    infile = fopen("/proc/cpuinfo", "r");
    if (infile == NULL)
        return CPU_UNKNOWN;

    while (fgets(buffer, sizeof(buffer), infile))
    {
        if (!strncmp("model name", buffer, 10))
        {
            /*
             * Keep the raw strchr() result: it is NULL when the line has
             * no ':'.  Searching from the ':' itself is equivalent for the
             * strstr() calls below, since neither pattern contains ':'.
             */
            p = strchr(buffer, ':');
            break;
        }
    }

    fclose(infile);

    if (p != NULL)
    {
        if (strstr(p, "ARMv7"))
        {
            if (get_feature("vfpv4") || get_feature("vfpv3"))
                return CPU_ARMV7;

            /* ARMv7 core with only plain VFP is handled as an ARMV6 target. */
            if (get_feature("vfp"))
                return CPU_ARMV6;
        }

        if (strstr(p, "ARMv6"))
        {
            if (get_feature("vfp"))
                return CPU_ARMV6;
        }
    }
#endif
    return CPU_UNKNOWN;
}
/* Return the printable name of the core reported by detect(). */
char *get_corename(void)
{
    int core_id = detect();

    /* detect() returns a CPU_* value, which doubles as the cpuname[] index. */
    return cpuname[core_id];
}
/* Write the architecture name "ARM" to stdout, without a trailing newline. */
void get_architecture(void)
{
    fputs("ARM", stdout);
}
/*
 * Write the sub-architecture name of the detected core to stdout, without
 * a trailing newline: "ARMV7", "ARMV6", or "UNKNOWN" for anything else.
 */
void get_subarchitecture(void)
{
    int core_id = detect();
    const char *label;

    if (core_id == CPU_ARMV7)
        label = "ARMV7";
    else if (core_id == CPU_ARMV6)
        label = "ARMV6";
    else
        label = "UNKNOWN";

    printf("%s", label);
}
/* Write the sub-directory name "arm" to stdout, without a trailing newline. */
void get_subdirname(void)
{
    fputs("arm", stdout);
}
/*
 * Emit the L1/L2/DTB "#define" lines that the ARMV7 and ARMV6 cases share.
 *
 * NOTE(review): L2_SIZE 512488 is not a power of two (512 KiB would be
 * 524288); the value is reproduced unchanged - confirm whether it is
 * intentional.
 */
static void print_arm_cache_defines(void)
{
    fputs("#define L1_DATA_SIZE 65536\n"
          "#define L1_DATA_LINESIZE 32\n"
          "#define L2_SIZE 512488\n"
          "#define L2_LINESIZE 32\n"
          "#define DTB_DEFAULT_ENTRIES 64\n"
          "#define DTB_SIZE 4096\n"
          "#define L2_ASSOCIATIVE 4\n", stdout);
}

/*
 * Write the configuration "#define" lines for the detected core to stdout.
 *
 * CPU_ARMV7: ARMV7, HAVE_VFP, HAVE_VFPV3, then HAVE_NEON / HAVE_VFPV4 when
 *            get_feature() reports them, then the shared cache lines.
 * CPU_ARMV6: ARMV6, HAVE_VFP, then the shared cache lines.
 * Any other detect() result prints nothing.
 */
void get_cpuconfig(void)
{
    int core_id = detect();

    if (core_id == CPU_ARMV7)
    {
        fputs("#define ARMV7\n", stdout);
        fputs("#define HAVE_VFP\n", stdout);
        fputs("#define HAVE_VFPV3\n", stdout);

        if (get_feature("neon"))
            fputs("#define HAVE_NEON\n", stdout);
        if (get_feature("vfpv4"))
            fputs("#define HAVE_VFPV4\n", stdout);

        print_arm_cache_defines();
    }
    else if (core_id == CPU_ARMV6)
    {
        fputs("#define ARMV6\n", stdout);
        fputs("#define HAVE_VFP\n", stdout);

        print_arm_cache_defines();
    }
}
/*
 * Write the library name suffix for the detected core to stdout, followed
 * by a newline: "armv7" or "armv6".  Any other detect() result prints
 * nothing.
 */
void get_libname(void)
{
    int core_id = detect();

    if (core_id == CPU_ARMV7)
        fputs("armv7\n", stdout);
    else if (core_id == CPU_ARMV6)
        fputs("armv6\n", stdout);
}
/*
 * Print one "HAVE_<X>=1" line to stdout for each of vfp, vfpv3, vfpv4 and
 * neon that appears on the "Features" line of /proc/cpuinfo.
 *
 * Prints nothing when /proc/cpuinfo cannot be opened, when no "Features"
 * line (or no ':' on it) is found, or when the `linux` macro is not
 * defined at compile time.
 */
void get_features(void)
{
#ifdef linux
    FILE *infile;
    char buffer[2048], *p, *t;

    p = (char *) NULL;

    infile = fopen("/proc/cpuinfo", "r");
    if (infile == NULL)
        return;

    while (fgets(buffer, sizeof(buffer), infile))
    {
        if (!strncmp("Features", buffer, 8))
        {
            /* Keep the raw strchr() result so a line without ':' stays NULL. */
            p = strchr(buffer, ':');
            break;
        }
    }

    fclose(infile);

    if (p == NULL)
        return;

    /*
     * Start right after the ':'; strtok() skips the blanks that follow.
     * The first token is examined too, and '\n' is a delimiter so the last
     * feature on the line matches even without a trailing blank.
     */
    for (t = strtok(p + 1, " \t\n"); t != NULL; t = strtok(NULL, " \t\n"))
    {
        if (!strcmp(t, "vfp"))
            printf("HAVE_VFP=1\n");
        else if (!strcmp(t, "vfpv3"))
            printf("HAVE_VFPV3=1\n");
        else if (!strcmp(t, "vfpv4"))
            printf("HAVE_VFPV4=1\n");
        else if (!strcmp(t, "neon"))
            printf("HAVE_NEON=1\n");
    }
#endif
    return;
}

View File

@ -687,23 +687,42 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define ARCHCONFIG "-DARMV7 " \
"-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \
"-DL2_SIZE=512488 -DL2_LINESIZE=32 " \
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 "
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " \
"-DHAVE_VFPV3 -DHAVE_VFP"
#define LIBNAME "armv7"
#define CORENAME "ARMV7"
#else
#endif
/*
 * FORCE_ARMV6: hard-coded description of the ARMV6 target.  Defining FORCE
 * makes the "#ifndef FORCE" section that follows (the per-architecture
 * detection includes) drop out, so these strings are used instead.
 * ARCHCONFIG is a single string of -D options assembled from the adjacent
 * literals below.
 */
#ifdef FORCE_ARMV6
#define FORCE
#define ARCHITECTURE "ARM"
#define SUBARCHITECTURE "ARMV6"
#define SUBDIRNAME "arm"
#define ARCHCONFIG "-DARMV6 " \
"-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \
"-DL2_SIZE=512488 -DL2_LINESIZE=32 " \
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " \
"-DHAVE_VFP"
#define LIBNAME "armv6"
#define CORENAME "ARMV6"
#else
#endif
#ifndef FORCE
#if defined(__powerpc__) || defined(__powerpc) || defined(powerpc) || \
defined(__PPC__) || defined(PPC) || defined(_POWER) || defined(__POWERPC__)
defined(__PPC__) || defined(PPC) || defined(_POWER) || defined(__POWERPC__)
#ifndef POWER
#define POWER
#endif
#define OPENBLAS_SUPPORTED
#endif
#if defined(__i386__) || (__x86_64__)
#include "cpuid_x86.c"
#define OPENBLAS_SUPPORTED
@ -734,12 +753,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define OPENBLAS_SUPPORTED
#endif
/* ARM builds: pull in the ARM detection code and mark the target supported. */
#ifdef __arm__
#include "cpuid_arm.c"
#define OPENBLAS_SUPPORTED
#endif
#ifndef OPENBLAS_SUPPORTED
#error "This arch/CPU is not supported by OpenBLAS."
#endif
#else
#endif
static int get_num_cores(void) {
@ -788,7 +811,7 @@ int main(int argc, char *argv[]){
#ifdef FORCE
printf("CORE=%s\n", CORENAME);
#else
#if defined(__i386__) || defined(__x86_64__) || defined(POWER) || defined(__mips__)
#if defined(__i386__) || defined(__x86_64__) || defined(POWER) || defined(__mips__) || defined(__arm__)
printf("CORE=%s\n", get_corename());
#endif
#endif
@ -803,6 +826,12 @@ int main(int argc, char *argv[]){
printf("NUM_CORES=%d\n", get_num_cores());
#if defined(__arm__) && !defined(FORCE)
get_features();
#endif
#if defined(__i386__) || defined(__x86_64__)
#ifndef FORCE
get_sse();

134
kernel/arm/KERNEL.ARMV6 Normal file
View File

@ -0,0 +1,134 @@
# Vector kernels for ARMV6.  Every entry names a plain C source: the S and D
# variants share one file, the C and Z variants share the z-prefixed file.

# amax / amin / max / min
SAMAXKERNEL = amax.c
DAMAXKERNEL = amax.c
CAMAXKERNEL = zamax.c
ZAMAXKERNEL = zamax.c
SAMINKERNEL = amin.c
DAMINKERNEL = amin.c
CAMINKERNEL = zamin.c
ZAMINKERNEL = zamin.c
SMAXKERNEL = max.c
DMAXKERNEL = max.c
SMINKERNEL = min.c
DMINKERNEL = min.c
# I-prefixed variants of the kernels above
ISAMAXKERNEL = iamax.c
IDAMAXKERNEL = iamax.c
ICAMAXKERNEL = izamax.c
IZAMAXKERNEL = izamax.c
ISAMINKERNEL = iamin.c
IDAMINKERNEL = iamin.c
ICAMINKERNEL = izamin.c
IZAMINKERNEL = izamin.c
ISMAXKERNEL = imax.c
IDMAXKERNEL = imax.c
ISMINKERNEL = imin.c
IDMINKERNEL = imin.c
# asum, axpy, copy, dot, nrm2, rot, scal, swap
SASUMKERNEL = asum.c
DASUMKERNEL = asum.c
CASUMKERNEL = zasum.c
ZASUMKERNEL = zasum.c
SAXPYKERNEL = axpy.c
DAXPYKERNEL = axpy.c
CAXPYKERNEL = zaxpy.c
ZAXPYKERNEL = zaxpy.c
SCOPYKERNEL = copy.c
DCOPYKERNEL = copy.c
CCOPYKERNEL = zcopy.c
ZCOPYKERNEL = zcopy.c
SDOTKERNEL = dot.c
DDOTKERNEL = dot.c
CDOTKERNEL = zdot.c
ZDOTKERNEL = zdot.c
SNRM2KERNEL = nrm2.c
DNRM2KERNEL = nrm2.c
CNRM2KERNEL = znrm2.c
ZNRM2KERNEL = znrm2.c
SROTKERNEL = rot.c
DROTKERNEL = rot.c
CROTKERNEL = zrot.c
ZROTKERNEL = zrot.c
SSCALKERNEL = scal.c
DSCALKERNEL = scal.c
CSCALKERNEL = zscal.c
ZSCALKERNEL = zscal.c
SSWAPKERNEL = swap.c
DSWAPKERNEL = swap.c
CSWAPKERNEL = zswap.c
ZSWAPKERNEL = zswap.c
# GEMV kernels (N and T variants): plain C sources, with the S/D variants
# sharing one file and the C/Z variants sharing the z-prefixed file.
SGEMVNKERNEL = gemv_n.c
DGEMVNKERNEL = gemv_n.c
CGEMVNKERNEL = zgemv_n.c
ZGEMVNKERNEL = zgemv_n.c
SGEMVTKERNEL = gemv_t.c
DGEMVTKERNEL = gemv_t.c
CGEMVTKERNEL = zgemv_t.c
ZGEMVTKERNEL = zgemv_t.c
# TRMM and GEMM kernels: all taken from ../generic, in their 2x2 variants.
# NOTE(review): the 2x2 kernel shape presumably has to match the
# *GEMM_DEFAULT_UNROLL_M/N values configured for ARMV6 - confirm.
STRMMKERNEL = ../generic/trmmkernel_2x2.c
DTRMMKERNEL = ../generic/trmmkernel_2x2.c
CTRMMKERNEL = ../generic/ztrmmkernel_2x2.c
ZTRMMKERNEL = ../generic/ztrmmkernel_2x2.c
# Per precision: the compute kernel, the two copy routines (ncopy / tcopy),
# and the object names those copy routines are built as.
SGEMMKERNEL = ../generic/gemmkernel_2x2.c
SGEMMONCOPY = ../generic/gemm_ncopy_2.c
SGEMMOTCOPY = ../generic/gemm_tcopy_2.c
SGEMMONCOPYOBJ = sgemm_oncopy.o
SGEMMOTCOPYOBJ = sgemm_otcopy.o
DGEMMKERNEL = ../generic/gemmkernel_2x2.c
DGEMMONCOPY = ../generic/gemm_ncopy_2.c
DGEMMOTCOPY = ../generic/gemm_tcopy_2.c
DGEMMONCOPYOBJ = dgemm_oncopy.o
DGEMMOTCOPYOBJ = dgemm_otcopy.o
CGEMMKERNEL = ../generic/zgemmkernel_2x2.c
CGEMMONCOPY = ../generic/zgemm_ncopy_2.c
CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
CGEMMONCOPYOBJ = cgemm_oncopy.o
CGEMMOTCOPYOBJ = cgemm_otcopy.o
ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c
ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c
ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
ZGEMMONCOPYOBJ = zgemm_oncopy.o
ZGEMMOTCOPYOBJ = zgemm_otcopy.o
# TRSM kernels (LN / LT / RN / RT): all four precisions use the same four
# sources from ../generic.
STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c

40
param.h
View File

@ -1831,6 +1831,46 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define SYMV_P 16
#endif
/* Default GEMM/SYMV tuning parameters used when ARMV6 is defined. */
#if defined(ARMV6)
#define SNUMOPT 2
#define DNUMOPT 2
#define GEMM_DEFAULT_OFFSET_A 0
#define GEMM_DEFAULT_OFFSET_B 0
/* 0x03fff has the low 14 bits set; presumably used as an alignment mask - confirm at the use site. */
#define GEMM_DEFAULT_ALIGN 0x03fffUL
/* Unroll factors: 2 x 2 for all four precisions. */
#define SGEMM_DEFAULT_UNROLL_M 2
#define SGEMM_DEFAULT_UNROLL_N 2
#define DGEMM_DEFAULT_UNROLL_M 2
#define DGEMM_DEFAULT_UNROLL_N 2
#define CGEMM_DEFAULT_UNROLL_M 2
#define CGEMM_DEFAULT_UNROLL_N 2
#define ZGEMM_DEFAULT_UNROLL_M 2
#define ZGEMM_DEFAULT_UNROLL_N 2
/* P, Q and R defaults per precision. */
#define SGEMM_DEFAULT_P 128
#define DGEMM_DEFAULT_P 128
#define CGEMM_DEFAULT_P 96
#define ZGEMM_DEFAULT_P 64
#define SGEMM_DEFAULT_Q 240
#define DGEMM_DEFAULT_Q 120
#define CGEMM_DEFAULT_Q 120
#define ZGEMM_DEFAULT_Q 120
#define SGEMM_DEFAULT_R 12288
#define DGEMM_DEFAULT_R 8192
#define CGEMM_DEFAULT_R 4096
#define ZGEMM_DEFAULT_R 4096
#define SYMV_P 16
#endif