Merge branch 'develop'

This commit is contained in:
Zhang Xianyi 2014-12-03 23:01:33 +08:00
commit 37aee1f9b1
49 changed files with 2949 additions and 283 deletions

View File

@ -117,5 +117,9 @@ In chronological order:
* Isaac Dunham <https://github.com/idunham> * Isaac Dunham <https://github.com/idunham>
* [2014-08-03] Fixed link error on Linux/musl * [2014-08-03] Fixed link error on Linux/musl
* Dave Nuechterlein
* [2014-10-10] trmm and sgemm kernels (optimized for APM's X-Gene 1).
ARMv8 support.
* [Your name or handle] <[email or website]> * [Your name or handle] <[email or website]>
* [Date] [Brief summary of your changes] * [Date] [Brief summary of your changes]

View File

@ -1,4 +1,25 @@
OpenBLAS ChangeLog OpenBLAS ChangeLog
====================================================================
Version 0.2.13
3-Dec-2014
common:
* Add SYMBOLPREFIX and SYMBOLSUFFIX makefile options
for adding a prefix or suffix to all exported symbol names
in the shared library. (#459, Thanks Tony Kelman)
* Provide OpenBLASConfig.cmake at installation.
* Fix Fortran compiler detection on FreeBSD.
(#470, Thanks Mike Nolta)
x86/x86-64:
* Add generic kernel files for x86-64. make TARGET=GENERIC
* Fix a bug of sgemm kernel on Intel Sandy Bridge.
* Fix c_check bug on some amd64 systems. (#471, Thanks Mike Nolta)
ARM:
* Support APM's X-Gene 1 AArch64 processors.
Optimize trmm and sgemm. (#465, Thanks Dave Nuechterlein)
==================================================================== ====================================================================
Version 0.2.12 Version 0.2.12
13-Oct-2014 13-Oct-2014

View File

@ -1,4 +1,4 @@
Copyright (c) 2011,2012 Lab of Parallel Software and Computational Science,ISCAS Copyright (c) 2011-2014, The OpenBLAS Project
All rights reserved. All rights reserved.
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without
@ -12,9 +12,10 @@ met:
notice, this list of conditions and the following disclaimer in notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the the documentation and/or other materials provided with the
distribution. distribution.
3. Neither the name of the ISCAS nor the names of its contributors may 3. Neither the name of the OpenBLAS project nor the names of
be used to endorse or promote products derived from this software its contributors may be used to endorse or promote products
without specific prior written permission. derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE

View File

@ -9,6 +9,8 @@ OPENBLAS_INCLUDE_DIR := $(PREFIX)/include
OPENBLAS_LIBRARY_DIR := $(PREFIX)/lib OPENBLAS_LIBRARY_DIR := $(PREFIX)/lib
OPENBLAS_BINARY_DIR := $(PREFIX)/bin OPENBLAS_BINARY_DIR := $(PREFIX)/bin
OPENBLAS_BUILD_DIR := $(CURDIR) OPENBLAS_BUILD_DIR := $(CURDIR)
OPENBLAS_CMAKE_DIR := $(PREFIX)/cmake
OPENBLAS_CMAKE_CONFIG := OpenBLASConfig.cmake
.PHONY : install .PHONY : install
.NOTPARALLEL : install .NOTPARALLEL : install
@ -21,6 +23,7 @@ install : lib.grd
@-mkdir -p $(DESTDIR)$(OPENBLAS_INCLUDE_DIR) @-mkdir -p $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)
@-mkdir -p $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) @-mkdir -p $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
@-mkdir -p $(DESTDIR)$(OPENBLAS_BINARY_DIR) @-mkdir -p $(DESTDIR)$(OPENBLAS_BINARY_DIR)
@-mkdir -p $(DESTDIR)$(OPENBLAS_CMAKE_DIR)
@echo Generating openblas_config.h in $(DESTDIR)$(OPENBLAS_INCLUDE_DIR) @echo Generating openblas_config.h in $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)
#for inc #for inc
@echo \#ifndef OPENBLAS_CONFIG_H > $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h @echo \#ifndef OPENBLAS_CONFIG_H > $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h
@ -90,6 +93,23 @@ ifeq ($(OSNAME), CYGWIN_NT)
@-cp $(LIBDLLNAME) $(OPENBLAS_BINARY_DIR) @-cp $(LIBDLLNAME) $(OPENBLAS_BINARY_DIR)
endif endif
endif endif
#Generating OpenBLASConfig.cmake
@echo Generating $(OPENBLAS_CMAKE_CONFIG) in $(DESTDIR)$(OPENBLAS_CMAKE_DIR)
@echo "SET(OpenBLAS_INCLUDE_DIRS ${OPENBLAS_INCLUDE_DIR})" > $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)
ifndef NO_SHARED
#ifeq logical or
ifeq ($(OSNAME), $(filter $(OSNAME),Linux FreeBSD NetBSD))
@echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_LIBRARY_DIR}/$(LIBPREFIX).so)" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)
endif
ifeq ($(OSNAME), $(filter $(OSNAME),WINNT CYGWIN_NT))
@echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_BINARY_DIR}/$(LIBDLLNAME))" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)
endif
ifeq ($(OSNAME), Darwin)
@echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_LIBRARY_DIR}/$(LIBPREFIX).dylib)" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)
endif
else
#only static
@echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_LIBRARY_DIR}/$(LIBPREFIX).$(LIBSUFFIX))" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)
endif
@echo Install OK! @echo Install OK!

View File

@ -3,7 +3,7 @@
# #
# This library's version # This library's version
VERSION = 0.2.12 VERSION = 0.2.13
# If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a # If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a
# and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library # and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library

View File

@ -186,6 +186,8 @@ LD = $(CROSS_SUFFIX)ld
RANLIB = $(CROSS_SUFFIX)ranlib RANLIB = $(CROSS_SUFFIX)ranlib
NM = $(CROSS_SUFFIX)nm NM = $(CROSS_SUFFIX)nm
DLLWRAP = $(CROSS_SUFFIX)dllwrap DLLWRAP = $(CROSS_SUFFIX)dllwrap
OBJCOPY = $(CROSS_SUFFIX)objcopy
OBJCONV = $(CROSS_SUFFIX)objconv
# #
# OS dependent settings # OS dependent settings
@ -845,6 +847,14 @@ else
LIBPREFIX = libopenblas_$(LIBNAMESUFFIX) LIBPREFIX = libopenblas_$(LIBNAMESUFFIX)
endif endif
ifndef SYMBOLPREFIX
SYMBOLPREFIX =
endif
ifndef SYMBOLSUFFIX
SYMBOLSUFFIX =
endif
KERNELDIR = $(TOPDIR)/kernel/$(ARCH) KERNELDIR = $(TOPDIR)/kernel/$(ARCH)
include $(TOPDIR)/Makefile.$(ARCH) include $(TOPDIR)/Makefile.$(ARCH)

View File

@ -1,5 +1,5 @@
/***************************************************************************** /*****************************************************************************
Copyright (c) 2011, Lab of Parallel Software and Computational Science,ICSAS Copyright (c) 2011-2014, The OpenBLAS Project
All rights reserved. All rights reserved.
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without
@ -13,9 +13,10 @@ met:
notice, this list of conditions and the following disclaimer in notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the the documentation and/or other materials provided with the
distribution. distribution.
3. Neither the name of the ISCAS nor the names of its contributors may 3. Neither the name of the OpenBLAS project nor the names of
be used to endorse or promote products derived from this software its contributors may be used to endorse or promote products
without specific prior written permission. derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
@ -27,7 +28,6 @@ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************************/ **********************************************************************************/
/*********************************************************************/ /*********************************************************************/

View File

@ -1,5 +1,5 @@
/***************************************************************************** /*****************************************************************************
Copyright (c) 2011, Lab of Parallel Software and Computational Science,ICSAS Copyright (c) 2011-2014, The OpenBLAS Project
All rights reserved. All rights reserved.
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without
@ -13,9 +13,10 @@ met:
notice, this list of conditions and the following disclaimer in notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the the documentation and/or other materials provided with the
distribution. distribution.
3. Neither the name of the ISCAS nor the names of its contributors may 3. Neither the name of the OpenBLAS project nor the names of
be used to endorse or promote products derived from this software its contributors may be used to endorse or promote products
without specific prior written permission. derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
@ -27,7 +28,6 @@ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************************/ **********************************************************************************/
/*********************************************************************/ /*********************************************************************/
@ -119,9 +119,9 @@ static inline int blas_quickdivide(blasint x, blasint y){
} }
#if defined(DOUBLE) #if defined(DOUBLE)
#define GET_IMAGE(res) __asm__ __volatile__("vstr.f64 d1, %0" : "=m"(res) : : "memory") #define GET_IMAGE(res) __asm__ __volatile__("str d1, %0" : "=m"(res) : : "memory")
#else #else
#define GET_IMAGE(res) __asm__ __volatile__("vstr.f32 s1, %0" : "=m"(res) : : "memory") #define GET_IMAGE(res) __asm__ __volatile__("str s1, %0" : "=m"(res) : : "memory")
#endif #endif
#define GET_IMAGE_CANCEL #define GET_IMAGE_CANCEL
@ -138,7 +138,6 @@ static inline int blas_quickdivide(blasint x, blasint y){
#if defined(ASSEMBLER) && !defined(NEEDPARAM) #if defined(ASSEMBLER) && !defined(NEEDPARAM)
#define PROLOGUE \ #define PROLOGUE \
.arm ;\
.global REALNAME ;\ .global REALNAME ;\
.func REALNAME ;\ .func REALNAME ;\
REALNAME: REALNAME:

View File

@ -1,5 +1,5 @@
/***************************************************************************** /*****************************************************************************
Copyright (c) 2011, Lab of Parallel Software and Computational Science,ICSAS Copyright (c) 2011-2014, The OpenBLAS Project
All rights reserved. All rights reserved.
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without
@ -13,9 +13,10 @@ met:
notice, this list of conditions and the following disclaimer in notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the the documentation and/or other materials provided with the
distribution. distribution.
3. Neither the name of the ISCAS nor the names of its contributors may 3. Neither the name of the OpenBLAS project nor the names of
be used to endorse or promote products derived from this software its contributors may be used to endorse or promote products
without specific prior written permission. derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
@ -27,7 +28,6 @@ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************************/ **********************************************************************************/
/*********************************************************************/ /*********************************************************************/

View File

@ -1,5 +1,5 @@
/***************************************************************************** /*****************************************************************************
Copyright (c) 2011, Lab of Parallel Software and Computational Science,ICSAS Copyright (c) 2011-2014, The OpenBLAS Project
All rights reserved. All rights reserved.
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without
@ -13,9 +13,10 @@ met:
notice, this list of conditions and the following disclaimer in notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the the documentation and/or other materials provided with the
distribution. distribution.
3. Neither the name of the ISCAS nor the names of its contributors may 3. Neither the name of the OpenBLAS project nor the names of
be used to endorse or promote products derived from this software its contributors may be used to endorse or promote products
without specific prior written permission. derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE

217
cpuid_arm64.c Normal file
View File

@ -0,0 +1,217 @@
/**************************************************************************
Copyright (c) 2013, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
#include <string.h>
/* CPU identifiers returned by detect(); get_corename() also uses them as indices into cpuname[]. */
#define CPU_UNKNOWN 0
#define CPU_ARMV8 1
/* Core names returned by get_corename(), one entry per CPU_* identifier, in the same order. */
/* NOTE(review): "UNKOWN" looks like a misspelling of "UNKNOWN" (get_subarchitecture() prints "UNKNOWN");
   confirm that no consumer depends on the current spelling before changing it. */
static char *cpuname[] = {
"UNKOWN",
"ARMV8"
};
/*
 * Report whether the CPU advertises a given feature flag.
 *
 * On Linux, scans /proc/cpuinfo for the first line that starts with
 * "Features" and compares every whitespace-separated flag after the colon
 * with `search`.
 *
 * search: NUL-terminated feature name to look for.
 *
 * Returns 1 when the flag is listed, 0 otherwise -- including when `search`
 * is NULL, /proc/cpuinfo cannot be opened, no "Features" line is present,
 * or the code is not built for Linux.
 */
int get_feature(char *search)
{
#ifdef linux
    FILE *infile;
    char buffer[2048], *p, *t;

    if (search == NULL) return(0);

    infile = fopen("/proc/cpuinfo", "r");
    if (infile == NULL) return(0);   /* no cpuinfo available: nothing to match */

    p = (char *) NULL;
    while (fgets(buffer, sizeof(buffer), infile))
    {
        if (!strncmp("Features", buffer, 8))
        {
            /* May be NULL if the line has no ':'; checked below. */
            p = strchr(buffer, ':');
            break;
        }
    }
    fclose(infile);

    if (p == NULL) return(0);
    p++;   /* step over the ':' itself; leading blanks are skipped by strtok */

    /*
     * Examine every token, including the first one, and treat the line
     * terminator as a delimiter so the last flag on the line can match too.
     */
    for (t = strtok(p, " \t\r\n"); t != NULL; t = strtok(NULL, " \t\r\n"))
    {
        if (!strcmp(t, search)) { return(1); }
    }
#endif
    return(0);
}
/*
 * Identify the CPU family from /proc/cpuinfo.
 *
 * On Linux, looks for the first line beginning with "model name" or
 * "Processor" and checks whether the text after the colon contains
 * "AArch64".
 *
 * Returns CPU_ARMV8 when that marker is found, CPU_UNKNOWN otherwise --
 * including when /proc/cpuinfo cannot be opened, the matching line has no
 * colon, or the code is not built for Linux.
 */
int detect(void)
{
#ifdef linux
    FILE *infile;
    char buffer[512], *p;

    infile = fopen("/proc/cpuinfo", "r");
    if (infile == NULL) return CPU_UNKNOWN;   /* cannot probe: report unknown */

    p = (char *) NULL;
    while (fgets(buffer, sizeof(buffer), infile))
    {
        if ((!strncmp("model name", buffer, 10)) || (!strncmp("Processor", buffer, 9)))
        {
            /*
             * Keep the pointer at the ':' (NULL if absent) instead of
             * blindly adding an offset; searching from the colon finds
             * the same "AArch64" substring.
             */
            p = strchr(buffer, ':');
            break;
        }
    }
    fclose(infile);

    if (p != NULL && strstr(p, "AArch64"))
    {
        return CPU_ARMV8;
    }
#endif
    return CPU_UNKNOWN;
}
/* Return the name in cpuname[] that corresponds to the identifier reported by detect(). */
char *get_corename(void)
{
    int core_id = detect();

    return cpuname[core_id];
}
/* Write the architecture name "ARM" to standard output (no trailing newline). */
void get_architecture(void)
{
    fputs("ARM", stdout);
}
/*
 * Write the sub-architecture name to standard output (no trailing newline):
 * "ARMV8" when detect() reports CPU_ARMV8, "UNKNOWN" for anything else.
 */
void get_subarchitecture(void)
{
    if (detect() == CPU_ARMV8)
    {
        fputs("ARMV8", stdout);
        return;
    }

    fputs("UNKNOWN", stdout);
}
/* Write the sub-directory name "arm64" to standard output (no trailing newline). */
void get_subdirname(void)
{
    fputs("arm64", stdout);
}
/*
 * Write the configuration #define lines for the detected CPU to standard
 * output. Only CPU_ARMV8 produces output; any other detect() result prints
 * nothing.
 */
void get_cpuconfig(void)
{
    /* Emitted verbatim, in this order, for CPU_ARMV8. */
    static const char *const armv8_defines[] = {
        "#define ARMV8\n",
        "#define L1_DATA_SIZE 32768\n",
        "#define L1_DATA_LINESIZE 64\n",
        "#define L2_SIZE 262144\n",
        "#define L2_LINESIZE 64\n",
        "#define DTB_DEFAULT_ENTRIES 64\n",
        "#define DTB_SIZE 4096\n",
        "#define L2_ASSOCIATIVE 4\n"
    };
    size_t i;

    if (detect() != CPU_ARMV8) return;

    for (i = 0; i < sizeof(armv8_defines) / sizeof(armv8_defines[0]); i++)
    {
        fputs(armv8_defines[i], stdout);
    }
}
/*
 * Write the library name for the detected CPU to standard output:
 * "armv8" followed by a newline for CPU_ARMV8, nothing for any other result.
 */
void get_libname(void)
{
    if (detect() == CPU_ARMV8)
    {
        fputs("armv8\n", stdout);
    }
}
/*
 * Walk the CPU feature flags listed in /proc/cpuinfo.
 *
 * On Linux, finds the first line starting with "Features" and tokenizes the
 * flags after the colon. The loop body is empty, so nothing is printed for
 * any flag; the function has no observable output. It returns quietly when
 * /proc/cpuinfo cannot be opened, when no "Features" line (or no colon on
 * it) is found, or when the code is not built for Linux.
 */
void get_features(void)
{
#ifdef linux
    FILE *infile;
    char buffer[2048], *p, *t;

    infile = fopen("/proc/cpuinfo", "r");
    if (infile == NULL) return;   /* nothing to scan */

    p = (char *) NULL;
    while (fgets(buffer, sizeof(buffer), infile))
    {
        if (!strncmp("Features", buffer, 8))
        {
            /* May be NULL if the line has no ':'; checked below. */
            p = strchr(buffer, ':');
            break;
        }
    }
    fclose(infile);

    if (p == NULL) return;
    p++;   /* step over the ':' itself; leading blanks are skipped by strtok */

    for (t = strtok(p, " \t\r\n"); t != NULL; t = strtok(NULL, " \t\r\n"))
    {
        /* Each flag is tokenized but no output is produced for it. */
    }
#endif
    return;
}

View File

@ -1,5 +1,5 @@
/***************************************************************************** /*****************************************************************************
Copyright (c) 2011,2012 Lab of Parallel Software and Computational Science,ISCAS Copyright (c) 2011-2014, The OpenBLAS Project
All rights reserved. All rights reserved.
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without
@ -13,9 +13,10 @@ met:
notice, this list of conditions and the following disclaimer in notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the the documentation and/or other materials provided with the
distribution. distribution.
3. Neither the name of the ISCAS nor the names of its contributors may 3. Neither the name of the OpenBLAS project nor the names of
be used to endorse or promote products derived from this software its contributors may be used to endorse or promote products
without specific prior written permission. derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE

View File

@ -853,11 +853,24 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){
if (get_vendor() == VENDOR_INTEL) { if (get_vendor() == VENDOR_INTEL) {
cpuid(0x80000000, &cpuid_level, &ebx, &ecx, &edx); cpuid(0x80000000, &cpuid_level, &ebx, &ecx, &edx);
if (cpuid_level >= 0x80000006) { if (cpuid_level >= 0x80000006) {
cpuid(0x80000006, &eax, &ebx, &ecx, &edx); if(L2.size<=0){
//If we didn't detect L2 correctly before,
cpuid(0x80000006, &eax, &ebx, &ecx, &edx);
L2.size = BITMASK(ecx, 16, 0xffff); L2.size = BITMASK(ecx, 16, 0xffff);
L2.associative = BITMASK(ecx, 12, 0x0f); L2.associative = BITMASK(ecx, 12, 0x0f);
L2.linesize = BITMASK(ecx, 0, 0xff);
switch (L2.associative){
case 0x06:
L2.associative = 8;
break;
case 0x08:
L2.associative = 16;
break;
}
L2.linesize = BITMASK(ecx, 0, 0xff);
}
} }
} }
@ -916,10 +929,22 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){
if (L2ITB.associative == 0xff) L2ITB.associative = 0; if (L2ITB.associative == 0xff) L2ITB.associative = 0;
L2ITB.linesize = BITMASK(ebx, 0, 0xff); L2ITB.linesize = BITMASK(ebx, 0, 0xff);
L2.size = BITMASK(ecx, 16, 0xffff); if(L2.size <= 0){
L2.associative = BITMASK(ecx, 12, 0xf); //If we didn't detect L2 correctly before,
if (L2.associative == 0xff) L2.associative = 0; L2.size = BITMASK(ecx, 16, 0xffff);
L2.linesize = BITMASK(ecx, 0, 0xff); L2.associative = BITMASK(ecx, 12, 0xf);
switch (L2.associative){
case 0x06:
L2.associative = 8;
break;
case 0x08:
L2.associative = 16;
break;
}
if (L2.associative == 0xff) L2.associative = 0;
L2.linesize = BITMASK(ecx, 0, 0xff);
}
L3.size = BITMASK(edx, 18, 0x3fff) * 512; L3.size = BITMASK(edx, 18, 0x3fff) * 512;
L3.associative = BITMASK(edx, 12, 0xf); L3.associative = BITMASK(edx, 12, 0xf);

View File

@ -1,5 +1,5 @@
/***************************************************************************** /*****************************************************************************
Copyright (c) 2011, Lab of Parallel Software and Computational Science,ICSAS Copyright (c) 2011-2014, The OpenBLAS Project
All rights reserved. All rights reserved.
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without
@ -13,9 +13,10 @@ met:
notice, this list of conditions and the following disclaimer in notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the the documentation and/or other materials provided with the
distribution. distribution.
3. Neither the name of the ISCAS nor the names of its contributors may 3. Neither the name of the OpenBLAS project nor the names of
be used to endorse or promote products derived from this software its contributors may be used to endorse or promote products
without specific prior written permission. derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE

View File

@ -1,5 +1,5 @@
/***************************************************************************** /*****************************************************************************
Copyright (c) 2011,2012 Lab of Parallel Software and Computational Science,ISCAS Copyright (c) 2011-2014, The OpenBLAS Project
All rights reserved. All rights reserved.
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without
@ -13,9 +13,10 @@ met:
notice, this list of conditions and the following disclaimer in notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the the documentation and/or other materials provided with the
distribution. distribution.
3. Neither the name of the ISCAS nor the names of its contributors may 3. Neither the name of the OpenBLAS project nor the names of
be used to endorse or promote products derived from this software its contributors may be used to endorse or promote products
without specific prior written permission. derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE

View File

@ -1,5 +1,5 @@
/***************************************************************************** /*****************************************************************************
Copyright (c) 2011,2012 Lab of Parallel Software and Computational Science,ISCAS Copyright (c) 2011-2014, The OpenBLAS Project
All rights reserved. All rights reserved.
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without
@ -13,9 +13,10 @@ met:
notice, this list of conditions and the following disclaimer in notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the the documentation and/or other materials provided with the
distribution. distribution.
3. Neither the name of the ISCAS nor the names of its contributors may 3. Neither the name of the OpenBLAS project nor the names of
be used to endorse or promote products derived from this software its contributors may be used to endorse or promote products
without specific prior written permission. derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE

View File

@ -1,5 +1,5 @@
/***************************************************************************** /*****************************************************************************
Copyright (c) 2011,2012 Lab of Parallel Software and Computational Science,ISCAS Copyright (c) 2011-2014, The OpenBLAS Project
All rights reserved. All rights reserved.
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without
@ -13,9 +13,10 @@ met:
notice, this list of conditions and the following disclaimer in notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the the documentation and/or other materials provided with the
distribution. distribution.
3. Neither the name of the ISCAS nor the names of its contributors may 3. Neither the name of the OpenBLAS project nor the names of
be used to endorse or promote products derived from this software its contributors may be used to endorse or promote products
without specific prior written permission. derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE

View File

@ -13,7 +13,7 @@ met:
notice, this list of conditions and the following disclaimer in notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the the documentation and/or other materials provided with the
distribution. distribution.
3. Neither the name of the ISCAS nor the names of its contributors may 3. Neither the name of the OpenBLAS project nor the names of its contributors may
be used to endorse or promote products derived from this software be used to endorse or promote products derived from this software
without specific prior written permission. without specific prior written permission.
@ -40,6 +40,8 @@ static int parallel = 1;
static int parallel = 0; static int parallel = 0;
#endif #endif
#ifdef NEEDBUNDERSCORE
int CNAME() { int CNAME() {
return parallel; return parallel;
} }
@ -48,5 +50,10 @@ int NAME() {
return parallel; return parallel;
} }
#else
//The CNAME and NAME are the same.
int NAME() {
return parallel;
}
#endif

View File

@ -1,5 +1,5 @@
/***************************************************************************** /*****************************************************************************
Copyright (c) 2011,2012 Lab of Parallel Software and Computational Science,ISCAS Copyright (c) 2011-2014, The OpenBLAS Project
All rights reserved. All rights reserved.
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without
@ -13,9 +13,10 @@ met:
notice, this list of conditions and the following disclaimer in notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the the documentation and/or other materials provided with the
distribution. distribution.
3. Neither the name of the ISCAS nor the names of its contributors may 3. Neither the name of the OpenBLAS project nor the names of
be used to endorse or promote products derived from this software its contributors may be used to endorse or promote products
without specific prior written permission. derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE

View File

@ -88,12 +88,18 @@ dll : ../$(LIBDLLNAME)
-Wl,--whole-archive ../$(LIBNAME) -Wl,--no-whole-archive $(FEXTRALIB) $(EXTRALIB) -Wl,--whole-archive ../$(LIBNAME) -Wl,--no-whole-archive $(FEXTRALIB) $(EXTRALIB)
libopenblas.def : gensymbol libopenblas.def : gensymbol
perl ./gensymbol win2k $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) > $(@F) perl ./gensymbol win2k $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" > $(@F)
libgoto_hpl.def : gensymbol libgoto_hpl.def : gensymbol
perl ./gensymbol win2khpl $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) > $(@F) perl ./gensymbol win2khpl $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" > $(@F)
ifeq (, $(SYMBOLPREFIX)$(SYMBOLSUFFIX))
$(LIBDYNNAME) : ../$(LIBNAME) osx.def $(LIBDYNNAME) : ../$(LIBNAME) osx.def
else
../$(LIBNAME).renamed : ../$(LIBNAME) objconv.def
$(OBJCONV) @objconv.def ../$(LIBNAME) ../$(LIBNAME).renamed
$(LIBDYNNAME) : ../$(LIBNAME).renamed osx.def
endif
$(FC) $(FFLAGS) -all_load -headerpad_max_install_names -install_name $(CURDIR)/../$(LIBDYNNAME) -dynamiclib -o ../$(LIBDYNNAME) $< -Wl,-exported_symbols_list,osx.def $(FEXTRALIB) $(FC) $(FFLAGS) -all_load -headerpad_max_install_names -install_name $(CURDIR)/../$(LIBDYNNAME) -dynamiclib -o ../$(LIBDYNNAME) $< -Wl,-exported_symbols_list,osx.def $(FEXTRALIB)
dllinit.$(SUFFIX) : dllinit.c dllinit.$(SUFFIX) : dllinit.c
@ -103,16 +109,22 @@ ifeq ($(OSNAME), Linux)
so : ../$(LIBSONAME) so : ../$(LIBSONAME)
ifeq (, $(SYMBOLPREFIX)$(SYMBOLSUFFIX))
../$(LIBSONAME) : ../$(LIBNAME) linktest.c ../$(LIBSONAME) : ../$(LIBNAME) linktest.c
else
../$(LIBNAME).renamed : ../$(LIBNAME) objcopy.def
$(OBJCOPY) --redefine-syms objcopy.def ../$(LIBNAME) ../$(LIBNAME).renamed
../$(LIBSONAME) : ../$(LIBNAME).renamed linktest.c
endif
ifneq ($(C_COMPILER), LSB) ifneq ($(C_COMPILER), LSB)
$(CC) $(CFLAGS) $(LDFLAGS) -shared -o ../$(LIBSONAME) \ $(CC) $(CFLAGS) $(LDFLAGS) -shared -o ../$(LIBSONAME) \
-Wl,--whole-archive ../$(LIBNAME) -Wl,--no-whole-archive \ -Wl,--whole-archive $< -Wl,--no-whole-archive \
-Wl,-soname,$(LIBPREFIX).so.$(MAJOR_VERSION) $(EXTRALIB) -Wl,-soname,$(LIBPREFIX).so.$(MAJOR_VERSION) $(EXTRALIB)
$(CC) $(CFLAGS) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) $(FEXTRALIB) && echo OK. $(CC) $(CFLAGS) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) $(FEXTRALIB) && echo OK.
else else
#for LSB #for LSB
env LSBCC_SHAREDLIBS=gfortran $(CC) $(CFLAGS) $(LDFLAGS) -shared -o ../$(LIBSONAME) \ env LSBCC_SHAREDLIBS=gfortran $(CC) $(CFLAGS) $(LDFLAGS) -shared -o ../$(LIBSONAME) \
-Wl,--whole-archive ../$(LIBNAME) -Wl,--no-whole-archive \ -Wl,--whole-archive $< -Wl,--no-whole-archive \
-Wl,-soname,$(LIBPREFIX).so.$(MAJOR_VERSION) $(EXTRALIB) -Wl,-soname,$(LIBPREFIX).so.$(MAJOR_VERSION) $(EXTRALIB)
$(FC) $(CFLAGS) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) $(FEXTRALIB) && echo OK. $(FC) $(CFLAGS) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) $(FEXTRALIB) && echo OK.
endif endif
@ -125,9 +137,15 @@ ifeq ($(OSNAME), $(filter $(OSNAME),FreeBSD NetBSD))
so : ../$(LIBSONAME) so : ../$(LIBSONAME)
ifeq (, $(SYMBOLPREFIX)$(SYMBOLSUFFIX))
../$(LIBSONAME) : ../$(LIBNAME) linktest.c ../$(LIBSONAME) : ../$(LIBNAME) linktest.c
else
../$(LIBNAME).renamed : ../$(LIBNAME) objcopy.def
$(OBJCOPY) --redefine-syms objcopy.def ../$(LIBNAME) ../$(LIBNAME).renamed
../$(LIBSONAME) : ../$(LIBNAME).renamed linktest.c
endif
$(CC) $(CFLAGS) $(LDFLAGS) -shared -o ../$(LIBSONAME) \ $(CC) $(CFLAGS) $(LDFLAGS) -shared -o ../$(LIBSONAME) \
-Wl,--whole-archive ../$(LIBNAME) -Wl,--no-whole-archive \ -Wl,--whole-archive $< -Wl,--no-whole-archive \
$(FEXTRALIB) $(EXTRALIB) $(FEXTRALIB) $(EXTRALIB)
$(CC) $(CFLAGS) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) $(FEXTRALIB) && echo OK. $(CC) $(CFLAGS) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) $(FEXTRALIB) && echo OK.
rm -f linktest rm -f linktest
@ -178,17 +196,23 @@ static : ../$(LIBNAME)
rm -f goto.$(SUFFIX) rm -f goto.$(SUFFIX)
osx.def : gensymbol ../Makefile.system ../getarch.c osx.def : gensymbol ../Makefile.system ../getarch.c
perl ./gensymbol osx $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) > $(@F) perl ./gensymbol osx $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" > $(@F)
aix.def : gensymbol ../Makefile.system ../getarch.c aix.def : gensymbol ../Makefile.system ../getarch.c
perl ./gensymbol aix $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) > $(@F) perl ./gensymbol aix $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" > $(@F)
objcopy.def : gensymbol ../Makefile.system ../getarch.c
perl ./gensymbol objcopy $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" > $(@F)
objconv.def : gensymbol ../Makefile.system ../getarch.c
perl ./gensymbol objconv $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" > $(@F)
test : linktest.c test : linktest.c
$(CC) $(CFLAGS) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) -lm && echo OK. $(CC) $(CFLAGS) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) -lm && echo OK.
rm -f linktest rm -f linktest
linktest.c : gensymbol ../Makefile.system ../getarch.c linktest.c : gensymbol ../Makefile.system ../getarch.c
perl ./gensymbol linktest $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) > linktest.c perl ./gensymbol linktest $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" > linktest.c
clean :: clean ::
@rm -f *.def *.dylib __.SYMDEF* @rm -f *.def *.dylib __.SYMDEF*

View File

@ -2784,22 +2784,26 @@ $bu = $ARGV[2];
$bu = "" if (($bu eq "0") || ($bu eq "1")); $bu = "" if (($bu eq "0") || ($bu eq "1"));
$symbolprefix = $ARGV[9];
$symbolsuffix = $ARGV[10];
if ($ARGV[0] eq "osx"){ if ($ARGV[0] eq "osx"){
@underscore_objs = (@underscore_objs, @misc_common_objs); @underscore_objs = (@underscore_objs, @misc_common_objs);
@no_underscore_objs = (@no_underscore_objs, @misc_common_objs); @no_underscore_objs = (@no_underscore_objs, @misc_common_objs);
foreach $objs (@underscore_objs) { foreach $objs (@underscore_objs) {
print "_", $objs, $bu, "\n"; print "_", $symbolprefix, $objs, $bu, $symbolsuffix, "\n";
} }
foreach $objs (@need_2underscore_objs) { foreach $objs (@need_2underscore_objs) {
print "_", $objs, $bu, $bu, "\n"; print "_", $symbolprefix, $objs, $bu, $bu, $symbolsuffix, "\n";
} }
# if ($ARGV[4] == 0) { # if ($ARGV[4] == 0) {
foreach $objs (@no_underscore_objs) { foreach $objs (@no_underscore_objs) {
print "_", $objs, "\n"; print "_", $symbolprefix, $objs, $symbolsuffix, "\n";
} }
# } # }
exit(0); exit(0);
@ -2811,16 +2815,58 @@ if ($ARGV[0] eq "aix"){
@no_underscore_objs = (@no_underscore_objs, @misc_common_objs); @no_underscore_objs = (@no_underscore_objs, @misc_common_objs);
foreach $objs (@underscore_objs) { foreach $objs (@underscore_objs) {
print $objs, $bu, "\n"; print $symbolprefix, $objs, $bu, $symbolsuffix, "\n";
} }
foreach $objs (@need_2underscore_objs) { foreach $objs (@need_2underscore_objs) {
print $objs, $bu, $bu, "\n"; print $symbolprefix, $objs, $bu, $bu, $symbolsuffix, "\n";
} }
# if ($ARGV[4] == 0) { # if ($ARGV[4] == 0) {
foreach $objs (@no_underscore_objs) { foreach $objs (@no_underscore_objs) {
print $objs, "\n"; print $symbolprefix, $objs, $symbolsuffix, "\n";
}
# }
exit(0);
}
if ($ARGV[0] eq "objcopy"){
@underscore_objs = (@underscore_objs, @misc_common_objs);
@no_underscore_objs = (@no_underscore_objs, @misc_common_objs);
foreach $objs (@underscore_objs) {
print $objs, $bu, " ", $symbolprefix, $objs, $bu, $symbolsuffix, "\n";
}
foreach $objs (@need_2underscore_objs) {
print $objs, $bu, $bu, " ", $symbolprefix, $objs, $bu, $bu, $symbolsuffix, "\n";
}
# if ($ARGV[4] == 0) {
foreach $objs (@no_underscore_objs) {
print $objs, " ", $symbolprefix, $objs, $symbolsuffix, "\n";
}
# }
exit(0);
}
if ($ARGV[0] eq "objconv"){
@underscore_objs = (@underscore_objs, @misc_common_objs);
@no_underscore_objs = (@no_underscore_objs, @misc_common_objs);
foreach $objs (@underscore_objs) {
print "-nr:_", $objs, $bu, ":_", $symbolprefix, $objs, $bu, $symbolsuffix, "\n";
}
foreach $objs (@need_2underscore_objs) {
print "-nr:_", $objs, $bu, $bu, ":_", $symbolprefix, $objs, $bu, $bu, $symbolsuffix, "\n";
}
# if ($ARGV[4] == 0) {
foreach $objs (@no_underscore_objs) {
print "-nr:_", $objs, ":_", $symbolprefix, $objs, $symbolsuffix, "\n";
} }
# } # }
exit(0); exit(0);
@ -2835,22 +2881,22 @@ if ($ARGV[0] eq "win2k"){
foreach $objs (@underscore_objs) { foreach $objs (@underscore_objs) {
$uppercase = $objs; $uppercase = $objs;
$uppercase =~ tr/[a-z]/[A-Z]/; $uppercase =~ tr/[a-z]/[A-Z]/;
print "\t$objs=$objs","_ \@", $count, "\n"; print "\t",$symbolprefix, $objs, $symbolsuffix, "=$objs","_ \@", $count, "\n";
$count ++; $count ++;
print "\t",$objs, "_=$objs","_ \@", $count, "\n"; print "\t",$symbolprefix, $objs, "_", $symbolsuffix, "=$objs","_ \@", $count, "\n";
$count ++; $count ++;
print "\t$uppercase=$objs", "_ \@", $count, "\n"; print "\t",$symbolprefix, $uppercase, $symbolsuffix, "=$objs", "_ \@", $count, "\n";
$count ++; $count ++;
} }
foreach $objs (@need_2underscore_objs) { foreach $objs (@need_2underscore_objs) {
$uppercase = $objs; $uppercase = $objs;
$uppercase =~ tr/[a-z]/[A-Z]/; $uppercase =~ tr/[a-z]/[A-Z]/;
print "\t$objs=$objs","__ \@", $count, "\n"; print "\t",$symbolprefix, $objs, $symbolsuffix, "=$objs","__ \@", $count, "\n";
$count ++; $count ++;
print "\t",$objs, "__=$objs","__ \@", $count, "\n"; print "\t",$symbolprefix, $objs, "__", $symbolsuffix, "=$objs","__ \@", $count, "\n";
$count ++; $count ++;
print "\t$uppercase=$objs", "__ \@", $count, "\n"; print "\t",$symbolprefix, $uppercase, $symbolsuffix, "=$objs", "__ \@", $count, "\n";
$count ++; $count ++;
} }
@ -2859,15 +2905,15 @@ if ($ARGV[0] eq "win2k"){
$uppercase = $objs; $uppercase = $objs;
$uppercase =~ tr/[a-z]/[A-Z]/; $uppercase =~ tr/[a-z]/[A-Z]/;
print "\t",$objs, "_=$objs","_ \@", $count, "\n"; print "\t",$symbolprefix, $objs, "_", $symbolsuffix, "=$objs","_ \@", $count, "\n";
$count ++; $count ++;
print "\t$uppercase=$objs", "_ \@", $count, "\n"; print "\t",$symbolprefix, $uppercase, $symbolsuffix, "=$objs", "_ \@", $count, "\n";
$count ++; $count ++;
} }
foreach $objs (@no_underscore_objs) { foreach $objs (@no_underscore_objs) {
print "\t",$objs,"=$objs"," \@", $count, "\n"; print "\t",$symbolprefix,$objs,$symbolsuffix,"=$objs"," \@", $count, "\n";
$count ++; $count ++;
} }
@ -2880,11 +2926,11 @@ if ($ARGV[0] eq "win2khpl"){
foreach $objs (@hplobjs) { foreach $objs (@hplobjs) {
$uppercase = $objs; $uppercase = $objs;
$uppercase =~ tr/[a-z]/[A-Z]/; $uppercase =~ tr/[a-z]/[A-Z]/;
print "\t$objs=$objs","_ \@", $count, "\n"; print "\t",$symbolprefix, $objs, $symbolsuffix, "=$objs","_ \@", $count, "\n";
$count ++; $count ++;
print "\t",$objs, "_=$objs","_ \@", $count, "\n"; print "\t",$symbolprefix, $objs, "_", $symbolsuffix, "=$objs","_ \@", $count, "\n";
$count ++; $count ++;
print "\t$uppercase=$objs", "_ \@", $count, "\n"; print "\t",$symbolprefix, $uppercase, $symbolsuffix, "=$objs", "_ \@", $count, "\n";
$count ++; $count ++;
} }
@ -2905,24 +2951,24 @@ if ($ARGV[0] eq "microsoft"){
foreach $objs (@underscore_objs) { foreach $objs (@underscore_objs) {
$uppercase = $objs; $uppercase = $objs;
$uppercase =~ tr/[a-z]/[A-Z]/; $uppercase =~ tr/[a-z]/[A-Z]/;
print "\t$objs = $objs","_\n"; print "\t",$symbolprefix, $objs, $symbolsuffix, " = $objs","_\n";
$count ++; $count ++;
print "\t$objs\_ = $objs","_\n"; print "\t",$symbolprefix, $objs, "\_", $symbolsuffix, " = $objs","_\n";
$count ++; $count ++;
print "\t$uppercase = $objs","_\n"; print "\t",$symbolprefix, $uppercase, $symbolsuffix, " = $objs","_\n";
$count ++; $count ++;
print "\t$uppercase\_ = $objs","_\n"; print "\t",$symbolprefix, $uppercase, "\_", $symbolsuffix, " = $objs","_\n";
$count ++; $count ++;
} }
foreach $objs (@need_2underscore_objs) { foreach $objs (@need_2underscore_objs) {
$uppercase = $objs; $uppercase = $objs;
$uppercase =~ tr/[a-z]/[A-Z]/; $uppercase =~ tr/[a-z]/[A-Z]/;
print "\t$objs=$objs","__ \@", $count, "\n"; print "\t",$symbolprefix, $objs, $symbolsuffix, "=$objs","__ \@", $count, "\n";
$count ++; $count ++;
print "\t",$objs, "__=$objs","__ \@", $count, "\n"; print "\t",$symbolprefix, $objs, "__", $symbolsuffix, "=$objs","__ \@", $count, "\n";
$count ++; $count ++;
print "\t$uppercase=$objs", "__ \@", $count, "\n"; print "\t",$symbolprefix, $uppercase, $symbolsuffix, "=$objs", "__ \@", $count, "\n";
$count ++; $count ++;
} }
@ -2936,16 +2982,16 @@ if ($ARGV[0] eq "linktest"){
print "int main(void){\n"; print "int main(void){\n";
foreach $objs (@underscore_objs) { foreach $objs (@underscore_objs) {
print $objs, $bu, "();\n" if $objs ne "xerbla"; print $symbolprefix, $objs, $bu, $symbolsuffix, "();\n" if $objs ne "xerbla";
} }
foreach $objs (@need_2underscore_objs) { foreach $objs (@need_2underscore_objs) {
print $objs, $bu, $bu, "();\n"; print $symbolprefix, $objs, $bu, $bu, $symbolsuffix, "();\n";
} }
# if ($ARGV[4] == 0) { # if ($ARGV[4] == 0) {
foreach $objs (@no_underscore_objs) { foreach $objs (@no_underscore_objs) {
print $objs, "();\n"; print $symbolprefix, $objs, $symbolsuffix, "();\n";
} }
# } # }

View File

@ -1,5 +1,5 @@
/***************************************************************************** /*****************************************************************************
Copyright (c) 2011,2012 Lab of Parallel Software and Computational Science,ISCAS Copyright (c) 2011-2014, The OpenBLAS Project
All rights reserved. All rights reserved.
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without
@ -13,9 +13,10 @@ met:
notice, this list of conditions and the following disclaimer in notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the the documentation and/or other materials provided with the
distribution. distribution.
3. Neither the name of the ISCAS nor the names of its contributors may 3. Neither the name of the OpenBLAS project nor the names of
be used to endorse or promote products derived from this software its contributors may be used to endorse or promote products
without specific prior written permission. derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
@ -746,12 +747,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define SUBARCHITECTURE "ARMV8" #define SUBARCHITECTURE "ARMV8"
#define SUBDIRNAME "arm64" #define SUBDIRNAME "arm64"
#define ARCHCONFIG "-DARMV8 " \ #define ARCHCONFIG "-DARMV8 " \
"-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \ "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
"-DL2_SIZE=512488 -DL2_LINESIZE=32 " \ "-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " \ "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=32 "
"-DHAVE_VFP -DHAVE_VFPV3 -DHAVE_VFPV4"
#define LIBNAME "armv8" #define LIBNAME "armv8"
#define CORENAME "ARMV8" #define CORENAME "XGENE1"
#else #else
#endif #endif
@ -801,6 +801,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define OPENBLAS_SUPPORTED #define OPENBLAS_SUPPORTED
#endif #endif
#ifdef __aarch64__
#include "cpuid_arm64.c"
#define OPENBLAS_SUPPORTED
#endif
#ifndef OPENBLAS_SUPPORTED #ifndef OPENBLAS_SUPPORTED
#error "This arch/CPU is not supported by OpenBLAS." #error "This arch/CPU is not supported by OpenBLAS."
@ -856,7 +861,7 @@ int main(int argc, char *argv[]){
#ifdef FORCE #ifdef FORCE
printf("CORE=%s\n", CORENAME); printf("CORE=%s\n", CORENAME);
#else #else
#if defined(__i386__) || defined(__x86_64__) || defined(POWER) || defined(__mips__) || defined(__arm__) #if defined(__i386__) || defined(__x86_64__) || defined(POWER) || defined(__mips__) || defined(__arm__) || defined(__aarch64__)
printf("CORE=%s\n", get_corename()); printf("CORE=%s\n", get_corename());
#endif #endif
#endif #endif
@ -956,7 +961,7 @@ int main(int argc, char *argv[]){
#ifdef FORCE #ifdef FORCE
printf("#define CHAR_CORENAME \"%s\"\n", CORENAME); printf("#define CHAR_CORENAME \"%s\"\n", CORENAME);
#else #else
#if defined(__i386__) || defined(__x86_64__) || defined(POWER) || defined(__mips__) || defined(__arm__) #if defined(__i386__) || defined(__x86_64__) || defined(POWER) || defined(__mips__) || defined(__arm__) || defined(__aarch64__)
printf("#define CHAR_CORENAME \"%s\"\n", get_corename()); printf("#define CHAR_CORENAME \"%s\"\n", get_corename());
#endif #endif
#endif #endif

View File

@ -28,6 +28,10 @@ ifeq ($(TARGET), LOONGSON3B)
USE_TRMM = 1 USE_TRMM = 1
endif endif
ifeq ($(TARGET), GENERIC)
USE_TRMM = 1
endif
SKERNELOBJS += \ SKERNELOBJS += \

View File

@ -80,14 +80,14 @@ DGEMVTKERNEL = ../arm/gemv_t.c
CGEMVTKERNEL = ../arm/zgemv_t.c CGEMVTKERNEL = ../arm/zgemv_t.c
ZGEMVTKERNEL = ../arm/zgemv_t.c ZGEMVTKERNEL = ../arm/zgemv_t.c
STRMMKERNEL = ../generic/trmmkernel_2x2.c STRMMKERNEL = ../generic/trmmkernel_4x4.c
DTRMMKERNEL = ../generic/trmmkernel_2x2.c DTRMMKERNEL = ../generic/trmmkernel_2x2.c
CTRMMKERNEL = ../generic/ztrmmkernel_2x2.c CTRMMKERNEL = ../generic/ztrmmkernel_2x2.c
ZTRMMKERNEL = ../generic/ztrmmkernel_2x2.c ZTRMMKERNEL = ../generic/ztrmmkernel_2x2.c
SGEMMKERNEL = ../generic/gemmkernel_2x2.c SGEMMKERNEL = sgemm_kernel_4x4.S
SGEMMONCOPY = ../generic/gemm_ncopy_2.c SGEMMONCOPY = ../generic/gemm_ncopy_4.c
SGEMMOTCOPY = ../generic/gemm_tcopy_2.c SGEMMOTCOPY = ../generic/gemm_tcopy_4.c
SGEMMONCOPYOBJ = sgemm_oncopy.o SGEMMONCOPYOBJ = sgemm_oncopy.o
SGEMMOTCOPYOBJ = sgemm_otcopy.o SGEMMOTCOPYOBJ = sgemm_otcopy.o

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,875 @@
#include "common.h"
#include <stdbool.h>
/*
 * Tiled multiply kernel working on 4x4 / 2x4 / 1x4 / 4x2 / ... / 1x1 tiles of C.
 * (The TRMMKERNEL / LEFT / TRANSA macros tested below suggest it is built as a
 * TRMM kernel in several variants - NOTE(review): confirm against the build rules.)
 *
 * For every tile it accumulates sum_k a[k]*b[k] over a sub-range of the k
 * dimension, scales the sums by alpha and STORES them into C (C is overwritten,
 * not accumulated into).
 *
 *   bm, bn  rows / columns of C to produce
 *   bk      full k length of the A and B panels
 *   alpha   scalar applied to every result
 *   ba      A panel, read sequentially: a tile of height h consumes h
 *           consecutive values per k step and bk*h values in total
 *   bb      B panel, read sequentially: a column block of width w consumes w
 *           consecutive values per k step and bk*w values in total
 *   C, ldc  output; consecutive columns of C are ldc elements apart
 *   offset  initial value of the running offset `off` (used as-is when LEFT is
 *           defined, negated otherwise)
 *
 * Always returns 0.
 */
int CNAME(BLASLONG bm,BLASLONG bn,BLASLONG bk,FLOAT alpha,FLOAT* ba,FLOAT* bb,FLOAT* C,BLASLONG ldc ,BLASLONG offset)
{
BLASLONG i,j,k;
// C0..C3 point at the current tile in up to four consecutive columns of C;
// ptrba / ptrbb are the read cursors into the A and B panels.
FLOAT *C0,*C1,*C2,*C3,*ptrba,*ptrbb;
// Accumulators are named res<column>_<row>: resJ_I ends up in CJ[I].
FLOAT res0_0;
FLOAT res0_1;
FLOAT res0_2;
FLOAT res0_3;
FLOAT res1_0;
FLOAT res1_1;
FLOAT res1_2;
FLOAT res1_3;
FLOAT res2_0;
FLOAT res2_1;
FLOAT res2_2;
FLOAT res2_3;
FLOAT res3_0;
FLOAT res3_1;
FLOAT res3_2;
FLOAT res3_3;
// Scratch copies of the A values (a0/a1 are reused for rows 0,2 and 1,3)
// and of the B values of the current k step.
FLOAT a0;
FLOAT a1;
FLOAT b0;
FLOAT b1;
FLOAT b2;
FLOAT b3;
// off:  running offset that selects which part of the k range a tile uses.
// temp: number of k steps to execute, later reused for the number to skip.
BLASLONG off, temp;
// Run-time mirrors of the LEFT / TRANSA compile-time switches; only the 4x4
// tile of the Mx4 loop uses them, every other tile tests the macros directly.
bool left;
bool transposed;
bool backwards;
#ifdef LEFT
left = true;
#else
left = false;
#endif
#ifdef TRANSA
transposed = true;
#else
transposed = false;
#endif
// backwards is true for (LEFT && !TRANSA) or (!LEFT && TRANSA). In that case a
// tile skips the first `off` k steps and multiplies the remaining bk-off;
// otherwise it multiplies the leading k steps and skips the tail afterwards.
backwards = left != transposed;
// Without LEFT, off starts at -offset once and is advanced per column block;
// with LEFT it is reset to offset at the top of every column block instead.
if (!left) {
off = -offset;
}
for (j=0; j<bn/4; j+=1) // do blocks of the Mx4 loops
{
C0 = C;
C1 = C0+ldc;
C2 = C1+ldc;
C3 = C2+ldc;
if (left) {
off = offset;
}
// Every column block re-reads the A panel from its start.
ptrba = ba;
for (i=0; i<bm/4; i+=1) // do blocks of 4x4
{
ptrbb = bb;
if (backwards)
{
ptrba += off*4; // number of values in A
ptrbb += off*4; // number of values in B
}
res0_0 = 0;
res0_1 = 0;
res0_2 = 0;
res0_3 = 0;
res1_0 = 0;
res1_1 = 0;
res1_2 = 0;
res1_3 = 0;
res2_0 = 0;
res2_1 = 0;
res2_2 = 0;
res2_3 = 0;
res3_0 = 0;
res3_1 = 0;
res3_2 = 0;
res3_3 = 0;
// Number of k steps for this tile (both non-backwards branches are off+4
// here because tile height and width are equal).
temp = backwards ? bk-off :
left ? off + 4 : // number of values in A
off + 4; // number of values in B
for (k=0; k<temp; k++)
{
// One rank-1 update: 4 values of A times 4 values of B.
b0 = ptrbb[0];
b1 = ptrbb[1];
b2 = ptrbb[2];
b3 = ptrbb[3];
a0 = ptrba[0];
res0_0 += a0*b0;
res1_0 += a0*b1;
res2_0 += a0*b2;
res3_0 += a0*b3;
a1 = ptrba[1];
res0_1 += a1*b0;
res1_1 += a1*b1;
res2_1 += a1*b2;
res3_1 += a1*b3;
a0 = ptrba[2];
res0_2 += a0*b0;
res1_2 += a0*b1;
res2_2 += a0*b2;
res3_2 += a0*b3;
a1 = ptrba[3];
res0_3 += a1*b0;
res1_3 += a1*b1;
res2_3 += a1*b2;
res3_3 += a1*b3;
ptrba = ptrba+4;
ptrbb = ptrbb+4;
}
res0_0 *= alpha;
res0_1 *= alpha;
res0_2 *= alpha;
res0_3 *= alpha;
res1_0 *= alpha;
res1_1 *= alpha;
res1_2 *= alpha;
res1_3 *= alpha;
res2_0 *= alpha;
res2_1 *= alpha;
res2_2 *= alpha;
res2_3 *= alpha;
res3_0 *= alpha;
res3_1 *= alpha;
res3_2 *= alpha;
res3_3 *= alpha;
// Store (overwrite) the finished 4x4 tile.
C0[0] = res0_0;
C0[1] = res0_1;
C0[2] = res0_2;
C0[3] = res0_3;
C1[0] = res1_0;
C1[1] = res1_1;
C1[2] = res1_2;
C1[3] = res1_3;
C2[0] = res2_0;
C2[1] = res2_1;
C2[2] = res2_2;
C2[3] = res2_3;
C3[0] = res3_0;
C3[1] = res3_1;
C3[2] = res3_2;
C3[3] = res3_3;
// Forward case: skip the unused tail of the k range so that ptrba ends up
// at the start of the next row tile's A data.
if (!backwards) {
temp = bk-off;
temp = left ? temp - 4 : // number of values in A
temp - 4; // number of values in B
ptrba += temp*4; // number of values in A
ptrbb += temp*4; // number of values in B
}
#ifdef LEFT
off += 4; // number of values in A
#endif
C0 = C0+4;
C1 = C1+4;
C2 = C2+4;
C3 = C3+4;
}
// The remaining tiles repeat the 4x4 scheme with the tile height/width
// substituted; the #if conditions are the compile-time forms of
// !backwards (LEFT and TRANSA agree) and backwards (they differ).
if ( bm & 2 ) // do any 2x4 loop
{
#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
ptrbb = bb;
#else
ptrba += off*2;
ptrbb = bb + off*4;
#endif
res0_0 = 0;
res0_1 = 0;
res1_0 = 0;
res1_1 = 0;
res2_0 = 0;
res2_1 = 0;
res3_0 = 0;
res3_1 = 0;
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
temp = bk-off;
#elif defined(LEFT)
temp = off+2; // number of values in A
#else
temp = off+4; // number of values in B
#endif
for (k=0; k<temp; k++)
{
b0 = ptrbb[0];
b1 = ptrbb[1];
b2 = ptrbb[2];
b3 = ptrbb[3];
a0 = ptrba[0];
res0_0 += a0*b0;
res1_0 += a0*b1;
res2_0 += a0*b2;
res3_0 += a0*b3;
a1 = ptrba[1];
res0_1 += a1*b0;
res1_1 += a1*b1;
res2_1 += a1*b2;
res3_1 += a1*b3;
ptrba = ptrba+2;
ptrbb = ptrbb+4;
}
res0_0 *= alpha;
res0_1 *= alpha;
res1_0 *= alpha;
res1_1 *= alpha;
res2_0 *= alpha;
res2_1 *= alpha;
res3_0 *= alpha;
res3_1 *= alpha;
C0[0] = res0_0;
C0[1] = res0_1;
C1[0] = res1_0;
C1[1] = res1_1;
C2[0] = res2_0;
C2[1] = res2_1;
C3[0] = res3_0;
C3[1] = res3_1;
#if ( defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
temp = bk - off;
#ifdef LEFT
temp -= 2; // number of values in A
#else
temp -= 4; // number of values in B
#endif
ptrba += temp*2;
ptrbb += temp*4;
#endif
#ifdef LEFT
off += 2; // number of values in A
#endif
C0 = C0+2;
C1 = C1+2;
C2 = C2+2;
C3 = C3+2;
}
if ( bm & 1 ) // do any 1x4 loop
{
#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
ptrbb = bb;
#else
ptrba += off*1;
ptrbb = bb + off*4;
#endif
res0_0 = 0;
res1_0 = 0;
res2_0 = 0;
res3_0 = 0;
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
temp = bk-off;
#elif defined(LEFT)
temp = off+1; // number of values in A
#else
temp = off+4; // number of values in B
#endif
for (k=0; k<temp; k++)
{
b0 = ptrbb[0];
b1 = ptrbb[1];
b2 = ptrbb[2];
b3 = ptrbb[3];
a0 = ptrba[0];
res0_0 += a0*b0;
res1_0 += a0*b1;
res2_0 += a0*b2;
res3_0 += a0*b3;
ptrba = ptrba+1;
ptrbb = ptrbb+4;
}
res0_0 *= alpha;
res1_0 *= alpha;
res2_0 *= alpha;
res3_0 *= alpha;
C0[0] = res0_0;
C1[0] = res1_0;
C2[0] = res2_0;
C3[0] = res3_0;
#if ( defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
temp = bk - off;
#ifdef LEFT
temp -= 1; // number of values in A
#else
temp -= 4; // number of values in B
#endif
ptrba += temp*1;
ptrbb += temp*4;
#endif
#ifdef LEFT
off += 1; // number of values in A
#endif
C0 = C0+1;
C1 = C1+1;
C2 = C2+1;
C3 = C3+1;
}
// NOTE(review): this advance (and the `off = offset` resets in the Mx2 / Mx1
// loops below) is guarded by TRMMKERNEL, whereas the Mx4 loop above resets
// off with a plain `if (left)`. Presumably the file is always compiled with
// TRMMKERNEL defined - confirm, otherwise off is not maintained here.
#if defined(TRMMKERNEL) && !defined(LEFT)
off += 4;
#endif
// Step to the next column block: 4 columns of B (bk values each) and of C.
k = (bk<<2);
bb = bb+k;
i = (ldc<<2);
C = C+i;
}
// Runs at most once: handles a leftover pair of columns when bn & 2 is set.
for (j=0; j<(bn&2); j+=2) // do the Mx2 loops
{
C0 = C;
C1 = C0+ldc;
#if defined(TRMMKERNEL) && defined(LEFT)
off = offset;
#endif
ptrba = ba;
for (i=0; i<bm/4; i+=1) // do blocks of 4x2
{
#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
ptrbb = bb;
#else
ptrba += off*4;
ptrbb = bb + off*2;
#endif
res0_0 = 0;
res0_1 = 0;
res0_2 = 0;
res0_3 = 0;
res1_0 = 0;
res1_1 = 0;
res1_2 = 0;
res1_3 = 0;
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
temp = bk-off;
#elif defined(LEFT)
temp = off+4; // number of values in A
#else
temp = off+2; // number of values in B
#endif
for (k=0; k<temp; k++)
{
b0 = ptrbb[0];
b1 = ptrbb[1];
a0 = ptrba[0];
res0_0 += a0*b0;
res1_0 += a0*b1;
a1 = ptrba[1];
res0_1 += a1*b0;
res1_1 += a1*b1;
a0 = ptrba[2];
res0_2 += a0*b0;
res1_2 += a0*b1;
a1 = ptrba[3];
res0_3 += a1*b0;
res1_3 += a1*b1;
ptrba = ptrba+4;
ptrbb = ptrbb+2;
}
res0_0 *= alpha;
res0_1 *= alpha;
res0_2 *= alpha;
res0_3 *= alpha;
res1_0 *= alpha;
res1_1 *= alpha;
res1_2 *= alpha;
res1_3 *= alpha;
C0[0] = res0_0;
C0[1] = res0_1;
C0[2] = res0_2;
C0[3] = res0_3;
C1[0] = res1_0;
C1[1] = res1_1;
C1[2] = res1_2;
C1[3] = res1_3;
#if ( defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
temp = bk - off;
#ifdef LEFT
temp -= 4; // number of values in A
#else
temp -= 2; // number of values in B
#endif
ptrba += temp*4;
ptrbb += temp*2;
#endif
#ifdef LEFT
off += 4; // number of values in A
#endif
C0 = C0+4;
C1 = C1+4;
}
if ( bm & 2 ) // do any 2x2 loop
{
#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
ptrbb = bb;
#else
ptrba += off*2;
ptrbb = bb + off*2;
#endif
res0_0 = 0;
res0_1 = 0;
res1_0 = 0;
res1_1 = 0;
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
temp = bk-off;
#elif defined(LEFT)
temp = off+2; // number of values in A
#else
temp = off+2; // number of values in B
#endif
for (k=0; k<temp; k++)
{
b0 = ptrbb[0];
b1 = ptrbb[1];
a0 = ptrba[0];
res0_0 += a0*b0;
res1_0 += a0*b1;
a1 = ptrba[1];
res0_1 += a1*b0;
res1_1 += a1*b1;
ptrba = ptrba+2;
ptrbb = ptrbb+2;
}
res0_0 *= alpha;
res0_1 *= alpha;
res1_0 *= alpha;
res1_1 *= alpha;
C0[0] = res0_0;
C0[1] = res0_1;
C1[0] = res1_0;
C1[1] = res1_1;
#if ( defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
temp = bk - off;
#ifdef LEFT
temp -= 2; // number of values in A
#else
temp -= 2; // number of values in B
#endif
ptrba += temp*2;
ptrbb += temp*2;
#endif
#ifdef LEFT
off += 2; // number of values in A
#endif
C0 = C0+2;
C1 = C1+2;
}
if ( bm & 1 ) // do any 1x2 loop
{
#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
ptrbb = bb;
#else
ptrba += off*1;
ptrbb = bb + off*2;
#endif
res0_0 = 0;
res1_0 = 0;
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
temp = bk-off;
#elif defined(LEFT)
temp = off+1; // number of values in A
#else
temp = off+2; // number of values in B
#endif
for (k=0; k<temp; k++)
{
b0 = ptrbb[0];
b1 = ptrbb[1];
a0 = ptrba[0];
res0_0 += a0*b0;
res1_0 += a0*b1;
ptrba = ptrba+1;
ptrbb = ptrbb+2;
}
res0_0 *= alpha;
res1_0 *= alpha;
C0[0] = res0_0;
C1[0] = res1_0;
#if ( defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
temp = bk - off;
#ifdef LEFT
temp -= 1; // number of values in A
#else
temp -= 2; // number of values in B
#endif
ptrba += temp*1;
ptrbb += temp*2;
#endif
#ifdef LEFT
off += 1; // number of values in A
#endif
C0 = C0+1;
C1 = C1+1;
}
#if defined(TRMMKERNEL) && !defined(LEFT)
off += 2;
#endif
// Step past the two columns just processed (bk*2 values of B, 2*ldc of C).
k = (bk<<1);
bb = bb+k;
i = (ldc<<1);
C = C+i;
}
// Runs at most once: handles a final single column when bn is odd.
for (j=0; j<(bn&1); j+=1) // do the Mx1 loops
{
C0 = C;
#if defined(TRMMKERNEL) && defined(LEFT)
off = offset;
#endif
ptrba = ba;
for (i=0; i<bm/4; i+=1) // do blocks of 4x1 loops
{
#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
ptrbb = bb;
#else
ptrba += off*4;
ptrbb = bb + off*1;
#endif
res0_0 = 0;
res0_1 = 0;
res0_2 = 0;
res0_3 = 0;
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
temp = bk-off;
#elif defined(LEFT)
temp = off+4; // number of values in A
#else
temp = off+1; // number of values in B
#endif
for (k=0; k<temp; k++)
{
b0 = ptrbb[0];
a0 = ptrba[0];
res0_0 += a0*b0;
a1 = ptrba[1];
res0_1 += a1*b0;
a0 = ptrba[2];
res0_2 += a0*b0;
a1 = ptrba[3];
res0_3 += a1*b0;
ptrba = ptrba+4;
ptrbb = ptrbb+1;
}
res0_0 *= alpha;
res0_1 *= alpha;
res0_2 *= alpha;
res0_3 *= alpha;
C0[0] = res0_0;
C0[1] = res0_1;
C0[2] = res0_2;
C0[3] = res0_3;
#if ( defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
temp = bk - off;
#ifdef LEFT
temp -= 4; // number of values in A
#else
temp -= 1; // number of values in B
#endif
ptrba += temp*4;
ptrbb += temp*1;
#endif
#ifdef LEFT
off += 4; // number of values in A
#endif
C0 = C0+4;
}
if ( bm & 2 ) // do any 2x1 loop
{
#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
ptrbb = bb;
#else
ptrba += off*2;
ptrbb = bb + off*1;
#endif
res0_0 = 0;
res0_1 = 0;
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
temp = bk-off;
#elif defined(LEFT)
temp = off+2; // number of values in A
#else
temp = off+1; // number of values in B
#endif
for (k=0; k<temp; k++)
{
b0 = ptrbb[0];
a0 = ptrba[0];
res0_0 += a0*b0;
a1 = ptrba[1];
res0_1 += a1*b0;
ptrba = ptrba+2;
ptrbb = ptrbb+1;
}
res0_0 *= alpha;
res0_1 *= alpha;
C0[0] = res0_0;
C0[1] = res0_1;
#if ( defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
temp = bk - off;
#ifdef LEFT
temp -= 2; // number of values in A
#else
temp -= 1; // number of values in B
#endif
ptrba += temp*2;
ptrbb += temp*1;
#endif
#ifdef LEFT
off += 2; // number of values in A
#endif
C0 = C0+2;
}
if ( bm & 1 ) // do any 1x1 loop
{
#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
ptrbb = bb;
#else
ptrba += off*1;
ptrbb = bb + off*1;
#endif
res0_0 = 0;
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
temp = bk-off;
#elif defined(LEFT)
temp = off+1; // number of values in A
#else
temp = off+1; // number of values in B
#endif
for (k=0; k<temp; k++)
{
b0 = ptrbb[0];
a0 = ptrba[0];
res0_0 += a0*b0;
ptrba = ptrba+1;
ptrbb = ptrbb+1;
}
res0_0 *= alpha;
C0[0] = res0_0;
#if ( defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
temp = bk - off;
#ifdef LEFT
temp -= 1; // number of values in A
#else
temp -= 1; // number of values in B
#endif
ptrba += temp*1;
ptrbb += temp*1;
#endif
#ifdef LEFT
off += 1; // number of values in A
#endif
C0 = C0+1;
}
#if defined(TRMMKERNEL) && !defined(LEFT)
off += 1;
#endif
// Step past the single column (bk values of B, ldc values of C); the shift
// by 0 only mirrors the <<2 / <<1 forms used by the wider column blocks.
k = (bk<<0);
bb = bb+k;
C = C+ldc;
}
return 0;
}

View File

@ -1,21 +1,22 @@
/***************************************************************************** /*****************************************************************************
Copyright (c) 2011,2012 Lab of Parallel Software and Computational Science,ISCAS Copyright (c) 2011-2014, The OpenBLAS Project
All rights reserved. All rights reserved.
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are modification, are permitted provided that the following conditions are
met: met:
1. Redistributions of source code must retain the above copyright 1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer. notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright 2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the the documentation and/or other materials provided with the
distribution. distribution.
3. Neither the name of the ISCAS nor the names of its contributors may 3. Neither the name of the OpenBLAS project nor the names of
be used to endorse or promote products derived from this software its contributors may be used to endorse or promote products
without specific prior written permission. derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE

View File

@ -1,21 +1,22 @@
/***************************************************************************** /*****************************************************************************
Copyright (c) 2011,2012 Lab of Parallel Software and Computational Science,ISCAS Copyright (c) 2011-2014, The OpenBLAS Project
All rights reserved. All rights reserved.
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are modification, are permitted provided that the following conditions are
met: met:
1. Redistributions of source code must retain the above copyright 1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer. notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright 2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the the documentation and/or other materials provided with the
distribution. distribution.
3. Neither the name of the ISCAS nor the names of its contributors may 3. Neither the name of the OpenBLAS project nor the names of
be used to endorse or promote products derived from this software its contributors may be used to endorse or promote products
without specific prior written permission. derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE

View File

@ -1,21 +1,22 @@
/***************************************************************************** /*****************************************************************************
Copyright (c) 2011,2012 Lab of Parallel Software and Computational Science,ISCAS Copyright (c) 2011-2014, The OpenBLAS Project
All rights reserved. All rights reserved.
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are modification, are permitted provided that the following conditions are
met: met:
1. Redistributions of source code must retain the above copyright 1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer. notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright 2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the the documentation and/or other materials provided with the
distribution. distribution.
3. Neither the name of the ISCAS nor the names of its contributors may 3. Neither the name of the OpenBLAS project nor the names of
be used to endorse or promote products derived from this software its contributors may be used to endorse or promote products
without specific prior written permission. derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE

View File

@ -1,21 +1,22 @@
/***************************************************************************** /*****************************************************************************
Copyright (c) 2011,2012 Lab of Parallel Software and Computational Science,ISCAS Copyright (c) 2011-2014, The OpenBLAS Project
All rights reserved. All rights reserved.
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are modification, are permitted provided that the following conditions are
met: met:
1. Redistributions of source code must retain the above copyright 1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer. notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright 2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the the documentation and/or other materials provided with the
distribution. distribution.
3. Neither the name of the ISCAS nor the names of its contributors may 3. Neither the name of the OpenBLAS project nor the names of
be used to endorse or promote products derived from this software its contributors may be used to endorse or promote products
without specific prior written permission. derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE

View File

@ -1,5 +1,5 @@
/***************************************************************************** /*****************************************************************************
Copyright (c) 2011, Lab of Parallel Software and Computational Science,ICSAS Copyright (c) 2011-2014, The OpenBLAS Project
All rights reserved. All rights reserved.
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without
@ -13,9 +13,10 @@ met:
notice, this list of conditions and the following disclaimer in notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the the documentation and/or other materials provided with the
distribution. distribution.
3. Neither the name of the ISCAS nor the names of its contributors may 3. Neither the name of the OpenBLAS project nor the names of
be used to endorse or promote products derived from this software its contributors may be used to endorse or promote products
without specific prior written permission. derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE

View File

@ -1,5 +1,5 @@
/***************************************************************************** /*****************************************************************************
Copyright (c) 2011, Lab of Parallel Software and Computational Science,ICSAS Copyright (c) 2011-2014, The OpenBLAS Project
All rights reserved. All rights reserved.
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without
@ -13,9 +13,10 @@ met:
notice, this list of conditions and the following disclaimer in notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the the documentation and/or other materials provided with the
distribution. distribution.
3. Neither the name of the ISCAS nor the names of its contributors may 3. Neither the name of the OpenBLAS project nor the names of
be used to endorse or promote products derived from this software its contributors may be used to endorse or promote products
without specific prior written permission. derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE

View File

@ -0,0 +1,52 @@
# Kernel source selection: each variable below names the source file that
# implements one level-3 kernel (or the object file name for a copy routine).
# The variables are plain recursive assignments of literal paths; they are
# meant to be read by whatever makefile includes this file.
# Variable prefixes follow the BLAS S/D/C/Z convention (single, double,
# complex, double complex).
#
# Do not add trailing comments on the assignment lines: in make, whitespace
# before a trailing '#' becomes part of the variable's value.
#
# TRMM kernels. S and D share the real 2x2 C source; C and Z share the
# complex ("z"-prefixed) 2x2 C source.
STRMMKERNEL = ../generic/trmmkernel_2x2.c
DTRMMKERNEL = ../generic/trmmkernel_2x2.c
CTRMMKERNEL = ../generic/ztrmmkernel_2x2.c
ZTRMMKERNEL = ../generic/ztrmmkernel_2x2.c
# GEMM kernels and their copy routines. For each precision: the 2x2 compute
# kernel, the ncopy/tcopy sources (width 2, per the file names), and the
# object file names for those copy routines.
# NOTE(review): the 2x2 blocking presumably has to agree with the
# *GEMM_DEFAULT_UNROLL_M/N values in param.h for this target - confirm.
#
# Single precision real.
SGEMMKERNEL = ../generic/gemmkernel_2x2.c
SGEMMONCOPY = ../generic/gemm_ncopy_2.c
SGEMMOTCOPY = ../generic/gemm_tcopy_2.c
SGEMMONCOPYOBJ = sgemm_oncopy.o
SGEMMOTCOPYOBJ = sgemm_otcopy.o
# Double precision real (same sources as single precision).
DGEMMKERNEL = ../generic/gemmkernel_2x2.c
DGEMMONCOPY = ../generic/gemm_ncopy_2.c
DGEMMOTCOPY = ../generic/gemm_tcopy_2.c
DGEMMONCOPYOBJ = dgemm_oncopy.o
DGEMMOTCOPYOBJ = dgemm_otcopy.o
# Single precision complex (uses the "z"-prefixed complex sources).
CGEMMKERNEL = ../generic/zgemmkernel_2x2.c
CGEMMONCOPY = ../generic/zgemm_ncopy_2.c
CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
CGEMMONCOPYOBJ = cgemm_oncopy.o
CGEMMOTCOPYOBJ = cgemm_otcopy.o
# Double precision complex (same sources as single precision complex).
ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c
ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c
ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
ZGEMMONCOPYOBJ = zgemm_oncopy.o
ZGEMMOTCOPYOBJ = zgemm_otcopy.o
# TRSM kernels. All four precisions point at the same four generic sources,
# one per variant suffix (LN, LT, RN, RT).
# NOTE(review): the complex (C/Z) entries reuse the same files as the real
# ones, unlike TRMM/GEMM above which have separate "z" sources; presumably the
# TRSM sources handle complex data through compile-time defines - confirm.
STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
# GEMM3M kernels. Unlike every entry above, these name assembly (.S) files
# with "sse3" in the name and no ../generic/ prefix, so they are not portable
# C sources. The C and Z entries also use different block sizes (8x4 vs 4x4),
# which is what the Todo below is about.
# NOTE(review): both files are presumably resolved relative to the including
# kernel directory - confirm they exist there.
#Todo: CGEMM3MKERNEL should be 4x4 blocksizes.
CGEMM3MKERNEL = zgemm3m_kernel_8x4_sse3.S
ZGEMM3MKERNEL = zgemm3m_kernel_4x4_sse3.S

View File

@ -1,21 +1,22 @@
/***************************************************************************** /*****************************************************************************
Copyright (c) 2011,2012 Lab of Parallel Software and Computational Science,ISCAS Copyright (c) 2011-2014, The OpenBLAS Project
All rights reserved. All rights reserved.
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are modification, are permitted provided that the following conditions are
met: met:
1. Redistributions of source code must retain the above copyright 1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer. notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright 2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the the documentation and/or other materials provided with the
distribution. distribution.
3. Neither the name of the ISCAS nor the names of its contributors may 3. Neither the name of the OpenBLAS project nor the names of
be used to endorse or promote products derived from this software its contributors may be used to endorse or promote products
without specific prior written permission. derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE

View File

@ -1,21 +1,22 @@
/***************************************************************************** /*****************************************************************************
Copyright (c) 2011,2012 Lab of Parallel Software and Computational Science,ISCAS Copyright (c) 2011-2014, The OpenBLAS Project
All rights reserved. All rights reserved.
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are modification, are permitted provided that the following conditions are
met: met:
1. Redistributions of source code must retain the above copyright 1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer. notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright 2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the the documentation and/or other materials provided with the
distribution. distribution.
3. Neither the name of the ISCAS nor the names of its contributors may 3. Neither the name of the OpenBLAS project nor the names of
be used to endorse or promote products derived from this software its contributors may be used to endorse or promote products
without specific prior written permission. derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE

View File

@ -1,21 +1,22 @@
/***************************************************************************** /*****************************************************************************
Copyright (c) 2011,2012 Lab of Parallel Software and Computational Science,ISCAS Copyright (c) 2011-2014, The OpenBLAS Project
All rights reserved. All rights reserved.
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are modification, are permitted provided that the following conditions are
met: met:
1. Redistributions of source code must retain the above copyright 1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer. notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright 2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the the documentation and/or other materials provided with the
distribution. distribution.
3. Neither the name of the ISCAS nor the names of its contributors may 3. Neither the name of the OpenBLAS project nor the names of
be used to endorse or promote products derived from this software its contributors may be used to endorse or promote products
without specific prior written permission. derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE

View File

@ -1,21 +1,22 @@
/***************************************************************************** /*****************************************************************************
Copyright (c) 2011,2012 Lab of Parallel Software and Computational Science,ISCAS Copyright (c) 2011-2014, The OpenBLAS Project
All rights reserved. All rights reserved.
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are modification, are permitted provided that the following conditions are
met: met:
1. Redistributions of source code must retain the above copyright 1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer. notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright 2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the the documentation and/or other materials provided with the
distribution. distribution.
3. Neither the name of the ISCAS nor the names of its contributors may 3. Neither the name of the OpenBLAS project nor the names of
be used to endorse or promote products derived from this software its contributors may be used to endorse or promote products
without specific prior written permission. derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE

27
param.h
View File

@ -1,5 +1,5 @@
/***************************************************************************** /*****************************************************************************
Copyright (c) 2011,2012 Lab of Parallel Software and Computational Science,ISCAS Copyright (c) 2011-2014, The OpenBLAS Project
All rights reserved. All rights reserved.
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without
@ -13,9 +13,10 @@ met:
notice, this list of conditions and the following disclaimer in notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the the documentation and/or other materials provided with the
distribution. distribution.
3. Neither the name of the ISCAS nor the names of its contributors may 3. Neither the name of the OpenBLAS project nor the names of
be used to endorse or promote products derived from this software its contributors may be used to endorse or promote products
without specific prior written permission. derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
@ -2039,8 +2040,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define GEMM_DEFAULT_OFFSET_B 0 #define GEMM_DEFAULT_OFFSET_B 0
#define GEMM_DEFAULT_ALIGN 0x03fffUL #define GEMM_DEFAULT_ALIGN 0x03fffUL
#define SGEMM_DEFAULT_UNROLL_M 2 #define SGEMM_DEFAULT_UNROLL_M 4
#define SGEMM_DEFAULT_UNROLL_N 2 #define SGEMM_DEFAULT_UNROLL_N 4
#define DGEMM_DEFAULT_UNROLL_M 2 #define DGEMM_DEFAULT_UNROLL_M 2
#define DGEMM_DEFAULT_UNROLL_N 2 #define DGEMM_DEFAULT_UNROLL_N 2
@ -2122,25 +2123,25 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define GEMM_DEFAULT_OFFSET_B 0 #define GEMM_DEFAULT_OFFSET_B 0
#define GEMM_DEFAULT_ALIGN 0x0ffffUL #define GEMM_DEFAULT_ALIGN 0x0ffffUL
#define SGEMM_DEFAULT_UNROLL_N 4 #define SGEMM_DEFAULT_UNROLL_N 2
#define DGEMM_DEFAULT_UNROLL_N 4 #define DGEMM_DEFAULT_UNROLL_N 2
#define QGEMM_DEFAULT_UNROLL_N 2 #define QGEMM_DEFAULT_UNROLL_N 2
#define CGEMM_DEFAULT_UNROLL_N 2 #define CGEMM_DEFAULT_UNROLL_N 2
#define ZGEMM_DEFAULT_UNROLL_N 2 #define ZGEMM_DEFAULT_UNROLL_N 2
#define XGEMM_DEFAULT_UNROLL_N 1 #define XGEMM_DEFAULT_UNROLL_N 1
#ifdef ARCH_X86 #ifdef ARCH_X86
#define SGEMM_DEFAULT_UNROLL_M 4 #define SGEMM_DEFAULT_UNROLL_M 2
#define DGEMM_DEFAULT_UNROLL_M 2 #define DGEMM_DEFAULT_UNROLL_M 2
#define QGEMM_DEFAULT_UNROLL_M 2 #define QGEMM_DEFAULT_UNROLL_M 2
#define CGEMM_DEFAULT_UNROLL_M 2 #define CGEMM_DEFAULT_UNROLL_M 2
#define ZGEMM_DEFAULT_UNROLL_M 1 #define ZGEMM_DEFAULT_UNROLL_M 2
#define XGEMM_DEFAULT_UNROLL_M 1 #define XGEMM_DEFAULT_UNROLL_M 1
#else #else
#define SGEMM_DEFAULT_UNROLL_M 8 #define SGEMM_DEFAULT_UNROLL_M 2
#define DGEMM_DEFAULT_UNROLL_M 4 #define DGEMM_DEFAULT_UNROLL_M 2
#define QGEMM_DEFAULT_UNROLL_M 2 #define QGEMM_DEFAULT_UNROLL_M 2
#define CGEMM_DEFAULT_UNROLL_M 4 #define CGEMM_DEFAULT_UNROLL_M 2
#define ZGEMM_DEFAULT_UNROLL_M 2 #define ZGEMM_DEFAULT_UNROLL_M 2
#define XGEMM_DEFAULT_UNROLL_M 1 #define XGEMM_DEFAULT_UNROLL_M 1
#endif #endif

View File

@ -1,5 +1,5 @@
/***************************************************************************** /*****************************************************************************
Copyright (c) 2011, Lab of Parallel Software and Computational Science,ICSAS Copyright (c) 2011-2014, The OpenBLAS Project
All rights reserved. All rights reserved.
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without
@ -13,9 +13,10 @@ met:
notice, this list of conditions and the following disclaimer in notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the the documentation and/or other materials provided with the
distribution. distribution.
3. Neither the name of the ISCAS nor the names of its contributors may 3. Neither the name of the OpenBLAS project nor the names of
be used to endorse or promote products derived from this software its contributors may be used to endorse or promote products
without specific prior written permission. derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE

View File

@ -1,5 +1,5 @@
/***************************************************************************** /*****************************************************************************
Copyright (c) 2011, Lab of Parallel Software and Computational Science,ICSAS Copyright (c) 2011-2014, The OpenBLAS Project
All rights reserved. All rights reserved.
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without
@ -13,9 +13,10 @@ met:
notice, this list of conditions and the following disclaimer in notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the the documentation and/or other materials provided with the
distribution. distribution.
3. Neither the name of the ISCAS nor the names of its contributors may 3. Neither the name of the OpenBLAS project nor the names of
be used to endorse or promote products derived from this software its contributors may be used to endorse or promote products
without specific prior written permission. derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE

View File

@ -1,5 +1,5 @@
/***************************************************************************** /*****************************************************************************
Copyright (c) 2011-2012, Lab of Parallel Software and Computational Science,ICSAS Copyright (c) 2011-2014, The OpenBLAS Project
All rights reserved. All rights reserved.
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without
@ -13,9 +13,10 @@ met:
notice, this list of conditions and the following disclaimer in notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the the documentation and/or other materials provided with the
distribution. distribution.
3. Neither the name of the ISCAS nor the names of its contributors may 3. Neither the name of the OpenBLAS project nor the names of
be used to endorse or promote products derived from this software its contributors may be used to endorse or promote products
without specific prior written permission. derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE

View File

@ -1,5 +1,5 @@
/***************************************************************************** /*****************************************************************************
Copyright (c) 2011, Lab of Parallel Software and Computational Science,ICSAS Copyright (c) 2011-2014, The OpenBLAS Project
All rights reserved. All rights reserved.
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without
@ -13,9 +13,10 @@ met:
notice, this list of conditions and the following disclaimer in notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the the documentation and/or other materials provided with the
distribution. distribution.
3. Neither the name of the ISCAS nor the names of its contributors may 3. Neither the name of the OpenBLAS project nor the names of
be used to endorse or promote products derived from this software its contributors may be used to endorse or promote products
without specific prior written permission. derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE

View File

@ -1,5 +1,5 @@
/***************************************************************************** /*****************************************************************************
Copyright (c) 2011, Lab of Parallel Software and Computational Science,ICSAS Copyright (c) 2011-2014, The OpenBLAS Project
All rights reserved. All rights reserved.
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without
@ -13,9 +13,10 @@ met:
notice, this list of conditions and the following disclaimer in notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the the documentation and/or other materials provided with the
distribution. distribution.
3. Neither the name of the ISCAS nor the names of its contributors may 3. Neither the name of the OpenBLAS project nor the names of
be used to endorse or promote products derived from this software its contributors may be used to endorse or promote products
without specific prior written permission. derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE

View File

@ -1,5 +1,5 @@
/***************************************************************************** /*****************************************************************************
Copyright (c) 2011, Lab of Parallel Software and Computational Science,ICSAS Copyright (c) 2011-2014, The OpenBLAS Project
All rights reserved. All rights reserved.
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without
@ -13,9 +13,10 @@ met:
notice, this list of conditions and the following disclaimer in notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the the documentation and/or other materials provided with the
distribution. distribution.
3. Neither the name of the ISCAS nor the names of its contributors may 3. Neither the name of the OpenBLAS project nor the names of
be used to endorse or promote products derived from this software its contributors may be used to endorse or promote products
without specific prior written permission. derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE

View File

@ -1,5 +1,5 @@
/***************************************************************************** /*****************************************************************************
Copyright (c) 2014, Lab of Parallel Software and Computational Science,ICSAS Copyright (c) 2011-2014, The OpenBLAS Project
All rights reserved. All rights reserved.
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without
@ -13,9 +13,10 @@ met:
notice, this list of conditions and the following disclaimer in notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the the documentation and/or other materials provided with the
distribution. distribution.
3. Neither the name of the ISCAS nor the names of its contributors may 3. Neither the name of the OpenBLAS project nor the names of
be used to endorse or promote products derived from this software its contributors may be used to endorse or promote products
without specific prior written permission. derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
@ -120,4 +121,4 @@ void test_fork_safety(void)
CU_ASSERT(WEXITSTATUS (child_status) == 0); CU_ASSERT(WEXITSTATUS (child_status) == 0);
} }
} }
#endif #endif

View File

@ -1,5 +1,5 @@
/***************************************************************************** /*****************************************************************************
Copyright (c) 2011, Lab of Parallel Software and Computational Science,ICSAS Copyright (c) 2011-2014, The OpenBLAS Project
All rights reserved. All rights reserved.
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without
@ -13,9 +13,10 @@ met:
notice, this list of conditions and the following disclaimer in notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the the documentation and/or other materials provided with the
distribution. distribution.
3. Neither the name of the ISCAS nor the names of its contributors may 3. Neither the name of the OpenBLAS project nor the names of
be used to endorse or promote products derived from this software its contributors may be used to endorse or promote products
without specific prior written permission. derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE

View File

@ -1,5 +1,5 @@
/***************************************************************************** /*****************************************************************************
Copyright (c) 2011, Lab of Parallel Software and Computational Science,ICSAS Copyright (c) 2011-2014, The OpenBLAS Project
All rights reserved. All rights reserved.
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without
@ -13,9 +13,10 @@ met:
notice, this list of conditions and the following disclaimer in notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the the documentation and/or other materials provided with the
distribution. distribution.
3. Neither the name of the ISCAS nor the names of its contributors may 3. Neither the name of the OpenBLAS project nor the names of
be used to endorse or promote products derived from this software its contributors may be used to endorse or promote products
without specific prior written permission. derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE

View File

@ -1,5 +1,5 @@
/***************************************************************************** /*****************************************************************************
Copyright (c) 2011, Lab of Parallel Software and Computational Science,ICSAS Copyright (c) 2011-2014, The OpenBLAS Project
All rights reserved. All rights reserved.
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without
@ -13,9 +13,10 @@ met:
notice, this list of conditions and the following disclaimer in notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the the documentation and/or other materials provided with the
distribution. distribution.
3. Neither the name of the ISCAS nor the names of its contributors may 3. Neither the name of the OpenBLAS project nor the names of
be used to endorse or promote products derived from this software its contributors may be used to endorse or promote products
without specific prior written permission. derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE

View File

@ -1,5 +1,5 @@
/***************************************************************************** /*****************************************************************************
Copyright (c) 2011, Lab of Parallel Software and Computational Science,ICSAS Copyright (c) 2011-2014, The OpenBLAS Project
All rights reserved. All rights reserved.
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without
@ -13,9 +13,10 @@ met:
notice, this list of conditions and the following disclaimer in notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the the documentation and/or other materials provided with the
distribution. distribution.
3. Neither the name of the ISCAS nor the names of its contributors may 3. Neither the name of the OpenBLAS project nor the names of
be used to endorse or promote products derived from this software its contributors may be used to endorse or promote products
without specific prior written permission. derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE