Compare commits

..

10 Commits

Author SHA1 Message Date
Zhang Xianyi
51ce5ef447 Merge branch 'develop' 2014-12-03 23:14:21 +08:00
Zhang Xianyi
37aee1f9b1 Merge branch 'develop' 2014-12-03 23:01:33 +08:00
Zhang Xianyi
7e4e195e82 Merge branch 'develop' 2014-10-13 17:10:41 +08:00
Zhang Xianyi
a7126c2ce4 Merge branch 'develop' 2014-08-18 11:16:14 +08:00
Zhang Xianyi
f20c0f9819 Merge branch 'develop' 2014-07-17 15:15:57 +08:00
Zhang Xianyi
21b5347fbe Merge branch 'develop' 2014-07-16 18:04:30 +08:00
Zhang Xianyi
f9991fd5f6 Merge branch 'develop' 2014-07-09 08:48:00 +08:00
Zhang Xianyi
da3d70420a Merge branch 'develop' 2014-06-29 10:46:22 +08:00
Zhang Xianyi
f773f492f3 Merge branch 'develop' 2014-06-10 21:55:47 +08:00
Zhang Xianyi
3e068e78e2 Merge branch 'release-0.2.9' 2014-03-06 17:45:31 +08:00
265 changed files with 1867 additions and 30026 deletions

View File

@@ -121,11 +121,5 @@ In chronological order:
* [2014-10-10] trmm and sgemm kernels (optimized for APM's X-Gene 1).
ARMv8 support.
* Dan Kortschak
* [2015-01-07] Added test for drotmg bug #484.
* Ton van den Heuvel <https://github.com/ton>
* [2015-03-18] Fix race condition during shutdown causing a crash in gotoblas_set_affinity().
* [Your name or handle] <[email or website]>
* [Date] [Brief summary of your changes]

View File

@@ -1,24 +1,4 @@
OpenBLAS ChangeLog
====================================================================
Version 0.2.14
24-Mar-2015
common:
* Improve OpenBLASConfig.cmake. (#474, #475. Thanks, xantares.)
* Improve ger and gemv for small matrices by stack allocation.
e.g. make -DMAX_STACK_ALLOC=2048 (#482. Thanks, Jerome Robert.)
* Introduce openblas_get_num_threads and openblas_get_num_procs.
(#497. Thanks, Erik Schnetter.)
* Add ATLAS-style ?geadd function. (#509. Thanks, Martin Köhler.)
* Fix c/zsyr bug with negative incx. (#492.)
* Fix race condition during shutdown causing a crash in
gotoblas_set_affinity(). (#508. Thanks, Ton van den Heuvel.)
x86/x86-64:
* Support AMD Streamroller.
ARM:
* Add Cortex-A9 and Cortex-A15 targets.
====================================================================
Version 0.2.13
3-Dec-2014

View File

@@ -9,10 +9,10 @@
If you want to allocate 64 large pages,
$shell> echo 0 > /proc/sys/vm/nr_hugepages # need to be reset
$shell> echo 65 > /proc/sys/vm/nr_hugepages # add 1 extra page
$shell> echo 3355443200 > /proc/sys/kernel/shmmax # just large number
$shell> echo 3355443200 > /proc/sys/kernel/shmall
$shell> echo 0 > /pros/sys/vm/nr_hugepages # need to be reset
$shell> echo 65 > /pros/sys/vm/nr_hugepages # add 1 extra page
$shell> echo 3355443200 > /pros/sys/kernel/shmmax # just large number
$shell> echo 3355443200 > /pros/sys/kernel/shmall
Also may add a few lines into /etc/security/limits.conf file.

View File

@@ -20,8 +20,6 @@ ifneq ($(NO_LAPACK), 1)
SUBDIRS += lapack
endif
LAPACK_NOOPT := $(filter-out -O0 -O1 -O2 -O3 -Ofast,$(LAPACK_FFLAGS))
SUBDIRS_ALL = $(SUBDIRS) test ctest utest exports benchmark ../laswp ../bench
.PHONY : all libs netlib test ctest shared install
@@ -133,7 +131,7 @@ ifeq ($(CORE), UNKOWN)
$(error OpenBLAS: Detecting CPU failed. Please set TARGET explicitly, e.g. make TARGET=your_cpu_target. Please read README for the detail.)
endif
ifeq ($(NOFORTRAN), 1)
$(info OpenBLAS: Detecting fortran compiler failed. Cannot compile LAPACK. Only compile BLAS.)
$(error OpenBLAS: Detecting fortran compiler failed. Please install fortran compiler, e.g. gfortran, ifort, openf90.)
endif
ifeq ($(NO_STATIC), 1)
ifeq ($(NO_SHARED), 1)
@@ -233,7 +231,7 @@ ifndef NOFORTRAN
-@echo "FORTRAN = $(FC)" > $(NETLIB_LAPACK_DIR)/make.inc
-@echo "OPTS = $(LAPACK_FFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "POPTS = $(LAPACK_FPFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "NOOPT = -O0 $(LAPACK_NOOPT)" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "NOOPT = $(LAPACK_FFLAGS) -O0" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "PNOOPT = $(LAPACK_FPFLAGS) -O0" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "LOADOPTS = $(FFLAGS) $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "CC = $(CC)" >> $(NETLIB_LAPACK_DIR)/make.inc

View File

@@ -1,23 +1,8 @@
# ifeq logical or
ifeq ($(CORE), $(filter $(CORE),CORTEXA9 CORTEXA15))
ifeq ($(OSNAME), Android)
CCOMMON_OPT += -marm -mfpu=neon -mfloat-abi=hard -march=armv7-a
FCOMMON_OPT += -marm -mfpu=neon -mfloat-abi=hard -march=armv7-a
else
CCOMMON_OPT += -marm -mfpu=vfpv3 -mfloat-abi=hard -march=armv7-a
FCOMMON_OPT += -marm -mfpu=vfpv3 -mfloat-abi=hard -march=armv7-a
endif
endif
ifeq ($(CORE), ARMV7)
ifeq ($(OSNAME), Android)
CCOMMON_OPT += -marm -mfpu=neon -mfloat-abi=hard -march=armv7-a
FCOMMON_OPT += -marm -mfpu=neon -mfloat-abi=hard -march=armv7-a
else
CCOMMON_OPT += -marm -mfpu=vfpv3 -mfloat-abi=hard -march=armv7-a
FCOMMON_OPT += -marm -mfpu=vfpv3 -mfloat-abi=hard -march=armv7-a
endif
endif
ifeq ($(CORE), ARMV6)
CCOMMON_OPT += -marm -mfpu=vfp -mfloat-abi=hard -march=armv6

View File

@@ -9,7 +9,7 @@ OPENBLAS_INCLUDE_DIR := $(PREFIX)/include
OPENBLAS_LIBRARY_DIR := $(PREFIX)/lib
OPENBLAS_BINARY_DIR := $(PREFIX)/bin
OPENBLAS_BUILD_DIR := $(CURDIR)
OPENBLAS_CMAKE_DIR := $(OPENBLAS_LIBRARY_DIR)/cmake/openblas
OPENBLAS_CMAKE_DIR := $(PREFIX)/cmake
OPENBLAS_CMAKE_CONFIG := OpenBLASConfig.cmake
.PHONY : install
@@ -46,11 +46,11 @@ ifndef NO_CBLAS
endif
ifndef NO_LAPACKE
@echo Copying LAPACKE header files to $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)
@-install -pm644 $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke.h
@-install -pm644 $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke_config.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_config.h
@-install -pm644 $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke_mangling_with_flags.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_mangling.h
@-install -pm644 $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke_utils.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_utils.h
@echo Copying LAPACKE header files to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
@-install -pDm644 $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke.h
@-install -pDm644 $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke_config.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_config.h
@-install -pDm644 $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke_mangling_with_flags.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_mangling.h
@-install -pDm644 $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke_utils.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_utils.h
endif
#for install static library
@@ -86,8 +86,8 @@ ifeq ($(OSNAME), Darwin)
ln -fs $(LIBDYNNAME) $(LIBPREFIX).dylib
endif
ifeq ($(OSNAME), WINNT)
@-cp $(LIBDLLNAME) $(DESTDIR)$(OPENBLAS_BINARY_DIR)
@-cp $(LIBDLLNAME).a $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
@-cp $(LIBDLLNAME) $(OPENBLAS_BINARY_DIR)
@-cp $(LIBDLLNAME).a $(OPENBLAS_LIBRARY_DIR)
endif
ifeq ($(OSNAME), CYGWIN_NT)
@-cp $(LIBDLLNAME) $(OPENBLAS_BINARY_DIR)
@@ -95,8 +95,7 @@ endif
endif
#Generating OpenBLASConfig.cmake
@echo Generating $(OPENBLAS_CMAKE_CONFIG) in $(DESTDIR)$(OPENBLAS_CMAKE_DIR)
@echo "SET(OpenBLAS_VERSION \"${VERSION}\")" > $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)
@echo "SET(OpenBLAS_INCLUDE_DIRS ${OPENBLAS_INCLUDE_DIR})" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)
@echo "SET(OpenBLAS_INCLUDE_DIRS ${OPENBLAS_INCLUDE_DIR})" > $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)
ifndef NO_SHARED
#ifeq logical or
ifeq ($(OSNAME), $(filter $(OSNAME),Linux FreeBSD NetBSD))

View File

@@ -3,7 +3,7 @@
#
# This library's version
VERSION = 0.2.14
VERSION = 0.2.13
# If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a
# and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library
@@ -114,9 +114,6 @@ NO_AFFINITY = 1
# Support for IEEE quad precision(it's *real* REAL*16)( under testing)
# QUAD_PRECISION = 1
# Support for integer matrix and vector (e.g. iaxpy)
# INTEGER_PRECISION = 1
# Theads are still working for a while after finishing BLAS operation
# to reduce thread activate/deactivate overhead. You can determine
# time out to improve performance. This number should be from 4 to 30
@@ -162,19 +159,6 @@ COMMON_PROF = -pg
# Build Debug version
# DEBUG = 1
# Improve GEMV and GER for small matrices by stack allocation.
# For details, https://github.com/xianyi/OpenBLAS/pull/482
#
MAX_STACK_ALLOC=2048
# Add a prefix or suffix to all exported symbol names in the shared library.
# Avoid conflicts with other BLAS libraries, especially when using
# 64 bit integer interfaces in OpenBLAS.
# For details, https://github.com/xianyi/OpenBLAS/pull/459
#
# SYMBOLPREFIX=
# SYMBOLSUFFIX=
#
# End of user configuration
#

View File

@@ -23,7 +23,6 @@ CC = gcc
UNAME_S := $(shell uname -s)
ifeq ($(UNAME_S),Darwin)
CC = clang
# EXTRALIB += -Wl,-no_compact_unwind
endif
endif
@@ -62,12 +61,6 @@ endif
ifeq ($(TARGET), PILEDRIVER)
GETARCH_FLAGS := -DFORCE_BARCELONA
endif
ifeq ($(TARGET), STEAMROLLER)
GETARCH_FLAGS := -DFORCE_BARCELONA
endif
ifeq ($(TARGET), EXCAVATOR)
GETARCH_FLAGS := -DFORCE_BARCELONA
endif
endif
@@ -92,12 +85,6 @@ endif
ifeq ($(TARGET_CORE), PILEDRIVER)
GETARCH_FLAGS := -DFORCE_BARCELONA
endif
ifeq ($(TARGET_CORE), STEAMROLLER)
GETARCH_FLAGS := -DFORCE_BARCELONA
endif
ifeq ($(TARGET_CORE), EXCAVATOR)
GETARCH_FLAGS := -DFORCE_BARCELONA
endif
endif
@@ -202,18 +189,12 @@ DLLWRAP = $(CROSS_SUFFIX)dllwrap
OBJCOPY = $(CROSS_SUFFIX)objcopy
OBJCONV = $(CROSS_SUFFIX)objconv
# For detect fortran failed, only build BLAS.
ifeq ($(NOFORTRAN), 1)
NO_LAPACK = 1
endif
#
# OS dependent settings
#
ifeq ($(OSNAME), Darwin)
export MACOSX_DEPLOYMENT_TARGET=10.6
export MACOSX_DEPLOYMENT_TARGET=10.2
MD5SUM = md5 -r
endif
@@ -309,10 +290,6 @@ CCOMMON_OPT += -DQUAD_PRECISION
NO_EXPRECISION = 1
endif
ifdef INTEGER_PRECISION
CCOMMON_OPT += -DINTEGER_PRECISION
endif
ifneq ($(ARCH), x86)
ifneq ($(ARCH), x86_64)
NO_EXPRECISION = 1
@@ -328,10 +305,6 @@ ifdef SANITY_CHECK
CCOMMON_OPT += -DSANITY_CHECK -DREFNAME=$(*F)f$(BU)
endif
ifdef MAX_STACK_ALLOC
CCOMMON_OPT += -DMAX_STACK_ALLOC=$(MAX_STACK_ALLOC)
endif
#
# Architecture dependent settings
#
@@ -381,12 +354,6 @@ endif
ifeq ($(USE_OPENMP), 1)
#check
ifeq ($(USE_THREAD), 0)
$(error OpenBLAS: Cannot set both USE_OPENMP=1 and USE_THREAD=0. The USE_THREAD=0 is only for building single thread version.)
endif
# ifeq logical or. GCC or LSB
ifeq ($(C_COMPILER), $(filter $(C_COMPILER),GCC LSB))
CCOMMON_OPT += -fopenmp
@@ -425,7 +392,7 @@ endif
ifeq ($(ARCH), x86_64)
DYNAMIC_CORE = PRESCOTT CORE2 PENRYN DUNNINGTON NEHALEM OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO
ifneq ($(NO_AVX), 1)
DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER STEAMROLLER EXCAVATOR
DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER
endif
ifneq ($(NO_AVX2), 1)
DYNAMIC_CORE += HASWELL
@@ -595,7 +562,7 @@ else
FCOMMON_OPT += -m32
endif
endif
ifeq ($(USE_OPENMP), 1)
ifdef USE_OPENMP
FCOMMON_OPT += -fopenmp
endif
endif
@@ -607,14 +574,14 @@ ifneq ($(INTERFACE64), 0)
FCOMMON_OPT += -i8
endif
endif
ifeq ($(USE_OPENMP), 1)
ifdef USE_OPENMP
FCOMMON_OPT += -openmp
endif
endif
ifeq ($(F_COMPILER), FUJITSU)
CCOMMON_OPT += -DF_INTERFACE_FUJITSU
ifeq ($(USE_OPENMP), 1)
ifdef USE_OPENMP
FCOMMON_OPT += -openmp
endif
endif
@@ -632,7 +599,7 @@ endif
else
FCOMMON_OPT += -q32
endif
ifeq ($(USE_OPENMP), 1)
ifdef USE_OPENMP
FCOMMON_OPT += -openmp
endif
endif
@@ -650,7 +617,7 @@ FCOMMON_OPT += -tp p7-64
else
FCOMMON_OPT += -tp p7
endif
ifeq ($(USE_OPENMP), 1)
ifdef USE_OPENMP
FCOMMON_OPT += -mp
endif
endif
@@ -679,7 +646,7 @@ FCOMMON_OPT += -mabi=n32
endif
endif
ifeq ($(USE_OPENMP), 1)
ifdef USE_OPENMP
FCOMMON_OPT += -mp
endif
endif
@@ -716,7 +683,7 @@ FCOMMON_OPT += -m64
endif
endif
ifeq ($(USE_OPENMP), 1)
ifdef USE_OPENMP
FEXTRALIB += -lstdc++
FCOMMON_OPT += -mp
endif
@@ -764,14 +731,14 @@ FCOMMON_OPT += -m32
else
FCOMMON_OPT += -m64
endif
ifeq ($(USE_OPENMP), 1)
ifdef USE_OPENMP
FCOMMON_OPT += -xopenmp=parallel
endif
endif
ifeq ($(F_COMPILER), COMPAQ)
CCOMMON_OPT += -DF_INTERFACE_COMPAQ
ifeq ($(USE_OPENMP), 1)
ifdef USE_OPENMP
FCOMMON_OPT += -openmp
endif
endif

View File

@@ -4,7 +4,6 @@ QBLASOBJS_P = $(QBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
CBLASOBJS_P = $(CBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
ZBLASOBJS_P = $(ZBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
XBLASOBJS_P = $(XBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
IBLASOBJS_P = $(IBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
COMMONOBJS_P = $(COMMONOBJS:.$(SUFFIX)=.$(PSUFFIX))
@@ -23,18 +22,12 @@ BLASOBJS += $(QBLASOBJS) $(XBLASOBJS)
BLASOBJS_P += $(QBLASOBJS_P) $(XBLASOBJS_P)
endif
ifdef INTEGER_PRECISION
BLASOBJS += $(IBLASOBJS)
BLASOBJS_P += $(IBLASOBJS_P)
endif
$(SBLASOBJS) $(SBLASOBJS_P) : override CFLAGS += -UDOUBLE -UCOMPLEX
$(DBLASOBJS) $(DBLASOBJS_P) : override CFLAGS += -DDOUBLE -UCOMPLEX
$(QBLASOBJS) $(QBLASOBJS_P) : override CFLAGS += -DXDOUBLE -UCOMPLEX
$(CBLASOBJS) $(CBLASOBJS_P) : override CFLAGS += -UDOUBLE -DCOMPLEX
$(ZBLASOBJS) $(ZBLASOBJS_P) : override CFLAGS += -DDOUBLE -DCOMPLEX
$(XBLASOBJS) $(XBLASOBJS_P) : override CFLAGS += -DXDOUBLE -DCOMPLEX
$(IBLASOBJS) $(IBLASOBJS_P) : override CFLAGS += -DINTEGER -UCOMPLEX
$(SBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
$(DBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
@@ -42,7 +35,6 @@ $(QBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
$(CBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
$(ZBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
$(XBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
$(IBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
libs :: $(BLASOBJS) $(COMMONOBJS)
$(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^

View File

@@ -60,7 +60,6 @@ Please read GotoBLAS_01Readme.txt
- **AMD Bobcat**: Used GotoBLAS2 Barcelona codes.
- **AMD Bulldozer**: x86-64 ?GEMM FMA4 kernels. (Thank Werner Saar)
- **AMD PILEDRIVER**: Uses Bulldozer codes with some optimizations.
- **AMD STEAMROLLER**: Uses Bulldozer codes with some optimizations.
#### MIPS64:
- **ICT Loongson 3A**: Optimized Level-3 BLAS and the part of Level-1,2.

View File

@@ -32,8 +32,6 @@ ISTANBUL
BOBCAT
BULLDOZER
PILEDRIVER
STEAMROLLER
EXCAVATOR
c)VIA CPU:
SSE_GENERIC
@@ -64,11 +62,6 @@ SPARC
SPARCV7
6.ARM CPU:
CORTEXA15
CORTEXA9
ARMV7
ARMV6
ARMV5
7.ARM 64-bit CPU:
ARMV8

View File

@@ -1,9 +0,0 @@
#!/bin/bash
for f in *.goto *.acml *.mkl *.atlas
do
if [ -f "$f" ]; then
mv $f `echo $f|tr '.' '_'`.exe
fi
done

File diff suppressed because it is too large Load Diff

View File

@@ -1,196 +0,0 @@
/***************************************************************************
Copyright (c) 2014, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#undef ASUM
#ifdef COMPLEX
#ifdef DOUBLE
#define ASUM BLASFUNC(dzasum)
#else
#define ASUM BLASFUNC(scasum)
#endif
#else
#ifdef DOUBLE
#define ASUM BLASFUNC(dasum)
#else
#define ASUM BLASFUNC(sasum)
#endif
#endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
int gettimeofday(struct timeval *tv, void *tz){
FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;
if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);
tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;
/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}
return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
static void *huge_malloc(BLASLONG size){
int shmid;
void *address;
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif
if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}
address = shmat(shmid, NULL, SHM_RND);
if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}
shmctl(shmid, IPC_RMID, 0);
return address;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]){
FLOAT *x;
FLOAT result;
blasint m, i;
blasint inc_x=1;
int loops = 1;
int l;
char *p;
int from = 1;
int to = 200;
int step = 1;
struct timeval start, stop;
double time1,timeg;
argc--;argv++;
if (argc > 0) { from = atol(*argv); argc--; argv++;}
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
if (argc > 0) { step = atol(*argv); argc--; argv++;}
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step,inc_x,loops);
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
#ifdef linux
srandom(getpid());
#endif
fprintf(stderr, " SIZE Flops\n");
for(m = from; m <= to; m += step)
{
timeg=0;
fprintf(stderr, " %6d : ", (int)m);
for (l=0; l<loops; l++)
{
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
gettimeofday( &start, (struct timezone *)0);
result = ASUM (&m, x, &inc_x);
gettimeofday( &stop, (struct timezone *)0);
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
timeg += time1;
}
timeg /= loops;
#ifdef COMPLEX
fprintf(stderr, " %10.2f MFlops\n", 4. * (double)m / timeg * 1.e-6);
#else
fprintf(stderr, " %10.2f MFlops\n", 2. * (double)m / timeg * 1.e-6);
#endif
}
return 0;
}
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

View File

@@ -114,7 +114,7 @@ static void *huge_malloc(BLASLONG size){
#endif
int main(int argc, char *argv[]){
int MAIN__(int argc, char *argv[]){
FLOAT *x, *y;
FLOAT alpha[2] = { 2.0, 2.0 };
@@ -198,4 +198,4 @@ int main(int argc, char *argv[]){
return 0;
}
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

View File

@@ -71,14 +71,8 @@ double fabs(double);
#endif
#endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
int gettimeofday(struct timeval *tv, void *tz){
FILETIME ft;
@@ -105,7 +99,6 @@ int gettimeofday(struct timeval *tv, void *tz){
#endif
static __inline double getmflops(int ratio, int m, double secs){
double mm = (double)m;
@@ -124,7 +117,7 @@ static __inline double getmflops(int ratio, int m, double secs){
}
int main(int argc, char *argv[]){
int MAIN__(int argc, char *argv[]){
#ifndef COMPLEX
char *trans[] = {"T", "N"};
@@ -280,4 +273,4 @@ int main(int argc, char *argv[]){
return 0;
}
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

View File

@@ -1,201 +0,0 @@
/***************************************************************************
Copyright (c) 2014, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#undef COPY
#ifdef COMPLEX
#ifdef DOUBLE
#define COPY BLASFUNC(zcopy)
#else
#define COPY BLASFUNC(ccopy)
#endif
#else
#ifdef DOUBLE
#define COPY BLASFUNC(dcopy)
#else
#define COPY BLASFUNC(scopy)
#endif
#endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
int gettimeofday(struct timeval *tv, void *tz){
FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;
if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);
tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;
/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}
return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
static void *huge_malloc(BLASLONG size){
int shmid;
void *address;
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif
if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}
address = shmat(shmid, NULL, SHM_RND);
if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}
shmctl(shmid, IPC_RMID, 0);
return address;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]){
FLOAT *x, *y;
FLOAT alpha[2] = { 2.0, 2.0 };
blasint m, i;
blasint inc_x=1,inc_y=1;
int loops = 1;
int l;
char *p;
int from = 1;
int to = 200;
int step = 1;
struct timeval start, stop;
double time1,timeg;
argc--;argv++;
if (argc > 0) { from = atol(*argv); argc--; argv++;}
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
if (argc > 0) { step = atol(*argv); argc--; argv++;}
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
if ((p = getenv("OPENBLAS_INCY"))) inc_y = atoi(p);
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Inc_y = %d Loops = %d\n", from, to, step,inc_x,inc_y,loops);
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
#ifdef linux
srandom(getpid());
#endif
fprintf(stderr, " SIZE Flops\n");
for(m = from; m <= to; m += step)
{
timeg=0;
fprintf(stderr, " %6d : ", (int)m);
for (l=0; l<loops; l++)
{
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
gettimeofday( &start, (struct timezone *)0);
COPY (&m, x, &inc_x, y, &inc_y );
gettimeofday( &stop, (struct timezone *)0);
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
timeg += time1;
}
timeg /= loops;
fprintf(stderr,
" %10.2f MBytes\n",
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6);
}
return 0;
}
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

View File

@@ -108,7 +108,7 @@ static void *huge_malloc(BLASLONG size){
#endif
int main(int argc, char *argv[]){
int MAIN__(int argc, char *argv[]){
FLOAT *x, *y;
FLOAT result;
@@ -192,4 +192,4 @@ int main(int argc, char *argv[]){
return 0;
}
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

View File

@@ -139,12 +139,11 @@ static void *huge_malloc(BLASLONG size){
#endif
int main(int argc, char *argv[]){
int MAIN__(int argc, char *argv[]){
FLOAT *a,*vl,*vr,*wi,*wr,*work,*rwork;
FLOAT wkopt[4];
char job='V';
char jobr='N';
char *p;
blasint m, i, j, info,lwork;
@@ -203,9 +202,9 @@ int main(int argc, char *argv[]){
lwork = -1;
m=to;
#ifndef COMPLEX
GEEV (&job, &jobr, &m, a, &m, wr, wi, vl, &m, vr, &m, wkopt, &lwork, &info);
GEEV (&job, &job, &m, a, &m, wr, wi, vl, &m, vr, &m, wkopt, &lwork, &info);
#else
GEEV (&job, &jobr, &m, a, &m, wr, vl, &m, vr, &m, wkopt, &lwork,rwork, &info);
GEEV (&job, &job, &m, a, &m, wr, vl, &m, vr, &m, wkopt, &lwork,rwork, &info);
#endif
lwork = (blasint)wkopt[0];
@@ -227,16 +226,16 @@ int main(int argc, char *argv[]){
lwork = -1;
#ifndef COMPLEX
GEEV (&job, &jobr, &m, a, &m, wr, wi, vl, &m, vr, &m, wkopt, &lwork, &info);
GEEV (&job, &job, &m, a, &m, wr, wi, vl, &m, vr, &m, wkopt, &lwork, &info);
#else
GEEV (&job, &jobr, &m, a, &m, wr, vl, &m, vr, &m, wkopt, &lwork,rwork, &info);
GEEV (&job, &job, &m, a, &m, wr, vl, &m, vr, &m, wkopt, &lwork,rwork, &info);
#endif
lwork = (blasint)wkopt[0];
#ifndef COMPLEX
GEEV (&job, &jobr, &m, a, &m, wr, wi, vl, &m, vr, &m, work, &lwork, &info);
GEEV (&job, &job, &m, a, &m, wr, wi, vl, &m, vr, &m, work, &lwork, &info);
#else
GEEV (&job, &jobr, &m, a, &m, wr, vl, &m, vr, &m, work, &lwork,rwork, &info);
GEEV (&job, &job, &m, a, &m, wr, vl, &m, vr, &m, work, &lwork,rwork, &info);
#endif
gettimeofday( &stop, (struct timezone *)0);
@@ -258,4 +257,4 @@ int main(int argc, char *argv[]){
return 0;
}
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

View File

@@ -118,15 +118,14 @@ static void *huge_malloc(BLASLONG size){
#endif
int main(int argc, char *argv[]){
int MAIN__(int argc, char *argv[]){
FLOAT *a, *b, *c;
FLOAT alpha[] = {1.0, 1.0};
FLOAT beta [] = {0.0, 0.0};
FLOAT beta [] = {1.0, 1.0};
char trans='N';
blasint m, n, i, j;
blasint m, i, j;
int loops = 1;
int has_param_n=0;
int l;
char *p;
@@ -163,61 +162,51 @@ int main(int argc, char *argv[]){
if ( p != NULL )
loops = atoi(p);
if ((p = getenv("OPENBLAS_PARAM_N"))) {
n = atoi(p);
has_param_n=1;
}
#ifdef linux
srandom(getpid());
#endif
for(j = 0; j < m; j++){
for(i = 0; i < to * COMPSIZE; i++){
a[i + j * to * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
b[i + j * to * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
c[i + j * to * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
}
fprintf(stderr, " SIZE Flops Time\n");
fprintf(stderr, " SIZE Flops\n");
for(m = from; m <= to; m += step)
{
timeg=0;
if ( has_param_n == 1 && n <= m )
n=n;
else
n=m;
fprintf(stderr, " %6dx%d : ", (int)m, (int)n);
gettimeofday( &start, (struct timezone *)0);
fprintf(stderr, " %6d : ", (int)m);
for (l=0; l<loops; l++)
{
for(j = 0; j < m; j++){
for(i = 0; i < m * COMPSIZE; i++){
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
b[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
c[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
}
GEMM (&trans, &trans, &m, &n, &m, alpha, a, &m, b, &m, beta, c, &m );
gettimeofday( &start, (struct timezone *)0);
GEMM (&trans, &trans, &m, &m, &m, alpha, a, &m, b, &m, beta, c, &m );
gettimeofday( &stop, (struct timezone *)0);
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
timeg += time1;
}
gettimeofday( &stop, (struct timezone *)0);
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
timeg = time1/loops;
timeg /= loops;
fprintf(stderr,
" %10.2f MFlops %10.6f sec\n",
COMPSIZE * COMPSIZE * 2. * (double)m * (double)m * (double)n / timeg * 1.e-6, time1);
" %10.2f MFlops\n",
COMPSIZE * COMPSIZE * 2. * (double)m * (double)m * (double)m / timeg * 1.e-6);
}
return 0;
}
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

View File

@@ -118,7 +118,7 @@ static void *huge_malloc(BLASLONG size){
#endif
int main(int argc, char *argv[]){
int MAIN__(int argc, char *argv[]){
FLOAT *a, *b, *c;
FLOAT alpha[] = {1.0, 1.0};
@@ -209,4 +209,4 @@ int main(int argc, char *argv[]){
return 0;
}
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

View File

@@ -118,7 +118,7 @@ static void *huge_malloc(BLASLONG size){
#endif
int main(int argc, char *argv[]){
int MAIN__(int argc, char *argv[]){
FLOAT *a, *x, *y;
FLOAT alpha[] = {1.0, 1.0};
@@ -266,4 +266,4 @@ int main(int argc, char *argv[]){
return 0;
}
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

View File

@@ -35,19 +35,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#undef GER
#ifdef COMPLEX
#ifdef DOUBLE
#define GER BLASFUNC(zgeru)
#else
#define GER BLASFUNC(cgeru)
#endif
#else
#ifdef DOUBLE
#define GER BLASFUNC(dger)
#else
#define GER BLASFUNC(sger)
#endif
#endif
#if defined(__WIN32__) || defined(__WIN64__)
@@ -115,7 +108,7 @@ static void *huge_malloc(BLASLONG size){
#endif
int main(int argc, char *argv[]){
int MAIN__(int argc, char *argv[]){
FLOAT *a, *x, *y;
FLOAT alpha[] = {1.0, 1.0};
@@ -221,5 +214,5 @@ int main(int argc, char *argv[]){
return 0;
}
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

View File

@@ -1,218 +0,0 @@
/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin. */
/* All rights reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the following */
/* conditions are met: */
/* */
/* 1. Redistributions of source code must retain the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer. */
/* */
/* 2. Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/* POSSIBILITY OF SUCH DAMAGE. */
/* */
/* The views and conclusions contained in the software and */
/* documentation are those of the authors and should not be */
/* interpreted as representing official policies, either expressed */
/* or implied, of The University of Texas at Austin. */
/*********************************************************************/
#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
double fabs(double);
#undef GESV
#undef GETRS
#ifndef COMPLEX
#ifdef XDOUBLE
#define GESV BLASFUNC(qgesv)
#elif defined(DOUBLE)
#define GESV BLASFUNC(dgesv)
#else
#define GESV BLASFUNC(sgesv)
#endif
#else
#ifdef XDOUBLE
#define GESV BLASFUNC(xgesv)
#elif defined(DOUBLE)
#define GESV BLASFUNC(zgesv)
#else
#define GESV BLASFUNC(cgesv)
#endif
#endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
int gettimeofday(struct timeval *tv, void *tz){
FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;
if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);
tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;
/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}
return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
static void *huge_malloc(BLASLONG size){
int shmid;
void *address;
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif
if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}
address = shmat(shmid, NULL, SHM_RND);
if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}
shmctl(shmid, IPC_RMID, 0);
return address;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]){
FLOAT *a, *b;
blasint *ipiv;
blasint m, i, j, info;
int from = 1;
int to = 200;
int step = 1;
struct timeval start, stop;
double time1;
argc--;argv++;
if (argc > 0) { from = atol(*argv); argc--; argv++;}
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
if (argc > 0) { step = atol(*argv); argc--; argv++;}
fprintf(stderr, "From : %3d To : %3d Step = %3d\n", from, to, step);
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
if (( b = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
if (( ipiv = (blasint *)malloc(sizeof(blasint) * to * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
#ifdef linux
srandom(getpid());
#endif
fprintf(stderr, " SIZE Flops Time\n");
for(m = from; m <= to; m += step){
fprintf(stderr, " %dx%d : ", (int)m, (int)m);
for(j = 0; j < m; j++){
for(i = 0; i < m * COMPSIZE; i++){
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
}
for(j = 0; j < m; j++){
for(i = 0; i < m * COMPSIZE; i++){
b[i + j * m * COMPSIZE] = 0.0;
}
}
for (j = 0; j < m; ++j) {
for (i = 0; i < m * COMPSIZE; ++i) {
b[i] += a[i + j * m * COMPSIZE];
}
}
gettimeofday( &start, (struct timezone *)0);
GESV (&m, &m, a, &m, ipiv, b, &m, &info);
gettimeofday( &stop, (struct timezone *)0);
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
fprintf(stderr,
"%10.2f MFlops %10.6f s\n",
COMPSIZE * COMPSIZE * (2. / 3. * (double)m * (double)m * (double)m + 2. * (double)m * (double)m * (double)m ) / (time1) * 1.e-6 , time1);
}
return 0;
}
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

View File

@@ -137,7 +137,7 @@ static void *huge_malloc(BLASLONG size){
#endif
int main(int argc, char *argv[]){
int MAIN__(int argc, char *argv[]){
FLOAT *a,*work;
FLOAT wkopt[4];
@@ -231,4 +231,4 @@ int main(int argc, char *argv[]){
return 0;
}
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

View File

@@ -107,7 +107,7 @@ static void *huge_malloc(BLASLONG size){
#endif
int main(int argc, char *argv[]){
int MAIN__(int argc, char *argv[]){
FLOAT *a, *b, *c;
FLOAT alpha[] = {1.0, 1.0};
@@ -189,4 +189,4 @@ int main(int argc, char *argv[]){
return 0;
}
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

View File

@@ -108,7 +108,7 @@ static void *huge_malloc(BLASLONG size){
#endif
int main(int argc, char *argv[]){
int MAIN__(int argc, char *argv[]){
FLOAT *a, *x, *y;
FLOAT alpha[] = {1.0, 1.0};
@@ -205,4 +205,4 @@ int main(int argc, char *argv[]){
return 0;
}
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

View File

@@ -106,7 +106,7 @@ static void *huge_malloc(BLASLONG size){
#endif
int main(int argc, char *argv[]){
int MAIN__(int argc, char *argv[]){
FLOAT *a, *b, *c;
FLOAT alpha[] = {1.0, 1.0};
@@ -188,4 +188,4 @@ int main(int argc, char *argv[]){
return 0;
}
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

View File

@@ -108,7 +108,7 @@ static void *huge_malloc(BLASLONG size){
#endif
int main(int argc, char *argv[]){
int MAIN__(int argc, char *argv[]){
FLOAT *a, *c;
FLOAT alpha[] = {1.0, 1.0};
@@ -186,4 +186,4 @@ int main(int argc, char *argv[]){
return 0;
}
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

View File

@@ -137,7 +137,7 @@ static void *huge_malloc(BLASLONG size){
#endif
int main(int argc, char *argv[]){
int MAIN__(int argc, char *argv[]){
FLOAT *a, *b;
blasint *ipiv;
@@ -270,4 +270,4 @@ int main(int argc, char *argv[]){
return 0;
}
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

View File

@@ -52,11 +52,6 @@ C)
awk '/MFlops/ { print $3,int($9) }'|tail --lines=+2
;;
B)
# Copy Benchmark
awk '/MBytes/ { print $1,int($3) }'|tail --lines=+2
;;
*)
awk '/MFlops/ { print $1,int($3) }'|tail --lines=+2

View File

@@ -88,10 +88,6 @@ double fabs(double);
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
int gettimeofday(struct timeval *tv, void *tz){
FILETIME ft;
@@ -118,7 +114,7 @@ int gettimeofday(struct timeval *tv, void *tz){
#endif
int main(int argc, char *argv[]){
int MAIN__(int argc, char *argv[]){
#ifndef COMPLEX
char *trans[] = {"T", "N"};
@@ -282,5 +278,5 @@ int main(int argc, char *argv[]){
return 0;
}
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

View File

@@ -1,202 +0,0 @@
/***************************************************************************
Copyright (c) 2014, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#undef SCAL
#ifdef COMPLEX
#ifdef DOUBLE
#define SCAL BLASFUNC(zscal)
#else
#define SCAL BLASFUNC(cscal)
#endif
#else
#ifdef DOUBLE
#define SCAL BLASFUNC(dscal)
#else
#define SCAL BLASFUNC(sscal)
#endif
#endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
int gettimeofday(struct timeval *tv, void *tz){
FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;
if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);
tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;
/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}
return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
static void *huge_malloc(BLASLONG size){
int shmid;
void *address;
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif
if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}
address = shmat(shmid, NULL, SHM_RND);
if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}
shmctl(shmid, IPC_RMID, 0);
return address;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]){
FLOAT *x, *y;
FLOAT alpha[2] = { 2.0, 2.0 };
blasint m, i;
blasint inc_x=1,inc_y=1;
int loops = 1;
int l;
char *p;
int from = 1;
int to = 200;
int step = 1;
struct timeval start, stop;
double time1,timeg;
argc--;argv++;
if (argc > 0) { from = atol(*argv); argc--; argv++;}
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
if (argc > 0) { step = atol(*argv); argc--; argv++;}
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Inc_y = %d Loops = %d\n", from, to, step,inc_x,inc_y,loops);
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
#ifdef linux
srandom(getpid());
#endif
fprintf(stderr, " SIZE Flops\n");
for(m = from; m <= to; m += step)
{
timeg=0;
fprintf(stderr, " %6d : ", (int)m);
for (l=0; l<loops; l++)
{
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
gettimeofday( &start, (struct timezone *)0);
SCAL (&m, alpha, x, &inc_x);
gettimeofday( &stop, (struct timezone *)0);
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
timeg += time1;
}
timeg /= loops;
#ifdef COMPLEX
fprintf(stderr, " %10.2f MFlops\n", 6. * (double)m / timeg * 1.e-6);
#else
fprintf(stderr, " %10.2f MFlops\n", 1. * (double)m / timeg * 1.e-6);
#endif
}
return 0;
}
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

View File

@@ -1,56 +0,0 @@
#!/usr/bin/python
import os
import sys
import time
import numpy
from numpy.random import randn
def run_cgemm(N,l):
A = randn(N,N).astype('float32') + randn(N,N).astype('float32') * 1j;
B = randn(N,N).astype('float32') + randn(N,N).astype('float32') * 1j;
start = time.time();
for i in range(0,l):
ref = numpy.dot(A,B)
end = time.time()
timediff = (end -start)
mflops = ( 8*N*N*N) *l / timediff
mflops *= 1e-6
size = "%dx%d" % (N,N)
print("%14s :\t%20f MFlops\t%20f sec" % (size,mflops,timediff))
if __name__ == "__main__":
N=128
NMAX=2048
NINC=128
LOOPS=1
z=0
for arg in sys.argv:
if z == 1:
N = int(arg)
elif z == 2:
NMAX = int(arg)
elif z == 3:
NINC = int(arg)
elif z == 4:
LOOPS = int(arg)
z = z + 1
if 'OPENBLAS_LOOPS' in os.environ:
p = os.environ['OPENBLAS_LOOPS']
if p:
LOOPS = int(p);
print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")
for i in range (N,NMAX+NINC,NINC):
run_cgemm(i,LOOPS)

View File

@@ -1,56 +0,0 @@
#!/usr/bin/python
import os
import sys
import time
import numpy
from numpy.random import randn
def run_cgemv(N,l):
A = randn(N,N).astype('float32') + randn(N,N).astype('float32') * 1j;
B = randn(N).astype('float32') + randn(N).astype('float32') * 1j;
start = time.time();
for i in range(0,l):
ref = numpy.dot(A,B)
end = time.time()
timediff = (end -start)
mflops = ( 8*N*N) *l / timediff
mflops *= 1e-6
size = "%dx%d" % (N,N)
print("%14s :\t%20f MFlops\t%20f sec" % (size,mflops,timediff))
if __name__ == "__main__":
N=128
NMAX=2048
NINC=128
LOOPS=1
z=0
for arg in sys.argv:
if z == 1:
N = int(arg)
elif z == 2:
NMAX = int(arg)
elif z == 3:
NINC = int(arg)
elif z == 4:
LOOPS = int(arg)
z = z + 1
if 'OPENBLAS_LOOPS' in os.environ:
p = os.environ['OPENBLAS_LOOPS']
if p:
LOOPS = int(p);
print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")
for i in range (N,NMAX+NINC,NINC):
run_cgemv(i,LOOPS)

View File

@@ -1,58 +0,0 @@
#!/usr/bin/python
import os
import sys
import time
import numpy
from numpy.random import randn
from scipy.linalg.blas import daxpy
def run_daxpy(N,l):
x = randn(N).astype('float64')
y = randn(N).astype('float64')
start = time.time();
for i in range(0,l):
y = daxpy(x,y, a=2.0 )
end = time.time()
timediff = (end -start)
mflops = ( 2*N ) *l / timediff
mflops *= 1e-6
size = "%d" % (N)
print("%14s :\t%20f MFlops\t%20f sec" % (size,mflops,timediff))
if __name__ == "__main__":
N=128
NMAX=2048
NINC=128
LOOPS=1
z=0
for arg in sys.argv:
if z == 1:
N = int(arg)
elif z == 2:
NMAX = int(arg)
elif z == 3:
NINC = int(arg)
elif z == 4:
LOOPS = int(arg)
z = z + 1
if 'OPENBLAS_LOOPS' in os.environ:
p = os.environ['OPENBLAS_LOOPS']
if p:
LOOPS = int(p);
print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")
for i in range (N,NMAX+NINC,NINC):
run_daxpy(i,LOOPS)

View File

@@ -1,56 +0,0 @@
#!/usr/bin/python
import os
import sys
import time
import numpy
from numpy.random import randn
def run_ddot(N,l):
A = randn(N).astype('float64')
B = randn(N).astype('float64')
start = time.time();
for i in range(0,l):
ref = numpy.dot(A,B)
end = time.time()
timediff = (end -start)
mflops = ( 2*N ) *l / timediff
mflops *= 1e-6
size = "%d" % (N)
print("%14s :\t%20f MFlops\t%20f sec" % (size,mflops,timediff))
if __name__ == "__main__":
N=128
NMAX=2048
NINC=128
LOOPS=1
z=0
for arg in sys.argv:
if z == 1:
N = int(arg)
elif z == 2:
NMAX = int(arg)
elif z == 3:
NINC = int(arg)
elif z == 4:
LOOPS = int(arg)
z = z + 1
if 'OPENBLAS_LOOPS' in os.environ:
p = os.environ['OPENBLAS_LOOPS']
if p:
LOOPS = int(p);
print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")
for i in range (N,NMAX+NINC,NINC):
run_ddot(i,LOOPS)

View File

@@ -1,55 +0,0 @@
#!/usr/bin/python
import os
import sys
import time
import numpy
from numpy.random import randn
def run_deig(N,l):
A = randn(N,N).astype('float64')
start = time.time();
for i in range(0,l):
la,v = numpy.linalg.eig(A)
end = time.time()
timediff = (end -start)
mflops = ( 26.33 *N*N*N) *l / timediff
mflops *= 1e-6
size = "%dx%d" % (N,N)
print("%14s :\t%20f MFlops\t%20f sec" % (size,mflops,timediff))
if __name__ == "__main__":
N=128
NMAX=2048
NINC=128
LOOPS=1
z=0
for arg in sys.argv:
if z == 1:
N = int(arg)
elif z == 2:
NMAX = int(arg)
elif z == 3:
NINC = int(arg)
elif z == 4:
LOOPS = int(arg)
z = z + 1
if 'OPENBLAS_LOOPS' in os.environ:
p = os.environ['OPENBLAS_LOOPS']
if p:
LOOPS = int(p);
print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")
for i in range (N,NMAX+NINC,NINC):
run_deig(i,LOOPS)

View File

@@ -1,56 +0,0 @@
#!/usr/bin/python
import os
import sys
import time
import numpy
from numpy.random import randn
def run_dgemm(N,l):
A = randn(N,N).astype('float64')
B = randn(N,N).astype('float64')
start = time.time();
for i in range(0,l):
ref = numpy.dot(A,B)
end = time.time()
timediff = (end -start)
mflops = ( 2*N*N*N) *l / timediff
mflops *= 1e-6
size = "%dx%d" % (N,N)
print("%14s :\t%20f MFlops\t%20f sec" % (size,mflops,timediff))
if __name__ == "__main__":
N=128
NMAX=2048
NINC=128
LOOPS=1
z=0
for arg in sys.argv:
if z == 1:
N = int(arg)
elif z == 2:
NMAX = int(arg)
elif z == 3:
NINC = int(arg)
elif z == 4:
LOOPS = int(arg)
z = z + 1
if 'OPENBLAS_LOOPS' in os.environ:
p = os.environ['OPENBLAS_LOOPS']
if p:
LOOPS = int(p);
print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")
for i in range (N,NMAX+NINC,NINC):
run_dgemm(i,LOOPS)

View File

@@ -1,56 +0,0 @@
#!/usr/bin/python
import os
import sys
import time
import numpy
from numpy.random import randn
def run_dgemv(N,l):
A = randn(N,N).astype('float64')
B = randn(N).astype('float64')
start = time.time();
for i in range(0,l):
ref = numpy.dot(A,B)
end = time.time()
timediff = (end -start)
mflops = ( 2*N*N) *l / timediff
mflops *= 1e-6
size = "%dx%d" % (N,N)
print("%14s :\t%20f MFlops\t%20f sec" % (size,mflops,timediff))
if __name__ == "__main__":
N=128
NMAX=2048
NINC=128
LOOPS=1
z=0
for arg in sys.argv:
if z == 1:
N = int(arg)
elif z == 2:
NMAX = int(arg)
elif z == 3:
NINC = int(arg)
elif z == 4:
LOOPS = int(arg)
z = z + 1
if 'OPENBLAS_LOOPS' in os.environ:
p = os.environ['OPENBLAS_LOOPS']
if p:
LOOPS = int(p);
print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")
for i in range (N,NMAX+NINC,NINC):
run_dgemv(i,LOOPS)

View File

@@ -1,58 +0,0 @@
#!/usr/bin/python
import os
import sys
import time
import numpy
from numpy.random import randn
from scipy.linalg.lapack import dgesv
def run_dgesv(N,l):
a = randn(N,N).astype('float64')
b = randn(N,N).astype('float64')
start = time.time();
for i in range(0,l):
dgesv(a,b,1,1)
end = time.time()
timediff = (end -start)
mflops = ( 2.0/3.0 *N*N*N + 2.0*N*N*N) *l / timediff
mflops *= 1e-6
size = "%dx%d" % (N,N)
print("%14s :\t%20f MFlops\t%20f sec" % (size,mflops,timediff))
if __name__ == "__main__":
N=128
NMAX=2048
NINC=128
LOOPS=1
z=0
for arg in sys.argv:
if z == 1:
N = int(arg)
elif z == 2:
NMAX = int(arg)
elif z == 3:
NINC = int(arg)
elif z == 4:
LOOPS = int(arg)
z = z + 1
if 'OPENBLAS_LOOPS' in os.environ:
p = os.environ['OPENBLAS_LOOPS']
if p:
LOOPS = int(p);
print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")
for i in range (N,NMAX+NINC,NINC):
run_dgesv(i,LOOPS)

View File

@@ -1,56 +0,0 @@
#!/usr/bin/python
import os
import sys
import time
import numpy
from numpy.random import randn
def run_dsolve(N,l):
A = randn(N,N).astype('float64')
B = randn(N,N).astype('float64')
start = time.time();
for i in range(0,l):
ref = numpy.linalg.solve(A,B)
end = time.time()
timediff = (end -start)
mflops = ( 2.0/3.0 *N*N*N + 2.0*N*N*N) *l / timediff
mflops *= 1e-6
size = "%dx%d" % (N,N)
print("%14s :\t%20f MFlops\t%20f sec" % (size,mflops,timediff))
if __name__ == "__main__":
N=128
NMAX=2048
NINC=128
LOOPS=1
z=0
for arg in sys.argv:
if z == 1:
N = int(arg)
elif z == 2:
NMAX = int(arg)
elif z == 3:
NINC = int(arg)
elif z == 4:
LOOPS = int(arg)
z = z + 1
if 'OPENBLAS_LOOPS' in os.environ:
p = os.environ['OPENBLAS_LOOPS']
if p:
LOOPS = int(p);
print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")
for i in range (N,NMAX+NINC,NINC):
run_dsolve(i,LOOPS)

View File

@@ -1,56 +0,0 @@
#!/usr/bin/python
import os
import sys
import time
import numpy
from numpy.random import randn
def run_sdot(N,l):
A = randn(N).astype('float32')
B = randn(N).astype('float32')
start = time.time();
for i in range(0,l):
ref = numpy.dot(A,B)
end = time.time()
timediff = (end -start)
mflops = ( 2*N ) *l / timediff
mflops *= 1e-6
size = "%d" % (N)
print("%14s :\t%20f MFlops\t%20f sec" % (size,mflops,timediff))
if __name__ == "__main__":
N=128
NMAX=2048
NINC=128
LOOPS=1
z=0
for arg in sys.argv:
if z == 1:
N = int(arg)
elif z == 2:
NMAX = int(arg)
elif z == 3:
NINC = int(arg)
elif z == 4:
LOOPS = int(arg)
z = z + 1
if 'OPENBLAS_LOOPS' in os.environ:
p = os.environ['OPENBLAS_LOOPS']
if p:
LOOPS = int(p);
print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")
for i in range (N,NMAX+NINC,NINC):
run_sdot(i,LOOPS)

View File

@@ -1,56 +0,0 @@
#!/usr/bin/python
import os
import sys
import time
import numpy
from numpy.random import randn
def run_sgemm(N,l):
A = randn(N,N).astype('float32')
B = randn(N,N).astype('float32')
start = time.time();
for i in range(0,l):
ref = numpy.dot(A,B)
end = time.time()
timediff = (end -start)
mflops = ( 2*N*N*N) *l / timediff
mflops *= 1e-6
size = "%dx%d" % (N,N)
print("%14s :\t%20f MFlops\t%20f sec" % (size,mflops,timediff))
if __name__ == "__main__":
N=128
NMAX=2048
NINC=128
LOOPS=1
z=0
for arg in sys.argv:
if z == 1:
N = int(arg)
elif z == 2:
NMAX = int(arg)
elif z == 3:
NINC = int(arg)
elif z == 4:
LOOPS = int(arg)
z = z + 1
if 'OPENBLAS_LOOPS' in os.environ:
p = os.environ['OPENBLAS_LOOPS']
if p:
LOOPS = int(p);
print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")
for i in range (N,NMAX+NINC,NINC):
run_sgemm(i,LOOPS)

View File

@@ -1,56 +0,0 @@
#!/usr/bin/python
import os
import sys
import time
import numpy
from numpy.random import randn
def run_sgemv(N,l):
A = randn(N,N).astype('float32')
B = randn(N).astype('float32')
start = time.time();
for i in range(0,l):
ref = numpy.dot(A,B)
end = time.time()
timediff = (end -start)
mflops = ( 2*N*N) *l / timediff
mflops *= 1e-6
size = "%dx%d" % (N,N)
print("%14s :\t%20f MFlops\t%20f sec" % (size,mflops,timediff))
if __name__ == "__main__":
N=128
NMAX=2048
NINC=128
LOOPS=1
z=0
for arg in sys.argv:
if z == 1:
N = int(arg)
elif z == 2:
NMAX = int(arg)
elif z == 3:
NINC = int(arg)
elif z == 4:
LOOPS = int(arg)
z = z + 1
if 'OPENBLAS_LOOPS' in os.environ:
p = os.environ['OPENBLAS_LOOPS']
if p:
LOOPS = int(p);
print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")
for i in range (N,NMAX+NINC,NINC):
run_sgemv(i,LOOPS)

View File

@@ -1,56 +0,0 @@
#!/usr/bin/python
import os
import sys
import time
import numpy
from numpy.random import randn
def run_zgemm(N,l):
A = randn(N,N).astype('float64') + randn(N,N).astype('float64') * 1j;
B = randn(N,N).astype('float64') + randn(N,N).astype('float64') * 1j;
start = time.time();
for i in range(0,l):
ref = numpy.dot(A,B)
end = time.time()
timediff = (end -start)
mflops = ( 8*N*N*N) *l / timediff
mflops *= 1e-6
size = "%dx%d" % (N,N)
print("%14s :\t%20f MFlops\t%20f sec" % (size,mflops,timediff))
if __name__ == "__main__":
N=128
NMAX=2048
NINC=128
LOOPS=1
z=0
for arg in sys.argv:
if z == 1:
N = int(arg)
elif z == 2:
NMAX = int(arg)
elif z == 3:
NINC = int(arg)
elif z == 4:
LOOPS = int(arg)
z = z + 1
if 'OPENBLAS_LOOPS' in os.environ:
p = os.environ['OPENBLAS_LOOPS']
if p:
LOOPS = int(p);
print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")
for i in range (N,NMAX+NINC,NINC):
run_zgemm(i,LOOPS)

View File

@@ -1,56 +0,0 @@
#!/usr/bin/python
import os
import sys
import time
import numpy
from numpy.random import randn
def run_zgemv(N,l):
A = randn(N,N).astype('float64') + randn(N,N).astype('float64') * 1j;
B = randn(N).astype('float64') + randn(N).astype('float64') * 1j;
start = time.time();
for i in range(0,l):
ref = numpy.dot(A,B)
end = time.time()
timediff = (end -start)
mflops = ( 8*N*N) *l / timediff
mflops *= 1e-6
size = "%dx%d" % (N,N)
print("%14s :\t%20f MFlops\t%20f sec" % (size,mflops,timediff))
if __name__ == "__main__":
N=128
NMAX=2048
NINC=128
LOOPS=1
z=0
for arg in sys.argv:
if z == 1:
N = int(arg)
elif z == 2:
NMAX = int(arg)
elif z == 3:
NINC = int(arg)
elif z == 4:
LOOPS = int(arg)
z = z + 1
if 'OPENBLAS_LOOPS' in os.environ:
p = os.environ['OPENBLAS_LOOPS']
if p:
LOOPS = int(p);
print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")
for i in range (N,NMAX+NINC,NINC):
run_zgemv(i,LOOPS)

View File

@@ -1,56 +0,0 @@
#!/usr/bin/octave --silent
nfrom = 128 ;
nto = 2048;
nstep = 128;
loops = 1;
arg_list = argv();
for i = 1:nargin
switch(i)
case 1
nfrom = str2num(arg_list{i});
case 2
nto = str2num(arg_list{i});
case 3
nstep = str2num(arg_list{i});
case 4
loops = str2num(arg_list{i});
endswitch
endfor
p = getenv("OPENBLAS_LOOPS");
if p
loops = str2num(p);
endif
printf("From %d To %d Step=%d Loops=%d\n",nfrom, nto, nstep, loops);
printf(" SIZE FLOPS TIME\n");
n = nfrom;
while n <= nto
A = single(rand(n,n)) + single(rand(n,n)) * 1i;
B = single(rand(n,n)) + single(rand(n,n)) * 1i;
start = clock();
l=0;
while l < loops
C = A * B;
l = l + 1;
endwhile
timeg = etime(clock(), start);
mflops = ( 4.0 * 2.0*n*n*n *loops ) / ( timeg * 1.0e6 );
st1 = sprintf("%dx%d : ", n,n);
printf("%20s %10.2f MFlops %10.6f sec\n", st1, mflops, timeg);
n = n + nstep;
endwhile

View File

@@ -1,56 +0,0 @@
#!/usr/bin/octave --silent
nfrom = 128 ;
nto = 2048;
nstep = 128;
loops = 1;
arg_list = argv();
for i = 1:nargin
switch(i)
case 1
nfrom = str2num(arg_list{i});
case 2
nto = str2num(arg_list{i});
case 3
nstep = str2num(arg_list{i});
case 4
loops = str2num(arg_list{i});
endswitch
endfor
p = getenv("OPENBLAS_LOOPS");
if p
loops = str2num(p);
endif
printf("From %d To %d Step=%d Loops=%d\n",nfrom, nto, nstep, loops);
printf(" SIZE FLOPS TIME\n");
n = nfrom;
while n <= nto
A = single(rand(n,n)) + single(rand(n,n)) * 1i;
B = single(rand(n,1)) + single(rand(n,1)) * 1i;
start = clock();
l=0;
while l < loops
C = A * B;
l = l + 1;
endwhile
timeg = etime(clock(), start);
mflops = ( 4.0 * 2.0*n*n *loops ) / ( timeg * 1.0e6 );
st1 = sprintf("%dx%d : ", n,n);
printf("%20s %10.2f MFlops %10.6f sec\n", st1, mflops, timeg);
n = n + nstep;
endwhile

View File

@@ -1,56 +0,0 @@
#!/usr/bin/octave --silent
nfrom = 128 ;
nto = 2048;
nstep = 128;
loops = 1;
arg_list = argv();
for i = 1:nargin
switch(i)
case 1
nfrom = str2num(arg_list{i});
case 2
nto = str2num(arg_list{i});
case 3
nstep = str2num(arg_list{i});
case 4
loops = str2num(arg_list{i});
endswitch
endfor
p = getenv("OPENBLAS_LOOPS");
if p
loops = str2num(p);
endif
printf("From %d To %d Step=%d Loops=%d\n",nfrom, nto, nstep, loops);
printf(" SIZE FLOPS TIME\n");
n = nfrom;
while n <= nto
A = double(rand(n,n));
start = clock();
l=0;
while l < loops
[V,lambda] = eig(A);
l = l + 1;
endwhile
timeg = etime(clock(), start);
mflops = ( 26.33 *n*n*n ) *loops / ( timeg * 1.0e6 );
st1 = sprintf("%dx%d : ", n,n);
printf("%20s %10.2f MFlops %10.6f sec\n", st1, mflops, timeg );
n = n + nstep;
endwhile

View File

@@ -1,56 +0,0 @@
#!/usr/bin/octave --silent
nfrom = 128 ;
nto = 2048;
nstep = 128;
loops = 1;
arg_list = argv();
for i = 1:nargin
switch(i)
case 1
nfrom = str2num(arg_list{i});
case 2
nto = str2num(arg_list{i});
case 3
nstep = str2num(arg_list{i});
case 4
loops = str2num(arg_list{i});
endswitch
endfor
p = getenv("OPENBLAS_LOOPS");
if p
loops = str2num(p);
endif
printf("From %d To %d Step=%d Loops=%d\n",nfrom, nto, nstep, loops);
printf(" SIZE FLOPS TIME\n");
n = nfrom;
while n <= nto
A = double(rand(n,n));
B = double(rand(n,n));
start = clock();
l=0;
while l < loops
C = A * B;
l = l + 1;
endwhile
timeg = etime(clock(), start);
mflops = ( 2.0*n*n*n *loops ) / ( timeg * 1.0e6 );
st1 = sprintf("%dx%d : ", n,n);
printf("%20s %10.2f MFlops %10.6f sec\n", st1, mflops, timeg);
n = n + nstep;
endwhile

View File

@@ -1,56 +0,0 @@
#!/usr/bin/octave --silent
nfrom = 128 ;
nto = 2048;
nstep = 128;
loops = 1;
arg_list = argv();
for i = 1:nargin
switch(i)
case 1
nfrom = str2num(arg_list{i});
case 2
nto = str2num(arg_list{i});
case 3
nstep = str2num(arg_list{i});
case 4
loops = str2num(arg_list{i});
endswitch
endfor
p = getenv("OPENBLAS_LOOPS");
if p
loops = str2num(p);
endif
printf("From %d To %d Step=%d Loops=%d\n",nfrom, nto, nstep, loops);
printf(" SIZE FLOPS TIME\n");
n = nfrom;
while n <= nto
A = double(rand(n,n));
B = double(rand(n,1));
start = clock();
l=0;
while l < loops
C = A * B;
l = l + 1;
endwhile
timeg = etime(clock(), start);
mflops = ( 2.0*n*n *loops ) / ( timeg * 1.0e6 );
st1 = sprintf("%dx%d : ", n,n);
printf("%20s %10.2f MFlops %10.6f sec\n", st1, mflops, timeg);
n = n + nstep;
endwhile

View File

@@ -1,59 +0,0 @@
#!/usr/bin/octave --silent
nfrom = 128 ;
nto = 2048;
nstep = 128;
loops = 1;
arg_list = argv();
for i = 1:nargin
switch(i)
case 1
nfrom = str2num(arg_list{i});
case 2
nto = str2num(arg_list{i});
case 3
nstep = str2num(arg_list{i});
case 4
loops = str2num(arg_list{i});
endswitch
endfor
p = getenv("OPENBLAS_LOOPS");
if p
loops = str2num(p);
endif
printf("From %d To %d Step=%d Loops=%d\n",nfrom, nto, nstep, loops);
printf(" SIZE FLOPS TIME\n");
n = nfrom;
while n <= nto
A = double(rand(n,n));
B = double(rand(n,n));
start = clock();
l=0;
while l < loops
x = linsolve(A,B);
#x = A / B;
l = l + 1;
endwhile
timeg = etime(clock(), start);
#r = norm(A*x - B)/norm(B)
mflops = ( 2.0/3.0 *n*n*n + 2.0*n*n*n ) *loops / ( timeg * 1.0e6 );
st1 = sprintf("%dx%d : ", n,n);
printf("%20s %10.2f MFlops %10.6f sec\n", st1, mflops, timeg );
n = n + nstep;
endwhile

View File

@@ -1,56 +0,0 @@
#!/usr/bin/octave --silent
nfrom = 128 ;
nto = 2048;
nstep = 128;
loops = 1;
arg_list = argv();
for i = 1:nargin
switch(i)
case 1
nfrom = str2num(arg_list{i});
case 2
nto = str2num(arg_list{i});
case 3
nstep = str2num(arg_list{i});
case 4
loops = str2num(arg_list{i});
endswitch
endfor
p = getenv("OPENBLAS_LOOPS");
if p
loops = str2num(p);
endif
printf("From %d To %d Step=%d Loops=%d\n",nfrom, nto, nstep, loops);
printf(" SIZE FLOPS TIME\n");
n = nfrom;
while n <= nto
A = single(rand(n,n));
B = single(rand(n,n));
start = clock();
l=0;
while l < loops
C = A * B;
l = l + 1;
endwhile
timeg = etime(clock(), start);
mflops = ( 2.0*n*n*n *loops ) / ( timeg * 1.0e6 );
st1 = sprintf("%dx%d : ", n,n);
printf("%20s %10.2f MFlops %10.6f sec\n", st1, mflops, timeg);
n = n + nstep;
endwhile

View File

@@ -1,56 +0,0 @@
#!/usr/bin/octave --silent
nfrom = 128 ;
nto = 2048;
nstep = 128;
loops = 1;
arg_list = argv();
for i = 1:nargin
switch(i)
case 1
nfrom = str2num(arg_list{i});
case 2
nto = str2num(arg_list{i});
case 3
nstep = str2num(arg_list{i});
case 4
loops = str2num(arg_list{i});
endswitch
endfor
p = getenv("OPENBLAS_LOOPS");
if p
loops = str2num(p);
endif
printf("From %d To %d Step=%d Loops=%d\n",nfrom, nto, nstep, loops);
printf(" SIZE FLOPS TIME\n");
n = nfrom;
while n <= nto
A = single(rand(n,n));
B = single(rand(n,1));
start = clock();
l=0;
while l < loops
C = A * B;
l = l + 1;
endwhile
timeg = etime(clock(), start);
mflops = ( 2.0*n*n *loops ) / ( timeg * 1.0e6 );
st1 = sprintf("%dx%d : ", n,n);
printf("%20s %10.2f MFlops %10.6f sec\n", st1, mflops, timeg);
n = n + nstep;
endwhile

View File

@@ -1,56 +0,0 @@
#!/usr/bin/octave --silent
nfrom = 128 ;
nto = 2048;
nstep = 128;
loops = 1;
arg_list = argv();
for i = 1:nargin
switch(i)
case 1
nfrom = str2num(arg_list{i});
case 2
nto = str2num(arg_list{i});
case 3
nstep = str2num(arg_list{i});
case 4
loops = str2num(arg_list{i});
endswitch
endfor
p = getenv("OPENBLAS_LOOPS");
if p
loops = str2num(p);
endif
printf("From %d To %d Step=%d Loops=%d\n",nfrom, nto, nstep, loops);
printf(" SIZE FLOPS TIME\n");
n = nfrom;
while n <= nto
A = double(rand(n,n)) + double(rand(n,n)) * 1i;
B = double(rand(n,n)) + double(rand(n,n)) * 1i;
start = clock();
l=0;
while l < loops
C = A * B;
l = l + 1;
endwhile
timeg = etime(clock(), start);
mflops = ( 4.0 * 2.0*n*n*n *loops ) / ( timeg * 1.0e6 );
st1 = sprintf("%dx%d : ", n,n);
printf("%20s %10.2f MFlops %10.6f sec\n", st1, mflops, timeg);
n = n + nstep;
endwhile

View File

@@ -1,56 +0,0 @@
#!/usr/bin/octave --silent
nfrom = 128 ;
nto = 2048;
nstep = 128;
loops = 1;
arg_list = argv();
for i = 1:nargin
switch(i)
case 1
nfrom = str2num(arg_list{i});
case 2
nto = str2num(arg_list{i});
case 3
nstep = str2num(arg_list{i});
case 4
loops = str2num(arg_list{i});
endswitch
endfor
p = getenv("OPENBLAS_LOOPS");
if p
loops = str2num(p);
endif
printf("From %d To %d Step=%d Loops=%d\n",nfrom, nto, nstep, loops);
printf(" SIZE FLOPS TIME\n");
n = nfrom;
while n <= nto
A = double(rand(n,n)) + double(rand(n,n)) * 1i;
B = double(rand(n,1)) + double(rand(n,1)) * 1i;
start = clock();
l=0;
while l < loops
C = A * B;
l = l + 1;
endwhile
timeg = etime(clock(), start);
mflops = ( 4.0 * 2.0*n*n *loops ) / ( timeg * 1.0e6 );
st1 = sprintf("%dx%d : ", n,n);
printf("%20s %10.2f MFlops %10.6f sec\n", st1, mflops, timeg);
n = n + nstep;
endwhile

View File

@@ -1,62 +0,0 @@
#!/usr/bin/Rscript
argv <- commandArgs(trailingOnly = TRUE)
nfrom = 128
nto = 2048
nstep = 128
loops = 1
if ( length(argv) > 0 ) {
for ( z in 1:length(argv) ) {
if ( z == 1 ) {
nfrom <- as.numeric(argv[z])
} else if ( z==2 ) {
nto <- as.numeric(argv[z])
} else if ( z==3 ) {
nstep <- as.numeric(argv[z])
} else if ( z==4 ) {
loops <- as.numeric(argv[z])
}
}
}
p=Sys.getenv("OPENBLAS_LOOPS")
if ( p != "" ) {
loops <- as.numeric(p)
}
cat(sprintf("From %.0f To %.0f Step=%.0f Loops=%.0f\n",nfrom, nto, nstep, loops))
cat(sprintf(" SIZE Flops Time\n"))
n = nfrom
while ( n <= nto ) {
A <- matrix(runif(n*n), ncol = n, nrow = n, byrow = TRUE)
l = 1
start <- proc.time()[3]
while ( l <= loops ) {
ev <- eigen(A)
l = l + 1
}
end <- proc.time()[3]
timeg = end - start
mflops = (26.66 *n*n*n ) * loops / ( timeg * 1.0e6 )
st = sprintf("%.0fx%.0f :",n , n)
cat(sprintf("%20s %10.2f MFlops %10.6f sec\n", st, mflops, timeg))
n = n + nstep
}

View File

@@ -1,63 +0,0 @@
#!/usr/bin/Rscript
argv <- commandArgs(trailingOnly = TRUE)
nfrom = 128
nto = 2048
nstep = 128
loops = 1
if ( length(argv) > 0 ) {
for ( z in 1:length(argv) ) {
if ( z == 1 ) {
nfrom <- as.numeric(argv[z])
} else if ( z==2 ) {
nto <- as.numeric(argv[z])
} else if ( z==3 ) {
nstep <- as.numeric(argv[z])
} else if ( z==4 ) {
loops <- as.numeric(argv[z])
}
}
}
p=Sys.getenv("OPENBLAS_LOOPS")
if ( p != "" ) {
loops <- as.numeric(p)
}
cat(sprintf("From %.0f To %.0f Step=%.0f Loops=%.0f\n",nfrom, nto, nstep, loops))
cat(sprintf(" SIZE Flops Time\n"))
n = nfrom
while ( n <= nto ) {
A <- matrix(runif(n*n), ncol = n, nrow = n, byrow = TRUE)
B <- matrix(runif(n*n), ncol = n, nrow = n, byrow = TRUE)
l = 1
start <- proc.time()[3]
while ( l <= loops ) {
C <- A %*% B
l = l + 1
}
end <- proc.time()[3]
timeg = end - start
mflops = ( 2.0 *n*n*n ) * loops / ( timeg * 1.0e6 )
st = sprintf("%.0fx%.0f :",n , n)
cat(sprintf("%20s %10.2f MFlops %10.6f sec\n", st, mflops, timeg))
n = n + nstep
}

View File

@@ -1,63 +0,0 @@
#!/usr/bin/Rscript
argv <- commandArgs(trailingOnly = TRUE)
nfrom = 128
nto = 2048
nstep = 128
loops = 1
if ( length(argv) > 0 ) {
for ( z in 1:length(argv) ) {
if ( z == 1 ) {
nfrom <- as.numeric(argv[z])
} else if ( z==2 ) {
nto <- as.numeric(argv[z])
} else if ( z==3 ) {
nstep <- as.numeric(argv[z])
} else if ( z==4 ) {
loops <- as.numeric(argv[z])
}
}
}
p=Sys.getenv("OPENBLAS_LOOPS")
if ( p != "" ) {
loops <- as.numeric(p)
}
cat(sprintf("From %.0f To %.0f Step=%.0f Loops=%.0f\n",nfrom, nto, nstep, loops))
cat(sprintf(" SIZE Flops Time\n"))
n = nfrom
while ( n <= nto ) {
A <- matrix(runif(n*n), ncol = n, nrow = n, byrow = TRUE)
B <- matrix(runif(n*n), ncol = n, nrow = n, byrow = TRUE)
l = 1
start <- proc.time()[3]
while ( l <= loops ) {
solve(A,B)
l = l + 1
}
end <- proc.time()[3]
timeg = end - start
mflops = (2.0/3.0 *n*n*n + 2.0 *n*n*n ) * loops / ( timeg * 1.0e6 )
st = sprintf("%.0fx%.0f :",n , n)
cat(sprintf("%20s %10.2f MFlops %10.6f sec\n", st, mflops, timeg))
n = n + nstep
}

View File

@@ -1,201 +0,0 @@
/***************************************************************************
Copyright (c) 2014, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above swapright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above swapright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE SWAPRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#undef SWAP
#ifdef COMPLEX
#ifdef DOUBLE
#define SWAP BLASFUNC(zswap)
#else
#define SWAP BLASFUNC(cswap)
#endif
#else
#ifdef DOUBLE
#define SWAP BLASFUNC(dswap)
#else
#define SWAP BLASFUNC(sswap)
#endif
#endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
int gettimeofday(struct timeval *tv, void *tz){
FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;
if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);
tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;
/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}
return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
static void *huge_malloc(BLASLONG size){
int shmid;
void *address;
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif
if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}
address = shmat(shmid, NULL, SHM_RND);
if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}
shmctl(shmid, IPC_RMID, 0);
return address;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]){
FLOAT *x, *y;
FLOAT alpha[2] = { 2.0, 2.0 };
blasint m, i;
blasint inc_x=1,inc_y=1;
int loops = 1;
int l;
char *p;
int from = 1;
int to = 200;
int step = 1;
struct timeval start, stop;
double time1,timeg;
argc--;argv++;
if (argc > 0) { from = atol(*argv); argc--; argv++;}
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
if (argc > 0) { step = atol(*argv); argc--; argv++;}
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
if ((p = getenv("OPENBLAS_INCY"))) inc_y = atoi(p);
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Inc_y = %d Loops = %d\n", from, to, step,inc_x,inc_y,loops);
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
#ifdef linux
srandom(getpid());
#endif
fprintf(stderr, " SIZE Flops\n");
for(m = from; m <= to; m += step)
{
timeg=0;
fprintf(stderr, " %6d : ", (int)m);
for (l=0; l<loops; l++)
{
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
gettimeofday( &start, (struct timezone *)0);
SWAP (&m, x, &inc_x, y, &inc_y );
gettimeofday( &stop, (struct timezone *)0);
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
timeg += time1;
}
timeg /= loops;
fprintf(stderr,
" %10.2f MBytes\n",
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6);
}
return 0;
}
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

View File

@@ -118,7 +118,7 @@ static void *huge_malloc(BLASLONG size){
#endif
int main(int argc, char *argv[]){
int MAIN__(int argc, char *argv[]){
FLOAT *a, *b, *c;
FLOAT alpha[] = {1.0, 1.0};
@@ -200,4 +200,4 @@ int main(int argc, char *argv[]){
return 0;
}
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

View File

@@ -118,7 +118,7 @@ static void *huge_malloc(BLASLONG size){
#endif
int main(int argc, char *argv[]){
int MAIN__(int argc, char *argv[]){
FLOAT *a, *x, *y;
FLOAT alpha[] = {1.0, 1.0};
@@ -215,4 +215,4 @@ int main(int argc, char *argv[]){
return 0;
}
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

View File

@@ -118,7 +118,7 @@ static void *huge_malloc(BLASLONG size){
#endif
int main(int argc, char *argv[]){
int MAIN__(int argc, char *argv[]){
FLOAT *a, *b, *c;
FLOAT alpha[] = {1.0, 1.0};
@@ -200,4 +200,4 @@ int main(int argc, char *argv[]){
return 0;
}
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

View File

@@ -118,7 +118,7 @@ static void *huge_malloc(BLASLONG size){
#endif
int main(int argc, char *argv[]){
int MAIN__(int argc, char *argv[]){
FLOAT *a, *c;
FLOAT alpha[] = {1.0, 1.0};
@@ -196,4 +196,4 @@ int main(int argc, char *argv[]){
return 0;
}
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

View File

@@ -118,7 +118,7 @@ static void *huge_malloc(BLASLONG size){
#endif
int main(int argc, char *argv[]){
int MAIN__(int argc, char *argv[]){
FLOAT *a, *b;
FLOAT alpha[] = {1.0, 1.0};
@@ -199,4 +199,4 @@ int main(int argc, char *argv[]){
return 0;
}
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

View File

@@ -118,7 +118,7 @@ static void *huge_malloc(BLASLONG size){
#endif
int main(int argc, char *argv[]){
int MAIN__(int argc, char *argv[]){
FLOAT *a, *b;
FLOAT alpha[] = {1.0, 1.0};
@@ -130,21 +130,11 @@ int main(int argc, char *argv[]){
char trans='N';
char diag ='U';
int l;
int loops = 1;
double timeg;
if ((p = getenv("OPENBLAS_SIDE"))) side=*p;
if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p;
if ((p = getenv("OPENBLAS_TRANS"))) trans=*p;
if ((p = getenv("OPENBLAS_DIAG"))) diag=*p;
p = getenv("OPENBLAS_LOOPS");
if ( p != NULL )
loops = atoi(p);
blasint m, i, j;
int from = 1;
@@ -160,7 +150,7 @@ int main(int argc, char *argv[]){
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
if (argc > 0) { step = atol(*argv); argc--; argv++;}
fprintf(stderr, "From : %3d To : %3d Step = %3d Side = %c Uplo = %c Trans = %c Diag = %c Loops = %d\n", from, to, step,side,uplo,trans,diag,loops);
fprintf(stderr, "From : %3d To : %3d Step = %3d Side = %c Uplo = %c Trans = %c Diag = %c\n", from, to, step,side,uplo,trans,diag);
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
@@ -181,39 +171,32 @@ int main(int argc, char *argv[]){
for(m = from; m <= to; m += step)
{
timeg=0.0;
fprintf(stderr, " %6d : ", (int)m);
fprintf(stderr, " %6d : ", (int)m);
for(j = 0; j < m; j++){
for(i = 0; i < m * COMPSIZE; i++){
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
b[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
}
for (l=0; l<loops; l++)
{
gettimeofday( &start, (struct timezone *)0);
TRSM (&side, &uplo, &trans, &diag, &m, &m, alpha, a, &m, b, &m);
for(j = 0; j < m; j++){
for(i = 0; i < m * COMPSIZE; i++){
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
b[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
}
gettimeofday( &stop, (struct timezone *)0);
gettimeofday( &start, (struct timezone *)0);
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
TRSM (&side, &uplo, &trans, &diag, &m, &m, alpha, a, &m, b, &m);
gettimeofday( &start, (struct timezone *)0);
gettimeofday( &stop, (struct timezone *)0);
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
timeg += time1;
}
time1 = timeg/loops;
fprintf(stderr, " %10.2f MFlops\n", COMPSIZE * COMPSIZE * 1. * (double)m * (double)m * (double)m / time1 * 1.e-6);
fprintf(stderr,
" %10.2f MFlops\n",
COMPSIZE * COMPSIZE * 1. * (double)m * (double)m * (double)m / time1 * 1.e-6);
}
return 0;
}
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

View File

@@ -1,196 +0,0 @@
/***************************************************************************
Copyright (c) 2014, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#define RETURN_BY_STACK 1
#include "common.h"
#undef DOT
#ifdef DOUBLE
#define DOT BLASFUNC(zdotu)
#else
#define DOT BLASFUNC(cdotu)
#endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
int gettimeofday(struct timeval *tv, void *tz){
FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;
if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);
tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;
/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}
return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
static void *huge_malloc(BLASLONG size){
int shmid;
void *address;
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif
if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}
address = shmat(shmid, NULL, SHM_RND);
if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}
shmctl(shmid, IPC_RMID, 0);
return address;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]){
FLOAT *x, *y;
FLOAT _Complex result;
blasint m, i;
blasint inc_x=1,inc_y=1;
int loops = 1;
int l;
char *p;
int from = 1;
int to = 200;
int step = 1;
struct timeval start, stop;
double time1,timeg;
argc--;argv++;
if (argc > 0) { from = atol(*argv); argc--; argv++;}
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
if (argc > 0) { step = atol(*argv); argc--; argv++;}
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
if ((p = getenv("OPENBLAS_INCY"))) inc_y = atoi(p);
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Inc_y = %d Loops = %d\n", from, to, step,inc_x,inc_y,loops);
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
#ifdef linux
srandom(getpid());
#endif
fprintf(stderr, " SIZE Flops\n");
for(m = from; m <= to; m += step)
{
timeg=0;
fprintf(stderr, " %6d : ", (int)m);
for (l=0; l<loops; l++)
{
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
gettimeofday( &start, (struct timezone *)0);
DOT (&result, &m, x, &inc_x, y, &inc_y );
gettimeofday( &stop, (struct timezone *)0);
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
timeg += time1;
}
timeg /= loops;
fprintf(stderr,
" %10.2f MFlops\n",
COMPSIZE * COMPSIZE * 2. * (double)m / timeg * 1.e-6);
}
return 0;
}
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

View File

@@ -1,195 +0,0 @@
/***************************************************************************
Copyright (c) 2014, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#undef DOT
#ifdef DOUBLE
#define DOT BLASFUNC(zdotu)
#else
#define DOT BLASFUNC(cdotu)
#endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
int gettimeofday(struct timeval *tv, void *tz){
FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;
if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);
tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;
/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}
return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
static void *huge_malloc(BLASLONG size){
int shmid;
void *address;
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif
if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}
address = shmat(shmid, NULL, SHM_RND);
if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}
shmctl(shmid, IPC_RMID, 0);
return address;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]){
FLOAT *x, *y;
FLOAT _Complex result;
blasint m, i;
blasint inc_x=1,inc_y=1;
int loops = 1;
int l;
char *p;
int from = 1;
int to = 200;
int step = 1;
struct timeval start, stop;
double time1,timeg;
argc--;argv++;
if (argc > 0) { from = atol(*argv); argc--; argv++;}
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
if (argc > 0) { step = atol(*argv); argc--; argv++;}
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
if ((p = getenv("OPENBLAS_INCY"))) inc_y = atoi(p);
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Inc_y = %d Loops = %d\n", from, to, step,inc_x,inc_y,loops);
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
#ifdef linux
srandom(getpid());
#endif
fprintf(stderr, " SIZE Flops\n");
for(m = from; m <= to; m += step)
{
timeg=0;
fprintf(stderr, " %6d : ", (int)m);
for (l=0; l<loops; l++)
{
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
gettimeofday( &start, (struct timezone *)0);
result = DOT (&m, x, &inc_x, y, &inc_y );
gettimeofday( &stop, (struct timezone *)0);
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
timeg += time1;
}
timeg /= loops;
fprintf(stderr,
" %10.2f MFlops\n",
COMPSIZE * COMPSIZE * 2. * (double)m / timeg * 1.e-6);
}
return 0;
}
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

View File

@@ -4,8 +4,6 @@
$hostos = `uname -s | sed -e s/\-.*//`; chop($hostos);
$hostarch = `uname -m | sed -e s/i.86/x86/`;chop($hostarch);
$hostarch = "x86_64" if ($hostarch eq "amd64");
$hostarch = "arm" if ($hostarch =~ /^arm.*/);
$hostarch = "arm64" if ($hostarch eq "aarch64");
$binary = $ENV{"BINARY"};
@@ -57,7 +55,6 @@ $os = osf if ($data =~ /OS_OSF/);
$os = WINNT if ($data =~ /OS_WINNT/);
$os = CYGWIN_NT if ($data =~ /OS_CYGWIN_NT/);
$os = Interix if ($data =~ /OS_INTERIX/);
$os = Android if ($data =~ /OS_ANDROID/);
$architecture = x86 if ($data =~ /ARCH_X86/);
$architecture = x86_64 if ($data =~ /ARCH_X86_64/);
@@ -84,10 +81,6 @@ if (($architecture eq "mips32") || ($architecture eq "mips64")) {
$defined = 1;
}
if (($architecture eq "arm") || ($architecture eq "arm64")) {
$defined = 1;
}
if ($architecture eq "alpha") {
$defined = 1;
$binary = 64;

16
cblas.h
View File

@@ -13,12 +13,6 @@ extern "C" {
void openblas_set_num_threads(int num_threads);
void goto_set_num_threads(int num_threads);
/*Get the number of threads on runtime.*/
int openblas_get_num_threads(void);
/*Get the number of physical processors (cores).*/
int openblas_get_num_procs(void);
/*Get the build configure on runtime.*/
char* openblas_get_config(void);
@@ -347,16 +341,6 @@ void cblas_cimatcopy(OPENBLAS_CONST enum CBLAS_ORDER CORDER, OPENBLAS_CONST enum
void cblas_zimatcopy(OPENBLAS_CONST enum CBLAS_ORDER CORDER, OPENBLAS_CONST enum CBLAS_TRANSPOSE CTRANS, OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST double* calpha, double* a,
OPENBLAS_CONST blasint clda, OPENBLAS_CONST blasint cldb);
void cblas_sgeadd(OPENBLAS_CONST enum CBLAS_ORDER CORDER,OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST float calpha, float *a, OPENBLAS_CONST blasint clda, OPENBLAS_CONST float cbeta,
float *c, OPENBLAS_CONST blasint cldc);
void cblas_dgeadd(OPENBLAS_CONST enum CBLAS_ORDER CORDER,OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST double calpha, double *a, OPENBLAS_CONST blasint clda, OPENBLAS_CONST double cbeta,
double *c, OPENBLAS_CONST blasint cldc);
void cblas_cgeadd(OPENBLAS_CONST enum CBLAS_ORDER CORDER,OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST float *calpha, float *a, OPENBLAS_CONST blasint clda, OPENBLAS_CONST float *cbeta,
float *c, OPENBLAS_CONST blasint cldc);
void cblas_zgeadd(OPENBLAS_CONST enum CBLAS_ORDER CORDER,OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST double *calpha, double *a, OPENBLAS_CONST blasint clda, OPENBLAS_CONST double *cbeta,
double *c, OPENBLAS_CONST blasint cldc);
#ifdef __cplusplus
}
#endif /* __cplusplus */

View File

@@ -13,12 +13,6 @@ extern "C" {
void openblas_set_num_threads(int num_threads);
void goto_set_num_threads(int num_threads);
/*Get the number of threads on runtime.*/
int openblas_get_num_threads(void);
/*Get the number of physical processors (cores).*/
int openblas_get_num_procs(void);
/*Get the build configure on runtime.*/
char* openblas_get_config(void);
@@ -333,16 +327,6 @@ void cblas_cimatcopy( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, bl
blasint clda, blasint cldb);
void cblas_zimatcopy( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows, blasint ccols, double* calpha, double* a,
blasint clda, blasint cldb);
void cblas_sgeadd( enum CBLAS_ORDER CORDER, blasint crows, blasint ccols, float calpha, float *a, blasint clda, float cbeta,
float *c, blasint cldc);
void cblas_dgeadd( enum CBLAS_ORDER CORDER, blasint crows, blasint ccols, double calpha, double *a, blasint clda, double cbeta,
double *c, blasint cldc);
void cblas_cgeadd( enum CBLAS_ORDER CORDER, blasint crows, blasint ccols, float *calpha, float *a, blasint clda, float *cbeta,
float *c, blasint cldc);
void cblas_zgeadd( enum CBLAS_ORDER CORDER, blasint crows, blasint ccols, double *calpha, double *a, blasint clda, double *cbeta,
double *c, blasint cldc);
#ifdef __cplusplus
}
#endif /* __cplusplus */

View File

@@ -93,10 +93,6 @@ extern "C" {
#include <sched.h>
#endif
#ifdef OS_ANDROID
#define NO_SYSV_IPC
#endif
#ifdef OS_WINDOWS
#ifdef ATOM
#define GOTO_ATOM ATOM
@@ -110,9 +106,7 @@ extern "C" {
#endif
#else
#include <sys/mman.h>
#ifndef NO_SYSV_IPC
#include <sys/shm.h>
#endif
#include <sys/time.h>
#include <unistd.h>
#include <math.h>
@@ -276,11 +270,6 @@ typedef int blasint;
#define SIZE 8
#define BASE_SHIFT 3
#define ZBASE_SHIFT 4
#elif defined(INTEGER) //extend for integer matrix
#define FLOAT int
#define SIZE 4
#define BASE_SHIFT 2
#define ZBASE_SHIFT 3
#else
#define FLOAT float
#define SIZE 4
@@ -338,14 +327,6 @@ typedef int blasint;
#endif
#endif
/*
#ifdef STEAMROLLER
#ifndef YIELDING
#define YIELDING __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop;\n");
#endif
#endif
*/
#ifndef YIELDING
#define YIELDING sched_yield()
#endif
@@ -510,8 +491,6 @@ void blas_set_parameter(void);
int blas_get_cpu_number(void);
void *blas_memory_alloc (int);
void blas_memory_free (void *);
void *blas_memory_alloc_nolock (int); //use malloc without blas_lock
void blas_memory_free_nolock (void *);
int get_num_procs (void);

View File

@@ -1,5 +1,5 @@
/*****************************************************************************
Copyright (c) 2011-2015, The OpenBLAS Project
Copyright (c) 2011-2014, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -30,21 +30,50 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************************/
/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin. */
/* All rights reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the following */
/* conditions are met: */
/* */
/* 1. Redistributions of source code must retain the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer. */
/* */
/* 2. Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/* POSSIBILITY OF SUCH DAMAGE. */
/* */
/* The views and conclusions contained in the software and */
/* documentation are those of the authors and should not be */
/* interpreted as representing official policies, either expressed */
/* or implied, of The University of Texas at Austin. */
/*********************************************************************/
#ifndef COMMON_ARM
#define COMMON_ARM
#if defined(ARMV5) || defined(ARMV6)
#define MB
#define WMB
#else
#define MB __asm__ __volatile__ ("dmb ish" : : : "memory")
#define WMB __asm__ __volatile__ ("dmb ishst" : : : "memory")
#endif
#define INLINE inline
#define RETURN_BY_COMPLEX
@@ -59,12 +88,9 @@ static void __inline blas_lock(volatile BLASULONG *address){
while (*address) {YIELDING;};
__asm__ __volatile__(
"1: \n\t"
"ldrex r2, [%1] \n\t"
"mov r2, #0 \n\t"
"strex r3, r2, [%1] \n\t"
"cmp r3, #0 \n\t"
"bne 1b \n\t"
"mov %0 , r3 \n\t"
: "=r"(ret), "=r"(address)
: "1"(address)

View File

@@ -1,5 +1,5 @@
/*****************************************************************************
Copyright (c) 2011-2015, The OpenBLAS Project
Copyright (c) 2011-2014, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -30,12 +30,49 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************************/
/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin. */
/* All rights reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the following */
/* conditions are met: */
/* */
/* 1. Redistributions of source code must retain the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer. */
/* */
/* 2. Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/* POSSIBILITY OF SUCH DAMAGE. */
/* */
/* The views and conclusions contained in the software and */
/* documentation are those of the authors and should not be */
/* interpreted as representing official policies, either expressed */
/* or implied, of The University of Texas at Austin. */
/*********************************************************************/
#ifndef COMMON_ARM64
#define COMMON_ARM64
#define MB __asm__ __volatile__ ("dmb ish" : : : "memory")
#define WMB __asm__ __volatile__ ("dmb ishst" : : : "memory")
#define MB
#define WMB
#define INLINE inline
@@ -44,30 +81,26 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifndef ASSEMBLER
static void __inline blas_lock(volatile BLASULONG *address){
/*
int register ret;
int register tmp;
do {
while (*address) {YIELDING;};
__asm__ __volatile__(
"1: \n\t"
"ldaxr %2, [%1] \n\t"
"mov %2, #0 \n\t"
"stlxr %w0, %2, [%1] \n\t"
"cbnz %w0, 1b \n\t"
"mov %0 , #0 \n\t"
: "=r"(ret), "=r"(address), "=r"(tmp)
"ldrex r2, [%1] \n\t"
"mov r2, #0 \n\t"
"strex r3, r2, [%1] \n\t"
"mov %0 , r3 \n\t"
: "=r"(ret), "=r"(address)
: "1"(address)
: "memory", "%w0"
//, "%r2" , "%r3"
: "memory", "r2" , "r3"
);
} while (ret);
*/
}
@@ -133,4 +166,3 @@ REALNAME:
#endif
#endif

View File

@@ -220,7 +220,6 @@
#define COMATCOPY_K_CTC comatcopy_k_ctc
#define COMATCOPY_K_RTC comatcopy_k_rtc
#define CGEADD_K cgeadd_k
#else
@@ -403,7 +402,6 @@
#define COMATCOPY_K_RNC gotoblas -> comatcopy_k_rnc
#define COMATCOPY_K_CTC gotoblas -> comatcopy_k_ctc
#define COMATCOPY_K_RTC gotoblas -> comatcopy_k_rtc
#define CGEADD_K gotoblas -> cgeadd_k
#endif

View File

@@ -149,7 +149,6 @@
#define DOMATCOPY_K_RN domatcopy_k_rn
#define DOMATCOPY_K_CT domatcopy_k_ct
#define DOMATCOPY_K_RT domatcopy_k_rt
#define DGEADD_K dgeadd_k
#else
@@ -268,8 +267,6 @@
#define DOMATCOPY_K_CT gotoblas -> domatcopy_k_ct
#define DOMATCOPY_K_RT gotoblas -> domatcopy_k_rt
#define DGEADD_K gotoblas -> dgeadd_k
#endif
#define DGEMM_NN dgemm_nn

View File

@@ -1,9 +0,0 @@
#ifndef COMMON_I_H
#define COMMON_I_H
#ifndef DYNAMIC_ARCH
#define IAXPYU_K iaxpy_k
#else
#error
#endif
#endif

View File

@@ -93,7 +93,6 @@ openblas_complex_xdouble BLASFUNC(xdotc) (blasint *, xdouble *, blasint *, xdo
void BLASFUNC(saxpy) (blasint *, float *, float *, blasint *, float *, blasint *);
void BLASFUNC(daxpy) (blasint *, double *, double *, blasint *, double *, blasint *);
void BLASFUNC(iaxpy) (blasint *, int *, int *, blasint *, int *, blasint *);
void BLASFUNC(qaxpy) (blasint *, xdouble *, xdouble *, blasint *, xdouble *, blasint *);
void BLASFUNC(caxpy) (blasint *, float *, float *, blasint *, float *, blasint *);
void BLASFUNC(zaxpy) (blasint *, double *, double *, blasint *, double *, blasint *);
@@ -755,12 +754,6 @@ void BLASFUNC(dimatcopy) (char *, char *, blasint *, blasint *, double *, do
void BLASFUNC(cimatcopy) (char *, char *, blasint *, blasint *, float *, float *, blasint *, blasint *);
void BLASFUNC(zimatcopy) (char *, char *, blasint *, blasint *, double *, double *, blasint *, blasint *);
void BLASFUNC(sgeadd) (blasint *, blasint *, float *, float *, blasint *, float *, float *, blasint*);
void BLASFUNC(dgeadd) (blasint *, blasint *, double *, double *, blasint *, double *, double *, blasint*);
void BLASFUNC(cgeadd) (blasint *, blasint *, float *, float *, blasint *, float *, float *, blasint*);
void BLASFUNC(zgeadd) (blasint *, blasint *, double *, double *, blasint *, double *, double *, blasint*);
#ifdef __cplusplus
}

View File

@@ -60,8 +60,6 @@ int daxpy_k (BLASLONG, BLASLONG, BLASLONG, double,
double *, BLASLONG, double *, BLASLONG, double *, BLASLONG);
int qaxpy_k (BLASLONG, BLASLONG, BLASLONG, xdouble,
xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
int iaxpy_k (BLASLONG, BLASLONG, BLASLONG, int,
int *, BLASLONG, int *, BLASLONG, int *, BLASLONG);
int caxpy_k (BLASLONG, BLASLONG, BLASLONG, float, float,
float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
int zaxpy_k (BLASLONG, BLASLONG, BLASLONG, double, double,

View File

@@ -1762,11 +1762,6 @@ int zomatcopy_k_rnc(BLASLONG, BLASLONG, double, double, double *, BLASLONG, dou
int zomatcopy_k_ctc(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG);
int zomatcopy_k_rtc(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG);
int sgeadd_k(BLASLONG, BLASLONG, float, float*, BLASLONG, float, float *, BLASLONG);
int dgeadd_k(BLASLONG, BLASLONG, double, double*, BLASLONG, double, double *, BLASLONG);
int cgeadd_k(BLASLONG, BLASLONG, float, float, float*, BLASLONG, float, float, float *, BLASLONG);
int zgeadd_k(BLASLONG, BLASLONG, double,double, double*, BLASLONG, double, double, double *, BLASLONG);
#ifdef __CUDACC__
}

View File

@@ -47,10 +47,6 @@
#include "common_z.h"
#include "common_x.h"
#ifdef INTEGER_PRECISION
#include "common_i.h"
#endif
#ifndef COMPLEX
#ifdef XDOUBLE
@@ -638,10 +634,7 @@
#define OMATCOPY_K_RN DOMATCOPY_K_RN
#define OMATCOPY_K_CT DOMATCOPY_K_CT
#define OMATCOPY_K_RT DOMATCOPY_K_RT
#define GEADD_K DGEADD_K
#elif defined(INTEGER)
#define AXPYU_K IAXPYU_K
#else
#define AMAX_K SAMAX_K
@@ -939,7 +932,6 @@
#define OMATCOPY_K_CT SOMATCOPY_K_CT
#define OMATCOPY_K_RT SOMATCOPY_K_RT
#define GEADD_K SGEADD_K
#endif
#else
#ifdef XDOUBLE
@@ -1754,7 +1746,6 @@
#define OMATCOPY_K_RNC ZOMATCOPY_K_RNC
#define OMATCOPY_K_CTC ZOMATCOPY_K_CTC
#define OMATCOPY_K_RTC ZOMATCOPY_K_RTC
#define GEADD_K ZGEADD_K
#else
@@ -2168,8 +2159,6 @@
#define OMATCOPY_K_CTC COMATCOPY_K_CTC
#define OMATCOPY_K_RTC COMATCOPY_K_RTC
#define GEADD_K CGEADD_K
#endif
#endif

View File

@@ -855,10 +855,6 @@ BLASLONG (*ixamin_k)(BLASLONG, xdouble *, BLASLONG);
int (*zomatcopy_k_rnc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
int (*zomatcopy_k_rtc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
int (*sgeadd_k) (BLASLONG, BLASLONG, float, float *, BLASLONG, float, float *, BLASLONG);
int (*dgeadd_k) (BLASLONG, BLASLONG, double, double *, BLASLONG, double, double *, BLASLONG);
int (*cgeadd_k) (BLASLONG, BLASLONG, float, float, float *, BLASLONG, float, float, float *, BLASLONG);
int (*zgeadd_k) (BLASLONG, BLASLONG, float, double, double *, BLASLONG, double, double, double *, BLASLONG);
} gotoblas_t;

View File

@@ -153,7 +153,6 @@
#define SOMATCOPY_K_CT somatcopy_k_ct
#define SOMATCOPY_K_RT somatcopy_k_rt
#define SGEADD_K sgeadd_k
#else
@@ -275,7 +274,6 @@
#define SOMATCOPY_K_CT gotoblas -> somatcopy_k_ct
#define SOMATCOPY_K_RT gotoblas -> somatcopy_k_rt
#define SGEADD_K gotoblas -> sgeadd_k
#endif

View File

@@ -65,7 +65,6 @@ extern int blas_omp_linked;
#define BLAS_XDOUBLE 0x0002U
#define BLAS_REAL 0x0000U
#define BLAS_COMPLEX 0x0004U
#define BLAS_INTEGER 0x0008U
#define BLAS_TRANSA 0x0030U /* 2bit */
#define BLAS_TRANSA_N 0x0000U

View File

@@ -171,7 +171,7 @@ static __inline int blas_quickdivide(unsigned int x, unsigned int y){
#define MMXSTORE movd
#endif
#if defined(PILEDRIVER) || defined(BULLDOZER) || defined(STEAMROLLER) || defined(EXCAVATOR)
#if defined(PILEDRIVER) || defined(BULLDOZER)
//Enable some optimazation for barcelona.
#define BARCELONA_OPTIMIZATION
#endif

View File

@@ -226,7 +226,7 @@ static __inline int blas_quickdivide(unsigned int x, unsigned int y){
#ifdef ASSEMBLER
#if defined(PILEDRIVER) || defined(BULLDOZER) || defined(STEAMROLLER) || defined(EXCAVATOR)
#if defined(PILEDRIVER) || defined(BULLDOZER)
//Enable some optimazation for barcelona.
#define BARCELONA_OPTIMIZATION
#endif

View File

@@ -220,7 +220,6 @@
#define ZOMATCOPY_K_CTC zomatcopy_k_ctc
#define ZOMATCOPY_K_RTC zomatcopy_k_rtc
#define ZGEADD_K zgeadd_k
#else
@@ -404,8 +403,6 @@
#define ZOMATCOPY_K_CTC gotoblas -> zomatcopy_k_ctc
#define ZOMATCOPY_K_RTC gotoblas -> zomatcopy_k_rtc
#define ZGEADD_K gotoblas -> zgeadd_k
#endif
#define ZGEMM_NN zgemm_nn

12
cpuid.h
View File

@@ -104,12 +104,10 @@
#define CORE_ATOM 18
#define CORE_NANO 19
#define CORE_SANDYBRIDGE 20
#define CORE_BOBCAT 21
#define CORE_BULLDOZER 22
#define CORE_BOBCAT 21
#define CORE_BULLDOZER 22
#define CORE_PILEDRIVER 23
#define CORE_HASWELL 24
#define CORE_STEAMROLLER 25
#define CORE_EXCAVATOR 26
#define CORE_HASWELL 24
#define HAVE_SSE (1 << 0)
#define HAVE_SSE2 (1 << 1)
@@ -202,8 +200,6 @@ typedef struct {
#define CPUTYPE_BOBCAT 45
#define CPUTYPE_BULLDOZER 46
#define CPUTYPE_PILEDRIVER 47
#define CPUTYPE_HASWELL 48
#define CPUTYPE_STEAMROLLER 49
#define CPUTYPE_EXCAVATOR 50
#define CPUTYPE_HASWELL 48
#endif

View File

@@ -30,27 +30,16 @@
#define CPU_UNKNOWN 0
#define CPU_ARMV6 1
#define CPU_ARMV7 2
#define CPU_CORTEXA9 3
#define CPU_CORTEXA15 4
#define CPU_CORTEXA15 3
static char *cpuname[] = {
"UNKOWN",
"ARMV6",
"ARMV7",
"CORTEXA9",
"CORTEXA15"
};
static char *cpuname_lower[] = {
"unknown",
"armv6",
"armv7",
"cortexa9",
"cortexa15"
};
int get_feature(char *search)
{
@@ -96,29 +85,6 @@ int detect(void)
char buffer[512], *p;
p = (char *) NULL ;
infile = fopen("/proc/cpuinfo", "r");
while (fgets(buffer, sizeof(buffer), infile))
{
if (!strncmp("CPU part", buffer, 8))
{
p = strchr(buffer, ':') + 2;
break;
}
}
fclose(infile);
if(p != NULL) {
if (strstr(p, "0xc09")) {
return CPU_CORTEXA9;
}
if (strstr(p, "0xc0f")) {
return CPU_CORTEXA15;
}
}
p = (char *) NULL ;
infile = fopen("/proc/cpuinfo", "r");
while (fgets(buffer, sizeof(buffer), infile))
@@ -176,7 +142,21 @@ void get_architecture(void)
void get_subarchitecture(void)
{
int d = detect();
printf("%s", cpuname[d]);
switch (d)
{
case CPU_ARMV7:
printf("ARMV7");
break;
case CPU_ARMV6:
printf("ARMV6");
break;
default:
printf("UNKNOWN");
break;
}
}
void get_subdirname(void)
@@ -190,36 +170,6 @@ void get_cpuconfig(void)
int d = detect();
switch (d)
{
case CPU_CORTEXA9:
printf("#define CORTEXA9\n");
printf("#define HAVE_VFP\n");
printf("#define HAVE_VFPV3\n");
if ( get_feature("neon")) printf("#define HAVE_NEON\n");
if ( get_feature("vfpv4")) printf("#define HAVE_VFPV4\n");
printf("#define L1_DATA_SIZE 32768\n");
printf("#define L1_DATA_LINESIZE 32\n");
printf("#define L2_SIZE 1048576\n");
printf("#define L2_LINESIZE 32\n");
printf("#define DTB_DEFAULT_ENTRIES 128\n");
printf("#define DTB_SIZE 4096\n");
printf("#define L2_ASSOCIATIVE 4\n");
break;
case CPU_CORTEXA15:
printf("#define CORTEXA15\n");
printf("#define HAVE_VFP\n");
printf("#define HAVE_VFPV3\n");
if ( get_feature("neon")) printf("#define HAVE_NEON\n");
if ( get_feature("vfpv4")) printf("#define HAVE_VFPV4\n");
printf("#define L1_DATA_SIZE 32768\n");
printf("#define L1_DATA_LINESIZE 32\n");
printf("#define L2_SIZE 1048576\n");
printf("#define L2_LINESIZE 32\n");
printf("#define DTB_DEFAULT_ENTRIES 128\n");
printf("#define DTB_SIZE 4096\n");
printf("#define L2_ASSOCIATIVE 4\n");
break;
case CPU_ARMV7:
printf("#define ARMV7\n");
@@ -256,7 +206,18 @@ void get_libname(void)
{
int d = detect();
printf("%s", cpuname_lower[d]);
switch (d)
{
case CPU_ARMV7:
printf("armv7\n");
break;
case CPU_ARMV6:
printf("armv6\n");
break;
}
}

View File

@@ -1098,16 +1098,6 @@ int get_cpuname(void){
return CPUTYPE_HASWELL;
#else
return CPUTYPE_SANDYBRIDGE;
#endif
else
return CPUTYPE_NEHALEM;
case 13:
//Broadwell
if(support_avx())
#ifndef NO_AVX2
return CPUTYPE_HASWELL;
#else
return CPUTYPE_SANDYBRIDGE;
#endif
else
return CPUTYPE_NEHALEM;
@@ -1122,36 +1112,11 @@ int get_cpuname(void){
return CPUTYPE_HASWELL;
#else
return CPUTYPE_SANDYBRIDGE;
#endif
else
return CPUTYPE_NEHALEM;
case 7:
case 15:
//Broadwell
if(support_avx())
#ifndef NO_AVX2
return CPUTYPE_HASWELL;
#else
return CPUTYPE_SANDYBRIDGE;
#endif
else
return CPUTYPE_NEHALEM;
}
break;
case 5:
switch (model) {
case 6:
//Broadwell
if(support_avx())
#ifndef NO_AVX2
return CPUTYPE_HASWELL;
#else
return CPUTYPE_SANDYBRIDGE;
#endif
else
return CPUTYPE_NEHALEM;
}
break;
}
break;
case 0x7:
@@ -1197,21 +1162,6 @@ int get_cpuname(void){
return CPUTYPE_PILEDRIVER;
else
return CPUTYPE_BARCELONA; //OS don't support AVX.
case 0:
switch(exmodel){
case 3:
if(support_avx())
return CPUTYPE_STEAMROLLER;
else
return CPUTYPE_BARCELONA; //OS don't support AVX.
case 6:
if(support_avx())
return CPUTYPE_EXCAVATOR;
else
return CPUTYPE_BARCELONA; //OS don't support AVX.
}
break;
}
break;
case 5:
@@ -1340,8 +1290,6 @@ static char *cpuname[] = {
"BULLDOZER",
"PILEDRIVER",
"HASWELL",
"STEAMROLLER",
"EXCAVATOR",
};
static char *lowercpuname[] = {
@@ -1393,8 +1341,6 @@ static char *lowercpuname[] = {
"bulldozer",
"piledriver",
"haswell",
"steamroller",
"excavator",
};
static char *corename[] = {
@@ -1423,8 +1369,6 @@ static char *corename[] = {
"BULLDOZER",
"PILEDRIVER",
"HASWELL",
"STEAMROLLER",
"EXCAVATOR",
};
static char *corename_lower[] = {
@@ -1453,8 +1397,6 @@ static char *corename_lower[] = {
"bulldozer",
"piledriver",
"haswell",
"steamroller",
"excavator",
};
@@ -1573,16 +1515,6 @@ int get_coretype(void){
return CORE_HASWELL;
#else
return CORE_SANDYBRIDGE;
#endif
else
return CORE_NEHALEM;
case 13:
//broadwell
if(support_avx())
#ifndef NO_AVX2
return CORE_HASWELL;
#else
return CORE_SANDYBRIDGE;
#endif
else
return CORE_NEHALEM;
@@ -1597,36 +1529,11 @@ int get_coretype(void){
return CORE_HASWELL;
#else
return CORE_SANDYBRIDGE;
#endif
else
return CORE_NEHALEM;
case 7:
case 15:
//broadwell
if(support_avx())
#ifndef NO_AVX2
return CORE_HASWELL;
#else
return CORE_SANDYBRIDGE;
#endif
else
return CORE_NEHALEM;
}
break;
case 5:
switch (model) {
case 6:
//broadwell
if(support_avx())
#ifndef NO_AVX2
return CORE_HASWELL;
#else
return CORE_SANDYBRIDGE;
#endif
else
return CORE_NEHALEM;
}
break;
}
break;
@@ -1655,25 +1562,7 @@ int get_coretype(void){
return CORE_PILEDRIVER;
else
return CORE_BARCELONA; //OS don't support AVX.
case 0:
switch(exmodel){
case 3:
if(support_avx())
return CORE_STEAMROLLER;
else
return CORE_BARCELONA; //OS don't support AVX.
case 6:
if(support_avx())
return CORE_EXCAVATOR;
else
return CORE_BARCELONA; //OS don't support AVX.
}
break;
}
}else return CORE_BARCELONA;
}
}

View File

@@ -44,10 +44,6 @@ COMPILER_DEC
COMPILER_GNU
#endif
#if defined(__ANDROID__)
OS_ANDROID
#endif
#if defined(__linux__)
OS_LINUX
#endif

View File

@@ -27,18 +27,12 @@ ctestl2o = c_cblas2.o c_c2chke.o auxiliary.o c_xerbla.o constant.o
ctestl3o = c_cblas3.o c_c3chke.o auxiliary.o c_xerbla.o constant.o
ctestl3o_3m = c_cblas3_3m.o c_c3chke_3m.o auxiliary.o c_xerbla.o constant.o
ztestl1o = c_zblas1.o
ztestl2o = c_zblas2.o c_z2chke.o auxiliary.o c_xerbla.o constant.o
ztestl3o = c_zblas3.o c_z3chke.o auxiliary.o c_xerbla.o constant.o
ztestl3o_3m = c_zblas3_3m.o c_z3chke_3m.o auxiliary.o c_xerbla.o constant.o
all :: all1 all2 all3
all1: xscblat1 xdcblat1 xccblat1 xzcblat1
@@ -121,8 +115,8 @@ xccblat2: $(ctestl2o) c_cblat2.o $(TOPDIR)/$(LIBNAME)
xccblat3: $(ctestl3o) c_cblat3.o $(TOPDIR)/$(LIBNAME)
$(FC) $(FLDFLAGS) -o xccblat3 c_cblat3.o $(ctestl3o) $(LIB) $(EXTRALIB) $(CEXTRALIB)
xccblat3_3m: $(ctestl3o_3m) c_cblat3_3m.o $(TOPDIR)/$(LIBNAME)
$(FC) $(FLDFLAGS) -o xccblat3_3m c_cblat3_3m.o $(ctestl3o_3m) $(LIB) $(EXTRALIB) $(CEXTRALIB)
xccblat3_3m: $(ctestl3o) c_cblat3_3m.o $(TOPDIR)/$(LIBNAME)
$(FC) $(FLDFLAGS) -o xccblat3_3m c_cblat3_3m.o $(ctestl3o) $(LIB) $(EXTRALIB) $(CEXTRALIB)
# Double complex
xzcblat1: $(ztestl1o) c_zblat1.o $(TOPDIR)/$(LIBNAME)
@@ -133,8 +127,8 @@ xzcblat3: $(ztestl3o) c_zblat3.o $(TOPDIR)/$(LIBNAME)
$(FC) $(FLDFLAGS) -o xzcblat3 c_zblat3.o $(ztestl3o) $(LIB) $(EXTRALIB) $(CEXTRALIB)
xzcblat3_3m: $(ztestl3o_3m) c_zblat3_3m.o $(TOPDIR)/$(LIBNAME)
$(FC) $(FLDFLAGS) -o xzcblat3_3m c_zblat3_3m.o $(ztestl3o_3m) $(LIB) $(EXTRALIB) $(CEXTRALIB)
xzcblat3_3m: $(ztestl3o) c_zblat3_3m.o $(TOPDIR)/$(LIBNAME)
$(FC) $(FLDFLAGS) -o xzcblat3_3m c_zblat3_3m.o $(ztestl3o) $(LIB) $(EXTRALIB) $(CEXTRALIB)
include $(TOPDIR)/Makefile.tail

View File

@@ -46,7 +46,235 @@ void F77_c3chke(char * rout) {
}
if (strncmp( sf,"cblas_cgemm" ,11)==0) {
if (strncmp( sf,"cblas_cgemm3m" ,13)==0) {
cblas_rout = "cblas_cgemm3" ;
cblas_info = 1;
cblas_cgemm3m( INVALID, CblasNoTrans, CblasNoTrans, 0, 0, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 1;
cblas_cgemm3m( INVALID, CblasNoTrans, CblasTrans, 0, 0, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 1;
cblas_cgemm3m( INVALID, CblasTrans, CblasNoTrans, 0, 0, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 1;
cblas_cgemm3m( INVALID, CblasTrans, CblasTrans, 0, 0, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 2; RowMajorStrg = FALSE;
cblas_cgemm3m( CblasColMajor, INVALID, CblasNoTrans, 0, 0, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 2; RowMajorStrg = FALSE;
cblas_cgemm3m( CblasColMajor, INVALID, CblasTrans, 0, 0, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 3; RowMajorStrg = FALSE;
cblas_cgemm3m( CblasColMajor, CblasNoTrans, INVALID, 0, 0, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 3; RowMajorStrg = FALSE;
cblas_cgemm3m( CblasColMajor, CblasTrans, INVALID, 0, 0, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 4; RowMajorStrg = FALSE;
cblas_cgemm3m( CblasColMajor, CblasNoTrans, CblasNoTrans, INVALID, 0, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 4; RowMajorStrg = FALSE;
cblas_cgemm3m( CblasColMajor, CblasNoTrans, CblasTrans, INVALID, 0, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 4; RowMajorStrg = FALSE;
cblas_cgemm3m( CblasColMajor, CblasTrans, CblasNoTrans, INVALID, 0, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 4; RowMajorStrg = FALSE;
cblas_cgemm3m( CblasColMajor, CblasTrans, CblasTrans, INVALID, 0, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 5; RowMajorStrg = FALSE;
cblas_cgemm3m( CblasColMajor, CblasNoTrans, CblasNoTrans, 0, INVALID, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 5; RowMajorStrg = FALSE;
cblas_cgemm3m( CblasColMajor, CblasNoTrans, CblasTrans, 0, INVALID, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 5; RowMajorStrg = FALSE;
cblas_cgemm3m( CblasColMajor, CblasTrans, CblasNoTrans, 0, INVALID, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 5; RowMajorStrg = FALSE;
cblas_cgemm3m( CblasColMajor, CblasTrans, CblasTrans, 0, INVALID, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 6; RowMajorStrg = FALSE;
cblas_cgemm3m( CblasColMajor, CblasNoTrans, CblasNoTrans, 0, 0, INVALID,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 6; RowMajorStrg = FALSE;
cblas_cgemm3m( CblasColMajor, CblasNoTrans, CblasTrans, 0, 0, INVALID,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 6; RowMajorStrg = FALSE;
cblas_cgemm3m( CblasColMajor, CblasTrans, CblasNoTrans, 0, 0, INVALID,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 6; RowMajorStrg = FALSE;
cblas_cgemm3m( CblasColMajor, CblasTrans, CblasTrans, 0, 0, INVALID,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 9; RowMajorStrg = FALSE;
cblas_cgemm3m( CblasColMajor, CblasNoTrans, CblasNoTrans, 2, 0, 0,
ALPHA, A, 1, B, 1, BETA, C, 2 );
chkxer();
cblas_info = 9; RowMajorStrg = FALSE;
cblas_cgemm3m( CblasColMajor, CblasNoTrans, CblasTrans, 2, 0, 0,
ALPHA, A, 1, B, 1, BETA, C, 2 );
chkxer();
cblas_info = 9; RowMajorStrg = FALSE;
cblas_cgemm3m( CblasColMajor, CblasTrans, CblasNoTrans, 0, 0, 2,
ALPHA, A, 1, B, 2, BETA, C, 1 );
chkxer();
cblas_info = 9; RowMajorStrg = FALSE;
cblas_cgemm3m( CblasColMajor, CblasTrans, CblasTrans, 0, 0, 2,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 11; RowMajorStrg = FALSE;
cblas_cgemm3m( CblasColMajor, CblasNoTrans, CblasNoTrans, 0, 0, 2,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 11; RowMajorStrg = FALSE;
cblas_cgemm3m( CblasColMajor, CblasTrans, CblasNoTrans, 0, 0, 2,
ALPHA, A, 2, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 11; RowMajorStrg = FALSE;
cblas_cgemm3m( CblasColMajor, CblasNoTrans, CblasTrans, 0, 2, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 11; RowMajorStrg = FALSE;
cblas_cgemm3m( CblasColMajor, CblasTrans, CblasTrans, 0, 2, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 14; RowMajorStrg = FALSE;
cblas_cgemm3m( CblasColMajor, CblasNoTrans, CblasNoTrans, 2, 0, 0,
ALPHA, A, 2, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 14; RowMajorStrg = FALSE;
cblas_cgemm3m( CblasColMajor, CblasNoTrans, CblasTrans, 2, 0, 0,
ALPHA, A, 2, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 14; RowMajorStrg = FALSE;
cblas_cgemm3m( CblasColMajor, CblasTrans, CblasNoTrans, 2, 0, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 14; RowMajorStrg = FALSE;
cblas_cgemm3m( CblasColMajor, CblasTrans, CblasTrans, 2, 0, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 4; RowMajorStrg = TRUE;
cblas_cgemm3m( CblasRowMajor, CblasNoTrans, CblasNoTrans, INVALID, 0, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 4; RowMajorStrg = TRUE;
cblas_cgemm3m( CblasRowMajor, CblasNoTrans, CblasTrans, INVALID, 0, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 4; RowMajorStrg = TRUE;
cblas_cgemm3m( CblasRowMajor, CblasTrans, CblasNoTrans, INVALID, 0, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 4; RowMajorStrg = TRUE;
cblas_cgemm3m( CblasRowMajor, CblasTrans, CblasTrans, INVALID, 0, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 5; RowMajorStrg = TRUE;
cblas_cgemm3m( CblasRowMajor, CblasNoTrans, CblasNoTrans, 0, INVALID, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 5; RowMajorStrg = TRUE;
cblas_cgemm3m( CblasRowMajor, CblasNoTrans, CblasTrans, 0, INVALID, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 5; RowMajorStrg = TRUE;
cblas_cgemm3m( CblasRowMajor, CblasTrans, CblasNoTrans, 0, INVALID, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 5; RowMajorStrg = TRUE;
cblas_cgemm3m( CblasRowMajor, CblasTrans, CblasTrans, 0, INVALID, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 6; RowMajorStrg = TRUE;
cblas_cgemm3m( CblasRowMajor, CblasNoTrans, CblasNoTrans, 0, 0, INVALID,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 6; RowMajorStrg = TRUE;
cblas_cgemm3m( CblasRowMajor, CblasNoTrans, CblasTrans, 0, 0, INVALID,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 6; RowMajorStrg = TRUE;
cblas_cgemm3m( CblasRowMajor, CblasTrans, CblasNoTrans, 0, 0, INVALID,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 6; RowMajorStrg = TRUE;
cblas_cgemm3m( CblasRowMajor, CblasTrans, CblasTrans, 0, 0, INVALID,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 9; RowMajorStrg = TRUE;
cblas_cgemm3m( CblasRowMajor, CblasNoTrans, CblasNoTrans, 0, 0, 2,
ALPHA, A, 1, B, 1, BETA, C, 2 );
chkxer();
cblas_info = 9; RowMajorStrg = TRUE;
cblas_cgemm3m( CblasRowMajor, CblasNoTrans, CblasTrans, 0, 0, 2,
ALPHA, A, 1, B, 2, BETA, C, 2 );
chkxer();
cblas_info = 9; RowMajorStrg = TRUE;
cblas_cgemm3m( CblasRowMajor, CblasTrans, CblasNoTrans, 2, 0, 0,
ALPHA, A, 1, B, 2, BETA, C, 1 );
chkxer();
cblas_info = 9; RowMajorStrg = TRUE;
cblas_cgemm3m( CblasRowMajor, CblasTrans, CblasTrans, 2, 0, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 11; RowMajorStrg = TRUE;
cblas_cgemm3m( CblasRowMajor, CblasNoTrans, CblasNoTrans, 0, 2, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 11; RowMajorStrg = TRUE;
cblas_cgemm3m( CblasRowMajor, CblasTrans, CblasNoTrans, 0, 2, 0,
ALPHA, A, 2, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 11; RowMajorStrg = TRUE;
cblas_cgemm3m( CblasRowMajor, CblasNoTrans, CblasTrans, 0, 0, 2,
ALPHA, A, 2, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 11; RowMajorStrg = TRUE;
cblas_cgemm3m( CblasRowMajor, CblasTrans, CblasTrans, 0, 0, 2,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 14; RowMajorStrg = TRUE;
cblas_cgemm3m( CblasRowMajor, CblasNoTrans, CblasNoTrans, 0, 2, 0,
ALPHA, A, 1, B, 2, BETA, C, 1 );
chkxer();
cblas_info = 14; RowMajorStrg = TRUE;
cblas_cgemm3m( CblasRowMajor, CblasNoTrans, CblasTrans, 0, 2, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 14; RowMajorStrg = TRUE;
cblas_cgemm3m( CblasRowMajor, CblasTrans, CblasNoTrans, 0, 2, 0,
ALPHA, A, 1, B, 2, BETA, C, 1 );
chkxer();
cblas_info = 14; RowMajorStrg = TRUE;
cblas_cgemm3m( CblasRowMajor, CblasTrans, CblasTrans, 0, 2, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
} else if (strncmp( sf,"cblas_cgemm" ,11)==0) {
cblas_rout = "cblas_cgemm" ;

File diff suppressed because it is too large Load Diff

View File

@@ -567,3 +567,81 @@ void F77_ctrsm(int *order, char *rtlf, char *uplow, char *transp, char *diagn,
void F77_cgemm3m(int *order, char *transpa, char *transpb, int *m, int *n,
int *k, CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a, int *lda,
CBLAS_TEST_COMPLEX *b, int *ldb, CBLAS_TEST_COMPLEX *beta,
CBLAS_TEST_COMPLEX *c, int *ldc ) {
CBLAS_TEST_COMPLEX *A, *B, *C;
int i,j,LDA, LDB, LDC;
enum CBLAS_TRANSPOSE transa, transb;
get_transpose_type(transpa, &transa);
get_transpose_type(transpb, &transb);
if (*order == TEST_ROW_MJR) {
if (transa == CblasNoTrans) {
LDA = *k+1;
A=(CBLAS_TEST_COMPLEX*)malloc((*m)*LDA*sizeof(CBLAS_TEST_COMPLEX));
for( i=0; i<*m; i++ )
for( j=0; j<*k; j++ ) {
A[i*LDA+j].real=a[j*(*lda)+i].real;
A[i*LDA+j].imag=a[j*(*lda)+i].imag;
}
}
else {
LDA = *m+1;
A=(CBLAS_TEST_COMPLEX* )malloc(LDA*(*k)*sizeof(CBLAS_TEST_COMPLEX));
for( i=0; i<*k; i++ )
for( j=0; j<*m; j++ ) {
A[i*LDA+j].real=a[j*(*lda)+i].real;
A[i*LDA+j].imag=a[j*(*lda)+i].imag;
}
}
if (transb == CblasNoTrans) {
LDB = *n+1;
B=(CBLAS_TEST_COMPLEX* )malloc((*k)*LDB*sizeof(CBLAS_TEST_COMPLEX) );
for( i=0; i<*k; i++ )
for( j=0; j<*n; j++ ) {
B[i*LDB+j].real=b[j*(*ldb)+i].real;
B[i*LDB+j].imag=b[j*(*ldb)+i].imag;
}
}
else {
LDB = *k+1;
B=(CBLAS_TEST_COMPLEX* )malloc(LDB*(*n)*sizeof(CBLAS_TEST_COMPLEX));
for( i=0; i<*n; i++ )
for( j=0; j<*k; j++ ) {
B[i*LDB+j].real=b[j*(*ldb)+i].real;
B[i*LDB+j].imag=b[j*(*ldb)+i].imag;
}
}
LDC = *n+1;
C=(CBLAS_TEST_COMPLEX* )malloc((*m)*LDC*sizeof(CBLAS_TEST_COMPLEX));
for( j=0; j<*n; j++ )
for( i=0; i<*m; i++ ) {
C[i*LDC+j].real=c[j*(*ldc)+i].real;
C[i*LDC+j].imag=c[j*(*ldc)+i].imag;
}
cblas_cgemm3m( CblasRowMajor, transa, transb, *m, *n, *k, alpha, A, LDA,
B, LDB, beta, C, LDC );
for( j=0; j<*n; j++ )
for( i=0; i<*m; i++ ) {
c[j*(*ldc)+i].real=C[i*LDC+j].real;
c[j*(*ldc)+i].imag=C[i*LDC+j].imag;
}
free(A);
free(B);
free(C);
}
else if (*order == TEST_COL_MJR)
cblas_cgemm3m( CblasColMajor, transa, transb, *m, *n, *k, alpha, a, *lda,
b, *ldb, beta, c, *ldc );
else
cblas_cgemm3m( UNDEFINED, transa, transb, *m, *n, *k, alpha, a, *lda,
b, *ldb, beta, c, *ldc );
}

View File

@@ -1,647 +0,0 @@
/*
* Written by D.P. Manley, Digital Equipment Corporation.
* Prefixed "C_" to BLAS routines and their declarations.
*
* Modified by T. H. Do, 4/15/98, SGI/CRAY Research.
*/
#include <stdlib.h>
#include "common.h"
#include "cblas_test.h"
#define TEST_COL_MJR 0
#define TEST_ROW_MJR 1
#define UNDEFINED -1
void F77_cgemm(int *order, char *transpa, char *transpb, int *m, int *n,
int *k, CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a, int *lda,
CBLAS_TEST_COMPLEX *b, int *ldb, CBLAS_TEST_COMPLEX *beta,
CBLAS_TEST_COMPLEX *c, int *ldc ) {
CBLAS_TEST_COMPLEX *A, *B, *C;
int i,j,LDA, LDB, LDC;
enum CBLAS_TRANSPOSE transa, transb;
get_transpose_type(transpa, &transa);
get_transpose_type(transpb, &transb);
if (*order == TEST_ROW_MJR) {
if (transa == CblasNoTrans) {
LDA = *k+1;
A=(CBLAS_TEST_COMPLEX*)malloc((*m)*LDA*sizeof(CBLAS_TEST_COMPLEX));
for( i=0; i<*m; i++ )
for( j=0; j<*k; j++ ) {
A[i*LDA+j].real=a[j*(*lda)+i].real;
A[i*LDA+j].imag=a[j*(*lda)+i].imag;
}
}
else {
LDA = *m+1;
A=(CBLAS_TEST_COMPLEX* )malloc(LDA*(*k)*sizeof(CBLAS_TEST_COMPLEX));
for( i=0; i<*k; i++ )
for( j=0; j<*m; j++ ) {
A[i*LDA+j].real=a[j*(*lda)+i].real;
A[i*LDA+j].imag=a[j*(*lda)+i].imag;
}
}
if (transb == CblasNoTrans) {
LDB = *n+1;
B=(CBLAS_TEST_COMPLEX* )malloc((*k)*LDB*sizeof(CBLAS_TEST_COMPLEX) );
for( i=0; i<*k; i++ )
for( j=0; j<*n; j++ ) {
B[i*LDB+j].real=b[j*(*ldb)+i].real;
B[i*LDB+j].imag=b[j*(*ldb)+i].imag;
}
}
else {
LDB = *k+1;
B=(CBLAS_TEST_COMPLEX* )malloc(LDB*(*n)*sizeof(CBLAS_TEST_COMPLEX));
for( i=0; i<*n; i++ )
for( j=0; j<*k; j++ ) {
B[i*LDB+j].real=b[j*(*ldb)+i].real;
B[i*LDB+j].imag=b[j*(*ldb)+i].imag;
}
}
LDC = *n+1;
C=(CBLAS_TEST_COMPLEX* )malloc((*m)*LDC*sizeof(CBLAS_TEST_COMPLEX));
for( j=0; j<*n; j++ )
for( i=0; i<*m; i++ ) {
C[i*LDC+j].real=c[j*(*ldc)+i].real;
C[i*LDC+j].imag=c[j*(*ldc)+i].imag;
}
cblas_cgemm( CblasRowMajor, transa, transb, *m, *n, *k, alpha, A, LDA,
B, LDB, beta, C, LDC );
for( j=0; j<*n; j++ )
for( i=0; i<*m; i++ ) {
c[j*(*ldc)+i].real=C[i*LDC+j].real;
c[j*(*ldc)+i].imag=C[i*LDC+j].imag;
}
free(A);
free(B);
free(C);
}
else if (*order == TEST_COL_MJR)
cblas_cgemm( CblasColMajor, transa, transb, *m, *n, *k, alpha, a, *lda,
b, *ldb, beta, c, *ldc );
else
cblas_cgemm( UNDEFINED, transa, transb, *m, *n, *k, alpha, a, *lda,
b, *ldb, beta, c, *ldc );
}
void F77_chemm(int *order, char *rtlf, char *uplow, int *m, int *n,
CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a, int *lda,
CBLAS_TEST_COMPLEX *b, int *ldb, CBLAS_TEST_COMPLEX *beta,
CBLAS_TEST_COMPLEX *c, int *ldc ) {
CBLAS_TEST_COMPLEX *A, *B, *C;
int i,j,LDA, LDB, LDC;
enum CBLAS_UPLO uplo;
enum CBLAS_SIDE side;
get_uplo_type(uplow,&uplo);
get_side_type(rtlf,&side);
if (*order == TEST_ROW_MJR) {
if (side == CblasLeft) {
LDA = *m+1;
A= (CBLAS_TEST_COMPLEX* )malloc((*m)*LDA*sizeof(CBLAS_TEST_COMPLEX));
for( i=0; i<*m; i++ )
for( j=0; j<*m; j++ ) {
A[i*LDA+j].real=a[j*(*lda)+i].real;
A[i*LDA+j].imag=a[j*(*lda)+i].imag;
}
}
else{
LDA = *n+1;
A=(CBLAS_TEST_COMPLEX* )malloc((*n)*LDA*sizeof(CBLAS_TEST_COMPLEX ) );
for( i=0; i<*n; i++ )
for( j=0; j<*n; j++ ) {
A[i*LDA+j].real=a[j*(*lda)+i].real;
A[i*LDA+j].imag=a[j*(*lda)+i].imag;
}
}
LDB = *n+1;
B=(CBLAS_TEST_COMPLEX* )malloc( (*m)*LDB*sizeof(CBLAS_TEST_COMPLEX ) );
for( i=0; i<*m; i++ )
for( j=0; j<*n; j++ ) {
B[i*LDB+j].real=b[j*(*ldb)+i].real;
B[i*LDB+j].imag=b[j*(*ldb)+i].imag;
}
LDC = *n+1;
C=(CBLAS_TEST_COMPLEX* )malloc((*m)*LDC*sizeof(CBLAS_TEST_COMPLEX ) );
for( j=0; j<*n; j++ )
for( i=0; i<*m; i++ ) {
C[i*LDC+j].real=c[j*(*ldc)+i].real;
C[i*LDC+j].imag=c[j*(*ldc)+i].imag;
}
cblas_chemm( CblasRowMajor, side, uplo, *m, *n, alpha, A, LDA, B, LDB,
beta, C, LDC );
for( j=0; j<*n; j++ )
for( i=0; i<*m; i++ ) {
c[j*(*ldc)+i].real=C[i*LDC+j].real;
c[j*(*ldc)+i].imag=C[i*LDC+j].imag;
}
free(A);
free(B);
free(C);
}
else if (*order == TEST_COL_MJR)
cblas_chemm( CblasColMajor, side, uplo, *m, *n, alpha, a, *lda, b, *ldb,
beta, c, *ldc );
else
cblas_chemm( UNDEFINED, side, uplo, *m, *n, alpha, a, *lda, b, *ldb,
beta, c, *ldc );
}
void F77_csymm(int *order, char *rtlf, char *uplow, int *m, int *n,
CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a, int *lda,
CBLAS_TEST_COMPLEX *b, int *ldb, CBLAS_TEST_COMPLEX *beta,
CBLAS_TEST_COMPLEX *c, int *ldc ) {
CBLAS_TEST_COMPLEX *A, *B, *C;
int i,j,LDA, LDB, LDC;
enum CBLAS_UPLO uplo;
enum CBLAS_SIDE side;
get_uplo_type(uplow,&uplo);
get_side_type(rtlf,&side);
if (*order == TEST_ROW_MJR) {
if (side == CblasLeft) {
LDA = *m+1;
A=(CBLAS_TEST_COMPLEX* )malloc((*m)*LDA*sizeof(CBLAS_TEST_COMPLEX));
for( i=0; i<*m; i++ )
for( j=0; j<*m; j++ )
A[i*LDA+j]=a[j*(*lda)+i];
}
else{
LDA = *n+1;
A=(CBLAS_TEST_COMPLEX* )malloc((*n)*LDA*sizeof(CBLAS_TEST_COMPLEX ) );
for( i=0; i<*n; i++ )
for( j=0; j<*n; j++ )
A[i*LDA+j]=a[j*(*lda)+i];
}
LDB = *n+1;
B=(CBLAS_TEST_COMPLEX* )malloc((*m)*LDB*sizeof(CBLAS_TEST_COMPLEX ));
for( i=0; i<*m; i++ )
for( j=0; j<*n; j++ )
B[i*LDB+j]=b[j*(*ldb)+i];
LDC = *n+1;
C=(CBLAS_TEST_COMPLEX* )malloc((*m)*LDC*sizeof(CBLAS_TEST_COMPLEX));
for( j=0; j<*n; j++ )
for( i=0; i<*m; i++ )
C[i*LDC+j]=c[j*(*ldc)+i];
cblas_csymm( CblasRowMajor, side, uplo, *m, *n, alpha, A, LDA, B, LDB,
beta, C, LDC );
for( j=0; j<*n; j++ )
for( i=0; i<*m; i++ )
c[j*(*ldc)+i]=C[i*LDC+j];
free(A);
free(B);
free(C);
}
else if (*order == TEST_COL_MJR)
cblas_csymm( CblasColMajor, side, uplo, *m, *n, alpha, a, *lda, b, *ldb,
beta, c, *ldc );
else
cblas_csymm( UNDEFINED, side, uplo, *m, *n, alpha, a, *lda, b, *ldb,
beta, c, *ldc );
}
void F77_cherk(int *order, char *uplow, char *transp, int *n, int *k,
float *alpha, CBLAS_TEST_COMPLEX *a, int *lda,
float *beta, CBLAS_TEST_COMPLEX *c, int *ldc ) {
int i,j,LDA,LDC;
CBLAS_TEST_COMPLEX *A, *C;
enum CBLAS_UPLO uplo;
enum CBLAS_TRANSPOSE trans;
get_uplo_type(uplow,&uplo);
get_transpose_type(transp,&trans);
if (*order == TEST_ROW_MJR) {
if (trans == CblasNoTrans) {
LDA = *k+1;
A=(CBLAS_TEST_COMPLEX* )malloc((*n)*LDA*sizeof(CBLAS_TEST_COMPLEX ) );
for( i=0; i<*n; i++ )
for( j=0; j<*k; j++ ) {
A[i*LDA+j].real=a[j*(*lda)+i].real;
A[i*LDA+j].imag=a[j*(*lda)+i].imag;
}
}
else{
LDA = *n+1;
A=(CBLAS_TEST_COMPLEX* )malloc((*k)*LDA*sizeof(CBLAS_TEST_COMPLEX ) );
for( i=0; i<*k; i++ )
for( j=0; j<*n; j++ ) {
A[i*LDA+j].real=a[j*(*lda)+i].real;
A[i*LDA+j].imag=a[j*(*lda)+i].imag;
}
}
LDC = *n+1;
C=(CBLAS_TEST_COMPLEX* )malloc((*n)*LDC*sizeof(CBLAS_TEST_COMPLEX ) );
for( i=0; i<*n; i++ )
for( j=0; j<*n; j++ ) {
C[i*LDC+j].real=c[j*(*ldc)+i].real;
C[i*LDC+j].imag=c[j*(*ldc)+i].imag;
}
cblas_cherk(CblasRowMajor, uplo, trans, *n, *k, *alpha, A, LDA, *beta,
C, LDC );
for( j=0; j<*n; j++ )
for( i=0; i<*n; i++ ) {
c[j*(*ldc)+i].real=C[i*LDC+j].real;
c[j*(*ldc)+i].imag=C[i*LDC+j].imag;
}
free(A);
free(C);
}
else if (*order == TEST_COL_MJR)
cblas_cherk(CblasColMajor, uplo, trans, *n, *k, *alpha, a, *lda, *beta,
c, *ldc );
else
cblas_cherk(UNDEFINED, uplo, trans, *n, *k, *alpha, a, *lda, *beta,
c, *ldc );
}
void F77_csyrk(int *order, char *uplow, char *transp, int *n, int *k,
CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a, int *lda,
CBLAS_TEST_COMPLEX *beta, CBLAS_TEST_COMPLEX *c, int *ldc ) {
int i,j,LDA,LDC;
CBLAS_TEST_COMPLEX *A, *C;
enum CBLAS_UPLO uplo;
enum CBLAS_TRANSPOSE trans;
get_uplo_type(uplow,&uplo);
get_transpose_type(transp,&trans);
if (*order == TEST_ROW_MJR) {
if (trans == CblasNoTrans) {
LDA = *k+1;
A=(CBLAS_TEST_COMPLEX* )malloc((*n)*LDA*sizeof(CBLAS_TEST_COMPLEX));
for( i=0; i<*n; i++ )
for( j=0; j<*k; j++ ) {
A[i*LDA+j].real=a[j*(*lda)+i].real;
A[i*LDA+j].imag=a[j*(*lda)+i].imag;
}
}
else{
LDA = *n+1;
A=(CBLAS_TEST_COMPLEX* )malloc((*k)*LDA*sizeof(CBLAS_TEST_COMPLEX ) );
for( i=0; i<*k; i++ )
for( j=0; j<*n; j++ ) {
A[i*LDA+j].real=a[j*(*lda)+i].real;
A[i*LDA+j].imag=a[j*(*lda)+i].imag;
}
}
LDC = *n+1;
C=(CBLAS_TEST_COMPLEX* )malloc((*n)*LDC*sizeof(CBLAS_TEST_COMPLEX ) );
for( i=0; i<*n; i++ )
for( j=0; j<*n; j++ ) {
C[i*LDC+j].real=c[j*(*ldc)+i].real;
C[i*LDC+j].imag=c[j*(*ldc)+i].imag;
}
cblas_csyrk(CblasRowMajor, uplo, trans, *n, *k, alpha, A, LDA, beta,
C, LDC );
for( j=0; j<*n; j++ )
for( i=0; i<*n; i++ ) {
c[j*(*ldc)+i].real=C[i*LDC+j].real;
c[j*(*ldc)+i].imag=C[i*LDC+j].imag;
}
free(A);
free(C);
}
else if (*order == TEST_COL_MJR)
cblas_csyrk(CblasColMajor, uplo, trans, *n, *k, alpha, a, *lda, beta,
c, *ldc );
else
cblas_csyrk(UNDEFINED, uplo, trans, *n, *k, alpha, a, *lda, beta,
c, *ldc );
}
void F77_cher2k(int *order, char *uplow, char *transp, int *n, int *k,
CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a, int *lda,
CBLAS_TEST_COMPLEX *b, int *ldb, float *beta,
CBLAS_TEST_COMPLEX *c, int *ldc ) {
int i,j,LDA,LDB,LDC;
CBLAS_TEST_COMPLEX *A, *B, *C;
enum CBLAS_UPLO uplo;
enum CBLAS_TRANSPOSE trans;
get_uplo_type(uplow,&uplo);
get_transpose_type(transp,&trans);
if (*order == TEST_ROW_MJR) {
if (trans == CblasNoTrans) {
LDA = *k+1;
LDB = *k+1;
A=(CBLAS_TEST_COMPLEX* )malloc((*n)*LDA*sizeof(CBLAS_TEST_COMPLEX ));
B=(CBLAS_TEST_COMPLEX* )malloc((*n)*LDB*sizeof(CBLAS_TEST_COMPLEX ));
for( i=0; i<*n; i++ )
for( j=0; j<*k; j++ ) {
A[i*LDA+j].real=a[j*(*lda)+i].real;
A[i*LDA+j].imag=a[j*(*lda)+i].imag;
B[i*LDB+j].real=b[j*(*ldb)+i].real;
B[i*LDB+j].imag=b[j*(*ldb)+i].imag;
}
}
else {
LDA = *n+1;
LDB = *n+1;
A=(CBLAS_TEST_COMPLEX* )malloc( LDA*(*k)*sizeof(CBLAS_TEST_COMPLEX ) );
B=(CBLAS_TEST_COMPLEX* )malloc( LDB*(*k)*sizeof(CBLAS_TEST_COMPLEX ) );
for( i=0; i<*k; i++ )
for( j=0; j<*n; j++ ){
A[i*LDA+j].real=a[j*(*lda)+i].real;
A[i*LDA+j].imag=a[j*(*lda)+i].imag;
B[i*LDB+j].real=b[j*(*ldb)+i].real;
B[i*LDB+j].imag=b[j*(*ldb)+i].imag;
}
}
LDC = *n+1;
C=(CBLAS_TEST_COMPLEX* )malloc( (*n)*LDC*sizeof(CBLAS_TEST_COMPLEX ) );
for( i=0; i<*n; i++ )
for( j=0; j<*n; j++ ) {
C[i*LDC+j].real=c[j*(*ldc)+i].real;
C[i*LDC+j].imag=c[j*(*ldc)+i].imag;
}
cblas_cher2k(CblasRowMajor, uplo, trans, *n, *k, alpha, A, LDA,
B, LDB, *beta, C, LDC );
for( j=0; j<*n; j++ )
for( i=0; i<*n; i++ ) {
c[j*(*ldc)+i].real=C[i*LDC+j].real;
c[j*(*ldc)+i].imag=C[i*LDC+j].imag;
}
free(A);
free(B);
free(C);
}
else if (*order == TEST_COL_MJR)
cblas_cher2k(CblasColMajor, uplo, trans, *n, *k, alpha, a, *lda,
b, *ldb, *beta, c, *ldc );
else
cblas_cher2k(UNDEFINED, uplo, trans, *n, *k, alpha, a, *lda,
b, *ldb, *beta, c, *ldc );
}
void F77_csyr2k(int *order, char *uplow, char *transp, int *n, int *k,
CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a, int *lda,
CBLAS_TEST_COMPLEX *b, int *ldb, CBLAS_TEST_COMPLEX *beta,
CBLAS_TEST_COMPLEX *c, int *ldc ) {
int i,j,LDA,LDB,LDC;
CBLAS_TEST_COMPLEX *A, *B, *C;
enum CBLAS_UPLO uplo;
enum CBLAS_TRANSPOSE trans;
get_uplo_type(uplow,&uplo);
get_transpose_type(transp,&trans);
if (*order == TEST_ROW_MJR) {
if (trans == CblasNoTrans) {
LDA = *k+1;
LDB = *k+1;
A=(CBLAS_TEST_COMPLEX* )malloc((*n)*LDA*sizeof(CBLAS_TEST_COMPLEX));
B=(CBLAS_TEST_COMPLEX* )malloc((*n)*LDB*sizeof(CBLAS_TEST_COMPLEX));
for( i=0; i<*n; i++ )
for( j=0; j<*k; j++ ) {
A[i*LDA+j].real=a[j*(*lda)+i].real;
A[i*LDA+j].imag=a[j*(*lda)+i].imag;
B[i*LDB+j].real=b[j*(*ldb)+i].real;
B[i*LDB+j].imag=b[j*(*ldb)+i].imag;
}
}
else {
LDA = *n+1;
LDB = *n+1;
A=(CBLAS_TEST_COMPLEX* )malloc(LDA*(*k)*sizeof(CBLAS_TEST_COMPLEX));
B=(CBLAS_TEST_COMPLEX* )malloc(LDB*(*k)*sizeof(CBLAS_TEST_COMPLEX));
for( i=0; i<*k; i++ )
for( j=0; j<*n; j++ ){
A[i*LDA+j].real=a[j*(*lda)+i].real;
A[i*LDA+j].imag=a[j*(*lda)+i].imag;
B[i*LDB+j].real=b[j*(*ldb)+i].real;
B[i*LDB+j].imag=b[j*(*ldb)+i].imag;
}
}
LDC = *n+1;
C=(CBLAS_TEST_COMPLEX* )malloc( (*n)*LDC*sizeof(CBLAS_TEST_COMPLEX));
for( i=0; i<*n; i++ )
for( j=0; j<*n; j++ ) {
C[i*LDC+j].real=c[j*(*ldc)+i].real;
C[i*LDC+j].imag=c[j*(*ldc)+i].imag;
}
cblas_csyr2k(CblasRowMajor, uplo, trans, *n, *k, alpha, A, LDA,
B, LDB, beta, C, LDC );
for( j=0; j<*n; j++ )
for( i=0; i<*n; i++ ) {
c[j*(*ldc)+i].real=C[i*LDC+j].real;
c[j*(*ldc)+i].imag=C[i*LDC+j].imag;
}
free(A);
free(B);
free(C);
}
else if (*order == TEST_COL_MJR)
cblas_csyr2k(CblasColMajor, uplo, trans, *n, *k, alpha, a, *lda,
b, *ldb, beta, c, *ldc );
else
cblas_csyr2k(UNDEFINED, uplo, trans, *n, *k, alpha, a, *lda,
b, *ldb, beta, c, *ldc );
}
void F77_ctrmm(int *order, char *rtlf, char *uplow, char *transp, char *diagn,
int *m, int *n, CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a,
int *lda, CBLAS_TEST_COMPLEX *b, int *ldb) {
int i,j,LDA,LDB;
CBLAS_TEST_COMPLEX *A, *B;
enum CBLAS_SIDE side;
enum CBLAS_DIAG diag;
enum CBLAS_UPLO uplo;
enum CBLAS_TRANSPOSE trans;
get_uplo_type(uplow,&uplo);
get_transpose_type(transp,&trans);
get_diag_type(diagn,&diag);
get_side_type(rtlf,&side);
if (*order == TEST_ROW_MJR) {
if (side == CblasLeft) {
LDA = *m+1;
A=(CBLAS_TEST_COMPLEX* )malloc((*m)*LDA*sizeof(CBLAS_TEST_COMPLEX));
for( i=0; i<*m; i++ )
for( j=0; j<*m; j++ ) {
A[i*LDA+j].real=a[j*(*lda)+i].real;
A[i*LDA+j].imag=a[j*(*lda)+i].imag;
}
}
else{
LDA = *n+1;
A=(CBLAS_TEST_COMPLEX* )malloc((*n)*LDA*sizeof(CBLAS_TEST_COMPLEX));
for( i=0; i<*n; i++ )
for( j=0; j<*n; j++ ) {
A[i*LDA+j].real=a[j*(*lda)+i].real;
A[i*LDA+j].imag=a[j*(*lda)+i].imag;
}
}
LDB = *n+1;
B=(CBLAS_TEST_COMPLEX* )malloc((*m)*LDB*sizeof(CBLAS_TEST_COMPLEX));
for( i=0; i<*m; i++ )
for( j=0; j<*n; j++ ) {
B[i*LDB+j].real=b[j*(*ldb)+i].real;
B[i*LDB+j].imag=b[j*(*ldb)+i].imag;
}
cblas_ctrmm(CblasRowMajor, side, uplo, trans, diag, *m, *n, alpha,
A, LDA, B, LDB );
for( j=0; j<*n; j++ )
for( i=0; i<*m; i++ ) {
b[j*(*ldb)+i].real=B[i*LDB+j].real;
b[j*(*ldb)+i].imag=B[i*LDB+j].imag;
}
free(A);
free(B);
}
else if (*order == TEST_COL_MJR)
cblas_ctrmm(CblasColMajor, side, uplo, trans, diag, *m, *n, alpha,
a, *lda, b, *ldb);
else
cblas_ctrmm(UNDEFINED, side, uplo, trans, diag, *m, *n, alpha,
a, *lda, b, *ldb);
}
void F77_ctrsm(int *order, char *rtlf, char *uplow, char *transp, char *diagn,
int *m, int *n, CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a,
int *lda, CBLAS_TEST_COMPLEX *b, int *ldb) {
int i,j,LDA,LDB;
CBLAS_TEST_COMPLEX *A, *B;
enum CBLAS_SIDE side;
enum CBLAS_DIAG diag;
enum CBLAS_UPLO uplo;
enum CBLAS_TRANSPOSE trans;
get_uplo_type(uplow,&uplo);
get_transpose_type(transp,&trans);
get_diag_type(diagn,&diag);
get_side_type(rtlf,&side);
if (*order == TEST_ROW_MJR) {
if (side == CblasLeft) {
LDA = *m+1;
A=(CBLAS_TEST_COMPLEX* )malloc( (*m)*LDA*sizeof(CBLAS_TEST_COMPLEX ) );
for( i=0; i<*m; i++ )
for( j=0; j<*m; j++ ) {
A[i*LDA+j].real=a[j*(*lda)+i].real;
A[i*LDA+j].imag=a[j*(*lda)+i].imag;
}
}
else{
LDA = *n+1;
A=(CBLAS_TEST_COMPLEX* )malloc((*n)*LDA*sizeof(CBLAS_TEST_COMPLEX));
for( i=0; i<*n; i++ )
for( j=0; j<*n; j++ ) {
A[i*LDA+j].real=a[j*(*lda)+i].real;
A[i*LDA+j].imag=a[j*(*lda)+i].imag;
}
}
LDB = *n+1;
B=(CBLAS_TEST_COMPLEX* )malloc((*m)*LDB*sizeof(CBLAS_TEST_COMPLEX));
for( i=0; i<*m; i++ )
for( j=0; j<*n; j++ ) {
B[i*LDB+j].real=b[j*(*ldb)+i].real;
B[i*LDB+j].imag=b[j*(*ldb)+i].imag;
}
cblas_ctrsm(CblasRowMajor, side, uplo, trans, diag, *m, *n, alpha,
A, LDA, B, LDB );
for( j=0; j<*n; j++ )
for( i=0; i<*m; i++ ) {
b[j*(*ldb)+i].real=B[i*LDB+j].real;
b[j*(*ldb)+i].imag=B[i*LDB+j].imag;
}
free(A);
free(B);
}
else if (*order == TEST_COL_MJR)
cblas_ctrsm(CblasColMajor, side, uplo, trans, diag, *m, *n, alpha,
a, *lda, b, *ldb);
else
cblas_ctrsm(UNDEFINED, side, uplo, trans, diag, *m, *n, alpha,
a, *lda, b, *ldb);
}
void F77_cgemm3m(int *order, char *transpa, char *transpb, int *m, int *n,
int *k, CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a, int *lda,
CBLAS_TEST_COMPLEX *b, int *ldb, CBLAS_TEST_COMPLEX *beta,
CBLAS_TEST_COMPLEX *c, int *ldc ) {
CBLAS_TEST_COMPLEX *A, *B, *C;
int i,j,LDA, LDB, LDC;
enum CBLAS_TRANSPOSE transa, transb;
get_transpose_type(transpa, &transa);
get_transpose_type(transpb, &transb);
if (*order == TEST_ROW_MJR) {
if (transa == CblasNoTrans) {
LDA = *k+1;
A=(CBLAS_TEST_COMPLEX*)malloc((*m)*LDA*sizeof(CBLAS_TEST_COMPLEX));
for( i=0; i<*m; i++ )
for( j=0; j<*k; j++ ) {
A[i*LDA+j].real=a[j*(*lda)+i].real;
A[i*LDA+j].imag=a[j*(*lda)+i].imag;
}
}
else {
LDA = *m+1;
A=(CBLAS_TEST_COMPLEX* )malloc(LDA*(*k)*sizeof(CBLAS_TEST_COMPLEX));
for( i=0; i<*k; i++ )
for( j=0; j<*m; j++ ) {
A[i*LDA+j].real=a[j*(*lda)+i].real;
A[i*LDA+j].imag=a[j*(*lda)+i].imag;
}
}
if (transb == CblasNoTrans) {
LDB = *n+1;
B=(CBLAS_TEST_COMPLEX* )malloc((*k)*LDB*sizeof(CBLAS_TEST_COMPLEX) );
for( i=0; i<*k; i++ )
for( j=0; j<*n; j++ ) {
B[i*LDB+j].real=b[j*(*ldb)+i].real;
B[i*LDB+j].imag=b[j*(*ldb)+i].imag;
}
}
else {
LDB = *k+1;
B=(CBLAS_TEST_COMPLEX* )malloc(LDB*(*n)*sizeof(CBLAS_TEST_COMPLEX));
for( i=0; i<*n; i++ )
for( j=0; j<*k; j++ ) {
B[i*LDB+j].real=b[j*(*ldb)+i].real;
B[i*LDB+j].imag=b[j*(*ldb)+i].imag;
}
}
LDC = *n+1;
C=(CBLAS_TEST_COMPLEX* )malloc((*m)*LDC*sizeof(CBLAS_TEST_COMPLEX));
for( j=0; j<*n; j++ )
for( i=0; i<*m; i++ ) {
C[i*LDC+j].real=c[j*(*ldc)+i].real;
C[i*LDC+j].imag=c[j*(*ldc)+i].imag;
}
cblas_cgemm3m( CblasRowMajor, transa, transb, *m, *n, *k, alpha, A, LDA,
B, LDB, beta, C, LDC );
for( j=0; j<*n; j++ )
for( i=0; i<*m; i++ ) {
c[j*(*ldc)+i].real=C[i*LDC+j].real;
c[j*(*ldc)+i].imag=C[i*LDC+j].imag;
}
free(A);
free(B);
free(C);
}
else if (*order == TEST_COL_MJR)
cblas_cgemm3m( CblasColMajor, transa, transb, *m, *n, *k, alpha, a, *lda,
b, *ldb, beta, c, *ldc );
else
cblas_cgemm3m( UNDEFINED, transa, transb, *m, *n, *k, alpha, a, *lda,
b, *ldb, beta, c, *ldc );
}

View File

@@ -49,7 +49,237 @@ void F77_z3chke(char * rout) {
if (strncmp( sf,"cblas_zgemm" ,11)==0) {
if (strncmp( sf,"cblas_zgemm3m" ,13)==0) {
cblas_rout = "cblas_zgemm3" ;
cblas_info = 1;
cblas_zgemm3m( INVALID, CblasNoTrans, CblasNoTrans, 0, 0, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 1;
cblas_zgemm3m( INVALID, CblasNoTrans, CblasTrans, 0, 0, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 1;
cblas_zgemm3m( INVALID, CblasTrans, CblasNoTrans, 0, 0, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 1;
cblas_zgemm3m( INVALID, CblasTrans, CblasTrans, 0, 0, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 2; RowMajorStrg = FALSE;
cblas_zgemm3m( CblasColMajor, INVALID, CblasNoTrans, 0, 0, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 2; RowMajorStrg = FALSE;
cblas_zgemm3m( CblasColMajor, INVALID, CblasTrans, 0, 0, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 3; RowMajorStrg = FALSE;
cblas_zgemm3m( CblasColMajor, CblasNoTrans, INVALID, 0, 0, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 3; RowMajorStrg = FALSE;
cblas_zgemm3m( CblasColMajor, CblasTrans, INVALID, 0, 0, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 4; RowMajorStrg = FALSE;
cblas_zgemm3m( CblasColMajor, CblasNoTrans, CblasNoTrans, INVALID, 0, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 4; RowMajorStrg = FALSE;
cblas_zgemm3m( CblasColMajor, CblasNoTrans, CblasTrans, INVALID, 0, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 4; RowMajorStrg = FALSE;
cblas_zgemm3m( CblasColMajor, CblasTrans, CblasNoTrans, INVALID, 0, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 4; RowMajorStrg = FALSE;
cblas_zgemm3m( CblasColMajor, CblasTrans, CblasTrans, INVALID, 0, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 5; RowMajorStrg = FALSE;
cblas_zgemm3m( CblasColMajor, CblasNoTrans, CblasNoTrans, 0, INVALID, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 5; RowMajorStrg = FALSE;
cblas_zgemm3m( CblasColMajor, CblasNoTrans, CblasTrans, 0, INVALID, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 5; RowMajorStrg = FALSE;
cblas_zgemm3m( CblasColMajor, CblasTrans, CblasNoTrans, 0, INVALID, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 5; RowMajorStrg = FALSE;
cblas_zgemm3m( CblasColMajor, CblasTrans, CblasTrans, 0, INVALID, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 6; RowMajorStrg = FALSE;
cblas_zgemm3m( CblasColMajor, CblasNoTrans, CblasNoTrans, 0, 0, INVALID,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 6; RowMajorStrg = FALSE;
cblas_zgemm3m( CblasColMajor, CblasNoTrans, CblasTrans, 0, 0, INVALID,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 6; RowMajorStrg = FALSE;
cblas_zgemm3m( CblasColMajor, CblasTrans, CblasNoTrans, 0, 0, INVALID,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 6; RowMajorStrg = FALSE;
cblas_zgemm3m( CblasColMajor, CblasTrans, CblasTrans, 0, 0, INVALID,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 9; RowMajorStrg = FALSE;
cblas_zgemm3m( CblasColMajor, CblasNoTrans, CblasNoTrans, 2, 0, 0,
ALPHA, A, 1, B, 1, BETA, C, 2 );
chkxer();
cblas_info = 9; RowMajorStrg = FALSE;
cblas_zgemm3m( CblasColMajor, CblasNoTrans, CblasTrans, 2, 0, 0,
ALPHA, A, 1, B, 1, BETA, C, 2 );
chkxer();
cblas_info = 9; RowMajorStrg = FALSE;
cblas_zgemm3m( CblasColMajor, CblasTrans, CblasNoTrans, 0, 0, 2,
ALPHA, A, 1, B, 2, BETA, C, 1 );
chkxer();
cblas_info = 9; RowMajorStrg = FALSE;
cblas_zgemm3m( CblasColMajor, CblasTrans, CblasTrans, 0, 0, 2,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 11; RowMajorStrg = FALSE;
cblas_zgemm3m( CblasColMajor, CblasNoTrans, CblasNoTrans, 0, 0, 2,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 11; RowMajorStrg = FALSE;
cblas_zgemm3m( CblasColMajor, CblasTrans, CblasNoTrans, 0, 0, 2,
ALPHA, A, 2, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 11; RowMajorStrg = FALSE;
cblas_zgemm3m( CblasColMajor, CblasNoTrans, CblasTrans, 0, 2, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 11; RowMajorStrg = FALSE;
cblas_zgemm3m( CblasColMajor, CblasTrans, CblasTrans, 0, 2, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 14; RowMajorStrg = FALSE;
cblas_zgemm3m( CblasColMajor, CblasNoTrans, CblasNoTrans, 2, 0, 0,
ALPHA, A, 2, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 14; RowMajorStrg = FALSE;
cblas_zgemm3m( CblasColMajor, CblasNoTrans, CblasTrans, 2, 0, 0,
ALPHA, A, 2, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 14; RowMajorStrg = FALSE;
cblas_zgemm3m( CblasColMajor, CblasTrans, CblasNoTrans, 2, 0, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 14; RowMajorStrg = FALSE;
cblas_zgemm3m( CblasColMajor, CblasTrans, CblasTrans, 2, 0, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 4; RowMajorStrg = TRUE;
cblas_zgemm3m( CblasRowMajor, CblasNoTrans, CblasNoTrans, INVALID, 0, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 4; RowMajorStrg = TRUE;
cblas_zgemm3m( CblasRowMajor, CblasNoTrans, CblasTrans, INVALID, 0, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 4; RowMajorStrg = TRUE;
cblas_zgemm3m( CblasRowMajor, CblasTrans, CblasNoTrans, INVALID, 0, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 4; RowMajorStrg = TRUE;
cblas_zgemm3m( CblasRowMajor, CblasTrans, CblasTrans, INVALID, 0, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 5; RowMajorStrg = TRUE;
cblas_zgemm3m( CblasRowMajor, CblasNoTrans, CblasNoTrans, 0, INVALID, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 5; RowMajorStrg = TRUE;
cblas_zgemm3m( CblasRowMajor, CblasNoTrans, CblasTrans, 0, INVALID, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 5; RowMajorStrg = TRUE;
cblas_zgemm3m( CblasRowMajor, CblasTrans, CblasNoTrans, 0, INVALID, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 5; RowMajorStrg = TRUE;
cblas_zgemm3m( CblasRowMajor, CblasTrans, CblasTrans, 0, INVALID, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 6; RowMajorStrg = TRUE;
cblas_zgemm3m( CblasRowMajor, CblasNoTrans, CblasNoTrans, 0, 0, INVALID,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 6; RowMajorStrg = TRUE;
cblas_zgemm3m( CblasRowMajor, CblasNoTrans, CblasTrans, 0, 0, INVALID,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 6; RowMajorStrg = TRUE;
cblas_zgemm3m( CblasRowMajor, CblasTrans, CblasNoTrans, 0, 0, INVALID,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 6; RowMajorStrg = TRUE;
cblas_zgemm3m( CblasRowMajor, CblasTrans, CblasTrans, 0, 0, INVALID,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 9; RowMajorStrg = TRUE;
cblas_zgemm3m( CblasRowMajor, CblasNoTrans, CblasNoTrans, 0, 0, 2,
ALPHA, A, 1, B, 1, BETA, C, 2 );
chkxer();
cblas_info = 9; RowMajorStrg = TRUE;
cblas_zgemm3m( CblasRowMajor, CblasNoTrans, CblasTrans, 0, 0, 2,
ALPHA, A, 1, B, 2, BETA, C, 2 );
chkxer();
cblas_info = 9; RowMajorStrg = TRUE;
cblas_zgemm3m( CblasRowMajor, CblasTrans, CblasNoTrans, 2, 0, 0,
ALPHA, A, 1, B, 2, BETA, C, 1 );
chkxer();
cblas_info = 9; RowMajorStrg = TRUE;
cblas_zgemm3m( CblasRowMajor, CblasTrans, CblasTrans, 2, 0, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 11; RowMajorStrg = TRUE;
cblas_zgemm3m( CblasRowMajor, CblasNoTrans, CblasNoTrans, 0, 2, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 11; RowMajorStrg = TRUE;
cblas_zgemm3m( CblasRowMajor, CblasTrans, CblasNoTrans, 0, 2, 0,
ALPHA, A, 2, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 11; RowMajorStrg = TRUE;
cblas_zgemm3m( CblasRowMajor, CblasNoTrans, CblasTrans, 0, 0, 2,
ALPHA, A, 2, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 11; RowMajorStrg = TRUE;
cblas_zgemm3m( CblasRowMajor, CblasTrans, CblasTrans, 0, 0, 2,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 14; RowMajorStrg = TRUE;
cblas_zgemm3m( CblasRowMajor, CblasNoTrans, CblasNoTrans, 0, 2, 0,
ALPHA, A, 1, B, 2, BETA, C, 1 );
chkxer();
cblas_info = 14; RowMajorStrg = TRUE;
cblas_zgemm3m( CblasRowMajor, CblasNoTrans, CblasTrans, 0, 2, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
cblas_info = 14; RowMajorStrg = TRUE;
cblas_zgemm3m( CblasRowMajor, CblasTrans, CblasNoTrans, 0, 2, 0,
ALPHA, A, 1, B, 2, BETA, C, 1 );
chkxer();
cblas_info = 14; RowMajorStrg = TRUE;
cblas_zgemm3m( CblasRowMajor, CblasTrans, CblasTrans, 0, 2, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
chkxer();
} else if (strncmp( sf,"cblas_zgemm" ,11)==0) {
cblas_rout = "cblas_zgemm" ;
cblas_info = 1;

File diff suppressed because it is too large Load Diff

View File

@@ -564,3 +564,80 @@ void F77_ztrsm(int *order, char *rtlf, char *uplow, char *transp, char *diagn,
}
void F77_zgemm3m(int *order, char *transpa, char *transpb, int *m, int *n,
int *k, CBLAS_TEST_ZOMPLEX *alpha, CBLAS_TEST_ZOMPLEX *a, int *lda,
CBLAS_TEST_ZOMPLEX *b, int *ldb, CBLAS_TEST_ZOMPLEX *beta,
CBLAS_TEST_ZOMPLEX *c, int *ldc ) {
CBLAS_TEST_ZOMPLEX *A, *B, *C;
int i,j,LDA, LDB, LDC;
enum CBLAS_TRANSPOSE transa, transb;
get_transpose_type(transpa, &transa);
get_transpose_type(transpb, &transb);
if (*order == TEST_ROW_MJR) {
if (transa == CblasNoTrans) {
LDA = *k+1;
A=(CBLAS_TEST_ZOMPLEX*)malloc((*m)*LDA*sizeof(CBLAS_TEST_ZOMPLEX));
for( i=0; i<*m; i++ )
for( j=0; j<*k; j++ ) {
A[i*LDA+j].real=a[j*(*lda)+i].real;
A[i*LDA+j].imag=a[j*(*lda)+i].imag;
}
}
else {
LDA = *m+1;
A=(CBLAS_TEST_ZOMPLEX* )malloc(LDA*(*k)*sizeof(CBLAS_TEST_ZOMPLEX));
for( i=0; i<*k; i++ )
for( j=0; j<*m; j++ ) {
A[i*LDA+j].real=a[j*(*lda)+i].real;
A[i*LDA+j].imag=a[j*(*lda)+i].imag;
}
}
if (transb == CblasNoTrans) {
LDB = *n+1;
B=(CBLAS_TEST_ZOMPLEX* )malloc((*k)*LDB*sizeof(CBLAS_TEST_ZOMPLEX) );
for( i=0; i<*k; i++ )
for( j=0; j<*n; j++ ) {
B[i*LDB+j].real=b[j*(*ldb)+i].real;
B[i*LDB+j].imag=b[j*(*ldb)+i].imag;
}
}
else {
LDB = *k+1;
B=(CBLAS_TEST_ZOMPLEX* )malloc(LDB*(*n)*sizeof(CBLAS_TEST_ZOMPLEX));
for( i=0; i<*n; i++ )
for( j=0; j<*k; j++ ) {
B[i*LDB+j].real=b[j*(*ldb)+i].real;
B[i*LDB+j].imag=b[j*(*ldb)+i].imag;
}
}
LDC = *n+1;
C=(CBLAS_TEST_ZOMPLEX* )malloc((*m)*LDC*sizeof(CBLAS_TEST_ZOMPLEX));
for( j=0; j<*n; j++ )
for( i=0; i<*m; i++ ) {
C[i*LDC+j].real=c[j*(*ldc)+i].real;
C[i*LDC+j].imag=c[j*(*ldc)+i].imag;
}
cblas_zgemm3m( CblasRowMajor, transa, transb, *m, *n, *k, alpha, A, LDA,
B, LDB, beta, C, LDC );
for( j=0; j<*n; j++ )
for( i=0; i<*m; i++ ) {
c[j*(*ldc)+i].real=C[i*LDC+j].real;
c[j*(*ldc)+i].imag=C[i*LDC+j].imag;
}
free(A);
free(B);
free(C);
}
else if (*order == TEST_COL_MJR)
cblas_zgemm3m( CblasColMajor, transa, transb, *m, *n, *k, alpha, a, *lda,
b, *ldb, beta, c, *ldc );
else
cblas_zgemm3m( UNDEFINED, transa, transb, *m, *n, *k, alpha, a, *lda,
b, *ldb, beta, c, *ldc );
}

Some files were not shown because too many files have changed in this diff Show More