Merge branch 'develop'
This commit is contained in:
		
						commit
						53e849f4fc
					
				| 
						 | 
				
			
			@ -15,6 +15,7 @@ lapack-netlib/make.inc
 | 
			
		|||
lapack-netlib/lapacke/include/lapacke_mangling.h
 | 
			
		||||
lapack-netlib/TESTING/testing_results.txt
 | 
			
		||||
*.so
 | 
			
		||||
*.so.*
 | 
			
		||||
*.a
 | 
			
		||||
.svn
 | 
			
		||||
*~
 | 
			
		||||
| 
						 | 
				
			
			@ -65,3 +66,5 @@ test/sblat3
 | 
			
		|||
test/zblat1
 | 
			
		||||
test/zblat2
 | 
			
		||||
test/zblat3
 | 
			
		||||
build
 | 
			
		||||
build.*
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1,4 +1,13 @@
 | 
			
		|||
language: c
 | 
			
		||||
 | 
			
		||||
notifications:
 | 
			
		||||
  webhooks:
 | 
			
		||||
    urls:
 | 
			
		||||
      - https://webhooks.gitter.im/e/8a6e4470a0cebd090344
 | 
			
		||||
    on_success: change  # options: [always|never|change] default: always
 | 
			
		||||
    on_failure: always  # options: [always|never|change] default: always
 | 
			
		||||
    on_start: never     # options: [always|never|change] default: always
 | 
			
		||||
 | 
			
		||||
compiler:
 | 
			
		||||
  - gcc
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -0,0 +1,190 @@
 | 
			
		|||
##
 | 
			
		||||
## Author: Hank Anderson <hank@statease.com>
 | 
			
		||||
##
 | 
			
		||||
 | 
			
		||||
cmake_minimum_required(VERSION 2.8.4)
 | 
			
		||||
project(OpenBLAS)
 | 
			
		||||
set(OpenBLAS_MAJOR_VERSION 0)
 | 
			
		||||
set(OpenBLAS_MINOR_VERSION 2)
 | 
			
		||||
set(OpenBLAS_PATCH_VERSION 14)
 | 
			
		||||
set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}")
 | 
			
		||||
 | 
			
		||||
enable_language(ASM)
 | 
			
		||||
enable_language(C)
 | 
			
		||||
 | 
			
		||||
if(MSVC)
 | 
			
		||||
set(OpenBLAS_LIBNAME libopenblas)
 | 
			
		||||
else()
 | 
			
		||||
set(OpenBLAS_LIBNAME openblas)
 | 
			
		||||
endif()
 | 
			
		||||
 | 
			
		||||
#######
 | 
			
		||||
if(MSVC)
 | 
			
		||||
option(BUILD_WITHOUT_LAPACK "Without LAPACK and LAPACKE (Only BLAS or CBLAS)" ON)
 | 
			
		||||
endif()
 | 
			
		||||
option(BUILD_WITHOUT_CBLAS "Without CBLAS" OFF)
 | 
			
		||||
option(BUILD_DEBUG "Build Debug Version" OFF)
 | 
			
		||||
#######
 | 
			
		||||
if(BUILD_WITHOUT_LAPACK)
 | 
			
		||||
set(NO_LAPACK 1)
 | 
			
		||||
set(NO_LAPACKE 1)
 | 
			
		||||
endif()
 | 
			
		||||
 | 
			
		||||
if(BUILD_DEBUG)
 | 
			
		||||
set(CMAKE_BUILD_TYPE Debug)
 | 
			
		||||
else()
 | 
			
		||||
set(CMAKE_BUILD_TYPE Release)
 | 
			
		||||
endif()
 | 
			
		||||
 | 
			
		||||
if(BUILD_WITHOUT_CBLAS)
 | 
			
		||||
set(NO_CBLAS 1)
 | 
			
		||||
endif()
 | 
			
		||||
 | 
			
		||||
#######
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
message(WARNING "CMake support is experimental. This will not produce the same Makefiles that OpenBLAS ships with. Only x86 support is currently available.")
 | 
			
		||||
 | 
			
		||||
include("${CMAKE_SOURCE_DIR}/cmake/utils.cmake")
 | 
			
		||||
include("${CMAKE_SOURCE_DIR}/cmake/system.cmake")
 | 
			
		||||
 | 
			
		||||
set(BLASDIRS interface driver/level2 driver/level3 driver/others)
 | 
			
		||||
 | 
			
		||||
if (NOT DYNAMIC_ARCH)
 | 
			
		||||
  list(APPEND BLASDIRS kernel)
 | 
			
		||||
endif ()
 | 
			
		||||
 | 
			
		||||
if (DEFINED UTEST_CHECK)
 | 
			
		||||
  set(SANITY_CHECK 1)
 | 
			
		||||
endif ()
 | 
			
		||||
 | 
			
		||||
if (DEFINED SANITY_CHECK)
 | 
			
		||||
  list(APPEND BLASDIRS reference)
 | 
			
		||||
endif ()
 | 
			
		||||
 | 
			
		||||
set(SUBDIRS	${BLASDIRS})
 | 
			
		||||
if (NOT NO_LAPACK)
 | 
			
		||||
  list(APPEND SUBDIRS lapack)
 | 
			
		||||
endif ()
 | 
			
		||||
 | 
			
		||||
# set which float types we want to build for
 | 
			
		||||
if (NOT DEFINED BUILD_SINGLE AND NOT DEFINED BUILD_DOUBLE AND NOT DEFINED BUILD_COMPLEX AND NOT DEFINED BUILD_COMPLEX16)
 | 
			
		||||
  # if none are defined, build for all
 | 
			
		||||
  set(BUILD_SINGLE true)
 | 
			
		||||
  set(BUILD_DOUBLE true)
 | 
			
		||||
  set(BUILD_COMPLEX true)
 | 
			
		||||
  set(BUILD_COMPLEX16 true)
 | 
			
		||||
endif ()
 | 
			
		||||
 | 
			
		||||
set(FLOAT_TYPES "")
 | 
			
		||||
if (BUILD_SINGLE)
 | 
			
		||||
  message(STATUS "Building Single Precision")
 | 
			
		||||
  list(APPEND FLOAT_TYPES "SINGLE") # defines nothing
 | 
			
		||||
endif ()
 | 
			
		||||
 | 
			
		||||
if (BUILD_DOUBLE)
 | 
			
		||||
  message(STATUS "Building Double Precision")
 | 
			
		||||
  list(APPEND FLOAT_TYPES "DOUBLE") # defines DOUBLE
 | 
			
		||||
endif ()
 | 
			
		||||
 | 
			
		||||
if (BUILD_COMPLEX)
 | 
			
		||||
  message(STATUS "Building Complex Precision")
 | 
			
		||||
  list(APPEND FLOAT_TYPES "COMPLEX") # defines COMPLEX
 | 
			
		||||
endif ()
 | 
			
		||||
 | 
			
		||||
if (BUILD_COMPLEX16)
 | 
			
		||||
  message(STATUS "Building Double Complex Precision")
 | 
			
		||||
  list(APPEND FLOAT_TYPES "ZCOMPLEX") # defines COMPLEX and DOUBLE
 | 
			
		||||
endif ()
 | 
			
		||||
 | 
			
		||||
set(SUBDIRS_ALL ${SUBDIRS} test ctest utest exports benchmark ../laswp ../bench)
 | 
			
		||||
 | 
			
		||||
# all :: libs netlib tests shared
 | 
			
		||||
 | 
			
		||||
# libs :
 | 
			
		||||
if (NOT DEFINED CORE OR "${CORE}" STREQUAL "UNKNOWN")
 | 
			
		||||
  message(FATAL_ERROR "Detecting CPU failed. Please set TARGET explicitly, e.g. make TARGET=your_cpu_target. Please read README for details.")
 | 
			
		||||
endif ()
 | 
			
		||||
 | 
			
		||||
# Refuse to configure when both library flavours are disabled.
# The variables are referenced by name (not via ${...}) so the condition
# stays well-formed even when one or both of them are unset or empty.
if (NO_STATIC AND NO_SHARED)
 | 
			
		||||
  message(FATAL_ERROR "Neither static nor shared are enabled.")
 | 
			
		||||
endif ()
 | 
			
		||||
 | 
			
		||||
# get obj vars into format that add_library likes: $<TARGET_OBJECTS:objlib> (see http://www.cmake.org/cmake/help/v3.0/command/add_library.html)
 | 
			
		||||
set(TARGET_OBJS "")
 | 
			
		||||
foreach (SUBDIR ${SUBDIRS})
 | 
			
		||||
  add_subdirectory(${SUBDIR})
 | 
			
		||||
  string(REPLACE "/" "_" subdir_obj ${SUBDIR})
 | 
			
		||||
  list(APPEND TARGET_OBJS "$<TARGET_OBJECTS:${subdir_obj}>")
 | 
			
		||||
endforeach ()
 | 
			
		||||
 | 
			
		||||
# netlib:
 | 
			
		||||
 | 
			
		||||
# Can't just use lapack-netlib's CMake files, since they are set up to search for BLAS, build and install a binary. We just want to build a couple of lib files out of lapack and lapacke.
 | 
			
		||||
# Not using add_subdirectory here because lapack-netlib already has its own CMakeLists.txt. Instead include a cmake script with the sources we want.
 | 
			
		||||
if (NOT NOFORTRAN AND NOT NO_LAPACK)
 | 
			
		||||
  include("${CMAKE_SOURCE_DIR}/cmake/lapack.cmake")
 | 
			
		||||
if (NOT NO_LAPACKE)
 | 
			
		||||
  include("${CMAKE_SOURCE_DIR}/cmake/lapacke.cmake")
 | 
			
		||||
endif ()
 | 
			
		||||
endif ()
 | 
			
		||||
 | 
			
		||||
#Only generate .def for dll on MSVC
 | 
			
		||||
if(MSVC)
 | 
			
		||||
set(OpenBLAS_DEF_FILE "${PROJECT_BINARY_DIR}/openblas.def")
 | 
			
		||||
endif()
 | 
			
		||||
 | 
			
		||||
# add objects to the openblas lib
 | 
			
		||||
add_library(${OpenBLAS_LIBNAME} SHARED ${LA_SOURCES} ${LAPACKE_SOURCES} ${TARGET_OBJS} ${OpenBLAS_DEF_FILE})
 | 
			
		||||
 | 
			
		||||
include("${CMAKE_SOURCE_DIR}/cmake/export.cmake")
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
if(NOT MSVC)
 | 
			
		||||
# MSVC builds only the shared library; the static library is added for other toolchains.
 | 
			
		||||
add_library(${OpenBLAS_LIBNAME}_static STATIC ${LA_SOURCES} ${LAPACKE_SOURCES} ${TARGET_OBJS})
 | 
			
		||||
set_target_properties(${OpenBLAS_LIBNAME}_static PROPERTIES OUTPUT_NAME ${OpenBLAS_LIBNAME})
 | 
			
		||||
set_target_properties(${OpenBLAS_LIBNAME}_static PROPERTIES CLEAN_DIRECT_OUTPUT 1)
 | 
			
		||||
 | 
			
		||||
if(SMP)
 | 
			
		||||
target_link_libraries(${OpenBLAS_LIBNAME} pthread)
 | 
			
		||||
target_link_libraries(${OpenBLAS_LIBNAME}_static pthread)
 | 
			
		||||
endif()
 | 
			
		||||
 | 
			
		||||
#build test and ctest
 | 
			
		||||
enable_testing()
 | 
			
		||||
add_subdirectory(test)
 | 
			
		||||
if(NOT NO_CBLAS)
 | 
			
		||||
add_subdirectory(ctest)
 | 
			
		||||
endif()
 | 
			
		||||
endif()
 | 
			
		||||
 | 
			
		||||
set_target_properties(${OpenBLAS_LIBNAME} PROPERTIES 
 | 
			
		||||
  VERSION ${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}
 | 
			
		||||
  SOVERSION ${OpenBLAS_MAJOR_VERSION}
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# TODO: Why is the config saved here? Is this necessary with CMake?
 | 
			
		||||
#Save the config files for installation
 | 
			
		||||
#	@cp Makefile.conf Makefile.conf_last
 | 
			
		||||
#	@cp config.h config_last.h
 | 
			
		||||
#ifdef QUAD_PRECISION
 | 
			
		||||
#	@echo "#define QUAD_PRECISION">> config_last.h
 | 
			
		||||
#endif
 | 
			
		||||
#ifeq ($(EXPRECISION), 1)
 | 
			
		||||
#	@echo "#define EXPRECISION">> config_last.h
 | 
			
		||||
#endif
 | 
			
		||||
###
 | 
			
		||||
#ifeq ($(DYNAMIC_ARCH), 1)
 | 
			
		||||
#	@$(MAKE) -C kernel commonlibs || exit 1
 | 
			
		||||
#	@for d in $(DYNAMIC_CORE) ; \
 | 
			
		||||
#	do  $(MAKE) GOTOBLAS_MAKEFILE= -C kernel TARGET_CORE=$$d kernel || exit 1 ;\
 | 
			
		||||
#	done
 | 
			
		||||
#	@echo DYNAMIC_ARCH=1 >> Makefile.conf_last
 | 
			
		||||
#endif
 | 
			
		||||
#ifdef USE_THREAD
 | 
			
		||||
#	@echo USE_THREAD=$(USE_THREAD) >>  Makefile.conf_last
 | 
			
		||||
#endif
 | 
			
		||||
#	@touch lib.grd
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -127,5 +127,8 @@ In chronological order:
 | 
			
		|||
* Ton van den Heuvel <https://github.com/ton>
 | 
			
		||||
  * [2015-03-18] Fix race condition during shutdown causing a crash in gotoblas_set_affinity().
 | 
			
		||||
 | 
			
		||||
* Martin Koehler <https://github.com/grisuthedragon/>
 | 
			
		||||
  * [2015-09-07] Improved imatcopy
 | 
			
		||||
 | 
			
		||||
* [Your name or handle] <[email or website]>
 | 
			
		||||
  * [Date] [Brief summary of your changes]
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1,4 +1,57 @@
 | 
			
		|||
OpenBLAS ChangeLog
 | 
			
		||||
====================================================================
 | 
			
		||||
Version 0.2.15
 | 
			
		||||
27-Oct-2015
 | 
			
		||||
common:
 | 
			
		||||
	* Support cmake on x86/x86-64. Natively compiling on MS Visual Studio.
 | 
			
		||||
	  (Experimental. Thanks to Hank Anderson for the initial cmake porting work.)
 | 
			
		||||
	  
 | 
			
		||||
	  On Linux and Mac OSX, OpenBLAS cmake supports assembly kernels.
 | 
			
		||||
	  e.g. cmake .
 | 
			
		||||
	       make
 | 
			
		||||
	       make test (Optional)
 | 
			
		||||
 | 
			
		||||
	  On Windows MS Visual Studio, OpenBLAS cmake only supports C kernels.
 | 
			
		||||
	  (OpenBLAS uses AT&T style assembly, which is not supported by MSVC.)
 | 
			
		||||
	  e.g. cmake -G "Visual Studio 12 Win64" .
 | 
			
		||||
	       Open OpenBLAS.sln and build.
 | 
			
		||||
	  
 | 
			
		||||
	* Enable MAX_STACK_ALLOC flags by default.
 | 
			
		||||
	  Improve ger and gemv for small matrices.
 | 
			
		||||
	* Improve gemv parallel with small m and large n case.
 | 
			
		||||
	* Improve ?imatcopy when lda==ldb (#633. Thanks, Martin Koehler)
 | 
			
		||||
	* Add vecLib benchmarks (#565. Thanks, Andreas Noack.)
 | 
			
		||||
	* Fix LAPACK lantr for row major matrices (#634. Thanks, Dan Kortschak)
 | 
			
		||||
	* Fix LAPACKE lansy (#640. Thanks, Dan Kortschak)
 | 
			
		||||
	* Import bug fixes for LAPACKE s/dormlq, c/zunmlq 
 | 
			
		||||
	* Raise the signal when pthread_create fails (#668. Thanks, James K. Lowden)
 | 
			
		||||
	* Remove g77 from compiler list.
 | 
			
		||||
	* Enable AppVeyor Windows CI.
 | 
			
		||||
 | 
			
		||||
x86/x86-64:
 | 
			
		||||
	* Support pure C generic kernels for x86/x86-64.
 | 
			
		||||
	* Support Intel Broadwell and Skylake by Haswell kernels.
 | 
			
		||||
	* Support AMD Excavator by Steamroller kernels.
 | 
			
		||||
	* Optimize s/d/c/zdot for Intel SandyBridge and Haswell.
 | 
			
		||||
	* Optimize s/d/c/zdot for AMD Piledriver and Steamroller.
 | 
			
		||||
	* Optimize s/d/c/zaxpy for Intel SandyBridge and Haswell.
 | 
			
		||||
	* Optimize s/d/c/zaxpy for AMD Piledriver and Steamroller.
 | 
			
		||||
	* Optimize d/c/zscal for Intel Haswell, dscal for Intel SandyBridge.
 | 
			
		||||
	* Optimize d/c/zscal for AMD Bulldozer, Piledriver and Steamroller.
 | 
			
		||||
	* Optimize s/dger for Intel SandyBridge.
 | 
			
		||||
	* Optimize s/dsymv for Intel SandyBridge.
 | 
			
		||||
	* Optimize ssymv for Intel Haswell.
 | 
			
		||||
	* Optimize dgemv for Intel Nehalem and Haswell.
 | 
			
		||||
	* Optimize dtrmm for Intel Haswell.
 | 
			
		||||
 | 
			
		||||
ARM:
 | 
			
		||||
	* Support Android NDK armeabi-v7a-hard ABI (-mfloat-abi=hard)
 | 
			
		||||
	  e.g. make HOSTCC=gcc CC=arm-linux-androideabi-gcc NO_LAPACK=1 TARGET=ARMV7
 | 
			
		||||
	* Fix lock, rpcc bugs (#616, #617. Thanks, Grazvydas Ignotas)
 | 
			
		||||
POWER:
 | 
			
		||||
	* Support ppc64le platform (ELF ABI v2. #612. Thanks, Matthew Brandyberry.)
 | 
			
		||||
	* Support POWER7/8 by POWER6 kernels. (#612. Thanks, Fábio Perez.)
 | 
			
		||||
 | 
			
		||||
====================================================================
 | 
			
		||||
Version 0.2.14
 | 
			
		||||
24-Mar-2015
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										6
									
								
								Makefile
								
								
								
								
							
							
						
						
									
										6
									
								
								Makefile
								
								
								
								
							| 
						 | 
				
			
			@ -20,6 +20,8 @@ ifneq ($(NO_LAPACK), 1)
 | 
			
		|||
SUBDIRS	+= lapack
 | 
			
		||||
endif
 | 
			
		||||
 | 
			
		||||
LAPACK_NOOPT := $(filter-out -O0 -O1 -O2 -O3 -Ofast,$(LAPACK_FFLAGS))
 | 
			
		||||
 | 
			
		||||
SUBDIRS_ALL = $(SUBDIRS) test ctest utest exports benchmark ../laswp ../bench
 | 
			
		||||
 | 
			
		||||
.PHONY : all libs netlib test ctest shared install
 | 
			
		||||
| 
						 | 
				
			
			@ -131,7 +133,7 @@ ifeq ($(CORE), UNKOWN)
 | 
			
		|||
	$(error OpenBLAS: Detecting CPU failed. Please set TARGET explicitly, e.g. make TARGET=your_cpu_target. Please read README for the detail.)
 | 
			
		||||
endif
 | 
			
		||||
ifeq ($(NOFORTRAN), 1)
 | 
			
		||||
	$(error OpenBLAS: Detecting fortran compiler failed. Please install fortran compiler, e.g. gfortran, ifort, openf90.)
 | 
			
		||||
	$(info OpenBLAS: Detecting fortran compiler failed. Cannot compile LAPACK. Only compile BLAS.)
 | 
			
		||||
endif
 | 
			
		||||
ifeq ($(NO_STATIC), 1)
 | 
			
		||||
ifeq ($(NO_SHARED), 1)
 | 
			
		||||
| 
						 | 
				
			
			@ -231,7 +233,7 @@ ifndef NOFORTRAN
 | 
			
		|||
	-@echo "FORTRAN     = $(FC)" > $(NETLIB_LAPACK_DIR)/make.inc
 | 
			
		||||
	-@echo "OPTS        = $(LAPACK_FFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc
 | 
			
		||||
	-@echo "POPTS       = $(LAPACK_FPFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc
 | 
			
		||||
	-@echo "NOOPT       = $(LAPACK_FFLAGS) -O0" >> $(NETLIB_LAPACK_DIR)/make.inc
 | 
			
		||||
	-@echo "NOOPT       = -O0 $(LAPACK_NOOPT)" >> $(NETLIB_LAPACK_DIR)/make.inc
 | 
			
		||||
	-@echo "PNOOPT      = $(LAPACK_FPFLAGS) -O0" >> $(NETLIB_LAPACK_DIR)/make.inc
 | 
			
		||||
	-@echo "LOADOPTS    = $(FFLAGS) $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc
 | 
			
		||||
	-@echo "CC          = $(CC)" >> $(NETLIB_LAPACK_DIR)/make.inc
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										14
									
								
								Makefile.arm
								
								
								
								
							
							
						
						
									
										14
									
								
								Makefile.arm
								
								
								
								
							| 
						 | 
				
			
			@ -1,13 +1,23 @@
 | 
			
		|||
# ifeq logical or
 | 
			
		||||
ifeq ($(CORE), $(filter $(CORE),CORTEXA9 CORTEXA15))
 | 
			
		||||
ifeq ($(OSNAME), Android)
 | 
			
		||||
CCOMMON_OPT += -marm -mfpu=neon  -mfloat-abi=hard -march=armv7-a
 | 
			
		||||
FCOMMON_OPT += -marm -mfpu=neon  -mfloat-abi=hard -march=armv7-a
 | 
			
		||||
else
 | 
			
		||||
CCOMMON_OPT += -marm -mfpu=vfpv3  -mfloat-abi=hard -march=armv7-a
 | 
			
		||||
FCOMMON_OPT += -marm -mfpu=vfpv3  -mfloat-abi=hard -march=armv7-a
 | 
			
		||||
endif
 | 
			
		||||
endif
 | 
			
		||||
 | 
			
		||||
ifeq ($(CORE), ARMV7)
 | 
			
		||||
ifeq ($(OSNAME), Android)
 | 
			
		||||
CCOMMON_OPT += -marm -mfpu=neon  -mfloat-abi=hard -march=armv7-a
 | 
			
		||||
FCOMMON_OPT += -marm -mfpu=neon  -mfloat-abi=hard -march=armv7-a
 | 
			
		||||
else
 | 
			
		||||
CCOMMON_OPT += -marm -mfpu=vfpv3  -mfloat-abi=hard -march=armv7-a
 | 
			
		||||
FCOMMON_OPT += -marm -mfpu=vfpv3  -mfloat-abi=hard -march=armv7-a
 | 
			
		||||
endif
 | 
			
		||||
endif
 | 
			
		||||
 | 
			
		||||
ifeq ($(CORE), ARMV6)
 | 
			
		||||
CCOMMON_OPT += -marm -mfpu=vfp -mfloat-abi=hard  -march=armv6
 | 
			
		||||
| 
						 | 
				
			
			@ -16,8 +26,8 @@ endif
 | 
			
		|||
 | 
			
		||||
 | 
			
		||||
ifeq ($(CORE), ARMV5)
 | 
			
		||||
CCOMMON_OPT += -marm -mfpu=vfp -mfloat-abi=hard  -march=armv6
 | 
			
		||||
FCOMMON_OPT += -marm -mfpu=vfp -mfloat-abi=hard  -march=armv6
 | 
			
		||||
CCOMMON_OPT += -marm -march=armv5
 | 
			
		||||
FCOMMON_OPT += -marm -march=armv5
 | 
			
		||||
endif
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -11,6 +11,7 @@ OPENBLAS_BINARY_DIR := $(PREFIX)/bin
 | 
			
		|||
OPENBLAS_BUILD_DIR := $(CURDIR)
 | 
			
		||||
OPENBLAS_CMAKE_DIR := $(OPENBLAS_LIBRARY_DIR)/cmake/openblas
 | 
			
		||||
OPENBLAS_CMAKE_CONFIG := OpenBLASConfig.cmake
 | 
			
		||||
OPENBLAS_CMAKE_CONFIG_VERSION := OpenBLASConfigVersion.cmake
 | 
			
		||||
 | 
			
		||||
.PHONY : install
 | 
			
		||||
.NOTPARALLEL : install
 | 
			
		||||
| 
						 | 
				
			
			@ -86,8 +87,8 @@ ifeq ($(OSNAME), Darwin)
 | 
			
		|||
	ln -fs $(LIBDYNNAME) $(LIBPREFIX).dylib
 | 
			
		||||
endif
 | 
			
		||||
ifeq ($(OSNAME), WINNT)
 | 
			
		||||
	@-cp $(LIBDLLNAME) $(OPENBLAS_BINARY_DIR)
 | 
			
		||||
	@-cp $(LIBDLLNAME).a $(OPENBLAS_LIBRARY_DIR)
 | 
			
		||||
	@-cp $(LIBDLLNAME) $(DESTDIR)$(OPENBLAS_BINARY_DIR)
 | 
			
		||||
	@-cp $(LIBDLLNAME).a $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
 | 
			
		||||
endif
 | 
			
		||||
ifeq ($(OSNAME), CYGWIN_NT)
 | 
			
		||||
	@-cp $(LIBDLLNAME) $(OPENBLAS_BINARY_DIR)
 | 
			
		||||
| 
						 | 
				
			
			@ -97,6 +98,7 @@ endif
 | 
			
		|||
	@echo Generating $(OPENBLAS_CMAKE_CONFIG) in $(DESTDIR)$(OPENBLAS_CMAKE_DIR)
 | 
			
		||||
	@echo "SET(OpenBLAS_VERSION \"${VERSION}\")" > $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)	
 | 
			
		||||
	@echo "SET(OpenBLAS_INCLUDE_DIRS ${OPENBLAS_INCLUDE_DIR})" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)
 | 
			
		||||
 | 
			
		||||
ifndef NO_SHARED
 | 
			
		||||
#ifeq logical or
 | 
			
		||||
ifeq ($(OSNAME), $(filter $(OSNAME),Linux FreeBSD NetBSD))
 | 
			
		||||
| 
						 | 
				
			
			@ -112,5 +114,16 @@ else
 | 
			
		|||
#only static
 | 
			
		||||
	@echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_LIBRARY_DIR}/$(LIBPREFIX).$(LIBSUFFIX))" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)
 | 
			
		||||
endif
 | 
			
		||||
#Generating OpenBLASConfigVersion.cmake
 | 
			
		||||
	@echo Generating $(OPENBLAS_CMAKE_CONFIG_VERSION) in $(DESTDIR)$(OPENBLAS_CMAKE_DIR)
 | 
			
		||||
	@echo "set (PACKAGE_VERSION \"${VERSION}\")" > $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)
 | 
			
		||||
	@echo "if (PACKAGE_VERSION VERSION_LESS PACKAGE_FIND_VERSION)" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)
 | 
			
		||||
	@echo "  set (PACKAGE_VERSION_COMPATIBLE FALSE)" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)
 | 
			
		||||
	@echo "else ()" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)
 | 
			
		||||
	@echo "  set (PACKAGE_VERSION_COMPATIBLE TRUE)" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)
 | 
			
		||||
	@echo "  if (PACKAGE_FIND_VERSION STREQUAL PACKAGE_VERSION)" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)
 | 
			
		||||
	@echo "    set (PACKAGE_VERSION_EXACT TRUE)" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)
 | 
			
		||||
	@echo "  endif ()" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)
 | 
			
		||||
	@echo "endif ()" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)
 | 
			
		||||
	@echo Install OK!
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -3,7 +3,7 @@
 | 
			
		|||
#
 | 
			
		||||
 | 
			
		||||
# This library's version
 | 
			
		||||
VERSION = 0.2.14
 | 
			
		||||
VERSION = 0.2.15
 | 
			
		||||
 | 
			
		||||
# If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a
 | 
			
		||||
# and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library
 | 
			
		||||
| 
						 | 
				
			
			@ -162,13 +162,16 @@ COMMON_PROF = -pg
 | 
			
		|||
# Improve GEMV and GER for small matrices by stack allocation.
 | 
			
		||||
# For details, https://github.com/xianyi/OpenBLAS/pull/482
 | 
			
		||||
#
 | 
			
		||||
# MAX_STACK_ALLOC=2048
 | 
			
		||||
 MAX_STACK_ALLOC=2048
 | 
			
		||||
 | 
			
		||||
# Add a prefix or suffix to all exported symbol names in the shared library.
 | 
			
		||||
# Avoid conflicts with other BLAS libraries, especially when using
 | 
			
		||||
# 64 bit integer interfaces in OpenBLAS.
 | 
			
		||||
# For details, https://github.com/xianyi/OpenBLAS/pull/459
 | 
			
		||||
#
 | 
			
		||||
# The same prefix and suffix are also added to the library name,
 | 
			
		||||
# i.e. you get lib$(SYMBOLPREFIX)openblas$(SYMBOLSUFFIX) rather than libopenblas
 | 
			
		||||
#
 | 
			
		||||
# SYMBOLPREFIX=
 | 
			
		||||
# SYMBOLSUFFIX=
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -23,6 +23,7 @@ CC = gcc
 | 
			
		|||
UNAME_S := $(shell uname -s)
 | 
			
		||||
ifeq ($(UNAME_S),Darwin)
 | 
			
		||||
     CC = clang
 | 
			
		||||
#     EXTRALIB += -Wl,-no_compact_unwind
 | 
			
		||||
endif
 | 
			
		||||
endif
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -64,6 +65,9 @@ endif
 | 
			
		|||
ifeq ($(TARGET), STEAMROLLER)
 | 
			
		||||
GETARCH_FLAGS := -DFORCE_BARCELONA
 | 
			
		||||
endif
 | 
			
		||||
ifeq ($(TARGET), EXCAVATOR)
 | 
			
		||||
GETARCH_FLAGS := -DFORCE_BARCELONA
 | 
			
		||||
endif
 | 
			
		||||
endif
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -91,6 +95,9 @@ endif
 | 
			
		|||
ifeq ($(TARGET_CORE), STEAMROLLER)
 | 
			
		||||
GETARCH_FLAGS := -DFORCE_BARCELONA
 | 
			
		||||
endif
 | 
			
		||||
ifeq ($(TARGET_CORE), EXCAVATOR)
 | 
			
		||||
GETARCH_FLAGS := -DFORCE_BARCELONA
 | 
			
		||||
endif
 | 
			
		||||
endif
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -195,12 +202,18 @@ DLLWRAP = $(CROSS_SUFFIX)dllwrap
 | 
			
		|||
OBJCOPY = $(CROSS_SUFFIX)objcopy
 | 
			
		||||
OBJCONV = $(CROSS_SUFFIX)objconv
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# If Fortran compiler detection failed, only build BLAS (no LAPACK).
 | 
			
		||||
ifeq ($(NOFORTRAN), 1)
 | 
			
		||||
NO_LAPACK = 1
 | 
			
		||||
endif
 | 
			
		||||
 | 
			
		||||
#
 | 
			
		||||
#  OS dependent settings
 | 
			
		||||
#
 | 
			
		||||
 | 
			
		||||
ifeq ($(OSNAME), Darwin)
 | 
			
		||||
export MACOSX_DEPLOYMENT_TARGET=10.2
 | 
			
		||||
export MACOSX_DEPLOYMENT_TARGET=10.6
 | 
			
		||||
MD5SUM = md5 -r
 | 
			
		||||
endif
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -323,6 +336,11 @@ ifeq ($(ARCH), x86)
 | 
			
		|||
ifndef BINARY
 | 
			
		||||
NO_BINARY_MODE	= 1
 | 
			
		||||
endif
 | 
			
		||||
 | 
			
		||||
ifeq ($(CORE), generic)
 | 
			
		||||
NO_EXPRECISION = 1
 | 
			
		||||
endif
 | 
			
		||||
 | 
			
		||||
ifndef NO_EXPRECISION
 | 
			
		||||
ifeq ($(F_COMPILER), GFORTRAN)
 | 
			
		||||
# ifeq logical or. GCC or LSB
 | 
			
		||||
| 
						 | 
				
			
			@ -341,6 +359,11 @@ endif
 | 
			
		|||
endif
 | 
			
		||||
 | 
			
		||||
ifeq ($(ARCH), x86_64)
 | 
			
		||||
 | 
			
		||||
ifeq ($(CORE), generic)
 | 
			
		||||
NO_EXPRECISION = 1
 | 
			
		||||
endif
 | 
			
		||||
 | 
			
		||||
ifndef NO_EXPRECISION
 | 
			
		||||
ifeq ($(F_COMPILER), GFORTRAN)
 | 
			
		||||
# ifeq logical or. GCC or LSB
 | 
			
		||||
| 
						 | 
				
			
			@ -408,7 +431,7 @@ endif
 | 
			
		|||
ifeq ($(ARCH), x86_64)
 | 
			
		||||
DYNAMIC_CORE = PRESCOTT CORE2 PENRYN DUNNINGTON NEHALEM OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO
 | 
			
		||||
ifneq ($(NO_AVX), 1)
 | 
			
		||||
DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER STEAMROLLER
 | 
			
		||||
DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER STEAMROLLER EXCAVATOR
 | 
			
		||||
endif
 | 
			
		||||
ifneq ($(NO_AVX2), 1)
 | 
			
		||||
DYNAMIC_CORE += HASWELL
 | 
			
		||||
| 
						 | 
				
			
			@ -578,7 +601,7 @@ else
 | 
			
		|||
FCOMMON_OPT += -m32
 | 
			
		||||
endif
 | 
			
		||||
endif
 | 
			
		||||
ifdef USE_OPENMP
 | 
			
		||||
ifeq ($(USE_OPENMP), 1)
 | 
			
		||||
FCOMMON_OPT += -fopenmp
 | 
			
		||||
endif
 | 
			
		||||
endif
 | 
			
		||||
| 
						 | 
				
			
			@ -590,14 +613,14 @@ ifneq ($(INTERFACE64), 0)
 | 
			
		|||
FCOMMON_OPT += -i8
 | 
			
		||||
endif
 | 
			
		||||
endif
 | 
			
		||||
ifdef USE_OPENMP
 | 
			
		||||
ifeq ($(USE_OPENMP), 1)
 | 
			
		||||
FCOMMON_OPT += -openmp
 | 
			
		||||
endif
 | 
			
		||||
endif
 | 
			
		||||
 | 
			
		||||
ifeq ($(F_COMPILER), FUJITSU)
 | 
			
		||||
CCOMMON_OPT += -DF_INTERFACE_FUJITSU
 | 
			
		||||
ifdef USE_OPENMP
 | 
			
		||||
ifeq ($(USE_OPENMP), 1)
 | 
			
		||||
FCOMMON_OPT += -openmp
 | 
			
		||||
endif
 | 
			
		||||
endif
 | 
			
		||||
| 
						 | 
				
			
			@ -615,7 +638,7 @@ endif
 | 
			
		|||
else
 | 
			
		||||
FCOMMON_OPT += -q32
 | 
			
		||||
endif
 | 
			
		||||
ifdef USE_OPENMP
 | 
			
		||||
ifeq ($(USE_OPENMP), 1)
 | 
			
		||||
FCOMMON_OPT += -openmp
 | 
			
		||||
endif
 | 
			
		||||
endif
 | 
			
		||||
| 
						 | 
				
			
			@ -633,7 +656,7 @@ FCOMMON_OPT += -tp p7-64
 | 
			
		|||
else
 | 
			
		||||
FCOMMON_OPT += -tp p7
 | 
			
		||||
endif
 | 
			
		||||
ifdef USE_OPENMP
 | 
			
		||||
ifeq ($(USE_OPENMP), 1)
 | 
			
		||||
FCOMMON_OPT += -mp
 | 
			
		||||
endif
 | 
			
		||||
endif
 | 
			
		||||
| 
						 | 
				
			
			@ -662,7 +685,7 @@ FCOMMON_OPT += -mabi=n32
 | 
			
		|||
endif
 | 
			
		||||
endif
 | 
			
		||||
 | 
			
		||||
ifdef USE_OPENMP
 | 
			
		||||
ifeq ($(USE_OPENMP), 1)
 | 
			
		||||
FCOMMON_OPT += -mp
 | 
			
		||||
endif
 | 
			
		||||
endif
 | 
			
		||||
| 
						 | 
				
			
			@ -699,7 +722,7 @@ FCOMMON_OPT += -m64
 | 
			
		|||
endif
 | 
			
		||||
endif
 | 
			
		||||
 | 
			
		||||
ifdef USE_OPENMP
 | 
			
		||||
ifeq ($(USE_OPENMP), 1)
 | 
			
		||||
FEXTRALIB   += -lstdc++
 | 
			
		||||
FCOMMON_OPT += -mp
 | 
			
		||||
endif
 | 
			
		||||
| 
						 | 
				
			
			@ -747,14 +770,14 @@ FCOMMON_OPT  += -m32
 | 
			
		|||
else
 | 
			
		||||
FCOMMON_OPT  += -m64
 | 
			
		||||
endif
 | 
			
		||||
ifdef USE_OPENMP
 | 
			
		||||
ifeq ($(USE_OPENMP), 1)
 | 
			
		||||
FCOMMON_OPT += -xopenmp=parallel
 | 
			
		||||
endif
 | 
			
		||||
endif
 | 
			
		||||
 | 
			
		||||
ifeq ($(F_COMPILER), COMPAQ)
 | 
			
		||||
CCOMMON_OPT  += -DF_INTERFACE_COMPAQ
 | 
			
		||||
ifdef USE_OPENMP
 | 
			
		||||
ifeq ($(USE_OPENMP), 1)
 | 
			
		||||
FCOMMON_OPT += -openmp
 | 
			
		||||
endif
 | 
			
		||||
endif
 | 
			
		||||
| 
						 | 
				
			
			@ -857,12 +880,6 @@ ifdef USE_SIMPLE_THREADED_LEVEL3
 | 
			
		|||
CCOMMON_OPT	+= -DUSE_SIMPLE_THREADED_LEVEL3
 | 
			
		||||
endif
 | 
			
		||||
 | 
			
		||||
ifndef LIBNAMESUFFIX
 | 
			
		||||
LIBPREFIX = libopenblas
 | 
			
		||||
else
 | 
			
		||||
LIBPREFIX = libopenblas_$(LIBNAMESUFFIX)
 | 
			
		||||
endif
 | 
			
		||||
 | 
			
		||||
ifndef SYMBOLPREFIX
 | 
			
		||||
SYMBOLPREFIX =
 | 
			
		||||
endif
 | 
			
		||||
| 
						 | 
				
			
			@ -871,6 +888,12 @@ ifndef SYMBOLSUFFIX
 | 
			
		|||
SYMBOLSUFFIX =
 | 
			
		||||
endif
 | 
			
		||||
 | 
			
		||||
ifndef LIBNAMESUFFIX
 | 
			
		||||
LIBPREFIX = lib$(SYMBOLPREFIX)openblas$(SYMBOLSUFFIX)
 | 
			
		||||
else
 | 
			
		||||
LIBPREFIX = lib$(SYMBOLPREFIX)openblas$(SYMBOLSUFFIX)_$(LIBNAMESUFFIX)
 | 
			
		||||
endif
 | 
			
		||||
 | 
			
		||||
KERNELDIR	= $(TOPDIR)/kernel/$(ARCH)
 | 
			
		||||
 | 
			
		||||
include $(TOPDIR)/Makefile.$(ARCH)
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1,7 +1,10 @@
 | 
			
		|||
# OpenBLAS
 | 
			
		||||
 | 
			
		||||
[](https://travis-ci.org/xianyi/OpenBLAS)
 | 
			
		||||
[](https://gitter.im/xianyi/OpenBLAS?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
 | 
			
		||||
 | 
			
		||||
Travis CI: [](https://travis-ci.org/xianyi/OpenBLAS)
 | 
			
		||||
 | 
			
		||||
AppVeyor: [](https://ci.appveyor.com/project/xianyi/openblas/branch/develop)
 | 
			
		||||
## Introduction
 | 
			
		||||
OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version.
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -33,6 +33,7 @@ BOBCAT
 | 
			
		|||
BULLDOZER
 | 
			
		||||
PILEDRIVER
 | 
			
		||||
STEAMROLLER
 | 
			
		||||
EXCAVATOR
 | 
			
		||||
 | 
			
		||||
c)VIA CPU:
 | 
			
		||||
SSE_GENERIC
 | 
			
		||||
| 
						 | 
				
			
			@ -43,6 +44,8 @@ NANO
 | 
			
		|||
POWER4
 | 
			
		||||
POWER5
 | 
			
		||||
POWER6
 | 
			
		||||
POWER7
 | 
			
		||||
POWER8
 | 
			
		||||
PPCG4
 | 
			
		||||
PPC970
 | 
			
		||||
PPC970MP
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -0,0 +1,42 @@
 | 
			
		|||
version: 0.2.15.{build}
 | 
			
		||||
 | 
			
		||||
#environment:
 | 
			
		||||
 | 
			
		||||
platform: 
 | 
			
		||||
  - x64
 | 
			
		||||
 | 
			
		||||
configuration: Release
 | 
			
		||||
 | 
			
		||||
clone_folder: c:\projects\OpenBLAS
 | 
			
		||||
 | 
			
		||||
init:
 | 
			
		||||
  - git config --global core.autocrlf input
 | 
			
		||||
 | 
			
		||||
build:
 | 
			
		||||
  project: OpenBLAS.sln
 | 
			
		||||
 | 
			
		||||
clone_depth: 5
 | 
			
		||||
 | 
			
		||||
#branches to build
 | 
			
		||||
branches:
 | 
			
		||||
  only:
 | 
			
		||||
    - master
 | 
			
		||||
    - develop
 | 
			
		||||
    - cmake
 | 
			
		||||
 | 
			
		||||
skip_tags: true
 | 
			
		||||
 | 
			
		||||
matrix:
 | 
			
		||||
  fast_finish: true
 | 
			
		||||
 | 
			
		||||
skip_commits:
 | 
			
		||||
# Add [av skip] to commit messages
 | 
			
		||||
  message: /\[av skip\]/
 | 
			
		||||
 | 
			
		||||
before_build:
 | 
			
		||||
  - echo Running cmake...
 | 
			
		||||
  - cd c:\projects\OpenBLAS
 | 
			
		||||
  - cmake -G "Visual Studio 12 Win64" .
 | 
			
		||||
 | 
			
		||||
test_script:
 | 
			
		||||
  - echo Build OK!
 | 
			
		||||
| 
						 | 
				
			
			@ -0,0 +1,9 @@
 | 
			
		|||
#!/bin/bash
 | 
			
		||||
 | 
			
		||||
for f in *.goto *.acml *.mkl *.atlas
 | 
			
		||||
do
 | 
			
		||||
	if [ -f "$f" ]; then
 | 
			
		||||
		# Rename the benchmark binary: every '.' becomes '_' and ".exe" is appended
		# (e.g. foo.goto -> foo_goto.exe). Expansions are quoted so names containing
		# whitespace or glob characters are moved intact.
		mv "$f" "$(echo "$f" | tr '.' '_').exe"
 | 
			
		||||
	fi
 | 
			
		||||
done
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										1012
									
								
								benchmark/Makefile
								
								
								
								
							
							
						
						
									
										1012
									
								
								benchmark/Makefile
								
								
								
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							| 
						 | 
				
			
			@ -0,0 +1,196 @@
 | 
			
		|||
/***************************************************************************
 | 
			
		||||
Copyright (c) 2014, The OpenBLAS Project
 | 
			
		||||
All rights reserved.
 | 
			
		||||
Redistribution and use in source and binary forms, with or without
 | 
			
		||||
modification, are permitted provided that the following conditions are
 | 
			
		||||
met:
 | 
			
		||||
1. Redistributions of source code must retain the above copyright
 | 
			
		||||
notice, this list of conditions and the following disclaimer.
 | 
			
		||||
2. Redistributions in binary form must reproduce the above copyright
 | 
			
		||||
notice, this list of conditions and the following disclaimer in
 | 
			
		||||
the documentation and/or other materials provided with the
 | 
			
		||||
distribution.
 | 
			
		||||
3. Neither the name of the OpenBLAS project nor the names of
 | 
			
		||||
its contributors may be used to endorse or promote products
 | 
			
		||||
derived from this software without specific prior written permission.
 | 
			
		||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 | 
			
		||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 | 
			
		||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 | 
			
		||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
 | 
			
		||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 | 
			
		||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 | 
			
		||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 | 
			
		||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 | 
			
		||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
 | 
			
		||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 | 
			
		||||
*****************************************************************************/
 | 
			
		||||
 | 
			
		||||
#include <stdio.h>
 | 
			
		||||
#include <stdlib.h>
 | 
			
		||||
#ifdef __CYGWIN32__
 | 
			
		||||
#include <sys/time.h>
 | 
			
		||||
#endif
 | 
			
		||||
#include "common.h"
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/* Bind ASUM to the routine variant that matches the build configuration:
   COMPLEX selects the complex-input routines, DOUBLE the double-precision
   ones.  Any earlier definition of ASUM is dropped first. */
#undef ASUM

#if defined(COMPLEX) && defined(DOUBLE)
#define ASUM   BLASFUNC(dzasum)
#elif defined(COMPLEX)
#define ASUM   BLASFUNC(scasum)
#elif defined(DOUBLE)
#define ASUM   BLASFUNC(dasum)
#else
#define ASUM   BLASFUNC(sasum)
#endif
 | 
			
		||||
 | 
			
		||||
#if defined(__WIN32__) || defined(__WIN64__)

/* Amount subtracted from the FILETIME value (once it has been converted to
   microseconds) to rebase it onto the Unix epoch. */
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif

/*
 * Minimal gettimeofday() stand-in for Windows builds.
 *
 * tv  receives the current system time as seconds and microseconds; when it
 *     is NULL nothing is written.
 * tz  ignored; present only so existing call sites compile unchanged.
 *
 * Always returns 0.
 */
int gettimeofday(struct timeval *tv, void *tz){

  FILETIME ft;
  unsigned __int64 ticks;

  (void)tz;  /* never consulted */

  if (NULL != tv)
    {
      GetSystemTimeAsFileTime(&ft);

      /* Join the two 32-bit halves into a single 64-bit count. */
      ticks = ((unsigned __int64)ft.dwHighDateTime << 32) | ft.dwLowDateTime;

      /* converting file time to unix epoch */
      ticks /= 10;  /* convert into microseconds */
      ticks -= DELTA_EPOCH_IN_MICROSECS;
      tv->tv_sec  = (long)(ticks / 1000000UL);
      tv->tv_usec = (long)(ticks % 1000000UL);
    }

  return 0;
}

#endif
 | 
			
		||||
 | 
			
		||||
/* The trailing "&& 0" keeps this whole section compiled out; drop it to route
   the benchmark's malloc() calls through huge_malloc() on non-Windows,
   non-Cygwin builds. */
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0

/* Fallback value for headers that do not provide SHM_HUGETLB. */
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

/*
 * Obtain a buffer backed by a private System V shared-memory segment that is
 * requested with SHM_HUGETLB.
 *
 * size  number of bytes wanted; the request is padded by HUGE_PAGESIZE and
 *       masked down to a HUGE_PAGESIZE multiple.
 *
 * Returns the attached address.  On any failure a message is printed to
 * stdout and the process exits with status 1, so the function never returns
 * NULL.
 */
static void *huge_malloc(BLASLONG size){
  int segment;
  void *mapped;

  segment = shmget(IPC_PRIVATE,
		   (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
		   SHM_HUGETLB | IPC_CREAT |0600);
  if (segment < 0) {
    printf( "Memory allocation failed(shmget).\n");
    exit(1);
  }

  mapped = shmat(segment, NULL, SHM_RND);
  if ((BLASLONG)mapped == -1){
    printf( "Memory allocation failed(shmat).\n");
    exit(1);
  }

  /* Mark the segment for removal immediately; the caller keeps using the
     mapping attached above. */
  shmctl(segment, IPC_RMID, 0);

  return mapped;
}

#define malloc huge_malloc

#endif
 | 
			
		||||
 | 
			
		||||
int main(int argc, char *argv[]){
 | 
			
		||||
 | 
			
		||||
  FLOAT *x;
 | 
			
		||||
  FLOAT result;
 | 
			
		||||
  blasint m, i;
 | 
			
		||||
  blasint inc_x=1;
 | 
			
		||||
  int loops = 1;
 | 
			
		||||
  int l;
 | 
			
		||||
  char *p;
 | 
			
		||||
 | 
			
		||||
  int from =   1;
 | 
			
		||||
  int to   = 200;
 | 
			
		||||
  int step =   1;
 | 
			
		||||
 | 
			
		||||
  struct timeval start, stop;
 | 
			
		||||
  double time1,timeg;
 | 
			
		||||
 | 
			
		||||
  argc--;argv++;
 | 
			
		||||
 | 
			
		||||
  if (argc > 0) { from     = atol(*argv);		argc--; argv++;}
 | 
			
		||||
  if (argc > 0) { to       = MAX(atol(*argv), from);	argc--; argv++;}
 | 
			
		||||
  if (argc > 0) { step     = atol(*argv);		argc--; argv++;}
 | 
			
		||||
 | 
			
		||||
  if ((p = getenv("OPENBLAS_LOOPS")))  loops = atoi(p);
 | 
			
		||||
  if ((p = getenv("OPENBLAS_INCX")))   inc_x = atoi(p);
 | 
			
		||||
 | 
			
		||||
  fprintf(stderr, "From : %3d  To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step,inc_x,loops);
 | 
			
		||||
 | 
			
		||||
  if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
 | 
			
		||||
    fprintf(stderr,"Out of Memory!!\n");exit(1);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#ifdef linux
 | 
			
		||||
  srandom(getpid());
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
  fprintf(stderr, "   SIZE       Flops\n");
 | 
			
		||||
 | 
			
		||||
  for(m = from; m <= to; m += step)
 | 
			
		||||
  {
 | 
			
		||||
 | 
			
		||||
   timeg=0;
 | 
			
		||||
 | 
			
		||||
   fprintf(stderr, " %6d : ", (int)m);
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
   for (l=0; l<loops; l++)
 | 
			
		||||
   {
 | 
			
		||||
 | 
			
		||||
   	for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
 | 
			
		||||
			x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
 | 
			
		||||
   	}
 | 
			
		||||
 | 
			
		||||
    	gettimeofday( &start, (struct timezone *)0);
 | 
			
		||||
 | 
			
		||||
    	result = ASUM (&m, x, &inc_x);
 | 
			
		||||
 | 
			
		||||
    	gettimeofday( &stop, (struct timezone *)0);
 | 
			
		||||
 | 
			
		||||
    	time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
 | 
			
		||||
 | 
			
		||||
	timeg += time1;
 | 
			
		||||
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    timeg /= loops;
 | 
			
		||||
 | 
			
		||||
#ifdef COMPLEX
 | 
			
		||||
    fprintf(stderr, " %10.2f MFlops\n", 4. * (double)m / timeg * 1.e-6);
 | 
			
		||||
#else
 | 
			
		||||
    fprintf(stderr, " %10.2f MFlops\n", 2. * (double)m / timeg * 1.e-6);
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  return 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
 | 
			
		||||
| 
						 | 
				
			
			@ -71,8 +71,14 @@ double fabs(double);
 | 
			
		|||
#endif
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#if defined(__WIN32__) || defined(__WIN64__)
 | 
			
		||||
 | 
			
		||||
#ifndef DELTA_EPOCH_IN_MICROSECS
 | 
			
		||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
int gettimeofday(struct timeval *tv, void *tz){
 | 
			
		||||
 | 
			
		||||
  FILETIME ft;
 | 
			
		||||
| 
						 | 
				
			
			@ -99,6 +105,7 @@ int gettimeofday(struct timeval *tv, void *tz){
 | 
			
		|||
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
static __inline double getmflops(int ratio, int m, double secs){
 | 
			
		||||
 | 
			
		||||
  double mm = (double)m;
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -0,0 +1,201 @@
 | 
			
		|||
/***************************************************************************
 | 
			
		||||
Copyright (c) 2014, The OpenBLAS Project
 | 
			
		||||
All rights reserved.
 | 
			
		||||
Redistribution and use in source and binary forms, with or without
 | 
			
		||||
modification, are permitted provided that the following conditions are
 | 
			
		||||
met:
 | 
			
		||||
1. Redistributions of source code must retain the above copyright
 | 
			
		||||
notice, this list of conditions and the following disclaimer.
 | 
			
		||||
2. Redistributions in binary form must reproduce the above copyright
 | 
			
		||||
notice, this list of conditions and the following disclaimer in
 | 
			
		||||
the documentation and/or other materials provided with the
 | 
			
		||||
distribution.
 | 
			
		||||
3. Neither the name of the OpenBLAS project nor the names of
 | 
			
		||||
its contributors may be used to endorse or promote products
 | 
			
		||||
derived from this software without specific prior written permission.
 | 
			
		||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 | 
			
		||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 | 
			
		||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 | 
			
		||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
 | 
			
		||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 | 
			
		||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 | 
			
		||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 | 
			
		||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 | 
			
		||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
 | 
			
		||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 | 
			
		||||
*****************************************************************************/
 | 
			
		||||
 | 
			
		||||
#include <stdio.h>
 | 
			
		||||
#include <stdlib.h>
 | 
			
		||||
#ifdef __CYGWIN32__
 | 
			
		||||
#include <sys/time.h>
 | 
			
		||||
#endif
 | 
			
		||||
#include "common.h"
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/* Bind COPY to the routine variant that matches the build configuration:
   COMPLEX selects the complex routines, DOUBLE the double-precision ones.
   Any earlier definition of COPY is dropped first. */
#undef COPY

#if defined(COMPLEX) && defined(DOUBLE)
#define COPY   BLASFUNC(zcopy)
#elif defined(COMPLEX)
#define COPY   BLASFUNC(ccopy)
#elif defined(DOUBLE)
#define COPY   BLASFUNC(dcopy)
#else
#define COPY   BLASFUNC(scopy)
#endif
 | 
			
		||||
 | 
			
		||||
#if defined(__WIN32__) || defined(__WIN64__)

/* Amount subtracted from the FILETIME value (once it has been converted to
   microseconds) to rebase it onto the Unix epoch. */
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif

/*
 * Minimal gettimeofday() stand-in for Windows builds.
 *
 * tv  receives the current system time as seconds and microseconds; when it
 *     is NULL nothing is written.
 * tz  ignored; present only so existing call sites compile unchanged.
 *
 * Always returns 0.
 */
int gettimeofday(struct timeval *tv, void *tz){

  FILETIME ft;
  unsigned __int64 ticks;

  (void)tz;  /* never consulted */

  if (NULL != tv)
    {
      GetSystemTimeAsFileTime(&ft);

      /* Join the two 32-bit halves into a single 64-bit count. */
      ticks = ((unsigned __int64)ft.dwHighDateTime << 32) | ft.dwLowDateTime;

      /* converting file time to unix epoch */
      ticks /= 10;  /* convert into microseconds */
      ticks -= DELTA_EPOCH_IN_MICROSECS;
      tv->tv_sec  = (long)(ticks / 1000000UL);
      tv->tv_usec = (long)(ticks % 1000000UL);
    }

  return 0;
}

#endif
 | 
			
		||||
 | 
			
		||||
/* The trailing "&& 0" keeps this whole section compiled out; drop it to route
   the benchmark's malloc() calls through huge_malloc() on non-Windows,
   non-Cygwin builds. */
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0

/* Fallback value for headers that do not provide SHM_HUGETLB. */
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

/*
 * Obtain a buffer backed by a private System V shared-memory segment that is
 * requested with SHM_HUGETLB.
 *
 * size  number of bytes wanted; the request is padded by HUGE_PAGESIZE and
 *       masked down to a HUGE_PAGESIZE multiple.
 *
 * Returns the attached address.  On any failure a message is printed to
 * stdout and the process exits with status 1, so the function never returns
 * NULL.
 */
static void *huge_malloc(BLASLONG size){
  int segment;
  void *mapped;

  segment = shmget(IPC_PRIVATE,
		   (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
		   SHM_HUGETLB | IPC_CREAT |0600);
  if (segment < 0) {
    printf( "Memory allocation failed(shmget).\n");
    exit(1);
  }

  mapped = shmat(segment, NULL, SHM_RND);
  if ((BLASLONG)mapped == -1){
    printf( "Memory allocation failed(shmat).\n");
    exit(1);
  }

  /* Mark the segment for removal immediately; the caller keeps using the
     mapping attached above. */
  shmctl(segment, IPC_RMID, 0);

  return mapped;
}

#define malloc huge_malloc

#endif
 | 
			
		||||
 | 
			
		||||
int main(int argc, char *argv[]){
 | 
			
		||||
 | 
			
		||||
  FLOAT *x, *y;
 | 
			
		||||
  FLOAT alpha[2] = { 2.0, 2.0 };
 | 
			
		||||
  blasint m, i;
 | 
			
		||||
  blasint inc_x=1,inc_y=1;
 | 
			
		||||
  int loops = 1;
 | 
			
		||||
  int l;
 | 
			
		||||
  char *p;
 | 
			
		||||
 | 
			
		||||
  int from =   1;
 | 
			
		||||
  int to   = 200;
 | 
			
		||||
  int step =   1;
 | 
			
		||||
 | 
			
		||||
  struct timeval start, stop;
 | 
			
		||||
  double time1,timeg;
 | 
			
		||||
 | 
			
		||||
  argc--;argv++;
 | 
			
		||||
 | 
			
		||||
  if (argc > 0) { from     = atol(*argv);		argc--; argv++;}
 | 
			
		||||
  if (argc > 0) { to       = MAX(atol(*argv), from);	argc--; argv++;}
 | 
			
		||||
  if (argc > 0) { step     = atol(*argv);		argc--; argv++;}
 | 
			
		||||
 | 
			
		||||
  if ((p = getenv("OPENBLAS_LOOPS")))  loops = atoi(p);
 | 
			
		||||
  if ((p = getenv("OPENBLAS_INCX")))   inc_x = atoi(p);
 | 
			
		||||
  if ((p = getenv("OPENBLAS_INCY")))   inc_y = atoi(p);
 | 
			
		||||
 | 
			
		||||
  fprintf(stderr, "From : %3d  To : %3d Step = %3d Inc_x = %d Inc_y = %d Loops = %d\n", from, to, step,inc_x,inc_y,loops);
 | 
			
		||||
 | 
			
		||||
  if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
 | 
			
		||||
    fprintf(stderr,"Out of Memory!!\n");exit(1);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL){
 | 
			
		||||
    fprintf(stderr,"Out of Memory!!\n");exit(1);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
#ifdef linux
 | 
			
		||||
  srandom(getpid());
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
  fprintf(stderr, "   SIZE       Flops\n");
 | 
			
		||||
 | 
			
		||||
  for(m = from; m <= to; m += step)
 | 
			
		||||
  {
 | 
			
		||||
 | 
			
		||||
   timeg=0;
 | 
			
		||||
 | 
			
		||||
   fprintf(stderr, " %6d : ", (int)m);
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
   for (l=0; l<loops; l++)
 | 
			
		||||
   {
 | 
			
		||||
 | 
			
		||||
   	for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
 | 
			
		||||
			x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
 | 
			
		||||
   	}
 | 
			
		||||
 | 
			
		||||
   	for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
 | 
			
		||||
			y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
 | 
			
		||||
   	}
 | 
			
		||||
    	gettimeofday( &start, (struct timezone *)0);
 | 
			
		||||
 | 
			
		||||
    	COPY (&m, x, &inc_x, y, &inc_y );
 | 
			
		||||
 | 
			
		||||
    	gettimeofday( &stop, (struct timezone *)0);
 | 
			
		||||
 | 
			
		||||
    	time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
 | 
			
		||||
 | 
			
		||||
	timeg += time1;
 | 
			
		||||
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    timeg /= loops;
 | 
			
		||||
 | 
			
		||||
    fprintf(stderr,
 | 
			
		||||
	    " %10.2f MBytes\n",
 | 
			
		||||
	    COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6);
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  return 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
 | 
			
		||||
| 
						 | 
				
			
			@ -144,6 +144,7 @@ int main(int argc, char *argv[]){
 | 
			
		|||
  FLOAT *a,*vl,*vr,*wi,*wr,*work,*rwork;
 | 
			
		||||
  FLOAT wkopt[4];
 | 
			
		||||
  char job='V';
 | 
			
		||||
  char jobr='N';
 | 
			
		||||
  char *p;
 | 
			
		||||
 | 
			
		||||
  blasint m, i, j, info,lwork;
 | 
			
		||||
| 
						 | 
				
			
			@ -202,9 +203,9 @@ int main(int argc, char *argv[]){
 | 
			
		|||
    lwork = -1;
 | 
			
		||||
    m=to;
 | 
			
		||||
#ifndef COMPLEX
 | 
			
		||||
    GEEV (&job, &job, &m, a, &m, wr, wi, vl, &m, vr, &m, wkopt, &lwork, &info);
 | 
			
		||||
    GEEV (&job, &jobr, &m, a, &m, wr, wi, vl, &m, vr, &m, wkopt, &lwork, &info);
 | 
			
		||||
#else
 | 
			
		||||
    GEEV (&job, &job, &m, a, &m, wr, vl, &m, vr, &m, wkopt, &lwork,rwork, &info);
 | 
			
		||||
    GEEV (&job, &jobr, &m, a, &m, wr, vl, &m, vr, &m, wkopt, &lwork,rwork, &info);
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
  lwork = (blasint)wkopt[0];
 | 
			
		||||
| 
						 | 
				
			
			@ -226,16 +227,16 @@ int main(int argc, char *argv[]){
 | 
			
		|||
 | 
			
		||||
    lwork = -1;
 | 
			
		||||
#ifndef COMPLEX
 | 
			
		||||
    GEEV (&job, &job, &m, a, &m, wr, wi, vl, &m, vr, &m, wkopt, &lwork, &info);
 | 
			
		||||
    GEEV (&job, &jobr, &m, a, &m, wr, wi, vl, &m, vr, &m, wkopt, &lwork, &info);
 | 
			
		||||
#else
 | 
			
		||||
    GEEV (&job, &job, &m, a, &m, wr, vl, &m, vr, &m, wkopt, &lwork,rwork, &info);
 | 
			
		||||
    GEEV (&job, &jobr, &m, a, &m, wr, vl, &m, vr, &m, wkopt, &lwork,rwork, &info);
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
    lwork = (blasint)wkopt[0];
 | 
			
		||||
#ifndef COMPLEX
 | 
			
		||||
    GEEV (&job, &job, &m, a, &m, wr, wi, vl, &m, vr, &m, work, &lwork, &info);
 | 
			
		||||
    GEEV (&job, &jobr, &m, a, &m, wr, wi, vl, &m, vr, &m, work, &lwork, &info);
 | 
			
		||||
#else
 | 
			
		||||
    GEEV (&job, &job, &m, a, &m, wr, vl, &m, vr, &m, work, &lwork,rwork, &info);
 | 
			
		||||
    GEEV (&job, &jobr, &m, a, &m, wr, vl, &m, vr, &m, work, &lwork,rwork, &info);
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
    gettimeofday( &stop, (struct timezone *)0);
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -122,7 +122,7 @@ int main(int argc, char *argv[]){
 | 
			
		|||
 | 
			
		||||
  FLOAT *a, *b, *c;
 | 
			
		||||
  FLOAT alpha[] = {1.0, 1.0};
 | 
			
		||||
  FLOAT beta [] = {1.0, 1.0};
 | 
			
		||||
  FLOAT beta [] = {0.0, 0.0};
 | 
			
		||||
  char trans='N';
 | 
			
		||||
  blasint m, n, i, j;
 | 
			
		||||
  int loops = 1;
 | 
			
		||||
| 
						 | 
				
			
			@ -168,12 +168,21 @@ int main(int argc, char *argv[]){
 | 
			
		|||
	  has_param_n=1;	  
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#ifdef linux
 | 
			
		||||
  srandom(getpid());
 | 
			
		||||
#endif
 | 
			
		||||
 
 | 
			
		||||
  fprintf(stderr, "   SIZE       Flops\n");
 | 
			
		||||
	for(j = 0; j < m; j++){
 | 
			
		||||
      		for(i = 0; i < to * COMPSIZE; i++){
 | 
			
		||||
			a[i + j * to * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
 | 
			
		||||
			b[i + j * to * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
 | 
			
		||||
			c[i + j * to * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
 | 
			
		||||
      		}
 | 
			
		||||
    	}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  fprintf(stderr, "   SIZE          Flops          Time\n");
 | 
			
		||||
 | 
			
		||||
  for(m = from; m <= to; m += step)
 | 
			
		||||
  {
 | 
			
		||||
| 
						 | 
				
			
			@ -188,34 +197,23 @@ int main(int argc, char *argv[]){
 | 
			
		|||
 | 
			
		||||
 | 
			
		||||
    fprintf(stderr, " %6dx%d : ", (int)m, (int)n);
 | 
			
		||||
    gettimeofday( &start, (struct timezone *)0);
 | 
			
		||||
 | 
			
		||||
    for (l=0; l<loops; l++)
 | 
			
		||||
    {
 | 
			
		||||
 | 
			
		||||
    	for(j = 0; j < m; j++){
 | 
			
		||||
      		for(i = 0; i < m * COMPSIZE; i++){
 | 
			
		||||
			a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
 | 
			
		||||
			b[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
 | 
			
		||||
			c[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
 | 
			
		||||
      		}
 | 
			
		||||
    	}
 | 
			
		||||
 | 
			
		||||
    	gettimeofday( &start, (struct timezone *)0);
 | 
			
		||||
 | 
			
		||||
    	GEMM (&trans, &trans, &m, &n, &m, alpha, a, &m, b, &m, beta, c, &m );
 | 
			
		||||
 | 
			
		||||
    	gettimeofday( &stop, (struct timezone *)0);
 | 
			
		||||
 | 
			
		||||
    	time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
 | 
			
		||||
 | 
			
		||||
	timeg += time1;
 | 
			
		||||
 | 
			
		||||
    }
 | 
			
		||||
   gettimeofday( &stop, (struct timezone *)0);
 | 
			
		||||
   time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
 | 
			
		||||
 | 
			
		||||
    timeg /= loops;
 | 
			
		||||
    timeg = time1/loops;
 | 
			
		||||
    fprintf(stderr,
 | 
			
		||||
	    " %10.2f MFlops\n",
 | 
			
		||||
	    COMPSIZE * COMPSIZE * 2. * (double)m * (double)m * (double)n / timeg * 1.e-6);
 | 
			
		||||
	    " %10.2f MFlops %10.6f sec\n",
 | 
			
		||||
	    COMPSIZE * COMPSIZE * 2. * (double)m * (double)m * (double)n / timeg * 1.e-6, time1);
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -35,12 +35,19 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 | 
			
		|||
 | 
			
		||||
#undef GER
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#ifdef COMPLEX
 | 
			
		||||
#ifdef DOUBLE
 | 
			
		||||
#define GER   BLASFUNC(zgeru)
 | 
			
		||||
#else
 | 
			
		||||
#define GER   BLASFUNC(cgeru)
 | 
			
		||||
#endif
 | 
			
		||||
#else
 | 
			
		||||
#ifdef DOUBLE
 | 
			
		||||
#define GER   BLASFUNC(dger)
 | 
			
		||||
#else
 | 
			
		||||
#define GER   BLASFUNC(sger)
 | 
			
		||||
#endif
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#if defined(__WIN32__) || defined(__WIN64__)
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -0,0 +1,218 @@
 | 
			
		|||
/*********************************************************************/
 | 
			
		||||
/* Copyright 2009, 2010 The University of Texas at Austin.           */
 | 
			
		||||
/* All rights reserved.                                              */
 | 
			
		||||
/*                                                                   */
 | 
			
		||||
/* Redistribution and use in source and binary forms, with or        */
 | 
			
		||||
/* without modification, are permitted provided that the following   */
 | 
			
		||||
/* conditions are met:                                               */
 | 
			
		||||
/*                                                                   */
 | 
			
		||||
/*   1. Redistributions of source code must retain the above         */
 | 
			
		||||
/*      copyright notice, this list of conditions and the following  */
 | 
			
		||||
/*      disclaimer.                                                  */
 | 
			
		||||
/*                                                                   */
 | 
			
		||||
/*   2. Redistributions in binary form must reproduce the above      */
 | 
			
		||||
/*      copyright notice, this list of conditions and the following  */
 | 
			
		||||
/*      disclaimer in the documentation and/or other materials       */
 | 
			
		||||
/*      provided with the distribution.                              */
 | 
			
		||||
/*                                                                   */
 | 
			
		||||
/*    THIS  SOFTWARE IS PROVIDED  BY THE  UNIVERSITY OF  TEXAS AT    */
 | 
			
		||||
/*    AUSTIN  ``AS IS''  AND ANY  EXPRESS OR  IMPLIED WARRANTIES,    */
 | 
			
		||||
/*    INCLUDING, BUT  NOT LIMITED  TO, THE IMPLIED  WARRANTIES OF    */
 | 
			
		||||
/*    MERCHANTABILITY  AND FITNESS FOR  A PARTICULAR  PURPOSE ARE    */
 | 
			
		||||
/*    DISCLAIMED.  IN  NO EVENT SHALL THE UNIVERSITY  OF TEXAS AT    */
 | 
			
		||||
/*    AUSTIN OR CONTRIBUTORS BE  LIABLE FOR ANY DIRECT, INDIRECT,    */
 | 
			
		||||
/*    INCIDENTAL,  SPECIAL, EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES    */
 | 
			
		||||
/*    (INCLUDING, BUT  NOT LIMITED TO,  PROCUREMENT OF SUBSTITUTE    */
 | 
			
		||||
/*    GOODS  OR  SERVICES; LOSS  OF  USE,  DATA,  OR PROFITS;  OR    */
 | 
			
		||||
/*    BUSINESS INTERRUPTION) HOWEVER CAUSED  AND ON ANY THEORY OF    */
 | 
			
		||||
/*    LIABILITY, WHETHER  IN CONTRACT, STRICT  LIABILITY, OR TORT    */
 | 
			
		||||
/*    (INCLUDING NEGLIGENCE OR OTHERWISE)  ARISING IN ANY WAY OUT    */
 | 
			
		||||
/*    OF  THE  USE OF  THIS  SOFTWARE,  EVEN  IF ADVISED  OF  THE    */
 | 
			
		||||
/*    POSSIBILITY OF SUCH DAMAGE.                                    */
 | 
			
		||||
/*                                                                   */
 | 
			
		||||
/* The views and conclusions contained in the software and           */
 | 
			
		||||
/* documentation are those of the authors and should not be          */
 | 
			
		||||
/* interpreted as representing official policies, either expressed   */
 | 
			
		||||
/* or implied, of The University of Texas at Austin.                 */
 | 
			
		||||
/*********************************************************************/
 | 
			
		||||
 | 
			
		||||
#include <stdio.h>
 | 
			
		||||
#include <stdlib.h>
 | 
			
		||||
#ifdef __CYGWIN32__
 | 
			
		||||
#include <sys/time.h>
 | 
			
		||||
#endif
 | 
			
		||||
#include "common.h"
 | 
			
		||||
 | 
			
		||||
double fabs(double);
 | 
			
		||||
 | 
			
		||||
/* Bind GESV to the routine variant that matches the build configuration.
   COMPLEX picks the complex family; XDOUBLE takes precedence over DOUBLE when
   choosing the precision.  Earlier definitions of GESV and GETRS are dropped
   first (GETRS is not redefined here). */
#undef GESV
#undef GETRS

#if !defined(COMPLEX) && defined(XDOUBLE)
#define GESV   BLASFUNC(qgesv)
#elif !defined(COMPLEX) && defined(DOUBLE)
#define GESV   BLASFUNC(dgesv)
#elif !defined(COMPLEX)
#define GESV   BLASFUNC(sgesv)
#elif defined(XDOUBLE)
#define GESV   BLASFUNC(xgesv)
#elif defined(DOUBLE)
#define GESV   BLASFUNC(zgesv)
#else
#define GESV   BLASFUNC(cgesv)
#endif
 | 
			
		||||
 | 
			
		||||
#if defined(__WIN32__) || defined(__WIN64__)

/* Amount subtracted from the FILETIME value (once it has been converted to
   microseconds) to rebase it onto the Unix epoch. */
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif

/*
 * Minimal gettimeofday() stand-in for Windows builds.
 *
 * tv  receives the current system time as seconds and microseconds; when it
 *     is NULL nothing is written.
 * tz  ignored; present only so existing call sites compile unchanged.
 *
 * Always returns 0.
 */
int gettimeofday(struct timeval *tv, void *tz){

  FILETIME ft;
  unsigned __int64 ticks;

  (void)tz;  /* never consulted */

  if (NULL != tv)
    {
      GetSystemTimeAsFileTime(&ft);

      /* Join the two 32-bit halves into a single 64-bit count. */
      ticks = ((unsigned __int64)ft.dwHighDateTime << 32) | ft.dwLowDateTime;

      /* converting file time to unix epoch */
      ticks /= 10;  /* convert into microseconds */
      ticks -= DELTA_EPOCH_IN_MICROSECS;
      tv->tv_sec  = (long)(ticks / 1000000UL);
      tv->tv_usec = (long)(ticks % 1000000UL);
    }

  return 0;
}

#endif
 | 
			
		||||
 | 
			
		||||
/* The trailing "&& 0" keeps this whole section compiled out; drop it to route
   the benchmark's malloc() calls through huge_malloc() on non-Windows,
   non-Cygwin builds. */
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0

/* Fallback value for headers that do not provide SHM_HUGETLB. */
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

/*
 * Obtain a buffer backed by a private System V shared-memory segment that is
 * requested with SHM_HUGETLB.
 *
 * size  number of bytes wanted; the request is padded by HUGE_PAGESIZE and
 *       masked down to a HUGE_PAGESIZE multiple.
 *
 * Returns the attached address.  On any failure a message is printed to
 * stdout and the process exits with status 1, so the function never returns
 * NULL.
 */
static void *huge_malloc(BLASLONG size){
  int segment;
  void *mapped;

  segment = shmget(IPC_PRIVATE,
		   (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
		   SHM_HUGETLB | IPC_CREAT |0600);
  if (segment < 0) {
    printf( "Memory allocation failed(shmget).\n");
    exit(1);
  }

  mapped = shmat(segment, NULL, SHM_RND);
  if ((BLASLONG)mapped == -1){
    printf( "Memory allocation failed(shmat).\n");
    exit(1);
  }

  /* Mark the segment for removal immediately; the caller keeps using the
     mapping attached above. */
  shmctl(segment, IPC_RMID, 0);

  return mapped;
}

#define malloc huge_malloc

#endif
 | 
			
		||||
 | 
			
		||||
int main(int argc, char *argv[]){
 | 
			
		||||
 | 
			
		||||
  FLOAT *a, *b;
 | 
			
		||||
  blasint *ipiv;
 | 
			
		||||
 | 
			
		||||
  blasint m, i, j, info;
 | 
			
		||||
 | 
			
		||||
  int from =   1;
 | 
			
		||||
  int to   = 200;
 | 
			
		||||
  int step =   1;
 | 
			
		||||
 | 
			
		||||
  struct timeval start, stop;
 | 
			
		||||
  double time1;
 | 
			
		||||
 | 
			
		||||
  argc--;argv++;
 | 
			
		||||
 | 
			
		||||
  if (argc > 0) { from     = atol(*argv);		argc--; argv++;}
 | 
			
		||||
  if (argc > 0) { to       = MAX(atol(*argv), from);	argc--; argv++;}
 | 
			
		||||
  if (argc > 0) { step     = atol(*argv);		argc--; argv++;}
 | 
			
		||||
 | 
			
		||||
  fprintf(stderr, "From : %3d  To : %3d Step = %3d\n", from, to, step);
 | 
			
		||||
 | 
			
		||||
  if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
 | 
			
		||||
    fprintf(stderr,"Out of Memory!!\n");exit(1);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  if (( b = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
 | 
			
		||||
    fprintf(stderr,"Out of Memory!!\n");exit(1);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  if (( ipiv = (blasint *)malloc(sizeof(blasint) * to * COMPSIZE)) == NULL){
 | 
			
		||||
    fprintf(stderr,"Out of Memory!!\n");exit(1);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
#ifdef linux
 | 
			
		||||
  srandom(getpid());
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
  fprintf(stderr, "   SIZE       Flops              Time\n");
 | 
			
		||||
 | 
			
		||||
  for(m = from; m <= to; m += step){
 | 
			
		||||
 | 
			
		||||
    fprintf(stderr, " %dx%d : ", (int)m, (int)m);
 | 
			
		||||
 | 
			
		||||
    for(j = 0; j < m; j++){
 | 
			
		||||
      for(i = 0; i < m * COMPSIZE; i++){
 | 
			
		||||
	a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    for(j = 0; j < m; j++){
 | 
			
		||||
      for(i = 0; i < m * COMPSIZE; i++){
 | 
			
		||||
	b[i + j * m * COMPSIZE] = 0.0;
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    for (j = 0; j < m; ++j) {
 | 
			
		||||
      for (i = 0; i < m * COMPSIZE; ++i) {
 | 
			
		||||
	b[i] += a[i + j * m * COMPSIZE];
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    gettimeofday( &start, (struct timezone *)0);
 | 
			
		||||
 | 
			
		||||
    GESV (&m, &m, a, &m, ipiv, b, &m,  &info);
 | 
			
		||||
 | 
			
		||||
    gettimeofday( &stop, (struct timezone *)0);
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    fprintf(stderr,
 | 
			
		||||
	    "%10.2f MFlops %10.6f s\n",
 | 
			
		||||
	    COMPSIZE * COMPSIZE * (2. / 3. * (double)m * (double)m * (double)m + 2. * (double)m * (double)m * (double)m ) / (time1) * 1.e-6 , time1);
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  return 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
 | 
			
		||||
| 
						 | 
				
			
			@ -52,6 +52,11 @@ C)
 | 
			
		|||
	awk '/MFlops/ { print $3,int($9) }'|tail --lines=+2
 | 
			
		||||
	;;
 | 
			
		||||
 | 
			
		||||
B)
 | 
			
		||||
	# Copy Benchmark	
 | 
			
		||||
	awk '/MBytes/ { print $1,int($3) }'|tail --lines=+2
 | 
			
		||||
	;;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
*)
 | 
			
		||||
	awk '/MFlops/ { print $1,int($3) }'|tail --lines=+2
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -88,6 +88,10 @@ double fabs(double);
 | 
			
		|||
 | 
			
		||||
#if defined(__WIN32__) || defined(__WIN64__)
 | 
			
		||||
 | 
			
		||||
#ifndef DELTA_EPOCH_IN_MICROSECS
 | 
			
		||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
int gettimeofday(struct timeval *tv, void *tz){
 | 
			
		||||
 | 
			
		||||
  FILETIME ft;
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -0,0 +1,202 @@
 | 
			
		|||
/***************************************************************************
 | 
			
		||||
Copyright (c) 2014, The OpenBLAS Project
 | 
			
		||||
All rights reserved.
 | 
			
		||||
Redistribution and use in source and binary forms, with or without
 | 
			
		||||
modification, are permitted provided that the following conditions are
 | 
			
		||||
met:
 | 
			
		||||
1. Redistributions of source code must retain the above copyright
 | 
			
		||||
notice, this list of conditions and the following disclaimer.
 | 
			
		||||
2. Redistributions in binary form must reproduce the above copyright
 | 
			
		||||
notice, this list of conditions and the following disclaimer in
 | 
			
		||||
the documentation and/or other materials provided with the
 | 
			
		||||
distribution.
 | 
			
		||||
3. Neither the name of the OpenBLAS project nor the names of
 | 
			
		||||
its contributors may be used to endorse or promote products
 | 
			
		||||
derived from this software without specific prior written permission.
 | 
			
		||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 | 
			
		||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 | 
			
		||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 | 
			
		||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
 | 
			
		||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 | 
			
		||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 | 
			
		||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 | 
			
		||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 | 
			
		||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
 | 
			
		||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 | 
			
		||||
*****************************************************************************/
 | 
			
		||||
 | 
			
		||||
#include <stdio.h>
 | 
			
		||||
#include <stdlib.h>
 | 
			
		||||
#ifdef __CYGWIN32__
 | 
			
		||||
#include <sys/time.h>
 | 
			
		||||
#endif
 | 
			
		||||
#include "common.h"
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/* Bind SCAL to the scal routine that matches this build: the choice depends
   only on whether COMPLEX and DOUBLE are defined. */
#undef SCAL

#if defined(COMPLEX) && defined(DOUBLE)
#define SCAL   BLASFUNC(zscal)
#elif defined(COMPLEX)
#define SCAL   BLASFUNC(cscal)
#elif defined(DOUBLE)
#define SCAL   BLASFUNC(dscal)
#else
#define SCAL   BLASFUNC(sscal)
#endif
 | 
			
		||||
 | 
			
		||||
#if defined(__WIN32__) || defined(__WIN64__)

#ifndef DELTA_EPOCH_IN_MICROSECS
/* Offset, in microseconds, subtracted from the converted file time to
   rebase it onto the Unix epoch. */
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif

/*
 * Minimal gettimeofday() replacement for Windows builds.
 *
 * tv : receives the current time as seconds / microseconds; may be NULL,
 *      in which case nothing is written.
 * tz : accepted for signature compatibility only; never read.
 *
 * Always returns 0.
 */
int gettimeofday(struct timeval *tv, void *tz){

  FILETIME ft;
  unsigned __int64 ticks;

  (void)tz;	/* unused */

  if (NULL != tv)
    {
      GetSystemTimeAsFileTime(&ft);

      /* Join the two 32-bit halves into one 64-bit counter. */
      ticks = ((unsigned __int64)ft.dwHighDateTime << 32) | ft.dwLowDateTime;

      /*converting file time to unix epoch*/
      ticks /= 10;  /*convert into microseconds*/
      ticks -= DELTA_EPOCH_IN_MICROSECS;

      tv->tv_sec = (long)(ticks / 1000000UL);
      tv->tv_usec = (long)(ticks % 1000000UL);
    }

  return 0;
}

#endif
 | 
			
		||||
 | 
			
		||||
/* NOTE: this whole section is compiled out -- the condition ends in "&& 0". */
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

/*
 * Allocate `size` bytes from a SHM_HUGETLB shared-memory segment.
 *
 * The requested length is (size + HUGE_PAGESIZE) masked down to a multiple
 * of HUGE_PAGESIZE. On shmget/shmat failure a message is printed to stdout
 * and the process exits with status 1. The segment is marked for removal
 * (IPC_RMID) right after it has been attached.
 */
static void *huge_malloc(BLASLONG size){

  int segment;
  void *mapped;

  segment = shmget(IPC_PRIVATE,
		   (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
		   SHM_HUGETLB | IPC_CREAT |0600);

  if (segment < 0) {
    printf( "Memory allocation failed(shmget).\n");
    exit(1);
  }

  mapped = shmat(segment, NULL, SHM_RND);

  if ((BLASLONG)mapped == -1){
    printf( "Memory allocation failed(shmat).\n");
    exit(1);
  }

  shmctl(segment, IPC_RMID, 0);

  return mapped;
}

/* Route the malloc() calls that follow through huge_malloc(). */
#define malloc huge_malloc

#endif
 | 
			
		||||
 | 
			
		||||
int main(int argc, char *argv[]){
 | 
			
		||||
 | 
			
		||||
  FLOAT *x, *y;
 | 
			
		||||
  FLOAT alpha[2] = { 2.0, 2.0 };
 | 
			
		||||
  blasint m, i;
 | 
			
		||||
  blasint inc_x=1,inc_y=1;
 | 
			
		||||
  int loops = 1;
 | 
			
		||||
  int l;
 | 
			
		||||
  char *p;
 | 
			
		||||
 | 
			
		||||
  int from =   1;
 | 
			
		||||
  int to   = 200;
 | 
			
		||||
  int step =   1;
 | 
			
		||||
 | 
			
		||||
  struct timeval start, stop;
 | 
			
		||||
  double time1,timeg;
 | 
			
		||||
 | 
			
		||||
  argc--;argv++;
 | 
			
		||||
 | 
			
		||||
  if (argc > 0) { from     = atol(*argv);		argc--; argv++;}
 | 
			
		||||
  if (argc > 0) { to       = MAX(atol(*argv), from);	argc--; argv++;}
 | 
			
		||||
  if (argc > 0) { step     = atol(*argv);		argc--; argv++;}
 | 
			
		||||
 | 
			
		||||
  if ((p = getenv("OPENBLAS_LOOPS")))  loops = atoi(p);
 | 
			
		||||
  if ((p = getenv("OPENBLAS_INCX")))   inc_x = atoi(p);
 | 
			
		||||
 | 
			
		||||
  fprintf(stderr, "From : %3d  To : %3d Step = %3d Inc_x = %d Inc_y = %d Loops = %d\n", from, to, step,inc_x,inc_y,loops);
 | 
			
		||||
 | 
			
		||||
  if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
 | 
			
		||||
    fprintf(stderr,"Out of Memory!!\n");exit(1);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL){
 | 
			
		||||
    fprintf(stderr,"Out of Memory!!\n");exit(1);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
#ifdef linux
 | 
			
		||||
  srandom(getpid());
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
  fprintf(stderr, "   SIZE       Flops\n");
 | 
			
		||||
 | 
			
		||||
  for(m = from; m <= to; m += step)
 | 
			
		||||
  {
 | 
			
		||||
 | 
			
		||||
   timeg=0;
 | 
			
		||||
 | 
			
		||||
   fprintf(stderr, " %6d : ", (int)m);
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
   for (l=0; l<loops; l++)
 | 
			
		||||
   {
 | 
			
		||||
 | 
			
		||||
   	for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
 | 
			
		||||
			x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
 | 
			
		||||
   	}
 | 
			
		||||
 | 
			
		||||
   	for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
 | 
			
		||||
			y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
 | 
			
		||||
   	}
 | 
			
		||||
    	gettimeofday( &start, (struct timezone *)0);
 | 
			
		||||
 | 
			
		||||
    	SCAL (&m, alpha, x, &inc_x);
 | 
			
		||||
 | 
			
		||||
    	gettimeofday( &stop, (struct timezone *)0);
 | 
			
		||||
 | 
			
		||||
    	time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
 | 
			
		||||
 | 
			
		||||
	timeg += time1;
 | 
			
		||||
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    timeg /= loops;
 | 
			
		||||
 | 
			
		||||
#ifdef COMPLEX
 | 
			
		||||
    fprintf(stderr, " %10.2f MFlops\n", 6. * (double)m / timeg * 1.e-6);
 | 
			
		||||
#else
 | 
			
		||||
    fprintf(stderr, " %10.2f MFlops\n", 1. * (double)m / timeg * 1.e-6);
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  return 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
 | 
			
		||||
| 
						 | 
				
			
			@ -0,0 +1,56 @@
 | 
			
		|||
#!/usr/bin/python
 | 
			
		||||
 | 
			
		||||
import os
 | 
			
		||||
import sys
 | 
			
		||||
import time
 | 
			
		||||
import numpy
 | 
			
		||||
from numpy.random import randn
 | 
			
		||||
 | 
			
		||||
def run_cgemm(N,l):
    """Time `l` numpy.dot products of two complex N x N matrices and print the rate.

    Real and imaginary parts are drawn separately as float32. The printed
    line holds the size, the MFlops figure (8*N^3 operations counted per
    product) and the elapsed seconds.
    """
    A = randn(N,N).astype('float32') + randn(N,N).astype('float32') * 1j
    B = randn(N,N).astype('float32') + randn(N,N).astype('float32') * 1j

    t_begin = time.time()
    for _ in range(l):
        product = numpy.dot(A, B)
    t_end = time.time()

    elapsed = t_end - t_begin
    rate = (8*N*N*N) * l / elapsed
    rate = rate * 1e-6

    label = "%dx%d" % (N, N)
    print("%14s :\t%20f MFlops\t%20f sec" % (label, rate, elapsed))
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
if __name__ == "__main__":
    # Defaults: first size, last size, size increment, repetitions per size.
    N=128
    NMAX=2048
    NINC=128
    LOOPS=1

    # Optional positional overrides, in order: N NMAX NINC LOOPS.
    # Anything after the fourth argument is ignored.
    overrides = [int(a) for a in sys.argv[1:5]]
    N, NMAX, NINC, LOOPS = overrides + [N, NMAX, NINC, LOOPS][len(overrides):]

    # A non-empty OPENBLAS_LOOPS takes precedence over the command line.
    p = os.environ.get('OPENBLAS_LOOPS')
    if p:
        LOOPS = int(p)

    print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
    print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")

    # Stop at NMAX + 1 so NMAX itself is included but never exceeded, even
    # when NMAX - N is not a multiple of NINC.
    for i in range(N, NMAX + 1, NINC):
        run_cgemm(i,LOOPS)
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -0,0 +1,56 @@
 | 
			
		|||
#!/usr/bin/python
 | 
			
		||||
 | 
			
		||||
import os
 | 
			
		||||
import sys
 | 
			
		||||
import time
 | 
			
		||||
import numpy
 | 
			
		||||
from numpy.random import randn
 | 
			
		||||
 | 
			
		||||
def run_cgemv(N,l):
    """Time `l` numpy.dot products of a complex N x N matrix with a complex N-vector.

    Real and imaginary parts are drawn separately as float32. The printed
    line holds the size, the MFlops figure (8*N^2 operations counted per
    product) and the elapsed seconds.
    """
    A = randn(N,N).astype('float32') + randn(N,N).astype('float32') * 1j
    B = randn(N).astype('float32') + randn(N).astype('float32') * 1j

    t_begin = time.time()
    for _ in range(l):
        product = numpy.dot(A, B)
    t_end = time.time()

    elapsed = t_end - t_begin
    rate = (8*N*N) * l / elapsed
    rate = rate * 1e-6

    label = "%dx%d" % (N, N)
    print("%14s :\t%20f MFlops\t%20f sec" % (label, rate, elapsed))
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
if __name__ == "__main__":
    # Defaults: first size, last size, size increment, repetitions per size.
    N=128
    NMAX=2048
    NINC=128
    LOOPS=1

    # Optional positional overrides, in order: N NMAX NINC LOOPS.
    # Anything after the fourth argument is ignored.
    overrides = [int(a) for a in sys.argv[1:5]]
    N, NMAX, NINC, LOOPS = overrides + [N, NMAX, NINC, LOOPS][len(overrides):]

    # A non-empty OPENBLAS_LOOPS takes precedence over the command line.
    p = os.environ.get('OPENBLAS_LOOPS')
    if p:
        LOOPS = int(p)

    print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
    print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")

    # Stop at NMAX + 1 so NMAX itself is included but never exceeded, even
    # when NMAX - N is not a multiple of NINC.
    for i in range(N, NMAX + 1, NINC):
        run_cgemv(i,LOOPS)
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -0,0 +1,58 @@
 | 
			
		|||
#!/usr/bin/python
 | 
			
		||||
 | 
			
		||||
import os
 | 
			
		||||
import sys
 | 
			
		||||
import time
 | 
			
		||||
import numpy
 | 
			
		||||
from numpy.random import randn
 | 
			
		||||
from scipy.linalg.blas import daxpy
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def run_daxpy(N,l):
    """Time `l` calls of scipy's daxpy on float64 vectors of length N and print the rate.

    Each call's result is fed back in as `y` for the next call. The printed
    line holds the size, the MFlops figure (2*N operations counted per call)
    and the elapsed seconds.
    """
    x = randn(N).astype('float64')
    y = randn(N).astype('float64')

    t_begin = time.time()
    for _ in range(l):
        y = daxpy(x, y, a=2.0)
    t_end = time.time()

    elapsed = t_end - t_begin
    rate = (2*N) * l / elapsed
    rate = rate * 1e-6

    label = "%d" % (N)
    print("%14s :\t%20f MFlops\t%20f sec" % (label, rate, elapsed))
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
if __name__ == "__main__":
    # Defaults: first size, last size, size increment, repetitions per size.
    N=128
    NMAX=2048
    NINC=128
    LOOPS=1

    # Optional positional overrides, in order: N NMAX NINC LOOPS.
    # Anything after the fourth argument is ignored.
    overrides = [int(a) for a in sys.argv[1:5]]
    N, NMAX, NINC, LOOPS = overrides + [N, NMAX, NINC, LOOPS][len(overrides):]

    # A non-empty OPENBLAS_LOOPS takes precedence over the command line.
    p = os.environ.get('OPENBLAS_LOOPS')
    if p:
        LOOPS = int(p)

    print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
    print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")

    # Stop at NMAX + 1 so NMAX itself is included but never exceeded, even
    # when NMAX - N is not a multiple of NINC.
    for i in range(N, NMAX + 1, NINC):
        run_daxpy(i,LOOPS)
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -0,0 +1,56 @@
 | 
			
		|||
#!/usr/bin/python
 | 
			
		||||
 | 
			
		||||
import os
 | 
			
		||||
import sys
 | 
			
		||||
import time
 | 
			
		||||
import numpy
 | 
			
		||||
from numpy.random import randn
 | 
			
		||||
 | 
			
		||||
def run_ddot(N,l):
    """Time `l` numpy.dot products of two float64 vectors of length N and print the rate.

    The printed line holds the size, the MFlops figure (2*N operations
    counted per product) and the elapsed seconds.
    """
    A = randn(N).astype('float64')
    B = randn(N).astype('float64')

    t_begin = time.time()
    for _ in range(l):
        product = numpy.dot(A, B)
    t_end = time.time()

    elapsed = t_end - t_begin
    rate = (2*N) * l / elapsed
    rate = rate * 1e-6

    label = "%d" % (N)
    print("%14s :\t%20f MFlops\t%20f sec" % (label, rate, elapsed))
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
if __name__ == "__main__":
    # Defaults: first size, last size, size increment, repetitions per size.
    N=128
    NMAX=2048
    NINC=128
    LOOPS=1

    # Optional positional overrides, in order: N NMAX NINC LOOPS.
    # Anything after the fourth argument is ignored.
    overrides = [int(a) for a in sys.argv[1:5]]
    N, NMAX, NINC, LOOPS = overrides + [N, NMAX, NINC, LOOPS][len(overrides):]

    # A non-empty OPENBLAS_LOOPS takes precedence over the command line.
    p = os.environ.get('OPENBLAS_LOOPS')
    if p:
        LOOPS = int(p)

    print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
    print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")

    # Stop at NMAX + 1 so NMAX itself is included but never exceeded, even
    # when NMAX - N is not a multiple of NINC.
    for i in range(N, NMAX + 1, NINC):
        run_ddot(i,LOOPS)
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -0,0 +1,55 @@
 | 
			
		|||
#!/usr/bin/python
 | 
			
		||||
 | 
			
		||||
import os
 | 
			
		||||
import sys
 | 
			
		||||
import time
 | 
			
		||||
import numpy
 | 
			
		||||
from numpy.random import randn
 | 
			
		||||
 | 
			
		||||
def run_deig(N,l):
    """Time `l` numpy.linalg.eig calls on a float64 N x N matrix and print the rate.

    The printed line holds the size, the MFlops figure (26.33*N^3 operations
    counted per call) and the elapsed seconds.
    """
    A = randn(N,N).astype('float64')

    t_begin = time.time()
    for _ in range(l):
        eigenvalues, eigenvectors = numpy.linalg.eig(A)
    t_end = time.time()

    elapsed = t_end - t_begin
    rate = (26.33 *N*N*N) * l / elapsed
    rate = rate * 1e-6

    label = "%dx%d" % (N, N)
    print("%14s :\t%20f MFlops\t%20f sec" % (label, rate, elapsed))
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
if __name__ == "__main__":
    # Defaults: first size, last size, size increment, repetitions per size.
    N=128
    NMAX=2048
    NINC=128
    LOOPS=1

    # Optional positional overrides, in order: N NMAX NINC LOOPS.
    # Anything after the fourth argument is ignored.
    overrides = [int(a) for a in sys.argv[1:5]]
    N, NMAX, NINC, LOOPS = overrides + [N, NMAX, NINC, LOOPS][len(overrides):]

    # A non-empty OPENBLAS_LOOPS takes precedence over the command line.
    p = os.environ.get('OPENBLAS_LOOPS')
    if p:
        LOOPS = int(p)

    print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
    print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")

    # Stop at NMAX + 1 so NMAX itself is included but never exceeded, even
    # when NMAX - N is not a multiple of NINC.
    for i in range(N, NMAX + 1, NINC):
        run_deig(i,LOOPS)
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -0,0 +1,56 @@
 | 
			
		|||
#!/usr/bin/python
 | 
			
		||||
 | 
			
		||||
import os
 | 
			
		||||
import sys
 | 
			
		||||
import time
 | 
			
		||||
import numpy
 | 
			
		||||
from numpy.random import randn
 | 
			
		||||
 | 
			
		||||
def run_dgemm(N,l):
    """Time `l` numpy.dot products of two float64 N x N matrices and print the rate.

    The printed line holds the size, the MFlops figure (2*N^3 operations
    counted per product) and the elapsed seconds.
    """
    A = randn(N,N).astype('float64')
    B = randn(N,N).astype('float64')

    t_begin = time.time()
    for _ in range(l):
        product = numpy.dot(A, B)
    t_end = time.time()

    elapsed = t_end - t_begin
    rate = (2*N*N*N) * l / elapsed
    rate = rate * 1e-6

    label = "%dx%d" % (N, N)
    print("%14s :\t%20f MFlops\t%20f sec" % (label, rate, elapsed))
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
if __name__ == "__main__":
    # Defaults: first size, last size, size increment, repetitions per size.
    N=128
    NMAX=2048
    NINC=128
    LOOPS=1

    # Optional positional overrides, in order: N NMAX NINC LOOPS.
    # Anything after the fourth argument is ignored.
    overrides = [int(a) for a in sys.argv[1:5]]
    N, NMAX, NINC, LOOPS = overrides + [N, NMAX, NINC, LOOPS][len(overrides):]

    # A non-empty OPENBLAS_LOOPS takes precedence over the command line.
    p = os.environ.get('OPENBLAS_LOOPS')
    if p:
        LOOPS = int(p)

    print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
    print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")

    # Stop at NMAX + 1 so NMAX itself is included but never exceeded, even
    # when NMAX - N is not a multiple of NINC.
    for i in range(N, NMAX + 1, NINC):
        run_dgemm(i,LOOPS)
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -0,0 +1,56 @@
 | 
			
		|||
#!/usr/bin/python
 | 
			
		||||
 | 
			
		||||
import os
 | 
			
		||||
import sys
 | 
			
		||||
import time
 | 
			
		||||
import numpy
 | 
			
		||||
from numpy.random import randn
 | 
			
		||||
 | 
			
		||||
def run_dgemv(N,l):
    """Time `l` numpy.dot products of a float64 N x N matrix with an N-vector.

    The printed line holds the size, the MFlops figure (2*N^2 operations
    counted per product) and the elapsed seconds.
    """
    A = randn(N,N).astype('float64')
    B = randn(N).astype('float64')

    t_begin = time.time()
    for _ in range(l):
        product = numpy.dot(A, B)
    t_end = time.time()

    elapsed = t_end - t_begin
    rate = (2*N*N) * l / elapsed
    rate = rate * 1e-6

    label = "%dx%d" % (N, N)
    print("%14s :\t%20f MFlops\t%20f sec" % (label, rate, elapsed))
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
if __name__ == "__main__":
    # Defaults: first size, last size, size increment, repetitions per size.
    N=128
    NMAX=2048
    NINC=128
    LOOPS=1

    # Optional positional overrides, in order: N NMAX NINC LOOPS.
    # Anything after the fourth argument is ignored.
    overrides = [int(a) for a in sys.argv[1:5]]
    N, NMAX, NINC, LOOPS = overrides + [N, NMAX, NINC, LOOPS][len(overrides):]

    # A non-empty OPENBLAS_LOOPS takes precedence over the command line.
    p = os.environ.get('OPENBLAS_LOOPS')
    if p:
        LOOPS = int(p)

    print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
    print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")

    # Stop at NMAX + 1 so NMAX itself is included but never exceeded, even
    # when NMAX - N is not a multiple of NINC.
    for i in range(N, NMAX + 1, NINC):
        run_dgemv(i,LOOPS)
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -0,0 +1,58 @@
 | 
			
		|||
#!/usr/bin/python
 | 
			
		||||
 | 
			
		||||
import os
 | 
			
		||||
import sys
 | 
			
		||||
import time
 | 
			
		||||
import numpy
 | 
			
		||||
from numpy.random import randn
 | 
			
		||||
from scipy.linalg.lapack import dgesv
 | 
			
		||||
 | 
			
		||||
def run_dgesv(N,l):
    """Time `l` calls of scipy's dgesv on float64 N x N inputs and print the rate.

    The printed line holds the size, the MFlops figure
    (2/3*N^3 + 2*N^3 operations counted per call) and the elapsed seconds.
    """
    a = randn(N,N).astype('float64')
    b = randn(N,N).astype('float64')

    t_begin = time.time()
    for _ in range(l):
        # The two trailing 1s are passed positionally -- presumably the
        # overwrite flags; confirm against the SciPy signature.
        dgesv(a, b, 1, 1)
    t_end = time.time()

    elapsed = t_end - t_begin

    rate = (2.0/3.0 *N*N*N + 2.0*N*N*N) * l / elapsed
    rate = rate * 1e-6

    label = "%dx%d" % (N, N)
    print("%14s :\t%20f MFlops\t%20f sec" % (label, rate, elapsed))
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
if __name__ == "__main__":
    # Defaults: first size, last size, size increment, repetitions per size.
    N=128
    NMAX=2048
    NINC=128
    LOOPS=1

    # Optional positional overrides, in order: N NMAX NINC LOOPS.
    # Anything after the fourth argument is ignored.
    overrides = [int(a) for a in sys.argv[1:5]]
    N, NMAX, NINC, LOOPS = overrides + [N, NMAX, NINC, LOOPS][len(overrides):]

    # A non-empty OPENBLAS_LOOPS takes precedence over the command line.
    p = os.environ.get('OPENBLAS_LOOPS')
    if p:
        LOOPS = int(p)

    print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
    print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")

    # Stop at NMAX + 1 so NMAX itself is included but never exceeded, even
    # when NMAX - N is not a multiple of NINC.
    for i in range(N, NMAX + 1, NINC):
        run_dgesv(i,LOOPS)
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -0,0 +1,56 @@
 | 
			
		|||
#!/usr/bin/python
 | 
			
		||||
 | 
			
		||||
import os
 | 
			
		||||
import sys
 | 
			
		||||
import time
 | 
			
		||||
import numpy
 | 
			
		||||
from numpy.random import randn
 | 
			
		||||
 | 
			
		||||
def run_dsolve(N,l):
    """Time `l` numpy.linalg.solve calls on float64 N x N inputs and print the rate.

    The printed line holds the size, the MFlops figure
    (2/3*N^3 + 2*N^3 operations counted per call) and the elapsed seconds.
    """
    A = randn(N,N).astype('float64')
    B = randn(N,N).astype('float64')

    t_begin = time.time()
    for _ in range(l):
        solution = numpy.linalg.solve(A, B)
    t_end = time.time()

    elapsed = t_end - t_begin
    rate = (2.0/3.0 *N*N*N + 2.0*N*N*N) * l / elapsed
    rate = rate * 1e-6

    label = "%dx%d" % (N, N)
    print("%14s :\t%20f MFlops\t%20f sec" % (label, rate, elapsed))
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
if __name__ == "__main__":
    # Defaults: first size, last size, size increment, repetitions per size.
    N=128
    NMAX=2048
    NINC=128
    LOOPS=1

    # Optional positional overrides, in order: N NMAX NINC LOOPS.
    # Anything after the fourth argument is ignored.
    overrides = [int(a) for a in sys.argv[1:5]]
    N, NMAX, NINC, LOOPS = overrides + [N, NMAX, NINC, LOOPS][len(overrides):]

    # A non-empty OPENBLAS_LOOPS takes precedence over the command line.
    p = os.environ.get('OPENBLAS_LOOPS')
    if p:
        LOOPS = int(p)

    print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
    print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")

    # Stop at NMAX + 1 so NMAX itself is included but never exceeded, even
    # when NMAX - N is not a multiple of NINC.
    for i in range(N, NMAX + 1, NINC):
        run_dsolve(i,LOOPS)
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -0,0 +1,56 @@
 | 
			
		|||
#!/usr/bin/python
 | 
			
		||||
 | 
			
		||||
import os
 | 
			
		||||
import sys
 | 
			
		||||
import time
 | 
			
		||||
import numpy
 | 
			
		||||
from numpy.random import randn
 | 
			
		||||
 | 
			
		||||
def run_sdot(N,l):
    """Time `l` numpy.dot products of two float32 vectors of length N and print the rate.

    The printed line holds the size, the MFlops figure (2*N operations
    counted per product) and the elapsed seconds.
    """
    A = randn(N).astype('float32')
    B = randn(N).astype('float32')

    t_begin = time.time()
    for _ in range(l):
        product = numpy.dot(A, B)
    t_end = time.time()

    elapsed = t_end - t_begin
    rate = (2*N) * l / elapsed
    rate = rate * 1e-6

    label = "%d" % (N)
    print("%14s :\t%20f MFlops\t%20f sec" % (label, rate, elapsed))
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
if __name__ == "__main__":
    # Defaults: first size, last size, size increment, repetitions per size.
    N=128
    NMAX=2048
    NINC=128
    LOOPS=1

    # Optional positional overrides, in order: N NMAX NINC LOOPS.
    # Anything after the fourth argument is ignored.
    overrides = [int(a) for a in sys.argv[1:5]]
    N, NMAX, NINC, LOOPS = overrides + [N, NMAX, NINC, LOOPS][len(overrides):]

    # A non-empty OPENBLAS_LOOPS takes precedence over the command line.
    p = os.environ.get('OPENBLAS_LOOPS')
    if p:
        LOOPS = int(p)

    print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
    print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")

    # Stop at NMAX + 1 so NMAX itself is included but never exceeded, even
    # when NMAX - N is not a multiple of NINC.
    for i in range(N, NMAX + 1, NINC):
        run_sdot(i,LOOPS)
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -0,0 +1,56 @@
 | 
			
		|||
#!/usr/bin/python
 | 
			
		||||
 | 
			
		||||
import os
 | 
			
		||||
import sys
 | 
			
		||||
import time
 | 
			
		||||
import numpy
 | 
			
		||||
from numpy.random import randn
 | 
			
		||||
 | 
			
		||||
def run_sgemm(N,l):
    """Time `l` numpy.dot products of two float32 N x N matrices and print the rate.

    The printed line holds the size, the MFlops figure (2*N^3 operations
    counted per product) and the elapsed seconds.
    """
    A = randn(N,N).astype('float32')
    B = randn(N,N).astype('float32')

    t_begin = time.time()
    for _ in range(l):
        product = numpy.dot(A, B)
    t_end = time.time()

    elapsed = t_end - t_begin
    rate = (2*N*N*N) * l / elapsed
    rate = rate * 1e-6

    label = "%dx%d" % (N, N)
    print("%14s :\t%20f MFlops\t%20f sec" % (label, rate, elapsed))
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
if __name__ == "__main__":
    # Defaults: first size, last size, size increment, repetitions per size.
    N=128
    NMAX=2048
    NINC=128
    LOOPS=1

    # Optional positional overrides, in order: N NMAX NINC LOOPS.
    # Anything after the fourth argument is ignored.
    overrides = [int(a) for a in sys.argv[1:5]]
    N, NMAX, NINC, LOOPS = overrides + [N, NMAX, NINC, LOOPS][len(overrides):]

    # A non-empty OPENBLAS_LOOPS takes precedence over the command line.
    p = os.environ.get('OPENBLAS_LOOPS')
    if p:
        LOOPS = int(p)

    print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
    print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")

    # Stop at NMAX + 1 so NMAX itself is included but never exceeded, even
    # when NMAX - N is not a multiple of NINC.
    for i in range(N, NMAX + 1, NINC):
        run_sgemm(i,LOOPS)
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -0,0 +1,56 @@
 | 
			
		|||
#!/usr/bin/python
 | 
			
		||||
 | 
			
		||||
import os
 | 
			
		||||
import sys
 | 
			
		||||
import time
 | 
			
		||||
import numpy
 | 
			
		||||
from numpy.random import randn
 | 
			
		||||
 | 
			
		||||
def run_sgemv(N,l):
    """Time `l` numpy.dot products of a float32 N x N matrix with an N-vector.

    The printed line holds the size, the MFlops figure (2*N^2 operations
    counted per product) and the elapsed seconds.
    """
    A = randn(N,N).astype('float32')
    B = randn(N).astype('float32')

    t_begin = time.time()
    for _ in range(l):
        product = numpy.dot(A, B)
    t_end = time.time()

    elapsed = t_end - t_begin
    rate = (2*N*N) * l / elapsed
    rate = rate * 1e-6

    label = "%dx%d" % (N, N)
    print("%14s :\t%20f MFlops\t%20f sec" % (label, rate, elapsed))
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
if __name__ == "__main__":
    # Defaults: first size, last size, size increment, repetitions per size.
    N=128
    NMAX=2048
    NINC=128
    LOOPS=1

    # Optional positional overrides, in order: N NMAX NINC LOOPS.
    # Anything after the fourth argument is ignored.
    overrides = [int(a) for a in sys.argv[1:5]]
    N, NMAX, NINC, LOOPS = overrides + [N, NMAX, NINC, LOOPS][len(overrides):]

    # A non-empty OPENBLAS_LOOPS takes precedence over the command line.
    p = os.environ.get('OPENBLAS_LOOPS')
    if p:
        LOOPS = int(p)

    print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
    print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")

    # Stop at NMAX + 1 so NMAX itself is included but never exceeded, even
    # when NMAX - N is not a multiple of NINC.
    for i in range(N, NMAX + 1, NINC):
        run_sgemv(i,LOOPS)
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -0,0 +1,56 @@
 | 
			
		|||
#!/usr/bin/python
 | 
			
		||||
 | 
			
		||||
import os
 | 
			
		||||
import sys
 | 
			
		||||
import time
 | 
			
		||||
import numpy
 | 
			
		||||
from numpy.random import randn
 | 
			
		||||
 | 
			
		||||
def run_zgemm(N,l):
    """Time `l` numpy.dot products of two complex N x N matrices and print the rate.

    Real and imaginary parts are drawn separately as float64. The printed
    line holds the size, the MFlops figure (8*N^3 operations counted per
    product) and the elapsed seconds.
    """
    A = randn(N,N).astype('float64') + randn(N,N).astype('float64') * 1j
    B = randn(N,N).astype('float64') + randn(N,N).astype('float64') * 1j

    t_begin = time.time()
    for _ in range(l):
        product = numpy.dot(A, B)
    t_end = time.time()

    elapsed = t_end - t_begin
    rate = (8*N*N*N) * l / elapsed
    rate = rate * 1e-6

    label = "%dx%d" % (N, N)
    print("%14s :\t%20f MFlops\t%20f sec" % (label, rate, elapsed))
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
if __name__ == "__main__":
    # Defaults: first size, last size, size increment, repetitions per size.
    N=128
    NMAX=2048
    NINC=128
    LOOPS=1

    # Optional positional overrides, in order: N NMAX NINC LOOPS.
    # Anything after the fourth argument is ignored.
    overrides = [int(a) for a in sys.argv[1:5]]
    N, NMAX, NINC, LOOPS = overrides + [N, NMAX, NINC, LOOPS][len(overrides):]

    # A non-empty OPENBLAS_LOOPS takes precedence over the command line.
    p = os.environ.get('OPENBLAS_LOOPS')
    if p:
        LOOPS = int(p)

    print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
    print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")

    # Stop at NMAX + 1 so NMAX itself is included but never exceeded, even
    # when NMAX - N is not a multiple of NINC.
    for i in range(N, NMAX + 1, NINC):
        run_zgemm(i,LOOPS)
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -0,0 +1,56 @@
 | 
			
		|||
#!/usr/bin/python
 | 
			
		||||
 | 
			
		||||
import os
 | 
			
		||||
import sys
 | 
			
		||||
import time
 | 
			
		||||
import numpy
 | 
			
		||||
from numpy.random import randn
 | 
			
		||||
 | 
			
		||||
def run_zgemv(N,l):
	"""Time `l` products of a random N x N complex matrix with a complex vector.

	N -- order of the matrix and length of the vector passed to numpy.dot.
	l -- number of times the product is repeated inside the timed region.

	Prints one line with the size, the MFlops figure (8*N^2 operations are
	counted per product) and the elapsed wall-clock seconds. Returns None.
	"""
	A = randn(N,N).astype('float64') + randn(N,N).astype('float64') * 1j
	B = randn(N).astype('float64') + randn(N).astype('float64') * 1j

	# Only the repeated products are timed, not the setup of A and B.
	start = time.time()
	for _ in range(l):
		numpy.dot(A,B)
	end = time.time()

	timediff = end - start
	# A coarse clock can report zero elapsed time for small N; report an
	# infinite rate in that case instead of raising ZeroDivisionError.
	if timediff > 0:
		mflops = (8*N*N) * l / timediff
		mflops *= 1e-6
	else:
		mflops = float('inf')

	size = "%dx%d" % (N,N)
	print("%14s :\t%20f MFlops\t%20f sec" % (size,mflops,timediff))
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
if __name__ == "__main__":
	# Defaults: first size, last size, size increment, repetitions per size.
	N=128
	NMAX=2048
	NINC=128
	LOOPS=1

	# Optional positional overrides, in order: N NMAX NINC LOOPS.
	# Arguments past the fourth are ignored.
	argv = sys.argv
	if len(argv) > 1:
		N = int(argv[1])
	if len(argv) > 2:
		NMAX = int(argv[2])
	if len(argv) > 3:
		NINC = int(argv[3])
	if len(argv) > 4:
		LOOPS = int(argv[4])

	# A non-empty OPENBLAS_LOOPS environment variable takes precedence.
	p = os.environ.get('OPENBLAS_LOOPS')
	if p:
		LOOPS = int(p)

	print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
	print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")

	# NMAX is an inclusive upper bound: stop at the last size <= NMAX.
	# (An end of NMAX+NINC would also run one size beyond NMAX whenever
	# NMAX-N is not a multiple of NINC.)
	for i in range(N, NMAX+1, NINC):
		run_zgemv(i,LOOPS)
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -0,0 +1,56 @@
 | 
			
		|||
#!/usr/bin/octave --silent 
 | 
			
		||||
 | 
			
		||||
# Benchmark of the single-precision complex matrix product C = A * B
# for square n x n operands, n = nfrom, nfrom+nstep, ... while n <= nto.
nfrom = 128;
nto   = 2048;
nstep = 128;
loops = 1;

# Positional arguments override the defaults in the order
# nfrom, nto, nstep, loops; anything past the fourth is ignored.
# str2double only parses a number, whereas str2num hands the text to eval.
arg_list = argv();
for i = 1:min(nargin, 4)
	value = str2double(arg_list{i});
	switch (i)
		case 1
			nfrom = value;
		case 2
			nto   = value;
		case 3
			nstep = value;
		case 4
			loops = value;
	endswitch
endfor

# A non-empty OPENBLAS_LOOPS environment variable takes precedence.
p = getenv("OPENBLAS_LOOPS");
if !isempty(p)
	loops = str2double(p);
endif

printf("From %d To %d Step=%d Loops=%d\n",nfrom, nto, nstep, loops);
printf("        SIZE             FLOPS             TIME\n");

n = nfrom;
while n <= nto

	A = single(rand(n,n)) + single(rand(n,n)) * 1i;
	B = single(rand(n,n)) + single(rand(n,n)) * 1i;

	# Only the repeated products are timed, not the operand setup.
	start = clock();
	l = 0;
	while l < loops
		C = A * B;
		l = l + 1;
	endwhile
	timeg = etime(clock(), start);

	# 4 * 2 * n^3 operations are counted per product.
	mflops = ( 4.0 * 2.0*n*n*n *loops ) / ( timeg * 1.0e6 );

	st1 = sprintf("%dx%d : ", n,n);
	printf("%20s %10.2f MFlops %10.6f sec\n", st1, mflops, timeg);
	n = n + nstep;

endwhile
 | 
			
		||||
| 
						 | 
				
			
			@ -0,0 +1,56 @@
 | 
			
		|||
#!/usr/bin/octave --silent 
 | 
			
		||||
 | 
			
		||||
# Benchmark of the single-precision complex matrix-vector product C = A * B
# (A is n x n, B is n x 1), n = nfrom, nfrom+nstep, ... while n <= nto.
nfrom = 128;
nto   = 2048;
nstep = 128;
loops = 1;

# Positional arguments override the defaults in the order
# nfrom, nto, nstep, loops; anything past the fourth is ignored.
# str2double only parses a number, whereas str2num hands the text to eval.
arg_list = argv();
for i = 1:min(nargin, 4)
	value = str2double(arg_list{i});
	switch (i)
		case 1
			nfrom = value;
		case 2
			nto   = value;
		case 3
			nstep = value;
		case 4
			loops = value;
	endswitch
endfor

# A non-empty OPENBLAS_LOOPS environment variable takes precedence.
p = getenv("OPENBLAS_LOOPS");
if !isempty(p)
	loops = str2double(p);
endif

printf("From %d To %d Step=%d Loops=%d\n",nfrom, nto, nstep, loops);
printf("        SIZE             FLOPS             TIME\n");

n = nfrom;
while n <= nto

	A = single(rand(n,n)) + single(rand(n,n)) * 1i;
	B = single(rand(n,1)) + single(rand(n,1)) * 1i;

	# Only the repeated products are timed, not the operand setup.
	start = clock();
	l = 0;
	while l < loops
		C = A * B;
		l = l + 1;
	endwhile
	timeg = etime(clock(), start);

	# 4 * 2 * n^2 operations are counted per product.
	mflops = ( 4.0 * 2.0*n*n *loops ) / ( timeg * 1.0e6 );

	st1 = sprintf("%dx%d : ", n,n);
	printf("%20s %10.2f MFlops %10.6f sec\n", st1, mflops, timeg);
	n = n + nstep;

endwhile
 | 
			
		||||
| 
						 | 
				
			
			@ -0,0 +1,56 @@
 | 
			
		|||
#!/usr/bin/octave --silent 
 | 
			
		||||
 | 
			
		||||
# Benchmark of eig() on a random double-precision n x n matrix,
# n = nfrom, nfrom+nstep, ... while n <= nto.
nfrom = 128;
nto   = 2048;
nstep = 128;
loops = 1;

# Positional arguments override the defaults in the order
# nfrom, nto, nstep, loops; anything past the fourth is ignored.
# str2double only parses a number, whereas str2num hands the text to eval.
arg_list = argv();
for i = 1:min(nargin, 4)
	value = str2double(arg_list{i});
	switch (i)
		case 1
			nfrom = value;
		case 2
			nto   = value;
		case 3
			nstep = value;
		case 4
			loops = value;
	endswitch
endfor

# A non-empty OPENBLAS_LOOPS environment variable takes precedence.
p = getenv("OPENBLAS_LOOPS");
if !isempty(p)
	loops = str2double(p);
endif

printf("From %d To %d Step=%d Loops=%d\n",nfrom, nto, nstep, loops);
printf("        SIZE             FLOPS             TIME\n");

n = nfrom;
while n <= nto

	A = double(rand(n,n));

	# Only the repeated decompositions are timed, not the matrix setup.
	start = clock();
	l = 0;
	while l < loops
		[V,lambda] = eig(A);
		l = l + 1;
	endwhile
	timeg = etime(clock(), start);

	# 26.33 * n^3 operations are counted per decomposition.
	mflops = ( 26.33 *n*n*n ) *loops / ( timeg * 1.0e6 );

	st1 = sprintf("%dx%d : ", n,n);
	printf("%20s %10.2f MFlops %10.6f sec\n", st1, mflops, timeg );
	n = n + nstep;

endwhile
 | 
			
		||||
| 
						 | 
				
			
			@ -0,0 +1,56 @@
 | 
			
		|||
#!/usr/bin/octave --silent 
 | 
			
		||||
 | 
			
		||||
# Benchmark of the double-precision matrix product C = A * B
# for square n x n operands, n = nfrom, nfrom+nstep, ... while n <= nto.
nfrom = 128;
nto   = 2048;
nstep = 128;
loops = 1;

# Positional arguments override the defaults in the order
# nfrom, nto, nstep, loops; anything past the fourth is ignored.
# str2double only parses a number, whereas str2num hands the text to eval.
arg_list = argv();
for i = 1:min(nargin, 4)
	value = str2double(arg_list{i});
	switch (i)
		case 1
			nfrom = value;
		case 2
			nto   = value;
		case 3
			nstep = value;
		case 4
			loops = value;
	endswitch
endfor

# A non-empty OPENBLAS_LOOPS environment variable takes precedence.
p = getenv("OPENBLAS_LOOPS");
if !isempty(p)
	loops = str2double(p);
endif

printf("From %d To %d Step=%d Loops=%d\n",nfrom, nto, nstep, loops);
printf("        SIZE             FLOPS             TIME\n");

n = nfrom;
while n <= nto

	A = double(rand(n,n));
	B = double(rand(n,n));

	# Only the repeated products are timed, not the operand setup.
	start = clock();
	l = 0;
	while l < loops
		C = A * B;
		l = l + 1;
	endwhile
	timeg = etime(clock(), start);

	# 2 * n^3 operations are counted per product.
	mflops = ( 2.0*n*n*n *loops ) / ( timeg * 1.0e6 );

	st1 = sprintf("%dx%d : ", n,n);
	printf("%20s %10.2f MFlops %10.6f sec\n", st1, mflops, timeg);
	n = n + nstep;

endwhile
 | 
			
		||||
| 
						 | 
				
			
			@ -0,0 +1,56 @@
 | 
			
		|||
#!/usr/bin/octave --silent 
 | 
			
		||||
 | 
			
		||||
# Benchmark of the double-precision matrix-vector product C = A * B
# (A is n x n, B is n x 1), n = nfrom, nfrom+nstep, ... while n <= nto.
nfrom = 128;
nto   = 2048;
nstep = 128;
loops = 1;

# Positional arguments override the defaults in the order
# nfrom, nto, nstep, loops; anything past the fourth is ignored.
# str2double only parses a number, whereas str2num hands the text to eval.
arg_list = argv();
for i = 1:min(nargin, 4)
	value = str2double(arg_list{i});
	switch (i)
		case 1
			nfrom = value;
		case 2
			nto   = value;
		case 3
			nstep = value;
		case 4
			loops = value;
	endswitch
endfor

# A non-empty OPENBLAS_LOOPS environment variable takes precedence.
p = getenv("OPENBLAS_LOOPS");
if !isempty(p)
	loops = str2double(p);
endif

printf("From %d To %d Step=%d Loops=%d\n",nfrom, nto, nstep, loops);
printf("        SIZE             FLOPS             TIME\n");

n = nfrom;
while n <= nto

	A = double(rand(n,n));
	B = double(rand(n,1));

	# Only the repeated products are timed, not the operand setup.
	start = clock();
	l = 0;
	while l < loops
		C = A * B;
		l = l + 1;
	endwhile
	timeg = etime(clock(), start);

	# 2 * n^2 operations are counted per product.
	mflops = ( 2.0*n*n *loops ) / ( timeg * 1.0e6 );

	st1 = sprintf("%dx%d : ", n,n);
	printf("%20s %10.2f MFlops %10.6f sec\n", st1, mflops, timeg);
	n = n + nstep;

endwhile
 | 
			
		||||
| 
						 | 
				
			
			@ -0,0 +1,59 @@
 | 
			
		|||
#!/usr/bin/octave --silent 
 | 
			
		||||
 | 
			
		||||
# Benchmark of linsolve(A,B) with random double-precision n x n A and B,
# n = nfrom, nfrom+nstep, ... while n <= nto.
nfrom = 128;
nto   = 2048;
nstep = 128;
loops = 1;

# Positional arguments override the defaults in the order
# nfrom, nto, nstep, loops; anything past the fourth is ignored.
# str2double only parses a number, whereas str2num hands the text to eval.
arg_list = argv();
for i = 1:min(nargin, 4)
	value = str2double(arg_list{i});
	switch (i)
		case 1
			nfrom = value;
		case 2
			nto   = value;
		case 3
			nstep = value;
		case 4
			loops = value;
	endswitch
endfor

# A non-empty OPENBLAS_LOOPS environment variable takes precedence.
p = getenv("OPENBLAS_LOOPS");
if !isempty(p)
	loops = str2double(p);
endif

printf("From %d To %d Step=%d Loops=%d\n",nfrom, nto, nstep, loops);
printf("        SIZE             FLOPS             TIME\n");

n = nfrom;
while n <= nto

	A = double(rand(n,n));
	B = double(rand(n,n));

	# Only the repeated solves are timed, not the operand setup.
	start = clock();
	l = 0;
	while l < loops
		x = linsolve(A,B);
		l = l + 1;
	endwhile
	timeg = etime(clock(), start);

	# 2/3 * n^3 + 2 * n^3 operations are counted per solve.
	mflops = ( 2.0/3.0 *n*n*n + 2.0*n*n*n ) *loops / ( timeg * 1.0e6 );

	st1 = sprintf("%dx%d : ", n,n);
	printf("%20s %10.2f MFlops %10.6f sec\n", st1, mflops, timeg );
	n = n + nstep;

endwhile
 | 
			
		||||
| 
						 | 
				
			
			@ -0,0 +1,56 @@
 | 
			
		|||
#!/usr/bin/octave --silent 
 | 
			
		||||
 | 
			
		||||
# Benchmark of the single-precision matrix product C = A * B
# for square n x n operands, n = nfrom, nfrom+nstep, ... while n <= nto.
nfrom = 128;
nto   = 2048;
nstep = 128;
loops = 1;

# Positional arguments override the defaults in the order
# nfrom, nto, nstep, loops; anything past the fourth is ignored.
# str2double only parses a number, whereas str2num hands the text to eval.
arg_list = argv();
for i = 1:min(nargin, 4)
	value = str2double(arg_list{i});
	switch (i)
		case 1
			nfrom = value;
		case 2
			nto   = value;
		case 3
			nstep = value;
		case 4
			loops = value;
	endswitch
endfor

# A non-empty OPENBLAS_LOOPS environment variable takes precedence.
p = getenv("OPENBLAS_LOOPS");
if !isempty(p)
	loops = str2double(p);
endif

printf("From %d To %d Step=%d Loops=%d\n",nfrom, nto, nstep, loops);
printf("        SIZE             FLOPS             TIME\n");

n = nfrom;
while n <= nto

	A = single(rand(n,n));
	B = single(rand(n,n));

	# Only the repeated products are timed, not the operand setup.
	start = clock();
	l = 0;
	while l < loops
		C = A * B;
		l = l + 1;
	endwhile
	timeg = etime(clock(), start);

	# 2 * n^3 operations are counted per product.
	mflops = ( 2.0*n*n*n *loops ) / ( timeg * 1.0e6 );

	st1 = sprintf("%dx%d : ", n,n);
	printf("%20s %10.2f MFlops %10.6f sec\n", st1, mflops, timeg);
	n = n + nstep;

endwhile
 | 
			
		||||
| 
						 | 
				
			
			@ -0,0 +1,56 @@
 | 
			
		|||
#!/usr/bin/octave --silent 
 | 
			
		||||
 | 
			
		||||
# Benchmark of the single-precision matrix-vector product C = A * B
# (A is n x n, B is n x 1), n = nfrom, nfrom+nstep, ... while n <= nto.
nfrom = 128;
nto   = 2048;
nstep = 128;
loops = 1;

# Positional arguments override the defaults in the order
# nfrom, nto, nstep, loops; anything past the fourth is ignored.
# str2double only parses a number, whereas str2num hands the text to eval.
arg_list = argv();
for i = 1:min(nargin, 4)
	value = str2double(arg_list{i});
	switch (i)
		case 1
			nfrom = value;
		case 2
			nto   = value;
		case 3
			nstep = value;
		case 4
			loops = value;
	endswitch
endfor

# A non-empty OPENBLAS_LOOPS environment variable takes precedence.
p = getenv("OPENBLAS_LOOPS");
if !isempty(p)
	loops = str2double(p);
endif

printf("From %d To %d Step=%d Loops=%d\n",nfrom, nto, nstep, loops);
printf("        SIZE             FLOPS             TIME\n");

n = nfrom;
while n <= nto

	A = single(rand(n,n));
	B = single(rand(n,1));

	# Only the repeated products are timed, not the operand setup.
	start = clock();
	l = 0;
	while l < loops
		C = A * B;
		l = l + 1;
	endwhile
	timeg = etime(clock(), start);

	# 2 * n^2 operations are counted per product.
	mflops = ( 2.0*n*n *loops ) / ( timeg * 1.0e6 );

	st1 = sprintf("%dx%d : ", n,n);
	printf("%20s %10.2f MFlops %10.6f sec\n", st1, mflops, timeg);
	n = n + nstep;

endwhile
 | 
			
		||||
| 
						 | 
				
			
			@ -0,0 +1,56 @@
 | 
			
		|||
#!/usr/bin/octave --silent 
 | 
			
		||||
 | 
			
		||||
# Benchmark of the double-precision complex matrix product C = A * B
# for square n x n operands, n = nfrom, nfrom+nstep, ... while n <= nto.
nfrom = 128;
nto   = 2048;
nstep = 128;
loops = 1;

# Positional arguments override the defaults in the order
# nfrom, nto, nstep, loops; anything past the fourth is ignored.
# str2double only parses a number, whereas str2num hands the text to eval.
arg_list = argv();
for i = 1:min(nargin, 4)
	value = str2double(arg_list{i});
	switch (i)
		case 1
			nfrom = value;
		case 2
			nto   = value;
		case 3
			nstep = value;
		case 4
			loops = value;
	endswitch
endfor

# A non-empty OPENBLAS_LOOPS environment variable takes precedence.
p = getenv("OPENBLAS_LOOPS");
if !isempty(p)
	loops = str2double(p);
endif

printf("From %d To %d Step=%d Loops=%d\n",nfrom, nto, nstep, loops);
printf("        SIZE             FLOPS             TIME\n");

n = nfrom;
while n <= nto

	A = double(rand(n,n)) + double(rand(n,n)) * 1i;
	B = double(rand(n,n)) + double(rand(n,n)) * 1i;

	# Only the repeated products are timed, not the operand setup.
	start = clock();
	l = 0;
	while l < loops
		C = A * B;
		l = l + 1;
	endwhile
	timeg = etime(clock(), start);

	# 4 * 2 * n^3 operations are counted per product.
	mflops = ( 4.0 * 2.0*n*n*n *loops ) / ( timeg * 1.0e6 );

	st1 = sprintf("%dx%d : ", n,n);
	printf("%20s %10.2f MFlops %10.6f sec\n", st1, mflops, timeg);
	n = n + nstep;

endwhile
 | 
			
		||||
| 
						 | 
				
			
			@ -0,0 +1,56 @@
 | 
			
		|||
#!/usr/bin/octave --silent 
 | 
			
		||||
 | 
			
		||||
# Benchmark of the double-precision complex matrix-vector product C = A * B
# (A is n x n, B is n x 1), n = nfrom, nfrom+nstep, ... while n <= nto.
nfrom = 128;
nto   = 2048;
nstep = 128;
loops = 1;

# Positional arguments override the defaults in the order
# nfrom, nto, nstep, loops; anything past the fourth is ignored.
# str2double only parses a number, whereas str2num hands the text to eval.
arg_list = argv();
for i = 1:min(nargin, 4)
	value = str2double(arg_list{i});
	switch (i)
		case 1
			nfrom = value;
		case 2
			nto   = value;
		case 3
			nstep = value;
		case 4
			loops = value;
	endswitch
endfor

# A non-empty OPENBLAS_LOOPS environment variable takes precedence.
p = getenv("OPENBLAS_LOOPS");
if !isempty(p)
	loops = str2double(p);
endif

printf("From %d To %d Step=%d Loops=%d\n",nfrom, nto, nstep, loops);
printf("        SIZE             FLOPS             TIME\n");

n = nfrom;
while n <= nto

	A = double(rand(n,n)) + double(rand(n,n)) * 1i;
	B = double(rand(n,1)) + double(rand(n,1)) * 1i;

	# Only the repeated products are timed, not the operand setup.
	start = clock();
	l = 0;
	while l < loops
		C = A * B;
		l = l + 1;
	endwhile
	timeg = etime(clock(), start);

	# 4 * 2 * n^2 operations are counted per product.
	mflops = ( 4.0 * 2.0*n*n *loops ) / ( timeg * 1.0e6 );

	st1 = sprintf("%dx%d : ", n,n);
	printf("%20s %10.2f MFlops %10.6f sec\n", st1, mflops, timeg);
	n = n + nstep;

endwhile
 | 
			
		||||
| 
						 | 
				
			
			@ -0,0 +1,62 @@
 | 
			
		|||
#!/usr/bin/Rscript
 | 
			
		||||
 | 
			
		||||
# Benchmark of eigen() on a random n x n matrix,
# n = nfrom, nfrom+nstep, ... while n <= nto.
argv <- commandArgs(trailingOnly = TRUE)

nfrom <- 128
nto <- 2048
nstep <- 128
loops <- 1

# Positional arguments override the defaults in the order
# nfrom, nto, nstep, loops; anything past the fourth is ignored.
if ( length(argv) >= 1 ) nfrom <- as.numeric(argv[1])
if ( length(argv) >= 2 ) nto   <- as.numeric(argv[2])
if ( length(argv) >= 3 ) nstep <- as.numeric(argv[3])
if ( length(argv) >= 4 ) loops <- as.numeric(argv[4])

# A non-empty OPENBLAS_LOOPS environment variable takes precedence.
p <- Sys.getenv("OPENBLAS_LOOPS")
if ( p != "" ) {
	loops <- as.numeric(p)
}

cat(sprintf("From %.0f To %.0f Step=%.0f Loops=%.0f\n",nfrom, nto, nstep, loops))
cat(sprintf("      SIZE             Flops                   Time\n"))

n <- nfrom
while ( n <= nto ) {

	A <- matrix(runif(n*n), ncol = n, nrow = n, byrow = TRUE)

	# Only the repeated decompositions are timed; element 3 of proc.time()
	# is the elapsed time.
	l <- 1
	start <- proc.time()[3]
	while ( l <= loops ) {
		ev <- eigen(A)
		l <- l + 1
	}
	end <- proc.time()[3]

	timeg <- end - start
	# 26.33 * n^3 operations are counted per decomposition, the same constant
	# the Octave eig benchmark uses, so the two reports are comparable.
	# NOTE(review): this script previously used 26.66 - confirm which is intended.
	mflops <- (26.33 *n*n*n ) * loops / ( timeg * 1.0e6 )

	st <- sprintf("%.0fx%.0f :",n , n)
	cat(sprintf("%20s %10.2f MFlops %10.6f sec\n", st, mflops, timeg))

	n <- n + nstep

}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -0,0 +1,63 @@
 | 
			
		|||
#!/usr/bin/Rscript
 | 
			
		||||
 | 
			
		||||
# Benchmark of the matrix product A %*% B for random n x n matrices,
# n = nfrom, nfrom+nstep, ... while n <= nto.
argv <- commandArgs(trailingOnly = TRUE)

nfrom <- 128
nto <- 2048
nstep <- 128
loops <- 1

# Positional arguments override the defaults in the order
# nfrom, nto, nstep, loops; anything past the fourth is ignored.
if ( length(argv) >= 1 ) nfrom <- as.numeric(argv[1])
if ( length(argv) >= 2 ) nto   <- as.numeric(argv[2])
if ( length(argv) >= 3 ) nstep <- as.numeric(argv[3])
if ( length(argv) >= 4 ) loops <- as.numeric(argv[4])

# A non-empty OPENBLAS_LOOPS environment variable takes precedence.
p <- Sys.getenv("OPENBLAS_LOOPS")
if ( p != "" ) {
	loops <- as.numeric(p)
}

cat(sprintf("From %.0f To %.0f Step=%.0f Loops=%.0f\n",nfrom, nto, nstep, loops))
cat(sprintf("      SIZE             Flops                   Time\n"))

n <- nfrom
while ( n <= nto ) {

	A <- matrix(runif(n*n), ncol = n, nrow = n, byrow = TRUE)
	B <- matrix(runif(n*n), ncol = n, nrow = n, byrow = TRUE)

	# Only the repeated products are timed; element 3 of proc.time()
	# is the elapsed time.
	l <- 1
	start <- proc.time()[3]
	while ( l <= loops ) {
		C <- A %*% B
		l <- l + 1
	}
	end <- proc.time()[3]

	timeg <- end - start
	# 2 * n^3 operations are counted per product.
	mflops <- ( 2.0 *n*n*n ) * loops / ( timeg * 1.0e6 )

	st <- sprintf("%.0fx%.0f :",n , n)
	cat(sprintf("%20s %10.2f MFlops %10.6f sec\n", st, mflops, timeg))

	n <- n + nstep

}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -0,0 +1,63 @@
 | 
			
		|||
#!/usr/bin/Rscript
 | 
			
		||||
 | 
			
		||||
# Benchmark of solve(A,B) for random n x n matrices A and B,
# n = nfrom, nfrom+nstep, ... while n <= nto.
argv <- commandArgs(trailingOnly = TRUE)

nfrom <- 128
nto <- 2048
nstep <- 128
loops <- 1

# Positional arguments override the defaults in the order
# nfrom, nto, nstep, loops; anything past the fourth is ignored.
if ( length(argv) >= 1 ) nfrom <- as.numeric(argv[1])
if ( length(argv) >= 2 ) nto   <- as.numeric(argv[2])
if ( length(argv) >= 3 ) nstep <- as.numeric(argv[3])
if ( length(argv) >= 4 ) loops <- as.numeric(argv[4])

# A non-empty OPENBLAS_LOOPS environment variable takes precedence.
p <- Sys.getenv("OPENBLAS_LOOPS")
if ( p != "" ) {
	loops <- as.numeric(p)
}

cat(sprintf("From %.0f To %.0f Step=%.0f Loops=%.0f\n",nfrom, nto, nstep, loops))
cat(sprintf("      SIZE             Flops                   Time\n"))

n <- nfrom
while ( n <= nto ) {

	A <- matrix(runif(n*n), ncol = n, nrow = n, byrow = TRUE)
	B <- matrix(runif(n*n), ncol = n, nrow = n, byrow = TRUE)

	# Only the repeated solves are timed; element 3 of proc.time()
	# is the elapsed time. The solution itself is discarded.
	l <- 1
	start <- proc.time()[3]
	while ( l <= loops ) {
		solve(A,B)
		l <- l + 1
	}
	end <- proc.time()[3]

	timeg <- end - start
	# 2/3 * n^3 + 2 * n^3 operations are counted per solve.
	mflops <- (2.0/3.0 *n*n*n + 2.0 *n*n*n ) * loops / ( timeg * 1.0e6 )

	st <- sprintf("%.0fx%.0f :",n , n)
	cat(sprintf("%20s %10.2f MFlops %10.6f sec\n", st, mflops, timeg))

	n <- n + nstep

}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -0,0 +1,201 @@
 | 
			
		|||
/***************************************************************************
 | 
			
		||||
Copyright (c) 2014, The OpenBLAS Project
 | 
			
		||||
All rights reserved.
 | 
			
		||||
Redistribution and use in source and binary forms, with or without
 | 
			
		||||
modification, are permitted provided that the following conditions are
 | 
			
		||||
met:
 | 
			
		||||
1. Redistributions of source code must retain the above copyright
 | 
			
		||||
notice, this list of conditions and the following disclaimer.
 | 
			
		||||
2. Redistributions in binary form must reproduce the above copyright
 | 
			
		||||
notice, this list of conditions and the following disclaimer in
 | 
			
		||||
the documentation and/or other materials provided with the
 | 
			
		||||
distribution.
 | 
			
		||||
3. Neither the name of the OpenBLAS project nor the names of
 | 
			
		||||
its contributors may be used to endorse or promote products
 | 
			
		||||
derived from this software without specific prior written permission.
 | 
			
		||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 | 
			
		||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 | 
			
		||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 | 
			
		||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
 | 
			
		||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 | 
			
		||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 | 
			
		||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 | 
			
		||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 | 
			
		||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
 | 
			
		||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 | 
			
		||||
*****************************************************************************/
 | 
			
		||||
 | 
			
		||||
#include <stdio.h>
 | 
			
		||||
#include <stdlib.h>
 | 
			
		||||
#ifdef __CYGWIN32__
 | 
			
		||||
#include <sys/time.h>
 | 
			
		||||
#endif
 | 
			
		||||
#include "common.h"
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#undef SWAP
 | 
			
		||||
 | 
			
		||||
#ifdef COMPLEX
 | 
			
		||||
#ifdef DOUBLE
 | 
			
		||||
#define SWAP   BLASFUNC(zswap)
 | 
			
		||||
#else
 | 
			
		||||
#define SWAP   BLASFUNC(cswap)
 | 
			
		||||
#endif
 | 
			
		||||
#else
 | 
			
		||||
#ifdef DOUBLE
 | 
			
		||||
#define SWAP   BLASFUNC(dswap)
 | 
			
		||||
#else
 | 
			
		||||
#define SWAP   BLASFUNC(sswap)
 | 
			
		||||
#endif
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#if defined(__WIN32__) || defined(__WIN64__)
 | 
			
		||||
 | 
			
		||||
#ifndef DELTA_EPOCH_IN_MICROSECS
 | 
			
		||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
int gettimeofday(struct timeval *tv, void *tz){
 | 
			
		||||
 | 
			
		||||
  FILETIME ft;
 | 
			
		||||
  unsigned __int64 tmpres = 0;
 | 
			
		||||
  static int tzflag;
 | 
			
		||||
 | 
			
		||||
  if (NULL != tv)
 | 
			
		||||
    {
 | 
			
		||||
      GetSystemTimeAsFileTime(&ft);
 | 
			
		||||
 | 
			
		||||
      tmpres |= ft.dwHighDateTime;
 | 
			
		||||
      tmpres <<= 32;
 | 
			
		||||
      tmpres |= ft.dwLowDateTime;
 | 
			
		||||
 | 
			
		||||
      /*converting file time to unix epoch*/
 | 
			
		||||
      tmpres /= 10;  /*convert into microseconds*/
 | 
			
		||||
      tmpres -= DELTA_EPOCH_IN_MICROSECS;
 | 
			
		||||
      tv->tv_sec = (long)(tmpres / 1000000UL);
 | 
			
		||||
      tv->tv_usec = (long)(tmpres % 1000000UL);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
  return 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
 | 
			
		||||
 | 
			
		||||
/*
 * Allocate `size` bytes from a SHM_HUGETLB shared-memory segment.
 *
 * The request passed to shmget() is (size + HUGE_PAGESIZE) with the bits
 * below HUGE_PAGESIZE masked off. The segment is marked for removal with
 * IPC_RMID right after it has been attached.
 *
 * Prints a message and calls exit(1) when shmget() or shmat() fails;
 * otherwise returns the attached address.
 */
static void *huge_malloc(BLASLONG size){
  int shmid;
  void *address;

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

  shmid = shmget(IPC_PRIVATE,
		 (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
		 SHM_HUGETLB | IPC_CREAT |0600);
  if (shmid < 0) {
    printf( "Memory allocation failed(shmget).\n");
    exit(1);
  }

  address = shmat(shmid, NULL, SHM_RND);
  if ((BLASLONG)address == -1){
    printf( "Memory allocation failed(shmat).\n");
    exit(1);
  }

  shmctl(shmid, IPC_RMID, 0);

  return address;
}
 | 
			
		||||
 | 
			
		||||
#define malloc huge_malloc
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
int main(int argc, char *argv[]){
 | 
			
		||||
 | 
			
		||||
  FLOAT *x, *y;
 | 
			
		||||
  FLOAT alpha[2] = { 2.0, 2.0 };
 | 
			
		||||
  blasint m, i;
 | 
			
		||||
  blasint inc_x=1,inc_y=1;
 | 
			
		||||
  int loops = 1;
 | 
			
		||||
  int l;
 | 
			
		||||
  char *p;
 | 
			
		||||
 | 
			
		||||
  int from =   1;
 | 
			
		||||
  int to   = 200;
 | 
			
		||||
  int step =   1;
 | 
			
		||||
 | 
			
		||||
  struct timeval start, stop;
 | 
			
		||||
  double time1,timeg;
 | 
			
		||||
 | 
			
		||||
  argc--;argv++;
 | 
			
		||||
 | 
			
		||||
  if (argc > 0) { from     = atol(*argv);		argc--; argv++;}
 | 
			
		||||
  if (argc > 0) { to       = MAX(atol(*argv), from);	argc--; argv++;}
 | 
			
		||||
  if (argc > 0) { step     = atol(*argv);		argc--; argv++;}
 | 
			
		||||
 | 
			
		||||
  if ((p = getenv("OPENBLAS_LOOPS")))  loops = atoi(p);
 | 
			
		||||
  if ((p = getenv("OPENBLAS_INCX")))   inc_x = atoi(p);
 | 
			
		||||
  if ((p = getenv("OPENBLAS_INCY")))   inc_y = atoi(p);
 | 
			
		||||
 | 
			
		||||
  fprintf(stderr, "From : %3d  To : %3d Step = %3d Inc_x = %d Inc_y = %d Loops = %d\n", from, to, step,inc_x,inc_y,loops);
 | 
			
		||||
 | 
			
		||||
  if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
 | 
			
		||||
    fprintf(stderr,"Out of Memory!!\n");exit(1);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL){
 | 
			
		||||
    fprintf(stderr,"Out of Memory!!\n");exit(1);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
#ifdef linux
 | 
			
		||||
  srandom(getpid());
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
  fprintf(stderr, "   SIZE       Flops\n");
 | 
			
		||||
 | 
			
		||||
  for(m = from; m <= to; m += step)
 | 
			
		||||
  {
 | 
			
		||||
 | 
			
		||||
   timeg=0;
 | 
			
		||||
 | 
			
		||||
   fprintf(stderr, " %6d : ", (int)m);
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
   for (l=0; l<loops; l++)
 | 
			
		||||
   {
 | 
			
		||||
 | 
			
		||||
   	for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
 | 
			
		||||
			x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
 | 
			
		||||
   	}
 | 
			
		||||
 | 
			
		||||
   	for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
 | 
			
		||||
			y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
 | 
			
		||||
   	}
 | 
			
		||||
    	gettimeofday( &start, (struct timezone *)0);
 | 
			
		||||
 | 
			
		||||
    	SWAP (&m, x, &inc_x, y, &inc_y );
 | 
			
		||||
 | 
			
		||||
    	gettimeofday( &stop, (struct timezone *)0);
 | 
			
		||||
 | 
			
		||||
    	time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
 | 
			
		||||
 | 
			
		||||
	timeg += time1;
 | 
			
		||||
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    timeg /= loops;
 | 
			
		||||
 | 
			
		||||
    fprintf(stderr,
 | 
			
		||||
	    " %10.2f MBytes\n",
 | 
			
		||||
	    COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6);
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  return 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
 | 
			
		||||
| 
						 | 
				
			
			@ -130,11 +130,21 @@ int main(int argc, char *argv[]){
 | 
			
		|||
  char trans='N';
 | 
			
		||||
  char diag ='U';
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  int l;
 | 
			
		||||
  int loops = 1;
 | 
			
		||||
  double timeg;
 | 
			
		||||
 | 
			
		||||
  if ((p = getenv("OPENBLAS_SIDE"))) side=*p; 
 | 
			
		||||
  if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p;
 | 
			
		||||
  if ((p = getenv("OPENBLAS_TRANS"))) trans=*p;
 | 
			
		||||
  if ((p = getenv("OPENBLAS_DIAG"))) diag=*p;
 | 
			
		||||
 | 
			
		||||
  p = getenv("OPENBLAS_LOOPS");
 | 
			
		||||
  if ( p != NULL )
 | 
			
		||||
        loops = atoi(p);
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  blasint m, i, j;
 | 
			
		||||
 | 
			
		||||
  int from =   1;
 | 
			
		||||
| 
						 | 
				
			
			@ -150,7 +160,7 @@ int main(int argc, char *argv[]){
 | 
			
		|||
  if (argc > 0) { to       = MAX(atol(*argv), from);	argc--; argv++;}
 | 
			
		||||
  if (argc > 0) { step     = atol(*argv);		argc--; argv++;}
 | 
			
		||||
 | 
			
		||||
  fprintf(stderr, "From : %3d  To : %3d Step = %3d Side = %c Uplo = %c Trans = %c Diag = %c\n", from, to, step,side,uplo,trans,diag);
 | 
			
		||||
  fprintf(stderr, "From : %3d  To : %3d Step = %3d Side = %c Uplo = %c Trans = %c Diag = %c Loops = %d\n", from, to, step,side,uplo,trans,diag,loops);
 | 
			
		||||
 | 
			
		||||
  if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
 | 
			
		||||
    fprintf(stderr,"Out of Memory!!\n");exit(1);
 | 
			
		||||
| 
						 | 
				
			
			@ -171,8 +181,14 @@ int main(int argc, char *argv[]){
 | 
			
		|||
  for(m = from; m <= to; m += step)
 | 
			
		||||
  {
 | 
			
		||||
 | 
			
		||||
	timeg=0.0;
 | 
			
		||||
 | 
			
		||||
        fprintf(stderr, " %6d : ", (int)m);
 | 
			
		||||
 | 
			
		||||
	for (l=0; l<loops; l++)
 | 
			
		||||
    	{
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
   		 for(j = 0; j < m; j++){
 | 
			
		||||
      			for(i = 0; i < m * COMPSIZE; i++){
 | 
			
		||||
				a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
 | 
			
		||||
| 
						 | 
				
			
			@ -188,11 +204,12 @@ int main(int argc, char *argv[]){
 | 
			
		|||
 | 
			
		||||
    		time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
 | 
			
		||||
 | 
			
		||||
    gettimeofday( &start, (struct timezone *)0);
 | 
			
		||||
		timeg += time1;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
    fprintf(stderr,
 | 
			
		||||
	    " %10.2f MFlops\n",
 | 
			
		||||
	    COMPSIZE * COMPSIZE * 1. * (double)m * (double)m * (double)m / time1 * 1.e-6);
 | 
			
		||||
	time1 = timeg/loops;
 | 
			
		||||
 | 
			
		||||
        fprintf(stderr, " %10.2f MFlops\n", COMPSIZE * COMPSIZE * 1. * (double)m * (double)m * (double)m / time1 * 1.e-6);
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -0,0 +1,196 @@
 | 
			
		|||
/***************************************************************************
 | 
			
		||||
Copyright (c) 2014, The OpenBLAS Project
 | 
			
		||||
All rights reserved.
 | 
			
		||||
Redistribution and use in source and binary forms, with or without
 | 
			
		||||
modification, are permitted provided that the following conditions are
 | 
			
		||||
met:
 | 
			
		||||
1. Redistributions of source code must retain the above copyright
 | 
			
		||||
notice, this list of conditions and the following disclaimer.
 | 
			
		||||
2. Redistributions in binary form must reproduce the above copyright
 | 
			
		||||
notice, this list of conditions and the following disclaimer in
 | 
			
		||||
the documentation and/or other materials provided with the
 | 
			
		||||
distribution.
 | 
			
		||||
3. Neither the name of the OpenBLAS project nor the names of
 | 
			
		||||
its contributors may be used to endorse or promote products
 | 
			
		||||
derived from this software without specific prior written permission.
 | 
			
		||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 | 
			
		||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 | 
			
		||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 | 
			
		||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
 | 
			
		||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 | 
			
		||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 | 
			
		||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 | 
			
		||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 | 
			
		||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
 | 
			
		||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 | 
			
		||||
*****************************************************************************/
 | 
			
		||||
 | 
			
		||||
#include <stdio.h>
 | 
			
		||||
#include <stdlib.h>
 | 
			
		||||
#ifdef __CYGWIN32__
 | 
			
		||||
#include <sys/time.h>
 | 
			
		||||
#endif
 | 
			
		||||
#define RETURN_BY_STACK 1
 | 
			
		||||
#include "common.h"
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#undef DOT
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#ifdef DOUBLE
 | 
			
		||||
#define DOT   BLASFUNC(zdotu)
 | 
			
		||||
#else
 | 
			
		||||
#define DOT   BLASFUNC(cdotu)
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#if defined(__WIN32__) || defined(__WIN64__)
 | 
			
		||||
 | 
			
		||||
#ifndef DELTA_EPOCH_IN_MICROSECS
 | 
			
		||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
int gettimeofday(struct timeval *tv, void *tz){
 | 
			
		||||
 | 
			
		||||
  FILETIME ft;
 | 
			
		||||
  unsigned __int64 tmpres = 0;
 | 
			
		||||
  static int tzflag;
 | 
			
		||||
 | 
			
		||||
  if (NULL != tv)
 | 
			
		||||
    {
 | 
			
		||||
      GetSystemTimeAsFileTime(&ft);
 | 
			
		||||
 | 
			
		||||
      tmpres |= ft.dwHighDateTime;
 | 
			
		||||
      tmpres <<= 32;
 | 
			
		||||
      tmpres |= ft.dwLowDateTime;
 | 
			
		||||
 | 
			
		||||
      /*converting file time to unix epoch*/
 | 
			
		||||
      tmpres /= 10;  /*convert into microseconds*/
 | 
			
		||||
      tmpres -= DELTA_EPOCH_IN_MICROSECS;
 | 
			
		||||
      tv->tv_sec = (long)(tmpres / 1000000UL);
 | 
			
		||||
      tv->tv_usec = (long)(tmpres % 1000000UL);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
  return 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
 | 
			
		||||
 | 
			
		||||
/*
 * Allocate `size` bytes from a SysV shared-memory segment requested with
 * SHM_HUGETLB.
 *
 * The request is rounded to a multiple of HUGE_PAGESIZE (always at least
 * one page beyond `size` rounded down).  On any failure a message is
 * written to stderr and the process exits with status 1, so the function
 * never returns NULL.
 */
static void *huge_malloc(BLASLONG size){
  int shmid;
  void *address;

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

  if ((shmid =shmget(IPC_PRIVATE,
		     (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
		     SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
    fprintf(stderr, "Memory allocation failed(shmget).\n");
    exit(1);
  }

  address = shmat(shmid, NULL, SHM_RND);

  /* shmat() reports failure as (void *)-1; compare pointers directly
     instead of going through an integer cast. */
  if (address == (void *)-1){
    fprintf(stderr, "Memory allocation failed(shmat).\n");
    exit(1);
  }

  /* Mark the segment for removal; it stays usable while attached. */
  shmctl(shmid, IPC_RMID, 0);

  return address;
}
 | 
			
		||||
 | 
			
		||||
#define malloc huge_malloc
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
int main(int argc, char *argv[]){
 | 
			
		||||
 | 
			
		||||
  FLOAT *x, *y;
 | 
			
		||||
  FLOAT _Complex result;
 | 
			
		||||
  blasint m, i;
 | 
			
		||||
  blasint inc_x=1,inc_y=1;
 | 
			
		||||
  int loops = 1;
 | 
			
		||||
  int l;
 | 
			
		||||
  char *p;
 | 
			
		||||
 | 
			
		||||
  int from =   1;
 | 
			
		||||
  int to   = 200;
 | 
			
		||||
  int step =   1;
 | 
			
		||||
 | 
			
		||||
  struct timeval start, stop;
 | 
			
		||||
  double time1,timeg;
 | 
			
		||||
 | 
			
		||||
  argc--;argv++;
 | 
			
		||||
 | 
			
		||||
  if (argc > 0) { from     = atol(*argv);		argc--; argv++;}
 | 
			
		||||
  if (argc > 0) { to       = MAX(atol(*argv), from);	argc--; argv++;}
 | 
			
		||||
  if (argc > 0) { step     = atol(*argv);		argc--; argv++;}
 | 
			
		||||
 | 
			
		||||
  if ((p = getenv("OPENBLAS_LOOPS")))  loops = atoi(p);
 | 
			
		||||
  if ((p = getenv("OPENBLAS_INCX")))   inc_x = atoi(p);
 | 
			
		||||
  if ((p = getenv("OPENBLAS_INCY")))   inc_y = atoi(p);
 | 
			
		||||
 | 
			
		||||
  fprintf(stderr, "From : %3d  To : %3d Step = %3d Inc_x = %d Inc_y = %d Loops = %d\n", from, to, step,inc_x,inc_y,loops);
 | 
			
		||||
 | 
			
		||||
  if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
 | 
			
		||||
    fprintf(stderr,"Out of Memory!!\n");exit(1);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL){
 | 
			
		||||
    fprintf(stderr,"Out of Memory!!\n");exit(1);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
#ifdef linux
 | 
			
		||||
  srandom(getpid());
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
  fprintf(stderr, "   SIZE       Flops\n");
 | 
			
		||||
 | 
			
		||||
  for(m = from; m <= to; m += step)
 | 
			
		||||
  {
 | 
			
		||||
 | 
			
		||||
   timeg=0;
 | 
			
		||||
 | 
			
		||||
   fprintf(stderr, " %6d : ", (int)m);
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
   for (l=0; l<loops; l++)
 | 
			
		||||
   {
 | 
			
		||||
 | 
			
		||||
   	for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
 | 
			
		||||
			x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
 | 
			
		||||
   	}
 | 
			
		||||
 | 
			
		||||
   	for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
 | 
			
		||||
			y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
 | 
			
		||||
   	}
 | 
			
		||||
    	gettimeofday( &start, (struct timezone *)0);
 | 
			
		||||
 | 
			
		||||
    	DOT (&result, &m, x, &inc_x, y, &inc_y );
 | 
			
		||||
 | 
			
		||||
    	gettimeofday( &stop, (struct timezone *)0);
 | 
			
		||||
 | 
			
		||||
    	time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
 | 
			
		||||
 | 
			
		||||
	timeg += time1;
 | 
			
		||||
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    timeg /= loops;
 | 
			
		||||
 | 
			
		||||
    fprintf(stderr,
 | 
			
		||||
	    " %10.2f MFlops\n",
 | 
			
		||||
	    COMPSIZE * COMPSIZE * 2. * (double)m / timeg * 1.e-6);
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  return 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
 | 
			
		||||
| 
						 | 
				
			
			@ -0,0 +1,195 @@
 | 
			
		|||
/***************************************************************************
 | 
			
		||||
Copyright (c) 2014, The OpenBLAS Project
 | 
			
		||||
All rights reserved.
 | 
			
		||||
Redistribution and use in source and binary forms, with or without
 | 
			
		||||
modification, are permitted provided that the following conditions are
 | 
			
		||||
met:
 | 
			
		||||
1. Redistributions of source code must retain the above copyright
 | 
			
		||||
notice, this list of conditions and the following disclaimer.
 | 
			
		||||
2. Redistributions in binary form must reproduce the above copyright
 | 
			
		||||
notice, this list of conditions and the following disclaimer in
 | 
			
		||||
the documentation and/or other materials provided with the
 | 
			
		||||
distribution.
 | 
			
		||||
3. Neither the name of the OpenBLAS project nor the names of
 | 
			
		||||
its contributors may be used to endorse or promote products
 | 
			
		||||
derived from this software without specific prior written permission.
 | 
			
		||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 | 
			
		||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 | 
			
		||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 | 
			
		||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
 | 
			
		||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 | 
			
		||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 | 
			
		||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 | 
			
		||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 | 
			
		||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
 | 
			
		||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 | 
			
		||||
*****************************************************************************/
 | 
			
		||||
 | 
			
		||||
#include <stdio.h>
 | 
			
		||||
#include <stdlib.h>
 | 
			
		||||
#ifdef __CYGWIN32__
 | 
			
		||||
#include <sys/time.h>
 | 
			
		||||
#endif
 | 
			
		||||
#include "common.h"
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#undef DOT
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#ifdef DOUBLE
 | 
			
		||||
#define DOT   BLASFUNC(zdotu)
 | 
			
		||||
#else
 | 
			
		||||
#define DOT   BLASFUNC(cdotu)
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#if defined(__WIN32__) || defined(__WIN64__)
 | 
			
		||||
 | 
			
		||||
#ifndef DELTA_EPOCH_IN_MICROSECS
 | 
			
		||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
int gettimeofday(struct timeval *tv, void *tz){
 | 
			
		||||
 | 
			
		||||
  FILETIME ft;
 | 
			
		||||
  unsigned __int64 tmpres = 0;
 | 
			
		||||
  static int tzflag;
 | 
			
		||||
 | 
			
		||||
  if (NULL != tv)
 | 
			
		||||
    {
 | 
			
		||||
      GetSystemTimeAsFileTime(&ft);
 | 
			
		||||
 | 
			
		||||
      tmpres |= ft.dwHighDateTime;
 | 
			
		||||
      tmpres <<= 32;
 | 
			
		||||
      tmpres |= ft.dwLowDateTime;
 | 
			
		||||
 | 
			
		||||
      /*converting file time to unix epoch*/
 | 
			
		||||
      tmpres /= 10;  /*convert into microseconds*/
 | 
			
		||||
      tmpres -= DELTA_EPOCH_IN_MICROSECS;
 | 
			
		||||
      tv->tv_sec = (long)(tmpres / 1000000UL);
 | 
			
		||||
      tv->tv_usec = (long)(tmpres % 1000000UL);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
  return 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
 | 
			
		||||
 | 
			
		||||
/*
 * Allocate `size` bytes from a SysV shared-memory segment requested with
 * SHM_HUGETLB.
 *
 * The request is rounded to a multiple of HUGE_PAGESIZE (always at least
 * one page beyond `size` rounded down).  On any failure a message is
 * written to stderr and the process exits with status 1, so the function
 * never returns NULL.
 */
static void *huge_malloc(BLASLONG size){
  int shmid;
  void *address;

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

  if ((shmid =shmget(IPC_PRIVATE,
		     (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
		     SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
    fprintf(stderr, "Memory allocation failed(shmget).\n");
    exit(1);
  }

  address = shmat(shmid, NULL, SHM_RND);

  /* shmat() reports failure as (void *)-1; compare pointers directly
     instead of going through an integer cast. */
  if (address == (void *)-1){
    fprintf(stderr, "Memory allocation failed(shmat).\n");
    exit(1);
  }

  /* Mark the segment for removal; it stays usable while attached. */
  shmctl(shmid, IPC_RMID, 0);

  return address;
}
 | 
			
		||||
 | 
			
		||||
#define malloc huge_malloc
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
int main(int argc, char *argv[]){
 | 
			
		||||
 | 
			
		||||
  FLOAT *x, *y;
 | 
			
		||||
  FLOAT _Complex result;
 | 
			
		||||
  blasint m, i;
 | 
			
		||||
  blasint inc_x=1,inc_y=1;
 | 
			
		||||
  int loops = 1;
 | 
			
		||||
  int l;
 | 
			
		||||
  char *p;
 | 
			
		||||
 | 
			
		||||
  int from =   1;
 | 
			
		||||
  int to   = 200;
 | 
			
		||||
  int step =   1;
 | 
			
		||||
 | 
			
		||||
  struct timeval start, stop;
 | 
			
		||||
  double time1,timeg;
 | 
			
		||||
 | 
			
		||||
  argc--;argv++;
 | 
			
		||||
 | 
			
		||||
  if (argc > 0) { from     = atol(*argv);		argc--; argv++;}
 | 
			
		||||
  if (argc > 0) { to       = MAX(atol(*argv), from);	argc--; argv++;}
 | 
			
		||||
  if (argc > 0) { step     = atol(*argv);		argc--; argv++;}
 | 
			
		||||
 | 
			
		||||
  if ((p = getenv("OPENBLAS_LOOPS")))  loops = atoi(p);
 | 
			
		||||
  if ((p = getenv("OPENBLAS_INCX")))   inc_x = atoi(p);
 | 
			
		||||
  if ((p = getenv("OPENBLAS_INCY")))   inc_y = atoi(p);
 | 
			
		||||
 | 
			
		||||
  fprintf(stderr, "From : %3d  To : %3d Step = %3d Inc_x = %d Inc_y = %d Loops = %d\n", from, to, step,inc_x,inc_y,loops);
 | 
			
		||||
 | 
			
		||||
  if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
 | 
			
		||||
    fprintf(stderr,"Out of Memory!!\n");exit(1);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL){
 | 
			
		||||
    fprintf(stderr,"Out of Memory!!\n");exit(1);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
#ifdef linux
 | 
			
		||||
  srandom(getpid());
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
  fprintf(stderr, "   SIZE       Flops\n");
 | 
			
		||||
 | 
			
		||||
  for(m = from; m <= to; m += step)
 | 
			
		||||
  {
 | 
			
		||||
 | 
			
		||||
   timeg=0;
 | 
			
		||||
 | 
			
		||||
   fprintf(stderr, " %6d : ", (int)m);
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
   for (l=0; l<loops; l++)
 | 
			
		||||
   {
 | 
			
		||||
 | 
			
		||||
   	for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
 | 
			
		||||
			x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
 | 
			
		||||
   	}
 | 
			
		||||
 | 
			
		||||
   	for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
 | 
			
		||||
			y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
 | 
			
		||||
   	}
 | 
			
		||||
    	gettimeofday( &start, (struct timezone *)0);
 | 
			
		||||
 | 
			
		||||
    	result = DOT (&m, x, &inc_x, y, &inc_y );
 | 
			
		||||
 | 
			
		||||
    	gettimeofday( &stop, (struct timezone *)0);
 | 
			
		||||
 | 
			
		||||
    	time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
 | 
			
		||||
 | 
			
		||||
	timeg += time1;
 | 
			
		||||
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    timeg /= loops;
 | 
			
		||||
 | 
			
		||||
    fprintf(stderr,
 | 
			
		||||
	    " %10.2f MFlops\n",
 | 
			
		||||
	    COMPSIZE * COMPSIZE * 2. * (double)m / timeg * 1.e-6);
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  return 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
 | 
			
		||||
							
								
								
									
										5
									
								
								c_check
								
								
								
								
							
							
						
						
									
										5
									
								
								c_check
								
								
								
								
							| 
						 | 
				
			
			@ -4,6 +4,8 @@
 | 
			
		|||
$hostos   = `uname -s | sed -e s/\-.*//`;    chop($hostos);
 | 
			
		||||
$hostarch = `uname -m | sed -e s/i.86/x86/`;chop($hostarch);
 | 
			
		||||
$hostarch = "x86_64" if ($hostarch eq "amd64");
 | 
			
		||||
$hostarch = "arm" if ($hostarch =~ /^arm.*/);
 | 
			
		||||
$hostarch = "arm64" if ($hostarch eq "aarch64");
 | 
			
		||||
 | 
			
		||||
$binary = $ENV{"BINARY"};
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -28,7 +30,7 @@ if ($ARGV[0] =~ /(.*)(-[.\d]+)/) {
 | 
			
		|||
	$cross_suffix = $1;
 | 
			
		||||
    }
 | 
			
		||||
} else {
 | 
			
		||||
    if ($ARGV[0] =~ /(.*-)(.*)/) {
 | 
			
		||||
    if ($ARGV[0] =~ /([^\/]*-)([^\/]*$)/) {
 | 
			
		||||
	$cross_suffix = $1;
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			@ -55,6 +57,7 @@ $os = osf             if ($data =~ /OS_OSF/);
 | 
			
		|||
$os = WINNT           if ($data =~ /OS_WINNT/);
 | 
			
		||||
$os = CYGWIN_NT       if ($data =~ /OS_CYGWIN_NT/);
 | 
			
		||||
$os = Interix         if ($data =~ /OS_INTERIX/);
 | 
			
		||||
$os = Android         if ($data =~ /OS_ANDROID/);
 | 
			
		||||
 | 
			
		||||
$architecture = x86    if ($data =~ /ARCH_X86/);
 | 
			
		||||
$architecture = x86_64 if ($data =~ /ARCH_X86_64/);
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										350
									
								
								cblas_noconst.h
								
								
								
								
							
							
						
						
									
										350
									
								
								cblas_noconst.h
								
								
								
								
							| 
						 | 
				
			
			@ -1,350 +0,0 @@
 | 
			
		|||
#ifndef CBLAS_H
 | 
			
		||||
#define CBLAS_H
 | 
			
		||||
 | 
			
		||||
#include <stddef.h>
 | 
			
		||||
#include "common.h"
 | 
			
		||||
 | 
			
		||||
#ifdef __cplusplus
 | 
			
		||||
extern "C" {
 | 
			
		||||
	/* Assume C declarations for C++ */
 | 
			
		||||
#endif  /* __cplusplus */
 | 
			
		||||
 | 
			
		||||
/* Set the number of threads at runtime. */
 | 
			
		||||
void openblas_set_num_threads(int num_threads);
 | 
			
		||||
void goto_set_num_threads(int num_threads);
 | 
			
		||||
 | 
			
		||||
/* Get the number of threads at runtime. */
 | 
			
		||||
int openblas_get_num_threads(void);
 | 
			
		||||
 | 
			
		||||
/*Get the number of physical processors (cores).*/
 | 
			
		||||
int openblas_get_num_procs(void);
 | 
			
		||||
 | 
			
		||||
/* Get the build configuration at runtime. */
 | 
			
		||||
char* openblas_get_config(void);
 | 
			
		||||
 | 
			
		||||
/* Get the parallelization type which is used by OpenBLAS */
 | 
			
		||||
int openblas_get_parallel(void);
 | 
			
		||||
/* OpenBLAS is compiled for sequential use  */
 | 
			
		||||
#define OPENBLAS_SEQUENTIAL  0
 | 
			
		||||
/* OpenBLAS is compiled using normal threading model */
 | 
			
		||||
#define OPENBLAS_THREAD  1
 | 
			
		||||
/* OpenBLAS is compiled using OpenMP threading model */
 | 
			
		||||
#define OPENBLAS_OPENMP 2
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#define CBLAS_INDEX size_t
 | 
			
		||||
 | 
			
		||||
typedef enum CBLAS_ORDER     {CblasRowMajor=101, CblasColMajor=102} CBLAS_ORDER;
 | 
			
		||||
typedef enum CBLAS_TRANSPOSE {CblasNoTrans=111, CblasTrans=112, CblasConjTrans=113, CblasConjNoTrans=114} CBLAS_TRANSPOSE;
 | 
			
		||||
typedef enum CBLAS_UPLO      {CblasUpper=121, CblasLower=122} CBLAS_UPLO;
 | 
			
		||||
typedef enum CBLAS_DIAG      {CblasNonUnit=131, CblasUnit=132} CBLAS_DIAG;
 | 
			
		||||
typedef enum CBLAS_SIDE      {CblasLeft=141, CblasRight=142} CBLAS_SIDE;
 | 
			
		||||
 | 
			
		||||
float  cblas_sdsdot(blasint n, float alpha, float *x, blasint incx, float *y, blasint incy);
 | 
			
		||||
double cblas_dsdot (blasint n, float *x, blasint incx, float *y, blasint incy);
 | 
			
		||||
float  cblas_sdot(blasint n, float  *x, blasint incx, float  *y, blasint incy);
 | 
			
		||||
double cblas_ddot(blasint n, double *x, blasint incx, double *y, blasint incy);
 | 
			
		||||
 | 
			
		||||
openblas_complex_float  cblas_cdotu(blasint n, float  *x, blasint incx, float  *y, blasint incy);
 | 
			
		||||
openblas_complex_float  cblas_cdotc(blasint n, float  *x, blasint incx, float  *y, blasint incy);
 | 
			
		||||
openblas_complex_double cblas_zdotu(blasint n, double *x, blasint incx, double *y, blasint incy);
 | 
			
		||||
openblas_complex_double cblas_zdotc(blasint n, double *x, blasint incx, double *y, blasint incy);
 | 
			
		||||
 | 
			
		||||
void  cblas_cdotu_sub(blasint n, float  *x, blasint incx, float  *y, blasint incy, openblas_complex_float  *ret);
 | 
			
		||||
void  cblas_cdotc_sub(blasint n, float  *x, blasint incx, float  *y, blasint incy, openblas_complex_float  *ret);
 | 
			
		||||
void  cblas_zdotu_sub(blasint n, double *x, blasint incx, double *y, blasint incy, openblas_complex_double *ret);
 | 
			
		||||
void  cblas_zdotc_sub(blasint n, double *x, blasint incx, double *y, blasint incy, openblas_complex_double *ret);
 | 
			
		||||
 | 
			
		||||
float  cblas_sasum (blasint n, float  *x, blasint incx);
 | 
			
		||||
double cblas_dasum (blasint n, double *x, blasint incx);
 | 
			
		||||
float  cblas_scasum(blasint n, float  *x, blasint incx);
 | 
			
		||||
double cblas_dzasum(blasint n, double *x, blasint incx);
 | 
			
		||||
 | 
			
		||||
float  cblas_snrm2 (blasint N, float  *X, blasint incX);
 | 
			
		||||
double cblas_dnrm2 (blasint N, double *X, blasint incX);
 | 
			
		||||
float  cblas_scnrm2(blasint N, float  *X, blasint incX);
 | 
			
		||||
double cblas_dznrm2(blasint N, double *X, blasint incX);
 | 
			
		||||
 | 
			
		||||
CBLAS_INDEX cblas_isamax(blasint n, float  *x, blasint incx);
 | 
			
		||||
CBLAS_INDEX cblas_idamax(blasint n, double *x, blasint incx);
 | 
			
		||||
CBLAS_INDEX cblas_icamax(blasint n, float  *x, blasint incx);
 | 
			
		||||
CBLAS_INDEX cblas_izamax(blasint n, double *x, blasint incx);
 | 
			
		||||
 | 
			
		||||
void cblas_saxpy(blasint n, float alpha, float *x, blasint incx, float *y, blasint incy);
 | 
			
		||||
void cblas_daxpy(blasint n, double alpha, double *x, blasint incx, double *y, blasint incy);
 | 
			
		||||
void cblas_caxpy(blasint n, float *alpha, float *x, blasint incx, float *y, blasint incy);
 | 
			
		||||
void cblas_zaxpy(blasint n, double *alpha, double *x, blasint incx, double *y, blasint incy);
 | 
			
		||||
 | 
			
		||||
void cblas_scopy(blasint n, float *x, blasint incx, float *y, blasint incy);
 | 
			
		||||
void cblas_dcopy(blasint n, double *x, blasint incx, double *y, blasint incy);
 | 
			
		||||
void cblas_ccopy(blasint n, float *x, blasint incx, float *y, blasint incy);
 | 
			
		||||
void cblas_zcopy(blasint n, double *x, blasint incx, double *y, blasint incy);
 | 
			
		||||
 | 
			
		||||
void cblas_sswap(blasint n, float *x, blasint incx, float *y, blasint incy);
 | 
			
		||||
void cblas_dswap(blasint n, double *x, blasint incx, double *y, blasint incy);
 | 
			
		||||
void cblas_cswap(blasint n, float *x, blasint incx, float *y, blasint incy);
 | 
			
		||||
void cblas_zswap(blasint n, double *x, blasint incx, double *y, blasint incy);
 | 
			
		||||
 | 
			
		||||
void cblas_srot(blasint N, float *X, blasint incX, float *Y, blasint incY, float c, float s);
 | 
			
		||||
void cblas_drot(blasint N, double *X, blasint incX, double *Y, blasint incY, double c, double  s);
 | 
			
		||||
 | 
			
		||||
void cblas_srotg(float *a, float *b, float *c, float *s);
 | 
			
		||||
void cblas_drotg(double *a, double *b, double *c, double *s);
 | 
			
		||||
 | 
			
		||||
void cblas_srotm(blasint N, float *X, blasint incX, float *Y, blasint incY, float *P);
 | 
			
		||||
void cblas_drotm(blasint N, double *X, blasint incX, double *Y, blasint incY, double *P);
 | 
			
		||||
 | 
			
		||||
void cblas_srotmg(float *d1, float *d2, float *b1, float b2, float *P);
 | 
			
		||||
void cblas_drotmg(double *d1, double *d2, double *b1, double b2, double *P);
 | 
			
		||||
 | 
			
		||||
void cblas_sscal(blasint N, float alpha, float *X, blasint incX);
 | 
			
		||||
void cblas_dscal(blasint N, double alpha, double *X, blasint incX);
 | 
			
		||||
void cblas_cscal(blasint N, float *alpha, float *X, blasint incX);
 | 
			
		||||
void cblas_zscal(blasint N, double *alpha, double *X, blasint incX);
 | 
			
		||||
void cblas_csscal(blasint N, float alpha, float *X, blasint incX);
 | 
			
		||||
void cblas_zdscal(blasint N, double alpha, double *X, blasint incX);
 | 
			
		||||
 | 
			
		||||
void cblas_sgemv(enum CBLAS_ORDER order,  enum CBLAS_TRANSPOSE trans,  blasint m, blasint n,
 | 
			
		||||
		 float alpha, float  *a, blasint lda,  float  *x, blasint incx,  float beta,  float  *y, blasint incy);
 | 
			
		||||
void cblas_dgemv(enum CBLAS_ORDER order,  enum CBLAS_TRANSPOSE trans,  blasint m, blasint n,
 | 
			
		||||
		 double alpha, double  *a, blasint lda,  double  *x, blasint incx,  double beta,  double  *y, blasint incy);
 | 
			
		||||
void cblas_cgemv(enum CBLAS_ORDER order,  enum CBLAS_TRANSPOSE trans,  blasint m, blasint n,
 | 
			
		||||
		 float *alpha, float  *a, blasint lda,  float  *x, blasint incx,  float *beta,  float  *y, blasint incy);
 | 
			
		||||
void cblas_zgemv(enum CBLAS_ORDER order,  enum CBLAS_TRANSPOSE trans,  blasint m, blasint n,
 | 
			
		||||
		 double *alpha, double  *a, blasint lda,  double  *x, blasint incx,  double *beta,  double  *y, blasint incy);
 | 
			
		||||
 | 
			
		||||
void cblas_sger (enum CBLAS_ORDER order, blasint M, blasint N, float   alpha, float  *X, blasint incX, float  *Y, blasint incY, float  *A, blasint lda);
 | 
			
		||||
void cblas_dger (enum CBLAS_ORDER order, blasint M, blasint N, double  alpha, double *X, blasint incX, double *Y, blasint incY, double *A, blasint lda);
 | 
			
		||||
void cblas_cgeru(enum CBLAS_ORDER order, blasint M, blasint N, float  *alpha, float  *X, blasint incX, float  *Y, blasint incY, float  *A, blasint lda);
 | 
			
		||||
void cblas_cgerc(enum CBLAS_ORDER order, blasint M, blasint N, float  *alpha, float  *X, blasint incX, float  *Y, blasint incY, float  *A, blasint lda);
 | 
			
		||||
void cblas_zgeru(enum CBLAS_ORDER order, blasint M, blasint N, double *alpha, double *X, blasint incX, double *Y, blasint incY, double *A, blasint lda);
 | 
			
		||||
void cblas_zgerc(enum CBLAS_ORDER order, blasint M, blasint N, double *alpha, double *X, blasint incX, double *Y, blasint incY, double *A, blasint lda);
 | 
			
		||||
 | 
			
		||||
void cblas_strsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, float *A, blasint lda, float *X, blasint incX);
 | 
			
		||||
void cblas_dtrsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, double *A, blasint lda, double *X, blasint incX);
 | 
			
		||||
void cblas_ctrsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, float *A, blasint lda, float *X, blasint incX);
 | 
			
		||||
void cblas_ztrsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, double *A, blasint lda, double *X, blasint incX);
 | 
			
		||||
 | 
			
		||||
void cblas_strmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, float *A, blasint lda, float *X, blasint incX);
 | 
			
		||||
void cblas_dtrmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, double *A, blasint lda, double *X, blasint incX);
 | 
			
		||||
void cblas_ctrmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, float *A, blasint lda, float *X, blasint incX);
 | 
			
		||||
void cblas_ztrmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, double *A, blasint lda, double *X, blasint incX);
 | 
			
		||||
 | 
			
		||||
void cblas_ssyr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float alpha, float *X, blasint incX, float *A, blasint lda);
 | 
			
		||||
void cblas_dsyr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *X, blasint incX, double *A, blasint lda);
 | 
			
		||||
void cblas_cher(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float alpha, float *X, blasint incX, float *A, blasint lda);
 | 
			
		||||
void cblas_zher(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *X, blasint incX, double *A, blasint lda);
 | 
			
		||||
 | 
			
		||||
void cblas_ssyr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,blasint N, float alpha, float *X,
 | 
			
		||||
                blasint incX, float *Y, blasint incY, float *A, blasint lda);
 | 
			
		||||
void cblas_dsyr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *X,
 | 
			
		||||
                blasint incX, double *Y, blasint incY, double *A, blasint lda);
 | 
			
		||||
void cblas_cher2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float *alpha, float *X, blasint incX,
 | 
			
		||||
                float *Y, blasint incY, float *A, blasint lda);
 | 
			
		||||
void cblas_zher2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double *alpha, double *X, blasint incX,
 | 
			
		||||
                double *Y, blasint incY, double *A, blasint lda);
 | 
			
		||||
 | 
			
		||||
void cblas_sgbmv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, blasint M, blasint N,
 | 
			
		||||
                 blasint KL, blasint KU, float alpha, float *A, blasint lda, float *X, blasint incX, float beta, float *Y, blasint incY);
 | 
			
		||||
void cblas_dgbmv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, blasint M, blasint N,
 | 
			
		||||
                 blasint KL, blasint KU, double alpha, double *A, blasint lda, double *X, blasint incX, double beta, double *Y, blasint incY);
 | 
			
		||||
void cblas_cgbmv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, blasint M, blasint N,
 | 
			
		||||
                 blasint KL, blasint KU, float *alpha, float *A, blasint lda, float *X, blasint incX, float *beta, float *Y, blasint incY);
 | 
			
		||||
void cblas_zgbmv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, blasint M, blasint N,
 | 
			
		||||
                 blasint KL, blasint KU, double *alpha, double *A, blasint lda, double *X, blasint incX, double *beta, double *Y, blasint incY);
 | 
			
		||||
 | 
			
		||||
void cblas_ssbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, blasint K, float alpha, float *A,
 | 
			
		||||
                 blasint lda, float *X, blasint incX, float beta, float *Y, blasint incY);
 | 
			
		||||
void cblas_dsbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, blasint K, double alpha, double *A,
 | 
			
		||||
                 blasint lda, double *X, blasint incX, double beta, double *Y, blasint incY);
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
void cblas_stbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
 | 
			
		||||
                 blasint N, blasint K, float *A, blasint lda, float *X, blasint incX);
 | 
			
		||||
void cblas_dtbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
 | 
			
		||||
                 blasint N, blasint K, double *A, blasint lda, double *X, blasint incX);
 | 
			
		||||
void cblas_ctbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
 | 
			
		||||
                 blasint N, blasint K, float *A, blasint lda, float *X, blasint incX);
 | 
			
		||||
void cblas_ztbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
 | 
			
		||||
                 blasint N, blasint K, double *A, blasint lda, double *X, blasint incX);
 | 
			
		||||
 | 
			
		||||
void cblas_stbsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
 | 
			
		||||
                 blasint N, blasint K, float *A, blasint lda, float *X, blasint incX);
 | 
			
		||||
void cblas_dtbsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
 | 
			
		||||
                 blasint N, blasint K, double *A, blasint lda, double *X, blasint incX);
 | 
			
		||||
void cblas_ctbsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
 | 
			
		||||
                 blasint N, blasint K, float *A, blasint lda, float *X, blasint incX);
 | 
			
		||||
void cblas_ztbsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
 | 
			
		||||
                 blasint N, blasint K, double *A, blasint lda, double *X, blasint incX);
 | 
			
		||||
 | 
			
		||||
void cblas_stpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
 | 
			
		||||
                 blasint N, float *Ap, float *X, blasint incX);
 | 
			
		||||
void cblas_dtpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
 | 
			
		||||
                 blasint N, double *Ap, double *X, blasint incX);
 | 
			
		||||
void cblas_ctpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
 | 
			
		||||
                 blasint N, float *Ap, float *X, blasint incX);
 | 
			
		||||
void cblas_ztpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
 | 
			
		||||
                 blasint N, double *Ap, double *X, blasint incX);
 | 
			
		||||
 | 
			
		||||
void cblas_stpsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
 | 
			
		||||
                 blasint N, float *Ap, float *X, blasint incX);
 | 
			
		||||
void cblas_dtpsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
 | 
			
		||||
                 blasint N, double *Ap, double *X, blasint incX);
 | 
			
		||||
void cblas_ctpsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
 | 
			
		||||
                 blasint N, float *Ap, float *X, blasint incX);
 | 
			
		||||
void cblas_ztpsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
 | 
			
		||||
                 blasint N, double *Ap, double *X, blasint incX);
 | 
			
		||||
 | 
			
		||||
void cblas_ssymv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float alpha, float *A,
 | 
			
		||||
                 blasint lda, float *X, blasint incX, float beta, float *Y, blasint incY);
 | 
			
		||||
void cblas_dsymv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *A,
 | 
			
		||||
                 blasint lda, double *X, blasint incX, double beta, double *Y, blasint incY);
 | 
			
		||||
void cblas_chemv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float *alpha, float *A,
 | 
			
		||||
                 blasint lda, float *X, blasint incX, float *beta, float *Y, blasint incY);
 | 
			
		||||
void cblas_zhemv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double *alpha, double *A,
 | 
			
		||||
                 blasint lda, double *X, blasint incX, double *beta, double *Y, blasint incY);
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
void cblas_sspmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float alpha, float *Ap,
 | 
			
		||||
                 float *X, blasint incX, float beta, float *Y, blasint incY);
 | 
			
		||||
void cblas_dspmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *Ap,
 | 
			
		||||
                 double *X, blasint incX, double beta, double *Y, blasint incY);
 | 
			
		||||
 | 
			
		||||
void cblas_sspr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float alpha, float *X, blasint incX, float *Ap);
 | 
			
		||||
void cblas_dspr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *X, blasint incX, double *Ap);
 | 
			
		||||
 | 
			
		||||
void cblas_chpr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float alpha, float *X, blasint incX, float *A);
 | 
			
		||||
void cblas_zhpr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *X,blasint incX, double *A);
 | 
			
		||||
 | 
			
		||||
void cblas_sspr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float alpha, float *X, blasint incX, float *Y, blasint incY, float *A);
 | 
			
		||||
void cblas_dspr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *X, blasint incX, double *Y, blasint incY, double *A);
 | 
			
		||||
void cblas_chpr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float *alpha, float *X, blasint incX, float *Y, blasint incY, float *Ap);
 | 
			
		||||
void cblas_zhpr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double *alpha, double *X, blasint incX, double *Y, blasint incY, double *Ap);
 | 
			
		||||
 | 
			
		||||
void cblas_chbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, blasint K,
 | 
			
		||||
		 float *alpha, float *A, blasint lda, float *X, blasint incX, float *beta, float *Y, blasint incY);
 | 
			
		||||
void cblas_zhbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, blasint K,
 | 
			
		||||
		 double *alpha, double *A, blasint lda, double *X, blasint incX, double *beta, double *Y, blasint incY);
 | 
			
		||||
 | 
			
		||||
void cblas_chpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N,
 | 
			
		||||
		 float *alpha, float *Ap, float *X, blasint incX, float *beta, float *Y, blasint incY);
 | 
			
		||||
void cblas_zhpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N,
 | 
			
		||||
		 double *alpha, double *Ap, double *X, blasint incX, double *beta, double *Y, blasint incY);
 | 
			
		||||
 | 
			
		||||
void cblas_sgemm(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, blasint M, blasint N, blasint K,
 | 
			
		||||
		 float alpha, float *A, blasint lda, float *B, blasint ldb, float beta, float *C, blasint ldc);
 | 
			
		||||
void cblas_dgemm(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, blasint M, blasint N, blasint K,
 | 
			
		||||
		 double alpha, double *A, blasint lda, double *B, blasint ldb, double beta, double *C, blasint ldc);
 | 
			
		||||
void cblas_cgemm(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, blasint M, blasint N, blasint K,
 | 
			
		||||
		 float *alpha, float *A, blasint lda, float *B, blasint ldb, float *beta, float *C, blasint ldc);
 | 
			
		||||
void cblas_cgemm3m(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, blasint M, blasint N, blasint K,
 | 
			
		||||
		 float *alpha, float *A, blasint lda, float *B, blasint ldb, float *beta, float *C, blasint ldc);
 | 
			
		||||
void cblas_zgemm(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, blasint M, blasint N, blasint K,
 | 
			
		||||
		 double *alpha, double *A, blasint lda, double *B, blasint ldb, double *beta, double *C, blasint ldc);
 | 
			
		||||
void cblas_zgemm3m(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, blasint M, blasint N, blasint K,
 | 
			
		||||
		 double *alpha, double *A, blasint lda, double *B, blasint ldb, double *beta, double *C, blasint ldc);
 | 
			
		||||
 | 
			
		||||
void cblas_ssymm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, blasint M, blasint N,
 | 
			
		||||
                 float alpha, float *A, blasint lda, float *B, blasint ldb, float beta, float *C, blasint ldc);
 | 
			
		||||
void cblas_dsymm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, blasint M, blasint N,
 | 
			
		||||
                 double alpha, double *A, blasint lda, double *B, blasint ldb, double beta, double *C, blasint ldc);
 | 
			
		||||
void cblas_csymm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, blasint M, blasint N,
 | 
			
		||||
                 float *alpha, float *A, blasint lda, float *B, blasint ldb, float *beta, float *C, blasint ldc);
 | 
			
		||||
void cblas_zsymm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, blasint M, blasint N,
 | 
			
		||||
                 double *alpha, double *A, blasint lda, double *B, blasint ldb, double *beta, double *C, blasint ldc);
 | 
			
		||||
 | 
			
		||||
void cblas_ssyrk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans,
 | 
			
		||||
		 blasint N, blasint K, float alpha, float *A, blasint lda, float beta, float *C, blasint ldc);
 | 
			
		||||
void cblas_dsyrk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans,
 | 
			
		||||
		 blasint N, blasint K, double alpha, double *A, blasint lda, double beta, double *C, blasint ldc);
 | 
			
		||||
void cblas_csyrk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans,
 | 
			
		||||
		 blasint N, blasint K, float *alpha, float *A, blasint lda, float *beta, float *C, blasint ldc);
 | 
			
		||||
void cblas_zsyrk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans,
 | 
			
		||||
		 blasint N, blasint K, double *alpha, double *A, blasint lda, double *beta, double *C, blasint ldc);
 | 
			
		||||
 | 
			
		||||
void cblas_ssyr2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans,
 | 
			
		||||
		  blasint N, blasint K, float alpha, float *A, blasint lda, float *B, blasint ldb, float beta, float *C, blasint ldc);
 | 
			
		||||
void cblas_dsyr2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans,
 | 
			
		||||
		  blasint N, blasint K, double alpha, double *A, blasint lda, double *B, blasint ldb, double beta, double *C, blasint ldc);
 | 
			
		||||
void cblas_csyr2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans,
 | 
			
		||||
		  blasint N, blasint K, float *alpha, float *A, blasint lda, float *B, blasint ldb, float *beta, float *C, blasint ldc);
 | 
			
		||||
void cblas_zsyr2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans,
 | 
			
		||||
		  blasint N, blasint K, double *alpha, double *A, blasint lda, double *B, blasint ldb, double *beta, double *C, blasint ldc);
 | 
			
		||||
 | 
			
		||||
void cblas_strmm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA,
 | 
			
		||||
                 enum CBLAS_DIAG Diag, blasint M, blasint N, float alpha, float *A, blasint lda, float *B, blasint ldb);
 | 
			
		||||
void cblas_dtrmm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA,
 | 
			
		||||
                 enum CBLAS_DIAG Diag, blasint M, blasint N, double alpha, double *A, blasint lda, double *B, blasint ldb);
 | 
			
		||||
void cblas_ctrmm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA,
 | 
			
		||||
                 enum CBLAS_DIAG Diag, blasint M, blasint N, float *alpha, float *A, blasint lda, float *B, blasint ldb);
 | 
			
		||||
void cblas_ztrmm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA,
 | 
			
		||||
                 enum CBLAS_DIAG Diag, blasint M, blasint N, double *alpha, double *A, blasint lda, double *B, blasint ldb);
 | 
			
		||||
 | 
			
		||||
void cblas_strsm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA,
 | 
			
		||||
                 enum CBLAS_DIAG Diag, blasint M, blasint N, float alpha, float *A, blasint lda, float *B, blasint ldb);
 | 
			
		||||
void cblas_dtrsm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA,
 | 
			
		||||
                 enum CBLAS_DIAG Diag, blasint M, blasint N, double alpha, double *A, blasint lda, double *B, blasint ldb);
 | 
			
		||||
void cblas_ctrsm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA,
 | 
			
		||||
                 enum CBLAS_DIAG Diag, blasint M, blasint N, float *alpha, float *A, blasint lda, float *B, blasint ldb);
 | 
			
		||||
void cblas_ztrsm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA,
 | 
			
		||||
                 enum CBLAS_DIAG Diag, blasint M, blasint N, double *alpha, double *A, blasint lda, double *B, blasint ldb);
 | 
			
		||||
 | 
			
		||||
void cblas_chemm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, blasint M, blasint N,
 | 
			
		||||
                 float *alpha, float *A, blasint lda, float *B, blasint ldb, float *beta, float *C, blasint ldc);
 | 
			
		||||
void cblas_zhemm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, blasint M, blasint N,
 | 
			
		||||
                 double *alpha, double *A, blasint lda, double *B, blasint ldb, double *beta, double *C, blasint ldc);
 | 
			
		||||
 | 
			
		||||
void cblas_cherk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans, blasint N, blasint K,
 | 
			
		||||
                 float alpha, float *A, blasint lda, float beta, float *C, blasint ldc);
 | 
			
		||||
void cblas_zherk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans, blasint N, blasint K,
 | 
			
		||||
                 double alpha, double *A, blasint lda, double beta, double *C, blasint ldc);
 | 
			
		||||
 | 
			
		||||
void cblas_cher2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans, blasint N, blasint K,
 | 
			
		||||
                  float *alpha, float *A, blasint lda, float *B, blasint ldb, float beta, float *C, blasint ldc);
 | 
			
		||||
void cblas_zher2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans, blasint N, blasint K,
 | 
			
		||||
                  double *alpha, double *A, blasint lda, double *B, blasint ldb, double beta, double *C, blasint ldc);
 | 
			
		||||
 | 
			
		||||
void cblas_xerbla(blasint p, char *rout, char *form, ...);
 | 
			
		||||
 | 
			
		||||
/*** BLAS extensions ***/
 | 
			
		||||
 | 
			
		||||
void cblas_saxpby(blasint n, float alpha, float *x, blasint incx,float beta, float *y, blasint incy);
 | 
			
		||||
 | 
			
		||||
void cblas_daxpby(blasint n, double alpha, double *x, blasint incx,double beta, double *y, blasint incy);
 | 
			
		||||
 | 
			
		||||
void cblas_caxpby(blasint n, float *alpha, float *x, blasint incx,float *beta, float *y, blasint incy);
 | 
			
		||||
 | 
			
		||||
void cblas_zaxpby(blasint n, double *alpha, double *x, blasint incx,double *beta, double *y, blasint incy);
 | 
			
		||||
 | 
			
		||||
void cblas_somatcopy( enum CBLAS_ORDER CORDER,  enum CBLAS_TRANSPOSE CTRANS,  blasint crows,  blasint ccols,  float calpha,  float *a, 
 | 
			
		||||
		      blasint clda, float *b,  blasint cldb); 
 | 
			
		||||
void cblas_domatcopy( enum CBLAS_ORDER CORDER,  enum CBLAS_TRANSPOSE CTRANS,  blasint crows,  blasint ccols,  double calpha,  double *a,
 | 
			
		||||
		      blasint clda, double *b,  blasint cldb); 
 | 
			
		||||
void cblas_comatcopy( enum CBLAS_ORDER CORDER,  enum CBLAS_TRANSPOSE CTRANS,  blasint crows,  blasint ccols,  void* calpha,  void* a, 
 | 
			
		||||
		      blasint clda, void *b,  blasint cldb); 
 | 
			
		||||
void cblas_zomatcopy( enum CBLAS_ORDER CORDER,  enum CBLAS_TRANSPOSE CTRANS,  blasint crows,  blasint ccols,  void* calpha,  void* a, 
 | 
			
		||||
		      blasint clda,  void *b,  blasint cldb); 
 | 
			
		||||
 | 
			
		||||
void cblas_simatcopy( enum CBLAS_ORDER CORDER,  enum CBLAS_TRANSPOSE CTRANS,  blasint crows,  blasint ccols,  float calpha, float *a, 
 | 
			
		||||
		      blasint clda,  blasint cldb); 
 | 
			
		||||
void cblas_dimatcopy( enum CBLAS_ORDER CORDER,  enum CBLAS_TRANSPOSE CTRANS,  blasint crows,  blasint ccols,  double calpha, double *a,
 | 
			
		||||
		      blasint clda,  blasint cldb); 
 | 
			
		||||
void cblas_cimatcopy( enum CBLAS_ORDER CORDER,  enum CBLAS_TRANSPOSE CTRANS,  blasint crows,  blasint ccols,  float* calpha, float* a, 
 | 
			
		||||
		      blasint clda,  blasint cldb); 
 | 
			
		||||
void cblas_zimatcopy( enum CBLAS_ORDER CORDER,  enum CBLAS_TRANSPOSE CTRANS,  blasint crows,  blasint ccols,  double* calpha, double* a, 
 | 
			
		||||
		      blasint clda,  blasint cldb); 
 | 
			
		||||
 | 
			
		||||
void cblas_sgeadd( enum CBLAS_ORDER CORDER, blasint crows,  blasint ccols,  float calpha, float *a,  blasint clda,  float cbeta, 
 | 
			
		||||
		  float *c,  blasint cldc); 
 | 
			
		||||
void cblas_dgeadd( enum CBLAS_ORDER CORDER, blasint crows,  blasint ccols,  double calpha, double *a,  blasint clda,  double cbeta, 
 | 
			
		||||
		  double *c,  blasint cldc); 
 | 
			
		||||
void cblas_cgeadd( enum CBLAS_ORDER CORDER, blasint crows,  blasint ccols,  float *calpha, float *a,  blasint clda,  float *cbeta, 
 | 
			
		||||
		  float *c,  blasint cldc); 
 | 
			
		||||
void cblas_zgeadd( enum CBLAS_ORDER CORDER, blasint crows,  blasint ccols,  double *calpha, double *a,  blasint clda,  double *cbeta, 
 | 
			
		||||
		  double *c,  blasint cldc); 
 | 
			
		||||
 | 
			
		||||
#ifdef __cplusplus
 | 
			
		||||
}
 | 
			
		||||
#endif  /* __cplusplus */
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
| 
						 | 
				
			
			@ -0,0 +1,115 @@
 | 
			
		|||
##
 | 
			
		||||
## Author: Hank Anderson <hank@statease.com>
 | 
			
		||||
## Description: Ported from portion of OpenBLAS/Makefile.system
 | 
			
		||||
##              Sets various variables based on architecture.
 | 
			
		||||
 | 
			
		||||
# Settings that apply to x86 and x86_64 targets only.
# Expansions are quoted so an empty/undefined ARCH or F_COMPILER evaluates to
# false instead of producing a malformed if() expression.
if ("${ARCH}" STREQUAL "x86" OR "${ARCH}" STREQUAL "x86_64")

  # On 32-bit x86 with no BINARY value, flag NO_BINARY_MODE.
  if ("${ARCH}" STREQUAL "x86" AND NOT BINARY)
    set(NO_BINARY_MODE 1)
  endif ()

  # EXPRECISION is only turned on when the Fortran compiler is gfortran and the
  # caller has not set NO_EXPRECISION.
  if (NOT NO_EXPRECISION AND "${F_COMPILER}" MATCHES "GFORTRAN")
    # CMAKE_C_COMPILER is the path of the compiler executable; the vendor name
    # that these comparisons need is CMAKE_C_COMPILER_ID.
    # NOTE(review): "LSB" is a name from the Makefile build; CMake does not
    # appear to report such a compiler id - confirm how lsbcc should be detected.
    if ("${CMAKE_C_COMPILER_ID}" STREQUAL "GNU" OR "${CMAKE_C_COMPILER_ID}" STREQUAL "LSB")
      set(EXPRECISION 1)
      set(CCOMMON_OPT "${CCOMMON_OPT} -DEXPRECISION -m128bit-long-double")
      set(FCOMMON_OPT "${FCOMMON_OPT} -m128bit-long-double")
    elseif ("${CMAKE_C_COMPILER_ID}" STREQUAL "Clang")
      # Clang gets the define but not the -m128bit-long-double C flag.
      set(EXPRECISION 1)
      set(CCOMMON_OPT "${CCOMMON_OPT} -DEXPRECISION")
      set(FCOMMON_OPT "${FCOMMON_OPT} -m128bit-long-double")
    endif ()
  endif ()
endif ()
 | 
			
		||||
 | 
			
		||||
# Intel C compiler: append -wd981 to the common C options.
# The vendor check must use CMAKE_C_COMPILER_ID; CMAKE_C_COMPILER is the
# executable path and never equals "Intel".
if ("${CMAKE_C_COMPILER_ID}" STREQUAL "Intel")
  set(CCOMMON_OPT "${CCOMMON_OPT} -wd981")
endif ()
 | 
			
		||||
 | 
			
		||||
# When USE_OPENMP is set, append the compiler-specific OpenMP switch to
# CCOMMON_OPT. The compiler is identified through CMAKE_C_COMPILER_ID
# (CMAKE_C_COMPILER is the executable path and cannot match a vendor name).
# Compiler ids are mutually exclusive, so a single if/elseif chain is used.
if (USE_OPENMP)

  # NOTE(review): "LSB" and "OPEN64" are names from the Makefile build and may
  # not correspond to any id CMake reports - confirm.
  if ("${CMAKE_C_COMPILER_ID}" STREQUAL "GNU" OR "${CMAKE_C_COMPILER_ID}" STREQUAL "LSB")
    set(CCOMMON_OPT "${CCOMMON_OPT} -fopenmp")

  elseif ("${CMAKE_C_COMPILER_ID}" STREQUAL "Clang")
    # The flag is still passed after warning the user.
    message(WARNING "Clang doesn't support OpenMP yet.")
    set(CCOMMON_OPT "${CCOMMON_OPT} -fopenmp")

  elseif ("${CMAKE_C_COMPILER_ID}" STREQUAL "Intel")
    set(CCOMMON_OPT "${CCOMMON_OPT} -openmp")

  elseif ("${CMAKE_C_COMPILER_ID}" STREQUAL "PGI")
    set(CCOMMON_OPT "${CCOMMON_OPT} -mp")

  elseif ("${CMAKE_C_COMPILER_ID}" STREQUAL "OPEN64")
    # Open64 additionally links the C++ runtime.
    set(CCOMMON_OPT "${CCOMMON_OPT} -mp")
    set(CEXTRALIB "${CEXTRALIB} -lstdc++")

  elseif ("${CMAKE_C_COMPILER_ID}" STREQUAL "PathScale")
    # CMake spells this id "PathScale".
    set(CCOMMON_OPT "${CCOMMON_OPT} -mp")
  endif ()
endif ()
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# When DYNAMIC_ARCH is requested, fill DYNAMIC_CORE with the space-separated
# list of cores for the current ARCH. ARCH is quoted so that an empty value
# compares as false instead of breaking the if() expression.
if (DYNAMIC_ARCH)
  if ("${ARCH}" STREQUAL "x86")
    set(DYNAMIC_CORE "KATMAI COPPERMINE NORTHWOOD PRESCOTT BANIAS CORE2 PENRYN DUNNINGTON NEHALEM ATHLON OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO")

  elseif ("${ARCH}" STREQUAL "x86_64")
    set(DYNAMIC_CORE "PRESCOTT CORE2 PENRYN DUNNINGTON NEHALEM OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO")
    # AVX and AVX2 cores are added unless disabled with NO_AVX / NO_AVX2.
    if (NOT NO_AVX)
      set(DYNAMIC_CORE "${DYNAMIC_CORE} SANDYBRIDGE BULLDOZER PILEDRIVER STEAMROLLER")
    endif ()
    if (NOT NO_AVX2)
      set(DYNAMIC_CORE "${DYNAMIC_CORE} HASWELL")
    endif ()
  endif ()

  # No core list was produced: drop the DYNAMIC_ARCH request.
  if (NOT DYNAMIC_CORE)
    unset(DYNAMIC_ARCH)
  endif ()
endif ()
 | 
			
		||||
 | 
			
		||||
# ia64: set NO_BINARY_MODE and BINARY_DEFINED.
# ARCH is quoted so an empty value compares as false.
if ("${ARCH}" STREQUAL "ia64")
  set(NO_BINARY_MODE 1)
  set(BINARY_DEFINED 1)

  # The gfortran + GNU C case deliberately sets nothing here: the EXPRECISION
  # assignments for it (EXPRECISION = 1, CCOMMON_OPT += -DEXPRECISION) were
  # already commented out, so the empty nested conditionals that wrapped them
  # have been removed.
endif ()
 | 
			
		||||
 | 
			
		||||
# Architectures that set NO_BINARY_MODE.
# ARCH is quoted throughout so an empty value compares as false instead of
# producing a malformed if() expression.

# mips64 sets NO_BINARY_MODE only (BINARY_DEFINED is left alone here).
if ("${ARCH}" STREQUAL "mips64")
  set(NO_BINARY_MODE 1)
endif ()

# alpha, arm and arm64 set both NO_BINARY_MODE and BINARY_DEFINED.
if ("${ARCH}" STREQUAL "alpha" OR "${ARCH}" STREQUAL "arm" OR "${ARCH}" STREQUAL "arm64")
  set(NO_BINARY_MODE 1)
  set(BINARY_DEFINED 1)
endif ()
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -0,0 +1,89 @@
 | 
			
		|||
##
 | 
			
		||||
## Author: Hank Anderson <hank@statease.com>
 | 
			
		||||
## Description: Ported from the OpenBLAS/c_check perl script.
 | 
			
		||||
##              This is triggered by prebuild.cmake and runs before any of the code is built.
 | 
			
		||||
##              Creates config.h and Makefile.conf.
 | 
			
		||||
 | 
			
		||||
# CMake vars set by this file:
 | 
			
		||||
# OSNAME (use CMAKE_SYSTEM_NAME)
 | 
			
		||||
# ARCH
 | 
			
		||||
# C_COMPILER (use CMAKE_C_COMPILER)
 | 
			
		||||
# BINARY32
 | 
			
		||||
# BINARY64
 | 
			
		||||
# FU
 | 
			
		||||
# CROSS_SUFFIX
 | 
			
		||||
# CROSS
 | 
			
		||||
# CEXTRALIB
 | 
			
		||||
 | 
			
		||||
# Defines set by this file:
 | 
			
		||||
# OS_
 | 
			
		||||
# ARCH_
 | 
			
		||||
# C_
 | 
			
		||||
# __32BIT__
 | 
			
		||||
# __64BIT__
 | 
			
		||||
# FUNDERSCORE
 | 
			
		||||
# PTHREAD_CREATE_FUNC
 | 
			
		||||
 | 
			
		||||
# N.B. c_check (and ctest.c) is not cross-platform, so instead try to use CMake variables.
 | 
			
		||||
# FU is the value later written as the FUNDERSCORE define:
# "_" on Apple and MSVC builds, empty on every other platform.
if (APPLE OR MSVC)
  set(FU "_")
else ()
  set(FU "")
endif ()
 | 
			
		||||
 | 
			
		||||
# Convert CMake vars into the format that OpenBLAS expects
 | 
			
		||||
# HOST_OS is the upper-cased CMAKE_SYSTEM_NAME, with "WINDOWS" renamed to
# "WINNT". Expansions are quoted so that an empty or multi-word system name
# cannot break the string()/if() argument lists.
string(TOUPPER "${CMAKE_SYSTEM_NAME}" HOST_OS)
if ("${HOST_OS}" STREQUAL "WINDOWS")
  set(HOST_OS WINNT)
endif ()
 | 
			
		||||
 | 
			
		||||
# added by hpa - check size of void ptr to detect 64-bit compile
 | 
			
		||||
# Pick the word size when the caller did not supply BINARY:
# 64 when pointers are 8 bytes wide, 32 otherwise.
if (NOT DEFINED BINARY)
  if (CMAKE_SIZEOF_VOID_P EQUAL 8)
    set(BINARY 64)
  else ()
    set(BINARY 32)
  endif ()
endif ()

# Exactly one of BINARY32 / BINARY64 is set, depending on BINARY.
if (NOT BINARY EQUAL 64)
  set(BINARY32 1)
else ()
  set(BINARY64 1)
endif ()
 | 
			
		||||
 | 
			
		||||
# CMake docs define these:
 | 
			
		||||
# CMAKE_SYSTEM_PROCESSOR - The name of the CPU CMake is building for.
 | 
			
		||||
# CMAKE_HOST_SYSTEM_PROCESSOR - The name of the CPU CMake is running on.
 | 
			
		||||
#
 | 
			
		||||
# TODO: CMAKE_SYSTEM_PROCESSOR doesn't seem to be correct - instead get it from the compiler a la c_check
 | 
			
		||||
# Derive ARCH from CMAKE_SYSTEM_PROCESSOR and normalise it.
# All expansions are quoted: CMAKE_SYSTEM_PROCESSOR can be empty (for example
# in a toolchain file that does not set it), and an unquoted empty expansion
# makes the if() expressions below invalid.
set(ARCH "${CMAKE_SYSTEM_PROCESSOR}")

# "AMD64" is renamed to x86_64.
if ("${ARCH}" STREQUAL "AMD64")
  set(ARCH "x86_64")
endif ()

# If you are using a 32-bit compiler on a 64-bit system CMAKE_SYSTEM_PROCESSOR will be wrong
if ("${ARCH}" STREQUAL "x86_64" AND BINARY EQUAL 32)
  set(ARCH x86)
endif ()

# Fold the remaining 32-bit x86 spellings ("X86" and i386..i686) into "x86",
# the name the architecture checks in this build compare against.
if ("${ARCH}" MATCHES "^(X86|i[3-6]86)$")
  set(ARCH x86)
endif ()
 | 
			
		||||
 | 
			
		||||
# COMPILER_ID becomes the suffix of the C_<id> define written to the config
# file. It is taken from the C compiler (the library is C/ASM); the C++
# compiler id may be empty or belong to a different vendor.
set(COMPILER_ID "${CMAKE_C_COMPILER_ID}")
# The id "GNU" is renamed to "GCC".
if ("${COMPILER_ID}" STREQUAL "GNU")
  set(COMPILER_ID "GCC")
endif ()
 | 
			
		||||
 | 
			
		||||
# Write the detected OS / architecture / compiler / word-size / underscore
# defines to TARGET_CONF (overwriting the file).
# TARGET_CONF is not set in this script - presumably provided by the including
# script; confirm. It is quoted so that a path containing spaces or semicolons
# stays one argument; unquoted, the tail of the path would be written as file
# content.
string(TOUPPER "${ARCH}" UC_ARCH)

file(WRITE "${TARGET_CONF}"
  "#define OS_${HOST_OS}\t1\n"
  "#define ARCH_${UC_ARCH}\t1\n"
  "#define C_${COMPILER_ID}\t1\n"
  "#define __${BINARY}BIT__\t1\n"
  "#define FUNDERSCORE\t${FU}\n")
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -0,0 +1,103 @@
 | 
			
		|||
##
 | 
			
		||||
## Author: Hank Anderson <hank@statease.com>
 | 
			
		||||
## Description: Ported from portion of OpenBLAS/Makefile.system
 | 
			
		||||
##              Sets C related variables.
 | 
			
		||||
 | 
			
		||||
# C flags for GCC-compatible compilers (GNU, LSB, Clang).
# The vendor is read from CMAKE_C_COMPILER_ID; CMAKE_C_COMPILER is the path of
# the compiler executable and never equals a vendor name, which left this whole
# block inactive. ARCH, CORE and OSNAME are quoted because they may be unset.
# NOTE(review): "LSB" is a name from the Makefile build; confirm whether CMake
# ever reports it as a compiler id.
if ("${CMAKE_C_COMPILER_ID}" STREQUAL "GNU" OR "${CMAKE_C_COMPILER_ID}" STREQUAL "LSB" OR "${CMAKE_C_COMPILER_ID}" STREQUAL "Clang")

  set(CCOMMON_OPT "${CCOMMON_OPT} -Wall")
  set(COMMON_PROF "${COMMON_PROF} -fno-inline")
  set(NO_UNINITIALIZED_WARN "-Wno-uninitialized")

  # QUIET_MAKE silences the uninitialized/unused warnings enabled by -Wall.
  if (QUIET_MAKE)
    set(CCOMMON_OPT "${CCOMMON_OPT} ${NO_UNINITIALIZED_WARN} -Wno-unused")
  endif ()

  if (NO_BINARY_MODE)

    # mips64 selects the ABI explicitly and marks the word size as handled.
    if ("${ARCH}" STREQUAL "mips64")
      if (BINARY64)
        set(CCOMMON_OPT "${CCOMMON_OPT} -mabi=64")
      else ()
        set(CCOMMON_OPT "${CCOMMON_OPT} -mabi=n32")
      endif ()
      set(BINARY_DEFINED 1)
    endif ()

    # Both Loongson 3 cores use the same -march for C and Fortran.
    if ("${CORE}" STREQUAL "LOONGSON3A" OR "${CORE}" STREQUAL "LOONGSON3B")
      set(CCOMMON_OPT "${CCOMMON_OPT} -march=mips64")
      set(FCOMMON_OPT "${FCOMMON_OPT} -march=mips64")
    endif ()

    # NOTE(review): OSNAME is not set by the scripts visible here (c_check sets
    # HOST_OS) - confirm which variable is intended.
    if ("${OSNAME}" STREQUAL "AIX")
      set(BINARY_DEFINED 1)
    endif ()
  endif ()

  # Default word-size switch when nothing above claimed it.
  if (NOT BINARY_DEFINED)
    if (BINARY64)
      set(CCOMMON_OPT "${CCOMMON_OPT} -m64")
    else ()
      set(CCOMMON_OPT "${CCOMMON_OPT} -m32")
    endif ()
  endif ()
endif ()
 | 
			
		||||
 | 
			
		||||
# PGI: choose the -tp target by word size.
# The vendor check uses CMAKE_C_COMPILER_ID (CMAKE_C_COMPILER is the executable
# path and cannot equal "PGI").
if ("${CMAKE_C_COMPILER_ID}" STREQUAL "PGI")
  if (BINARY64)
    set(CCOMMON_OPT "${CCOMMON_OPT} -tp p7-64")
  else ()
    set(CCOMMON_OPT "${CCOMMON_OPT} -tp p7")
  endif ()
endif ()
 | 
			
		||||
 | 
			
		||||
# PathScale: choose -m64 / -m32 by word size.
# The vendor check uses CMAKE_C_COMPILER_ID, whose value for this compiler is
# spelled "PathScale"; comparing the executable path against "PATHSCALE" could
# never succeed.
if ("${CMAKE_C_COMPILER_ID}" STREQUAL "PathScale")
  if (BINARY64)
    set(CCOMMON_OPT "${CCOMMON_OPT} -m64")
  else ()
    set(CCOMMON_OPT "${CCOMMON_OPT} -m32")
  endif ()
endif ()
 | 
			
		||||
 | 
			
		||||
# Open64 C flags.
# The vendor check uses CMAKE_C_COMPILER_ID rather than the compiler path.
# NOTE(review): "OPEN64" is the name used by the Makefile build; confirm the
# id CMake actually reports for this compiler.
if ("${CMAKE_C_COMPILER_ID}" STREQUAL "OPEN64")

  if ("${ARCH}" STREQUAL "mips64")

    # mips64 uses the -n32 / -n64 ABI switches.
    if (BINARY64)
      set(CCOMMON_OPT "${CCOMMON_OPT} -n64")
    else ()
      set(CCOMMON_OPT "${CCOMMON_OPT} -n32")
    endif ()

    # Both Loongson 3 cores get the same extra options. CORE is quoted because
    # it may be unset.
    if ("${CORE}" STREQUAL "LOONGSON3A" OR "${CORE}" STREQUAL "LOONGSON3B")
      set(CCOMMON_OPT "${CCOMMON_OPT} -loongson3 -static")
    endif ()

  else ()

    # Every other architecture: a 64-bit build gets -m64, a 32-bit build -m32
    # (the two flags were previously swapped).
    if (BINARY64)
      set(CCOMMON_OPT "${CCOMMON_OPT} -m64")
    else ()
      set(CCOMMON_OPT "${CCOMMON_OPT} -m32")
    endif ()
  endif ()
endif ()
 | 
			
		||||
 | 
			
		||||
# Sun / Oracle Studio C flags.
# The vendor check uses CMAKE_C_COMPILER_ID, whose value for this compiler is
# "SunPro"; comparing the executable path against "SUN" could never succeed.
if ("${CMAKE_C_COMPILER_ID}" STREQUAL "SunPro")
  set(CCOMMON_OPT "${CCOMMON_OPT} -w")
  if ("${ARCH}" STREQUAL "x86")
    set(CCOMMON_OPT "${CCOMMON_OPT} -m32")
  else ()
    # NOTE(review): this branch appends -m64 to the Fortran options
    # (FCOMMON_OPT), not to CCOMMON_OPT - kept as-is; confirm it is intended.
    set(FCOMMON_OPT "${FCOMMON_OPT} -m64")
  endif ()
endif ()
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -0,0 +1,60 @@
 | 
			
		|||
 | 
			
		||||
# Only generate .def for dll on MSVC.
# Runs the exports/gensymbol perl script as a PRE_LINK step of the library
# target and redirects its output to openblas.def in the project binary dir.
if(MSVC)

# The .def file does not exist at configure time; mark it as generated.
set_source_files_properties(${OpenBLAS_DEF_FILE} PROPERTIES GENERATED 1)

# Architecture argument for gensymbol; falls back to x86_64 when ARCH is unset.
if (NOT DEFINED ARCH)
  set(ARCH_IN "x86_64")
else()
  set(ARCH_IN ${ARCH})
endif()

# Quoted so that an undefined CORE compares as an empty string instead of
# producing a malformed if() expression.
if ("${CORE}" STREQUAL "generic")
  set(ARCH_IN "GENERIC")
endif ()

# Every remaining gensymbol switch is forwarded as <NAME>_IN and defaults to 0
# when the corresponding option has not been defined.
foreach (_def_opt EXPRECISION NO_CBLAS NO_LAPACK NO_LAPACKE NEED2UNDERSCORES ONLY_CBLAS)
  if (NOT DEFINED ${_def_opt})
    set(${_def_opt}_IN 0)
  else()
    set(${_def_opt}_IN ${${_def_opt}})
  endif()
endforeach ()

add_custom_command(
  TARGET ${OpenBLAS_LIBNAME} PRE_LINK
  COMMAND perl
  ARGS "${CMAKE_SOURCE_DIR}/exports/gensymbol" "win2k" "${ARCH_IN}" "dummy" "${EXPRECISION_IN}" "${NO_CBLAS_IN}" "${NO_LAPACK_IN}" "${NO_LAPACKE_IN}" "${NEED2UNDERSCORES_IN}" "${ONLY_CBLAS_IN}" "${SYMBOLPREFIX}" "${SYMBOLSUFFIX}" > "${PROJECT_BINARY_DIR}/openblas.def"
  COMMENT "Create openblas.def file"
  VERBATIM)

endif()
 | 
			
		||||
| 
						 | 
				
			
			@ -0,0 +1,66 @@
 | 
			
		|||
##
 | 
			
		||||
## Author: Hank Anderson <hank@statease.com>
 | 
			
		||||
## Copyright: (c) Stat-Ease, Inc.
 | 
			
		||||
## Created: 12/29/14
 | 
			
		||||
## Last Modified: 12/29/14
 | 
			
		||||
## Description: Ported from the OpenBLAS/f_check perl script.
 | 
			
		||||
##              This is triggered by prebuild.cmake and runs before any of the code is built.
 | 
			
		||||
##              Appends Fortran information to config.h and Makefile.conf.
 | 
			
		||||
 | 
			
		||||
# CMake vars set by this file:
 | 
			
		||||
# F_COMPILER
 | 
			
		||||
# FC
 | 
			
		||||
# BU
 | 
			
		||||
# NOFORTRAN
 | 
			
		||||
# NEED2UNDERSCORES
 | 
			
		||||
# FEXTRALIB
 | 
			
		||||
 | 
			
		||||
# Defines set by this file:
 | 
			
		||||
# BUNDERSCORE
 | 
			
		||||
# NEEDBUNDERSCORE
 | 
			
		||||
# NEED2UNDERSCORES
 | 
			
		||||
 | 
			
		||||
# Fortran compiler setup.
# The compiler is forced to gfortran (skipping CMake's detection) on MSVC —
# otherwise CMake automatically assumes ifort is available (-hpa) — and also
# whenever LAPACK is not built. Fortran is only enabled as a language when
# LAPACK is built.
if (MSVC OR NO_LAPACK)
  include(CMakeForceCompiler)
  CMAKE_FORCE_Fortran_COMPILER(gfortran GNU)
endif ()

if (NOT NO_LAPACK)
  enable_language(Fortran)
endif ()
 | 
			
		||||
 | 
			
		||||
# N.B. f_check is not cross-platform, so fixed values are written here
# instead of running the perl script.
# TODO: detect whether underscore needed, set #defines and BU appropriately - use try_compile
# TODO: set FEXTRALIB flags a la f_check?

# Both configurations use a single trailing underscore.
set(BU "_")
file(APPEND ${TARGET_CONF}
  "#define BUNDERSCORE _\n"
  "#define NEEDBUNDERSCORE 1\n")

if (ONLY_CBLAS)
  #When we only build CBLAS, we set NOFORTRAN=2
  set(NOFORTRAN 2)
  set(NO_FBLAS 1)
else ()
  # The full build additionally records that no second underscore is needed.
  file(APPEND ${TARGET_CONF}
    "#define NEED2UNDERSCORES 0\n")
endif ()
 | 
			
		||||
 | 
			
		||||
# Derive the upper-case Fortran compiler tag (F_COMPILER) from the executable
# name of CMAKE_Fortran_COMPILER. Arguments are quoted so an empty value does
# not turn into a wrong-argument-count error.
get_filename_component(F_COMPILER "${CMAKE_Fortran_COMPILER}" NAME_WE)
string(TOUPPER "${F_COMPILER}" F_COMPILER)

# Versioned or target-prefixed gfortran executables (e.g. gfortran-4.8,
# x86_64-w64-mingw32-gfortran) would otherwise never equal "GFORTRAN" in the
# exact string comparisons made on F_COMPILER.
if (F_COMPILER MATCHES "GFORTRAN")
  set(F_COMPILER "GFORTRAN")
endif ()
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -0,0 +1,200 @@
 | 
			
		|||
##
 | 
			
		||||
## Author: Hank Anderson <hank@statease.com>
 | 
			
		||||
## Description: Ported from portion of OpenBLAS/Makefile.system
 | 
			
		||||
##              Sets Fortran related variables.
 | 
			
		||||
 | 
			
		||||
# g77: interface define for the C side, -Wall for Fortran, plus a word-size
# flag unless NO_BINARY_MODE is set.
if (${F_COMPILER} STREQUAL "G77")
  set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_G77")
  if (NO_BINARY_MODE)
    set(FCOMMON_OPT "${FCOMMON_OPT} -Wall")
  elseif (BINARY64)
    set(FCOMMON_OPT "${FCOMMON_OPT} -Wall -m64")
  else ()
    set(FCOMMON_OPT "${FCOMMON_OPT} -Wall -m32")
  endif ()
endif ()
 | 
			
		||||
 | 
			
		||||
# g95: interface define for the C side, -Wall for Fortran, plus a word-size
# flag unless NO_BINARY_MODE is set.
if (${F_COMPILER} STREQUAL "G95")
  set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_G95")
  if (NO_BINARY_MODE)
    set(FCOMMON_OPT "${FCOMMON_OPT} -Wall")
  elseif (BINARY64)
    set(FCOMMON_OPT "${FCOMMON_OPT} -Wall -m64")
  else ()
    set(FCOMMON_OPT "${FCOMMON_OPT} -Wall -m32")
  endif ()
endif ()
 | 
			
		||||
 | 
			
		||||
# gfortran: interface define, warnings, runtime library, word size / ABI and
# OpenMP flags.
if (${F_COMPILER} STREQUAL "GFORTRAN")
  set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_GFORT")
  set(FCOMMON_OPT "${FCOMMON_OPT} -Wall")
  #Don't include -lgfortran, when NO_LAPACK=1 or lsbcc
  if (NOT NO_LAPACK)
    # Append to the current value of EXTRALIB; without the leading '$' the
    # literal text "{EXTRALIB}" ended up in the library list.
    set(EXTRALIB "${EXTRALIB} -lgfortran")
  endif ()
  if (NO_BINARY_MODE)
    # Without binary-mode flags only mips64 still needs an explicit ABI.
    if (${ARCH} STREQUAL "mips64")
      if (BINARY64)
        set(FCOMMON_OPT "${FCOMMON_OPT} -mabi=64")
      else ()
        set(FCOMMON_OPT "${FCOMMON_OPT} -mabi=n32")
      endif ()
    endif ()
  else ()
    if (BINARY64)
      set(FCOMMON_OPT "${FCOMMON_OPT} -m64")
      # 8-byte default integers are only requested for 64-bit builds.
      if (INTERFACE64)
        set(FCOMMON_OPT "${FCOMMON_OPT} -fdefault-integer-8")
      endif ()
    else ()
      set(FCOMMON_OPT "${FCOMMON_OPT} -m32")
    endif ()
  endif ()

  if (USE_OPENMP)
    set(FCOMMON_OPT "${FCOMMON_OPT} -fopenmp")
  endif ()
endif ()
 | 
			
		||||
 | 
			
		||||
# Intel Fortran: interface define, then optional 8-byte integers and OpenMP.
if (${F_COMPILER} STREQUAL "INTEL")
  set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_INTEL")

  # Flags are appended in this order: -i8 first, -openmp second.
  if (INTERFACE64)
    set(FCOMMON_OPT "${FCOMMON_OPT} -i8")
  endif ()

  if (USE_OPENMP)
    set(FCOMMON_OPT "${FCOMMON_OPT} -openmp")
  endif ()
endif ()
 | 
			
		||||
 | 
			
		||||
# Fujitsu Fortran: interface define for the C side and the OpenMP switch.
if (${F_COMPILER} STREQUAL "FUJITSU")
  set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_FUJITSU")

  if (USE_OPENMP)
    set(FCOMMON_OPT "${FCOMMON_OPT} -openmp")
  endif ()
endif ()
 | 
			
		||||
 | 
			
		||||
# IBM Fortran: interface define, word size (-q32 / -q64), optional 8-byte
# integers for 64-bit builds, and OpenMP.
if (${F_COMPILER} STREQUAL "IBM")
  set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_IBM")
  # FCOMMON_OPT	+= -qarch=440
  if (NOT BINARY64)
    set(FCOMMON_OPT "${FCOMMON_OPT} -q32")
  else ()
    set(FCOMMON_OPT "${FCOMMON_OPT} -q64")
    if (INTERFACE64)
      set(FCOMMON_OPT "${FCOMMON_OPT} -qintsize=8")
    endif ()
  endif ()

  if (USE_OPENMP)
    set(FCOMMON_OPT "${FCOMMON_OPT} -openmp")
  endif ()
endif ()
 | 
			
		||||
 | 
			
		||||
# PGI Fortran: interface and profiling defines, optional 8-byte integers for
# 64-bit builds, target processor selection, and OpenMP.
if (${F_COMPILER} STREQUAL "PGI")
  set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_PGI")
  set(COMMON_PROF "${COMMON_PROF} -DPGICOMPILER")

  # -i8 is only added for 64-bit builds and precedes the -tp flag.
  if (BINARY64 AND INTERFACE64)
    set(FCOMMON_OPT "${FCOMMON_OPT} -i8")
  endif ()

  if (BINARY64)
    set(FCOMMON_OPT "${FCOMMON_OPT} -tp p7-64")
  else ()
    set(FCOMMON_OPT "${FCOMMON_OPT} -tp p7")
  endif ()

  if (USE_OPENMP)
    set(FCOMMON_OPT "${FCOMMON_OPT} -mp")
  endif ()
endif ()
 | 
			
		||||
 | 
			
		||||
# PathScale Fortran: interface define, optional 8-byte integers for 64-bit
# builds, word size / ABI, and OpenMP.
if (${F_COMPILER} STREQUAL "PATHSCALE")
  set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_PATHSCALE")

  if (BINARY64 AND INTERFACE64)
    set(FCOMMON_OPT "${FCOMMON_OPT} -i8")
  endif ()

  if (${ARCH} STREQUAL "mips64")
    # mips64 selects the ABI with -mabi instead of -m32 / -m64.
    if (BINARY64)
      set(FCOMMON_OPT "${FCOMMON_OPT} -mabi=64")
    else ()
      set(FCOMMON_OPT "${FCOMMON_OPT} -mabi=n32")
    endif ()
  else ()
    if (BINARY64)
      set(FCOMMON_OPT "${FCOMMON_OPT} -m64")
    else ()
      set(FCOMMON_OPT "${FCOMMON_OPT} -m32")
    endif ()
  endif ()

  if (USE_OPENMP)
    set(FCOMMON_OPT "${FCOMMON_OPT} -mp")
  endif ()
endif ()
 | 
			
		||||
 | 
			
		||||
# Open64 Fortran: interface define, optional 8-byte integers for 64-bit
# builds, word size / ABI, Loongson 3 extras, and OpenMP.
if (${F_COMPILER} STREQUAL "OPEN64")
  set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_OPEN64")

  if (BINARY64 AND INTERFACE64)
    set(FCOMMON_OPT "${FCOMMON_OPT} -i8")
  endif ()

  if (${ARCH} STREQUAL "mips64")
    # mips64 selects the ABI with -n32 / -n64.
    if (BINARY64)
      set(FCOMMON_OPT "${FCOMMON_OPT} -n64")
    else ()
      set(FCOMMON_OPT "${FCOMMON_OPT} -n32")
    endif ()

    # Either Loongson 3 core gets the same additional flags.
    if (${CORE} STREQUAL "LOONGSON3A" OR ${CORE} STREQUAL "LOONGSON3B")
      set(FCOMMON_OPT "${FCOMMON_OPT} -loongson3 -static")
    endif ()
  else ()
    if (BINARY64)
      set(FCOMMON_OPT "${FCOMMON_OPT} -m64")
    else ()
      set(FCOMMON_OPT "${FCOMMON_OPT} -m32")
    endif ()
  endif ()

  if (USE_OPENMP)
    # OpenMP builds also add the C++ runtime to the Fortran extra libraries.
    set(FEXTRALIB "${FEXTRALIB} -lstdc++")
    set(FCOMMON_OPT "${FCOMMON_OPT} -mp")
  endif ()
endif ()
 | 
			
		||||
 | 
			
		||||
# Sun Fortran: interface define, word size chosen from ARCH, and OpenMP.
if (${F_COMPILER} STREQUAL "SUN")
  set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_SUN")

  # Only plain x86 is built 32-bit; every other ARCH value gets -m64.
  if (NOT ${ARCH} STREQUAL "x86")
    set(FCOMMON_OPT "${FCOMMON_OPT} -m64")
  else ()
    set(FCOMMON_OPT "${FCOMMON_OPT} -m32")
  endif ()

  if (USE_OPENMP)
    set(FCOMMON_OPT "${FCOMMON_OPT} -xopenmp=parallel")
  endif ()
endif ()
 | 
			
		||||
 | 
			
		||||
# Compaq Fortran: interface define for the C side and the OpenMP switch.
if (${F_COMPILER} STREQUAL "COMPAQ")
  set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_COMPAQ")

  if (USE_OPENMP)
    set(FCOMMON_OPT "${FCOMMON_OPT} -openmp")
  endif ()
endif ()
 | 
			
		||||
 | 
			
		||||
# Taken from the root Makefile: TIMER selects which secnd source file
# lapack-netlib compiles. gfortran uses INT_ETIME; every other compiler
# falls back to NONE.
set(TIMER "NONE")
if (${F_COMPILER} STREQUAL "GFORTRAN")
  set(TIMER "INT_ETIME")
endif ()
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -0,0 +1,165 @@
 | 
			
		|||
# helper functions for the kernel CMakeLists.txt
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# Set the default filenames for L1 objects. Most of these will be overridden by the appropriate KERNEL file.
 | 
			
		||||
# Sets the default <PREFIX><OP>KERNEL source file names for the L1 kernels in
# the caller's scope (this is a macro). Prefixes S/D/Q share one file per
# operation, prefixes C/Z/X share the "z"-prefixed file.
macro(SetDefaultL1)
  # Operations whose default file is "<op>.S" (S/D/Q) and "z<op>.S" (C/Z/X),
  # e.g. SAMAXKERNEL -> amax.S, ZSWAPKERNEL -> zswap.S.
  foreach (_l1_op AMAX AMIN ASUM AXPY COPY DOT NRM2 ROT SCAL SWAP)
    string(TOLOWER "${_l1_op}" _l1_src)
    foreach (_l1_prec S D Q)
      set(${_l1_prec}${_l1_op}KERNEL ${_l1_src}.S)
    endforeach ()
    foreach (_l1_prec C Z X)
      set(${_l1_prec}${_l1_op}KERNEL z${_l1_src}.S)
    endforeach ()
  endforeach ()

  # Remaining S/D/Q defaults: MAX/MIN, the I-prefixed variants, GEMV and CABS.
  foreach (_l1_prec S D Q)
    set(${_l1_prec}MAXKERNEL max.S)
    set(${_l1_prec}MINKERNEL min.S)
    set(I${_l1_prec}AMAXKERNEL iamax.S)
    set(I${_l1_prec}AMINKERNEL iamin.S)
    set(I${_l1_prec}MAXKERNEL iamax.S)
    set(I${_l1_prec}MINKERNEL iamin.S)
    set(${_l1_prec}GEMVNKERNEL gemv_n.S)
    set(${_l1_prec}GEMVTKERNEL gemv_t.S)
    set(${_l1_prec}CABS_KERNEL ../generic/cabs.c)
  endforeach ()

  # Remaining C/Z/X defaults: the I-prefixed AMAX/AMIN variants and GEMV.
  foreach (_l1_prec C Z X)
    set(I${_l1_prec}AMAXKERNEL izamax.S)
    set(I${_l1_prec}AMINKERNEL izamin.S)
    set(${_l1_prec}GEMVNKERNEL zgemv_n.S)
    set(${_l1_prec}GEMVTKERNEL zgemv_t.S)
  endforeach ()

  set(LSAME_KERNEL ../generic/lsame.c)

  # AXPBY defaults come from the arm directory and exist for S/D/C/Z only.
  foreach (_l1_prec S D)
    set(${_l1_prec}AXPBYKERNEL ../arm/axpby.c)
  endforeach ()
  foreach (_l1_prec C Z)
    set(${_l1_prec}AXPBYKERNEL ../arm/zaxpby.c)
  endforeach ()
endmacro ()
 | 
			
		||||
 | 
			
		||||
# Sets the default L2 kernel source file names in the caller's scope (this is
# a macro). GEMV defaults are assembly files; GER, SYMV and HEMV default to
# the C implementations in ../generic.
macro(SetDefaultL2)
  # S/D/Q defaults.
  foreach (_l2_prec S D Q)
    set(${_l2_prec}GEMVNKERNEL gemv_n.S)
    set(${_l2_prec}GEMVTKERNEL gemv_t.S)
    set(${_l2_prec}GERKERNEL ../generic/ger.c)
    set(${_l2_prec}SYMV_U_KERNEL ../generic/symv_k.c)
    set(${_l2_prec}SYMV_L_KERNEL ../generic/symv_k.c)
  endforeach ()

  # C/Z/X defaults, including the GERU/GERC and HEMV U/L/V/M variants.
  foreach (_l2_prec C Z X)
    set(${_l2_prec}GEMVNKERNEL zgemv_n.S)
    set(${_l2_prec}GEMVTKERNEL zgemv_t.S)
    set(${_l2_prec}GERUKERNEL ../generic/zger.c)
    set(${_l2_prec}GERCKERNEL ../generic/zger.c)
    set(${_l2_prec}SYMV_U_KERNEL ../generic/zsymv_k.c)
    set(${_l2_prec}SYMV_L_KERNEL ../generic/zsymv_k.c)
    foreach (_l2_variant U L V M)
      set(${_l2_prec}HEMV_${_l2_variant}_KERNEL ../generic/zhemv_k.c)
    endforeach ()
  endforeach ()
endmacro ()
 | 
			
		||||
 | 
			
		||||
# Sets the default GEADD kernel source file names in the caller's scope (this
# is a macro): S/D use geadd.c, C/Z use zgeadd.c, both from ../generic.
macro(SetDefaultL3)
  foreach (_l3_prec S D)
    set(${_l3_prec}GEADD_KERNEL ../generic/geadd.c)
  endforeach ()
  foreach (_l3_prec C Z)
    set(${_l3_prec}GEADD_KERNEL ../generic/zgeadd.c)
  endforeach ()
endmacro ()
 | 
			
		||||
| 
						 | 
				
			
			@ -0,0 +1,347 @@
 | 
			
		|||
# Sources for compiling lapack-netlib. Can't use CMakeLists.txt because lapack-netlib already has its own cmake files.
 | 
			
		||||
 | 
			
		||||
# ALLAUX source list for the lapack-netlib build, one file per line.
# NOTE(review): entries look relative to the LAPACK source directory (two of
# them reach into ../INSTALL) — confirm against the consumer of this list.
set(ALLAUX
  ilaenv.f
  ieeeck.f
  lsamen.f
  xerbla_array.f
  iparmq.f
  ilaprec.f
  ilatrans.f
  ilauplo.f
  iladiag.f
  chla_transtype.f
  ../INSTALL/ilaver.f
  ../INSTALL/slamch.f
)
 | 
			
		||||
 | 
			
		||||
# SCLAUX source list for the lapack-netlib build. The final entry depends on
# TIMER, which selects the second_<TIMER>.f file from ../INSTALL.
set(SCLAUX
  sbdsdc.f sbdsqr.f sdisna.f slabad.f slacpy.f sladiv.f slae2.f slaebz.f
  slaed0.f slaed1.f slaed2.f slaed3.f slaed4.f slaed5.f slaed6.f
  slaed7.f slaed8.f slaed9.f slaeda.f slaev2.f slagtf.f
  slagts.f slamrg.f slanst.f slapy2.f slapy3.f slarnv.f
  slarra.f slarrb.f slarrc.f slarrd.f slarre.f slarrf.f slarrj.f
  slarrk.f slarrr.f slaneg.f
  slartg.f slaruv.f slas2.f slascl.f
  slasd0.f slasd1.f slasd2.f slasd3.f slasd4.f slasd5.f slasd6.f
  slasd7.f slasd8.f slasda.f slasdq.f slasdt.f
  slaset.f slasq1.f slasq2.f slasq3.f slasq4.f slasq5.f slasq6.f
  slasr.f slasrt.f slassq.f slasv2.f spttrf.f sstebz.f sstedc.f
  ssteqr.f ssterf.f slaisnan.f sisnan.f
  slartgp.f slartgs.f
  ../INSTALL/second_${TIMER}.f
)
 | 
			
		||||
 | 
			
		||||
# DZLAUX source list for the lapack-netlib build. The final entry depends on
# TIMER, which selects the dsecnd_<TIMER>.f file from ../INSTALL.
set(DZLAUX
  dbdsdc.f dbdsqr.f ddisna.f dlabad.f dlacpy.f dladiv.f dlae2.f dlaebz.f
  dlaed0.f dlaed1.f dlaed2.f dlaed3.f dlaed4.f dlaed5.f dlaed6.f
  dlaed7.f dlaed8.f dlaed9.f dlaeda.f dlaev2.f dlagtf.f
  dlagts.f dlamrg.f dlanst.f dlapy2.f dlapy3.f dlarnv.f
  dlarra.f dlarrb.f dlarrc.f dlarrd.f dlarre.f dlarrf.f dlarrj.f
  dlarrk.f dlarrr.f dlaneg.f
  dlartg.f dlaruv.f dlas2.f dlascl.f
  dlasd0.f dlasd1.f dlasd2.f dlasd3.f dlasd4.f dlasd5.f dlasd6.f
  dlasd7.f dlasd8.f dlasda.f dlasdq.f dlasdt.f
  dlaset.f dlasq1.f dlasq2.f dlasq3.f dlasq4.f dlasq5.f dlasq6.f
  dlasr.f dlasrt.f dlassq.f dlasv2.f dpttrf.f dstebz.f dstedc.f
  dsteqr.f dsterf.f dlaisnan.f disnan.f
  dlartgp.f dlartgs.f
  ../INSTALL/dlamch.f ../INSTALL/dsecnd_${TIMER}.f
)
 | 
			
		||||
 | 
			
		||||
# SLASRC source list for the lapack-netlib build (file names only; order is
# kept exactly as in the original list).
set(SLASRC
  sgbbrd.f sgbcon.f sgbequ.f sgbrfs.f sgbsv.f
  sgbsvx.f sgbtf2.f sgbtrf.f sgbtrs.f sgebak.f sgebal.f sgebd2.f
  sgebrd.f sgecon.f sgeequ.f sgees.f sgeesx.f sgeev.f sgeevx.f
  sgegs.f sgegv.f sgehd2.f sgehrd.f sgelq2.f sgelqf.f
  sgels.f sgelsd.f sgelss.f sgelsx.f sgelsy.f sgeql2.f sgeqlf.f
  sgeqp3.f sgeqpf.f sgeqr2.f sgeqr2p.f sgeqrf.f sgeqrfp.f sgerfs.f
  sgerq2.f sgerqf.f sgesc2.f sgesdd.f sgesvd.f sgesvx.f
  sgetc2.f sgetri.f
  sggbak.f sggbal.f sgges.f sggesx.f sggev.f sggevx.f
  sggglm.f sgghrd.f sgglse.f sggqrf.f
  sggrqf.f sggsvd.f sggsvp.f sgtcon.f sgtrfs.f sgtsv.f
  sgtsvx.f sgttrf.f sgttrs.f sgtts2.f shgeqz.f
  shsein.f shseqr.f slabrd.f slacon.f slacn2.f
  slaein.f slaexc.f slag2.f slags2.f slagtm.f slagv2.f slahqr.f
  slahrd.f slahr2.f slaic1.f slaln2.f slals0.f slalsa.f slalsd.f
  slangb.f slange.f slangt.f slanhs.f slansb.f slansp.f
  slansy.f slantb.f slantp.f slantr.f slanv2.f
  slapll.f slapmt.f
  slaqgb.f slaqge.f slaqp2.f slaqps.f slaqsb.f slaqsp.f slaqsy.f
  slaqr0.f slaqr1.f slaqr2.f slaqr3.f slaqr4.f slaqr5.f
  slaqtr.f slar1v.f slar2v.f ilaslr.f ilaslc.f
  slarf.f slarfb.f slarfg.f slarfgp.f slarft.f slarfx.f slargv.f
  slarrv.f slartv.f
  slarz.f slarzb.f slarzt.f slasy2.f slasyf.f slasyf_rook.f
  slatbs.f slatdf.f slatps.f slatrd.f slatrs.f slatrz.f slatzm.f
  sopgtr.f sopmtr.f sorg2l.f sorg2r.f
  sorgbr.f sorghr.f sorgl2.f sorglq.f sorgql.f sorgqr.f sorgr2.f
  sorgrq.f sorgtr.f sorm2l.f sorm2r.f
  sormbr.f sormhr.f sorml2.f sormlq.f sormql.f sormqr.f sormr2.f
  sormr3.f sormrq.f sormrz.f sormtr.f spbcon.f spbequ.f spbrfs.f
  spbstf.f spbsv.f spbsvx.f
  spbtf2.f spbtrf.f spbtrs.f spocon.f spoequ.f sporfs.f sposv.f
  sposvx.f spstrf.f spstf2.f
  sppcon.f sppequ.f
  spprfs.f sppsv.f sppsvx.f spptrf.f spptri.f spptrs.f sptcon.f
  spteqr.f sptrfs.f sptsv.f sptsvx.f spttrs.f sptts2.f srscl.f
  ssbev.f ssbevd.f ssbevx.f ssbgst.f ssbgv.f ssbgvd.f ssbgvx.f
  ssbtrd.f sspcon.f sspev.f sspevd.f sspevx.f sspgst.f
  sspgv.f sspgvd.f sspgvx.f ssprfs.f sspsv.f sspsvx.f ssptrd.f
  ssptrf.f ssptri.f ssptrs.f sstegr.f sstein.f sstev.f sstevd.f sstevr.f
  sstevx.f
  ssycon.f ssyev.f ssyevd.f ssyevr.f ssyevx.f ssygs2.f
  ssygst.f ssygv.f ssygvd.f ssygvx.f ssyrfs.f ssysv.f ssysvx.f
  ssytd2.f ssytf2.f ssytrd.f ssytrf.f ssytri.f ssytri2.f ssytri2x.f
  ssyswapr.f ssytrs.f ssytrs2.f ssyconv.f
  ssytf2_rook.f ssytrf_rook.f ssytrs_rook.f
  ssytri_rook.f ssycon_rook.f ssysv_rook.f
  stbcon.f
  stbrfs.f stbtrs.f stgevc.f stgex2.f stgexc.f stgsen.f
  stgsja.f stgsna.f stgsy2.f stgsyl.f stpcon.f stprfs.f stptri.f
  stptrs.f
  strcon.f strevc.f strexc.f strrfs.f strsen.f strsna.f strsyl.f
  strtrs.f stzrqf.f stzrzf.f sstemr.f
  slansf.f spftrf.f spftri.f spftrs.f ssfrk.f stfsm.f stftri.f stfttp.f
  stfttr.f stpttf.f stpttr.f strttf.f strttp.f
  sgejsv.f sgesvj.f sgsvj0.f sgsvj1.f
  sgeequb.f ssyequb.f spoequb.f sgbequb.f
  sbbcsd.f slapmr.f sorbdb.f sorbdb1.f sorbdb2.f sorbdb3.f sorbdb4.f
  sorbdb5.f sorbdb6.f sorcsd.f sorcsd2by1.f
  sgeqrt.f sgeqrt2.f sgeqrt3.f sgemqrt.f
  stpqrt.f stpqrt2.f stpmqrt.f stprfb.f spotri.f
)
 | 
			
		||||
 | 
			
		||||
# DSLASRC holds a single source file, kept in its own list rather than in SLASRC.
# NOTE(review): presumably used where double-precision code needs this
# single-precision routine — confirm where DSLASRC is consumed.
set(DSLASRC spotrs.f)
 | 
			
		||||
 | 
			
		||||
# CLASRC source list for the lapack-netlib build (file names only; order is
# kept exactly as in the original list).
set(CLASRC
  cbdsqr.f cgbbrd.f cgbcon.f cgbequ.f cgbrfs.f cgbsv.f cgbsvx.f
  cgbtf2.f cgbtrf.f cgbtrs.f cgebak.f cgebal.f cgebd2.f cgebrd.f
  cgecon.f cgeequ.f cgees.f cgeesx.f cgeev.f cgeevx.f
  cgegs.f cgegv.f cgehd2.f cgehrd.f cgelq2.f cgelqf.f
  cgels.f cgelsd.f cgelss.f cgelsx.f cgelsy.f cgeql2.f cgeqlf.f cgeqp3.f
  cgeqpf.f cgeqr2.f cgeqr2p.f cgeqrf.f cgeqrfp.f cgerfs.f
  cgerq2.f cgerqf.f cgesc2.f cgesdd.f cgesvd.f
  cgesvx.f cgetc2.f cgetri.f
  cggbak.f cggbal.f cgges.f cggesx.f cggev.f cggevx.f cggglm.f
  cgghrd.f cgglse.f cggqrf.f cggrqf.f
  cggsvd.f cggsvp.f
  cgtcon.f cgtrfs.f cgtsv.f cgtsvx.f cgttrf.f cgttrs.f cgtts2.f chbev.f
  chbevd.f chbevx.f chbgst.f chbgv.f chbgvd.f chbgvx.f chbtrd.f
  checon.f cheev.f cheevd.f cheevr.f cheevx.f chegs2.f chegst.f
  chegv.f chegvd.f chegvx.f cherfs.f chesv.f chesvx.f chetd2.f
  chetf2.f chetrd.f
  chetrf.f chetri.f chetri2.f chetri2x.f cheswapr.f
  chetrs.f chetrs2.f
  chetf2_rook.f chetrf_rook.f chetri_rook.f chetrs_rook.f checon_rook.f chesv_rook.f
  chgeqz.f chpcon.f chpev.f chpevd.f
  chpevx.f chpgst.f chpgv.f chpgvd.f chpgvx.f chprfs.f chpsv.f
  chpsvx.f
  chptrd.f chptrf.f chptri.f chptrs.f chsein.f chseqr.f clabrd.f
  clacgv.f clacon.f clacn2.f clacp2.f clacpy.f clacrm.f clacrt.f cladiv.f
  claed0.f claed7.f claed8.f
  claein.f claesy.f claev2.f clags2.f clagtm.f
  clahef.f clahef_rook.f clahqr.f
  clahrd.f clahr2.f claic1.f clals0.f clalsa.f clalsd.f clangb.f clange.f clangt.f
  clanhb.f clanhe.f
  clanhp.f clanhs.f clanht.f clansb.f clansp.f clansy.f clantb.f
  clantp.f clantr.f clapll.f clapmt.f clarcm.f claqgb.f claqge.f
  claqhb.f claqhe.f claqhp.f claqp2.f claqps.f claqsb.f
  claqr0.f claqr1.f claqr2.f claqr3.f claqr4.f claqr5.f
  claqsp.f claqsy.f clar1v.f clar2v.f ilaclr.f ilaclc.f
  clarf.f clarfb.f clarfg.f clarft.f clarfgp.f
  clarfx.f clargv.f clarnv.f clarrv.f clartg.f clartv.f
  clarz.f clarzb.f clarzt.f clascl.f claset.f clasr.f classq.f
  clasyf.f clasyf_rook.f clatbs.f clatdf.f clatps.f clatrd.f clatrs.f clatrz.f
  clatzm.f cpbcon.f cpbequ.f cpbrfs.f cpbstf.f cpbsv.f
  cpbsvx.f cpbtf2.f cpbtrf.f cpbtrs.f cpocon.f cpoequ.f cporfs.f
  cposv.f cposvx.f cpstrf.f cpstf2.f
  cppcon.f cppequ.f cpprfs.f cppsv.f cppsvx.f cpptrf.f cpptri.f cpptrs.f
  cptcon.f cpteqr.f cptrfs.f cptsv.f cptsvx.f cpttrf.f cpttrs.f cptts2.f
  crot.f cspcon.f csprfs.f cspsv.f
  cspsvx.f csptrf.f csptri.f csptrs.f csrscl.f cstedc.f
  cstegr.f cstein.f csteqr.f
  csycon.f
  csyrfs.f csysv.f csysvx.f csytf2.f csytrf.f csytri.f csytri2.f csytri2x.f
  csyswapr.f csytrs.f csytrs2.f csyconv.f
  csytf2_rook.f csytrf_rook.f csytrs_rook.f
  csytri_rook.f csycon_rook.f csysv_rook.f
  ctbcon.f ctbrfs.f ctbtrs.f ctgevc.f ctgex2.f
  ctgexc.f ctgsen.f ctgsja.f ctgsna.f ctgsy2.f ctgsyl.f ctpcon.f
  ctprfs.f ctptri.f
  ctptrs.f ctrcon.f ctrevc.f ctrexc.f ctrrfs.f ctrsen.f ctrsna.f
  ctrsyl.f ctrtrs.f ctzrqf.f ctzrzf.f cung2l.f cung2r.f
  cungbr.f cunghr.f cungl2.f cunglq.f cungql.f cungqr.f cungr2.f
  cungrq.f cungtr.f cunm2l.f cunm2r.f cunmbr.f cunmhr.f cunml2.f
  cunmlq.f cunmql.f cunmqr.f cunmr2.f cunmr3.f cunmrq.f cunmrz.f
  cunmtr.f cupgtr.f cupmtr.f icmax1.f scsum1.f cstemr.f
  chfrk.f ctfttp.f clanhf.f cpftrf.f cpftri.f cpftrs.f ctfsm.f ctftri.f
  ctfttr.f ctpttf.f ctpttr.f ctrttf.f ctrttp.f
  cgeequb.f cgbequb.f csyequb.f cpoequb.f cheequb.f
  cbbcsd.f clapmr.f cunbdb.f cunbdb1.f cunbdb2.f cunbdb3.f cunbdb4.f
  cunbdb5.f cunbdb6.f cuncsd.f cuncsd2by1.f
  cgeqrt.f cgeqrt2.f cgeqrt3.f cgemqrt.f
  ctpqrt.f ctpqrt2.f ctpmqrt.f ctprfb.f cpotri.f
)
 | 
			
		||||
 | 
			
		||||
# ZCLASRC holds a single source file, kept in its own list rather than in CLASRC.
# NOTE(review): presumably used where double-complex code needs this
# single-complex routine — confirm where ZCLASRC is consumed.
set(ZCLASRC cpotrs.f)
 | 
			
		||||
 | 
			
		||||
set(DLASRC
 | 
			
		||||
  dgbbrd.f dgbcon.f dgbequ.f dgbrfs.f dgbsv.f
 | 
			
		||||
  dgbsvx.f dgbtf2.f dgbtrf.f dgbtrs.f dgebak.f dgebal.f dgebd2.f
 | 
			
		||||
  dgebrd.f dgecon.f dgeequ.f dgees.f  dgeesx.f dgeev.f  dgeevx.f
 | 
			
		||||
  dgegs.f  dgegv.f  dgehd2.f dgehrd.f dgelq2.f dgelqf.f
 | 
			
		||||
  dgels.f  dgelsd.f dgelss.f dgelsx.f dgelsy.f dgeql2.f dgeqlf.f
 | 
			
		||||
  dgeqp3.f dgeqpf.f dgeqr2.f dgeqr2p.f dgeqrf.f dgeqrfp.f dgerfs.f
 | 
			
		||||
  dgerq2.f dgerqf.f dgesc2.f dgesdd.f  dgesvd.f dgesvx.f
 | 
			
		||||
  dgetc2.f dgetri.f
 | 
			
		||||
  dggbak.f dggbal.f dgges.f  dggesx.f dggev.f  dggevx.f
 | 
			
		||||
  dggglm.f dgghrd.f dgglse.f dggqrf.f
 | 
			
		||||
  dggrqf.f dggsvd.f dggsvp.f dgtcon.f dgtrfs.f dgtsv.f
 | 
			
		||||
  dgtsvx.f dgttrf.f dgttrs.f dgtts2.f dhgeqz.f
 | 
			
		||||
  dhsein.f dhseqr.f dlabrd.f dlacon.f dlacn2.f
 | 
			
		||||
  dlaein.f dlaexc.f dlag2.f  dlags2.f dlagtm.f dlagv2.f dlahqr.f
 | 
			
		||||
  dlahrd.f dlahr2.f dlaic1.f dlaln2.f dlals0.f dlalsa.f dlalsd.f
 | 
			
		||||
  dlangb.f dlange.f dlangt.f dlanhs.f dlansb.f dlansp.f
 | 
			
		||||
  dlansy.f dlantb.f dlantp.f dlantr.f dlanv2.f
 | 
			
		||||
  dlapll.f dlapmt.f
 | 
			
		||||
  dlaqgb.f dlaqge.f dlaqp2.f dlaqps.f dlaqsb.f dlaqsp.f dlaqsy.f
 | 
			
		||||
  dlaqr0.f dlaqr1.f dlaqr2.f dlaqr3.f dlaqr4.f dlaqr5.f
 | 
			
		||||
  dlaqtr.f dlar1v.f dlar2v.f iladlr.f iladlc.f
 | 
			
		||||
  dlarf.f  dlarfb.f dlarfg.f dlarfgp.f dlarft.f dlarfx.f
 | 
			
		||||
  dlargv.f dlarrv.f dlartv.f
 | 
			
		||||
  dlarz.f  dlarzb.f dlarzt.f dlasy2.f dlasyf.f dlasyf_rook.f
 | 
			
		||||
  dlatbs.f dlatdf.f dlatps.f dlatrd.f dlatrs.f dlatrz.f dlatzm.f
 | 
			
		||||
  dopgtr.f dopmtr.f dorg2l.f dorg2r.f
 | 
			
		||||
  dorgbr.f dorghr.f dorgl2.f dorglq.f dorgql.f dorgqr.f dorgr2.f
 | 
			
		||||
  dorgrq.f dorgtr.f dorm2l.f dorm2r.f
 | 
			
		||||
  dormbr.f dormhr.f dorml2.f dormlq.f dormql.f dormqr.f dormr2.f
 | 
			
		||||
  dormr3.f dormrq.f dormrz.f dormtr.f dpbcon.f dpbequ.f dpbrfs.f
 | 
			
		||||
  dpbstf.f dpbsv.f  dpbsvx.f
 | 
			
		||||
  dpbtf2.f dpbtrf.f dpbtrs.f dpocon.f dpoequ.f dporfs.f dposv.f
 | 
			
		||||
  dposvx.f dpotrs.f dpstrf.f dpstf2.f
 | 
			
		||||
  dppcon.f dppequ.f
 | 
			
		||||
  dpprfs.f dppsv.f  dppsvx.f dpptrf.f dpptri.f dpptrs.f dptcon.f
 | 
			
		||||
  dpteqr.f dptrfs.f dptsv.f  dptsvx.f dpttrs.f dptts2.f drscl.f
 | 
			
		||||
  dsbev.f  dsbevd.f dsbevx.f dsbgst.f dsbgv.f  dsbgvd.f dsbgvx.f
 | 
			
		||||
  dsbtrd.f  dspcon.f dspev.f  dspevd.f dspevx.f dspgst.f
 | 
			
		||||
  dspgv.f  dspgvd.f dspgvx.f dsprfs.f dspsv.f  dspsvx.f dsptrd.f
 | 
			
		||||
  dsptrf.f dsptri.f dsptrs.f dstegr.f dstein.f dstev.f  dstevd.f dstevr.f
 | 
			
		||||
  dstevx.f
 | 
			
		||||
  dsycon.f dsyev.f  dsyevd.f dsyevr.f
 | 
			
		||||
  dsyevx.f dsygs2.f dsygst.f dsygv.f  dsygvd.f dsygvx.f dsyrfs.f
 | 
			
		||||
  dsysv.f  dsysvx.f
 | 
			
		||||
  dsytd2.f dsytf2.f dsytrd.f dsytrf.f dsytri.f dsytri2.f dsytri2x.f
 | 
			
		||||
  dsyswapr.f dsytrs.f dsytrs2.f dsyconv.f
 | 
			
		||||
  dsytf2_rook.f dsytrf_rook.f dsytrs_rook.f
 | 
			
		||||
  dsytri_rook.f dsycon_rook.f dsysv_rook.f
 | 
			
		||||
  dtbcon.f dtbrfs.f dtbtrs.f dtgevc.f dtgex2.f dtgexc.f dtgsen.f
 | 
			
		||||
  dtgsja.f dtgsna.f dtgsy2.f dtgsyl.f dtpcon.f dtprfs.f dtptri.f
 | 
			
		||||
  dtptrs.f
 | 
			
		||||
  dtrcon.f dtrevc.f dtrexc.f dtrrfs.f dtrsen.f dtrsna.f dtrsyl.f
 | 
			
		||||
  dtrtrs.f dtzrqf.f dtzrzf.f dstemr.f
 | 
			
		||||
  dsgesv.f dsposv.f dlag2s.f slag2d.f dlat2s.f
 | 
			
		||||
  dlansf.f dpftrf.f dpftri.f dpftrs.f dsfrk.f dtfsm.f dtftri.f dtfttp.f
 | 
			
		||||
  dtfttr.f dtpttf.f dtpttr.f dtrttf.f dtrttp.f
 | 
			
		||||
  dgejsv.f  dgesvj.f  dgsvj0.f  dgsvj1.f
 | 
			
		||||
  dgeequb.f dsyequb.f dpoequb.f dgbequb.f
 | 
			
		||||
  dbbcsd.f dlapmr.f dorbdb.f dorbdb1.f dorbdb2.f dorbdb3.f dorbdb4.f
 | 
			
		||||
  dorbdb5.f dorbdb6.f dorcsd.f dorcsd2by1.f
 | 
			
		||||
  dgeqrt.f dgeqrt2.f dgeqrt3.f dgemqrt.f
 | 
			
		||||
  dtpqrt.f dtpqrt2.f dtpmqrt.f dtprfb.f dpotri.f
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
set(ZLASRC
 | 
			
		||||
  zbdsqr.f zgbbrd.f zgbcon.f zgbequ.f zgbrfs.f zgbsv.f  zgbsvx.f
 | 
			
		||||
  zgbtf2.f zgbtrf.f zgbtrs.f zgebak.f zgebal.f zgebd2.f zgebrd.f
 | 
			
		||||
  zgecon.f zgeequ.f zgees.f  zgeesx.f zgeev.f  zgeevx.f
 | 
			
		||||
  zgegs.f  zgegv.f  zgehd2.f zgehrd.f zgelq2.f zgelqf.f
 | 
			
		||||
  zgels.f  zgelsd.f zgelss.f zgelsx.f zgelsy.f zgeql2.f zgeqlf.f zgeqp3.f
 | 
			
		||||
  zgeqpf.f zgeqr2.f zgeqr2p.f zgeqrf.f zgeqrfp.f zgerfs.f zgerq2.f zgerqf.f
 | 
			
		||||
  zgesc2.f zgesdd.f zgesvd.f zgesvx.f zgetc2.f
 | 
			
		||||
  zgetri.f
 | 
			
		||||
  zggbak.f zggbal.f zgges.f  zggesx.f zggev.f  zggevx.f zggglm.f
 | 
			
		||||
  zgghrd.f zgglse.f zggqrf.f zggrqf.f
 | 
			
		||||
  zggsvd.f zggsvp.f
 | 
			
		||||
  zgtcon.f zgtrfs.f zgtsv.f  zgtsvx.f zgttrf.f zgttrs.f zgtts2.f zhbev.f
 | 
			
		||||
  zhbevd.f zhbevx.f zhbgst.f zhbgv.f  zhbgvd.f zhbgvx.f zhbtrd.f
 | 
			
		||||
  zhecon.f zheev.f  zheevd.f zheevr.f zheevx.f zhegs2.f zhegst.f
 | 
			
		||||
  zhegv.f  zhegvd.f zhegvx.f zherfs.f zhesv.f  zhesvx.f zhetd2.f
 | 
			
		||||
  zhetf2.f zhetrd.f
 | 
			
		||||
  zhetrf.f zhetri.f zhetri2.f zhetri2x.f zheswapr.f
 | 
			
		||||
  zhetrs.f zhetrs2.f
 | 
			
		||||
  zhetf2_rook.f zhetrf_rook.f zhetri_rook.f zhetrs_rook.f zhecon_rook.f zhesv_rook.f
 | 
			
		||||
  zhgeqz.f zhpcon.f zhpev.f  zhpevd.f
 | 
			
		||||
  zhpevx.f zhpgst.f zhpgv.f  zhpgvd.f zhpgvx.f zhprfs.f zhpsv.f
 | 
			
		||||
  zhpsvx.f
 | 
			
		||||
  zhptrd.f zhptrf.f zhptri.f zhptrs.f zhsein.f zhseqr.f zlabrd.f
 | 
			
		||||
  zlacgv.f zlacon.f zlacn2.f zlacp2.f zlacpy.f zlacrm.f zlacrt.f zladiv.f
 | 
			
		||||
  zlaed0.f zlaed7.f zlaed8.f
 | 
			
		||||
  zlaein.f zlaesy.f zlaev2.f zlags2.f zlagtm.f
 | 
			
		||||
  zlahef.f zlahef_rook.f zlahqr.f
 | 
			
		||||
  zlahrd.f zlahr2.f zlaic1.f zlals0.f zlalsa.f zlalsd.f zlangb.f zlange.f
 | 
			
		||||
  zlangt.f zlanhb.f
 | 
			
		||||
  zlanhe.f
 | 
			
		||||
  zlanhp.f zlanhs.f zlanht.f zlansb.f zlansp.f zlansy.f zlantb.f
 | 
			
		||||
  zlantp.f zlantr.f zlapll.f zlapmt.f zlaqgb.f zlaqge.f
 | 
			
		||||
  zlaqhb.f zlaqhe.f zlaqhp.f zlaqp2.f zlaqps.f zlaqsb.f
 | 
			
		||||
  zlaqr0.f zlaqr1.f zlaqr2.f zlaqr3.f zlaqr4.f zlaqr5.f
 | 
			
		||||
  zlaqsp.f zlaqsy.f zlar1v.f zlar2v.f ilazlr.f ilazlc.f
 | 
			
		||||
  zlarcm.f zlarf.f  zlarfb.f
 | 
			
		||||
  zlarfg.f zlarft.f zlarfgp.f
 | 
			
		||||
  zlarfx.f zlargv.f zlarnv.f zlarrv.f zlartg.f zlartv.f
 | 
			
		||||
  zlarz.f  zlarzb.f zlarzt.f zlascl.f zlaset.f zlasr.f
 | 
			
		||||
  zlassq.f zlasyf.f zlasyf_rook.f
 | 
			
		||||
  zlatbs.f zlatdf.f zlatps.f zlatrd.f zlatrs.f zlatrz.f zlatzm.f
 | 
			
		||||
  zpbcon.f zpbequ.f zpbrfs.f zpbstf.f zpbsv.f
 | 
			
		||||
  zpbsvx.f zpbtf2.f zpbtrf.f zpbtrs.f zpocon.f zpoequ.f zporfs.f
 | 
			
		||||
  zposv.f  zposvx.f zpotrs.f zpstrf.f zpstf2.f
 | 
			
		||||
  zppcon.f zppequ.f zpprfs.f zppsv.f  zppsvx.f zpptrf.f zpptri.f zpptrs.f
 | 
			
		||||
  zptcon.f zpteqr.f zptrfs.f zptsv.f  zptsvx.f zpttrf.f zpttrs.f zptts2.f
 | 
			
		||||
  zrot.f   zspcon.f zsprfs.f zspsv.f
 | 
			
		||||
  zspsvx.f zsptrf.f zsptri.f zsptrs.f zdrscl.f zstedc.f
 | 
			
		||||
  zstegr.f zstein.f zsteqr.f
 | 
			
		||||
  zsycon.f
 | 
			
		||||
  zsyrfs.f zsysv.f  zsysvx.f zsytf2.f zsytrf.f zsytri.f zsytri2.f zsytri2x.f
 | 
			
		||||
  zsyswapr.f zsytrs.f zsytrs2.f zsyconv.f
 | 
			
		||||
  zsytf2_rook.f zsytrf_rook.f zsytrs_rook.f
 | 
			
		||||
  zsytri_rook.f zsycon_rook.f zsysv_rook.f
 | 
			
		||||
  ztbcon.f ztbrfs.f ztbtrs.f ztgevc.f ztgex2.f
 | 
			
		||||
  ztgexc.f ztgsen.f ztgsja.f ztgsna.f ztgsy2.f ztgsyl.f ztpcon.f
 | 
			
		||||
  ztprfs.f ztptri.f
 | 
			
		||||
  ztptrs.f ztrcon.f ztrevc.f ztrexc.f ztrrfs.f ztrsen.f ztrsna.f
 | 
			
		||||
  ztrsyl.f ztrtrs.f ztzrqf.f ztzrzf.f zung2l.f
 | 
			
		||||
  zung2r.f zungbr.f zunghr.f zungl2.f zunglq.f zungql.f zungqr.f zungr2.f
 | 
			
		||||
  zungrq.f zungtr.f zunm2l.f zunm2r.f zunmbr.f zunmhr.f zunml2.f
 | 
			
		||||
  zunmlq.f zunmql.f zunmqr.f zunmr2.f zunmr3.f zunmrq.f zunmrz.f
 | 
			
		||||
  zunmtr.f zupgtr.f
 | 
			
		||||
  zupmtr.f izmax1.f dzsum1.f zstemr.f
 | 
			
		||||
  zcgesv.f zcposv.f zlag2c.f clag2z.f zlat2c.f
 | 
			
		||||
  zhfrk.f ztfttp.f zlanhf.f zpftrf.f zpftri.f zpftrs.f ztfsm.f ztftri.f
 | 
			
		||||
  ztfttr.f ztpttf.f ztpttr.f ztrttf.f ztrttp.f
 | 
			
		||||
  zgeequb.f zgbequb.f zsyequb.f zpoequb.f zheequb.f
 | 
			
		||||
  zbbcsd.f zlapmr.f zunbdb.f zunbdb1.f zunbdb2.f zunbdb3.f zunbdb4.f
 | 
			
		||||
  zunbdb5.f zunbdb6.f zuncsd.f zuncsd2by1.f
 | 
			
		||||
  zgeqrt.f zgeqrt2.f zgeqrt3.f zgemqrt.f
 | 
			
		||||
  ztpqrt.f ztpqrt2.f ztpmqrt.f ztprfb.f zpotri.f
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
set(LA_REL_SRC ${ALLAUX})
 | 
			
		||||
if (BUILD_SINGLE)
 | 
			
		||||
  list(APPEND LA_REL_SRC ${SLASRC} ${DSLASRC} ${SCLAUX})
 | 
			
		||||
endif ()
 | 
			
		||||
 | 
			
		||||
if (BUILD_DOUBLE)
 | 
			
		||||
  list(APPEND LA_REL_SRC ${DLASRC} ${DSLASRC} ${DZLAUX})
 | 
			
		||||
endif ()
 | 
			
		||||
 | 
			
		||||
if (BUILD_COMPLEX)
 | 
			
		||||
  list(APPEND LA_REL_SRC ${CLASRC} ${ZCLASRC} ${SCLAUX})
 | 
			
		||||
endif ()
 | 
			
		||||
 | 
			
		||||
if (BUILD_COMPLEX16)
 | 
			
		||||
  list(APPEND LA_REL_SRC ${ZLASRC} ${ZCLASRC} ${DZLAUX})
 | 
			
		||||
endif ()
 | 
			
		||||
 | 
			
		||||
# add lapack-netlib folder to the sources
 | 
			
		||||
set(LA_SOURCES "")
 | 
			
		||||
foreach (LA_FILE ${LA_REL_SRC})
 | 
			
		||||
  list(APPEND LA_SOURCES "${NETLIB_LAPACK_DIR}/SRC/${LA_FILE}")
 | 
			
		||||
endforeach ()
 | 
			
		||||
set_source_files_properties(${LA_SOURCES} PROPERTIES COMPILE_FLAGS "${LAPACK_FFLAGS}")
 | 
			
		||||
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							| 
						 | 
				
			
			@ -0,0 +1,104 @@
 | 
			
		|||
##
 | 
			
		||||
## Author: Hank Anderson <hank@statease.com>
 | 
			
		||||
## Description: Ported from portion of OpenBLAS/Makefile.system
 | 
			
		||||
##              Detects the OS and sets appropriate variables.
 | 
			
		||||
 | 
			
		||||
if (${CMAKE_SYSTEM_NAME} STREQUAL "Darwin")
 | 
			
		||||
  set(ENV{MACOSX_DEPLOYMENT_TARGET} "10.2") # TODO: should be exported as an env var
 | 
			
		||||
  set(MD5SUM "md5 -r")
 | 
			
		||||
endif ()
 | 
			
		||||
 | 
			
		||||
if (${CMAKE_SYSTEM_NAME} STREQUAL "FreeBSD")
 | 
			
		||||
  set(MD5SUM "md5 -r")
 | 
			
		||||
endif ()
 | 
			
		||||
 | 
			
		||||
if (${CMAKE_SYSTEM_NAME} STREQUAL "NetBSD")
 | 
			
		||||
  set(MD5SUM "md5 -n")
 | 
			
		||||
endif ()
 | 
			
		||||
 | 
			
		||||
if (${CMAKE_SYSTEM_NAME} STREQUAL "Linux")
 | 
			
		||||
  set(EXTRALIB "${EXTRALIB} -lm")
 | 
			
		||||
  set(NO_EXPRECISION 1)
 | 
			
		||||
endif ()
 | 
			
		||||
 | 
			
		||||
if (${CMAKE_SYSTEM_NAME} STREQUAL "AIX")
 | 
			
		||||
  set(EXTRALIB "${EXTRALIB} -lm")
 | 
			
		||||
endif ()
 | 
			
		||||
 | 
			
		||||
# TODO: this is probably meant for mingw, not other windows compilers
 | 
			
		||||
if (${CMAKE_SYSTEM_NAME} STREQUAL "Windows")
 | 
			
		||||
 | 
			
		||||
  set(NEED_PIC 0)
 | 
			
		||||
  set(NO_EXPRECISION 1)
 | 
			
		||||
 | 
			
		||||
  set(EXTRALIB "${EXTRALIB} -defaultlib:advapi32")
 | 
			
		||||
 | 
			
		||||
  # probably not going to use these
 | 
			
		||||
  set(SUFFIX "obj")
 | 
			
		||||
  set(PSUFFIX "pobj")
 | 
			
		||||
  set(LIBSUFFIX "a")
 | 
			
		||||
 | 
			
		||||
  if (${CMAKE_C_COMPILER_ID} STREQUAL "Clang")
 | 
			
		||||
    set(CCOMMON_OPT	"${CCOMMON_OPT} -DMS_ABI")
 | 
			
		||||
  endif ()
 | 
			
		||||
 | 
			
		||||
  if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU")
 | 
			
		||||
 | 
			
		||||
    # Test for supporting MS_ABI
 | 
			
		||||
    # removed string parsing in favor of CMake's version comparison -hpa
 | 
			
		||||
    # Ask the compiler for its version; strip the trailing newline so the
    # captured value is a clean version string.
    execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion
      OUTPUT_VARIABLE GCC_VERSION
      OUTPUT_STRIP_TRAILING_WHITESPACE)
    # Quoted so that an empty result (probe produced no output) evaluates as a
    # false condition instead of a malformed if().
    if (NOT "${GCC_VERSION}" VERSION_LESS 4.7)
      # GCC Version >=4.7
      # It is compatible with MSVC ABI.
      set(CCOMMON_OPT "${CCOMMON_OPT} -DMS_ABI")
    endif ()
 | 
			
		||||
  endif ()
 | 
			
		||||
 | 
			
		||||
  # Ensure the correct stack alignment on Win32
 | 
			
		||||
  # http://permalink.gmane.org/gmane.comp.lib.openblas.general/97
 | 
			
		||||
  if (${ARCH} STREQUAL "x86")
 | 
			
		||||
    if (NOT MSVC AND NOT ${CMAKE_C_COMPILER_ID} STREQUAL "Clang")
 | 
			
		||||
      set(CCOMMON_OPT "${CCOMMON_OPT} -mincoming-stack-boundary=2")
 | 
			
		||||
    endif ()
 | 
			
		||||
    set(FCOMMON_OPT "${FCOMMON_OPT} -mincoming-stack-boundary=2")
 | 
			
		||||
  endif ()
 | 
			
		||||
  
 | 
			
		||||
endif ()
 | 
			
		||||
 | 
			
		||||
if (${CMAKE_SYSTEM_NAME} STREQUAL "Interix")
 | 
			
		||||
  set(NEED_PIC 0)
 | 
			
		||||
  set(NO_EXPRECISION 1)
 | 
			
		||||
  
 | 
			
		||||
  # Plain path value; the stray STREQUAL token previously became part of the value.
  set(INTERIX_TOOL_DIR "/opt/gcc.3.3/i586-pc-interix3/bin")
 | 
			
		||||
endif ()
 | 
			
		||||
 | 
			
		||||
if (CYGWIN)
 | 
			
		||||
  set(NEED_PIC 0)
 | 
			
		||||
  set(NO_EXPRECISION 1)
 | 
			
		||||
endif ()
 | 
			
		||||
 | 
			
		||||
if (NOT ${CMAKE_SYSTEM_NAME} STREQUAL "Windows" AND NOT ${CMAKE_SYSTEM_NAME} STREQUAL "Interix")
 | 
			
		||||
  if (SMP)
 | 
			
		||||
    set(EXTRALIB "${EXTRALIB} -lpthread")
 | 
			
		||||
  endif ()
 | 
			
		||||
endif ()
 | 
			
		||||
 | 
			
		||||
if (QUAD_PRECISION)
 | 
			
		||||
  set(CCOMMON_OPT "${CCOMMON_OPT} -DQUAD_PRECISION")
 | 
			
		||||
  set(NO_EXPRECISION 1)
 | 
			
		||||
endif ()
 | 
			
		||||
 | 
			
		||||
if (${ARCH} STREQUAL "x86")
 | 
			
		||||
  set(NO_EXPRECISION 1)
 | 
			
		||||
endif ()
 | 
			
		||||
 | 
			
		||||
if (UTEST_CHECK)
 | 
			
		||||
  set(CCOMMON_OPT "${CCOMMON_OPT} -DUTEST_CHECK")
 | 
			
		||||
  set(SANITY_CHECK 1)
 | 
			
		||||
endif ()
 | 
			
		||||
 | 
			
		||||
if (SANITY_CHECK)
 | 
			
		||||
  # TODO: need some way to get $(*F) (target filename)
 | 
			
		||||
  set(CCOMMON_OPT "${CCOMMON_OPT} -DSANITY_CHECK -DREFNAME=$(*F)f${BU}")
 | 
			
		||||
endif ()
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -0,0 +1,113 @@
 | 
			
		|||
##
 | 
			
		||||
## Author: Hank Anderson <hank@statease.com>
 | 
			
		||||
## Description: Ported from OpenBLAS/Makefile.prebuild
 | 
			
		||||
##              This is triggered by system.cmake and runs before any of the code is built.
 | 
			
		||||
##              Creates config.h and Makefile.conf by first running the c_check perl script (which creates those files).
 | 
			
		||||
##              Next it runs f_check and appends some fortran information to the files.
 | 
			
		||||
##              Finally it runs getarch and getarch_2nd for even more environment information.
 | 
			
		||||
 | 
			
		||||
# CMake vars set by this file:
 | 
			
		||||
# CORE
 | 
			
		||||
# LIBCORE
 | 
			
		||||
# NUM_CORES
 | 
			
		||||
# HAVE_MMX
 | 
			
		||||
# HAVE_SSE
 | 
			
		||||
# HAVE_SSE2
 | 
			
		||||
# HAVE_SSE3
 | 
			
		||||
# MAKE
 | 
			
		||||
# SGEMM_UNROLL_M
 | 
			
		||||
# SGEMM_UNROLL_N
 | 
			
		||||
# DGEMM_UNROLL_M
 | 
			
		||||
# DGEMM_UNROLL_N
 | 
			
		||||
# QGEMM_UNROLL_M
 | 
			
		||||
# QGEMM_UNROLL_N
 | 
			
		||||
# CGEMM_UNROLL_M
 | 
			
		||||
# CGEMM_UNROLL_N
 | 
			
		||||
# ZGEMM_UNROLL_M
 | 
			
		||||
# ZGEMM_UNROLL_N
 | 
			
		||||
# XGEMM_UNROLL_M
 | 
			
		||||
# XGEMM_UNROLL_N
 | 
			
		||||
# CGEMM3M_UNROLL_M
 | 
			
		||||
# CGEMM3M_UNROLL_N
 | 
			
		||||
# ZGEMM3M_UNROLL_M
 | 
			
		||||
# ZGEMM3M_UNROLL_N
 | 
			
		||||
# XGEMM3M_UNROLL_M
 | 
			
		||||
# XGEMM3M_UNROLL_N
 | 
			
		||||
 | 
			
		||||
# CPUIDEMU = ../../cpuid/table.o
 | 
			
		||||
 | 
			
		||||
if (DEFINED CPUIDEMU)
 | 
			
		||||
  set(EXFLAGS "-DCPUIDEMU -DVENDOR=99")
 | 
			
		||||
endif ()
 | 
			
		||||
 | 
			
		||||
if (DEFINED TARGET_CORE)
 | 
			
		||||
  # set the C flags for just this file
 | 
			
		||||
  set(GETARCH2_FLAGS "-DBUILD_KERNEL")
 | 
			
		||||
  set(TARGET_MAKE "Makefile_kernel.conf")
 | 
			
		||||
  set(TARGET_CONF "config_kernel.h")
 | 
			
		||||
else()
 | 
			
		||||
  set(TARGET_MAKE "Makefile.conf")
 | 
			
		||||
  set(TARGET_CONF "config.h")
 | 
			
		||||
endif ()
 | 
			
		||||
 | 
			
		||||
include("${CMAKE_SOURCE_DIR}/cmake/c_check.cmake")
 | 
			
		||||
 | 
			
		||||
if (NOT NOFORTRAN)
 | 
			
		||||
  include("${CMAKE_SOURCE_DIR}/cmake/f_check.cmake")
 | 
			
		||||
endif ()
 | 
			
		||||
 | 
			
		||||
# compile getarch
 | 
			
		||||
# Sources for the getarch probe. CPUIDEMU is the variable tested by the
# "DEFINED CPUIDEMU" check earlier in this file; it expands to nothing when unset.
set(GETARCH_SRC
  ${CMAKE_SOURCE_DIR}/getarch.c
  ${CPUIDEMU}
)
 | 
			
		||||
 | 
			
		||||
if (NOT MSVC)
 | 
			
		||||
  list(APPEND GETARCH_SRC ${CMAKE_SOURCE_DIR}/cpuid.S)
 | 
			
		||||
endif ()
 | 
			
		||||
 | 
			
		||||
if (MSVC)
 | 
			
		||||
#Use generic for MSVC now
 | 
			
		||||
set(GETARCH_FLAGS ${GETARCH_FLAGS} -DFORCE_GENERIC)
 | 
			
		||||
endif()
 | 
			
		||||
 | 
			
		||||
set(GETARCH_DIR "${PROJECT_BINARY_DIR}/getarch_build")
 | 
			
		||||
set(GETARCH_BIN "getarch${CMAKE_EXECUTABLE_SUFFIX}")
 | 
			
		||||
file(MAKE_DIRECTORY ${GETARCH_DIR})
 | 
			
		||||
# Build getarch and copy the resulting executable into the project binary dir.
try_compile(GETARCH_RESULT ${GETARCH_DIR}
  SOURCES ${GETARCH_SRC}
  COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} -I${CMAKE_SOURCE_DIR}
  OUTPUT_VARIABLE GETARCH_LOG
  COPY_FILE ${PROJECT_BINARY_DIR}/${GETARCH_BIN}
)
# The following steps execute this binary, so stop here with the build log
# instead of silently running a missing executable.
if (NOT GETARCH_RESULT)
  message(FATAL_ERROR "Failed to compile getarch:\n${GETARCH_LOG}")
endif ()
 | 
			
		||||
 | 
			
		||||
message(STATUS "Running getarch")
 | 
			
		||||
 | 
			
		||||
# run the freshly built getarch binary directly: its mode 0 output is parsed into CMake variables, its mode 1 output is appended to ${TARGET_CONF}
 | 
			
		||||
execute_process(COMMAND ${PROJECT_BINARY_DIR}/${GETARCH_BIN} 0 OUTPUT_VARIABLE GETARCH_MAKE_OUT)
 | 
			
		||||
execute_process(COMMAND ${PROJECT_BINARY_DIR}/${GETARCH_BIN} 1 OUTPUT_VARIABLE GETARCH_CONF_OUT)
 | 
			
		||||
 | 
			
		||||
message(STATUS "GETARCH results:\n${GETARCH_MAKE_OUT}")
 | 
			
		||||
 | 
			
		||||
# append config data from getarch to the TARGET file and read in CMake vars
 | 
			
		||||
file(APPEND ${TARGET_CONF} ${GETARCH_CONF_OUT})
 | 
			
		||||
ParseGetArchVars(${GETARCH_MAKE_OUT})
 | 
			
		||||
 | 
			
		||||
set(GETARCH2_DIR "${PROJECT_BINARY_DIR}/getarch2_build")
 | 
			
		||||
set(GETARCH2_BIN "getarch_2nd${CMAKE_EXECUTABLE_SUFFIX}")
 | 
			
		||||
file(MAKE_DIRECTORY ${GETARCH2_DIR})
 | 
			
		||||
# Build getarch_2nd and copy the resulting executable into the project binary dir.
try_compile(GETARCH2_RESULT ${GETARCH2_DIR}
  SOURCES ${CMAKE_SOURCE_DIR}/getarch_2nd.c
  COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} ${GETARCH2_FLAGS} -I${CMAKE_SOURCE_DIR}
  OUTPUT_VARIABLE GETARCH2_LOG
  COPY_FILE ${PROJECT_BINARY_DIR}/${GETARCH2_BIN}
)
# The following steps execute this binary, so stop here with the build log
# instead of silently running a missing executable.
if (NOT GETARCH2_RESULT)
  message(FATAL_ERROR "Failed to compile getarch_2nd:\n${GETARCH2_LOG}")
endif ()
 | 
			
		||||
 | 
			
		||||
# run the freshly built getarch_2nd binary directly: its mode 0 output is parsed into CMake variables, its mode 1 output is appended to ${TARGET_CONF}
 | 
			
		||||
execute_process(COMMAND ${PROJECT_BINARY_DIR}/${GETARCH2_BIN} 0 OUTPUT_VARIABLE GETARCH2_MAKE_OUT)
 | 
			
		||||
execute_process(COMMAND ${PROJECT_BINARY_DIR}/${GETARCH2_BIN} 1 OUTPUT_VARIABLE GETARCH2_CONF_OUT)
 | 
			
		||||
 | 
			
		||||
# append config data from getarch_2nd to the TARGET file and read in CMake vars
 | 
			
		||||
file(APPEND ${TARGET_CONF} ${GETARCH2_CONF_OUT})
 | 
			
		||||
ParseGetArchVars(${GETARCH2_MAKE_OUT})
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -0,0 +1,552 @@
 | 
			
		|||
##
 | 
			
		||||
## Author: Hank Anderson <hank@statease.com>
 | 
			
		||||
## Description: Ported from OpenBLAS/Makefile.system
 | 
			
		||||
##
 | 
			
		||||
 | 
			
		||||
set(NETLIB_LAPACK_DIR "${CMAKE_SOURCE_DIR}/lapack-netlib")
 | 
			
		||||
 | 
			
		||||
# TODO: Makefile.system detects Darwin (mac) and switches to clang here -hpa
 | 
			
		||||
# http://stackoverflow.com/questions/714100/os-detecting-makefile
 | 
			
		||||
 | 
			
		||||
# TODO: Makefile.system sets HOSTCC = $(CC) here if not already set -hpa
 | 
			
		||||
 | 
			
		||||
# TARGET_CORE will override TARGET which is used in DYNAMIC_ARCH=1.
 | 
			
		||||
if (DEFINED TARGET_CORE)
 | 
			
		||||
  set(TARGET ${TARGET_CORE})
 | 
			
		||||
endif ()
 | 
			
		||||
 | 
			
		||||
# Force fallbacks for 32bit
 | 
			
		||||
if (DEFINED BINARY AND DEFINED TARGET AND BINARY EQUAL 32)
 | 
			
		||||
  message(STATUS "Compiling a ${BINARY}-bit binary.")
 | 
			
		||||
  set(NO_AVX 1)
 | 
			
		||||
  if (${TARGET} STREQUAL "HASWELL" OR ${TARGET} STREQUAL "SANDYBRIDGE")
 | 
			
		||||
    set(TARGET "NEHALEM")
 | 
			
		||||
  endif ()
 | 
			
		||||
  if (${TARGET} STREQUAL "BULLDOZER" OR ${TARGET} STREQUAL "PILEDRIVER")
 | 
			
		||||
    set(TARGET "BARCELONA")
 | 
			
		||||
  endif ()
 | 
			
		||||
endif ()
 | 
			
		||||
 | 
			
		||||
if (DEFINED TARGET)
 | 
			
		||||
  message(STATUS "Targetting the ${TARGET} architecture.")
 | 
			
		||||
  set(GETARCH_FLAGS "-DFORCE_${TARGET}")
 | 
			
		||||
endif ()
 | 
			
		||||
 | 
			
		||||
if (INTERFACE64)
 | 
			
		||||
  message(STATUS "Using 64-bit integers.")
 | 
			
		||||
  set(GETARCH_FLAGS	"${GETARCH_FLAGS} -DUSE64BITINT")
 | 
			
		||||
endif ()
 | 
			
		||||
 | 
			
		||||
if (NOT DEFINED GEMM_MULTITHREAD_THRESHOLD)
 | 
			
		||||
  set(GEMM_MULTITHREAD_THRESHOLD 4)
 | 
			
		||||
endif ()
 | 
			
		||||
message(STATUS "GEMM multithread threshold set to ${GEMM_MULTITHREAD_THRESHOLD}.")
 | 
			
		||||
set(GETARCH_FLAGS	"${GETARCH_FLAGS} -DGEMM_MULTITHREAD_THRESHOLD=${GEMM_MULTITHREAD_THRESHOLD}")
 | 
			
		||||
 | 
			
		||||
if (NO_AVX)
 | 
			
		||||
  message(STATUS "Disabling Advanced Vector Extensions (AVX).")
 | 
			
		||||
  set(GETARCH_FLAGS "${GETARCH_FLAGS} -DNO_AVX")
 | 
			
		||||
endif ()
 | 
			
		||||
 | 
			
		||||
if (NO_AVX2)
 | 
			
		||||
  message(STATUS "Disabling Advanced Vector Extensions 2 (AVX2).")
 | 
			
		||||
  set(GETARCH_FLAGS "${GETARCH_FLAGS} -DNO_AVX2")
 | 
			
		||||
endif ()
 | 
			
		||||
 | 
			
		||||
if (CMAKE_BUILD_TYPE STREQUAL Debug)
 | 
			
		||||
  set(GETARCH_FLAGS "${GETARCH_FLAGS} -g")
 | 
			
		||||
endif ()
 | 
			
		||||
 | 
			
		||||
# TODO: let CMake handle this? -hpa
 | 
			
		||||
#if (${QUIET_MAKE})
 | 
			
		||||
#  set(MAKE "${MAKE} -s")
 | 
			
		||||
#endif()
 | 
			
		||||
 | 
			
		||||
if (NOT DEFINED NO_PARALLEL_MAKE)
 | 
			
		||||
  set(NO_PARALLEL_MAKE 0)
 | 
			
		||||
endif ()
 | 
			
		||||
set(GETARCH_FLAGS	"${GETARCH_FLAGS} -DNO_PARALLEL_MAKE=${NO_PARALLEL_MAKE}")
 | 
			
		||||
 | 
			
		||||
# Add -static to the getarch flags when the C compiler program is loongcc.
# CMAKE_C_COMPILER normally holds a full path, so only the program name
# (without directory or extension) is compared.
get_filename_component(OPENBLAS_CC_PROGRAM_NAME "${CMAKE_C_COMPILER}" NAME_WE)
if ("${OPENBLAS_CC_PROGRAM_NAME}" STREQUAL "loongcc")
  set(GETARCH_FLAGS "${GETARCH_FLAGS} -static")
endif ()
 | 
			
		||||
 | 
			
		||||
#if don't use Fortran, it will only compile CBLAS.
 | 
			
		||||
if (ONLY_CBLAS)
 | 
			
		||||
  set(NO_LAPACK 1)
 | 
			
		||||
else ()
 | 
			
		||||
  set(ONLY_CBLAS 0)
 | 
			
		||||
endif ()
 | 
			
		||||
 | 
			
		||||
include("${CMAKE_SOURCE_DIR}/cmake/prebuild.cmake")
 | 
			
		||||
 | 
			
		||||
if (NOT DEFINED NUM_THREADS)
 | 
			
		||||
  set(NUM_THREADS ${NUM_CORES})
 | 
			
		||||
endif ()
 | 
			
		||||
 | 
			
		||||
if (${NUM_THREADS} EQUAL 1)
 | 
			
		||||
  set(USE_THREAD 0)
 | 
			
		||||
endif ()
 | 
			
		||||
 | 
			
		||||
if (DEFINED USE_THREAD)
 | 
			
		||||
  if (NOT ${USE_THREAD})
 | 
			
		||||
    unset(SMP)
 | 
			
		||||
  else ()
 | 
			
		||||
    set(SMP 1)
 | 
			
		||||
  endif ()
 | 
			
		||||
else ()
 | 
			
		||||
  # N.B. this is NUM_THREAD in Makefile.system which is probably a bug -hpa
 | 
			
		||||
  if (${NUM_THREADS} EQUAL 1)
 | 
			
		||||
    unset(SMP)
 | 
			
		||||
  else ()
 | 
			
		||||
    set(SMP 1)
 | 
			
		||||
  endif ()
 | 
			
		||||
endif ()
 | 
			
		||||
 | 
			
		||||
if (${SMP})
 | 
			
		||||
  message(STATUS "SMP enabled.")
 | 
			
		||||
endif ()
 | 
			
		||||
 | 
			
		||||
if (NOT DEFINED NEED_PIC)
 | 
			
		||||
  set(NEED_PIC 1)
 | 
			
		||||
endif ()
 | 
			
		||||
 | 
			
		||||
# TODO: I think CMake should be handling all this stuff -hpa
 | 
			
		||||
unset(ARFLAGS)
 | 
			
		||||
set(CPP "${COMPILER} -E")
 | 
			
		||||
set(AR "${CROSS_SUFFIX}ar")
 | 
			
		||||
set(AS "${CROSS_SUFFIX}as")
 | 
			
		||||
set(LD "${CROSS_SUFFIX}ld")
 | 
			
		||||
set(RANLIB "${CROSS_SUFFIX}ranlib")
 | 
			
		||||
set(NM "${CROSS_SUFFIX}nm")
 | 
			
		||||
set(DLLWRAP "${CROSS_SUFFIX}dllwrap")
 | 
			
		||||
set(OBJCOPY "${CROSS_SUFFIX}objcopy")
 | 
			
		||||
set(OBJCONV "${CROSS_SUFFIX}objconv")
 | 
			
		||||
 | 
			
		||||
# OS dependent settings
 | 
			
		||||
include("${CMAKE_SOURCE_DIR}/cmake/os.cmake")
 | 
			
		||||
 | 
			
		||||
# Architecture dependent settings
 | 
			
		||||
include("${CMAKE_SOURCE_DIR}/cmake/arch.cmake")
 | 
			
		||||
 | 
			
		||||
# C Compiler dependent settings
 | 
			
		||||
include("${CMAKE_SOURCE_DIR}/cmake/cc.cmake")
 | 
			
		||||
 | 
			
		||||
if (NOT NOFORTRAN)
 | 
			
		||||
  # Fortran Compiler dependent settings
 | 
			
		||||
  include("${CMAKE_SOURCE_DIR}/cmake/fc.cmake")
 | 
			
		||||
endif ()
 | 
			
		||||
 | 
			
		||||
if (BINARY64)
 | 
			
		||||
  if (INTERFACE64)
 | 
			
		||||
    # CCOMMON_OPT += -DUSE64BITINT
 | 
			
		||||
  endif ()
 | 
			
		||||
endif ()
 | 
			
		||||
 | 
			
		||||
# Add position-independent-code switches to the C and Fortran option strings.
if (NEED_PIC)
  # CMake reports the IBM XL C compiler as compiler id "XL". The previous test
  # compared CMAKE_C_COMPILER (a path) against "IBM" and could never match.
  if ("${CMAKE_C_COMPILER_ID}" STREQUAL "XL")
    set(CCOMMON_OPT "${CCOMMON_OPT} -qpic=large")
  else ()
    set(CCOMMON_OPT "${CCOMMON_OPT} -fPIC")
  endif ()

  # Quoted so an unset F_COMPILER compares as an empty string instead of
  # producing a malformed if().
  if ("${F_COMPILER}" STREQUAL "SUN")
    set(FCOMMON_OPT "${FCOMMON_OPT} -pic")
  else ()
    set(FCOMMON_OPT "${FCOMMON_OPT} -fPIC")
  endif ()
endif ()
 | 
			
		||||
 | 
			
		||||
if (DYNAMIC_ARCH)
 | 
			
		||||
  set(CCOMMON_OPT "${CCOMMON_OPT} -DDYNAMIC_ARCH")
 | 
			
		||||
endif ()
 | 
			
		||||
 | 
			
		||||
if (NO_LAPACK)
 | 
			
		||||
  set(CCOMMON_OPT "${CCOMMON_OPT} -DNO_LAPACK")
 | 
			
		||||
  #Disable LAPACK C interface
 | 
			
		||||
  set(NO_LAPACKE 1)
 | 
			
		||||
endif ()
 | 
			
		||||
 | 
			
		||||
if (NO_LAPACKE)
 | 
			
		||||
  set(CCOMMON_OPT "${CCOMMON_OPT} -DNO_LAPACKE")
 | 
			
		||||
endif ()
 | 
			
		||||
 | 
			
		||||
if (NO_AVX)
 | 
			
		||||
  set(CCOMMON_OPT "${CCOMMON_OPT} -DNO_AVX")
 | 
			
		||||
endif ()
 | 
			
		||||
 | 
			
		||||
if (${ARCH} STREQUAL "x86")
 | 
			
		||||
  set(CCOMMON_OPT "${CCOMMON_OPT} -DNO_AVX")
 | 
			
		||||
endif ()
 | 
			
		||||
 | 
			
		||||
if (NO_AVX2)
 | 
			
		||||
  set(CCOMMON_OPT "${CCOMMON_OPT} -DNO_AVX2")
 | 
			
		||||
endif ()
 | 
			
		||||
 | 
			
		||||
if (SMP)
 | 
			
		||||
  set(CCOMMON_OPT "${CCOMMON_OPT} -DSMP_SERVER")
 | 
			
		||||
 | 
			
		||||
  if (${ARCH} STREQUAL "mips64")
 | 
			
		||||
    if (NOT ${CORE} STREQUAL "LOONGSON3B")
 | 
			
		||||
      set(USE_SIMPLE_THREADED_LEVEL3 1)
 | 
			
		||||
    endif ()
 | 
			
		||||
  endif ()
 | 
			
		||||
 | 
			
		||||
  if (USE_OPENMP)
 | 
			
		||||
    # USE_SIMPLE_THREADED_LEVEL3 = 1
 | 
			
		||||
    # NO_AFFINITY = 1
 | 
			
		||||
    set(CCOMMON_OPT "${CCOMMON_OPT} -DUSE_OPENMP")
 | 
			
		||||
  endif ()
 | 
			
		||||
 | 
			
		||||
  if (BIGNUMA)
 | 
			
		||||
    set(CCOMMON_OPT "${CCOMMON_OPT} -DBIGNUMA")
 | 
			
		||||
  endif ()
 | 
			
		||||
 | 
			
		||||
endif ()
 | 
			
		||||
 | 
			
		||||
if (NO_WARMUP)
 | 
			
		||||
  set(CCOMMON_OPT "${CCOMMON_OPT} -DNO_WARMUP")
 | 
			
		||||
endif ()
 | 
			
		||||
 | 
			
		||||
if (CONSISTENT_FPCSR)
 | 
			
		||||
  set(CCOMMON_OPT "${CCOMMON_OPT} -DCONSISTENT_FPCSR")
 | 
			
		||||
endif ()
 | 
			
		||||
 | 
			
		||||
# Only for development
 | 
			
		||||
# set(CCOMMON_OPT "${CCOMMON_OPT} -DPARAMTEST")
 | 
			
		||||
# set(CCOMMON_OPT "${CCOMMON_OPT} -DPREFETCHTEST")
 | 
			
		||||
# set(CCOMMON_OPT "${CCOMMON_OPT} -DNO_SWITCHING")
 | 
			
		||||
# set(USE_PAPI 1)
 | 
			
		||||
 | 
			
		||||
if (USE_PAPI)
 | 
			
		||||
  set(CCOMMON_OPT "${CCOMMON_OPT} -DUSE_PAPI")
 | 
			
		||||
  set(EXTRALIB "${EXTRALIB} -lpapi -lperfctr")
 | 
			
		||||
endif ()
 | 
			
		||||
 | 
			
		||||
if (DYNAMIC_THREADS)
 | 
			
		||||
  set(CCOMMON_OPT "${CCOMMON_OPT} -DDYNAMIC_THREADS")
 | 
			
		||||
endif ()
 | 
			
		||||
 | 
			
		||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DMAX_CPU_NUMBER=${NUM_THREADS}")
 | 
			
		||||
 | 
			
		||||
if (USE_SIMPLE_THREADED_LEVEL3)
 | 
			
		||||
  set(CCOMMON_OPT "${CCOMMON_OPT} -DUSE_SIMPLE_THREADED_LEVEL3")
 | 
			
		||||
endif ()
 | 
			
		||||
 | 
			
		||||
if (DEFINED LIBNAMESUFFIX)
 | 
			
		||||
  set(LIBPREFIX "libopenblas_${LIBNAMESUFFIX}")
 | 
			
		||||
else ()
 | 
			
		||||
  set(LIBPREFIX "libopenblas")
 | 
			
		||||
endif ()
 | 
			
		||||
 | 
			
		||||
if (NOT DEFINED SYMBOLPREFIX)
 | 
			
		||||
  set(SYMBOLPREFIX "")
 | 
			
		||||
endif ()
 | 
			
		||||
 | 
			
		||||
if (NOT DEFINED SYMBOLSUFFIX)
 | 
			
		||||
  set(SYMBOLSUFFIX "")
 | 
			
		||||
endif ()
 | 
			
		||||
 | 
			
		||||
set(KERNELDIR	"${CMAKE_SOURCE_DIR}/kernel/${ARCH}")
 | 
			
		||||
 | 
			
		||||
# TODO: need to convert these Makefiles
 | 
			
		||||
# include ${CMAKE_SOURCE_DIR}/cmake/${ARCH}.cmake
 | 
			
		||||
 | 
			
		||||
if (${CORE} STREQUAL "PPC440")
 | 
			
		||||
  set(CCOMMON_OPT "${CCOMMON_OPT} -DALLOC_QALLOC")
 | 
			
		||||
endif ()
 | 
			
		||||
 | 
			
		||||
if (${CORE} STREQUAL "PPC440FP2")
 | 
			
		||||
  set(STATIC_ALLOCATION 1)
 | 
			
		||||
endif ()
 | 
			
		||||
 | 
			
		||||
if (NOT ${CMAKE_SYSTEM_NAME} STREQUAL "Linux")
 | 
			
		||||
  set(NO_AFFINITY 1)
 | 
			
		||||
endif ()
 | 
			
		||||
 | 
			
		||||
if (NOT ${ARCH} STREQUAL "x86_64" AND NOT ${ARCH} STREQUAL "x86" AND NOT ${CORE} STREQUAL "LOONGSON3B")
 | 
			
		||||
  set(NO_AFFINITY 1)
 | 
			
		||||
endif ()
 | 
			
		||||
 | 
			
		||||
if (NO_AFFINITY)
 | 
			
		||||
  set(CCOMMON_OPT "${CCOMMON_OPT} -DNO_AFFINITY")
 | 
			
		||||
endif ()
 | 
			
		||||
 | 
			
		||||
if (FUNCTION_PROFILE)
 | 
			
		||||
  set(CCOMMON_OPT "${CCOMMON_OPT} -DFUNCTION_PROFILE")
 | 
			
		||||
endif ()
 | 
			
		||||
 | 
			
		||||
if (HUGETLB_ALLOCATION)
 | 
			
		||||
  set(CCOMMON_OPT "${CCOMMON_OPT} -DALLOC_HUGETLB")
 | 
			
		||||
endif ()
 | 
			
		||||
 | 
			
		||||
# Pass the HUGETLBFILE_ALLOCATION value to the C sources as HUGETLB_FILE_NAME.
# (A stray ")" used to be appended to the file name here.)
if (DEFINED HUGETLBFILE_ALLOCATION)
  set(CCOMMON_OPT "${CCOMMON_OPT} -DALLOC_HUGETLBFILE -DHUGETLB_FILE_NAME=${HUGETLBFILE_ALLOCATION}")
endif ()
 | 
			
		||||
 | 
			
		||||
if (STATIC_ALLOCATION)
 | 
			
		||||
  set(CCOMMON_OPT "${CCOMMON_OPT} -DALLOC_STATIC")
 | 
			
		||||
endif ()
 | 
			
		||||
 | 
			
		||||
if (DEVICEDRIVER_ALLOCATION)
 | 
			
		||||
  set(CCOMMON_OPT "${CCOMMON_OPT} -DALLOC_DEVICEDRIVER -DDEVICEDRIVER_NAME=\"/dev/mapper\"")
 | 
			
		||||
endif ()
 | 
			
		||||
 | 
			
		||||
if (MIXED_MEMORY_ALLOCATION)
 | 
			
		||||
  set(CCOMMON_OPT "${CCOMMON_OPT} -DMIXED_MEMORY_ALLOCATION")
 | 
			
		||||
endif ()
 | 
			
		||||
 | 
			
		||||
# Names of the external command line tools. On SunOS the GNU variants
# (g-prefixed) are selected, everywhere else the plain names are used.
if (NOT ${CMAKE_SYSTEM_NAME} STREQUAL "SunOS")
  set(TAR tar)
  set(PATCH patch)
  set(GREP grep)
else ()
  set(TAR gtar)
  set(PATCH gpatch)
  set(GREP ggrep)
endif ()

# MD5SUM may be preset by the caller; AWK is always the plain "awk".
if (NOT DEFINED MD5SUM)
  set(MD5SUM md5sum)
endif ()
set(AWK awk)

# Version strings derived from the project version.
set(REVISION "-r${OpenBLAS_VERSION}")
set(MAJOR_VERSION ${OpenBLAS_MAJOR_VERSION})

# Debug builds add -g. Because that defines COMMON_OPT, the -O2 default
# below is only applied when neither DEBUG nor the caller set COMMON_OPT.
if (DEBUG)
  set(COMMON_OPT "${COMMON_OPT} -g")
endif ()
if (NOT DEFINED COMMON_OPT)
  set(COMMON_OPT "-O2")
endif ()

#For x86 32-bit (MSVC is excluded from the -m32 switch)
if (DEFINED BINARY AND BINARY EQUAL 32 AND NOT MSVC)
  set(COMMON_OPT "${COMMON_OPT} -m32")
endif ()
 | 
			
		||||
 | 
			
		||||
# Fold the common options into the C flags; the assembler gets the same
# options except under MSVC.
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${COMMON_OPT} ${CCOMMON_OPT}")
if (NOT MSVC)
  set(CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} ${COMMON_OPT} ${CCOMMON_OPT}")
endif ()
# TODO: not sure what PFLAGS is -hpa
set(PFLAGS "${PFLAGS} ${COMMON_OPT} ${CCOMMON_OPT} -I${TOPDIR} -DPROFILE ${COMMON_PROF}")

set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} ${COMMON_OPT} ${FCOMMON_OPT}")
# TODO: not sure what FPFLAGS is -hpa
set(FPFLAGS "${FPFLAGS} ${COMMON_OPT} ${FCOMMON_OPT} ${COMMON_PROF}")

#For LAPACK Fortran codes.
set(LAPACK_FFLAGS "${LAPACK_FFLAGS} ${CMAKE_Fortran_FLAGS}")
set(LAPACK_FPFLAGS "${LAPACK_FPFLAGS} ${FPFLAGS}")

#Disable -fopenmp for LAPACK Fortran codes on Windows.
if ("${CMAKE_SYSTEM_NAME}" STREQUAL "Windows")
  set(FILTER_FLAGS "-fopenmp;-mp;-openmp;-xopenmp=parallel")
  foreach (FILTER_FLAG ${FILTER_FLAGS})
    # Inputs are quoted so that each flag string is treated as one value
    # (unquoted, an empty value would drop the argument and a value containing
    # ';' would be concatenated without its separators).
    string(REPLACE "${FILTER_FLAG}" "" LAPACK_FFLAGS "${LAPACK_FFLAGS}")
    string(REPLACE "${FILTER_FLAG}" "" LAPACK_FPFLAGS "${LAPACK_FPFLAGS}")
  endforeach ()
endif ()

if ("${F_COMPILER}" STREQUAL "GFORTRAN")
  # lapack-netlib is rife with uninitialized warnings -hpa
  set(LAPACK_FFLAGS "${LAPACK_FFLAGS} -Wno-maybe-uninitialized")
endif ()

# LAPACK(E) C flags start from the regular C flags.
set(LAPACK_CFLAGS "${CMAKE_C_FLAGS} -DHAVE_LAPACK_CONFIG_H")
if (INTERFACE64)
  set(LAPACK_CFLAGS "${LAPACK_CFLAGS} -DLAPACK_ILP64")
endif ()

if ("${CMAKE_SYSTEM_NAME}" STREQUAL "Windows")
  set(LAPACK_CFLAGS "${LAPACK_CFLAGS} -DOPENBLAS_OS_WINDOWS")
endif ()

# NOTE(review): CMAKE_C_COMPILER normally holds the compiler path, so the "LSB"
# comparison is presumably never true; a compiler-id variable may have been
# intended here - confirm against the code that detects the C compiler.
if ("${CMAKE_C_COMPILER}" STREQUAL "LSB" OR "${CMAKE_SYSTEM_NAME}" STREQUAL "Windows")
  set(LAPACK_CFLAGS "${LAPACK_CFLAGS} -DLAPACK_COMPLEX_STRUCTURE")
endif ()
 | 
			
		||||
 | 
			
		||||
# File name suffixes: regular objects, profiled objects and static libraries.
# Each one can be preset by the caller.
if (NOT DEFINED SUFFIX)
  set(SUFFIX o)
endif ()

if (NOT DEFINED PSUFFIX)
  set(PSUFFIX po)
endif ()

if (NOT DEFINED LIBSUFFIX)
  set(LIBSUFFIX a)
endif ()

# Library base name: <prefix>[_<core>][p]<revision>
#   - the "_<core>" part is omitted for DYNAMIC_ARCH builds
#   - the "p" part is added when SMP is defined
set(libname_core_tag "")
if (NOT DYNAMIC_ARCH)
  set(libname_core_tag "_${LIBCORE}")
endif ()

set(libname_smp_tag "")
if (DEFINED SMP)
  set(libname_smp_tag "p")
endif ()

set(libname_stem "${LIBPREFIX}${libname_core_tag}${libname_smp_tag}${REVISION}")

set(LIBNAME "${libname_stem}.${LIBSUFFIX}")
set(LIBNAME_P "${libname_stem}_p.${LIBSUFFIX}")

# Companion file names. These swap the static library suffix for their own
# extension; previously the extension was appended after "<LIBNAME>.<LIBSUFFIX>",
# which duplicated the suffix (e.g. "foo.a.a.so").
set(LIBDLLNAME "${LIBPREFIX}.dll")
set(LIBSONAME "${libname_stem}.so")
set(LIBDYNNAME "${libname_stem}.dylib")
set(LIBDEFNAME "${libname_stem}.def")
set(LIBEXPNAME "${libname_stem}.exp")
set(LIBZIPNAME "${libname_stem}.zip")

set(LIBS "${CMAKE_SOURCE_DIR}/${LIBNAME}")
set(LIBS_P "${CMAKE_SOURCE_DIR}/${LIBNAME_P}")

# The helper variables are not part of the exported set of names.
unset(libname_core_tag)
unset(libname_smp_tag)
unset(libname_stem)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# Space separated list of the components that make up the library.
set(LIB_COMPONENTS BLAS)
if (NOT NO_CBLAS)
  set(LIB_COMPONENTS "${LIB_COMPONENTS} CBLAS")
endif ()

if (NOT NO_LAPACK)
  set(LIB_COMPONENTS "${LIB_COMPONENTS} LAPACK")
  if (NOT NO_LAPACKE)
    set(LIB_COMPONENTS "${LIB_COMPONENTS} LAPACKE")
  endif ()
endif ()

# ONLY_CBLAS overrides everything selected above.
if (ONLY_CBLAS)
  set(LIB_COMPONENTS CBLAS)
endif ()

# For GEMM3M
set(USE_GEMM3M 0)

if (DEFINED ARCH)
  # The expansions are quoted so that an unset/empty ARCH or CORE compares as
  # an empty string instead of producing a malformed if() expression.
  if ("${ARCH}" STREQUAL "x86" OR "${ARCH}" STREQUAL "x86_64" OR "${ARCH}" STREQUAL "ia64" OR "${ARCH}" STREQUAL "MIPS")
    set(USE_GEMM3M 1)
  endif ()

  # The generic core never uses GEMM3M, whatever the architecture.
  if ("${CORE}" STREQUAL "generic")
    set(USE_GEMM3M 0)
  endif ()
endif ()
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#export OSNAME
 | 
			
		||||
#export ARCH
 | 
			
		||||
#export CORE
 | 
			
		||||
#export LIBCORE
 | 
			
		||||
#export PGCPATH
 | 
			
		||||
#export CONFIG
 | 
			
		||||
#export CC
 | 
			
		||||
#export FC
 | 
			
		||||
#export BU
 | 
			
		||||
#export FU
 | 
			
		||||
#export NEED2UNDERSCORES
 | 
			
		||||
#export USE_THREAD
 | 
			
		||||
#export NUM_THREADS
 | 
			
		||||
#export NUM_CORES
 | 
			
		||||
#export SMP
 | 
			
		||||
#export MAKEFILE_RULE
 | 
			
		||||
#export NEED_PIC
 | 
			
		||||
#export BINARY
 | 
			
		||||
#export BINARY32
 | 
			
		||||
#export BINARY64
 | 
			
		||||
#export F_COMPILER
 | 
			
		||||
#export C_COMPILER
 | 
			
		||||
#export USE_OPENMP
 | 
			
		||||
#export CROSS
 | 
			
		||||
#export CROSS_SUFFIX
 | 
			
		||||
#export NOFORTRAN
 | 
			
		||||
#export NO_FBLAS
 | 
			
		||||
#export EXTRALIB
 | 
			
		||||
#export CEXTRALIB
 | 
			
		||||
#export FEXTRALIB
 | 
			
		||||
#export HAVE_SSE
 | 
			
		||||
#export HAVE_SSE2
 | 
			
		||||
#export HAVE_SSE3
 | 
			
		||||
#export HAVE_SSSE3
 | 
			
		||||
#export HAVE_SSE4_1
 | 
			
		||||
#export HAVE_SSE4_2
 | 
			
		||||
#export HAVE_SSE4A
 | 
			
		||||
#export HAVE_SSE5
 | 
			
		||||
#export HAVE_AVX
 | 
			
		||||
#export HAVE_VFP
 | 
			
		||||
#export HAVE_VFPV3
 | 
			
		||||
#export HAVE_VFPV4
 | 
			
		||||
#export HAVE_NEON
 | 
			
		||||
#export KERNELDIR
 | 
			
		||||
#export FUNCTION_PROFILE
 | 
			
		||||
#export TARGET_CORE
 | 
			
		||||
#
 | 
			
		||||
#export SGEMM_UNROLL_M
 | 
			
		||||
#export SGEMM_UNROLL_N
 | 
			
		||||
#export DGEMM_UNROLL_M
 | 
			
		||||
#export DGEMM_UNROLL_N
 | 
			
		||||
#export QGEMM_UNROLL_M
 | 
			
		||||
#export QGEMM_UNROLL_N
 | 
			
		||||
#export CGEMM_UNROLL_M
 | 
			
		||||
#export CGEMM_UNROLL_N
 | 
			
		||||
#export ZGEMM_UNROLL_M
 | 
			
		||||
#export ZGEMM_UNROLL_N
 | 
			
		||||
#export XGEMM_UNROLL_M
 | 
			
		||||
#export XGEMM_UNROLL_N
 | 
			
		||||
#export CGEMM3M_UNROLL_M
 | 
			
		||||
#export CGEMM3M_UNROLL_N
 | 
			
		||||
#export ZGEMM3M_UNROLL_M
 | 
			
		||||
#export ZGEMM3M_UNROLL_N
 | 
			
		||||
#export XGEMM3M_UNROLL_M
 | 
			
		||||
#export XGEMM3M_UNROLL_N
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#if (USE_CUDA)
 | 
			
		||||
#  export CUDADIR
 | 
			
		||||
#  export CUCC
 | 
			
		||||
#  export CUFLAGS
 | 
			
		||||
#  export CULIB
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#.SUFFIXES: .$(PSUFFIX) .$(SUFFIX) .f
 | 
			
		||||
#
 | 
			
		||||
#.f.$(SUFFIX):
 | 
			
		||||
#	$(FC) $(FFLAGS) -c $<  -o $(@F)
 | 
			
		||||
#
 | 
			
		||||
#.f.$(PSUFFIX):
 | 
			
		||||
#	$(FC) $(FPFLAGS) -pg -c $<  -o $(@F)
 | 
			
		||||
 | 
			
		||||
# these are not cross-platform
 | 
			
		||||
#ifdef BINARY64
 | 
			
		||||
#PATHSCALEPATH	= /opt/pathscale/lib/3.1
 | 
			
		||||
#PGIPATH		= /opt/pgi/linux86-64/7.1-5/lib
 | 
			
		||||
#else
 | 
			
		||||
#PATHSCALEPATH	= /opt/pathscale/lib/3.1/32
 | 
			
		||||
#PGIPATH		= /opt/pgi/linux86/7.1-5/lib
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#ACMLPATH	= /opt/acml/4.3.0
 | 
			
		||||
#ifneq ($(OSNAME), Darwin)
 | 
			
		||||
#MKLPATH         = /opt/intel/mkl/10.2.2.025/lib
 | 
			
		||||
#else
 | 
			
		||||
#MKLPATH         = /Library/Frameworks/Intel_MKL.framework/Versions/10.0.1.014/lib
 | 
			
		||||
#endif
 | 
			
		||||
#ATLASPATH	= /opt/atlas/3.9.17/opteron
 | 
			
		||||
#FLAMEPATH	= $(HOME)/flame/lib
 | 
			
		||||
#ifneq ($(OSNAME), SunOS)
 | 
			
		||||
#SUNPATH		= /opt/sunstudio12.1
 | 
			
		||||
#else
 | 
			
		||||
#SUNPATH		= /opt/SUNWspro
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -0,0 +1,346 @@
 | 
			
		|||
# Functions to help with the OpenBLAS build
 | 
			
		||||
 | 
			
		||||
# Turns the getarch output into CMake variables in the caller's scope.
# The input is scanned for NAME=VALUE pairs where both sides consist only of
# letters, digits and underscores; everything else is ignored.
# @param GETARCH_IN the text produced by getarch
function(ParseGetArchVars GETARCH_IN)
  string(REGEX MATCHALL "[0-9_a-zA-Z]+=[0-9_a-zA-Z]+" assignments "${GETARCH_IN}")
  foreach (assignment ${assignments})
    # everything before the '=' is the variable name, everything after it the value
    string(REGEX REPLACE "=.*$" "" key "${assignment}")
    string(REGEX REPLACE "^[^=]*=" "" value "${assignment}")
    set(${key} ${value} PARENT_SCOPE)
  endforeach ()
endfunction ()
 | 
			
		||||
 | 
			
		||||
# Reads a Makefile into CMake vars.
# Every line containing "NAME = VALUE" sets the CMake variable NAME to VALUE.
# References of the form $(OTHER) inside VALUE are replaced with the current
# value of the CMake variable OTHER (empty if it is not set).
# Lines of the form "include $(KERNELDIR)/<file>" are followed recursively,
# using the CMake variable KERNELDIR as the directory.
# This is a macro on purpose: the parsed variables are set directly in the
# scope of the caller.
# @param MAKEFILE_IN path of the Makefile to read
macro(ParseMakefileVars MAKEFILE_IN)
  message(STATUS "Reading vars from ${MAKEFILE_IN}...")
  file(STRINGS ${MAKEFILE_IN} makefile_contents)
  foreach (makefile_line ${makefile_contents})
    string(REGEX MATCH "([0-9_a-zA-Z]+)[ \t]*=[ \t]*(.+)$" line_match "${makefile_line}")
    if (NOT "${line_match}" STREQUAL "")
      set(var_name ${CMAKE_MATCH_1})
      set(var_value ${CMAKE_MATCH_2})
      # check for Makefile variables in the string, e.g. $(TSUFFIX)
      # (the value is always passed quoted: it may become empty after a
      # substitution, and unquoted an empty value would drop the argument)
      string(REGEX MATCHALL "\\$\\(([0-9_a-zA-Z]+)\\)" make_var_matches "${var_value}")
      foreach (make_var ${make_var_matches})
        # strip out Makefile $() markup
        string(REGEX REPLACE "\\$\\(([0-9_a-zA-Z]+)\\)" "\\1" make_var "${make_var}")
        # now replace the instance of the Makefile variable with the value of the CMake variable (note the double quote)
        string(REPLACE "$(${make_var})" "${${make_var}}" var_value "${var_value}")
      endforeach ()
      set(${var_name} ${var_value})
    else ()
      string(REGEX MATCH "include \\$\\(KERNELDIR\\)/(.+)$" line_match "${makefile_line}")
      if (NOT "${line_match}" STREQUAL "")
        ParseMakefileVars(${KERNELDIR}/${CMAKE_MATCH_1})
      endif ()
    endif ()
  endforeach ()
endmacro ()
 | 
			
		||||
 | 
			
		||||
# Returns all combinations of the input list, as a list with colon-separated combinations
# E.g. input of A B C returns " " A B A:B C A:C B:C A:B:C (the single space stands for the empty combination)
# N.B. The input is meant to be a list, and to pass a list to a function in CMake you must quote it (e.g. AllCombinations("${LIST_VAR}")).
# @param list_in the list of items to combine
# @param absent_codes_in codes to use when an element is absent from a combination. For example, if you have TRANS;UNIT;UPPER you may want the code to be NNL when nothing is present.
# @returns LIST_OUT a list of combinations
#          CODES_OUT a list of codes corresponding to each combination, with the absent code meaning the item is not present, and the first letter of the list item meaning it is present
function(AllCombinations list_in absent_codes_in)
  list(LENGTH list_in list_count)
  # highest combination index: 2^list_count - 1 (the loop below runs from 0 to num_combos inclusive)
  math(EXPR num_combos "(1 << ${list_count}) - 1")
  # index of the last list element; does not change between combinations
  math(EXPR last_list_index "${list_count} - 1")
  set(LIST_OUT "")
  set(CODES_OUT "")
  foreach (c RANGE 0 ${num_combos})

    set(current_combo "")
    set(current_code "")

    # bit i of c decides whether list element i is part of this combination
    foreach (list_index RANGE 0 ${last_list_index})
      math(EXPR bit "1 << ${list_index}")
      math(EXPR combo_has_bit "${c} & ${bit}")
      list(GET list_in ${list_index} list_elem)
      if (NOT combo_has_bit EQUAL 0)
        # Compare against the empty string explicitly: a plain if(current_combo)
        # would treat element names such as NO, OFF or N as "false" and restart
        # the combination.
        if (current_combo STREQUAL "")
          set(current_combo "${list_elem}")
        else ()
          set(current_combo "${current_combo}:${list_elem}")
        endif ()
        string(SUBSTRING ${list_elem} 0 1 code_char)
      else ()
        list(GET absent_codes_in ${list_index} code_char)
      endif ()
      set(current_code "${current_code}${code_char}")
    endforeach ()

    if (current_combo STREQUAL "")
      list(APPEND LIST_OUT " ") # Empty set is a valid combination, but CMake isn't appending the empty string for some reason, use a space
    else ()
      list(APPEND LIST_OUT ${current_combo})
    endif ()
    list(APPEND CODES_OUT ${current_code})

  endforeach ()

  set(LIST_OUT ${LIST_OUT} PARENT_SCOPE)
  set(CODES_OUT ${CODES_OUT} PARENT_SCOPE)
endfunction ()
 | 
			
		||||
 | 
			
		||||
# generates object files for each of the sources, using the BLAS naming scheme to pass the function name as a preprocessor definition
# For every (float type, source) pair a small wrapper source is written to the build tree; it #defines the
# names/options and then #includes the real source. The wrapper paths are appended to OPENBLAS_SRC in the caller's scope.
# All parameters after sources_in are optional and positional.
# @param sources_in the source files to build from
# @param defines_in (optional) preprocessor definitions that will be applied to all objects
# @param name_in (optional) if this is set this name will be used instead of the filename. Use a * to indicate where the float character should go, if no star the character will be prepended.
#                           e.g. with DOUBLE set, "i*max" will generate the name "idmax", and "max" will be "dmax"
# @param use_cblas (optional) if true the object name is prefixed with "cblas_" and CBLAS is added to the defines
# @param replace_last_with replaces the last character in the filename with this string (e.g. symm_k should be symm_TU)
# @param append_with appends the filename with this string (e.g. trmm_R should be trmm_RTUU or some other combination of characters)
# @param no_float_type turns off the float type define for this build (e.g. SINGLE/DOUBLE/etc)
# @param complex_filename_scheme some routines have separate source files for complex and non-complex float types.
#                               0 - compiles for all types
#                               1 - compiles the sources for non-complex types only (SINGLE/DOUBLE)
#                               2 - compiles for complex types only (COMPLEX/DOUBLE COMPLEX)
#                               3 - compiles for all types, but changes source names for complex by prepending z (e.g. axpy.c becomes zaxpy.c)
#                               4 - compiles for complex types only, but changes source names for complex by prepending z (e.g. hemv.c becomes zhemv.c)
#                               STRING - compiles only the given type (e.g. DOUBLE)
function(GenerateNamedObjects sources_in)

  # Optional arguments are detected with ARGC: ARGVn beyond the number of
  # arguments actually passed is not reliable (it can leak in from a calling
  # function), so DEFINED ARGVn is not a safe test.
  set(defines_in "")
  if (ARGC GREATER 1)
    set(defines_in ${ARGV1})
  endif ()

  set(name_in "")
  if (ARGC GREATER 2 AND NOT "${ARGV2}" STREQUAL "")
    set(name_in ${ARGV2})
    # strip off extension for kernel files that pass in the object name.
    get_filename_component(name_in ${name_in} NAME_WE)
  endif ()

  set(use_cblas false)
  if (ARGC GREATER 3)
    set(use_cblas ${ARGV3})
  endif ()

  set(replace_last_with "")
  if (ARGC GREATER 4)
    set(replace_last_with ${ARGV4})
  endif ()

  set(append_with "")
  if (ARGC GREATER 5)
    set(append_with ${ARGV5})
  endif ()

  set(no_float_type false)
  if (ARGC GREATER 6)
    set(no_float_type ${ARGV6})
  endif ()

  if (no_float_type)
    set(float_list "DUMMY") # still need to loop once
  else ()
    set(float_list "${FLOAT_TYPES}")
  endif ()

  # decode complex_filename_scheme
  set(real_only false)
  set(complex_only false)
  set(mangle_complex_sources false)
  if (ARGC GREATER 7 AND NOT "${ARGV7}" STREQUAL "")
    if ("${ARGV7}" EQUAL 1)
      set(real_only true)
    elseif ("${ARGV7}" EQUAL 2)
      set(complex_only true)
    elseif ("${ARGV7}" EQUAL 3)
      set(mangle_complex_sources true)
    elseif ("${ARGV7}" EQUAL 4)
      set(mangle_complex_sources true)
      set(complex_only true)
    elseif (NOT "${ARGV7}" EQUAL 0)
      # anything that is not one of the numeric schemes names the type(s) to build
      set(float_list ${ARGV7})
    endif ()
  endif ()

  if (complex_only)
    list(REMOVE_ITEM float_list "SINGLE")
    list(REMOVE_ITEM float_list "DOUBLE")
  elseif (real_only)
    list(REMOVE_ITEM float_list "COMPLEX")
    list(REMOVE_ITEM float_list "ZCOMPLEX")
  endif ()

  set(float_char "")
  # start from an empty list so nothing is inherited from the calling scope
  set(SRC_LIST_OUT "")
  foreach (float_type ${float_list})
    foreach (source_file ${sources_in})

      # first letter of the float type, lower case (s/d/c/z); stays empty with no_float_type
      if (NOT no_float_type)
        string(SUBSTRING ${float_type} 0 1 float_char)
        string(TOLOWER ${float_char} float_char)
      endif ()

      if (NOT name_in)
        get_filename_component(source_name ${source_file} NAME_WE)
        set(obj_name "${float_char}${source_name}")
      else ()
        # replace * with float_char
        if ("${name_in}" MATCHES "\\*")
          # float_char is quoted because it is empty when no_float_type is set
          string(REPLACE "*" "${float_char}" obj_name "${name_in}")
        else ()
          set(obj_name "${float_char}${name_in}")
        endif ()
      endif ()

      if (replace_last_with)
        string(REGEX REPLACE ".$" "${replace_last_with}" obj_name "${obj_name}")
      else ()
        set(obj_name "${obj_name}${append_with}")
      endif ()

      # now add the object and set the defines
      set(obj_defines ${defines_in})

      if (use_cblas)
        set(obj_name "cblas_${obj_name}")
        list(APPEND obj_defines "CBLAS")
      endif ()

      list(APPEND obj_defines "ASMNAME=${FU}${obj_name};ASMFNAME=${FU}${obj_name}${BU};NAME=${obj_name}${BU};CNAME=${obj_name};CHAR_NAME=\"${obj_name}${BU}\";CHAR_CNAME=\"${obj_name}\"")
      if (${float_type} STREQUAL "DOUBLE" OR ${float_type} STREQUAL "ZCOMPLEX")
        list(APPEND obj_defines "DOUBLE")
      endif ()
      if (${float_type} STREQUAL "COMPLEX" OR ${float_type} STREQUAL "ZCOMPLEX")
        list(APPEND obj_defines "COMPLEX")
        if (mangle_complex_sources)
          # add a z to the filename
          get_filename_component(source_name ${source_file} NAME)
          string(REPLACE "${source_name}" "z${source_name}" source_file "${source_file}")
        endif ()
      endif ()

      if (VERBOSE_GEN)
        message(STATUS "${obj_name}:${source_file}")
        message(STATUS "${obj_defines}")
      endif ()

      # create a copy of the source to avoid duplicate obj filename problem with ar.exe
      get_filename_component(source_extension ${source_file} EXT)
      set(new_source_file "${CMAKE_CURRENT_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/${obj_name}${source_extension}")
      if (IS_ABSOLUTE ${source_file})
        set(old_source_file ${source_file})
      else ()
        set(old_source_file "${CMAKE_CURRENT_LIST_DIR}/${source_file}")
      endif ()

      # each define becomes one "#define NAME VALUE" line in the wrapper source
      string(REPLACE ";" "\n#define " define_source "${obj_defines}")
      string(REPLACE "=" " " define_source "${define_source}")
      file(WRITE ${new_source_file} "#define ${define_source}\n#include \"${old_source_file}\"")
      list(APPEND SRC_LIST_OUT ${new_source_file})

    endforeach ()
  endforeach ()

  list(APPEND OPENBLAS_SRC ${SRC_LIST_OUT})
  set(OPENBLAS_SRC ${OPENBLAS_SRC} PARENT_SCOPE)
endfunction ()
 | 
			
		||||
 | 
			
		||||
# generates object files for each of the sources for each of the combinations of the preprocessor definitions passed in
# The generated sources are appended to OPENBLAS_SRC in the caller's scope (via GenerateNamedObjects).
# @param sources_in the source files to build from
# @param defines_in the preprocessor definitions that will be combined to create the object files
# @param absent_codes_in the code letters to use for a definition that is absent from a combination (see AllCombinations)
# @param all_defines_in preprocessor definitions that will be applied to all objects (may be empty)
# @param replace_scheme If 1, replace the "k" in the filename with the define combo letters. E.g. symm_k.c with TRANS and UNIT defined will be symm_TU.
#                  If 0, it will simply append the code, e.g. symm_L.c with TRANS and UNIT will be symm_LTU.
#                  If 2, it will append the code with an underscore, e.g. symm.c with TRANS and UNIT will be symm_TU.
#                  If 3, it will insert the code *around* the last character with an underscore, e.g. symm_L.c with TRANS and UNIT will be symm_TLU (required by BLAS level2 objects).
#                  If 4, it will insert the code before the last underscore. E.g. trtri_U_parallel with TRANS will be trtri_UT_parallel
# @param alternate_name (optional) replaces the source name as the object name (define codes are still appended)
# @param no_float_type (optional) turns off the float type define for this build (e.g. SINGLE/DOUBLE/etc)
# @param complex_filename_scheme (optional) see GenerateNamedObjects
function(GenerateCombinationObjects sources_in defines_in absent_codes_in all_defines_in replace_scheme)

  # Optional arguments are detected with ARGC: ARGVn beyond the number of
  # arguments actually passed is not reliable, so DEFINED ARGVn is not a safe test.
  set(alternate_name_in "")
  if (ARGC GREATER 5)
    set(alternate_name_in ${ARGV5})
  endif ()

  set(no_float_type false)
  if (ARGC GREATER 6)
    set(no_float_type ${ARGV6})
  endif ()

  set(complex_filename_scheme "")
  if (ARGC GREATER 7)
    set(complex_filename_scheme ${ARGV7})
  endif ()

  AllCombinations("${defines_in}" "${absent_codes_in}")
  set(define_combos ${LIST_OUT})
  set(define_codes ${CODES_OUT})

  list(LENGTH define_combos num_combos)
  math(EXPR num_combos "${num_combos} - 1")

  foreach (c RANGE 0 ${num_combos})

    list(GET define_combos ${c} define_combo)
    list(GET define_codes ${c} define_code)

    # replace colon separated list with semicolons, this turns it into a CMake list that we can use foreach with.
    # This is done once per combination and with a quoted input: repeating it
    # per source file with an unquoted input would glue the already split
    # entries together (A;B -> AB) from the second source file on.
    string(REPLACE ":" ";" define_combo "${define_combo}")

    # the defines for this combination; a single space marks the empty combination
    set(cur_defines ${define_combo})
    if ("${cur_defines}" STREQUAL " ")
      set(cur_defines ${all_defines_in})
    else ()
      list(APPEND cur_defines ${all_defines_in})
    endif ()

    foreach (source_file ${sources_in})

      set(alternate_name ${alternate_name_in})

      # work out how the combination code is merged into the object name
      set(replace_code "")
      set(append_code "")
      if (replace_scheme EQUAL 1)
        set(replace_code ${define_code})
      else ()
        if (replace_scheme EQUAL 2)
          set(append_code "_${define_code}")
        elseif (replace_scheme EQUAL 3)
          if ("${alternate_name}" STREQUAL "")
            string(REGEX MATCH "[a-zA-Z]\\." last_letter ${source_file})
          else ()
            string(REGEX MATCH "[a-zA-Z]$" last_letter ${alternate_name})
          endif ()
          # first extract the last letter
          string(SUBSTRING ${last_letter} 0 1 last_letter) # remove period from match
          # break the code up into the first letter and the remaining (should only be 2 anyway)
          string(SUBSTRING ${define_code} 0 1 define_code_first)
          string(SUBSTRING ${define_code} 1 -1 define_code_second)
          set(replace_code "${define_code_first}${last_letter}${define_code_second}")
        elseif (replace_scheme EQUAL 4)
          # insert code before the last underscore and pass that in as the alternate_name
          if ("${alternate_name}" STREQUAL "")
            get_filename_component(alternate_name ${source_file} NAME_WE)
          endif ()
          set(extra_underscore "")
          # check if filename has two underscores, insert another if not (e.g. getrs_parallel needs to become getrs_U_parallel not getrsU_parallel)
          string(REGEX MATCH "_[a-zA-Z]+_" underscores ${alternate_name})
          string(LENGTH "${underscores}" underscores)
          if (underscores EQUAL 0)
            set(extra_underscore "_")
          endif ()
          string(REGEX REPLACE "(.+)(_[^_]+)$" "\\1${extra_underscore}${define_code}\\2" alternate_name ${alternate_name})
        else()
          set(append_code ${define_code}) # replace_scheme should be 0
        endif ()
      endif ()

      GenerateNamedObjects("${source_file}" "${cur_defines}" "${alternate_name}" false "${replace_code}" "${append_code}" "${no_float_type}" "${complex_filename_scheme}")
    endforeach ()
  endforeach ()

  # GenerateNamedObjects updated OPENBLAS_SRC in this scope; hand it on to our caller
  set(OPENBLAS_SRC ${OPENBLAS_SRC} PARENT_SCOPE)
endfunction ()
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										113
									
								
								common.h
								
								
								
								
							
							
						
						
									
										113
									
								
								common.h
								
								
								
								
							| 
						 | 
				
			
			@ -82,7 +82,10 @@ extern "C" {
 | 
			
		|||
#include <stdio.h>
 | 
			
		||||
#include <stdlib.h>
 | 
			
		||||
#include <string.h>
 | 
			
		||||
 | 
			
		||||
#if !defined(_MSC_VER)
 | 
			
		||||
#include <unistd.h>
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#ifdef OS_LINUX
 | 
			
		||||
#include <malloc.h>
 | 
			
		||||
| 
						 | 
				
			
			@ -93,6 +96,14 @@ extern "C" {
 | 
			
		|||
#include <sched.h>
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#ifdef OS_ANDROID
 | 
			
		||||
#define NO_SYSV_IPC
 | 
			
		||||
//Android NDK only supports complex.h since Android 5.0
 | 
			
		||||
#if __ANDROID_API__ < 21
 | 
			
		||||
#define FORCE_OPENBLAS_COMPLEX_STRUCT
 | 
			
		||||
#endif
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#ifdef OS_WINDOWS
 | 
			
		||||
#ifdef  ATOM
 | 
			
		||||
#define GOTO_ATOM ATOM
 | 
			
		||||
| 
						 | 
				
			
			@ -106,8 +117,11 @@ extern "C" {
 | 
			
		|||
#endif
 | 
			
		||||
#else
 | 
			
		||||
#include <sys/mman.h>
 | 
			
		||||
#ifndef NO_SYSV_IPC
 | 
			
		||||
#include <sys/shm.h>
 | 
			
		||||
#endif
 | 
			
		||||
#include <sys/time.h>
 | 
			
		||||
#include <time.h>
 | 
			
		||||
#include <unistd.h>
 | 
			
		||||
#include <math.h>
 | 
			
		||||
#ifdef SMP
 | 
			
		||||
| 
						 | 
				
			
			@ -287,13 +301,6 @@ typedef int blasint;
 | 
			
		|||
#define COMPSIZE  2
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#if defined(C_PGI) || defined(C_SUN)
 | 
			
		||||
#define CREAL(X)	(*((FLOAT *)&X + 0))
 | 
			
		||||
#define CIMAG(X)	(*((FLOAT *)&X + 1))
 | 
			
		||||
#else
 | 
			
		||||
#define CREAL	__real__
 | 
			
		||||
#define CIMAG	__imag__
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#define Address_H(x) (((x)+(1<<15))>>16)
 | 
			
		||||
#define Address_L(x) ((x)-((Address_H(x))<<16))
 | 
			
		||||
| 
						 | 
				
			
			@ -307,8 +314,12 @@ typedef int blasint;
 | 
			
		|||
#endif
 | 
			
		||||
 | 
			
		||||
#if defined(OS_WINDOWS)
 | 
			
		||||
#if defined(_MSC_VER) && !defined(__clang__)
 | 
			
		||||
#define YIELDING    YieldProcessor()
 | 
			
		||||
#else
 | 
			
		||||
#define YIELDING	SwitchToThread()
 | 
			
		||||
#endif
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#if defined(ARMV7) || defined(ARMV6) || defined(ARMV8) || defined(ARMV5)
 | 
			
		||||
#define YIELDING        asm volatile ("nop;nop;nop;nop;nop;nop;nop;nop; \n");
 | 
			
		||||
| 
						 | 
				
			
			@ -404,7 +415,51 @@ typedef char env_var_t[MAX_PATH];
 | 
			
		|||
typedef char* env_var_t;
 | 
			
		||||
#define readenv(p, n) ((p)=getenv(n))
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#if !defined(RPCC_DEFINED) && !defined(OS_WINDOWS)
 | 
			
		||||
#ifdef _POSIX_MONOTONIC_CLOCK
 | 
			
		||||
#if defined(__GLIBC_PREREQ) // cut the if condition if two lines, otherwise will fail at __GLIBC_PREREQ(2, 17)
 | 
			
		||||
#if __GLIBC_PREREQ(2, 17) // don't require -lrt
 | 
			
		||||
#define USE_MONOTONIC
 | 
			
		||||
#endif
 | 
			
		||||
#elif defined(OS_ANDROID)
 | 
			
		||||
#define USE_MONOTONIC
 | 
			
		||||
#endif
 | 
			
		||||
#endif
 | 
			
		||||
/* use similar scale as x86 rdtsc for timeouts to work correctly */
 | 
			
		||||
static inline unsigned long long rpcc(void){
 | 
			
		||||
#ifdef USE_MONOTONIC
 | 
			
		||||
  struct timespec ts;
 | 
			
		||||
  clock_gettime(CLOCK_MONOTONIC, &ts);
 | 
			
		||||
  return (unsigned long long)ts.tv_sec * 1000000000ull + ts.tv_nsec;
 | 
			
		||||
#else
 | 
			
		||||
  struct timeval tv;
 | 
			
		||||
  gettimeofday(&tv,NULL);
 | 
			
		||||
  return (unsigned long long)tv.tv_sec * 1000000000ull + tv.tv_usec * 1000;
 | 
			
		||||
#endif
 | 
			
		||||
}
 | 
			
		||||
#define RPCC_DEFINED
 | 
			
		||||
#define RPCC64BIT
 | 
			
		||||
#endif // !RPCC_DEFINED
 | 
			
		||||
 | 
			
		||||
#if !defined(BLAS_LOCK_DEFINED) && defined(__GNUC__)
 | 
			
		||||
static void __inline blas_lock(volatile BLASULONG *address){
 | 
			
		||||
 | 
			
		||||
  do {
 | 
			
		||||
    while (*address) {YIELDING;};
 | 
			
		||||
 | 
			
		||||
  } while (!__sync_bool_compare_and_swap(address, 0, 1));
 | 
			
		||||
}
 | 
			
		||||
#define BLAS_LOCK_DEFINED
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#ifndef RPCC_DEFINED
 | 
			
		||||
#error "rpcc() implementation is missing for your platform"
 | 
			
		||||
#endif
 | 
			
		||||
#ifndef BLAS_LOCK_DEFINED
 | 
			
		||||
#error "blas_lock() implementation is missing for your platform"
 | 
			
		||||
#endif
 | 
			
		||||
#endif // !ASSEMBLER
 | 
			
		||||
 | 
			
		||||
#ifdef OS_LINUX
 | 
			
		||||
#include "common_linux.h"
 | 
			
		||||
| 
						 | 
				
			
			@ -450,18 +505,52 @@ typedef char* env_var_t;
 | 
			
		|||
/* C99 supports complex floating numbers natively, which GCC also offers as an
 | 
			
		||||
   extension since version 3.0.  If neither are available, use a compatible
 | 
			
		||||
   structure as fallback (see Clause 6.2.5.13 of the C99 standard). */
 | 
			
		||||
#if (defined(__STDC_IEC_559_COMPLEX__) || __STDC_VERSION__ >= 199901L || \
 | 
			
		||||
     (__GNUC__ >= 3 && !defined(__cplusplus)))
 | 
			
		||||
#if ((defined(__STDC_IEC_559_COMPLEX__) || __STDC_VERSION__ >= 199901L || \
 | 
			
		||||
      (__GNUC__ >= 3 && !defined(__cplusplus))) && !(defined(FORCE_OPENBLAS_COMPLEX_STRUCT)))
 | 
			
		||||
  #define OPENBLAS_COMPLEX_C99
 | 
			
		||||
  #ifndef __cplusplus
 | 
			
		||||
    #include <complex.h>
 | 
			
		||||
  #endif
 | 
			
		||||
  typedef float _Complex openblas_complex_float;
 | 
			
		||||
  typedef double _Complex openblas_complex_double;
 | 
			
		||||
  typedef xdouble _Complex openblas_complex_xdouble;
 | 
			
		||||
  #define openblas_make_complex_float(real, imag)    ((real) + ((imag) * _Complex_I))
 | 
			
		||||
  #define openblas_make_complex_double(real, imag)   ((real) + ((imag) * _Complex_I))
 | 
			
		||||
  #define openblas_make_complex_xdouble(real, imag)  ((real) + ((imag) * _Complex_I))
 | 
			
		||||
#else
 | 
			
		||||
  #define OPENBLAS_COMPLEX_STRUCT
 | 
			
		||||
  typedef struct { float real, imag; } openblas_complex_float;
 | 
			
		||||
  typedef struct { double real, imag; } openblas_complex_double;
 | 
			
		||||
  typedef struct { xdouble real, imag; } openblas_complex_xdouble;
 | 
			
		||||
  #define openblas_make_complex_float(real, imag)    {(real), (imag)}
 | 
			
		||||
  #define openblas_make_complex_double(real, imag)   {(real), (imag)}
 | 
			
		||||
  #define openblas_make_complex_xdouble(real, imag)  {(real), (imag)}
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#ifdef XDOUBLE
 | 
			
		||||
#define OPENBLAS_COMPLEX_FLOAT openblas_complex_xdouble
 | 
			
		||||
#define OPENBLAS_MAKE_COMPLEX_FLOAT(r,i) openblas_make_complex_xdouble(r,i)
 | 
			
		||||
#elif defined(DOUBLE)
 | 
			
		||||
#define OPENBLAS_COMPLEX_FLOAT openblas_complex_double
 | 
			
		||||
#define OPENBLAS_MAKE_COMPLEX_FLOAT(r,i) openblas_make_complex_double(r,i)
 | 
			
		||||
#else
 | 
			
		||||
#define OPENBLAS_COMPLEX_FLOAT openblas_complex_float
 | 
			
		||||
#define OPENBLAS_MAKE_COMPLEX_FLOAT(r,i) openblas_make_complex_float(r,i)
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#if defined(C_PGI) || defined(C_SUN)
 | 
			
		||||
#define CREAL(X)	(*((FLOAT *)&X + 0))
 | 
			
		||||
#define CIMAG(X)	(*((FLOAT *)&X + 1))
 | 
			
		||||
#else
 | 
			
		||||
#ifdef OPENBLAS_COMPLEX_STRUCT
 | 
			
		||||
#define CREAL(Z)	((Z).real)
 | 
			
		||||
#define CIMAG(Z)	((Z).imag)
 | 
			
		||||
#else
 | 
			
		||||
#define CREAL	__real__
 | 
			
		||||
#define CIMAG	__imag__
 | 
			
		||||
#endif
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#endif  // ASSEMBLER
 | 
			
		||||
 | 
			
		||||
#ifndef IFLUSH
 | 
			
		||||
| 
						 | 
				
			
			@ -478,6 +567,10 @@ typedef char* env_var_t;
 | 
			
		|||
#endif
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#if defined(C_MSVC)
 | 
			
		||||
#define inline __inline
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#ifndef ASSEMBLER
 | 
			
		||||
 | 
			
		||||
#ifndef MIN
 | 
			
		||||
| 
						 | 
				
			
			@ -499,6 +592,8 @@ void  blas_set_parameter(void);
 | 
			
		|||
int   blas_get_cpu_number(void);
 | 
			
		||||
void *blas_memory_alloc  (int);
 | 
			
		||||
void  blas_memory_free   (void *);
 | 
			
		||||
void *blas_memory_alloc_nolock  (int); //use malloc without blas_lock
 | 
			
		||||
void  blas_memory_free_nolock   (void *);
 | 
			
		||||
 | 
			
		||||
int  get_num_procs (void);
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -76,6 +76,7 @@ static void __inline blas_lock(unsigned long *address){
 | 
			
		|||
    "30:", address);
 | 
			
		||||
#endif
 | 
			
		||||
}
 | 
			
		||||
#define BLAS_LOCK_DEFINED
 | 
			
		||||
 | 
			
		||||
static __inline unsigned int rpcc(void){
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -89,6 +90,7 @@ static __inline unsigned int rpcc(void){
 | 
			
		|||
 | 
			
		||||
  return r0;
 | 
			
		||||
}
 | 
			
		||||
#define RPCC_DEFINED
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#define HALT 	ldq	$0, 0($0)
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										87
									
								
								common_arm.h
								
								
								
								
							
							
						
						
									
										87
									
								
								common_arm.h
								
								
								
								
							| 
						 | 
				
			
			@ -1,5 +1,5 @@
 | 
			
		|||
/*****************************************************************************
 | 
			
		||||
Copyright (c) 2011-2014, The OpenBLAS Project
 | 
			
		||||
Copyright (c) 2011-2015, The OpenBLAS Project
 | 
			
		||||
All rights reserved.
 | 
			
		||||
 | 
			
		||||
Redistribution and use in source and binary forms, with or without
 | 
			
		||||
| 
						 | 
				
			
			@ -30,56 +30,29 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
 | 
			
		|||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 | 
			
		||||
**********************************************************************************/
 | 
			
		||||
 | 
			
		||||
/*********************************************************************/
 | 
			
		||||
/* Copyright 2009, 2010 The University of Texas at Austin.           */
 | 
			
		||||
/* All rights reserved.                                              */
 | 
			
		||||
/*                                                                   */
 | 
			
		||||
/* Redistribution and use in source and binary forms, with or        */
 | 
			
		||||
/* without modification, are permitted provided that the following   */
 | 
			
		||||
/* conditions are met:                                               */
 | 
			
		||||
/*                                                                   */
 | 
			
		||||
/*   1. Redistributions of source code must retain the above         */
 | 
			
		||||
/*      copyright notice, this list of conditions and the following  */
 | 
			
		||||
/*      disclaimer.                                                  */
 | 
			
		||||
/*                                                                   */
 | 
			
		||||
/*   2. Redistributions in binary form must reproduce the above      */
 | 
			
		||||
/*      copyright notice, this list of conditions and the following  */
 | 
			
		||||
/*      disclaimer in the documentation and/or other materials       */
 | 
			
		||||
/*      provided with the distribution.                              */
 | 
			
		||||
/*                                                                   */
 | 
			
		||||
/*    THIS  SOFTWARE IS PROVIDED  BY THE  UNIVERSITY OF  TEXAS AT    */
 | 
			
		||||
/*    AUSTIN  ``AS IS''  AND ANY  EXPRESS OR  IMPLIED WARRANTIES,    */
 | 
			
		||||
/*    INCLUDING, BUT  NOT LIMITED  TO, THE IMPLIED  WARRANTIES OF    */
 | 
			
		||||
/*    MERCHANTABILITY  AND FITNESS FOR  A PARTICULAR  PURPOSE ARE    */
 | 
			
		||||
/*    DISCLAIMED.  IN  NO EVENT SHALL THE UNIVERSITY  OF TEXAS AT    */
 | 
			
		||||
/*    AUSTIN OR CONTRIBUTORS BE  LIABLE FOR ANY DIRECT, INDIRECT,    */
 | 
			
		||||
/*    INCIDENTAL,  SPECIAL, EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES    */
 | 
			
		||||
/*    (INCLUDING, BUT  NOT LIMITED TO,  PROCUREMENT OF SUBSTITUTE    */
 | 
			
		||||
/*    GOODS  OR  SERVICES; LOSS  OF  USE,  DATA,  OR PROFITS;  OR    */
 | 
			
		||||
/*    BUSINESS INTERRUPTION) HOWEVER CAUSED  AND ON ANY THEORY OF    */
 | 
			
		||||
/*    LIABILITY, WHETHER  IN CONTRACT, STRICT  LIABILITY, OR TORT    */
 | 
			
		||||
/*    (INCLUDING NEGLIGENCE OR OTHERWISE)  ARISING IN ANY WAY OUT    */
 | 
			
		||||
/*    OF  THE  USE OF  THIS  SOFTWARE,  EVEN  IF ADVISED  OF  THE    */
 | 
			
		||||
/*    POSSIBILITY OF SUCH DAMAGE.                                    */
 | 
			
		||||
/*                                                                   */
 | 
			
		||||
/* The views and conclusions contained in the software and           */
 | 
			
		||||
/* documentation are those of the authors and should not be          */
 | 
			
		||||
/* interpreted as representing official policies, either expressed   */
 | 
			
		||||
/* or implied, of The University of Texas at Austin.                 */
 | 
			
		||||
/*********************************************************************/
 | 
			
		||||
 | 
			
		||||
#ifndef COMMON_ARM
 | 
			
		||||
#define COMMON_ARM
 | 
			
		||||
 | 
			
		||||
#if defined(ARMV5) || defined(ARMV6)
 | 
			
		||||
 | 
			
		||||
#define MB
 | 
			
		||||
#define WMB
 | 
			
		||||
 | 
			
		||||
#else
 | 
			
		||||
 | 
			
		||||
#define MB   __asm__ __volatile__ ("dmb  ish" : : : "memory")
 | 
			
		||||
#define WMB  __asm__ __volatile__ ("dmb  ishst" : : : "memory")
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#define INLINE inline
 | 
			
		||||
 | 
			
		||||
#define RETURN_BY_COMPLEX
 | 
			
		||||
 | 
			
		||||
#ifndef ASSEMBLER
 | 
			
		||||
 | 
			
		||||
#if defined(ARMV6) || defined(ARMV7) || defined(ARMV8)
 | 
			
		||||
 | 
			
		||||
static void __inline blas_lock(volatile BLASULONG *address){
 | 
			
		||||
 | 
			
		||||
  int register ret;
 | 
			
		||||
| 
						 | 
				
			
			@ -89,36 +62,28 @@ static void __inline blas_lock(volatile BLASULONG *address){
 | 
			
		|||
 | 
			
		||||
    __asm__ __volatile__(
 | 
			
		||||
                         "ldrex r2, [%1]      \n\t"
 | 
			
		||||
                         "mov   r2, #0                                                  \n\t"
 | 
			
		||||
                         "strex r3, r2, [%1]                                            \n\t"
 | 
			
		||||
			 "mov	%0 , r3							\n\t"
 | 
			
		||||
                         : "=r"(ret), "=r"(address)
 | 
			
		||||
                         : "1"(address)
 | 
			
		||||
                         : "memory", "r2" , "r3"
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
                         "strex %0, %2, [%1]  \n\t"
 | 
			
		||||
                         "orr   %0, r2        \n\t"
 | 
			
		||||
                         : "=&r"(ret)
 | 
			
		||||
                         : "r"(address), "r"(1)
 | 
			
		||||
                         : "memory", "r2"
 | 
			
		||||
    );
 | 
			
		||||
 | 
			
		||||
  } while (ret);
 | 
			
		||||
 | 
			
		||||
  MB;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
static inline unsigned long long rpcc(void){
 | 
			
		||||
  unsigned long long ret=0;
 | 
			
		||||
  double v;
 | 
			
		||||
  struct timeval tv;
 | 
			
		||||
  gettimeofday(&tv,NULL);
 | 
			
		||||
  v=(double) tv.tv_sec + (double) tv.tv_usec * 1e-6;
 | 
			
		||||
  ret = (unsigned long long) ( v * 1000.0d );
 | 
			
		||||
  return ret;
 | 
			
		||||
}
 | 
			
		||||
#define BLAS_LOCK_DEFINED
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
static inline int blas_quickdivide(blasint x, blasint y){
 | 
			
		||||
  return x / y;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#if defined(DOUBLE)
 | 
			
		||||
#if !defined(HAVE_VFP)
 | 
			
		||||
/* no FPU, soft float */
 | 
			
		||||
#define GET_IMAGE(res)
 | 
			
		||||
#elif defined(DOUBLE)
 | 
			
		||||
#define GET_IMAGE(res)  __asm__ __volatile__("vstr.f64 d1, %0" : "=m"(res) : : "memory")
 | 
			
		||||
#else
 | 
			
		||||
#define GET_IMAGE(res)  __asm__ __volatile__("vstr.f32 s1, %0" : "=m"(res) : : "memory")
 | 
			
		||||
| 
						 | 
				
			
			@ -166,4 +131,8 @@ REALNAME:
 | 
			
		|||
#define MAP_ANONYMOUS MAP_ANON
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#if !defined(ARMV5) && !defined(ARMV6) && !defined(ARMV7) && !defined(ARMV8)
 | 
			
		||||
#error "you must define ARMV5, ARMV6, ARMV7 or ARMV8"
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1,5 +1,5 @@
 | 
			
		|||
/*****************************************************************************
 | 
			
		||||
Copyright (c) 2011-2014, The OpenBLAS Project
 | 
			
		||||
Copyright (c) 2011-2015, The OpenBLAS Project
 | 
			
		||||
All rights reserved.
 | 
			
		||||
 | 
			
		||||
Redistribution and use in source and binary forms, with or without
 | 
			
		||||
| 
						 | 
				
			
			@ -30,49 +30,12 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
 | 
			
		|||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 | 
			
		||||
**********************************************************************************/
 | 
			
		||||
 | 
			
		||||
/*********************************************************************/
 | 
			
		||||
/* Copyright 2009, 2010 The University of Texas at Austin.           */
 | 
			
		||||
/* All rights reserved.                                              */
 | 
			
		||||
/*                                                                   */
 | 
			
		||||
/* Redistribution and use in source and binary forms, with or        */
 | 
			
		||||
/* without modification, are permitted provided that the following   */
 | 
			
		||||
/* conditions are met:                                               */
 | 
			
		||||
/*                                                                   */
 | 
			
		||||
/*   1. Redistributions of source code must retain the above         */
 | 
			
		||||
/*      copyright notice, this list of conditions and the following  */
 | 
			
		||||
/*      disclaimer.                                                  */
 | 
			
		||||
/*                                                                   */
 | 
			
		||||
/*   2. Redistributions in binary form must reproduce the above      */
 | 
			
		||||
/*      copyright notice, this list of conditions and the following  */
 | 
			
		||||
/*      disclaimer in the documentation and/or other materials       */
 | 
			
		||||
/*      provided with the distribution.                              */
 | 
			
		||||
/*                                                                   */
 | 
			
		||||
/*    THIS  SOFTWARE IS PROVIDED  BY THE  UNIVERSITY OF  TEXAS AT    */
 | 
			
		||||
/*    AUSTIN  ``AS IS''  AND ANY  EXPRESS OR  IMPLIED WARRANTIES,    */
 | 
			
		||||
/*    INCLUDING, BUT  NOT LIMITED  TO, THE IMPLIED  WARRANTIES OF    */
 | 
			
		||||
/*    MERCHANTABILITY  AND FITNESS FOR  A PARTICULAR  PURPOSE ARE    */
 | 
			
		||||
/*    DISCLAIMED.  IN  NO EVENT SHALL THE UNIVERSITY  OF TEXAS AT    */
 | 
			
		||||
/*    AUSTIN OR CONTRIBUTORS BE  LIABLE FOR ANY DIRECT, INDIRECT,    */
 | 
			
		||||
/*    INCIDENTAL,  SPECIAL, EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES    */
 | 
			
		||||
/*    (INCLUDING, BUT  NOT LIMITED TO,  PROCUREMENT OF SUBSTITUTE    */
 | 
			
		||||
/*    GOODS  OR  SERVICES; LOSS  OF  USE,  DATA,  OR PROFITS;  OR    */
 | 
			
		||||
/*    BUSINESS INTERRUPTION) HOWEVER CAUSED  AND ON ANY THEORY OF    */
 | 
			
		||||
/*    LIABILITY, WHETHER  IN CONTRACT, STRICT  LIABILITY, OR TORT    */
 | 
			
		||||
/*    (INCLUDING NEGLIGENCE OR OTHERWISE)  ARISING IN ANY WAY OUT    */
 | 
			
		||||
/*    OF  THE  USE OF  THIS  SOFTWARE,  EVEN  IF ADVISED  OF  THE    */
 | 
			
		||||
/*    POSSIBILITY OF SUCH DAMAGE.                                    */
 | 
			
		||||
/*                                                                   */
 | 
			
		||||
/* The views and conclusions contained in the software and           */
 | 
			
		||||
/* documentation are those of the authors and should not be          */
 | 
			
		||||
/* interpreted as representing official policies, either expressed   */
 | 
			
		||||
/* or implied, of The University of Texas at Austin.                 */
 | 
			
		||||
/*********************************************************************/
 | 
			
		||||
 | 
			
		||||
#ifndef COMMON_ARM64
 | 
			
		||||
#define COMMON_ARM64
 | 
			
		||||
 | 
			
		||||
#define MB
 | 
			
		||||
#define WMB
 | 
			
		||||
#define MB   __asm__ __volatile__ ("dmb  ish" : : : "memory")
 | 
			
		||||
#define WMB  __asm__ __volatile__ ("dmb  ishst" : : : "memory")
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#define INLINE inline
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -81,39 +44,27 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 | 
			
		|||
#ifndef ASSEMBLER
 | 
			
		||||
 | 
			
		||||
static void __inline blas_lock(volatile BLASULONG *address){
 | 
			
		||||
/*
 | 
			
		||||
  int register ret;
 | 
			
		||||
 | 
			
		||||
  long register ret;
 | 
			
		||||
 | 
			
		||||
  do {
 | 
			
		||||
    while (*address) {YIELDING;};
 | 
			
		||||
 | 
			
		||||
    __asm__ __volatile__(
 | 
			
		||||
                         "ldrex r2, [%1]                                                \n\t"
 | 
			
		||||
                         "mov   r2, #0                                                  \n\t"
 | 
			
		||||
                         "strex r3, r2, [%1]                                            \n\t"
 | 
			
		||||
			 "mov	%0 , r3							\n\t"
 | 
			
		||||
                         : "=r"(ret), "=r"(address)
 | 
			
		||||
                         : "1"(address)
 | 
			
		||||
                         : "memory", "r2" , "r3"
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
                         "ldaxr %0, [%1]      \n\t"
 | 
			
		||||
                         "stlxr w2, %2, [%1]  \n\t"
 | 
			
		||||
                         "orr   %0, %0, x2    \n\t"
 | 
			
		||||
                         : "=r"(ret)
 | 
			
		||||
                         : "r"(address), "r"(1l)
 | 
			
		||||
                         : "memory", "x2"
 | 
			
		||||
    );
 | 
			
		||||
 | 
			
		||||
  } while (ret);
 | 
			
		||||
*/
 | 
			
		||||
  MB;
 | 
			
		||||
}
 | 
			
		||||
#define BLAS_LOCK_DEFINED
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
static inline unsigned long long rpcc(void){
 | 
			
		||||
  unsigned long long ret=0;
 | 
			
		||||
  double v;
 | 
			
		||||
  struct timeval tv;
 | 
			
		||||
  gettimeofday(&tv,NULL);
 | 
			
		||||
  v=(double) tv.tv_sec + (double) tv.tv_usec * 1e-6;
 | 
			
		||||
  ret = (unsigned long long) ( v * 1000.0d );
 | 
			
		||||
  return ret;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static inline int blas_quickdivide(blasint x, blasint y){
 | 
			
		||||
  return x / y;
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			@ -166,3 +117,4 @@ REALNAME:
 | 
			
		|||
#endif
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										19
									
								
								common_c.h
								
								
								
								
							
							
						
						
									
										19
									
								
								common_c.h
								
								
								
								
							| 
						 | 
				
			
			@ -220,6 +220,15 @@
 | 
			
		|||
#define COMATCOPY_K_CTC         comatcopy_k_ctc
 | 
			
		||||
#define COMATCOPY_K_RTC         comatcopy_k_rtc
 | 
			
		||||
 | 
			
		||||
#define CIMATCOPY_K_CN          cimatcopy_k_cn
 | 
			
		||||
#define CIMATCOPY_K_RN          cimatcopy_k_rn
 | 
			
		||||
#define CIMATCOPY_K_CT          cimatcopy_k_ct
 | 
			
		||||
#define CIMATCOPY_K_RT          cimatcopy_k_rt
 | 
			
		||||
#define CIMATCOPY_K_CNC         cimatcopy_k_cnc
 | 
			
		||||
#define CIMATCOPY_K_RNC         cimatcopy_k_rnc
 | 
			
		||||
#define CIMATCOPY_K_CTC         cimatcopy_k_ctc
 | 
			
		||||
#define CIMATCOPY_K_RTC         cimatcopy_k_rtc
 | 
			
		||||
 | 
			
		||||
#define CGEADD_K                cgeadd_k 
 | 
			
		||||
 | 
			
		||||
#else
 | 
			
		||||
| 
						 | 
				
			
			@ -403,6 +412,16 @@
 | 
			
		|||
#define COMATCOPY_K_RNC         gotoblas -> comatcopy_k_rnc
 | 
			
		||||
#define COMATCOPY_K_CTC         gotoblas -> comatcopy_k_ctc
 | 
			
		||||
#define COMATCOPY_K_RTC         gotoblas -> comatcopy_k_rtc
 | 
			
		||||
 | 
			
		||||
#define CIMATCOPY_K_CN          gotoblas -> cimatcopy_k_cn
 | 
			
		||||
#define CIMATCOPY_K_RN          gotoblas -> cimatcopy_k_rn
 | 
			
		||||
#define CIMATCOPY_K_CT          gotoblas -> cimatcopy_k_ct
 | 
			
		||||
#define CIMATCOPY_K_RT          gotoblas -> cimatcopy_k_rt
 | 
			
		||||
#define CIMATCOPY_K_CNC         gotoblas -> cimatcopy_k_cnc
 | 
			
		||||
#define CIMATCOPY_K_RNC         gotoblas -> cimatcopy_k_rnc
 | 
			
		||||
#define CIMATCOPY_K_CTC         gotoblas -> cimatcopy_k_ctc
 | 
			
		||||
#define CIMATCOPY_K_RTC         gotoblas -> cimatcopy_k_rtc
 | 
			
		||||
 | 
			
		||||
#define CGEADD_K                gotoblas -> cgeadd_k 
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -149,6 +149,11 @@
 | 
			
		|||
#define DOMATCOPY_K_RN		domatcopy_k_rn
 | 
			
		||||
#define DOMATCOPY_K_CT		domatcopy_k_ct
 | 
			
		||||
#define DOMATCOPY_K_RT		domatcopy_k_rt
 | 
			
		||||
 | 
			
		||||
#define DIMATCOPY_K_CN		dimatcopy_k_cn
 | 
			
		||||
#define DIMATCOPY_K_RN		dimatcopy_k_rn
 | 
			
		||||
#define DIMATCOPY_K_CT      dimatcopy_k_ct
 | 
			
		||||
#define DIMATCOPY_K_RT      dimatcopy_k_rt
 | 
			
		||||
#define DGEADD_K                dgeadd_k 
 | 
			
		||||
 | 
			
		||||
#else
 | 
			
		||||
| 
						 | 
				
			
			@ -267,6 +272,10 @@
 | 
			
		|||
#define DOMATCOPY_K_RN		gotoblas -> domatcopy_k_rn
 | 
			
		||||
#define DOMATCOPY_K_CT		gotoblas -> domatcopy_k_ct
 | 
			
		||||
#define DOMATCOPY_K_RT		gotoblas -> domatcopy_k_rt
 | 
			
		||||
#define DIMATCOPY_K_CN		gotoblas -> dimatcopy_k_cn
 | 
			
		||||
#define DIMATCOPY_K_RN		gotoblas -> dimatcopy_k_rn
 | 
			
		||||
#define DIMATCOPY_K_CT		gotoblas -> dimatcopy_k_ct
 | 
			
		||||
#define DIMATCOPY_K_RT		gotoblas -> dimatcopy_k_rt
 | 
			
		||||
 | 
			
		||||
#define DGEADD_K                gotoblas -> dgeadd_k 
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -68,6 +68,7 @@ static __inline void blas_lock(volatile unsigned long *address){
 | 
			
		|||
			  : "ar.ccv", "memory");
 | 
			
		||||
  } while (ret);
 | 
			
		||||
}
 | 
			
		||||
#define BLAS_LOCK_DEFINED
 | 
			
		||||
 | 
			
		||||
static __inline unsigned long rpcc(void) {
 | 
			
		||||
  unsigned long clocks;
 | 
			
		||||
| 
						 | 
				
			
			@ -75,6 +76,7 @@ static __inline unsigned long rpcc(void) {
 | 
			
		|||
  __asm__ __volatile__ ("mov %0=ar.itc" : "=r"(clocks));
 | 
			
		||||
  return clocks;
 | 
			
		||||
}
 | 
			
		||||
#define RPCC_DEFINED
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
static __inline unsigned long stmxcsr(void){
 | 
			
		||||
| 
						 | 
				
			
			@ -99,10 +101,12 @@ static __inline void blas_lock(volatile unsigned long *address){
 | 
			
		|||
  while (*address || _InterlockedCompareExchange((volatile int *) address,1,0))
 | 
			
		||||
    ;
 | 
			
		||||
}
 | 
			
		||||
#define BLAS_LOCK_DEFINED
 | 
			
		||||
 | 
			
		||||
static __inline unsigned int rpcc(void) {
 | 
			
		||||
  return __getReg(_IA64_REG_AR_ITC);
 | 
			
		||||
}
 | 
			
		||||
#define RPCC_DEFINED
 | 
			
		||||
 | 
			
		||||
static __inline unsigned int stmxcsr(void) {
 | 
			
		||||
  return __getReg(_IA64_REG_AR_FPSR);
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -47,12 +47,12 @@ double dsdot_k(BLASLONG, float   *, BLASLONG, float *, BLASLONG);
 | 
			
		|||
double  ddot_k(BLASLONG, double  *, BLASLONG, double  *, BLASLONG);
 | 
			
		||||
xdouble qdot_k(BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
 | 
			
		||||
 | 
			
		||||
float   _Complex cdotc_k (BLASLONG, float  *, BLASLONG, float  *, BLASLONG);
 | 
			
		||||
float   _Complex cdotu_k (BLASLONG, float  *, BLASLONG, float  *, BLASLONG);
 | 
			
		||||
double  _Complex zdotc_k (BLASLONG, double *, BLASLONG, double *, BLASLONG);
 | 
			
		||||
double  _Complex zdotu_k (BLASLONG, double *, BLASLONG, double *, BLASLONG);
 | 
			
		||||
xdouble _Complex xdotc_k (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
 | 
			
		||||
xdouble _Complex xdotu_k (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
 | 
			
		||||
openblas_complex_float cdotc_k (BLASLONG, float  *, BLASLONG, float  *, BLASLONG);
 | 
			
		||||
openblas_complex_float cdotu_k (BLASLONG, float  *, BLASLONG, float  *, BLASLONG);
 | 
			
		||||
openblas_complex_double zdotc_k (BLASLONG, double *, BLASLONG, double *, BLASLONG);
 | 
			
		||||
openblas_complex_double zdotu_k (BLASLONG, double *, BLASLONG, double *, BLASLONG);
 | 
			
		||||
openblas_complex_xdouble xdotc_k (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
 | 
			
		||||
openblas_complex_xdouble xdotu_k (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
 | 
			
		||||
 | 
			
		||||
int    saxpy_k (BLASLONG, BLASLONG, BLASLONG, float,
 | 
			
		||||
	       float  *, BLASLONG, float  *, BLASLONG, float  *, BLASLONG);
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1736,31 +1736,55 @@ int somatcopy_k_cn(BLASLONG, BLASLONG,  float, float *, BLASLONG, float  *, BLAS
 | 
			
		|||
int somatcopy_k_rn(BLASLONG, BLASLONG,  float, float *, BLASLONG, float  *, BLASLONG);
 | 
			
		||||
int somatcopy_k_ct(BLASLONG, BLASLONG,  float, float *, BLASLONG, float  *, BLASLONG);
 | 
			
		||||
int somatcopy_k_rt(BLASLONG, BLASLONG,  float, float *, BLASLONG, float  *, BLASLONG);
 | 
			
		||||
int simatcopy_k_cn(BLASLONG, BLASLONG,  float, float *, BLASLONG);
 | 
			
		||||
int simatcopy_k_rn(BLASLONG, BLASLONG,  float, float *, BLASLONG);
 | 
			
		||||
int simatcopy_k_ct(BLASLONG, BLASLONG,  float, float *, BLASLONG);
 | 
			
		||||
int simatcopy_k_rt(BLASLONG, BLASLONG,  float, float *, BLASLONG);
 | 
			
		||||
 | 
			
		||||
int domatcopy_k_cn(BLASLONG, BLASLONG,  double, double *, BLASLONG, double  *, BLASLONG);
 | 
			
		||||
int domatcopy_k_rn(BLASLONG, BLASLONG,  double, double *, BLASLONG, double  *, BLASLONG);
 | 
			
		||||
int domatcopy_k_ct(BLASLONG, BLASLONG,  double, double *, BLASLONG, double  *, BLASLONG);
 | 
			
		||||
int domatcopy_k_rt(BLASLONG, BLASLONG,  double, double *, BLASLONG, double  *, BLASLONG);
 | 
			
		||||
int dimatcopy_k_cn(BLASLONG, BLASLONG,  double, double *, BLASLONG);
 | 
			
		||||
int dimatcopy_k_rn(BLASLONG, BLASLONG,  double, double *, BLASLONG);
 | 
			
		||||
int dimatcopy_k_ct(BLASLONG, BLASLONG,  double, double *, BLASLONG);
 | 
			
		||||
int dimatcopy_k_rt(BLASLONG, BLASLONG,  double, double *, BLASLONG);
 | 
			
		||||
 | 
			
		||||
int comatcopy_k_cn(BLASLONG, BLASLONG,  float, float, float *, BLASLONG, float  *, BLASLONG);
 | 
			
		||||
int comatcopy_k_rn(BLASLONG, BLASLONG,  float, float, float *, BLASLONG, float  *, BLASLONG);
 | 
			
		||||
int comatcopy_k_ct(BLASLONG, BLASLONG,  float, float, float *, BLASLONG, float  *, BLASLONG);
 | 
			
		||||
int comatcopy_k_rt(BLASLONG, BLASLONG,  float, float, float *, BLASLONG, float  *, BLASLONG);
 | 
			
		||||
int cimatcopy_k_cn(BLASLONG, BLASLONG,  float, float, float *, BLASLONG);
 | 
			
		||||
int cimatcopy_k_rn(BLASLONG, BLASLONG,  float, float, float *, BLASLONG);
 | 
			
		||||
int cimatcopy_k_ct(BLASLONG, BLASLONG,  float, float, float *, BLASLONG);
 | 
			
		||||
int cimatcopy_k_rt(BLASLONG, BLASLONG,  float, float, float *, BLASLONG);
 | 
			
		||||
 | 
			
		||||
int comatcopy_k_cnc(BLASLONG, BLASLONG,  float, float, float *, BLASLONG, float  *, BLASLONG);
 | 
			
		||||
int comatcopy_k_rnc(BLASLONG, BLASLONG,  float, float, float *, BLASLONG, float  *, BLASLONG);
 | 
			
		||||
int comatcopy_k_ctc(BLASLONG, BLASLONG,  float, float, float *, BLASLONG, float  *, BLASLONG);
 | 
			
		||||
int comatcopy_k_rtc(BLASLONG, BLASLONG,  float, float, float *, BLASLONG, float  *, BLASLONG);
 | 
			
		||||
int cimatcopy_k_cnc(BLASLONG, BLASLONG,  float, float, float *, BLASLONG);
 | 
			
		||||
int cimatcopy_k_rnc(BLASLONG, BLASLONG,  float, float, float *, BLASLONG);
 | 
			
		||||
int cimatcopy_k_ctc(BLASLONG, BLASLONG,  float, float, float *, BLASLONG);
 | 
			
		||||
int cimatcopy_k_rtc(BLASLONG, BLASLONG,  float, float, float *, BLASLONG);
 | 
			
		||||
 | 
			
		||||
int zomatcopy_k_cn(BLASLONG, BLASLONG,  double, double, double *, BLASLONG, double  *, BLASLONG);
 | 
			
		||||
int zomatcopy_k_rn(BLASLONG, BLASLONG,  double, double, double *, BLASLONG, double  *, BLASLONG);
 | 
			
		||||
int zomatcopy_k_ct(BLASLONG, BLASLONG,  double, double, double *, BLASLONG, double  *, BLASLONG);
 | 
			
		||||
int zomatcopy_k_rt(BLASLONG, BLASLONG,  double, double, double *, BLASLONG, double  *, BLASLONG);
 | 
			
		||||
int zimatcopy_k_cn(BLASLONG, BLASLONG,  double, double, double *, BLASLONG);
 | 
			
		||||
int zimatcopy_k_rn(BLASLONG, BLASLONG,  double, double, double *, BLASLONG);
 | 
			
		||||
int zimatcopy_k_ct(BLASLONG, BLASLONG,  double, double, double *, BLASLONG);
 | 
			
		||||
int zimatcopy_k_rt(BLASLONG, BLASLONG,  double, double, double *, BLASLONG);
 | 
			
		||||
 | 
			
		||||
int zomatcopy_k_cnc(BLASLONG, BLASLONG,  double, double, double *, BLASLONG, double  *, BLASLONG);
 | 
			
		||||
int zomatcopy_k_rnc(BLASLONG, BLASLONG,  double, double, double *, BLASLONG, double  *, BLASLONG);
 | 
			
		||||
int zomatcopy_k_ctc(BLASLONG, BLASLONG,  double, double, double *, BLASLONG, double  *, BLASLONG);
 | 
			
		||||
int zomatcopy_k_rtc(BLASLONG, BLASLONG,  double, double, double *, BLASLONG, double  *, BLASLONG);
 | 
			
		||||
int zimatcopy_k_cnc(BLASLONG, BLASLONG,  double, double, double *, BLASLONG);
 | 
			
		||||
int zimatcopy_k_rnc(BLASLONG, BLASLONG,  double, double, double *, BLASLONG);
 | 
			
		||||
int zimatcopy_k_ctc(BLASLONG, BLASLONG,  double, double, double *, BLASLONG);
 | 
			
		||||
int zimatcopy_k_rtc(BLASLONG, BLASLONG,  double, double, double *, BLASLONG);
 | 
			
		||||
 | 
			
		||||
int sgeadd_k(BLASLONG, BLASLONG, float, float*, BLASLONG, float, float *, BLASLONG); 
 | 
			
		||||
int dgeadd_k(BLASLONG, BLASLONG, double, double*, BLASLONG, double, double *, BLASLONG); 
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -634,6 +634,11 @@
 | 
			
		|||
#define OMATCOPY_K_RN		DOMATCOPY_K_RN
 | 
			
		||||
#define OMATCOPY_K_CT		DOMATCOPY_K_CT
 | 
			
		||||
#define OMATCOPY_K_RT		DOMATCOPY_K_RT
 | 
			
		||||
#define IMATCOPY_K_CN		DIMATCOPY_K_CN
 | 
			
		||||
#define IMATCOPY_K_RN		DIMATCOPY_K_RN
 | 
			
		||||
#define IMATCOPY_K_CT		DIMATCOPY_K_CT
 | 
			
		||||
#define IMATCOPY_K_RT		DIMATCOPY_K_RT
 | 
			
		||||
 | 
			
		||||
#define GEADD_K                 DGEADD_K 
 | 
			
		||||
#else
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -931,6 +936,10 @@
 | 
			
		|||
#define OMATCOPY_K_RN		SOMATCOPY_K_RN
 | 
			
		||||
#define OMATCOPY_K_CT		SOMATCOPY_K_CT
 | 
			
		||||
#define OMATCOPY_K_RT		SOMATCOPY_K_RT
 | 
			
		||||
#define IMATCOPY_K_CN		SIMATCOPY_K_CN
 | 
			
		||||
#define IMATCOPY_K_RN		SIMATCOPY_K_RN
 | 
			
		||||
#define IMATCOPY_K_CT		SIMATCOPY_K_CT
 | 
			
		||||
#define IMATCOPY_K_RT		SIMATCOPY_K_RT
 | 
			
		||||
 | 
			
		||||
#define GEADD_K 		SGEADD_K 
 | 
			
		||||
#endif
 | 
			
		||||
| 
						 | 
				
			
			@ -1747,6 +1756,15 @@
 | 
			
		|||
#define OMATCOPY_K_RNC		ZOMATCOPY_K_RNC
 | 
			
		||||
#define OMATCOPY_K_CTC		ZOMATCOPY_K_CTC
 | 
			
		||||
#define OMATCOPY_K_RTC		ZOMATCOPY_K_RTC
 | 
			
		||||
#define IMATCOPY_K_CN		ZIMATCOPY_K_CN
 | 
			
		||||
#define IMATCOPY_K_RN		ZIMATCOPY_K_RN
 | 
			
		||||
#define IMATCOPY_K_CT		ZIMATCOPY_K_CT
 | 
			
		||||
#define IMATCOPY_K_RT		ZIMATCOPY_K_RT
 | 
			
		||||
#define IMATCOPY_K_CNC		ZIMATCOPY_K_CNC
 | 
			
		||||
#define IMATCOPY_K_RNC		ZIMATCOPY_K_RNC
 | 
			
		||||
#define IMATCOPY_K_CTC		ZIMATCOPY_K_CTC
 | 
			
		||||
#define IMATCOPY_K_RTC		ZIMATCOPY_K_RTC
 | 
			
		||||
 | 
			
		||||
#define GEADD_K                 ZGEADD_K 
 | 
			
		||||
 | 
			
		||||
#else
 | 
			
		||||
| 
						 | 
				
			
			@ -2160,6 +2178,14 @@
 | 
			
		|||
#define OMATCOPY_K_RNC		COMATCOPY_K_RNC
 | 
			
		||||
#define OMATCOPY_K_CTC		COMATCOPY_K_CTC
 | 
			
		||||
#define OMATCOPY_K_RTC		COMATCOPY_K_RTC
 | 
			
		||||
#define IMATCOPY_K_CN		CIMATCOPY_K_CN
 | 
			
		||||
#define IMATCOPY_K_RN		CIMATCOPY_K_RN
 | 
			
		||||
#define IMATCOPY_K_CT		CIMATCOPY_K_CT
 | 
			
		||||
#define IMATCOPY_K_RT		CIMATCOPY_K_RT
 | 
			
		||||
#define IMATCOPY_K_CNC		CIMATCOPY_K_CNC
 | 
			
		||||
#define IMATCOPY_K_RNC		CIMATCOPY_K_RNC
 | 
			
		||||
#define IMATCOPY_K_CTC		CIMATCOPY_K_CTC
 | 
			
		||||
#define IMATCOPY_K_RTC		CIMATCOPY_K_RTC
 | 
			
		||||
 | 
			
		||||
#define GEADD_K                 CGEADD_K 
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -98,6 +98,7 @@ static void INLINE blas_lock(volatile unsigned long *address){
 | 
			
		|||
 | 
			
		||||
  } while (ret);
 | 
			
		||||
}
 | 
			
		||||
#define BLAS_LOCK_DEFINED
 | 
			
		||||
 | 
			
		||||
static inline unsigned int rpcc(void){
 | 
			
		||||
  unsigned long ret;
 | 
			
		||||
| 
						 | 
				
			
			@ -118,6 +119,7 @@ static inline unsigned int rpcc(void){
 | 
			
		|||
#endif
 | 
			
		||||
  return ret;
 | 
			
		||||
}
 | 
			
		||||
#define RPCC_DEFINED
 | 
			
		||||
 | 
			
		||||
#if defined(LOONGSON3A) || defined(LOONGSON3B)
 | 
			
		||||
#ifndef NO_AFFINITY
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -855,6 +855,36 @@ BLASLONG (*ixamin_k)(BLASLONG, xdouble *, BLASLONG);
 | 
			
		|||
  int    (*zomatcopy_k_rnc)	(BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
 | 
			
		||||
  int    (*zomatcopy_k_rtc)	(BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
 | 
			
		||||
 | 
			
		||||
  int    (*simatcopy_k_cn)	(BLASLONG, BLASLONG, float, float*, BLASLONG);
 | 
			
		||||
  int    (*simatcopy_k_ct)	(BLASLONG, BLASLONG, float, float*, BLASLONG);
 | 
			
		||||
  int    (*simatcopy_k_rn)	(BLASLONG, BLASLONG, float, float*, BLASLONG);
 | 
			
		||||
  int    (*simatcopy_k_rt)	(BLASLONG, BLASLONG, float, float*, BLASLONG);
 | 
			
		||||
 | 
			
		||||
  int    (*dimatcopy_k_cn)	(BLASLONG, BLASLONG, double, double*, BLASLONG);
 | 
			
		||||
  int    (*dimatcopy_k_ct)	(BLASLONG, BLASLONG, double, double*, BLASLONG);
 | 
			
		||||
  int    (*dimatcopy_k_rn)	(BLASLONG, BLASLONG, double, double*, BLASLONG);
 | 
			
		||||
  int    (*dimatcopy_k_rt)	(BLASLONG, BLASLONG, double, double*, BLASLONG);
 | 
			
		||||
 | 
			
		||||
  int    (*cimatcopy_k_cn)	(BLASLONG, BLASLONG, float, float, float*, BLASLONG);
 | 
			
		||||
  int    (*cimatcopy_k_ct)	(BLASLONG, BLASLONG, float, float, float*, BLASLONG);
 | 
			
		||||
  int    (*cimatcopy_k_rn)	(BLASLONG, BLASLONG, float, float, float*, BLASLONG);
 | 
			
		||||
  int    (*cimatcopy_k_rt)	(BLASLONG, BLASLONG, float, float, float*, BLASLONG);
 | 
			
		||||
 | 
			
		||||
  int    (*cimatcopy_k_cnc)	(BLASLONG, BLASLONG, float, float, float*, BLASLONG);
 | 
			
		||||
  int    (*cimatcopy_k_ctc)	(BLASLONG, BLASLONG, float, float, float*, BLASLONG);
 | 
			
		||||
  int    (*cimatcopy_k_rnc)	(BLASLONG, BLASLONG, float, float, float*, BLASLONG);
 | 
			
		||||
  int    (*cimatcopy_k_rtc)	(BLASLONG, BLASLONG, float, float, float*, BLASLONG);
 | 
			
		||||
 | 
			
		||||
  int    (*zimatcopy_k_cn)	(BLASLONG, BLASLONG, double, double, double*, BLASLONG);
 | 
			
		||||
  int    (*zimatcopy_k_ct)	(BLASLONG, BLASLONG, double, double, double*, BLASLONG);
 | 
			
		||||
  int    (*zimatcopy_k_rn)	(BLASLONG, BLASLONG, double, double, double*, BLASLONG);
 | 
			
		||||
  int    (*zimatcopy_k_rt)	(BLASLONG, BLASLONG, double, double, double*, BLASLONG);
 | 
			
		||||
 | 
			
		||||
  int    (*zimatcopy_k_cnc)	(BLASLONG, BLASLONG, double, double, double*, BLASLONG);
 | 
			
		||||
  int    (*zimatcopy_k_ctc)	(BLASLONG, BLASLONG, double, double, double*, BLASLONG);
 | 
			
		||||
  int    (*zimatcopy_k_rnc)	(BLASLONG, BLASLONG, double, double, double*, BLASLONG);
 | 
			
		||||
  int    (*zimatcopy_k_rtc)	(BLASLONG, BLASLONG, double, double, double*, BLASLONG);
 | 
			
		||||
 | 
			
		||||
  int    (*sgeadd_k) (BLASLONG, BLASLONG, float, float *, BLASLONG, float, float *, BLASLONG); 
 | 
			
		||||
  int    (*dgeadd_k) (BLASLONG, BLASLONG, double, double *, BLASLONG, double, double *, BLASLONG); 
 | 
			
		||||
  int    (*cgeadd_k) (BLASLONG, BLASLONG, float, float,  float *,  BLASLONG, float, float, float *, BLASLONG); 
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -87,6 +87,7 @@ static void INLINE blas_lock(volatile unsigned long *address){
 | 
			
		|||
#endif
 | 
			
		||||
  } while (ret);
 | 
			
		||||
}
 | 
			
		||||
#define BLAS_LOCK_DEFINED
 | 
			
		||||
 | 
			
		||||
static inline unsigned long rpcc(void){
 | 
			
		||||
  unsigned long ret;
 | 
			
		||||
| 
						 | 
				
			
			@ -103,6 +104,7 @@ static inline unsigned long rpcc(void){
 | 
			
		|||
#endif
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
#define RPCC_DEFINED
 | 
			
		||||
 | 
			
		||||
#ifdef __64BIT__
 | 
			
		||||
#define RPCC64BIT
 | 
			
		||||
| 
						 | 
				
			
			@ -495,6 +497,15 @@ static inline int blas_quickdivide(blasint x, blasint y){
 | 
			
		|||
REALNAME:
 | 
			
		||||
#define EPILOGUE	.size	REALNAME, .-REALNAME
 | 
			
		||||
#else
 | 
			
		||||
#if _CALL_ELF == 2
 | 
			
		||||
#define PROLOGUE \
 | 
			
		||||
	.section .text;\
 | 
			
		||||
	.align 6;\
 | 
			
		||||
	.globl	REALNAME;\
 | 
			
		||||
	.type	REALNAME, @function;\
 | 
			
		||||
REALNAME:
 | 
			
		||||
#define EPILOGUE	.size	REALNAME, .-REALNAME
 | 
			
		||||
#else
 | 
			
		||||
#define PROLOGUE \
 | 
			
		||||
	.section .text;\
 | 
			
		||||
	.align 5;\
 | 
			
		||||
| 
						 | 
				
			
			@ -514,6 +525,7 @@ REALNAME:;\
 | 
			
		|||
	.size	.REALNAME, .-.REALNAME; \
 | 
			
		||||
	.section	.note.GNU-stack,"",@progbits
 | 
			
		||||
#endif
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#ifdef PROFILE
 | 
			
		||||
#ifndef __64BIT__
 | 
			
		||||
| 
						 | 
				
			
			@ -792,4 +804,25 @@ Lmcount$lazy_ptr:
 | 
			
		|||
#ifndef MAP_ANONYMOUS
 | 
			
		||||
#define MAP_ANONYMOUS MAP_ANON
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#ifdef OS_LINUX
 | 
			
		||||
#ifndef __64BIT__
 | 
			
		||||
#define FRAMESLOT(X) (((X) * 4) + 8)
 | 
			
		||||
#else
 | 
			
		||||
#if _CALL_ELF == 2
 | 
			
		||||
#define FRAMESLOT(X) (((X) * 8) + 96)
 | 
			
		||||
#else
 | 
			
		||||
#define FRAMESLOT(X) (((X) * 8) + 112)
 | 
			
		||||
#endif
 | 
			
		||||
#endif
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#if defined(OS_AIX) || defined(OS_DARWIN)
 | 
			
		||||
#ifndef __64BIT__
 | 
			
		||||
#define FRAMESLOT(X) (((X) * 4) + 56)
 | 
			
		||||
#else
 | 
			
		||||
#define FRAMESLOT(X) (((X) * 8) + 112)
 | 
			
		||||
#endif
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -152,6 +152,10 @@
 | 
			
		|||
#define SOMATCOPY_K_RN          somatcopy_k_rn
 | 
			
		||||
#define SOMATCOPY_K_CT          somatcopy_k_ct
 | 
			
		||||
#define SOMATCOPY_K_RT          somatcopy_k_rt
 | 
			
		||||
#define SIMATCOPY_K_CN          simatcopy_k_cn
 | 
			
		||||
#define SIMATCOPY_K_RN          simatcopy_k_rn
 | 
			
		||||
#define SIMATCOPY_K_CT          simatcopy_k_ct
 | 
			
		||||
#define SIMATCOPY_K_RT          simatcopy_k_rt
 | 
			
		||||
 | 
			
		||||
#define SGEADD_K                sgeadd_k 
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -274,6 +278,10 @@
 | 
			
		|||
#define SOMATCOPY_K_RN          gotoblas -> somatcopy_k_rn
 | 
			
		||||
#define SOMATCOPY_K_CT          gotoblas -> somatcopy_k_ct
 | 
			
		||||
#define SOMATCOPY_K_RT          gotoblas -> somatcopy_k_rt
 | 
			
		||||
#define SIMATCOPY_K_CN          gotoblas -> simatcopy_k_cn
 | 
			
		||||
#define SIMATCOPY_K_RN          gotoblas -> simatcopy_k_rn
 | 
			
		||||
#define SIMATCOPY_K_CT          gotoblas -> simatcopy_k_ct
 | 
			
		||||
#define SIMATCOPY_K_RT          gotoblas -> simatcopy_k_rt
 | 
			
		||||
 | 
			
		||||
#define SGEADD_K                gotoblas -> sgeadd_k 
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -58,6 +58,7 @@ static void __inline blas_lock(volatile unsigned long *address){
 | 
			
		|||
			 : "memory");
 | 
			
		||||
  } while (ret);
 | 
			
		||||
}
 | 
			
		||||
#define BLAS_LOCK_DEFINED
 | 
			
		||||
 | 
			
		||||
static __inline unsigned long rpcc(void){
 | 
			
		||||
  unsigned long clocks;
 | 
			
		||||
| 
						 | 
				
			
			@ -66,6 +67,7 @@ static __inline unsigned long rpcc(void){
 | 
			
		|||
 | 
			
		||||
  return clocks;
 | 
			
		||||
};
 | 
			
		||||
#define RPCC_DEFINED
 | 
			
		||||
 | 
			
		||||
#ifdef __64BIT__
 | 
			
		||||
#define RPCC64BIT
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										39
									
								
								common_x86.h
								
								
								
								
							
							
						
						
									
										39
									
								
								common_x86.h
								
								
								
								
							| 
						 | 
				
			
			@ -56,41 +56,67 @@ static void __inline blas_lock(volatile BLASULONG *address){
 | 
			
		|||
  do {
 | 
			
		||||
    while (*address) {YIELDING;};
 | 
			
		||||
 | 
			
		||||
#if defined(_MSC_VER) && !defined(__clang__)
 | 
			
		||||
	// use intrinsic instead of inline assembly
 | 
			
		||||
	ret = _InterlockedExchange(address, 1);
 | 
			
		||||
	// inline assembly
 | 
			
		||||
	/*__asm {
 | 
			
		||||
		mov eax, address
 | 
			
		||||
		mov ebx, 1
 | 
			
		||||
		xchg [eax], ebx
 | 
			
		||||
		mov ret, ebx
 | 
			
		||||
	}*/
 | 
			
		||||
#else
 | 
			
		||||
    __asm__ __volatile__(
 | 
			
		||||
			 "xchgl %0, %1\n"
 | 
			
		||||
			 : "=r"(ret), "=m"(*address)
 | 
			
		||||
			 : "0"(1), "m"(*address)
 | 
			
		||||
			 : "memory");
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
  } while (ret);
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
#define BLAS_LOCK_DEFINED
 | 
			
		||||
 | 
			
		||||
/*
 * Read the processor time-stamp counter.
 *
 * MSVC builds use the __rdtsc intrinsic; every other compiler executes the
 * RDTSC instruction and joins the two 32-bit halves it returns in EDX:EAX.
 */
static __inline unsigned long long rpcc(void){
#if !defined(_MSC_VER) || defined(__clang__)
  unsigned int lo, hi;
  unsigned long long ticks;

  __asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi));

  /* EDX holds the upper 32 bits, EAX the lower 32 bits. */
  ticks   = hi;
  ticks <<= 32;
  ticks  += lo;
  return ticks;
#else
  return __rdtsc(); // use MSVC intrinsic
#endif
}
 | 
			
		||||
#define RPCC_DEFINED
 | 
			
		||||
 | 
			
		||||
static __inline unsigned long getstackaddr(void){
 | 
			
		||||
#if defined(_MSC_VER) && !defined(__clang__)
 | 
			
		||||
  return (unsigned long)_ReturnAddress(); // use MSVC intrinsic
 | 
			
		||||
#else
 | 
			
		||||
  unsigned long addr;
 | 
			
		||||
 | 
			
		||||
  __asm__ __volatile__ ("mov %%esp, %0"
 | 
			
		||||
			 : "=r"(addr) : : "memory");
 | 
			
		||||
 | 
			
		||||
  return addr;
 | 
			
		||||
#endif
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/*
 * Square root of a long double.
 *
 * Non-MSVC compilers run the x87 sequence fldt / fsqrt / fstpt on the
 * argument; MSVC falls back to the C library sqrt().
 */
static __inline long double sqrt_long(long double val) {
#if !defined(_MSC_VER) || defined(__clang__)
  long double root;

  /* Load val onto the x87 stack, take the root, pop it into root. */
  __asm__ __volatile__ ("fldt %1\n"
                        "fsqrt\n"
                        "fstpt %0\n"
                        : "=m" (root)
                        : "m" (val));
  return root;
#else
  return sqrt(val); // not sure if this will use fsqrt
#endif
}
 | 
			
		||||
 | 
			
		||||
#define SQRT(a)  sqrt_long(a)
 | 
			
		||||
| 
						 | 
				
			
			@ -100,7 +126,7 @@ void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx);
 | 
			
		|||
 | 
			
		||||
#define WHEREAMI
 | 
			
		||||
 | 
			
		||||
static inline int WhereAmI(void){
 | 
			
		||||
static __inline int WhereAmI(void){
 | 
			
		||||
  int eax, ebx, ecx, edx;
 | 
			
		||||
  int apicid;
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -146,9 +172,14 @@ static __inline int blas_quickdivide(unsigned int x, unsigned int y){
 | 
			
		|||
 | 
			
		||||
  y = blas_quick_divide_table[y];
 | 
			
		||||
 | 
			
		||||
#if defined(_MSC_VER) && !defined(__clang__)
 | 
			
		||||
  (void*)result;
 | 
			
		||||
  return x*y;
 | 
			
		||||
#else
 | 
			
		||||
  __asm__ __volatile__  ("mull %0" :"=d" (result) :"a"(x), "0" (y));
 | 
			
		||||
 | 
			
		||||
  return result;
 | 
			
		||||
#endif
 | 
			
		||||
}
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -171,7 +202,7 @@ static __inline int blas_quickdivide(unsigned int x, unsigned int y){
 | 
			
		|||
#define MMXSTORE	movd
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#if defined(PILEDRIVER) || defined(BULLDOZER) || defined(STEAMROLLER)
 | 
			
		||||
#if defined(PILEDRIVER) || defined(BULLDOZER) || defined(STEAMROLLER) || defined(EXCAVATOR)
 | 
			
		||||
//Enable some optimizations for Barcelona.
 | 
			
		||||
#define BARCELONA_OPTIMIZATION
 | 
			
		||||
#endif
 | 
			
		||||
| 
						 | 
				
			
			@ -284,8 +315,12 @@ REALNAME:
 | 
			
		|||
 | 
			
		||||
#define PROFCODE
 | 
			
		||||
 | 
			
		||||
#ifdef __clang__
 | 
			
		||||
#define EPILOGUE .end
 | 
			
		||||
#else
 | 
			
		||||
#define EPILOGUE .end	 REALNAME
 | 
			
		||||
#endif
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#if defined(OS_LINUX) || defined(OS_FREEBSD) || defined(OS_NETBSD) || defined(__ELF__)
 | 
			
		||||
#define PROLOGUE \
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -41,6 +41,10 @@
 | 
			
		|||
 | 
			
		||||
#ifndef ASSEMBLER
 | 
			
		||||
 | 
			
		||||
#ifdef C_MSVC
 | 
			
		||||
#include <intrin.h>
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#ifdef C_SUN
 | 
			
		||||
#define	__asm__ __asm
 | 
			
		||||
#define	__volatile__
 | 
			
		||||
| 
						 | 
				
			
			@ -61,30 +65,45 @@
 | 
			
		|||
 | 
			
		||||
static void __inline blas_lock(volatile BLASULONG *address){
 | 
			
		||||
 | 
			
		||||
#ifndef C_MSVC
 | 
			
		||||
  int ret;
 | 
			
		||||
#else
 | 
			
		||||
  BLASULONG ret;
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
  do {
 | 
			
		||||
    while (*address) {YIELDING;};
 | 
			
		||||
 | 
			
		||||
#ifndef C_MSVC
 | 
			
		||||
    __asm__ __volatile__(
 | 
			
		||||
			 "xchgl %0, %1\n"
 | 
			
		||||
			 : "=r"(ret), "=m"(*address)
 | 
			
		||||
			 : "0"(1), "m"(*address)
 | 
			
		||||
			 : "memory");
 | 
			
		||||
 | 
			
		||||
#else
 | 
			
		||||
    ret=InterlockedExchange64((volatile LONG64 *)(address), 1);
 | 
			
		||||
#endif
 | 
			
		||||
  } while (ret);
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
#define BLAS_LOCK_DEFINED
 | 
			
		||||
 | 
			
		||||
static __inline BLASULONG rpcc(void){
 | 
			
		||||
#ifdef C_MSVC
 | 
			
		||||
  return __rdtsc();
 | 
			
		||||
#else
 | 
			
		||||
  BLASULONG a, d;
 | 
			
		||||
 | 
			
		||||
  __asm__ __volatile__ ("rdtsc" : "=a" (a), "=d" (d));
 | 
			
		||||
 | 
			
		||||
  return ((BLASULONG)a + ((BLASULONG)d << 32));
 | 
			
		||||
#endif
 | 
			
		||||
}
 | 
			
		||||
#define RPCC_DEFINED
 | 
			
		||||
 | 
			
		||||
#define RPCC64BIT
 | 
			
		||||
 | 
			
		||||
#ifndef C_MSVC
 | 
			
		||||
static __inline BLASULONG getstackaddr(void){
 | 
			
		||||
  BLASULONG addr;
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -93,22 +112,32 @@ static __inline BLASULONG getstackaddr(void){
 | 
			
		|||
 | 
			
		||||
  return addr;
 | 
			
		||||
}
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
/*
 * Execute CPUID for leaf `op` and store the resulting EAX, EBX, ECX and EDX
 * register values through the four output pointers.
 *
 * op  - CPUID leaf number (loaded into EAX).
 * eax, ebx, ecx, edx - receive the corresponding output registers.
 */
static __inline void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx){
#ifdef C_MSVC
  int cpuinfo[4];
  __cpuid(cpuinfo, op);
  *eax=cpuinfo[0];
  *ebx=cpuinfo[1];
  *ecx=cpuinfo[2];
  *edx=cpuinfo[3];
#else
  /*
   * Several leaves (e.g. 4, 7, 0xB) read a sub-leaf index from ECX.  Load
   * ECX with 0 instead of leaving whatever the compiler had in the register,
   * so the query is deterministic (the MSVC __cpuid intrinsic is documented
   * to clear ECX as well).
   */
  __asm__ __volatile__("cpuid"
                       : "=a" (*eax),
                         "=b" (*ebx),
                         "=c" (*ecx),
                         "=d" (*edx)
                       : "0" (op), "2" (0));
#endif
}
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
#define WHEREAMI
 | 
			
		||||
*/
 | 
			
		||||
 | 
			
		||||
static inline int WhereAmI(void){
 | 
			
		||||
static __inline int WhereAmI(void){
 | 
			
		||||
  int eax, ebx, ecx, edx;
 | 
			
		||||
  int apicid;
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -150,10 +179,14 @@ static inline int WhereAmI(void){
 | 
			
		|||
#define GET_IMAGE_CANCEL
 | 
			
		||||
 | 
			
		||||
#ifdef SMP
 | 
			
		||||
#ifdef USE64BITINT
 | 
			
		||||
#if defined(USE64BITINT)
 | 
			
		||||
/* Plain integer division of x by y, used when USE64BITINT is defined. */
static __inline blasint blas_quickdivide(blasint x, blasint y){
  blasint quotient = x / y;

  return quotient;
}
 | 
			
		||||
#elif defined (C_MSVC)
 | 
			
		||||
/* Plain integer division of x by y, used for C_MSVC builds. */
static __inline BLASLONG blas_quickdivide(BLASLONG x, BLASLONG y){
  BLASLONG quotient = x / y;

  return quotient;
}
 | 
			
		||||
#else
 | 
			
		||||
extern unsigned int blas_quick_divide_table[];
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -226,7 +259,7 @@ static __inline int blas_quickdivide(unsigned int x, unsigned int y){
 | 
			
		|||
 | 
			
		||||
#ifdef ASSEMBLER
 | 
			
		||||
 | 
			
		||||
#if defined(PILEDRIVER) || defined(BULLDOZER) || defined(STEAMROLLER)
 | 
			
		||||
#if defined(PILEDRIVER) || defined(BULLDOZER) || defined(STEAMROLLER) || defined(EXCAVATOR)
 | 
			
		||||
//Enable some optimizations for Barcelona.
 | 
			
		||||
#define BARCELONA_OPTIMIZATION
 | 
			
		||||
#endif
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										18
									
								
								common_z.h
								
								
								
								
							
							
						
						
									
										18
									
								
								common_z.h
								
								
								
								
							| 
						 | 
				
			
			@ -220,6 +220,15 @@
 | 
			
		|||
#define ZOMATCOPY_K_CTC         zomatcopy_k_ctc
 | 
			
		||||
#define ZOMATCOPY_K_RTC         zomatcopy_k_rtc
 | 
			
		||||
 | 
			
		||||
#define ZIMATCOPY_K_CN          zimatcopy_k_cn
 | 
			
		||||
#define ZIMATCOPY_K_RN          zimatcopy_k_rn
 | 
			
		||||
#define ZIMATCOPY_K_CT          zimatcopy_k_ct
 | 
			
		||||
#define ZIMATCOPY_K_RT          zimatcopy_k_rt
 | 
			
		||||
#define ZIMATCOPY_K_CNC         zimatcopy_k_cnc
 | 
			
		||||
#define ZIMATCOPY_K_RNC         zimatcopy_k_rnc
 | 
			
		||||
#define ZIMATCOPY_K_CTC         zimatcopy_k_ctc
 | 
			
		||||
#define ZIMATCOPY_K_RTC         zimatcopy_k_rtc
 | 
			
		||||
 | 
			
		||||
#define ZGEADD_K                zgeadd_k 
 | 
			
		||||
 | 
			
		||||
#else
 | 
			
		||||
| 
						 | 
				
			
			@ -404,6 +413,15 @@
 | 
			
		|||
#define ZOMATCOPY_K_CTC         gotoblas -> zomatcopy_k_ctc
 | 
			
		||||
#define ZOMATCOPY_K_RTC         gotoblas -> zomatcopy_k_rtc
 | 
			
		||||
 | 
			
		||||
#define ZIMATCOPY_K_CN          gotoblas -> zimatcopy_k_cn
 | 
			
		||||
#define ZIMATCOPY_K_RN          gotoblas -> zimatcopy_k_rn
 | 
			
		||||
#define ZIMATCOPY_K_CT          gotoblas -> zimatcopy_k_ct
 | 
			
		||||
#define ZIMATCOPY_K_RT          gotoblas -> zimatcopy_k_rt
 | 
			
		||||
#define ZIMATCOPY_K_CNC         gotoblas -> zimatcopy_k_cnc
 | 
			
		||||
#define ZIMATCOPY_K_RNC         gotoblas -> zimatcopy_k_rnc
 | 
			
		||||
#define ZIMATCOPY_K_CTC         gotoblas -> zimatcopy_k_ctc
 | 
			
		||||
#define ZIMATCOPY_K_RTC         gotoblas -> zimatcopy_k_rtc
 | 
			
		||||
 | 
			
		||||
#define ZGEADD_K                gotoblas -> zgeadd_k
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										8
									
								
								cpuid.h
								
								
								
								
							
							
						
						
									
										8
									
								
								cpuid.h
								
								
								
								
							| 
						 | 
				
			
			@ -39,6 +39,10 @@
 | 
			
		|||
#ifndef CPUID_H
 | 
			
		||||
#define CPUID_H
 | 
			
		||||
 | 
			
		||||
#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64)
 | 
			
		||||
#define INTEL_AMD
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#define VENDOR_INTEL      1
 | 
			
		||||
#define VENDOR_UMC        2
 | 
			
		||||
#define VENDOR_AMD        3
 | 
			
		||||
| 
						 | 
				
			
			@ -59,7 +63,7 @@
 | 
			
		|||
#define FAMILY_PM     7
 | 
			
		||||
#define FAMILY_IA64   8
 | 
			
		||||
 | 
			
		||||
#if defined(__i386__) || defined(__x86_64__)
 | 
			
		||||
#ifdef INTEL_AMD
 | 
			
		||||
#define GET_EXFAMILY  1
 | 
			
		||||
#define GET_EXMODEL   2
 | 
			
		||||
#define GET_TYPE      3
 | 
			
		||||
| 
						 | 
				
			
			@ -109,6 +113,7 @@
 | 
			
		|||
#define CORE_PILEDRIVER  23
 | 
			
		||||
#define CORE_HASWELL     24
 | 
			
		||||
#define CORE_STEAMROLLER 25
 | 
			
		||||
#define CORE_EXCAVATOR   26
 | 
			
		||||
 | 
			
		||||
#define HAVE_SSE      (1 <<  0)
 | 
			
		||||
#define HAVE_SSE2     (1 <<  1)
 | 
			
		||||
| 
						 | 
				
			
			@ -203,5 +208,6 @@ typedef struct {
 | 
			
		|||
#define CPUTYPE_PILEDRIVER              47
 | 
			
		||||
#define CPUTYPE_HASWELL 		48
 | 
			
		||||
#define CPUTYPE_STEAMROLLER 		49
 | 
			
		||||
#define CPUTYPE_EXCAVATOR 		50
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -192,6 +192,7 @@ void get_cpuconfig(void)
 | 
			
		|||
	{
 | 
			
		||||
	       case CPU_CORTEXA9:
 | 
			
		||||
    			printf("#define CORTEXA9\n");
 | 
			
		||||
    			printf("#define ARMV7\n");
 | 
			
		||||
    			printf("#define HAVE_VFP\n");
 | 
			
		||||
    			printf("#define HAVE_VFPV3\n");
 | 
			
		||||
			if ( get_feature("neon"))	printf("#define HAVE_NEON\n");
 | 
			
		||||
| 
						 | 
				
			
			@ -207,6 +208,7 @@ void get_cpuconfig(void)
 | 
			
		|||
 | 
			
		||||
	       case CPU_CORTEXA15:
 | 
			
		||||
    			printf("#define CORTEXA15\n");
 | 
			
		||||
    			printf("#define ARMV7\n");
 | 
			
		||||
    			printf("#define HAVE_VFP\n");
 | 
			
		||||
    			printf("#define HAVE_VFPV3\n");
 | 
			
		||||
			if ( get_feature("neon"))	printf("#define HAVE_NEON\n");
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -115,6 +115,7 @@ int detect(void){
 | 
			
		|||
  if (!strncasecmp(p, "POWER5", 6)) return CPUTYPE_POWER5;
 | 
			
		||||
  if (!strncasecmp(p, "POWER6", 6)) return CPUTYPE_POWER6;
 | 
			
		||||
  if (!strncasecmp(p, "POWER7", 6)) return CPUTYPE_POWER6;
 | 
			
		||||
  if (!strncasecmp(p, "POWER8", 6)) return CPUTYPE_POWER6;
 | 
			
		||||
  if (!strncasecmp(p, "Cell",   4)) return CPUTYPE_CELL;
 | 
			
		||||
  if (!strncasecmp(p, "7447",   4)) return CPUTYPE_PPCG4;
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										169
									
								
								cpuid_x86.c
								
								
								
								
							
							
						
						
									
										169
									
								
								cpuid_x86.c
								
								
								
								
							| 
						 | 
				
			
			@ -40,6 +40,12 @@
 | 
			
		|||
#include <string.h>
 | 
			
		||||
#include "cpuid.h"
 | 
			
		||||
 | 
			
		||||
#if defined(_MSC_VER) && !defined(__clang__)
 | 
			
		||||
#define C_INLINE __inline
 | 
			
		||||
#else
 | 
			
		||||
#define C_INLINE inline
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
#ifdef NO_AVX
 | 
			
		||||
#define CPUTYPE_HASWELL CPUTYPE_NEHALEM
 | 
			
		||||
| 
						 | 
				
			
			@ -53,12 +59,26 @@
 | 
			
		|||
#endif
 | 
			
		||||
*/
 | 
			
		||||
 | 
			
		||||
#if defined(_MSC_VER) && !defined(__clang__)
 | 
			
		||||
 | 
			
		||||
/*
 * MSVC implementation of cpuid(): runs the __cpuid intrinsic for leaf `op`
 * and copies the four result words out to *eax, *ebx, *ecx and *edx.
 */
void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx)
{
  /* First word starts at -1, the remaining three at 0. */
  int regs[4] = {-1, 0, 0, 0};
  int *const out[4] = {eax, ebx, ecx, edx};
  int i;

  __cpuid(regs, op);

  for (i = 0; i < 4; i++) {
    *out[i] = regs[i];
  }
}
 | 
			
		||||
 | 
			
		||||
#else
 | 
			
		||||
 | 
			
		||||
#ifndef CPUIDEMU
 | 
			
		||||
 | 
			
		||||
#if defined(__APPLE__) && defined(__i386__)
 | 
			
		||||
void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx);
 | 
			
		||||
#else
 | 
			
		||||
static inline void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx){
 | 
			
		||||
static C_INLINE void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx){
 | 
			
		||||
#if defined(__i386__) && defined(__PIC__)
 | 
			
		||||
  __asm__ __volatile__
 | 
			
		||||
    ("mov %%ebx, %%edi;"
 | 
			
		||||
| 
						 | 
				
			
			@ -115,14 +135,16 @@ void cpuid(unsigned int op, unsigned int *eax, unsigned int *ebx, unsigned int *
 | 
			
		|||
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
static inline int have_cpuid(void){
 | 
			
		||||
#endif // _MSC_VER
 | 
			
		||||
 | 
			
		||||
/*
 * Query CPUID leaf 0 and return the value it reports in EAX.
 */
static C_INLINE int have_cpuid(void){
  int regs[4];

  cpuid(0, &regs[0], &regs[1], &regs[2], &regs[3]);

  return regs[0];
}
 | 
			
		||||
 | 
			
		||||
static inline int have_excpuid(void){
 | 
			
		||||
static C_INLINE int have_excpuid(void){
 | 
			
		||||
  int eax, ebx, ecx, edx;
 | 
			
		||||
 | 
			
		||||
  cpuid(0x80000000, &eax, &ebx, &ecx, &edx);
 | 
			
		||||
| 
						 | 
				
			
			@ -130,10 +152,14 @@ static inline int have_excpuid(void){
 | 
			
		|||
}
 | 
			
		||||
 | 
			
		||||
#ifndef NO_AVX
 | 
			
		||||
static inline void xgetbv(int op, int * eax, int * edx){
 | 
			
		||||
/*
 * Read the extended control register selected by `op` with XGETBV.
 *
 * op  - register index, passed in ECX.
 * eax - receives the low 32 bits of the register.
 * edx - receives the high 32 bits of the register.
 */
static C_INLINE void xgetbv(int op, int * eax, int * edx){
#if defined(_MSC_VER) && !defined(__clang__)
  /*
   * Split the intrinsic's result so that *edx is written too; previously
   * only *eax was stored and the caller's edx stayed uninitialised.
   */
  unsigned long long xcr = (unsigned long long)__xgetbv(op);

  *eax = (int)(xcr & 0xffffffffULL);
  *edx = (int)(xcr >> 32);
#else
  //Use binary code for xgetbv
  __asm__ __volatile__
    (".byte 0x0f, 0x01, 0xd0": "=a" (*eax), "=d" (*edx) : "c" (op) : "cc");
#endif
}
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -1098,6 +1124,16 @@ int get_cpuname(void){
 | 
			
		|||
            return CPUTYPE_HASWELL;
 | 
			
		||||
#else
 | 
			
		||||
	    return CPUTYPE_SANDYBRIDGE;
 | 
			
		||||
#endif
 | 
			
		||||
          else
 | 
			
		||||
	    return CPUTYPE_NEHALEM;
 | 
			
		||||
	case 13:
 | 
			
		||||
	  //Broadwell
 | 
			
		||||
          if(support_avx())
 | 
			
		||||
#ifndef NO_AVX2
 | 
			
		||||
            return CPUTYPE_HASWELL;
 | 
			
		||||
#else
 | 
			
		||||
	    return CPUTYPE_SANDYBRIDGE;
 | 
			
		||||
#endif
 | 
			
		||||
          else
 | 
			
		||||
	    return CPUTYPE_NEHALEM;
 | 
			
		||||
| 
						 | 
				
			
			@ -1112,6 +1148,52 @@ int get_cpuname(void){
 | 
			
		|||
            return CPUTYPE_HASWELL;
 | 
			
		||||
#else
 | 
			
		||||
	    return CPUTYPE_SANDYBRIDGE;
 | 
			
		||||
#endif
 | 
			
		||||
          else
 | 
			
		||||
	    return CPUTYPE_NEHALEM;
 | 
			
		||||
	case 7:
 | 
			
		||||
	case 15:
 | 
			
		||||
	  //Broadwell
 | 
			
		||||
          if(support_avx())
 | 
			
		||||
#ifndef NO_AVX2
 | 
			
		||||
            return CPUTYPE_HASWELL;
 | 
			
		||||
#else
 | 
			
		||||
	    return CPUTYPE_SANDYBRIDGE;
 | 
			
		||||
#endif
 | 
			
		||||
          else
 | 
			
		||||
	    return CPUTYPE_NEHALEM;
 | 
			
		||||
	case 14:
 | 
			
		||||
	  //Skylake
 | 
			
		||||
          if(support_avx())
 | 
			
		||||
#ifndef NO_AVX2
 | 
			
		||||
            return CPUTYPE_HASWELL;
 | 
			
		||||
#else
 | 
			
		||||
	    return CPUTYPE_SANDYBRIDGE;
 | 
			
		||||
#endif
 | 
			
		||||
          else
 | 
			
		||||
	    return CPUTYPE_NEHALEM;
 | 
			
		||||
        }
 | 
			
		||||
        break;
 | 
			
		||||
      case 5:
 | 
			
		||||
        switch (model) {
 | 
			
		||||
	case 6:
 | 
			
		||||
	  //Broadwell
 | 
			
		||||
          if(support_avx())
 | 
			
		||||
#ifndef NO_AVX2
 | 
			
		||||
            return CPUTYPE_HASWELL;
 | 
			
		||||
#else
 | 
			
		||||
	    return CPUTYPE_SANDYBRIDGE;
 | 
			
		||||
#endif
 | 
			
		||||
          else
 | 
			
		||||
	    return CPUTYPE_NEHALEM;
 | 
			
		||||
	case 5:
 | 
			
		||||
        case 14:
 | 
			
		||||
	  // Skylake
 | 
			
		||||
          if(support_avx())
 | 
			
		||||
#ifndef NO_AVX2
 | 
			
		||||
            return CPUTYPE_HASWELL;
 | 
			
		||||
#else
 | 
			
		||||
	    return CPUTYPE_SANDYBRIDGE;
 | 
			
		||||
#endif
 | 
			
		||||
          else
 | 
			
		||||
	    return CPUTYPE_NEHALEM;
 | 
			
		||||
| 
						 | 
				
			
			@ -1163,11 +1245,20 @@ int get_cpuname(void){
 | 
			
		|||
	  else
 | 
			
		||||
	    return CPUTYPE_BARCELONA; //OS don't support AVX.
 | 
			
		||||
	case 0:
 | 
			
		||||
	  switch(exmodel){
 | 
			
		||||
	  case 3:
 | 
			
		||||
	    if(support_avx())
 | 
			
		||||
	      return CPUTYPE_STEAMROLLER;
 | 
			
		||||
	    else
 | 
			
		||||
	      return CPUTYPE_BARCELONA; //OS don't support AVX.
 | 
			
		||||
 | 
			
		||||
	  case 6:
 | 
			
		||||
	    if(support_avx())
 | 
			
		||||
	      return CPUTYPE_EXCAVATOR;
 | 
			
		||||
	    else
 | 
			
		||||
	      return CPUTYPE_BARCELONA; //OS don't support AVX.
 | 
			
		||||
	  }
 | 
			
		||||
	  break;
 | 
			
		||||
	}
 | 
			
		||||
	break;
 | 
			
		||||
      case  5:
 | 
			
		||||
| 
						 | 
				
			
			@ -1297,6 +1388,7 @@ static char *cpuname[] = {
 | 
			
		|||
  "PILEDRIVER",
 | 
			
		||||
  "HASWELL",
 | 
			
		||||
  "STEAMROLLER",
 | 
			
		||||
  "EXCAVATOR",
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
static char *lowercpuname[] = {
 | 
			
		||||
| 
						 | 
				
			
			@ -1349,6 +1441,7 @@ static char *lowercpuname[] = {
 | 
			
		|||
  "piledriver",
 | 
			
		||||
  "haswell",
 | 
			
		||||
  "steamroller",
 | 
			
		||||
  "excavator",
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
static char *corename[] = {
 | 
			
		||||
| 
						 | 
				
			
			@ -1378,6 +1471,7 @@ static char *corename[] = {
 | 
			
		|||
  "PILEDRIVER",
 | 
			
		||||
  "HASWELL",
 | 
			
		||||
  "STEAMROLLER",
 | 
			
		||||
  "EXCAVATOR",
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
static char *corename_lower[] = {
 | 
			
		||||
| 
						 | 
				
			
			@ -1407,6 +1501,7 @@ static char *corename_lower[] = {
 | 
			
		|||
  "piledriver",
 | 
			
		||||
  "haswell",
 | 
			
		||||
  "steamroller",
 | 
			
		||||
  "excavator",
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -1525,6 +1620,16 @@ int get_coretype(void){
 | 
			
		|||
            return CORE_HASWELL;
 | 
			
		||||
#else
 | 
			
		||||
	    return CORE_SANDYBRIDGE;
 | 
			
		||||
#endif
 | 
			
		||||
          else
 | 
			
		||||
	    return CORE_NEHALEM;
 | 
			
		||||
	case 13:
 | 
			
		||||
	  //broadwell
 | 
			
		||||
          if(support_avx())
 | 
			
		||||
#ifndef NO_AVX2
 | 
			
		||||
            return CORE_HASWELL;
 | 
			
		||||
#else
 | 
			
		||||
	    return CORE_SANDYBRIDGE;
 | 
			
		||||
#endif
 | 
			
		||||
          else
 | 
			
		||||
	    return CORE_NEHALEM;
 | 
			
		||||
| 
						 | 
				
			
			@ -1539,6 +1644,52 @@ int get_coretype(void){
 | 
			
		|||
            return CORE_HASWELL;
 | 
			
		||||
#else
 | 
			
		||||
	    return CORE_SANDYBRIDGE;
 | 
			
		||||
#endif
 | 
			
		||||
          else
 | 
			
		||||
	    return CORE_NEHALEM;
 | 
			
		||||
	case 7:
 | 
			
		||||
	case 15:
 | 
			
		||||
	  //broadwell
 | 
			
		||||
          if(support_avx())
 | 
			
		||||
#ifndef NO_AVX2
 | 
			
		||||
            return CORE_HASWELL;
 | 
			
		||||
#else
 | 
			
		||||
	    return CORE_SANDYBRIDGE;
 | 
			
		||||
#endif
 | 
			
		||||
          else
 | 
			
		||||
	    return CORE_NEHALEM;
 | 
			
		||||
	case 14:
 | 
			
		||||
	  //Skylake
 | 
			
		||||
          if(support_avx())
 | 
			
		||||
#ifndef NO_AVX2
 | 
			
		||||
            return CORE_HASWELL;
 | 
			
		||||
#else
 | 
			
		||||
	    return CORE_SANDYBRIDGE;
 | 
			
		||||
#endif
 | 
			
		||||
          else
 | 
			
		||||
	    return CORE_NEHALEM;
 | 
			
		||||
        }
 | 
			
		||||
        break;
 | 
			
		||||
      case 5:
 | 
			
		||||
        switch (model) {
 | 
			
		||||
	case 6:
 | 
			
		||||
	  //broadwell
 | 
			
		||||
          if(support_avx())
 | 
			
		||||
#ifndef NO_AVX2
 | 
			
		||||
            return CORE_HASWELL;
 | 
			
		||||
#else
 | 
			
		||||
	    return CORE_SANDYBRIDGE;
 | 
			
		||||
#endif
 | 
			
		||||
          else
 | 
			
		||||
	    return CORE_NEHALEM;
 | 
			
		||||
	case 5:
 | 
			
		||||
	case 14:
 | 
			
		||||
	  // Skylake
 | 
			
		||||
          if(support_avx())
 | 
			
		||||
#ifndef NO_AVX2
 | 
			
		||||
            return CORE_HASWELL;
 | 
			
		||||
#else
 | 
			
		||||
	    return CORE_SANDYBRIDGE;
 | 
			
		||||
#endif
 | 
			
		||||
          else
 | 
			
		||||
	    return CORE_NEHALEM;
 | 
			
		||||
| 
						 | 
				
			
			@ -1574,10 +1725,20 @@ int get_coretype(void){
 | 
			
		|||
	    return CORE_BARCELONA; //OS don't support AVX.
 | 
			
		||||
	
 | 
			
		||||
	case 0:
 | 
			
		||||
	  switch(exmodel){
 | 
			
		||||
	  case 3:
 | 
			
		||||
	    if(support_avx())
 | 
			
		||||
	      return CORE_STEAMROLLER;
 | 
			
		||||
	    else
 | 
			
		||||
	      return CORE_BARCELONA; //OS don't support AVX.
 | 
			
		||||
 | 
			
		||||
	  case 6:
 | 
			
		||||
	    if(support_avx())
 | 
			
		||||
	      return CORE_EXCAVATOR;
 | 
			
		||||
	    else
 | 
			
		||||
	      return CORE_BARCELONA; //OS don't support AVX.
 | 
			
		||||
	  }
 | 
			
		||||
	  break;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										4
									
								
								ctest.c
								
								
								
								
							
							
						
						
									
										4
									
								
								ctest.c
								
								
								
								
							| 
						 | 
				
			
			@ -44,6 +44,10 @@ COMPILER_DEC
 | 
			
		|||
COMPILER_GNU
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#if defined(__ANDROID__)
 | 
			
		||||
OS_ANDROID
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#if defined(__linux__)
 | 
			
		||||
OS_LINUX
 | 
			
		||||
#endif
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -0,0 +1,46 @@
 | 
			
		|||
# Builds and registers the CBLAS interface tests.
#
# For every entry of FLOAT_TYPES the first character, lower-cased, selects the
# source files (for example an entry starting with "S" builds xscblat1,
# xscblat2 and xscblat3).  Each executable links against the static OpenBLAS
# library target ${OpenBLAS_LIBNAME}_static and is registered with CTest.
#
# Variables read from the enclosing scope: FLOAT_TYPES, BU, OpenBLAS_LIBNAME.

# Make the top-level directory of this project available to #include.
# PROJECT_SOURCE_DIR (not CMAKE_SOURCE_DIR) keeps this working when the
# project is added as a subdirectory of another build, and matches the
# variable already used for the test input files below.
include_directories(${PROJECT_SOURCE_DIR})

enable_language(Fortran)

# The definitions are appended to CMAKE_C_FLAGS so that they reach only the C
# sources of this directory, not the Fortran drivers.
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DADD${BU} -DCBLAS")

# The level 2 and level 3 drivers read their parameters from standard input.
# add_test() cannot redirect stdin, so a one-line shell script does it:
#   sh test_cblas_helper.sh <executable> <input file>
set(cblas_test_helper "${CMAKE_CURRENT_BINARY_DIR}/test_cblas_helper.sh")
file(WRITE "${cblas_test_helper}"
"$1 < $2\n"
)

foreach(float_type ${FLOAT_TYPES})
  string(SUBSTRING "${float_type}" 0 1 float_char_upper)
  string(TOLOWER "${float_char_upper}" float_char)

  # Level 1: takes no input file.
  add_executable(x${float_char}cblat1
    c_${float_char}blat1.f
    c_${float_char}blas1.c)
  target_link_libraries(x${float_char}cblat1 ${OpenBLAS_LIBNAME}_static)
  # Naming the target (instead of a hard-coded path) lets CMake substitute the
  # real location of the executable, which also works with multi-configuration
  # generators that place binaries in a per-configuration subdirectory.
  add_test(NAME "x${float_char}cblat1"
    COMMAND x${float_char}cblat1)

  # Levels 2 and 3 share the same layout: Fortran driver, C wrappers, the
  # error-exit checks and three common support files.
  foreach(level 2 3)
    add_executable(x${float_char}cblat${level}
      c_${float_char}blat${level}.f
      c_${float_char}blas${level}.c
      c_${float_char}${level}chke.c
      auxiliary.c
      c_xerbla.c
      constant.c)
    target_link_libraries(x${float_char}cblat${level} ${OpenBLAS_LIBNAME}_static)
    # $<TARGET_FILE:...> is expanded to the built executable's full path.
    add_test(NAME "x${float_char}cblat${level}"
      COMMAND sh "${cblas_test_helper}" "$<TARGET_FILE:x${float_char}cblat${level}>" "${PROJECT_SOURCE_DIR}/ctest/${float_char}in${level}")
  endforeach()

endforeach()
 | 
			
		||||
| 
						 | 
				
			
			@ -27,12 +27,18 @@ ctestl2o = c_cblas2.o c_c2chke.o auxiliary.o c_xerbla.o constant.o
 | 
			
		|||
 | 
			
		||||
ctestl3o = c_cblas3.o c_c3chke.o auxiliary.o c_xerbla.o constant.o
 | 
			
		||||
 | 
			
		||||
ctestl3o_3m = c_cblas3_3m.o c_c3chke_3m.o auxiliary.o c_xerbla.o constant.o
 | 
			
		||||
 | 
			
		||||
ztestl1o = c_zblas1.o
 | 
			
		||||
 | 
			
		||||
ztestl2o = c_zblas2.o c_z2chke.o auxiliary.o c_xerbla.o constant.o
 | 
			
		||||
 | 
			
		||||
ztestl3o = c_zblas3.o c_z3chke.o auxiliary.o c_xerbla.o constant.o
 | 
			
		||||
 | 
			
		||||
ztestl3o_3m = c_zblas3_3m.o c_z3chke_3m.o auxiliary.o c_xerbla.o constant.o
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
all :: all1 all2 all3
 | 
			
		||||
 | 
			
		||||
all1: xscblat1 xdcblat1 xccblat1 xzcblat1
 | 
			
		||||
| 
						 | 
				
			
			@ -115,8 +121,8 @@ xccblat2: $(ctestl2o) c_cblat2.o $(TOPDIR)/$(LIBNAME)
 | 
			
		|||
xccblat3: $(ctestl3o) c_cblat3.o $(TOPDIR)/$(LIBNAME)
 | 
			
		||||
	$(FC) $(FLDFLAGS) -o xccblat3 c_cblat3.o $(ctestl3o) $(LIB) $(EXTRALIB) $(CEXTRALIB)
 | 
			
		||||
 | 
			
		||||
xccblat3_3m: $(ctestl3o) c_cblat3_3m.o $(TOPDIR)/$(LIBNAME)
 | 
			
		||||
	$(FC) $(FLDFLAGS) -o xccblat3_3m c_cblat3_3m.o $(ctestl3o) $(LIB) $(EXTRALIB) $(CEXTRALIB)
 | 
			
		||||
xccblat3_3m: $(ctestl3o_3m) c_cblat3_3m.o $(TOPDIR)/$(LIBNAME)
 | 
			
		||||
	$(FC) $(FLDFLAGS) -o xccblat3_3m c_cblat3_3m.o $(ctestl3o_3m) $(LIB) $(EXTRALIB) $(CEXTRALIB)
 | 
			
		||||
 | 
			
		||||
# Double complex
 | 
			
		||||
xzcblat1: $(ztestl1o) c_zblat1.o $(TOPDIR)/$(LIBNAME)
 | 
			
		||||
| 
						 | 
				
			
			@ -127,8 +133,8 @@ xzcblat3: $(ztestl3o) c_zblat3.o $(TOPDIR)/$(LIBNAME)
 | 
			
		|||
	$(FC) $(FLDFLAGS) -o xzcblat3 c_zblat3.o $(ztestl3o) $(LIB) $(EXTRALIB) $(CEXTRALIB)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
xzcblat3_3m: $(ztestl3o) c_zblat3_3m.o $(TOPDIR)/$(LIBNAME)
 | 
			
		||||
	$(FC) $(FLDFLAGS) -o xzcblat3_3m c_zblat3_3m.o $(ztestl3o) $(LIB) $(EXTRALIB) $(CEXTRALIB)
 | 
			
		||||
xzcblat3_3m: $(ztestl3o_3m) c_zblat3_3m.o $(TOPDIR)/$(LIBNAME)
 | 
			
		||||
	$(FC) $(FLDFLAGS) -o xzcblat3_3m c_zblat3_3m.o $(ztestl3o_3m) $(LIB) $(EXTRALIB) $(CEXTRALIB)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
include $(TOPDIR)/Makefile.tail
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										230
									
								
								ctest/c_c3chke.c
								
								
								
								
							
							
						
						
									
										230
									
								
								ctest/c_c3chke.c
								
								
								
								
							| 
						 | 
				
			
			@ -46,235 +46,7 @@ void  F77_c3chke(char *  rout) {
 | 
			
		|||
   }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
   if (strncmp( sf,"cblas_cgemm3m"   ,13)==0) {
 | 
			
		||||
      cblas_rout = "cblas_cgemm3"   ;
 | 
			
		||||
 | 
			
		||||
      cblas_info = 1;
 | 
			
		||||
      cblas_cgemm3m( INVALID,  CblasNoTrans, CblasNoTrans, 0, 0, 0,
 | 
			
		||||
                   ALPHA, A, 1, B, 1, BETA, C, 1 );
 | 
			
		||||
      chkxer();
 | 
			
		||||
      cblas_info = 1;
 | 
			
		||||
      cblas_cgemm3m( INVALID,  CblasNoTrans, CblasTrans, 0, 0, 0,
 | 
			
		||||
                   ALPHA, A, 1, B, 1, BETA, C, 1 );
 | 
			
		||||
      chkxer();
 | 
			
		||||
      cblas_info = 1;
 | 
			
		||||
      cblas_cgemm3m( INVALID,  CblasTrans, CblasNoTrans, 0, 0, 0,
 | 
			
		||||
                   ALPHA, A, 1, B, 1, BETA, C, 1 );
 | 
			
		||||
      chkxer();
 | 
			
		||||
      cblas_info = 1;
 | 
			
		||||
      cblas_cgemm3m( INVALID,  CblasTrans, CblasTrans, 0, 0, 0,
 | 
			
		||||
                   ALPHA, A, 1, B, 1, BETA, C, 1 );
 | 
			
		||||
      chkxer();
 | 
			
		||||
      cblas_info = 2; RowMajorStrg = FALSE;
 | 
			
		||||
      cblas_cgemm3m( CblasColMajor,  INVALID, CblasNoTrans, 0, 0, 0,
 | 
			
		||||
                   ALPHA, A, 1, B, 1, BETA, C, 1 );
 | 
			
		||||
      chkxer();
 | 
			
		||||
      cblas_info = 2; RowMajorStrg = FALSE;
 | 
			
		||||
      cblas_cgemm3m( CblasColMajor,  INVALID, CblasTrans, 0, 0, 0,
 | 
			
		||||
                   ALPHA, A, 1, B, 1, BETA, C, 1 );
 | 
			
		||||
      chkxer();
 | 
			
		||||
      cblas_info = 3; RowMajorStrg = FALSE;
 | 
			
		||||
      cblas_cgemm3m( CblasColMajor,  CblasNoTrans, INVALID, 0, 0, 0,
 | 
			
		||||
                   ALPHA, A, 1, B, 1, BETA, C, 1 );
 | 
			
		||||
      chkxer();
 | 
			
		||||
      cblas_info = 3; RowMajorStrg = FALSE;
 | 
			
		||||
      cblas_cgemm3m( CblasColMajor,  CblasTrans, INVALID, 0, 0, 0,
 | 
			
		||||
                   ALPHA, A, 1, B, 1, BETA, C, 1 );
 | 
			
		||||
      chkxer();
 | 
			
		||||
      cblas_info = 4; RowMajorStrg = FALSE;
 | 
			
		||||
      cblas_cgemm3m( CblasColMajor,  CblasNoTrans, CblasNoTrans, INVALID, 0, 0,
 | 
			
		||||
                   ALPHA, A, 1, B, 1, BETA, C, 1 );
 | 
			
		||||
      chkxer();
 | 
			
		||||
      cblas_info = 4; RowMajorStrg = FALSE;
 | 
			
		||||
      cblas_cgemm3m( CblasColMajor,  CblasNoTrans, CblasTrans, INVALID, 0, 0,
 | 
			
		||||
                   ALPHA, A, 1, B, 1, BETA, C, 1 );
 | 
			
		||||
      chkxer();
 | 
			
		||||
      cblas_info = 4; RowMajorStrg = FALSE;
 | 
			
		||||
      cblas_cgemm3m( CblasColMajor,  CblasTrans, CblasNoTrans, INVALID, 0, 0,
 | 
			
		||||
                   ALPHA, A, 1, B, 1, BETA, C, 1 );
 | 
			
		||||
      chkxer();
 | 
			
		||||
      cblas_info = 4; RowMajorStrg = FALSE;
 | 
			
		||||
      cblas_cgemm3m( CblasColMajor,  CblasTrans, CblasTrans, INVALID, 0, 0,
 | 
			
		||||
                   ALPHA, A, 1, B, 1, BETA, C, 1 );
 | 
			
		||||
      chkxer();
 | 
			
		||||
      cblas_info = 5; RowMajorStrg = FALSE;
 | 
			
		||||
      cblas_cgemm3m( CblasColMajor,  CblasNoTrans, CblasNoTrans, 0, INVALID, 0,
 | 
			
		||||
                   ALPHA, A, 1, B, 1, BETA, C, 1 );
 | 
			
		||||
      chkxer();
 | 
			
		||||
      cblas_info = 5; RowMajorStrg = FALSE;
 | 
			
		||||
      cblas_cgemm3m( CblasColMajor,  CblasNoTrans, CblasTrans, 0, INVALID, 0,
 | 
			
		||||
                   ALPHA, A, 1, B, 1, BETA, C, 1 );
 | 
			
		||||
      chkxer();
 | 
			
		||||
      cblas_info = 5; RowMajorStrg = FALSE;
 | 
			
		||||
      cblas_cgemm3m( CblasColMajor,  CblasTrans, CblasNoTrans, 0, INVALID, 0,
 | 
			
		||||
                   ALPHA, A, 1, B, 1, BETA, C, 1 );
 | 
			
		||||
      chkxer();
 | 
			
		||||
      cblas_info = 5; RowMajorStrg = FALSE;
 | 
			
		||||
      cblas_cgemm3m( CblasColMajor,  CblasTrans, CblasTrans, 0, INVALID, 0,
 | 
			
		||||
                   ALPHA, A, 1, B, 1, BETA, C, 1 );
 | 
			
		||||
      chkxer();
 | 
			
		||||
      cblas_info = 6; RowMajorStrg = FALSE;
 | 
			
		||||
      cblas_cgemm3m( CblasColMajor,  CblasNoTrans, CblasNoTrans, 0, 0, INVALID,
 | 
			
		||||
                   ALPHA, A, 1, B, 1, BETA, C, 1 );
 | 
			
		||||
      chkxer();
 | 
			
		||||
      cblas_info = 6; RowMajorStrg = FALSE;
 | 
			
		||||
      cblas_cgemm3m( CblasColMajor,  CblasNoTrans, CblasTrans, 0, 0, INVALID,
 | 
			
		||||
                   ALPHA, A, 1, B, 1, BETA, C, 1 );
 | 
			
		||||
      chkxer();
 | 
			
		||||
      cblas_info = 6; RowMajorStrg = FALSE;
 | 
			
		||||
      cblas_cgemm3m( CblasColMajor,  CblasTrans, CblasNoTrans, 0, 0, INVALID,
 | 
			
		||||
                   ALPHA, A, 1, B, 1, BETA, C, 1 );
 | 
			
		||||
      chkxer();
 | 
			
		||||
      cblas_info = 6; RowMajorStrg = FALSE;
 | 
			
		||||
      cblas_cgemm3m( CblasColMajor,  CblasTrans, CblasTrans, 0, 0, INVALID,
 | 
			
		||||
                   ALPHA, A, 1, B, 1, BETA, C, 1 );
 | 
			
		||||
      chkxer();
 | 
			
		||||
      cblas_info = 9; RowMajorStrg = FALSE;
 | 
			
		||||
      cblas_cgemm3m( CblasColMajor,  CblasNoTrans, CblasNoTrans, 2, 0, 0,
 | 
			
		||||
                   ALPHA, A, 1, B, 1, BETA, C, 2 );
 | 
			
		||||
      chkxer();
 | 
			
		||||
      cblas_info = 9; RowMajorStrg = FALSE;
 | 
			
		||||
      cblas_cgemm3m( CblasColMajor,  CblasNoTrans, CblasTrans, 2, 0, 0,
 | 
			
		||||
                   ALPHA, A, 1, B, 1, BETA, C, 2 );
 | 
			
		||||
      chkxer();
 | 
			
		||||
      cblas_info = 9; RowMajorStrg = FALSE;
 | 
			
		||||
      cblas_cgemm3m( CblasColMajor,  CblasTrans, CblasNoTrans, 0, 0, 2,
 | 
			
		||||
                   ALPHA, A, 1, B, 2, BETA, C, 1 );
 | 
			
		||||
      chkxer();
 | 
			
		||||
      cblas_info = 9; RowMajorStrg = FALSE;
 | 
			
		||||
      cblas_cgemm3m( CblasColMajor,  CblasTrans, CblasTrans, 0, 0, 2,
 | 
			
		||||
                   ALPHA, A, 1, B, 1, BETA, C, 1 );
 | 
			
		||||
      chkxer();
 | 
			
		||||
      cblas_info = 11; RowMajorStrg = FALSE;
 | 
			
		||||
      cblas_cgemm3m( CblasColMajor,  CblasNoTrans, CblasNoTrans, 0, 0, 2,
 | 
			
		||||
                   ALPHA, A, 1, B, 1, BETA, C, 1 );
 | 
			
		||||
      chkxer();
 | 
			
		||||
      cblas_info = 11; RowMajorStrg = FALSE;
 | 
			
		||||
      cblas_cgemm3m( CblasColMajor,  CblasTrans, CblasNoTrans, 0, 0, 2,
 | 
			
		||||
                   ALPHA, A, 2, B, 1, BETA, C, 1 );
 | 
			
		||||
      chkxer();
 | 
			
		||||
      cblas_info = 11; RowMajorStrg = FALSE;
 | 
			
		||||
      cblas_cgemm3m( CblasColMajor,  CblasNoTrans, CblasTrans, 0, 2, 0,
 | 
			
		||||
                   ALPHA, A, 1, B, 1, BETA, C, 1 );
 | 
			
		||||
      chkxer();
 | 
			
		||||
      cblas_info = 11; RowMajorStrg = FALSE;
 | 
			
		||||
      cblas_cgemm3m( CblasColMajor,  CblasTrans, CblasTrans, 0, 2, 0,
 | 
			
		||||
                   ALPHA, A, 1, B, 1, BETA, C, 1 );
 | 
			
		||||
      chkxer();
 | 
			
		||||
      cblas_info = 14; RowMajorStrg = FALSE;
 | 
			
		||||
      cblas_cgemm3m( CblasColMajor,  CblasNoTrans, CblasNoTrans, 2, 0, 0,
 | 
			
		||||
                   ALPHA, A, 2, B, 1, BETA, C, 1 );
 | 
			
		||||
      chkxer();
 | 
			
		||||
      cblas_info = 14; RowMajorStrg = FALSE;
 | 
			
		||||
      cblas_cgemm3m( CblasColMajor,  CblasNoTrans, CblasTrans, 2, 0, 0,
 | 
			
		||||
                   ALPHA, A, 2, B, 1, BETA, C, 1 );
 | 
			
		||||
      chkxer();
 | 
			
		||||
      cblas_info = 14; RowMajorStrg = FALSE;
 | 
			
		||||
      cblas_cgemm3m( CblasColMajor,  CblasTrans, CblasNoTrans, 2, 0, 0,
 | 
			
		||||
                   ALPHA, A, 1, B, 1, BETA, C, 1 );
 | 
			
		||||
      chkxer();
 | 
			
		||||
      cblas_info = 14; RowMajorStrg = FALSE;
 | 
			
		||||
      cblas_cgemm3m( CblasColMajor,  CblasTrans, CblasTrans, 2, 0, 0,
 | 
			
		||||
                   ALPHA, A, 1, B, 1, BETA, C, 1 );
 | 
			
		||||
      chkxer();
 | 
			
		||||
      cblas_info = 4; RowMajorStrg = TRUE;
 | 
			
		||||
      cblas_cgemm3m( CblasRowMajor,  CblasNoTrans, CblasNoTrans, INVALID, 0, 0,
 | 
			
		||||
                   ALPHA, A, 1, B, 1, BETA, C, 1 );
 | 
			
		||||
      chkxer();
 | 
			
		||||
      cblas_info = 4; RowMajorStrg = TRUE;
 | 
			
		||||
      cblas_cgemm3m( CblasRowMajor,  CblasNoTrans, CblasTrans, INVALID, 0, 0,
 | 
			
		||||
                   ALPHA, A, 1, B, 1, BETA, C, 1 );
 | 
			
		||||
      chkxer();
 | 
			
		||||
      cblas_info = 4; RowMajorStrg = TRUE;
 | 
			
		||||
      cblas_cgemm3m( CblasRowMajor,  CblasTrans, CblasNoTrans, INVALID, 0, 0,
 | 
			
		||||
                   ALPHA, A, 1, B, 1, BETA, C, 1 );
 | 
			
		||||
      chkxer();
 | 
			
		||||
      cblas_info = 4; RowMajorStrg = TRUE;
 | 
			
		||||
      cblas_cgemm3m( CblasRowMajor,  CblasTrans, CblasTrans, INVALID, 0, 0,
 | 
			
		||||
                   ALPHA, A, 1, B, 1, BETA, C, 1 );
 | 
			
		||||
      chkxer();
 | 
			
		||||
      cblas_info = 5; RowMajorStrg = TRUE;
 | 
			
		||||
      cblas_cgemm3m( CblasRowMajor,  CblasNoTrans, CblasNoTrans, 0, INVALID, 0,
 | 
			
		||||
                   ALPHA, A, 1, B, 1, BETA, C, 1 );
 | 
			
		||||
      chkxer();
 | 
			
		||||
      cblas_info = 5; RowMajorStrg = TRUE;
 | 
			
		||||
      cblas_cgemm3m( CblasRowMajor,  CblasNoTrans, CblasTrans, 0, INVALID, 0,
 | 
			
		||||
                   ALPHA, A, 1, B, 1, BETA, C, 1 );
 | 
			
		||||
      chkxer();
 | 
			
		||||
      cblas_info = 5; RowMajorStrg = TRUE;
 | 
			
		||||
      cblas_cgemm3m( CblasRowMajor,  CblasTrans, CblasNoTrans, 0, INVALID, 0,
 | 
			
		||||
                   ALPHA, A, 1, B, 1, BETA, C, 1 );
 | 
			
		||||
      chkxer();
 | 
			
		||||
      cblas_info = 5; RowMajorStrg = TRUE;
 | 
			
		||||
      cblas_cgemm3m( CblasRowMajor,  CblasTrans, CblasTrans, 0, INVALID, 0,
 | 
			
		||||
                   ALPHA, A, 1, B, 1, BETA, C, 1 );
 | 
			
		||||
      chkxer();
 | 
			
		||||
      cblas_info = 6; RowMajorStrg = TRUE;
 | 
			
		||||
      cblas_cgemm3m( CblasRowMajor,  CblasNoTrans, CblasNoTrans, 0, 0, INVALID,
 | 
			
		||||
                   ALPHA, A, 1, B, 1, BETA, C, 1 );
 | 
			
		||||
      chkxer();
 | 
			
		||||
      cblas_info = 6; RowMajorStrg = TRUE;
 | 
			
		||||
      cblas_cgemm3m( CblasRowMajor,  CblasNoTrans, CblasTrans, 0, 0, INVALID,
 | 
			
		||||
                   ALPHA, A, 1, B, 1, BETA, C, 1 );
 | 
			
		||||
      chkxer();
 | 
			
		||||
      cblas_info = 6; RowMajorStrg = TRUE;
 | 
			
		||||
      cblas_cgemm3m( CblasRowMajor,  CblasTrans, CblasNoTrans, 0, 0, INVALID,
 | 
			
		||||
                   ALPHA, A, 1, B, 1, BETA, C, 1 );
 | 
			
		||||
      chkxer();
 | 
			
		||||
      cblas_info = 6; RowMajorStrg = TRUE;
 | 
			
		||||
      cblas_cgemm3m( CblasRowMajor,  CblasTrans, CblasTrans, 0, 0, INVALID,
 | 
			
		||||
                   ALPHA, A, 1, B, 1, BETA, C, 1 );
 | 
			
		||||
      chkxer();
 | 
			
		||||
      cblas_info = 9;  RowMajorStrg = TRUE;
 | 
			
		||||
      cblas_cgemm3m( CblasRowMajor,  CblasNoTrans, CblasNoTrans, 0, 0, 2,
 | 
			
		||||
                   ALPHA, A, 1, B, 1, BETA, C, 2 );
 | 
			
		||||
      chkxer();
 | 
			
		||||
      cblas_info = 9; RowMajorStrg = TRUE;
 | 
			
		||||
      cblas_cgemm3m( CblasRowMajor,  CblasNoTrans, CblasTrans, 0, 0, 2,
 | 
			
		||||
                   ALPHA, A, 1, B, 2, BETA, C, 2 );
 | 
			
		||||
      chkxer();
 | 
			
		||||
      cblas_info = 9; RowMajorStrg = TRUE;
 | 
			
		||||
      cblas_cgemm3m( CblasRowMajor,  CblasTrans, CblasNoTrans, 2, 0, 0,
 | 
			
		||||
                   ALPHA, A, 1, B, 2, BETA, C, 1 );
 | 
			
		||||
      chkxer();
 | 
			
		||||
      cblas_info = 9; RowMajorStrg = TRUE;
 | 
			
		||||
      cblas_cgemm3m( CblasRowMajor,  CblasTrans, CblasTrans, 2, 0, 0,
 | 
			
		||||
                   ALPHA, A, 1, B, 1, BETA, C, 1 );
 | 
			
		||||
      chkxer();
 | 
			
		||||
      cblas_info = 11; RowMajorStrg = TRUE;
 | 
			
		||||
      cblas_cgemm3m( CblasRowMajor,  CblasNoTrans, CblasNoTrans, 0, 2, 0,
 | 
			
		||||
                   ALPHA, A, 1, B, 1, BETA, C, 1 );
 | 
			
		||||
      chkxer();
 | 
			
		||||
      cblas_info = 11; RowMajorStrg = TRUE;
 | 
			
		||||
      cblas_cgemm3m( CblasRowMajor,  CblasTrans, CblasNoTrans, 0, 2, 0,
 | 
			
		||||
                   ALPHA, A, 2, B, 1, BETA, C, 1 );
 | 
			
		||||
      chkxer();
 | 
			
		||||
      cblas_info = 11; RowMajorStrg = TRUE;
 | 
			
		||||
      cblas_cgemm3m( CblasRowMajor,  CblasNoTrans, CblasTrans, 0, 0, 2,
 | 
			
		||||
                   ALPHA, A, 2, B, 1, BETA, C, 1 );
 | 
			
		||||
      chkxer();
 | 
			
		||||
      cblas_info = 11; RowMajorStrg = TRUE;
 | 
			
		||||
      cblas_cgemm3m( CblasRowMajor,  CblasTrans, CblasTrans, 0, 0, 2,
 | 
			
		||||
                   ALPHA, A, 1, B, 1, BETA, C, 1 );
 | 
			
		||||
      chkxer();
 | 
			
		||||
      cblas_info = 14; RowMajorStrg = TRUE;
 | 
			
		||||
      cblas_cgemm3m( CblasRowMajor,  CblasNoTrans, CblasNoTrans, 0, 2, 0,
 | 
			
		||||
                   ALPHA, A, 1, B, 2, BETA, C, 1 );
 | 
			
		||||
      chkxer();
 | 
			
		||||
      cblas_info = 14; RowMajorStrg = TRUE;
 | 
			
		||||
      cblas_cgemm3m( CblasRowMajor,  CblasNoTrans, CblasTrans, 0, 2, 0,
 | 
			
		||||
                   ALPHA, A, 1, B, 1, BETA, C, 1 );
 | 
			
		||||
      chkxer();
 | 
			
		||||
      cblas_info = 14; RowMajorStrg = TRUE;
 | 
			
		||||
      cblas_cgemm3m( CblasRowMajor,  CblasTrans, CblasNoTrans, 0, 2, 0,
 | 
			
		||||
                   ALPHA, A, 1, B, 2, BETA, C, 1 );
 | 
			
		||||
      chkxer();
 | 
			
		||||
      cblas_info = 14; RowMajorStrg = TRUE;
 | 
			
		||||
      cblas_cgemm3m( CblasRowMajor,  CblasTrans, CblasTrans, 0, 2, 0,
 | 
			
		||||
                   ALPHA, A, 1, B, 1, BETA, C, 1 );
 | 
			
		||||
      chkxer();
 | 
			
		||||
 | 
			
		||||
   } else if (strncmp( sf,"cblas_cgemm"   ,11)==0) {
 | 
			
		||||
   if (strncmp( sf,"cblas_cgemm"   ,11)==0) {
 | 
			
		||||
            cblas_rout = "cblas_cgemm"   ;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							| 
						 | 
				
			
			@ -567,81 +567,3 @@ void F77_ctrsm(int *order, char *rtlf, char *uplow, char *transp, char *diagn,
 | 
			
		|||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
void F77_cgemm3m(int *order, char *transpa, char *transpb, int *m, int *n,
 | 
			
		||||
     int *k, CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a, int *lda,
 | 
			
		||||
     CBLAS_TEST_COMPLEX *b, int *ldb, CBLAS_TEST_COMPLEX *beta,
 | 
			
		||||
     CBLAS_TEST_COMPLEX *c, int *ldc ) {
 | 
			
		||||
 | 
			
		||||
  CBLAS_TEST_COMPLEX *A, *B, *C;
 | 
			
		||||
  int i,j,LDA, LDB, LDC;
 | 
			
		||||
  enum CBLAS_TRANSPOSE transa, transb;
 | 
			
		||||
 | 
			
		||||
  get_transpose_type(transpa, &transa);
 | 
			
		||||
  get_transpose_type(transpb, &transb);
 | 
			
		||||
 | 
			
		||||
  if (*order == TEST_ROW_MJR) {
 | 
			
		||||
     if (transa == CblasNoTrans) {
 | 
			
		||||
        LDA = *k+1;
 | 
			
		||||
        A=(CBLAS_TEST_COMPLEX*)malloc((*m)*LDA*sizeof(CBLAS_TEST_COMPLEX));
 | 
			
		||||
        for( i=0; i<*m; i++ )
 | 
			
		||||
           for( j=0; j<*k; j++ ) {
 | 
			
		||||
              A[i*LDA+j].real=a[j*(*lda)+i].real;
 | 
			
		||||
              A[i*LDA+j].imag=a[j*(*lda)+i].imag;
 | 
			
		||||
           }
 | 
			
		||||
     }
 | 
			
		||||
     else {
 | 
			
		||||
        LDA = *m+1;
 | 
			
		||||
        A=(CBLAS_TEST_COMPLEX* )malloc(LDA*(*k)*sizeof(CBLAS_TEST_COMPLEX));
 | 
			
		||||
        for( i=0; i<*k; i++ )
 | 
			
		||||
           for( j=0; j<*m; j++ ) {
 | 
			
		||||
              A[i*LDA+j].real=a[j*(*lda)+i].real;
 | 
			
		||||
              A[i*LDA+j].imag=a[j*(*lda)+i].imag;
 | 
			
		||||
           }
 | 
			
		||||
     }
 | 
			
		||||
 | 
			
		||||
     if (transb == CblasNoTrans) {
 | 
			
		||||
        LDB = *n+1;
 | 
			
		||||
        B=(CBLAS_TEST_COMPLEX* )malloc((*k)*LDB*sizeof(CBLAS_TEST_COMPLEX) );
 | 
			
		||||
        for( i=0; i<*k; i++ )
 | 
			
		||||
           for( j=0; j<*n; j++ ) {
 | 
			
		||||
              B[i*LDB+j].real=b[j*(*ldb)+i].real;
 | 
			
		||||
              B[i*LDB+j].imag=b[j*(*ldb)+i].imag;
 | 
			
		||||
           }
 | 
			
		||||
     }
 | 
			
		||||
     else {
 | 
			
		||||
        LDB = *k+1;
 | 
			
		||||
        B=(CBLAS_TEST_COMPLEX* )malloc(LDB*(*n)*sizeof(CBLAS_TEST_COMPLEX));
 | 
			
		||||
        for( i=0; i<*n; i++ )
 | 
			
		||||
           for( j=0; j<*k; j++ ) {
 | 
			
		||||
              B[i*LDB+j].real=b[j*(*ldb)+i].real;
 | 
			
		||||
              B[i*LDB+j].imag=b[j*(*ldb)+i].imag;
 | 
			
		||||
           }
 | 
			
		||||
     }
 | 
			
		||||
 | 
			
		||||
     LDC = *n+1;
 | 
			
		||||
     C=(CBLAS_TEST_COMPLEX* )malloc((*m)*LDC*sizeof(CBLAS_TEST_COMPLEX));
 | 
			
		||||
     for( j=0; j<*n; j++ )
 | 
			
		||||
        for( i=0; i<*m; i++ ) {
 | 
			
		||||
           C[i*LDC+j].real=c[j*(*ldc)+i].real;
 | 
			
		||||
           C[i*LDC+j].imag=c[j*(*ldc)+i].imag;
 | 
			
		||||
        }
 | 
			
		||||
     cblas_cgemm3m( CblasRowMajor, transa, transb, *m, *n, *k, alpha, A, LDA,
 | 
			
		||||
                  B, LDB, beta, C, LDC );
 | 
			
		||||
     for( j=0; j<*n; j++ )
 | 
			
		||||
        for( i=0; i<*m; i++ ) {
 | 
			
		||||
           c[j*(*ldc)+i].real=C[i*LDC+j].real;
 | 
			
		||||
           c[j*(*ldc)+i].imag=C[i*LDC+j].imag;
 | 
			
		||||
        }
 | 
			
		||||
     free(A);
 | 
			
		||||
     free(B);
 | 
			
		||||
     free(C);
 | 
			
		||||
  }
 | 
			
		||||
  else if (*order == TEST_COL_MJR)
 | 
			
		||||
     cblas_cgemm3m( CblasColMajor, transa, transb, *m, *n, *k, alpha, a, *lda,
 | 
			
		||||
                  b, *ldb, beta, c, *ldc );
 | 
			
		||||
  else
 | 
			
		||||
     cblas_cgemm3m( UNDEFINED, transa, transb, *m, *n, *k, alpha, a, *lda,
 | 
			
		||||
                  b, *ldb, beta, c, *ldc );
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -0,0 +1,647 @@
 | 
			
		|||
/*
 | 
			
		||||
 *     Written by D.P. Manley, Digital Equipment Corporation.
 | 
			
		||||
 *     Prefixed "C_" to BLAS routines and their declarations.
 | 
			
		||||
 *
 | 
			
		||||
 *     Modified by T. H. Do, 4/15/98, SGI/CRAY Research.
 | 
			
		||||
 */
 | 
			
		||||
#include <stdlib.h>
 | 
			
		||||
#include "common.h"
 | 
			
		||||
#include "cblas_test.h"
 | 
			
		||||
 | 
			
		||||
#define  TEST_COL_MJR	0
 | 
			
		||||
#define  TEST_ROW_MJR	1
 | 
			
		||||
#define  UNDEFINED     -1
 | 
			
		||||
 | 
			
		||||
void F77_cgemm(int *order, char *transpa, char *transpb, int *m, int *n,
 | 
			
		||||
     int *k, CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a, int *lda,
 | 
			
		||||
     CBLAS_TEST_COMPLEX *b, int *ldb, CBLAS_TEST_COMPLEX *beta,
 | 
			
		||||
     CBLAS_TEST_COMPLEX *c, int *ldc ) {
 | 
			
		||||
 | 
			
		||||
  CBLAS_TEST_COMPLEX *A, *B, *C;
 | 
			
		||||
  int i,j,LDA, LDB, LDC;
 | 
			
		||||
  enum CBLAS_TRANSPOSE transa, transb;
 | 
			
		||||
 | 
			
		||||
  get_transpose_type(transpa, &transa);
 | 
			
		||||
  get_transpose_type(transpb, &transb);
 | 
			
		||||
 | 
			
		||||
  if (*order == TEST_ROW_MJR) {
 | 
			
		||||
     if (transa == CblasNoTrans) {
 | 
			
		||||
        LDA = *k+1;
 | 
			
		||||
        A=(CBLAS_TEST_COMPLEX*)malloc((*m)*LDA*sizeof(CBLAS_TEST_COMPLEX));
 | 
			
		||||
        for( i=0; i<*m; i++ )
 | 
			
		||||
           for( j=0; j<*k; j++ ) {
 | 
			
		||||
              A[i*LDA+j].real=a[j*(*lda)+i].real;
 | 
			
		||||
              A[i*LDA+j].imag=a[j*(*lda)+i].imag;
 | 
			
		||||
           }
 | 
			
		||||
     }
 | 
			
		||||
     else {
 | 
			
		||||
        LDA = *m+1;
 | 
			
		||||
        A=(CBLAS_TEST_COMPLEX* )malloc(LDA*(*k)*sizeof(CBLAS_TEST_COMPLEX));
 | 
			
		||||
        for( i=0; i<*k; i++ )
 | 
			
		||||
           for( j=0; j<*m; j++ ) {
 | 
			
		||||
              A[i*LDA+j].real=a[j*(*lda)+i].real;
 | 
			
		||||
              A[i*LDA+j].imag=a[j*(*lda)+i].imag;
 | 
			
		||||
           }
 | 
			
		||||
     }
 | 
			
		||||
 | 
			
		||||
     if (transb == CblasNoTrans) {
 | 
			
		||||
        LDB = *n+1;
 | 
			
		||||
        B=(CBLAS_TEST_COMPLEX* )malloc((*k)*LDB*sizeof(CBLAS_TEST_COMPLEX) );
 | 
			
		||||
        for( i=0; i<*k; i++ )
 | 
			
		||||
           for( j=0; j<*n; j++ ) {
 | 
			
		||||
              B[i*LDB+j].real=b[j*(*ldb)+i].real;
 | 
			
		||||
              B[i*LDB+j].imag=b[j*(*ldb)+i].imag;
 | 
			
		||||
           }
 | 
			
		||||
     }
 | 
			
		||||
     else {
 | 
			
		||||
        LDB = *k+1;
 | 
			
		||||
        B=(CBLAS_TEST_COMPLEX* )malloc(LDB*(*n)*sizeof(CBLAS_TEST_COMPLEX));
 | 
			
		||||
        for( i=0; i<*n; i++ )
 | 
			
		||||
           for( j=0; j<*k; j++ ) {
 | 
			
		||||
              B[i*LDB+j].real=b[j*(*ldb)+i].real;
 | 
			
		||||
              B[i*LDB+j].imag=b[j*(*ldb)+i].imag;
 | 
			
		||||
           }
 | 
			
		||||
     }
 | 
			
		||||
 | 
			
		||||
     LDC = *n+1;
 | 
			
		||||
     C=(CBLAS_TEST_COMPLEX* )malloc((*m)*LDC*sizeof(CBLAS_TEST_COMPLEX));
 | 
			
		||||
     for( j=0; j<*n; j++ )
 | 
			
		||||
        for( i=0; i<*m; i++ ) {
 | 
			
		||||
           C[i*LDC+j].real=c[j*(*ldc)+i].real;
 | 
			
		||||
           C[i*LDC+j].imag=c[j*(*ldc)+i].imag;
 | 
			
		||||
        }
 | 
			
		||||
     cblas_cgemm( CblasRowMajor, transa, transb, *m, *n, *k, alpha, A, LDA,
 | 
			
		||||
                  B, LDB, beta, C, LDC );
 | 
			
		||||
     for( j=0; j<*n; j++ )
 | 
			
		||||
        for( i=0; i<*m; i++ ) {
 | 
			
		||||
           c[j*(*ldc)+i].real=C[i*LDC+j].real;
 | 
			
		||||
           c[j*(*ldc)+i].imag=C[i*LDC+j].imag;
 | 
			
		||||
        }
 | 
			
		||||
     free(A);
 | 
			
		||||
     free(B);
 | 
			
		||||
     free(C);
 | 
			
		||||
  }
 | 
			
		||||
  else if (*order == TEST_COL_MJR)
 | 
			
		||||
     cblas_cgemm( CblasColMajor, transa, transb, *m, *n, *k, alpha, a, *lda,
 | 
			
		||||
                  b, *ldb, beta, c, *ldc );
 | 
			
		||||
  else
 | 
			
		||||
     cblas_cgemm( UNDEFINED, transa, transb, *m, *n, *k, alpha, a, *lda,
 | 
			
		||||
                  b, *ldb, beta, c, *ldc );
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/*
 * F77_chemm -- Fortran-callable test wrapper that forwards to cblas_chemm.
 *
 *   order        storage order selector: TEST_ROW_MJR, TEST_COL_MJR, or any
 *                other value (forwarded to CBLAS as UNDEFINED).
 *   rtlf, uplow  character codes decoded by get_side_type / get_uplo_type.
 *   m, n         extents of b and c; a is m x m for CblasLeft, else n x n.
 *   alpha, beta  complex scalars, passed through by pointer.
 *   a, b, c      caller matrices, read here as element (i,j) at [j*ld + i].
 *   lda,ldb,ldc  leading dimensions of the caller matrices.
 *
 * For TEST_ROW_MJR the three matrices are copied into temporary buffers
 * laid out as [i*LD + j], with LD one larger than the number of columns
 * copied; cblas_chemm runs on the copies and the result is copied back
 * into c.  The malloc results are not checked.
 */
void F77_chemm(int *order, char *rtlf, char *uplow, int *m, int *n,
        CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a, int *lda,
        CBLAS_TEST_COMPLEX *b, int *ldb, CBLAS_TEST_COMPLEX *beta,
        CBLAS_TEST_COMPLEX *c, int *ldc ) {

  CBLAS_TEST_COMPLEX *rowA, *rowB, *rowC;
  int row, col, adim, rlda, rldb, rldc;
  enum CBLAS_UPLO uplo;
  enum CBLAS_SIDE side;

  get_uplo_type(uplow, &uplo);
  get_side_type(rtlf, &side);

  if (*order != TEST_ROW_MJR) {
     /* No repacking: the caller's arrays go straight to CBLAS. */
     if (*order == TEST_COL_MJR)
        cblas_chemm( CblasColMajor, side, uplo, *m, *n, alpha, a, *lda,
                     b, *ldb, beta, c, *ldc );
     else
        cblas_chemm( UNDEFINED, side, uplo, *m, *n, alpha, a, *lda,
                     b, *ldb, beta, c, *ldc );
     return;
  }

  /* a is square; its order depends on which side it is applied from. */
  adim = (side == CblasLeft) ? *m : *n;
  rlda = adim + 1;
  rowA = (CBLAS_TEST_COMPLEX *)malloc(adim * rlda * sizeof(CBLAS_TEST_COMPLEX));
  for (row = 0; row < adim; row++)
     for (col = 0; col < adim; col++) {
        rowA[row * rlda + col].real = a[col * (*lda) + row].real;
        rowA[row * rlda + col].imag = a[col * (*lda) + row].imag;
     }

  /* b: m x n */
  rldb = *n + 1;
  rowB = (CBLAS_TEST_COMPLEX *)malloc((*m) * rldb * sizeof(CBLAS_TEST_COMPLEX));
  for (row = 0; row < *m; row++)
     for (col = 0; col < *n; col++) {
        rowB[row * rldb + col].real = b[col * (*ldb) + row].real;
        rowB[row * rldb + col].imag = b[col * (*ldb) + row].imag;
     }

  /* c: m x n */
  rldc = *n + 1;
  rowC = (CBLAS_TEST_COMPLEX *)malloc((*m) * rldc * sizeof(CBLAS_TEST_COMPLEX));
  for (col = 0; col < *n; col++)
     for (row = 0; row < *m; row++) {
        rowC[row * rldc + col].real = c[col * (*ldc) + row].real;
        rowC[row * rldc + col].imag = c[col * (*ldc) + row].imag;
     }

  cblas_chemm( CblasRowMajor, side, uplo, *m, *n, alpha, rowA, rlda,
               rowB, rldb, beta, rowC, rldc );

  /* Hand the result back in the caller's layout. */
  for (col = 0; col < *n; col++)
     for (row = 0; row < *m; row++) {
        c[col * (*ldc) + row].real = rowC[row * rldc + col].real;
        c[col * (*ldc) + row].imag = rowC[row * rldc + col].imag;
     }

  free(rowA);
  free(rowB);
  free(rowC);
}
 | 
			
		||||
/*
 * F77_csymm -- Fortran-callable test wrapper that forwards to cblas_csymm.
 *
 *   order        storage order selector: TEST_ROW_MJR, TEST_COL_MJR, or any
 *                other value (forwarded to CBLAS as UNDEFINED).
 *   rtlf, uplow  character codes decoded by get_side_type / get_uplo_type.
 *   m, n         extents of b and c; a is m x m for CblasLeft, else n x n.
 *   alpha, beta  complex scalars, passed through by pointer.
 *   a, b, c      caller matrices, read here as element (i,j) at [j*ld + i].
 *   lda,ldb,ldc  leading dimensions of the caller matrices.
 *
 * For TEST_ROW_MJR the three matrices are copied (whole elements at a time)
 * into temporary buffers laid out as [i*LD + j], with LD one larger than
 * the number of columns copied; cblas_csymm runs on the copies and the
 * result is copied back into c.  The malloc results are not checked.
 */
void F77_csymm(int *order, char *rtlf, char *uplow, int *m, int *n,
          CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a, int *lda,
          CBLAS_TEST_COMPLEX *b, int *ldb, CBLAS_TEST_COMPLEX *beta,
          CBLAS_TEST_COMPLEX *c, int *ldc ) {

  CBLAS_TEST_COMPLEX *rowA, *rowB, *rowC;
  int row, col, adim, rlda, rldb, rldc;
  enum CBLAS_UPLO uplo;
  enum CBLAS_SIDE side;

  get_uplo_type(uplow, &uplo);
  get_side_type(rtlf, &side);

  if (*order != TEST_ROW_MJR) {
     /* No repacking: the caller's arrays go straight to CBLAS. */
     if (*order == TEST_COL_MJR)
        cblas_csymm( CblasColMajor, side, uplo, *m, *n, alpha, a, *lda,
                     b, *ldb, beta, c, *ldc );
     else
        cblas_csymm( UNDEFINED, side, uplo, *m, *n, alpha, a, *lda,
                     b, *ldb, beta, c, *ldc );
     return;
  }

  /* a is square; its order depends on which side it is applied from. */
  adim = (side == CblasLeft) ? *m : *n;
  rlda = adim + 1;
  rowA = (CBLAS_TEST_COMPLEX *)malloc(adim * rlda * sizeof(CBLAS_TEST_COMPLEX));
  for (row = 0; row < adim; row++)
     for (col = 0; col < adim; col++)
        rowA[row * rlda + col] = a[col * (*lda) + row];

  /* b: m x n */
  rldb = *n + 1;
  rowB = (CBLAS_TEST_COMPLEX *)malloc((*m) * rldb * sizeof(CBLAS_TEST_COMPLEX));
  for (row = 0; row < *m; row++)
     for (col = 0; col < *n; col++)
        rowB[row * rldb + col] = b[col * (*ldb) + row];

  /* c: m x n */
  rldc = *n + 1;
  rowC = (CBLAS_TEST_COMPLEX *)malloc((*m) * rldc * sizeof(CBLAS_TEST_COMPLEX));
  for (col = 0; col < *n; col++)
     for (row = 0; row < *m; row++)
        rowC[row * rldc + col] = c[col * (*ldc) + row];

  cblas_csymm( CblasRowMajor, side, uplo, *m, *n, alpha, rowA, rlda,
               rowB, rldb, beta, rowC, rldc );

  /* Hand the result back in the caller's layout. */
  for (col = 0; col < *n; col++)
     for (row = 0; row < *m; row++)
        c[col * (*ldc) + row] = rowC[row * rldc + col];

  free(rowA);
  free(rowB);
  free(rowC);
}
 | 
			
		||||
 | 
			
		||||
/*
 * F77_cherk -- Fortran-callable test wrapper that forwards to cblas_cherk.
 *
 *   order          storage order selector: TEST_ROW_MJR, TEST_COL_MJR, or
 *                  any other value (forwarded to CBLAS as UNDEFINED).
 *   uplow, transp  character codes decoded by get_uplo_type /
 *                  get_transpose_type.
 *   n, k           c is n x n; a is n x k for CblasNoTrans, else k x n.
 *   alpha, beta    real scalars, dereferenced and passed by value.
 *   a, c           caller matrices, read here as element (i,j) at [j*ld + i].
 *   lda, ldc       leading dimensions of the caller matrices.
 *
 * For TEST_ROW_MJR, a and c are copied into temporary buffers laid out as
 * [i*LD + j], with LD one larger than the number of columns copied;
 * cblas_cherk runs on the copies and the full n x n result is copied back
 * into c.  The malloc results are not checked.
 */
void F77_cherk(int *order, char *uplow, char *transp, int *n, int *k,
     float *alpha, CBLAS_TEST_COMPLEX *a, int *lda,
     float *beta, CBLAS_TEST_COMPLEX *c, int *ldc ) {

  int row, col, arows, acols, rlda, rldc;
  CBLAS_TEST_COMPLEX *rowA, *rowC;
  enum CBLAS_UPLO uplo;
  enum CBLAS_TRANSPOSE trans;

  get_uplo_type(uplow, &uplo);
  get_transpose_type(transp, &trans);

  if (*order != TEST_ROW_MJR) {
     /* No repacking: the caller's arrays go straight to CBLAS. */
     if (*order == TEST_COL_MJR)
        cblas_cherk( CblasColMajor, uplo, trans, *n, *k, *alpha, a, *lda,
                     *beta, c, *ldc );
     else
        cblas_cherk( UNDEFINED, uplo, trans, *n, *k, *alpha, a, *lda,
                     *beta, c, *ldc );
     return;
  }

  /* Shape of a follows the transpose flag. */
  if (trans == CblasNoTrans) {
     arows = *n;
     acols = *k;
  }
  else {
     arows = *k;
     acols = *n;
  }
  rlda = acols + 1;
  rowA = (CBLAS_TEST_COMPLEX *)malloc(arows * rlda * sizeof(CBLAS_TEST_COMPLEX));
  for (row = 0; row < arows; row++)
     for (col = 0; col < acols; col++) {
        rowA[row * rlda + col].real = a[col * (*lda) + row].real;
        rowA[row * rlda + col].imag = a[col * (*lda) + row].imag;
     }

  /* c: n x n */
  rldc = *n + 1;
  rowC = (CBLAS_TEST_COMPLEX *)malloc((*n) * rldc * sizeof(CBLAS_TEST_COMPLEX));
  for (row = 0; row < *n; row++)
     for (col = 0; col < *n; col++) {
        rowC[row * rldc + col].real = c[col * (*ldc) + row].real;
        rowC[row * rldc + col].imag = c[col * (*ldc) + row].imag;
     }

  cblas_cherk( CblasRowMajor, uplo, trans, *n, *k, *alpha, rowA, rlda,
               *beta, rowC, rldc );

  /* Hand the result back in the caller's layout. */
  for (col = 0; col < *n; col++)
     for (row = 0; row < *n; row++) {
        c[col * (*ldc) + row].real = rowC[row * rldc + col].real;
        c[col * (*ldc) + row].imag = rowC[row * rldc + col].imag;
     }

  free(rowA);
  free(rowC);
}
 | 
			
		||||
 | 
			
		||||
/*
 * F77_csyrk -- Fortran-callable test wrapper that forwards to cblas_csyrk.
 *
 *   order          storage order selector: TEST_ROW_MJR, TEST_COL_MJR, or
 *                  any other value (forwarded to CBLAS as UNDEFINED).
 *   uplow, transp  character codes decoded by get_uplo_type /
 *                  get_transpose_type.
 *   n, k           c is n x n; a is n x k for CblasNoTrans, else k x n.
 *   alpha, beta    complex scalars, passed through by pointer.
 *   a, c           caller matrices, read here as element (i,j) at [j*ld + i].
 *   lda, ldc       leading dimensions of the caller matrices.
 *
 * For TEST_ROW_MJR, a and c are copied into temporary buffers laid out as
 * [i*LD + j], with LD one larger than the number of columns copied;
 * cblas_csyrk runs on the copies and the full n x n result is copied back
 * into c.  The malloc results are not checked.
 */
void F77_csyrk(int *order, char *uplow, char *transp, int *n, int *k,
     CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a, int *lda,
     CBLAS_TEST_COMPLEX *beta, CBLAS_TEST_COMPLEX *c, int *ldc ) {

  int row, col, arows, acols, rlda, rldc;
  CBLAS_TEST_COMPLEX *rowA, *rowC;
  enum CBLAS_UPLO uplo;
  enum CBLAS_TRANSPOSE trans;

  get_uplo_type(uplow, &uplo);
  get_transpose_type(transp, &trans);

  if (*order != TEST_ROW_MJR) {
     /* No repacking: the caller's arrays go straight to CBLAS. */
     if (*order == TEST_COL_MJR)
        cblas_csyrk( CblasColMajor, uplo, trans, *n, *k, alpha, a, *lda,
                     beta, c, *ldc );
     else
        cblas_csyrk( UNDEFINED, uplo, trans, *n, *k, alpha, a, *lda,
                     beta, c, *ldc );
     return;
  }

  /* Shape of a follows the transpose flag. */
  if (trans == CblasNoTrans) {
     arows = *n;
     acols = *k;
  }
  else {
     arows = *k;
     acols = *n;
  }
  rlda = acols + 1;
  rowA = (CBLAS_TEST_COMPLEX *)malloc(arows * rlda * sizeof(CBLAS_TEST_COMPLEX));
  for (row = 0; row < arows; row++)
     for (col = 0; col < acols; col++) {
        rowA[row * rlda + col].real = a[col * (*lda) + row].real;
        rowA[row * rlda + col].imag = a[col * (*lda) + row].imag;
     }

  /* c: n x n */
  rldc = *n + 1;
  rowC = (CBLAS_TEST_COMPLEX *)malloc((*n) * rldc * sizeof(CBLAS_TEST_COMPLEX));
  for (row = 0; row < *n; row++)
     for (col = 0; col < *n; col++) {
        rowC[row * rldc + col].real = c[col * (*ldc) + row].real;
        rowC[row * rldc + col].imag = c[col * (*ldc) + row].imag;
     }

  cblas_csyrk( CblasRowMajor, uplo, trans, *n, *k, alpha, rowA, rlda,
               beta, rowC, rldc );

  /* Hand the result back in the caller's layout. */
  for (col = 0; col < *n; col++)
     for (row = 0; row < *n; row++) {
        c[col * (*ldc) + row].real = rowC[row * rldc + col].real;
        c[col * (*ldc) + row].imag = rowC[row * rldc + col].imag;
     }

  free(rowA);
  free(rowC);
}
 | 
			
		||||
/*
 * F77_cher2k -- Fortran-callable test wrapper that forwards to cblas_cher2k.
 *
 *   order          storage order selector: TEST_ROW_MJR, TEST_COL_MJR, or
 *                  any other value (forwarded to CBLAS as UNDEFINED).
 *   uplow, transp  character codes decoded by get_uplo_type /
 *                  get_transpose_type.
 *   n, k           c is n x n; a and b are n x k for CblasNoTrans,
 *                  else k x n.
 *   alpha          complex scalar, passed through by pointer.
 *   beta           real scalar, dereferenced and passed by value.
 *   a, b, c        caller matrices, read here as element (i,j) at [j*ld + i].
 *   lda,ldb,ldc    leading dimensions of the caller matrices.
 *
 * For TEST_ROW_MJR, a, b and c are copied into temporary buffers laid out
 * as [i*LD + j], with LD one larger than the number of columns copied;
 * cblas_cher2k runs on the copies and the full n x n result is copied back
 * into c.  The malloc results are not checked.
 */
void F77_cher2k(int *order, char *uplow, char *transp, int *n, int *k,
        CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a, int *lda,
        CBLAS_TEST_COMPLEX *b, int *ldb, float *beta,
        CBLAS_TEST_COMPLEX *c, int *ldc ) {
  int row, col, prows, pcols, rlda, rldb, rldc;
  CBLAS_TEST_COMPLEX *rowA, *rowB, *rowC;
  enum CBLAS_UPLO uplo;
  enum CBLAS_TRANSPOSE trans;

  get_uplo_type(uplow, &uplo);
  get_transpose_type(transp, &trans);

  if (*order != TEST_ROW_MJR) {
     /* No repacking: the caller's arrays go straight to CBLAS. */
     if (*order == TEST_COL_MJR)
        cblas_cher2k( CblasColMajor, uplo, trans, *n, *k, alpha, a, *lda,
                      b, *ldb, *beta, c, *ldc );
     else
        cblas_cher2k( UNDEFINED, uplo, trans, *n, *k, alpha, a, *lda,
                      b, *ldb, *beta, c, *ldc );
     return;
  }

  /* a and b share one shape, chosen by the transpose flag. */
  prows = (trans == CblasNoTrans) ? *n : *k;
  pcols = (trans == CblasNoTrans) ? *k : *n;
  rlda = pcols + 1;
  rldb = pcols + 1;
  rowA = (CBLAS_TEST_COMPLEX *)malloc(prows * rlda * sizeof(CBLAS_TEST_COMPLEX));
  rowB = (CBLAS_TEST_COMPLEX *)malloc(prows * rldb * sizeof(CBLAS_TEST_COMPLEX));
  for (row = 0; row < prows; row++)
     for (col = 0; col < pcols; col++) {
        rowA[row * rlda + col].real = a[col * (*lda) + row].real;
        rowA[row * rlda + col].imag = a[col * (*lda) + row].imag;
        rowB[row * rldb + col].real = b[col * (*ldb) + row].real;
        rowB[row * rldb + col].imag = b[col * (*ldb) + row].imag;
     }

  /* c: n x n */
  rldc = *n + 1;
  rowC = (CBLAS_TEST_COMPLEX *)malloc((*n) * rldc * sizeof(CBLAS_TEST_COMPLEX));
  for (row = 0; row < *n; row++)
     for (col = 0; col < *n; col++) {
        rowC[row * rldc + col].real = c[col * (*ldc) + row].real;
        rowC[row * rldc + col].imag = c[col * (*ldc) + row].imag;
     }

  cblas_cher2k( CblasRowMajor, uplo, trans, *n, *k, alpha, rowA, rlda,
                rowB, rldb, *beta, rowC, rldc );

  /* Hand the result back in the caller's layout. */
  for (col = 0; col < *n; col++)
     for (row = 0; row < *n; row++) {
        c[col * (*ldc) + row].real = rowC[row * rldc + col].real;
        c[col * (*ldc) + row].imag = rowC[row * rldc + col].imag;
     }

  free(rowA);
  free(rowB);
  free(rowC);
}
 | 
			
		||||
/*
 * F77_csyr2k -- Fortran-callable test wrapper that forwards to cblas_csyr2k.
 *
 *   order          storage order selector: TEST_ROW_MJR, TEST_COL_MJR, or
 *                  any other value (forwarded to CBLAS as UNDEFINED).
 *   uplow, transp  character codes decoded by get_uplo_type /
 *                  get_transpose_type.
 *   n, k           c is n x n; a and b are n x k for CblasNoTrans,
 *                  else k x n.
 *   alpha, beta    complex scalars, passed through by pointer.
 *   a, b, c        caller matrices, read here as element (i,j) at [j*ld + i].
 *   lda,ldb,ldc    leading dimensions of the caller matrices.
 *
 * For TEST_ROW_MJR, a, b and c are copied into temporary buffers laid out
 * as [i*LD + j], with LD one larger than the number of columns copied;
 * cblas_csyr2k runs on the copies and the full n x n result is copied back
 * into c.  The malloc results are not checked.
 */
void F77_csyr2k(int *order, char *uplow, char *transp, int *n, int *k,
         CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a, int *lda,
         CBLAS_TEST_COMPLEX *b, int *ldb, CBLAS_TEST_COMPLEX *beta,
         CBLAS_TEST_COMPLEX *c, int *ldc ) {
  int row, col, prows, pcols, rlda, rldb, rldc;
  CBLAS_TEST_COMPLEX *rowA, *rowB, *rowC;
  enum CBLAS_UPLO uplo;
  enum CBLAS_TRANSPOSE trans;

  get_uplo_type(uplow, &uplo);
  get_transpose_type(transp, &trans);

  if (*order != TEST_ROW_MJR) {
     /* No repacking: the caller's arrays go straight to CBLAS. */
     if (*order == TEST_COL_MJR)
        cblas_csyr2k( CblasColMajor, uplo, trans, *n, *k, alpha, a, *lda,
                      b, *ldb, beta, c, *ldc );
     else
        cblas_csyr2k( UNDEFINED, uplo, trans, *n, *k, alpha, a, *lda,
                      b, *ldb, beta, c, *ldc );
     return;
  }

  /* a and b share one shape, chosen by the transpose flag. */
  prows = (trans == CblasNoTrans) ? *n : *k;
  pcols = (trans == CblasNoTrans) ? *k : *n;
  rlda = pcols + 1;
  rldb = pcols + 1;
  rowA = (CBLAS_TEST_COMPLEX *)malloc(prows * rlda * sizeof(CBLAS_TEST_COMPLEX));
  rowB = (CBLAS_TEST_COMPLEX *)malloc(prows * rldb * sizeof(CBLAS_TEST_COMPLEX));
  for (row = 0; row < prows; row++)
     for (col = 0; col < pcols; col++) {
        rowA[row * rlda + col].real = a[col * (*lda) + row].real;
        rowA[row * rlda + col].imag = a[col * (*lda) + row].imag;
        rowB[row * rldb + col].real = b[col * (*ldb) + row].real;
        rowB[row * rldb + col].imag = b[col * (*ldb) + row].imag;
     }

  /* c: n x n */
  rldc = *n + 1;
  rowC = (CBLAS_TEST_COMPLEX *)malloc((*n) * rldc * sizeof(CBLAS_TEST_COMPLEX));
  for (row = 0; row < *n; row++)
     for (col = 0; col < *n; col++) {
        rowC[row * rldc + col].real = c[col * (*ldc) + row].real;
        rowC[row * rldc + col].imag = c[col * (*ldc) + row].imag;
     }

  cblas_csyr2k( CblasRowMajor, uplo, trans, *n, *k, alpha, rowA, rlda,
                rowB, rldb, beta, rowC, rldc );

  /* Hand the result back in the caller's layout. */
  for (col = 0; col < *n; col++)
     for (row = 0; row < *n; row++) {
        c[col * (*ldc) + row].real = rowC[row * rldc + col].real;
        c[col * (*ldc) + row].imag = rowC[row * rldc + col].imag;
     }

  free(rowA);
  free(rowB);
  free(rowC);
}
 | 
			
		||||
/*
 * F77_ctrmm -- Fortran-callable test wrapper that forwards to cblas_ctrmm.
 *
 *   order        storage order selector: TEST_ROW_MJR, TEST_COL_MJR, or any
 *                other value (forwarded to CBLAS as UNDEFINED).
 *   rtlf, uplow, transp, diagn
 *                character codes decoded by get_side_type, get_uplo_type,
 *                get_transpose_type and get_diag_type.
 *   m, n         extents of b; a is m x m for CblasLeft, else n x n.
 *   alpha        complex scalar, passed through by pointer.
 *   a, b         caller matrices, read here as element (i,j) at [j*ld + i].
 *   lda, ldb     leading dimensions of the caller matrices.
 *
 * For TEST_ROW_MJR, a and b are copied into temporary buffers laid out as
 * [i*LD + j], with LD one larger than the number of columns copied;
 * cblas_ctrmm runs on the copies and the m x n result is copied back into
 * b.  The malloc results are not checked.
 */
void F77_ctrmm(int *order, char *rtlf, char *uplow, char *transp, char *diagn,
       int *m, int *n, CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a,
       int *lda, CBLAS_TEST_COMPLEX *b, int *ldb) {
  int row, col, adim, rlda, rldb;
  CBLAS_TEST_COMPLEX *rowA, *rowB;
  enum CBLAS_SIDE side;
  enum CBLAS_DIAG diag;
  enum CBLAS_UPLO uplo;
  enum CBLAS_TRANSPOSE trans;

  get_uplo_type(uplow, &uplo);
  get_transpose_type(transp, &trans);
  get_diag_type(diagn, &diag);
  get_side_type(rtlf, &side);

  if (*order != TEST_ROW_MJR) {
     /* No repacking: the caller's arrays go straight to CBLAS. */
     if (*order == TEST_COL_MJR)
        cblas_ctrmm( CblasColMajor, side, uplo, trans, diag, *m, *n, alpha,
                     a, *lda, b, *ldb );
     else
        cblas_ctrmm( UNDEFINED, side, uplo, trans, diag, *m, *n, alpha,
                     a, *lda, b, *ldb );
     return;
  }

  /* a is square; its order depends on which side it is applied from. */
  adim = (side == CblasLeft) ? *m : *n;
  rlda = adim + 1;
  rowA = (CBLAS_TEST_COMPLEX *)malloc(adim * rlda * sizeof(CBLAS_TEST_COMPLEX));
  for (row = 0; row < adim; row++)
     for (col = 0; col < adim; col++) {
        rowA[row * rlda + col].real = a[col * (*lda) + row].real;
        rowA[row * rlda + col].imag = a[col * (*lda) + row].imag;
     }

  /* b: m x n */
  rldb = *n + 1;
  rowB = (CBLAS_TEST_COMPLEX *)malloc((*m) * rldb * sizeof(CBLAS_TEST_COMPLEX));
  for (row = 0; row < *m; row++)
     for (col = 0; col < *n; col++) {
        rowB[row * rldb + col].real = b[col * (*ldb) + row].real;
        rowB[row * rldb + col].imag = b[col * (*ldb) + row].imag;
     }

  cblas_ctrmm( CblasRowMajor, side, uplo, trans, diag, *m, *n, alpha,
               rowA, rlda, rowB, rldb );

  /* Hand the result back in the caller's layout. */
  for (col = 0; col < *n; col++)
     for (row = 0; row < *m; row++) {
        b[col * (*ldb) + row].real = rowB[row * rldb + col].real;
        b[col * (*ldb) + row].imag = rowB[row * rldb + col].imag;
     }

  free(rowA);
  free(rowB);
}
 | 
			
		||||
 | 
			
		||||
/*
 * F77_ctrsm -- Fortran-callable test wrapper that forwards to cblas_ctrsm.
 *
 *   order        storage order selector: TEST_ROW_MJR, TEST_COL_MJR, or any
 *                other value (forwarded to CBLAS as UNDEFINED).
 *   rtlf, uplow, transp, diagn
 *                character codes decoded by get_side_type, get_uplo_type,
 *                get_transpose_type and get_diag_type.
 *   m, n         extents of b; a is m x m for CblasLeft, else n x n.
 *   alpha        complex scalar, passed through by pointer.
 *   a, b         caller matrices, read here as element (i,j) at [j*ld + i].
 *   lda, ldb     leading dimensions of the caller matrices.
 *
 * For TEST_ROW_MJR, a and b are copied into temporary buffers laid out as
 * [i*LD + j], with LD one larger than the number of columns copied;
 * cblas_ctrsm runs on the copies and the m x n result is copied back into
 * b.  The malloc results are not checked.
 */
void F77_ctrsm(int *order, char *rtlf, char *uplow, char *transp, char *diagn,
         int *m, int *n, CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a,
         int *lda, CBLAS_TEST_COMPLEX *b, int *ldb) {
  int row, col, adim, rlda, rldb;
  CBLAS_TEST_COMPLEX *rowA, *rowB;
  enum CBLAS_SIDE side;
  enum CBLAS_DIAG diag;
  enum CBLAS_UPLO uplo;
  enum CBLAS_TRANSPOSE trans;

  get_uplo_type(uplow, &uplo);
  get_transpose_type(transp, &trans);
  get_diag_type(diagn, &diag);
  get_side_type(rtlf, &side);

  if (*order != TEST_ROW_MJR) {
     /* No repacking: the caller's arrays go straight to CBLAS. */
     if (*order == TEST_COL_MJR)
        cblas_ctrsm( CblasColMajor, side, uplo, trans, diag, *m, *n, alpha,
                     a, *lda, b, *ldb );
     else
        cblas_ctrsm( UNDEFINED, side, uplo, trans, diag, *m, *n, alpha,
                     a, *lda, b, *ldb );
     return;
  }

  /* a is square; its order depends on which side it is applied from. */
  adim = (side == CblasLeft) ? *m : *n;
  rlda = adim + 1;
  rowA = (CBLAS_TEST_COMPLEX *)malloc(adim * rlda * sizeof(CBLAS_TEST_COMPLEX));
  for (row = 0; row < adim; row++)
     for (col = 0; col < adim; col++) {
        rowA[row * rlda + col].real = a[col * (*lda) + row].real;
        rowA[row * rlda + col].imag = a[col * (*lda) + row].imag;
     }

  /* b: m x n */
  rldb = *n + 1;
  rowB = (CBLAS_TEST_COMPLEX *)malloc((*m) * rldb * sizeof(CBLAS_TEST_COMPLEX));
  for (row = 0; row < *m; row++)
     for (col = 0; col < *n; col++) {
        rowB[row * rldb + col].real = b[col * (*ldb) + row].real;
        rowB[row * rldb + col].imag = b[col * (*ldb) + row].imag;
     }

  cblas_ctrsm( CblasRowMajor, side, uplo, trans, diag, *m, *n, alpha,
               rowA, rlda, rowB, rldb );

  /* Hand the result back in the caller's layout. */
  for (col = 0; col < *n; col++)
     for (row = 0; row < *m; row++) {
        b[col * (*ldb) + row].real = rowB[row * rldb + col].real;
        b[col * (*ldb) + row].imag = rowB[row * rldb + col].imag;
     }

  free(rowA);
  free(rowB);
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/*
 * F77_cgemm3m -- Fortran-callable test wrapper that forwards to
 * cblas_cgemm3m.
 *
 *   order             storage order selector: TEST_ROW_MJR, TEST_COL_MJR,
 *                     or any other value (forwarded to CBLAS as UNDEFINED).
 *   transpa, transpb  character codes decoded by get_transpose_type.
 *   m, n, k           c is m x n; a is m x k for CblasNoTrans, else k x m;
 *                     b is k x n for CblasNoTrans, else n x k.
 *   alpha, beta       complex scalars, passed through by pointer.
 *   a, b, c           caller matrices, read here as element (i,j) at
 *                     [j*ld + i].
 *   lda, ldb, ldc     leading dimensions of the caller matrices.
 *
 * For TEST_ROW_MJR, a, b and c are copied into temporary buffers laid out
 * as [i*LD + j], with LD one larger than the number of columns copied;
 * cblas_cgemm3m runs on the copies and the m x n result is copied back
 * into c.  The malloc results are not checked.
 */
void F77_cgemm3m(int *order, char *transpa, char *transpb, int *m, int *n,
     int *k, CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a, int *lda,
     CBLAS_TEST_COMPLEX *b, int *ldb, CBLAS_TEST_COMPLEX *beta,
     CBLAS_TEST_COMPLEX *c, int *ldc ) {

  CBLAS_TEST_COMPLEX *rowA, *rowB, *rowC;
  int row, col, arows, acols, brows, bcols, rlda, rldb, rldc;
  enum CBLAS_TRANSPOSE transa, transb;

  get_transpose_type(transpa, &transa);
  get_transpose_type(transpb, &transb);

  if (*order != TEST_ROW_MJR) {
     /* No repacking: the caller's arrays go straight to CBLAS. */
     if (*order == TEST_COL_MJR)
        cblas_cgemm3m( CblasColMajor, transa, transb, *m, *n, *k, alpha,
                       a, *lda, b, *ldb, beta, c, *ldc );
     else
        cblas_cgemm3m( UNDEFINED, transa, transb, *m, *n, *k, alpha,
                       a, *lda, b, *ldb, beta, c, *ldc );
     return;
  }

  /* Shape of a follows its transpose flag. */
  if (transa == CblasNoTrans) {
     arows = *m;
     acols = *k;
  }
  else {
     arows = *k;
     acols = *m;
  }
  rlda = acols + 1;
  rowA = (CBLAS_TEST_COMPLEX *)malloc(arows * rlda * sizeof(CBLAS_TEST_COMPLEX));
  for (row = 0; row < arows; row++)
     for (col = 0; col < acols; col++) {
        rowA[row * rlda + col].real = a[col * (*lda) + row].real;
        rowA[row * rlda + col].imag = a[col * (*lda) + row].imag;
     }

  /* Shape of b follows its transpose flag. */
  if (transb == CblasNoTrans) {
     brows = *k;
     bcols = *n;
  }
  else {
     brows = *n;
     bcols = *k;
  }
  rldb = bcols + 1;
  rowB = (CBLAS_TEST_COMPLEX *)malloc(brows * rldb * sizeof(CBLAS_TEST_COMPLEX));
  for (row = 0; row < brows; row++)
     for (col = 0; col < bcols; col++) {
        rowB[row * rldb + col].real = b[col * (*ldb) + row].real;
        rowB[row * rldb + col].imag = b[col * (*ldb) + row].imag;
     }

  /* c: m x n */
  rldc = *n + 1;
  rowC = (CBLAS_TEST_COMPLEX *)malloc((*m) * rldc * sizeof(CBLAS_TEST_COMPLEX));
  for (col = 0; col < *n; col++)
     for (row = 0; row < *m; row++) {
        rowC[row * rldc + col].real = c[col * (*ldc) + row].real;
        rowC[row * rldc + col].imag = c[col * (*ldc) + row].imag;
     }

  cblas_cgemm3m( CblasRowMajor, transa, transb, *m, *n, *k, alpha,
                 rowA, rlda, rowB, rldb, beta, rowC, rldc );

  /* Hand the result back in the caller's layout. */
  for (col = 0; col < *n; col++)
     for (row = 0; row < *m; row++) {
        c[col * (*ldc) + row].real = rowC[row * rldc + col].real;
        c[col * (*ldc) + row].imag = rowC[row * rldc + col].imag;
     }

  free(rowA);
  free(rowB);
  free(rowC);
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Some files were not shown because too many files have changed in this diff Show More
		Loading…
	
		Reference in New Issue