Merge branch 'develop'
This commit is contained in:
		
						commit
						53e849f4fc
					
				| 
						 | 
					@ -15,6 +15,7 @@ lapack-netlib/make.inc
 | 
				
			||||||
lapack-netlib/lapacke/include/lapacke_mangling.h
 | 
					lapack-netlib/lapacke/include/lapacke_mangling.h
 | 
				
			||||||
lapack-netlib/TESTING/testing_results.txt
 | 
					lapack-netlib/TESTING/testing_results.txt
 | 
				
			||||||
*.so
 | 
					*.so
 | 
				
			||||||
 | 
					*.so.*
 | 
				
			||||||
*.a
 | 
					*.a
 | 
				
			||||||
.svn
 | 
					.svn
 | 
				
			||||||
*~
 | 
					*~
 | 
				
			||||||
| 
						 | 
					@ -65,3 +66,5 @@ test/sblat3
 | 
				
			||||||
test/zblat1
 | 
					test/zblat1
 | 
				
			||||||
test/zblat2
 | 
					test/zblat2
 | 
				
			||||||
test/zblat3
 | 
					test/zblat3
 | 
				
			||||||
 | 
					build
 | 
				
			||||||
 | 
					build.*
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1,4 +1,13 @@
 | 
				
			||||||
language: c
 | 
					language: c
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					notifications:
 | 
				
			||||||
 | 
					  webhooks:
 | 
				
			||||||
 | 
					    urls:
 | 
				
			||||||
 | 
					      - https://webhooks.gitter.im/e/8a6e4470a0cebd090344
 | 
				
			||||||
 | 
					    on_success: change  # options: [always|never|change] default: always
 | 
				
			||||||
 | 
					    on_failure: always  # options: [always|never|change] default: always
 | 
				
			||||||
 | 
					    on_start: never     # options: [always|never|change] default: always
 | 
				
			||||||
 | 
					
 | 
				
			||||||
compiler:
 | 
					compiler:
 | 
				
			||||||
  - gcc
 | 
					  - gcc
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,190 @@
 | 
				
			||||||
 | 
					##
 | 
				
			||||||
 | 
					## Author: Hank Anderson <hank@statease.com>
 | 
				
			||||||
 | 
					##
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					cmake_minimum_required(VERSION 2.8.4)
 | 
				
			||||||
 | 
					project(OpenBLAS)
 | 
				
			||||||
 | 
					set(OpenBLAS_MAJOR_VERSION 0)
 | 
				
			||||||
 | 
					set(OpenBLAS_MINOR_VERSION 2)
 | 
				
			||||||
 | 
					set(OpenBLAS_PATCH_VERSION 14)
 | 
				
			||||||
 | 
					set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					enable_language(ASM)
 | 
				
			||||||
 | 
					enable_language(C)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if(MSVC)
 | 
				
			||||||
 | 
					set(OpenBLAS_LIBNAME libopenblas)
 | 
				
			||||||
 | 
					else()
 | 
				
			||||||
 | 
					set(OpenBLAS_LIBNAME openblas)
 | 
				
			||||||
 | 
					endif()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#######
 | 
				
			||||||
 | 
					if(MSVC)
 | 
				
			||||||
 | 
					option(BUILD_WITHOUT_LAPACK "Without LAPACK and LAPACKE (Only BLAS or CBLAS)" ON)
 | 
				
			||||||
 | 
					endif()
 | 
				
			||||||
 | 
					option(BUILD_WITHOUT_CBLAS "Without CBLAS" OFF)
 | 
				
			||||||
 | 
					option(BUILD_DEBUG "Build Debug Version" OFF)
 | 
				
			||||||
 | 
					#######
 | 
				
			||||||
 | 
					if(BUILD_WITHOUT_LAPACK)
 | 
				
			||||||
 | 
					set(NO_LAPACK 1)
 | 
				
			||||||
 | 
					set(NO_LAPACKE 1)
 | 
				
			||||||
 | 
					endif()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if(BUILD_DEBUG)
 | 
				
			||||||
 | 
					set(CMAKE_BUILD_TYPE Debug)
 | 
				
			||||||
 | 
					else()
 | 
				
			||||||
 | 
					set(CMAKE_BUILD_TYPE Release)
 | 
				
			||||||
 | 
					endif()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if(BUILD_WITHOUT_CBLAS)
 | 
				
			||||||
 | 
					set(NO_CBLAS 1)
 | 
				
			||||||
 | 
					endif()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#######
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					message(WARNING "CMake support is experimental. This will not produce the same Makefiles that OpenBLAS ships with. Only x86 support is currently available.")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					include("${CMAKE_SOURCE_DIR}/cmake/utils.cmake")
 | 
				
			||||||
 | 
					include("${CMAKE_SOURCE_DIR}/cmake/system.cmake")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					set(BLASDIRS interface driver/level2 driver/level3 driver/others)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if (NOT DYNAMIC_ARCH)
 | 
				
			||||||
 | 
					  list(APPEND BLASDIRS kernel)
 | 
				
			||||||
 | 
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if (DEFINED UTEST_CHECK)
 | 
				
			||||||
 | 
					  set(SANITY_CHECK 1)
 | 
				
			||||||
 | 
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if (DEFINED SANITY_CHECK)
 | 
				
			||||||
 | 
					  list(APPEND BLASDIRS reference)
 | 
				
			||||||
 | 
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					set(SUBDIRS	${BLASDIRS})
 | 
				
			||||||
 | 
					if (NOT NO_LAPACK)
 | 
				
			||||||
 | 
					  list(APPEND SUBDIRS lapack)
 | 
				
			||||||
 | 
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# set which float types we want to build for
 | 
				
			||||||
 | 
					if (NOT DEFINED BUILD_SINGLE AND NOT DEFINED BUILD_DOUBLE AND NOT DEFINED BUILD_COMPLEX AND NOT DEFINED BUILD_COMPLEX16)
 | 
				
			||||||
 | 
					  # if none are defined, build for all
 | 
				
			||||||
 | 
					  set(BUILD_SINGLE true)
 | 
				
			||||||
 | 
					  set(BUILD_DOUBLE true)
 | 
				
			||||||
 | 
					  set(BUILD_COMPLEX true)
 | 
				
			||||||
 | 
					  set(BUILD_COMPLEX16 true)
 | 
				
			||||||
 | 
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					set(FLOAT_TYPES "")
 | 
				
			||||||
 | 
					if (BUILD_SINGLE)
 | 
				
			||||||
 | 
					  message(STATUS "Building Single Precision")
 | 
				
			||||||
 | 
					  list(APPEND FLOAT_TYPES "SINGLE") # defines nothing
 | 
				
			||||||
 | 
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if (BUILD_DOUBLE)
 | 
				
			||||||
 | 
					  message(STATUS "Building Double Precision")
 | 
				
			||||||
 | 
					  list(APPEND FLOAT_TYPES "DOUBLE") # defines DOUBLE
 | 
				
			||||||
 | 
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if (BUILD_COMPLEX)
 | 
				
			||||||
 | 
					  message(STATUS "Building Complex Precision")
 | 
				
			||||||
 | 
					  list(APPEND FLOAT_TYPES "COMPLEX") # defines COMPLEX
 | 
				
			||||||
 | 
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if (BUILD_COMPLEX16)
 | 
				
			||||||
 | 
					  message(STATUS "Building Double Complex Precision")
 | 
				
			||||||
 | 
					  list(APPEND FLOAT_TYPES "ZCOMPLEX") # defines COMPLEX and DOUBLE
 | 
				
			||||||
 | 
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					set(SUBDIRS_ALL ${SUBDIRS} test ctest utest exports benchmark ../laswp ../bench)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# all :: libs netlib tests shared
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# libs :
 | 
				
			||||||
 | 
					if (NOT DEFINED CORE OR "${CORE}" STREQUAL "UNKNOWN")
 | 
				
			||||||
 | 
					  message(FATAL_ERROR "Detecting CPU failed. Please set TARGET explicitly, e.g. make TARGET=your_cpu_target. Please read README for details.")
 | 
				
			||||||
 | 
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if (${NO_STATIC} AND ${NO_SHARED})
 | 
				
			||||||
 | 
					  message(FATAL_ERROR "Neither static nor shared are enabled.")
 | 
				
			||||||
 | 
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# get obj vars into format that add_library likes: $<TARGET_OBJS:objlib> (see http://www.cmake.org/cmake/help/v3.0/command/add_library.html)
 | 
				
			||||||
 | 
					set(TARGET_OBJS "")
 | 
				
			||||||
 | 
					foreach (SUBDIR ${SUBDIRS})
 | 
				
			||||||
 | 
					  add_subdirectory(${SUBDIR})
 | 
				
			||||||
 | 
					  string(REPLACE "/" "_" subdir_obj ${SUBDIR})
 | 
				
			||||||
 | 
					  list(APPEND TARGET_OBJS "$<TARGET_OBJECTS:${subdir_obj}>")
 | 
				
			||||||
 | 
					endforeach ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# netlib:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Can't just use lapack-netlib's CMake files, since they are set up to search for BLAS, build and install a binary. We just want to build a couple of lib files out of lapack and lapacke.
 | 
				
			||||||
 | 
					# Not using add_subdirectory here because lapack-netlib already has its own CMakeLists.txt. Instead include a cmake script with the sources we want.
 | 
				
			||||||
 | 
					if (NOT NOFORTRAN AND NOT NO_LAPACK)
 | 
				
			||||||
 | 
					  include("${CMAKE_SOURCE_DIR}/cmake/lapack.cmake")
 | 
				
			||||||
 | 
					if (NOT NO_LAPACKE)
 | 
				
			||||||
 | 
					  include("${CMAKE_SOURCE_DIR}/cmake/lapacke.cmake")
 | 
				
			||||||
 | 
					endif ()
 | 
				
			||||||
 | 
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#Only generate .def for dll on MSVC
 | 
				
			||||||
 | 
					if(MSVC)
 | 
				
			||||||
 | 
					set(OpenBLAS_DEF_FILE "${PROJECT_BINARY_DIR}/openblas.def")
 | 
				
			||||||
 | 
					endif()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# add objects to the openblas lib
 | 
				
			||||||
 | 
					add_library(${OpenBLAS_LIBNAME} SHARED ${LA_SOURCES} ${LAPACKE_SOURCES} ${TARGET_OBJS} ${OpenBLAS_DEF_FILE})
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					include("${CMAKE_SOURCE_DIR}/cmake/export.cmake")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if(NOT MSVC)
 | 
				
			||||||
 | 
					#only build shared library for MSVC
 | 
				
			||||||
 | 
					add_library(${OpenBLAS_LIBNAME}_static STATIC ${LA_SOURCES} ${LAPACKE_SOURCES} ${TARGET_OBJS})
 | 
				
			||||||
 | 
					set_target_properties(${OpenBLAS_LIBNAME}_static PROPERTIES OUTPUT_NAME ${OpenBLAS_LIBNAME})
 | 
				
			||||||
 | 
					set_target_properties(${OpenBLAS_LIBNAME}_static PROPERTIES CLEAN_DIRECT_OUTPUT 1)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if(SMP)
 | 
				
			||||||
 | 
					target_link_libraries(${OpenBLAS_LIBNAME} pthread)
 | 
				
			||||||
 | 
					target_link_libraries(${OpenBLAS_LIBNAME}_static pthread)
 | 
				
			||||||
 | 
					endif()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#build test and ctest
 | 
				
			||||||
 | 
					enable_testing()
 | 
				
			||||||
 | 
					add_subdirectory(test)
 | 
				
			||||||
 | 
					if(NOT NO_CBLAS)
 | 
				
			||||||
 | 
					add_subdirectory(ctest)
 | 
				
			||||||
 | 
					endif()
 | 
				
			||||||
 | 
					endif()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					set_target_properties(${OpenBLAS_LIBNAME} PROPERTIES 
 | 
				
			||||||
 | 
					  VERSION ${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}
 | 
				
			||||||
 | 
					  SOVERSION ${OpenBLAS_MAJOR_VERSION}
 | 
				
			||||||
 | 
					)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# TODO: Why is the config saved here? Is this necessary with CMake?
 | 
				
			||||||
 | 
					#Save the config files for installation
 | 
				
			||||||
 | 
					#	@cp Makefile.conf Makefile.conf_last
 | 
				
			||||||
 | 
					#	@cp config.h config_last.h
 | 
				
			||||||
 | 
					#ifdef QUAD_PRECISION
 | 
				
			||||||
 | 
					#	@echo "#define QUAD_PRECISION">> config_last.h
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					#ifeq ($(EXPRECISION), 1)
 | 
				
			||||||
 | 
					#	@echo "#define EXPRECISION">> config_last.h
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					###
 | 
				
			||||||
 | 
					#ifeq ($(DYNAMIC_ARCH), 1)
 | 
				
			||||||
 | 
					#	@$(MAKE) -C kernel commonlibs || exit 1
 | 
				
			||||||
 | 
					#	@for d in $(DYNAMIC_CORE) ; \
 | 
				
			||||||
 | 
					#	do  $(MAKE) GOTOBLAS_MAKEFILE= -C kernel TARGET_CORE=$$d kernel || exit 1 ;\
 | 
				
			||||||
 | 
					#	done
 | 
				
			||||||
 | 
					#	@echo DYNAMIC_ARCH=1 >> Makefile.conf_last
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					#ifdef USE_THREAD
 | 
				
			||||||
 | 
					#	@echo USE_THREAD=$(USE_THREAD) >>  Makefile.conf_last
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					#	@touch lib.grd
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -127,5 +127,8 @@ In chronological order:
 | 
				
			||||||
* Ton van den Heuvel <https://github.com/ton>
 | 
					* Ton van den Heuvel <https://github.com/ton>
 | 
				
			||||||
  * [2015-03-18] Fix race condition during shutdown causing a crash in gotoblas_set_affinity().
 | 
					  * [2015-03-18] Fix race condition during shutdown causing a crash in gotoblas_set_affinity().
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					* Martin Koehler <https://github.com/grisuthedragon/>
 | 
				
			||||||
 | 
					  * [2015-09-07] Improved imatcopy
 | 
				
			||||||
 | 
					
 | 
				
			||||||
* [Your name or handle] <[email or website]>
 | 
					* [Your name or handle] <[email or website]>
 | 
				
			||||||
  * [Date] [Brief summary of your changes]
 | 
					  * [Date] [Brief summary of your changes]
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1,4 +1,57 @@
 | 
				
			||||||
OpenBLAS ChangeLog
 | 
					OpenBLAS ChangeLog
 | 
				
			||||||
 | 
					====================================================================
 | 
				
			||||||
 | 
					Version 0.2.15
 | 
				
			||||||
 | 
					27-Oct-2015
 | 
				
			||||||
 | 
					common:
 | 
				
			||||||
 | 
						* Support cmake on x86/x86-64. Natively compiling on MS Visual Studio.
 | 
				
			||||||
 | 
						  (Experimental. Thanks to Hank Anderson for the initial cmake porting work.)
 | 
				
			||||||
 | 
						  
 | 
				
			||||||
 | 
						  On Linux and Mac OSX, OpenBLAS cmake supports assembly kernels.
 | 
				
			||||||
 | 
						  e.g. cmake .
 | 
				
			||||||
 | 
						       make
 | 
				
			||||||
 | 
						       make test (Optional)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						  On Windows MS Visual Studio, OpenBLAS cmake only supports C kernels.
 | 
				
			||||||
 | 
						  (OpenBLAS uses AT&T style assembly, which is not supported by MSVC.)
 | 
				
			||||||
 | 
						  e.g. cmake -G "Visual Studio 12 Win64" .
 | 
				
			||||||
 | 
						       Open OpenBLAS.sln and build.
 | 
				
			||||||
 | 
						  
 | 
				
			||||||
 | 
						* Enable MAX_STACK_ALLOC flags by default.
 | 
				
			||||||
 | 
						  Improve ger and gemv for small matrices.
 | 
				
			||||||
 | 
						* Improve gemv parallelization for the small m, large n case.
 | 
				
			||||||
 | 
						* Improve ?imatcopy when lda==ldb (#633. Thanks, Martin Koehler)
 | 
				
			||||||
 | 
						* Add vecLib benchmarks (#565. Thanks, Andreas Noack.)
 | 
				
			||||||
 | 
						* Fix LAPACK lantr for row major matrices (#634. Thanks, Dan Kortschak)
 | 
				
			||||||
 | 
						* Fix LAPACKE lansy (#640. Thanks, Dan Kortschak)
 | 
				
			||||||
 | 
						* Import bug fixes for LAPACKE s/dormlq, c/zunmlq 
 | 
				
			||||||
 | 
						* Raise the signal when pthread_create fails (#668. Thanks, James K. Lowden)
 | 
				
			||||||
 | 
						* Remove g77 from compiler list.
 | 
				
			||||||
 | 
						* Enable AppVeyor Windows CI.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					x86/x86-64:
 | 
				
			||||||
 | 
						* Support pure C generic kernels for x86/x86-64.
 | 
				
			||||||
 | 
						* Support Intel Broadwell and Skylake by Haswell kernels.
 | 
				
			||||||
 | 
						* Support AMD Excavator by Steamroller kernels.
 | 
				
			||||||
 | 
						* Optimize s/d/c/zdot for Intel SandyBridge and Haswell.
 | 
				
			||||||
 | 
						* Optimize s/d/c/zdot for AMD Piledriver and Steamroller.
 | 
				
			||||||
 | 
						* Optimize s/d/c/zaxpy for Intel SandyBridge and Haswell.
 | 
				
			||||||
 | 
						* Optimize s/d/c/zaxpy for AMD Piledriver and Steamroller.
 | 
				
			||||||
 | 
						* Optimize d/c/zscal for Intel Haswell, dscal for Intel SandyBridge.
 | 
				
			||||||
 | 
						* Optimize d/c/zscal for AMD Bulldozer, Piledriver and Steamroller.
 | 
				
			||||||
 | 
						* Optimize s/dger for Intel SandyBridge.
 | 
				
			||||||
 | 
						* Optimize s/dsymv for Intel SandyBridge.
 | 
				
			||||||
 | 
						* Optimize ssymv for Intel Haswell.
 | 
				
			||||||
 | 
						* Optimize dgemv for Intel Nehalem and Haswell.
 | 
				
			||||||
 | 
						* Optimize dtrmm for Intel Haswell.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					ARM:
 | 
				
			||||||
 | 
						* Support Android NDK armeabi-v7a-hard ABI (-mfloat-abi=hard)
 | 
				
			||||||
 | 
						  e.g. make HOSTCC=gcc CC=arm-linux-androideabi-gcc NO_LAPACK=1 TARGET=ARMV7
 | 
				
			||||||
 | 
						* Fix lock, rpcc bugs (#616, #617. Thanks, Grazvydas Ignotas)
 | 
				
			||||||
 | 
					POWER:
 | 
				
			||||||
 | 
						* Support ppc64le platform (ELF ABI v2. #612. Thanks, Matthew Brandyberry.)
 | 
				
			||||||
 | 
						* Support POWER7/8 by POWER6 kernels. (#612. Thanks, Fábio Perez.)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
====================================================================
 | 
					====================================================================
 | 
				
			||||||
Version 0.2.14
 | 
					Version 0.2.14
 | 
				
			||||||
24-Mar-2015
 | 
					24-Mar-2015
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
							
								
								
									
										6
									
								
								Makefile
								
								
								
								
							
							
						
						
									
										6
									
								
								Makefile
								
								
								
								
							| 
						 | 
					@ -20,6 +20,8 @@ ifneq ($(NO_LAPACK), 1)
 | 
				
			||||||
SUBDIRS	+= lapack
 | 
					SUBDIRS	+= lapack
 | 
				
			||||||
endif
 | 
					endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					LAPACK_NOOPT := $(filter-out -O0 -O1 -O2 -O3 -Ofast,$(LAPACK_FFLAGS))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
SUBDIRS_ALL = $(SUBDIRS) test ctest utest exports benchmark ../laswp ../bench
 | 
					SUBDIRS_ALL = $(SUBDIRS) test ctest utest exports benchmark ../laswp ../bench
 | 
				
			||||||
 | 
					
 | 
				
			||||||
.PHONY : all libs netlib test ctest shared install
 | 
					.PHONY : all libs netlib test ctest shared install
 | 
				
			||||||
| 
						 | 
					@ -131,7 +133,7 @@ ifeq ($(CORE), UNKOWN)
 | 
				
			||||||
	$(error OpenBLAS: Detecting CPU failed. Please set TARGET explicitly, e.g. make TARGET=your_cpu_target. Please read README for the detail.)
 | 
						$(error OpenBLAS: Detecting CPU failed. Please set TARGET explicitly, e.g. make TARGET=your_cpu_target. Please read README for the detail.)
 | 
				
			||||||
endif
 | 
					endif
 | 
				
			||||||
ifeq ($(NOFORTRAN), 1)
 | 
					ifeq ($(NOFORTRAN), 1)
 | 
				
			||||||
	$(error OpenBLAS: Detecting fortran compiler failed. Please install fortran compiler, e.g. gfortran, ifort, openf90.)
 | 
						$(info OpenBLAS: Detecting fortran compiler failed. Cannot compile LAPACK. Only compile BLAS.)
 | 
				
			||||||
endif
 | 
					endif
 | 
				
			||||||
ifeq ($(NO_STATIC), 1)
 | 
					ifeq ($(NO_STATIC), 1)
 | 
				
			||||||
ifeq ($(NO_SHARED), 1)
 | 
					ifeq ($(NO_SHARED), 1)
 | 
				
			||||||
| 
						 | 
					@ -231,7 +233,7 @@ ifndef NOFORTRAN
 | 
				
			||||||
	-@echo "FORTRAN     = $(FC)" > $(NETLIB_LAPACK_DIR)/make.inc
 | 
						-@echo "FORTRAN     = $(FC)" > $(NETLIB_LAPACK_DIR)/make.inc
 | 
				
			||||||
	-@echo "OPTS        = $(LAPACK_FFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc
 | 
						-@echo "OPTS        = $(LAPACK_FFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc
 | 
				
			||||||
	-@echo "POPTS       = $(LAPACK_FPFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc
 | 
						-@echo "POPTS       = $(LAPACK_FPFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc
 | 
				
			||||||
	-@echo "NOOPT       = $(LAPACK_FFLAGS) -O0" >> $(NETLIB_LAPACK_DIR)/make.inc
 | 
						-@echo "NOOPT       = -O0 $(LAPACK_NOOPT)" >> $(NETLIB_LAPACK_DIR)/make.inc
 | 
				
			||||||
	-@echo "PNOOPT      = $(LAPACK_FPFLAGS) -O0" >> $(NETLIB_LAPACK_DIR)/make.inc
 | 
						-@echo "PNOOPT      = $(LAPACK_FPFLAGS) -O0" >> $(NETLIB_LAPACK_DIR)/make.inc
 | 
				
			||||||
	-@echo "LOADOPTS    = $(FFLAGS) $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc
 | 
						-@echo "LOADOPTS    = $(FFLAGS) $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc
 | 
				
			||||||
	-@echo "CC          = $(CC)" >> $(NETLIB_LAPACK_DIR)/make.inc
 | 
						-@echo "CC          = $(CC)" >> $(NETLIB_LAPACK_DIR)/make.inc
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
							
								
								
									
										14
									
								
								Makefile.arm
								
								
								
								
							
							
						
						
									
										14
									
								
								Makefile.arm
								
								
								
								
							| 
						 | 
					@ -1,13 +1,23 @@
 | 
				
			||||||
# ifeq logical or
 | 
					# ifeq logical or
 | 
				
			||||||
ifeq ($(CORE), $(filter $(CORE),CORTEXA9 CORTEXA15))
 | 
					ifeq ($(CORE), $(filter $(CORE),CORTEXA9 CORTEXA15))
 | 
				
			||||||
 | 
					ifeq ($(OSNAME), Android)
 | 
				
			||||||
 | 
					CCOMMON_OPT += -marm -mfpu=neon  -mfloat-abi=hard -march=armv7-a
 | 
				
			||||||
 | 
					FCOMMON_OPT += -marm -mfpu=neon  -mfloat-abi=hard -march=armv7-a
 | 
				
			||||||
 | 
					else
 | 
				
			||||||
CCOMMON_OPT += -marm -mfpu=vfpv3  -mfloat-abi=hard -march=armv7-a
 | 
					CCOMMON_OPT += -marm -mfpu=vfpv3  -mfloat-abi=hard -march=armv7-a
 | 
				
			||||||
FCOMMON_OPT += -marm -mfpu=vfpv3  -mfloat-abi=hard -march=armv7-a
 | 
					FCOMMON_OPT += -marm -mfpu=vfpv3  -mfloat-abi=hard -march=armv7-a
 | 
				
			||||||
endif
 | 
					endif
 | 
				
			||||||
 | 
					endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
ifeq ($(CORE), ARMV7)
 | 
					ifeq ($(CORE), ARMV7)
 | 
				
			||||||
 | 
					ifeq ($(OSNAME), Android)
 | 
				
			||||||
 | 
					CCOMMON_OPT += -marm -mfpu=neon  -mfloat-abi=hard -march=armv7-a
 | 
				
			||||||
 | 
					FCOMMON_OPT += -marm -mfpu=neon  -mfloat-abi=hard -march=armv7-a
 | 
				
			||||||
 | 
					else
 | 
				
			||||||
CCOMMON_OPT += -marm -mfpu=vfpv3  -mfloat-abi=hard -march=armv7-a
 | 
					CCOMMON_OPT += -marm -mfpu=vfpv3  -mfloat-abi=hard -march=armv7-a
 | 
				
			||||||
FCOMMON_OPT += -marm -mfpu=vfpv3  -mfloat-abi=hard -march=armv7-a
 | 
					FCOMMON_OPT += -marm -mfpu=vfpv3  -mfloat-abi=hard -march=armv7-a
 | 
				
			||||||
endif
 | 
					endif
 | 
				
			||||||
 | 
					endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
ifeq ($(CORE), ARMV6)
 | 
					ifeq ($(CORE), ARMV6)
 | 
				
			||||||
CCOMMON_OPT += -marm -mfpu=vfp -mfloat-abi=hard  -march=armv6
 | 
					CCOMMON_OPT += -marm -mfpu=vfp -mfloat-abi=hard  -march=armv6
 | 
				
			||||||
| 
						 | 
					@ -16,8 +26,8 @@ endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
ifeq ($(CORE), ARMV5)
 | 
					ifeq ($(CORE), ARMV5)
 | 
				
			||||||
CCOMMON_OPT += -marm -mfpu=vfp -mfloat-abi=hard  -march=armv6
 | 
					CCOMMON_OPT += -marm -march=armv5
 | 
				
			||||||
FCOMMON_OPT += -marm -mfpu=vfp -mfloat-abi=hard  -march=armv6
 | 
					FCOMMON_OPT += -marm -march=armv5
 | 
				
			||||||
endif
 | 
					endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -11,6 +11,7 @@ OPENBLAS_BINARY_DIR := $(PREFIX)/bin
 | 
				
			||||||
OPENBLAS_BUILD_DIR := $(CURDIR)
 | 
					OPENBLAS_BUILD_DIR := $(CURDIR)
 | 
				
			||||||
OPENBLAS_CMAKE_DIR := $(OPENBLAS_LIBRARY_DIR)/cmake/openblas
 | 
					OPENBLAS_CMAKE_DIR := $(OPENBLAS_LIBRARY_DIR)/cmake/openblas
 | 
				
			||||||
OPENBLAS_CMAKE_CONFIG := OpenBLASConfig.cmake
 | 
					OPENBLAS_CMAKE_CONFIG := OpenBLASConfig.cmake
 | 
				
			||||||
 | 
					OPENBLAS_CMAKE_CONFIG_VERSION := OpenBLASConfigVersion.cmake
 | 
				
			||||||
 | 
					
 | 
				
			||||||
.PHONY : install
 | 
					.PHONY : install
 | 
				
			||||||
.NOTPARALLEL : install
 | 
					.NOTPARALLEL : install
 | 
				
			||||||
| 
						 | 
					@ -86,8 +87,8 @@ ifeq ($(OSNAME), Darwin)
 | 
				
			||||||
	ln -fs $(LIBDYNNAME) $(LIBPREFIX).dylib
 | 
						ln -fs $(LIBDYNNAME) $(LIBPREFIX).dylib
 | 
				
			||||||
endif
 | 
					endif
 | 
				
			||||||
ifeq ($(OSNAME), WINNT)
 | 
					ifeq ($(OSNAME), WINNT)
 | 
				
			||||||
	@-cp $(LIBDLLNAME) $(OPENBLAS_BINARY_DIR)
 | 
						@-cp $(LIBDLLNAME) $(DESTDIR)$(OPENBLAS_BINARY_DIR)
 | 
				
			||||||
	@-cp $(LIBDLLNAME).a $(OPENBLAS_LIBRARY_DIR)
 | 
						@-cp $(LIBDLLNAME).a $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
 | 
				
			||||||
endif
 | 
					endif
 | 
				
			||||||
ifeq ($(OSNAME), CYGWIN_NT)
 | 
					ifeq ($(OSNAME), CYGWIN_NT)
 | 
				
			||||||
	@-cp $(LIBDLLNAME) $(OPENBLAS_BINARY_DIR)
 | 
						@-cp $(LIBDLLNAME) $(OPENBLAS_BINARY_DIR)
 | 
				
			||||||
| 
						 | 
					@ -97,6 +98,7 @@ endif
 | 
				
			||||||
	@echo Generating $(OPENBLAS_CMAKE_CONFIG) in $(DESTDIR)$(OPENBLAS_CMAKE_DIR)
 | 
						@echo Generating $(OPENBLAS_CMAKE_CONFIG) in $(DESTDIR)$(OPENBLAS_CMAKE_DIR)
 | 
				
			||||||
	@echo "SET(OpenBLAS_VERSION \"${VERSION}\")" > $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)	
 | 
						@echo "SET(OpenBLAS_VERSION \"${VERSION}\")" > $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)	
 | 
				
			||||||
	@echo "SET(OpenBLAS_INCLUDE_DIRS ${OPENBLAS_INCLUDE_DIR})" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)
 | 
						@echo "SET(OpenBLAS_INCLUDE_DIRS ${OPENBLAS_INCLUDE_DIR})" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
ifndef NO_SHARED
 | 
					ifndef NO_SHARED
 | 
				
			||||||
#ifeq logical or
 | 
					#ifeq logical or
 | 
				
			||||||
ifeq ($(OSNAME), $(filter $(OSNAME),Linux FreeBSD NetBSD))
 | 
					ifeq ($(OSNAME), $(filter $(OSNAME),Linux FreeBSD NetBSD))
 | 
				
			||||||
| 
						 | 
					@ -112,5 +114,16 @@ else
 | 
				
			||||||
#only static
 | 
					#only static
 | 
				
			||||||
	@echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_LIBRARY_DIR}/$(LIBPREFIX).$(LIBSUFFIX))" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)
 | 
						@echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_LIBRARY_DIR}/$(LIBPREFIX).$(LIBSUFFIX))" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)
 | 
				
			||||||
endif
 | 
					endif
 | 
				
			||||||
 | 
					#Generating OpenBLASConfigVersion.cmake
 | 
				
			||||||
 | 
						@echo Generating $(OPENBLAS_CMAKE_CONFIG_VERSION) in $(DESTDIR)$(OPENBLAS_CMAKE_DIR)
 | 
				
			||||||
 | 
						@echo "set (PACKAGE_VERSION \"${VERSION}\")" > $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)
 | 
				
			||||||
 | 
						@echo "if (PACKAGE_VERSION VERSION_LESS PACKAGE_FIND_VERSION)" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)
 | 
				
			||||||
 | 
						@echo "  set (PACKAGE_VERSION_COMPATIBLE FALSE)" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)
 | 
				
			||||||
 | 
						@echo "else ()" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)
 | 
				
			||||||
 | 
						@echo "  set (PACKAGE_VERSION_COMPATIBLE TRUE)" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)
 | 
				
			||||||
 | 
						@echo "  if (PACKAGE_FIND_VERSION STREQUAL PACKAGE_VERSION)" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)
 | 
				
			||||||
 | 
						@echo "    set (PACKAGE_VERSION_EXACT TRUE)" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)
 | 
				
			||||||
 | 
						@echo "  endif ()" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)
 | 
				
			||||||
 | 
						@echo "endif ()" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)
 | 
				
			||||||
	@echo Install OK!
 | 
						@echo Install OK!
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -3,7 +3,7 @@
 | 
				
			||||||
#
 | 
					#
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# This library's version
 | 
					# This library's version
 | 
				
			||||||
VERSION = 0.2.14
 | 
					VERSION = 0.2.15
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a
 | 
					# If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a
 | 
				
			||||||
# and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library
 | 
					# and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library
 | 
				
			||||||
| 
						 | 
					@ -162,13 +162,16 @@ COMMON_PROF = -pg
 | 
				
			||||||
# Improve GEMV and GER for small matrices by stack allocation.
 | 
					# Improve GEMV and GER for small matrices by stack allocation.
 | 
				
			||||||
# For details, https://github.com/xianyi/OpenBLAS/pull/482
 | 
					# For details, https://github.com/xianyi/OpenBLAS/pull/482
 | 
				
			||||||
#
 | 
					#
 | 
				
			||||||
# MAX_STACK_ALLOC=2048
 | 
					 MAX_STACK_ALLOC=2048
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# Add a prefix or suffix to all exported symbol names in the shared library.
 | 
					# Add a prefix or suffix to all exported symbol names in the shared library.
 | 
				
			||||||
# Avoid conflicts with other BLAS libraries, especially when using
 | 
					# Avoid conflicts with other BLAS libraries, especially when using
 | 
				
			||||||
# 64 bit integer interfaces in OpenBLAS.
 | 
					# 64 bit integer interfaces in OpenBLAS.
 | 
				
			||||||
# For details, https://github.com/xianyi/OpenBLAS/pull/459
 | 
					# For details, https://github.com/xianyi/OpenBLAS/pull/459
 | 
				
			||||||
#
 | 
					#
 | 
				
			||||||
 | 
					# The same prefix and suffix are also added to the library name,
 | 
				
			||||||
 | 
					# i.e. you get lib$(SYMBOLPREFIX)openblas$(SYMBOLSUFFIX) rather than libopenblas
 | 
				
			||||||
 | 
					#
 | 
				
			||||||
# SYMBOLPREFIX=
 | 
					# SYMBOLPREFIX=
 | 
				
			||||||
# SYMBOLSUFFIX=
 | 
					# SYMBOLSUFFIX=
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -23,6 +23,7 @@ CC = gcc
 | 
				
			||||||
UNAME_S := $(shell uname -s)
 | 
					UNAME_S := $(shell uname -s)
 | 
				
			||||||
ifeq ($(UNAME_S),Darwin)
 | 
					ifeq ($(UNAME_S),Darwin)
 | 
				
			||||||
     CC = clang
 | 
					     CC = clang
 | 
				
			||||||
 | 
					#     EXTRALIB += -Wl,-no_compact_unwind
 | 
				
			||||||
endif
 | 
					endif
 | 
				
			||||||
endif
 | 
					endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -64,6 +65,9 @@ endif
 | 
				
			||||||
ifeq ($(TARGET), STEAMROLLER)
 | 
					ifeq ($(TARGET), STEAMROLLER)
 | 
				
			||||||
GETARCH_FLAGS := -DFORCE_BARCELONA
 | 
					GETARCH_FLAGS := -DFORCE_BARCELONA
 | 
				
			||||||
endif
 | 
					endif
 | 
				
			||||||
 | 
					ifeq ($(TARGET), EXCAVATOR)
 | 
				
			||||||
 | 
					GETARCH_FLAGS := -DFORCE_BARCELONA
 | 
				
			||||||
 | 
					endif
 | 
				
			||||||
endif
 | 
					endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -91,6 +95,9 @@ endif
 | 
				
			||||||
ifeq ($(TARGET_CORE), STEAMROLLER)
 | 
					ifeq ($(TARGET_CORE), STEAMROLLER)
 | 
				
			||||||
GETARCH_FLAGS := -DFORCE_BARCELONA
 | 
					GETARCH_FLAGS := -DFORCE_BARCELONA
 | 
				
			||||||
endif
 | 
					endif
 | 
				
			||||||
 | 
					ifeq ($(TARGET_CORE), EXCAVATOR)
 | 
				
			||||||
 | 
					GETARCH_FLAGS := -DFORCE_BARCELONA
 | 
				
			||||||
 | 
					endif
 | 
				
			||||||
endif
 | 
					endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -195,12 +202,18 @@ DLLWRAP = $(CROSS_SUFFIX)dllwrap
 | 
				
			||||||
OBJCOPY = $(CROSS_SUFFIX)objcopy
 | 
					OBJCOPY = $(CROSS_SUFFIX)objcopy
 | 
				
			||||||
OBJCONV = $(CROSS_SUFFIX)objconv
 | 
					OBJCONV = $(CROSS_SUFFIX)objconv
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# If Fortran compiler detection failed, build BLAS only (no LAPACK).
 | 
				
			||||||
 | 
					ifeq ($(NOFORTRAN), 1)
 | 
				
			||||||
 | 
					NO_LAPACK = 1
 | 
				
			||||||
 | 
					endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#
 | 
					#
 | 
				
			||||||
#  OS dependent settings
 | 
					#  OS dependent settings
 | 
				
			||||||
#
 | 
					#
 | 
				
			||||||
 | 
					
 | 
				
			||||||
ifeq ($(OSNAME), Darwin)
 | 
					ifeq ($(OSNAME), Darwin)
 | 
				
			||||||
export MACOSX_DEPLOYMENT_TARGET=10.2
 | 
					export MACOSX_DEPLOYMENT_TARGET=10.6
 | 
				
			||||||
MD5SUM = md5 -r
 | 
					MD5SUM = md5 -r
 | 
				
			||||||
endif
 | 
					endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -323,6 +336,11 @@ ifeq ($(ARCH), x86)
 | 
				
			||||||
ifndef BINARY
 | 
					ifndef BINARY
 | 
				
			||||||
NO_BINARY_MODE	= 1
 | 
					NO_BINARY_MODE	= 1
 | 
				
			||||||
endif
 | 
					endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					ifeq ($(CORE), generic)
 | 
				
			||||||
 | 
					NO_EXPRECISION = 1
 | 
				
			||||||
 | 
					endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
ifndef NO_EXPRECISION
 | 
					ifndef NO_EXPRECISION
 | 
				
			||||||
ifeq ($(F_COMPILER), GFORTRAN)
 | 
					ifeq ($(F_COMPILER), GFORTRAN)
 | 
				
			||||||
# ifeq logical or. GCC or LSB
 | 
					# ifeq logical or. GCC or LSB
 | 
				
			||||||
| 
						 | 
					@ -341,6 +359,11 @@ endif
 | 
				
			||||||
endif
 | 
					endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
ifeq ($(ARCH), x86_64)
 | 
					ifeq ($(ARCH), x86_64)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					ifeq ($(CORE), generic)
 | 
				
			||||||
 | 
					NO_EXPRECISION = 1
 | 
				
			||||||
 | 
					endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
ifndef NO_EXPRECISION
 | 
					ifndef NO_EXPRECISION
 | 
				
			||||||
ifeq ($(F_COMPILER), GFORTRAN)
 | 
					ifeq ($(F_COMPILER), GFORTRAN)
 | 
				
			||||||
# ifeq logical or. GCC or LSB
 | 
					# ifeq logical or. GCC or LSB
 | 
				
			||||||
| 
						 | 
					@ -408,7 +431,7 @@ endif
 | 
				
			||||||
ifeq ($(ARCH), x86_64)
 | 
					ifeq ($(ARCH), x86_64)
 | 
				
			||||||
DYNAMIC_CORE = PRESCOTT CORE2 PENRYN DUNNINGTON NEHALEM OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO
 | 
					DYNAMIC_CORE = PRESCOTT CORE2 PENRYN DUNNINGTON NEHALEM OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO
 | 
				
			||||||
ifneq ($(NO_AVX), 1)
 | 
					ifneq ($(NO_AVX), 1)
 | 
				
			||||||
DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER STEAMROLLER
 | 
					DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER STEAMROLLER EXCAVATOR
 | 
				
			||||||
endif
 | 
					endif
 | 
				
			||||||
ifneq ($(NO_AVX2), 1)
 | 
					ifneq ($(NO_AVX2), 1)
 | 
				
			||||||
DYNAMIC_CORE += HASWELL
 | 
					DYNAMIC_CORE += HASWELL
 | 
				
			||||||
| 
						 | 
					@ -578,7 +601,7 @@ else
 | 
				
			||||||
FCOMMON_OPT += -m32
 | 
					FCOMMON_OPT += -m32
 | 
				
			||||||
endif
 | 
					endif
 | 
				
			||||||
endif
 | 
					endif
 | 
				
			||||||
ifdef USE_OPENMP
 | 
					ifeq ($(USE_OPENMP), 1)
 | 
				
			||||||
FCOMMON_OPT += -fopenmp
 | 
					FCOMMON_OPT += -fopenmp
 | 
				
			||||||
endif
 | 
					endif
 | 
				
			||||||
endif
 | 
					endif
 | 
				
			||||||
| 
						 | 
					@ -590,14 +613,14 @@ ifneq ($(INTERFACE64), 0)
 | 
				
			||||||
FCOMMON_OPT += -i8
 | 
					FCOMMON_OPT += -i8
 | 
				
			||||||
endif
 | 
					endif
 | 
				
			||||||
endif
 | 
					endif
 | 
				
			||||||
ifdef USE_OPENMP
 | 
					ifeq ($(USE_OPENMP), 1)
 | 
				
			||||||
FCOMMON_OPT += -openmp
 | 
					FCOMMON_OPT += -openmp
 | 
				
			||||||
endif
 | 
					endif
 | 
				
			||||||
endif
 | 
					endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
ifeq ($(F_COMPILER), FUJITSU)
 | 
					ifeq ($(F_COMPILER), FUJITSU)
 | 
				
			||||||
CCOMMON_OPT += -DF_INTERFACE_FUJITSU
 | 
					CCOMMON_OPT += -DF_INTERFACE_FUJITSU
 | 
				
			||||||
ifdef USE_OPENMP
 | 
					ifeq ($(USE_OPENMP), 1)
 | 
				
			||||||
FCOMMON_OPT += -openmp
 | 
					FCOMMON_OPT += -openmp
 | 
				
			||||||
endif
 | 
					endif
 | 
				
			||||||
endif
 | 
					endif
 | 
				
			||||||
| 
						 | 
					@ -615,7 +638,7 @@ endif
 | 
				
			||||||
else
 | 
					else
 | 
				
			||||||
FCOMMON_OPT += -q32
 | 
					FCOMMON_OPT += -q32
 | 
				
			||||||
endif
 | 
					endif
 | 
				
			||||||
ifdef USE_OPENMP
 | 
					ifeq ($(USE_OPENMP), 1)
 | 
				
			||||||
FCOMMON_OPT += -openmp
 | 
					FCOMMON_OPT += -openmp
 | 
				
			||||||
endif
 | 
					endif
 | 
				
			||||||
endif
 | 
					endif
 | 
				
			||||||
| 
						 | 
					@ -633,7 +656,7 @@ FCOMMON_OPT += -tp p7-64
 | 
				
			||||||
else
 | 
					else
 | 
				
			||||||
FCOMMON_OPT += -tp p7
 | 
					FCOMMON_OPT += -tp p7
 | 
				
			||||||
endif
 | 
					endif
 | 
				
			||||||
ifdef USE_OPENMP
 | 
					ifeq ($(USE_OPENMP), 1)
 | 
				
			||||||
FCOMMON_OPT += -mp
 | 
					FCOMMON_OPT += -mp
 | 
				
			||||||
endif
 | 
					endif
 | 
				
			||||||
endif
 | 
					endif
 | 
				
			||||||
| 
						 | 
					@ -662,7 +685,7 @@ FCOMMON_OPT += -mabi=n32
 | 
				
			||||||
endif
 | 
					endif
 | 
				
			||||||
endif
 | 
					endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
ifdef USE_OPENMP
 | 
					ifeq ($(USE_OPENMP), 1)
 | 
				
			||||||
FCOMMON_OPT += -mp
 | 
					FCOMMON_OPT += -mp
 | 
				
			||||||
endif
 | 
					endif
 | 
				
			||||||
endif
 | 
					endif
 | 
				
			||||||
| 
						 | 
					@ -699,7 +722,7 @@ FCOMMON_OPT += -m64
 | 
				
			||||||
endif
 | 
					endif
 | 
				
			||||||
endif
 | 
					endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
ifdef USE_OPENMP
 | 
					ifeq ($(USE_OPENMP), 1)
 | 
				
			||||||
FEXTRALIB   += -lstdc++
 | 
					FEXTRALIB   += -lstdc++
 | 
				
			||||||
FCOMMON_OPT += -mp
 | 
					FCOMMON_OPT += -mp
 | 
				
			||||||
endif
 | 
					endif
 | 
				
			||||||
| 
						 | 
					@ -747,14 +770,14 @@ FCOMMON_OPT  += -m32
 | 
				
			||||||
else
 | 
					else
 | 
				
			||||||
FCOMMON_OPT  += -m64
 | 
					FCOMMON_OPT  += -m64
 | 
				
			||||||
endif
 | 
					endif
 | 
				
			||||||
ifdef USE_OPENMP
 | 
					ifeq ($(USE_OPENMP), 1)
 | 
				
			||||||
FCOMMON_OPT += -xopenmp=parallel
 | 
					FCOMMON_OPT += -xopenmp=parallel
 | 
				
			||||||
endif
 | 
					endif
 | 
				
			||||||
endif
 | 
					endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
ifeq ($(F_COMPILER), COMPAQ)
 | 
					ifeq ($(F_COMPILER), COMPAQ)
 | 
				
			||||||
CCOMMON_OPT  += -DF_INTERFACE_COMPAQ
 | 
					CCOMMON_OPT  += -DF_INTERFACE_COMPAQ
 | 
				
			||||||
ifdef USE_OPENMP
 | 
					ifeq ($(USE_OPENMP), 1)
 | 
				
			||||||
FCOMMON_OPT += -openmp
 | 
					FCOMMON_OPT += -openmp
 | 
				
			||||||
endif
 | 
					endif
 | 
				
			||||||
endif
 | 
					endif
 | 
				
			||||||
| 
						 | 
					@ -857,12 +880,6 @@ ifdef USE_SIMPLE_THREADED_LEVEL3
 | 
				
			||||||
CCOMMON_OPT	+= -DUSE_SIMPLE_THREADED_LEVEL3
 | 
					CCOMMON_OPT	+= -DUSE_SIMPLE_THREADED_LEVEL3
 | 
				
			||||||
endif
 | 
					endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
ifndef LIBNAMESUFFIX
 | 
					 | 
				
			||||||
LIBPREFIX = libopenblas
 | 
					 | 
				
			||||||
else
 | 
					 | 
				
			||||||
LIBPREFIX = libopenblas_$(LIBNAMESUFFIX)
 | 
					 | 
				
			||||||
endif
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
ifndef SYMBOLPREFIX
 | 
					ifndef SYMBOLPREFIX
 | 
				
			||||||
SYMBOLPREFIX =
 | 
					SYMBOLPREFIX =
 | 
				
			||||||
endif
 | 
					endif
 | 
				
			||||||
| 
						 | 
					@ -871,6 +888,12 @@ ifndef SYMBOLSUFFIX
 | 
				
			||||||
SYMBOLSUFFIX =
 | 
					SYMBOLSUFFIX =
 | 
				
			||||||
endif
 | 
					endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					ifndef LIBNAMESUFFIX
 | 
				
			||||||
 | 
					LIBPREFIX = lib$(SYMBOLPREFIX)openblas$(SYMBOLSUFFIX)
 | 
				
			||||||
 | 
					else
 | 
				
			||||||
 | 
					LIBPREFIX = lib$(SYMBOLPREFIX)openblas$(SYMBOLSUFFIX)_$(LIBNAMESUFFIX)
 | 
				
			||||||
 | 
					endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
KERNELDIR	= $(TOPDIR)/kernel/$(ARCH)
 | 
					KERNELDIR	= $(TOPDIR)/kernel/$(ARCH)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
include $(TOPDIR)/Makefile.$(ARCH)
 | 
					include $(TOPDIR)/Makefile.$(ARCH)
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1,7 +1,10 @@
 | 
				
			||||||
# OpenBLAS
 | 
					# OpenBLAS
 | 
				
			||||||
 | 
					
 | 
				
			||||||
[](https://travis-ci.org/xianyi/OpenBLAS)
 | 
					[Join the chat on Gitter](https://gitter.im/xianyi/OpenBLAS?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Travis CI: [Build Status](https://travis-ci.org/xianyi/OpenBLAS)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					AppVeyor: [Build Status](https://ci.appveyor.com/project/xianyi/openblas/branch/develop)
 | 
				
			||||||
## Introduction
 | 
					## Introduction
 | 
				
			||||||
OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version.
 | 
					OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -33,6 +33,7 @@ BOBCAT
 | 
				
			||||||
BULLDOZER
 | 
					BULLDOZER
 | 
				
			||||||
PILEDRIVER
 | 
					PILEDRIVER
 | 
				
			||||||
STEAMROLLER
 | 
					STEAMROLLER
 | 
				
			||||||
 | 
					EXCAVATOR
 | 
				
			||||||
 | 
					
 | 
				
			||||||
c)VIA CPU:
 | 
					c)VIA CPU:
 | 
				
			||||||
SSE_GENERIC
 | 
					SSE_GENERIC
 | 
				
			||||||
| 
						 | 
					@ -43,6 +44,8 @@ NANO
 | 
				
			||||||
POWER4
 | 
					POWER4
 | 
				
			||||||
POWER5
 | 
					POWER5
 | 
				
			||||||
POWER6
 | 
					POWER6
 | 
				
			||||||
 | 
					POWER7
 | 
				
			||||||
 | 
					POWER8
 | 
				
			||||||
PPCG4
 | 
					PPCG4
 | 
				
			||||||
PPC970
 | 
					PPC970
 | 
				
			||||||
PPC970MP
 | 
					PPC970MP
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,42 @@
 | 
				
			||||||
 | 
					version: 0.2.15.{build}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#environment:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					platform: 
 | 
				
			||||||
 | 
					  - x64
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					configuration: Release
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					clone_folder: c:\projects\OpenBLAS
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					init:
 | 
				
			||||||
 | 
					  - git config --global core.autocrlf input
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					build:
 | 
				
			||||||
 | 
					  project: OpenBLAS.sln
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					clone_depth: 5
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#branches to build
 | 
				
			||||||
 | 
					branches:
 | 
				
			||||||
 | 
					  only:
 | 
				
			||||||
 | 
					    - master
 | 
				
			||||||
 | 
					    - develop
 | 
				
			||||||
 | 
					    - cmake
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					skip_tags: true
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					matrix:
 | 
				
			||||||
 | 
					  fast_finish: true
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					skip_commits:
 | 
				
			||||||
 | 
					# Add [av skip] to commit messages
 | 
				
			||||||
 | 
					  message: /\[av skip\]/
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					before_build:
 | 
				
			||||||
 | 
					  - echo Running cmake...
 | 
				
			||||||
 | 
					  - cd c:\projects\OpenBLAS
 | 
				
			||||||
 | 
					  - cmake -G "Visual Studio 12 Win64" .
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					test_script:
 | 
				
			||||||
 | 
					  - echo Build OK!
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,9 @@
 | 
				
			||||||
 | 
					#!/bin/bash
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Rename every benchmark binary "<name>.<suffix>" in the current directory
					# to "<name>_<suffix>.exe" (all dots in the name become underscores).
					# A glob pattern that matches nothing stays literal and is filtered out
					# by the -f test below.
					for f in *.goto *.acml *.mkl *.atlas
					do
						if [ -f "$f" ]; then
							# Quote both operands and stop option parsing with "--" so that
							# names containing whitespace, glob characters or a leading dash
							# are moved intact; the bash expansion replaces every "." with "_".
							mv -- "$f" "${f//./_}.exe"
						fi
					done
 | 
				
			||||||
 | 
					
 | 
				
			||||||
							
								
								
									
										1014
									
								
								benchmark/Makefile
								
								
								
								
							
							
						
						
									
										1014
									
								
								benchmark/Makefile
								
								
								
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							| 
						 | 
					@ -0,0 +1,196 @@
 | 
				
			||||||
 | 
					/***************************************************************************
 | 
				
			||||||
 | 
					Copyright (c) 2014, The OpenBLAS Project
 | 
				
			||||||
 | 
					All rights reserved.
 | 
				
			||||||
 | 
					Redistribution and use in source and binary forms, with or without
 | 
				
			||||||
 | 
					modification, are permitted provided that the following conditions are
 | 
				
			||||||
 | 
					met:
 | 
				
			||||||
 | 
					1. Redistributions of source code must retain the above copyright
 | 
				
			||||||
 | 
					notice, this list of conditions and the following disclaimer.
 | 
				
			||||||
 | 
					2. Redistributions in binary form must reproduce the above copyright
 | 
				
			||||||
 | 
					notice, this list of conditions and the following disclaimer in
 | 
				
			||||||
 | 
					the documentation and/or other materials provided with the
 | 
				
			||||||
 | 
					distribution.
 | 
				
			||||||
 | 
					3. Neither the name of the OpenBLAS project nor the names of
 | 
				
			||||||
 | 
					its contributors may be used to endorse or promote products
 | 
				
			||||||
 | 
					derived from this software without specific prior written permission.
 | 
				
			||||||
 | 
					THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 | 
				
			||||||
 | 
					AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 | 
				
			||||||
 | 
					IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 | 
				
			||||||
 | 
					ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
 | 
				
			||||||
 | 
					LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 | 
				
			||||||
 | 
					DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 | 
				
			||||||
 | 
					SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 | 
				
			||||||
 | 
					CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 | 
				
			||||||
 | 
					OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
 | 
				
			||||||
 | 
					USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 | 
				
			||||||
 | 
					*****************************************************************************/
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include <stdio.h>
 | 
				
			||||||
 | 
					#include <stdlib.h>
 | 
				
			||||||
 | 
					#ifdef __CYGWIN32__
 | 
				
			||||||
 | 
					#include <sys/time.h>
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					#include "common.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#undef ASUM
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#ifdef COMPLEX
 | 
				
			||||||
 | 
					#ifdef DOUBLE
 | 
				
			||||||
 | 
					#define ASUM   BLASFUNC(dzasum)
 | 
				
			||||||
 | 
					#else
 | 
				
			||||||
 | 
					#define ASUM   BLASFUNC(scasum)
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					#else
 | 
				
			||||||
 | 
					#ifdef DOUBLE
 | 
				
			||||||
 | 
					#define ASUM   BLASFUNC(dasum)
 | 
				
			||||||
 | 
					#else
 | 
				
			||||||
 | 
					#define ASUM   BLASFUNC(sasum)
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#if defined(__WIN32__) || defined(__WIN64__)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#ifndef DELTA_EPOCH_IN_MICROSECS
 | 
				
			||||||
 | 
					#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
					 * Minimal gettimeofday() stand-in for native Windows builds.
					 *
					 * tv : receives the current time as seconds / microseconds relative to the
					 *      Unix epoch; nothing is written when tv is NULL.
					 * tz : accepted for signature compatibility only and never read.
					 *
					 * Always returns 0.
					 */
					int gettimeofday(struct timeval *tv, void *tz){

					  FILETIME ft;
					  unsigned __int64 tmpres = 0;

					  (void)tz;  /* time zone information is not provided */

					  if (NULL != tv)
					    {
					      GetSystemTimeAsFileTime(&ft);

					      /* Glue the two 32-bit halves of the FILETIME into one 64-bit value. */
					      tmpres |= ft.dwHighDateTime;
					      tmpres <<= 32;
					      tmpres |= ft.dwLowDateTime;

					      /*converting file time to unix epoch*/
					      tmpres /= 10;  /*convert into microseconds*/
					      tmpres -= DELTA_EPOCH_IN_MICROSECS;
					      tv->tv_sec = (long)(tmpres / 1000000UL);
					      tv->tv_usec = (long)(tmpres % 1000000UL);
					    }

					  return 0;
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
					 * Obtain memory for `size` bytes from a private System V shared-memory
					 * segment that is requested with the SHM_HUGETLB flag.
					 *
					 * The length handed to shmget() is (size + HUGE_PAGESIZE) masked with
					 * ~(HUGE_PAGESIZE - 1).  If shmget() or shmat() fails, a message is printed
					 * on stdout and the process ends through exit(1).  IPC_RMID is issued on the
					 * segment immediately after it has been attached.
					 *
					 * Returns the address produced by shmat().
					 */
					static void *huge_malloc(BLASLONG size){
					  void *mapped;
					  int   segment;

					#ifndef SHM_HUGETLB
					#define SHM_HUGETLB 04000
					#endif

					  /* Step 1: create the segment. */
					  segment = shmget(IPC_PRIVATE,
							   (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
							   SHM_HUGETLB | IPC_CREAT |0600);
					  if (segment < 0) {
					    printf( "Memory allocation failed(shmget).\n");
					    exit(1);
					  }

					  /* Step 2: map it into this process. */
					  mapped = shmat(segment, NULL, SHM_RND);
					  if ((BLASLONG)mapped == -1){
					    printf( "Memory allocation failed(shmat).\n");
					    exit(1);
					  }

					  /* Step 3: request removal of the segment id now that it is attached. */
					  shmctl(segment, IPC_RMID, 0);

					  return mapped;
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#define malloc huge_malloc
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					int main(int argc, char *argv[]){
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  FLOAT *x;
 | 
				
			||||||
 | 
					  FLOAT result;
 | 
				
			||||||
 | 
					  blasint m, i;
 | 
				
			||||||
 | 
					  blasint inc_x=1;
 | 
				
			||||||
 | 
					  int loops = 1;
 | 
				
			||||||
 | 
					  int l;
 | 
				
			||||||
 | 
					  char *p;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  int from =   1;
 | 
				
			||||||
 | 
					  int to   = 200;
 | 
				
			||||||
 | 
					  int step =   1;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  struct timeval start, stop;
 | 
				
			||||||
 | 
					  double time1,timeg;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  argc--;argv++;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  if (argc > 0) { from     = atol(*argv);		argc--; argv++;}
 | 
				
			||||||
 | 
					  if (argc > 0) { to       = MAX(atol(*argv), from);	argc--; argv++;}
 | 
				
			||||||
 | 
					  if (argc > 0) { step     = atol(*argv);		argc--; argv++;}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  if ((p = getenv("OPENBLAS_LOOPS")))  loops = atoi(p);
 | 
				
			||||||
 | 
					  if ((p = getenv("OPENBLAS_INCX")))   inc_x = atoi(p);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  fprintf(stderr, "From : %3d  To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step,inc_x,loops);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
 | 
				
			||||||
 | 
					    fprintf(stderr,"Out of Memory!!\n");exit(1);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#ifdef linux
 | 
				
			||||||
 | 
					  srandom(getpid());
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  fprintf(stderr, "   SIZE       Flops\n");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  for(m = from; m <= to; m += step)
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   timeg=0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   fprintf(stderr, " %6d : ", (int)m);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   for (l=0; l<loops; l++)
 | 
				
			||||||
 | 
					   {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   	for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
 | 
				
			||||||
 | 
								x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
 | 
				
			||||||
 | 
					   	}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    	gettimeofday( &start, (struct timezone *)0);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    	result = ASUM (&m, x, &inc_x);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    	gettimeofday( &stop, (struct timezone *)0);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    	time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						timeg += time1;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    timeg /= loops;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#ifdef COMPLEX
 | 
				
			||||||
 | 
					    fprintf(stderr, " %10.2f MFlops\n", 4. * (double)m / timeg * 1.e-6);
 | 
				
			||||||
 | 
					#else
 | 
				
			||||||
 | 
					    fprintf(stderr, " %10.2f MFlops\n", 2. * (double)m / timeg * 1.e-6);
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  return 0;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
 | 
				
			||||||
| 
						 | 
					@ -71,8 +71,14 @@ double fabs(double);
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#if defined(__WIN32__) || defined(__WIN64__)
 | 
					#if defined(__WIN32__) || defined(__WIN64__)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#ifndef DELTA_EPOCH_IN_MICROSECS
 | 
				
			||||||
 | 
					#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
int gettimeofday(struct timeval *tv, void *tz){
 | 
					int gettimeofday(struct timeval *tv, void *tz){
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  FILETIME ft;
 | 
					  FILETIME ft;
 | 
				
			||||||
| 
						 | 
					@ -99,6 +105,7 @@ int gettimeofday(struct timeval *tv, void *tz){
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static __inline double getmflops(int ratio, int m, double secs){
 | 
					static __inline double getmflops(int ratio, int m, double secs){
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  double mm = (double)m;
 | 
					  double mm = (double)m;
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,201 @@
 | 
				
			||||||
 | 
					/***************************************************************************
 | 
				
			||||||
 | 
					Copyright (c) 2014, The OpenBLAS Project
 | 
				
			||||||
 | 
					All rights reserved.
 | 
				
			||||||
 | 
					Redistribution and use in source and binary forms, with or without
 | 
				
			||||||
 | 
					modification, are permitted provided that the following conditions are
 | 
				
			||||||
 | 
					met:
 | 
				
			||||||
 | 
					1. Redistributions of source code must retain the above copyright
 | 
				
			||||||
 | 
					notice, this list of conditions and the following disclaimer.
 | 
				
			||||||
 | 
					2. Redistributions in binary form must reproduce the above copyright
 | 
				
			||||||
 | 
					notice, this list of conditions and the following disclaimer in
 | 
				
			||||||
 | 
					the documentation and/or other materials provided with the
 | 
				
			||||||
 | 
					distribution.
 | 
				
			||||||
 | 
					3. Neither the name of the OpenBLAS project nor the names of
 | 
				
			||||||
 | 
					its contributors may be used to endorse or promote products
 | 
				
			||||||
 | 
					derived from this software without specific prior written permission.
 | 
				
			||||||
 | 
					THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 | 
				
			||||||
 | 
					AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 | 
				
			||||||
 | 
					IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 | 
				
			||||||
 | 
					ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
 | 
				
			||||||
 | 
					LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 | 
				
			||||||
 | 
					DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 | 
				
			||||||
 | 
					SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 | 
				
			||||||
 | 
					CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 | 
				
			||||||
 | 
					OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
 | 
				
			||||||
 | 
					USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 | 
				
			||||||
 | 
					*****************************************************************************/
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include <stdio.h>
 | 
				
			||||||
 | 
					#include <stdlib.h>
 | 
				
			||||||
 | 
					#ifdef __CYGWIN32__
 | 
				
			||||||
 | 
					#include <sys/time.h>
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					#include "common.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#undef COPY
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#ifdef COMPLEX
 | 
				
			||||||
 | 
					#ifdef DOUBLE
 | 
				
			||||||
 | 
					#define COPY   BLASFUNC(zcopy)
 | 
				
			||||||
 | 
					#else
 | 
				
			||||||
 | 
					#define COPY   BLASFUNC(ccopy)
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					#else
 | 
				
			||||||
 | 
					#ifdef DOUBLE
 | 
				
			||||||
 | 
					#define COPY   BLASFUNC(dcopy)
 | 
				
			||||||
 | 
					#else
 | 
				
			||||||
 | 
					#define COPY   BLASFUNC(scopy)
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#if defined(__WIN32__) || defined(__WIN64__)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#ifndef DELTA_EPOCH_IN_MICROSECS
 | 
				
			||||||
 | 
					#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
					 * Minimal gettimeofday() stand-in for native Windows builds.
					 *
					 * tv : receives the current time as seconds / microseconds relative to the
					 *      Unix epoch; nothing is written when tv is NULL.
					 * tz : accepted for signature compatibility only and never read.
					 *
					 * Always returns 0.
					 */
					int gettimeofday(struct timeval *tv, void *tz){

					  FILETIME ft;
					  unsigned __int64 tmpres = 0;

					  (void)tz;  /* time zone information is not provided */

					  if (NULL != tv)
					    {
					      GetSystemTimeAsFileTime(&ft);

					      /* Glue the two 32-bit halves of the FILETIME into one 64-bit value. */
					      tmpres |= ft.dwHighDateTime;
					      tmpres <<= 32;
					      tmpres |= ft.dwLowDateTime;

					      /*converting file time to unix epoch*/
					      tmpres /= 10;  /*convert into microseconds*/
					      tmpres -= DELTA_EPOCH_IN_MICROSECS;
					      tv->tv_sec = (long)(tmpres / 1000000UL);
					      tv->tv_usec = (long)(tmpres % 1000000UL);
					    }

					  return 0;
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
					 * Obtain memory for `size` bytes from a private System V shared-memory
					 * segment that is requested with the SHM_HUGETLB flag.
					 *
					 * The length handed to shmget() is (size + HUGE_PAGESIZE) masked with
					 * ~(HUGE_PAGESIZE - 1).  If shmget() or shmat() fails, a message is printed
					 * on stdout and the process ends through exit(1).  IPC_RMID is issued on the
					 * segment immediately after it has been attached.
					 *
					 * Returns the address produced by shmat().
					 */
					static void *huge_malloc(BLASLONG size){
					  void *mapped;
					  int   segment;

					#ifndef SHM_HUGETLB
					#define SHM_HUGETLB 04000
					#endif

					  /* Step 1: create the segment. */
					  segment = shmget(IPC_PRIVATE,
							   (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
							   SHM_HUGETLB | IPC_CREAT |0600);
					  if (segment < 0) {
					    printf( "Memory allocation failed(shmget).\n");
					    exit(1);
					  }

					  /* Step 2: map it into this process. */
					  mapped = shmat(segment, NULL, SHM_RND);
					  if ((BLASLONG)mapped == -1){
					    printf( "Memory allocation failed(shmat).\n");
					    exit(1);
					  }

					  /* Step 3: request removal of the segment id now that it is attached. */
					  shmctl(segment, IPC_RMID, 0);

					  return mapped;
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#define malloc huge_malloc
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					int main(int argc, char *argv[]){
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  FLOAT *x, *y;
 | 
				
			||||||
 | 
					  FLOAT alpha[2] = { 2.0, 2.0 };
 | 
				
			||||||
 | 
					  blasint m, i;
 | 
				
			||||||
 | 
					  blasint inc_x=1,inc_y=1;
 | 
				
			||||||
 | 
					  int loops = 1;
 | 
				
			||||||
 | 
					  int l;
 | 
				
			||||||
 | 
					  char *p;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  int from =   1;
 | 
				
			||||||
 | 
					  int to   = 200;
 | 
				
			||||||
 | 
					  int step =   1;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  struct timeval start, stop;
 | 
				
			||||||
 | 
					  double time1,timeg;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  argc--;argv++;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  if (argc > 0) { from     = atol(*argv);		argc--; argv++;}
 | 
				
			||||||
 | 
					  if (argc > 0) { to       = MAX(atol(*argv), from);	argc--; argv++;}
 | 
				
			||||||
 | 
					  if (argc > 0) { step     = atol(*argv);		argc--; argv++;}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  if ((p = getenv("OPENBLAS_LOOPS")))  loops = atoi(p);
 | 
				
			||||||
 | 
					  if ((p = getenv("OPENBLAS_INCX")))   inc_x = atoi(p);
 | 
				
			||||||
 | 
					  if ((p = getenv("OPENBLAS_INCY")))   inc_y = atoi(p);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  fprintf(stderr, "From : %3d  To : %3d Step = %3d Inc_x = %d Inc_y = %d Loops = %d\n", from, to, step,inc_x,inc_y,loops);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
 | 
				
			||||||
 | 
					    fprintf(stderr,"Out of Memory!!\n");exit(1);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL){
 | 
				
			||||||
 | 
					    fprintf(stderr,"Out of Memory!!\n");exit(1);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#ifdef linux
 | 
				
			||||||
 | 
					  srandom(getpid());
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  fprintf(stderr, "   SIZE       Flops\n");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  for(m = from; m <= to; m += step)
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   timeg=0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   fprintf(stderr, " %6d : ", (int)m);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   for (l=0; l<loops; l++)
 | 
				
			||||||
 | 
					   {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   	for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
 | 
				
			||||||
 | 
								x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
 | 
				
			||||||
 | 
					   	}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   	for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
 | 
				
			||||||
 | 
								y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
 | 
				
			||||||
 | 
					   	}
 | 
				
			||||||
 | 
					    	gettimeofday( &start, (struct timezone *)0);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    	COPY (&m, x, &inc_x, y, &inc_y );
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    	gettimeofday( &stop, (struct timezone *)0);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    	time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						timeg += time1;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    timeg /= loops;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    fprintf(stderr,
 | 
				
			||||||
 | 
						    " %10.2f MBytes\n",
 | 
				
			||||||
 | 
						    COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  return 0;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
 | 
				
			||||||
| 
						 | 
					@ -144,6 +144,7 @@ int main(int argc, char *argv[]){
 | 
				
			||||||
  FLOAT *a,*vl,*vr,*wi,*wr,*work,*rwork;
 | 
					  FLOAT *a,*vl,*vr,*wi,*wr,*work,*rwork;
 | 
				
			||||||
  FLOAT wkopt[4];
 | 
					  FLOAT wkopt[4];
 | 
				
			||||||
  char job='V';
 | 
					  char job='V';
 | 
				
			||||||
 | 
					  char jobr='N';
 | 
				
			||||||
  char *p;
 | 
					  char *p;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  blasint m, i, j, info,lwork;
 | 
					  blasint m, i, j, info,lwork;
 | 
				
			||||||
| 
						 | 
					@ -202,9 +203,9 @@ int main(int argc, char *argv[]){
 | 
				
			||||||
    lwork = -1;
 | 
					    lwork = -1;
 | 
				
			||||||
    m=to;
 | 
					    m=to;
 | 
				
			||||||
#ifndef COMPLEX
 | 
					#ifndef COMPLEX
 | 
				
			||||||
    GEEV (&job, &job, &m, a, &m, wr, wi, vl, &m, vr, &m, wkopt, &lwork, &info);
 | 
					    GEEV (&job, &jobr, &m, a, &m, wr, wi, vl, &m, vr, &m, wkopt, &lwork, &info);
 | 
				
			||||||
#else
 | 
					#else
 | 
				
			||||||
    GEEV (&job, &job, &m, a, &m, wr, vl, &m, vr, &m, wkopt, &lwork,rwork, &info);
 | 
					    GEEV (&job, &jobr, &m, a, &m, wr, vl, &m, vr, &m, wkopt, &lwork,rwork, &info);
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  lwork = (blasint)wkopt[0];
 | 
					  lwork = (blasint)wkopt[0];
 | 
				
			||||||
| 
						 | 
					@ -226,16 +227,16 @@ int main(int argc, char *argv[]){
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    lwork = -1;
 | 
					    lwork = -1;
 | 
				
			||||||
#ifndef COMPLEX
 | 
					#ifndef COMPLEX
 | 
				
			||||||
    GEEV (&job, &job, &m, a, &m, wr, wi, vl, &m, vr, &m, wkopt, &lwork, &info);
 | 
					    GEEV (&job, &jobr, &m, a, &m, wr, wi, vl, &m, vr, &m, wkopt, &lwork, &info);
 | 
				
			||||||
#else
 | 
					#else
 | 
				
			||||||
    GEEV (&job, &job, &m, a, &m, wr, vl, &m, vr, &m, wkopt, &lwork,rwork, &info);
 | 
					    GEEV (&job, &jobr, &m, a, &m, wr, vl, &m, vr, &m, wkopt, &lwork,rwork, &info);
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    lwork = (blasint)wkopt[0];
 | 
					    lwork = (blasint)wkopt[0];
 | 
				
			||||||
#ifndef COMPLEX
 | 
					#ifndef COMPLEX
 | 
				
			||||||
    GEEV (&job, &job, &m, a, &m, wr, wi, vl, &m, vr, &m, work, &lwork, &info);
 | 
					    GEEV (&job, &jobr, &m, a, &m, wr, wi, vl, &m, vr, &m, work, &lwork, &info);
 | 
				
			||||||
#else
 | 
					#else
 | 
				
			||||||
    GEEV (&job, &job, &m, a, &m, wr, vl, &m, vr, &m, work, &lwork,rwork, &info);
 | 
					    GEEV (&job, &jobr, &m, a, &m, wr, vl, &m, vr, &m, work, &lwork,rwork, &info);
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    gettimeofday( &stop, (struct timezone *)0);
 | 
					    gettimeofday( &stop, (struct timezone *)0);
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -122,7 +122,7 @@ int main(int argc, char *argv[]){
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  FLOAT *a, *b, *c;
 | 
					  FLOAT *a, *b, *c;
 | 
				
			||||||
  FLOAT alpha[] = {1.0, 1.0};
 | 
					  FLOAT alpha[] = {1.0, 1.0};
 | 
				
			||||||
  FLOAT beta [] = {1.0, 1.0};
 | 
					  FLOAT beta [] = {0.0, 0.0};
 | 
				
			||||||
  char trans='N';
 | 
					  char trans='N';
 | 
				
			||||||
  blasint m, n, i, j;
 | 
					  blasint m, n, i, j;
 | 
				
			||||||
  int loops = 1;
 | 
					  int loops = 1;
 | 
				
			||||||
| 
						 | 
					@ -168,12 +168,21 @@ int main(int argc, char *argv[]){
 | 
				
			||||||
	  has_param_n=1;	  
 | 
						  has_param_n=1;	  
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 | 
				
			||||||
#ifdef linux
 | 
					#ifdef linux
 | 
				
			||||||
  srandom(getpid());
 | 
					  srandom(getpid());
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					 
 | 
				
			||||||
 | 
						for(j = 0; j < m; j++){
 | 
				
			||||||
 | 
					      		for(i = 0; i < to * COMPSIZE; i++){
 | 
				
			||||||
 | 
								a[i + j * to * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
 | 
				
			||||||
 | 
								b[i + j * to * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
 | 
				
			||||||
 | 
								c[i + j * to * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
 | 
				
			||||||
 | 
					      		}
 | 
				
			||||||
 | 
					    	}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  fprintf(stderr, "   SIZE       Flops\n");
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  fprintf(stderr, "   SIZE          Flops          Time\n");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  for(m = from; m <= to; m += step)
 | 
					  for(m = from; m <= to; m += step)
 | 
				
			||||||
  {
 | 
					  {
 | 
				
			||||||
| 
						 | 
					@ -188,34 +197,23 @@ int main(int argc, char *argv[]){
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    fprintf(stderr, " %6dx%d : ", (int)m, (int)n);
 | 
					    fprintf(stderr, " %6dx%d : ", (int)m, (int)n);
 | 
				
			||||||
 | 
					    gettimeofday( &start, (struct timezone *)0);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    for (l=0; l<loops; l++)
 | 
					    for (l=0; l<loops; l++)
 | 
				
			||||||
    {
 | 
					    {
 | 
				
			||||||
  
 | 
					 | 
				
			||||||
    	for(j = 0; j < m; j++){
 | 
					 | 
				
			||||||
      		for(i = 0; i < m * COMPSIZE; i++){
 | 
					 | 
				
			||||||
			a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
 | 
					 | 
				
			||||||
			b[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
 | 
					 | 
				
			||||||
			c[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
 | 
					 | 
				
			||||||
      		}
 | 
					 | 
				
			||||||
    	}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    	gettimeofday( &start, (struct timezone *)0);
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    	GEMM (&trans, &trans, &m, &n, &m, alpha, a, &m, b, &m, beta, c, &m );
 | 
					    	GEMM (&trans, &trans, &m, &n, &m, alpha, a, &m, b, &m, beta, c, &m );
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    	gettimeofday( &stop, (struct timezone *)0);
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    	time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	timeg += time1;
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					   gettimeofday( &stop, (struct timezone *)0);
 | 
				
			||||||
 | 
					   time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    timeg /= loops;
 | 
					    timeg = time1/loops;
 | 
				
			||||||
    fprintf(stderr,
 | 
					    fprintf(stderr,
 | 
				
			||||||
	    " %10.2f MFlops\n",
 | 
						    " %10.2f MFlops %10.6f sec\n",
 | 
				
			||||||
	    COMPSIZE * COMPSIZE * 2. * (double)m * (double)m * (double)n / timeg * 1.e-6);
 | 
						    COMPSIZE * COMPSIZE * 2. * (double)m * (double)m * (double)n / timeg * 1.e-6, time1);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -35,12 +35,19 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#undef GER
 | 
					#undef GER
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#ifdef COMPLEX
 | 
				
			||||||
 | 
					#ifdef DOUBLE
 | 
				
			||||||
 | 
					#define GER   BLASFUNC(zgeru)
 | 
				
			||||||
 | 
					#else
 | 
				
			||||||
 | 
					#define GER   BLASFUNC(cgeru)
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					#else
 | 
				
			||||||
#ifdef DOUBLE
 | 
					#ifdef DOUBLE
 | 
				
			||||||
#define GER   BLASFUNC(dger)
 | 
					#define GER   BLASFUNC(dger)
 | 
				
			||||||
#else
 | 
					#else
 | 
				
			||||||
#define GER   BLASFUNC(sger)
 | 
					#define GER   BLASFUNC(sger)
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#if defined(__WIN32__) || defined(__WIN64__)
 | 
					#if defined(__WIN32__) || defined(__WIN64__)
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,218 @@
 | 
				
			||||||
 | 
					/*********************************************************************/
 | 
				
			||||||
 | 
					/* Copyright 2009, 2010 The University of Texas at Austin.           */
 | 
				
			||||||
 | 
					/* All rights reserved.                                              */
 | 
				
			||||||
 | 
					/*                                                                   */
 | 
				
			||||||
 | 
					/* Redistribution and use in source and binary forms, with or        */
 | 
				
			||||||
 | 
					/* without modification, are permitted provided that the following   */
 | 
				
			||||||
 | 
					/* conditions are met:                                               */
 | 
				
			||||||
 | 
					/*                                                                   */
 | 
				
			||||||
 | 
					/*   1. Redistributions of source code must retain the above         */
 | 
				
			||||||
 | 
					/*      copyright notice, this list of conditions and the following  */
 | 
				
			||||||
 | 
					/*      disclaimer.                                                  */
 | 
				
			||||||
 | 
					/*                                                                   */
 | 
				
			||||||
 | 
					/*   2. Redistributions in binary form must reproduce the above      */
 | 
				
			||||||
 | 
					/*      copyright notice, this list of conditions and the following  */
 | 
				
			||||||
 | 
					/*      disclaimer in the documentation and/or other materials       */
 | 
				
			||||||
 | 
					/*      provided with the distribution.                              */
 | 
				
			||||||
 | 
					/*                                                                   */
 | 
				
			||||||
 | 
					/*    THIS  SOFTWARE IS PROVIDED  BY THE  UNIVERSITY OF  TEXAS AT    */
 | 
				
			||||||
 | 
					/*    AUSTIN  ``AS IS''  AND ANY  EXPRESS OR  IMPLIED WARRANTIES,    */
 | 
				
			||||||
 | 
					/*    INCLUDING, BUT  NOT LIMITED  TO, THE IMPLIED  WARRANTIES OF    */
 | 
				
			||||||
 | 
					/*    MERCHANTABILITY  AND FITNESS FOR  A PARTICULAR  PURPOSE ARE    */
 | 
				
			||||||
 | 
					/*    DISCLAIMED.  IN  NO EVENT SHALL THE UNIVERSITY  OF TEXAS AT    */
 | 
				
			||||||
 | 
					/*    AUSTIN OR CONTRIBUTORS BE  LIABLE FOR ANY DIRECT, INDIRECT,    */
 | 
				
			||||||
 | 
					/*    INCIDENTAL,  SPECIAL, EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES    */
 | 
				
			||||||
 | 
					/*    (INCLUDING, BUT  NOT LIMITED TO,  PROCUREMENT OF SUBSTITUTE    */
 | 
				
			||||||
 | 
					/*    GOODS  OR  SERVICES; LOSS  OF  USE,  DATA,  OR PROFITS;  OR    */
 | 
				
			||||||
 | 
					/*    BUSINESS INTERRUPTION) HOWEVER CAUSED  AND ON ANY THEORY OF    */
 | 
				
			||||||
 | 
					/*    LIABILITY, WHETHER  IN CONTRACT, STRICT  LIABILITY, OR TORT    */
 | 
				
			||||||
 | 
					/*    (INCLUDING NEGLIGENCE OR OTHERWISE)  ARISING IN ANY WAY OUT    */
 | 
				
			||||||
 | 
					/*    OF  THE  USE OF  THIS  SOFTWARE,  EVEN  IF ADVISED  OF  THE    */
 | 
				
			||||||
 | 
					/*    POSSIBILITY OF SUCH DAMAGE.                                    */
 | 
				
			||||||
 | 
					/*                                                                   */
 | 
				
			||||||
 | 
					/* The views and conclusions contained in the software and           */
 | 
				
			||||||
 | 
					/* documentation are those of the authors and should not be          */
 | 
				
			||||||
 | 
					/* interpreted as representing official policies, either expressed   */
 | 
				
			||||||
 | 
					/* or implied, of The University of Texas at Austin.                 */
 | 
				
			||||||
 | 
					/*********************************************************************/
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include <stdio.h>
 | 
				
			||||||
 | 
					#include <stdlib.h>
 | 
				
			||||||
 | 
					#ifdef __CYGWIN32__
 | 
				
			||||||
 | 
					#include <sys/time.h>
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					#include "common.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					double fabs(double);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#undef GESV
					#undef GETRS

					/*
					 * Bind GESV to the solver matching the build's element type:
					 *   COMPLEX defined   -> x / z / c prefix
					 *   COMPLEX undefined -> q / d / s prefix
					 * with XDOUBLE taking precedence over DOUBLE, and the single
					 * precision routine used when neither is defined.
					 */
					#if defined(COMPLEX)
					#  if defined(XDOUBLE)
					#    define GESV   BLASFUNC(xgesv)
					#  elif defined(DOUBLE)
					#    define GESV   BLASFUNC(zgesv)
					#  else
					#    define GESV   BLASFUNC(cgesv)
					#  endif
					#else
					#  if defined(XDOUBLE)
					#    define GESV   BLASFUNC(qgesv)
					#  elif defined(DOUBLE)
					#    define GESV   BLASFUNC(dgesv)
					#  else
					#    define GESV   BLASFUNC(sgesv)
					#  endif
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#if defined(__WIN32__) || defined(__WIN64__)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#ifndef DELTA_EPOCH_IN_MICROSECS
 | 
				
			||||||
 | 
					#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					int gettimeofday(struct timeval *tv, void *tz){
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  FILETIME ft;
 | 
				
			||||||
 | 
					  unsigned __int64 tmpres = 0;
 | 
				
			||||||
 | 
					  static int tzflag;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  if (NULL != tv)
 | 
				
			||||||
 | 
					    {
 | 
				
			||||||
 | 
					      GetSystemTimeAsFileTime(&ft);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      tmpres |= ft.dwHighDateTime;
 | 
				
			||||||
 | 
					      tmpres <<= 32;
 | 
				
			||||||
 | 
					      tmpres |= ft.dwLowDateTime;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      /*converting file time to unix epoch*/
 | 
				
			||||||
 | 
					      tmpres /= 10;  /*convert into microseconds*/
 | 
				
			||||||
 | 
					      tmpres -= DELTA_EPOCH_IN_MICROSECS;
 | 
				
			||||||
 | 
					      tv->tv_sec = (long)(tmpres / 1000000UL);
 | 
				
			||||||
 | 
					      tv->tv_usec = (long)(tmpres % 1000000UL);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  return 0;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
					 * Allocate `size` bytes from a System V shared-memory segment
					 * requested with SHM_HUGETLB.
					 *
					 * The requested length is (size + HUGE_PAGESIZE) masked down to a
					 * multiple of HUGE_PAGESIZE. If shmget() or shmat() fails, a
					 * message is printed and the process exits with status 1. The
					 * segment is marked for removal (IPC_RMID) right after attaching.
					 *
					 * Returns the attached address.
					 */
					static void *huge_malloc(BLASLONG size){

					#ifndef SHM_HUGETLB
					#define SHM_HUGETLB 04000
					#endif

					  void *mapping;
					  int segment;

					  segment = shmget(IPC_PRIVATE,
					                   (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
					                   SHM_HUGETLB | IPC_CREAT |0600);
					  if (segment < 0) {
					    printf( "Memory allocation failed(shmget).\n");
					    exit(1);
					  }

					  mapping = shmat(segment, NULL, SHM_RND);
					  if ((BLASLONG)mapping == -1){
					    printf( "Memory allocation failed(shmat).\n");
					    exit(1);
					  }

					  shmctl(segment, IPC_RMID, 0);

					  return mapping;
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#define malloc huge_malloc
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					int main(int argc, char *argv[]){
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  FLOAT *a, *b;
 | 
				
			||||||
 | 
					  blasint *ipiv;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  blasint m, i, j, info;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  int from =   1;
 | 
				
			||||||
 | 
					  int to   = 200;
 | 
				
			||||||
 | 
					  int step =   1;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  struct timeval start, stop;
 | 
				
			||||||
 | 
					  double time1;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  argc--;argv++;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  if (argc > 0) { from     = atol(*argv);		argc--; argv++;}
 | 
				
			||||||
 | 
					  if (argc > 0) { to       = MAX(atol(*argv), from);	argc--; argv++;}
 | 
				
			||||||
 | 
					  if (argc > 0) { step     = atol(*argv);		argc--; argv++;}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  fprintf(stderr, "From : %3d  To : %3d Step = %3d\n", from, to, step);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
 | 
				
			||||||
 | 
					    fprintf(stderr,"Out of Memory!!\n");exit(1);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  if (( b = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
 | 
				
			||||||
 | 
					    fprintf(stderr,"Out of Memory!!\n");exit(1);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  if (( ipiv = (blasint *)malloc(sizeof(blasint) * to * COMPSIZE)) == NULL){
 | 
				
			||||||
 | 
					    fprintf(stderr,"Out of Memory!!\n");exit(1);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#ifdef linux
 | 
				
			||||||
 | 
					  srandom(getpid());
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  fprintf(stderr, "   SIZE       Flops              Time\n");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  for(m = from; m <= to; m += step){
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    fprintf(stderr, " %dx%d : ", (int)m, (int)m);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    for(j = 0; j < m; j++){
 | 
				
			||||||
 | 
					      for(i = 0; i < m * COMPSIZE; i++){
 | 
				
			||||||
 | 
						a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    for(j = 0; j < m; j++){
 | 
				
			||||||
 | 
					      for(i = 0; i < m * COMPSIZE; i++){
 | 
				
			||||||
 | 
						b[i + j * m * COMPSIZE] = 0.0;
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    for (j = 0; j < m; ++j) {
 | 
				
			||||||
 | 
					      for (i = 0; i < m * COMPSIZE; ++i) {
 | 
				
			||||||
 | 
						b[i] += a[i + j * m * COMPSIZE];
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    gettimeofday( &start, (struct timezone *)0);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    GESV (&m, &m, a, &m, ipiv, b, &m,  &info);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    gettimeofday( &stop, (struct timezone *)0);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    fprintf(stderr,
 | 
				
			||||||
 | 
						    "%10.2f MFlops %10.6f s\n",
 | 
				
			||||||
 | 
						    COMPSIZE * COMPSIZE * (2. / 3. * (double)m * (double)m * (double)m + 2. * (double)m * (double)m * (double)m ) / (time1) * 1.e-6 , time1);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  return 0;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
 | 
				
			||||||
| 
						 | 
					@ -52,6 +52,11 @@ C)
 | 
				
			||||||
	awk '/MFlops/ { print $3,int($9) }'|tail --lines=+2
 | 
						awk '/MFlops/ { print $3,int($9) }'|tail --lines=+2
 | 
				
			||||||
	;;
 | 
						;;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					B)
 | 
				
			||||||
 | 
						# Copy Benchmark	
 | 
				
			||||||
 | 
						awk '/MBytes/ { print $1,int($3) }'|tail --lines=+2
 | 
				
			||||||
 | 
						;;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
*)
 | 
					*)
 | 
				
			||||||
	awk '/MFlops/ { print $1,int($3) }'|tail --lines=+2
 | 
						awk '/MFlops/ { print $1,int($3) }'|tail --lines=+2
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -88,6 +88,10 @@ double fabs(double);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#if defined(__WIN32__) || defined(__WIN64__)
 | 
					#if defined(__WIN32__) || defined(__WIN64__)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#ifndef DELTA_EPOCH_IN_MICROSECS
 | 
				
			||||||
 | 
					#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
int gettimeofday(struct timeval *tv, void *tz){
 | 
					int gettimeofday(struct timeval *tv, void *tz){
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  FILETIME ft;
 | 
					  FILETIME ft;
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,202 @@
 | 
				
			||||||
 | 
					/***************************************************************************
 | 
				
			||||||
 | 
					Copyright (c) 2014, The OpenBLAS Project
 | 
				
			||||||
 | 
					All rights reserved.
 | 
				
			||||||
 | 
					Redistribution and use in source and binary forms, with or without
 | 
				
			||||||
 | 
					modification, are permitted provided that the following conditions are
 | 
				
			||||||
 | 
					met:
 | 
				
			||||||
 | 
					1. Redistributions of source code must retain the above copyright
 | 
				
			||||||
 | 
					notice, this list of conditions and the following disclaimer.
 | 
				
			||||||
 | 
					2. Redistributions in binary form must reproduce the above copyright
 | 
				
			||||||
 | 
					notice, this list of conditions and the following disclaimer in
 | 
				
			||||||
 | 
					the documentation and/or other materials provided with the
 | 
				
			||||||
 | 
					distribution.
 | 
				
			||||||
 | 
					3. Neither the name of the OpenBLAS project nor the names of
 | 
				
			||||||
 | 
					its contributors may be used to endorse or promote products
 | 
				
			||||||
 | 
					derived from this software without specific prior written permission.
 | 
				
			||||||
 | 
					THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 | 
				
			||||||
 | 
					AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 | 
				
			||||||
 | 
					IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 | 
				
			||||||
 | 
					ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
 | 
				
			||||||
 | 
					LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 | 
				
			||||||
 | 
					DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 | 
				
			||||||
 | 
					SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 | 
				
			||||||
 | 
					CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 | 
				
			||||||
 | 
					OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
 | 
				
			||||||
 | 
					USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 | 
				
			||||||
 | 
					*****************************************************************************/
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include <stdio.h>
 | 
				
			||||||
 | 
					#include <stdlib.h>
 | 
				
			||||||
 | 
					#ifdef __CYGWIN32__
 | 
				
			||||||
 | 
					#include <sys/time.h>
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					#include "common.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#undef SCAL

					/*
					 * Bind SCAL to the routine matching the build's element type:
					 * COMPLEX selects the z/c variants, otherwise d/s; DOUBLE selects
					 * the double-precision member of each pair.
					 */
					#if defined(COMPLEX) && defined(DOUBLE)
					#define SCAL   BLASFUNC(zscal)
					#elif defined(COMPLEX)
					#define SCAL   BLASFUNC(cscal)
					#elif defined(DOUBLE)
					#define SCAL   BLASFUNC(dscal)
					#else
					#define SCAL   BLASFUNC(sscal)
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#if defined(__WIN32__) || defined(__WIN64__)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#ifndef DELTA_EPOCH_IN_MICROSECS
 | 
				
			||||||
 | 
					#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					int gettimeofday(struct timeval *tv, void *tz){
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  FILETIME ft;
 | 
				
			||||||
 | 
					  unsigned __int64 tmpres = 0;
 | 
				
			||||||
 | 
					  static int tzflag;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  if (NULL != tv)
 | 
				
			||||||
 | 
					    {
 | 
				
			||||||
 | 
					      GetSystemTimeAsFileTime(&ft);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      tmpres |= ft.dwHighDateTime;
 | 
				
			||||||
 | 
					      tmpres <<= 32;
 | 
				
			||||||
 | 
					      tmpres |= ft.dwLowDateTime;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      /*converting file time to unix epoch*/
 | 
				
			||||||
 | 
					      tmpres /= 10;  /*convert into microseconds*/
 | 
				
			||||||
 | 
					      tmpres -= DELTA_EPOCH_IN_MICROSECS;
 | 
				
			||||||
 | 
					      tv->tv_sec = (long)(tmpres / 1000000UL);
 | 
				
			||||||
 | 
					      tv->tv_usec = (long)(tmpres % 1000000UL);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  return 0;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
					 * Allocate `size` bytes from a System V shared-memory segment
					 * requested with SHM_HUGETLB.
					 *
					 * The requested length is (size + HUGE_PAGESIZE) masked down to a
					 * multiple of HUGE_PAGESIZE. If shmget() or shmat() fails, a
					 * message is printed and the process exits with status 1. The
					 * segment is marked for removal (IPC_RMID) right after attaching.
					 *
					 * Returns the attached address.
					 */
					static void *huge_malloc(BLASLONG size){

					#ifndef SHM_HUGETLB
					#define SHM_HUGETLB 04000
					#endif

					  void *mapping;
					  int segment;

					  segment = shmget(IPC_PRIVATE,
					                   (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
					                   SHM_HUGETLB | IPC_CREAT |0600);
					  if (segment < 0) {
					    printf( "Memory allocation failed(shmget).\n");
					    exit(1);
					  }

					  mapping = shmat(segment, NULL, SHM_RND);
					  if ((BLASLONG)mapping == -1){
					    printf( "Memory allocation failed(shmat).\n");
					    exit(1);
					  }

					  shmctl(segment, IPC_RMID, 0);

					  return mapping;
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#define malloc huge_malloc
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					int main(int argc, char *argv[]){
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  FLOAT *x, *y;
 | 
				
			||||||
 | 
					  FLOAT alpha[2] = { 2.0, 2.0 };
 | 
				
			||||||
 | 
					  blasint m, i;
 | 
				
			||||||
 | 
					  blasint inc_x=1,inc_y=1;
 | 
				
			||||||
 | 
					  int loops = 1;
 | 
				
			||||||
 | 
					  int l;
 | 
				
			||||||
 | 
					  char *p;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  int from =   1;
 | 
				
			||||||
 | 
					  int to   = 200;
 | 
				
			||||||
 | 
					  int step =   1;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  struct timeval start, stop;
 | 
				
			||||||
 | 
					  double time1,timeg;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  argc--;argv++;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  if (argc > 0) { from     = atol(*argv);		argc--; argv++;}
 | 
				
			||||||
 | 
					  if (argc > 0) { to       = MAX(atol(*argv), from);	argc--; argv++;}
 | 
				
			||||||
 | 
					  if (argc > 0) { step     = atol(*argv);		argc--; argv++;}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  if ((p = getenv("OPENBLAS_LOOPS")))  loops = atoi(p);
 | 
				
			||||||
 | 
					  if ((p = getenv("OPENBLAS_INCX")))   inc_x = atoi(p);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  fprintf(stderr, "From : %3d  To : %3d Step = %3d Inc_x = %d Inc_y = %d Loops = %d\n", from, to, step,inc_x,inc_y,loops);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
 | 
				
			||||||
 | 
					    fprintf(stderr,"Out of Memory!!\n");exit(1);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL){
 | 
				
			||||||
 | 
					    fprintf(stderr,"Out of Memory!!\n");exit(1);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#ifdef linux
 | 
				
			||||||
 | 
					  srandom(getpid());
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  fprintf(stderr, "   SIZE       Flops\n");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  for(m = from; m <= to; m += step)
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   timeg=0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   fprintf(stderr, " %6d : ", (int)m);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   for (l=0; l<loops; l++)
 | 
				
			||||||
 | 
					   {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   	for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
 | 
				
			||||||
 | 
								x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
 | 
				
			||||||
 | 
					   	}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   	for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
 | 
				
			||||||
 | 
								y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
 | 
				
			||||||
 | 
					   	}
 | 
				
			||||||
 | 
					    	gettimeofday( &start, (struct timezone *)0);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    	SCAL (&m, alpha, x, &inc_x);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    	gettimeofday( &stop, (struct timezone *)0);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    	time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						timeg += time1;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    timeg /= loops;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#ifdef COMPLEX
 | 
				
			||||||
 | 
					    fprintf(stderr, " %10.2f MFlops\n", 6. * (double)m / timeg * 1.e-6);
 | 
				
			||||||
 | 
					#else
 | 
				
			||||||
 | 
					    fprintf(stderr, " %10.2f MFlops\n", 1. * (double)m / timeg * 1.e-6);
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  return 0;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,56 @@
 | 
				
			||||||
 | 
					#!/usr/bin/python
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import os
 | 
				
			||||||
 | 
					import sys
 | 
				
			||||||
 | 
					import time
 | 
				
			||||||
 | 
					import numpy
 | 
				
			||||||
 | 
					from numpy.random import randn
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def run_cgemm(N,l):
						"""Time l products of two random complex N x N matrices and print the rate.

						The matrices are built from float32 real and imaginary parts and
						multiplied with numpy.dot. Prints the size, the MFlops figure
						(counting 8*N*N*N flops per product) and the elapsed seconds.
						"""
						A = randn(N,N).astype('float32') + randn(N,N).astype('float32') * 1j
						B = randn(N,N).astype('float32') + randn(N,N).astype('float32') * 1j

						start = time.time()
						for _ in range(l):
							numpy.dot(A,B)
						timediff = time.time() - start

						flops = (8*N*N*N) * l
						# A coarse wall clock can report zero elapsed time for small sizes;
						# the rate is then undefined, so report NaN instead of raising
						# ZeroDivisionError.
						if timediff > 0:
							mflops = flops / timediff * 1e-6
						else:
							mflops = float('nan')

						size = "%dx%d" % (N,N)
						print("%14s :\t%20f MFlops\t%20f sec" % (size,mflops,timediff))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if __name__ == "__main__":
						# Default sweep: sizes 128..2048 in steps of 128, one repetition per size.
						params = [128, 2048, 128, 1]

						# Up to four positional arguments override N, NMAX, NINC and LOOPS in
						# that order; any further arguments are ignored.
						for pos, arg in enumerate(sys.argv[1:5]):
							params[pos] = int(arg)
						N, NMAX, NINC, LOOPS = params

						# A non-empty OPENBLAS_LOOPS environment variable takes precedence over
						# the command-line repetition count.
						p = os.environ.get('OPENBLAS_LOOPS')
						if p:
							LOOPS = int(p)

						print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
						print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")

						# The upper bound is NMAX inclusive, so no size beyond the announced
						# "To" value is run even when NMAX-N is not a multiple of NINC.
						for i in range(N, NMAX+1, NINC):
							run_cgemm(i,LOOPS)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,56 @@
 | 
				
			||||||
 | 
					#!/usr/bin/python
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import os
 | 
				
			||||||
 | 
					import sys
 | 
				
			||||||
 | 
					import time
 | 
				
			||||||
 | 
					import numpy
 | 
				
			||||||
 | 
					from numpy.random import randn
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def run_cgemv(N,l):
						"""Time l products of a random complex N x N matrix with a complex N-vector.

						Both operands are built from float32 real and imaginary parts and
						multiplied with numpy.dot. Prints the size, the MFlops figure
						(counting 8*N*N flops per product) and the elapsed seconds.
						"""
						A = randn(N,N).astype('float32') + randn(N,N).astype('float32') * 1j
						B = randn(N).astype('float32') + randn(N).astype('float32') * 1j

						start = time.time()
						for _ in range(l):
							numpy.dot(A,B)
						timediff = time.time() - start

						flops = (8*N*N) * l
						# A coarse wall clock can report zero elapsed time for small sizes;
						# the rate is then undefined, so report NaN instead of raising
						# ZeroDivisionError.
						if timediff > 0:
							mflops = flops / timediff * 1e-6
						else:
							mflops = float('nan')

						size = "%dx%d" % (N,N)
						print("%14s :\t%20f MFlops\t%20f sec" % (size,mflops,timediff))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if __name__ == "__main__":
						# Default sweep: sizes 128..2048 in steps of 128, one repetition per size.
						params = [128, 2048, 128, 1]

						# Up to four positional arguments override N, NMAX, NINC and LOOPS in
						# that order; any further arguments are ignored.
						for pos, arg in enumerate(sys.argv[1:5]):
							params[pos] = int(arg)
						N, NMAX, NINC, LOOPS = params

						# A non-empty OPENBLAS_LOOPS environment variable takes precedence over
						# the command-line repetition count.
						p = os.environ.get('OPENBLAS_LOOPS')
						if p:
							LOOPS = int(p)

						print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
						print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")

						# The upper bound is NMAX inclusive, so no size beyond the announced
						# "To" value is run even when NMAX-N is not a multiple of NINC.
						for i in range(N, NMAX+1, NINC):
							run_cgemv(i,LOOPS)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,58 @@
 | 
				
			||||||
 | 
					#!/usr/bin/python
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import os
 | 
				
			||||||
 | 
					import sys
 | 
				
			||||||
 | 
					import time
 | 
				
			||||||
 | 
					import numpy
 | 
				
			||||||
 | 
					from numpy.random import randn
 | 
				
			||||||
 | 
					from scipy.linalg.blas import daxpy
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def run_daxpy(N,l):
						"""Time l calls of scipy's daxpy on random float64 N-vectors and print the rate.

						Each call is daxpy(x, y, a=2.0) and its result becomes the next y.
						Prints the size, the MFlops figure (counting 2*N flops per call)
						and the elapsed seconds.
						"""
						x = randn(N).astype('float64')
						y = randn(N).astype('float64')

						start = time.time()
						for _ in range(l):
							y = daxpy(x,y, a=2.0 )
						timediff = time.time() - start

						flops = (2*N) * l
						# A coarse wall clock can report zero elapsed time for small sizes;
						# the rate is then undefined, so report NaN instead of raising
						# ZeroDivisionError.
						if timediff > 0:
							mflops = flops / timediff * 1e-6
						else:
							mflops = float('nan')

						size = "%d" % (N)
						print("%14s :\t%20f MFlops\t%20f sec" % (size,mflops,timediff))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if __name__ == "__main__":
						# Default sweep: sizes 128..2048 in steps of 128, one repetition per size.
						params = [128, 2048, 128, 1]

						# Up to four positional arguments override N, NMAX, NINC and LOOPS in
						# that order; any further arguments are ignored.
						for pos, arg in enumerate(sys.argv[1:5]):
							params[pos] = int(arg)
						N, NMAX, NINC, LOOPS = params

						# A non-empty OPENBLAS_LOOPS environment variable takes precedence over
						# the command-line repetition count.
						p = os.environ.get('OPENBLAS_LOOPS')
						if p:
							LOOPS = int(p)

						print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
						print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")

						# The upper bound is NMAX inclusive, so no size beyond the announced
						# "To" value is run even when NMAX-N is not a multiple of NINC.
						for i in range(N, NMAX+1, NINC):
							run_daxpy(i,LOOPS)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,56 @@
 | 
				
			||||||
 | 
					#!/usr/bin/python
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import os
 | 
				
			||||||
 | 
					import sys
 | 
				
			||||||
 | 
					import time
 | 
				
			||||||
 | 
					import numpy
 | 
				
			||||||
 | 
					from numpy.random import randn
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def run_ddot(N,l):
						"""Time l dot products of two random float64 N-vectors and print the rate.

						Uses numpy.dot. Prints the size, the MFlops figure (counting 2*N
						flops per product) and the elapsed seconds.
						"""
						A = randn(N).astype('float64')
						B = randn(N).astype('float64')

						start = time.time()
						for _ in range(l):
							numpy.dot(A,B)
						timediff = time.time() - start

						flops = (2*N) * l
						# A coarse wall clock can report zero elapsed time for small sizes;
						# the rate is then undefined, so report NaN instead of raising
						# ZeroDivisionError.
						if timediff > 0:
							mflops = flops / timediff * 1e-6
						else:
							mflops = float('nan')

						size = "%d" % (N)
						print("%14s :\t%20f MFlops\t%20f sec" % (size,mflops,timediff))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if __name__ == "__main__":
						# Default sweep: sizes 128..2048 in steps of 128, one repetition per size.
						params = [128, 2048, 128, 1]

						# Up to four positional arguments override N, NMAX, NINC and LOOPS in
						# that order; any further arguments are ignored.
						for pos, arg in enumerate(sys.argv[1:5]):
							params[pos] = int(arg)
						N, NMAX, NINC, LOOPS = params

						# A non-empty OPENBLAS_LOOPS environment variable takes precedence over
						# the command-line repetition count.
						p = os.environ.get('OPENBLAS_LOOPS')
						if p:
							LOOPS = int(p)

						print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
						print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")

						# The upper bound is NMAX inclusive, so no size beyond the announced
						# "To" value is run even when NMAX-N is not a multiple of NINC.
						for i in range(N, NMAX+1, NINC):
							run_ddot(i,LOOPS)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,55 @@
 | 
				
			||||||
 | 
					#!/usr/bin/python
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import os
 | 
				
			||||||
 | 
					import sys
 | 
				
			||||||
 | 
					import time
 | 
				
			||||||
 | 
					import numpy
 | 
				
			||||||
 | 
					from numpy.random import randn
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def run_deig(N,l):
						"""Time l eigen-decompositions of a random float64 N x N matrix and print the rate.

						Uses numpy.linalg.eig. Prints the size, the MFlops figure (counting
						26.33*N*N*N flops per decomposition) and the elapsed seconds.
						"""
						A = randn(N,N).astype('float64')

						start = time.time()
						for _ in range(l):
							numpy.linalg.eig(A)
						timediff = time.time() - start

						flops = (26.33 *N*N*N) * l
						# A coarse wall clock can report zero elapsed time for small sizes;
						# the rate is then undefined, so report NaN instead of raising
						# ZeroDivisionError.
						if timediff > 0:
							mflops = flops / timediff * 1e-6
						else:
							mflops = float('nan')

						size = "%dx%d" % (N,N)
						print("%14s :\t%20f MFlops\t%20f sec" % (size,mflops,timediff))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if __name__ == "__main__":
						# Default sweep: sizes 128..2048 in steps of 128, one repetition per size.
						params = [128, 2048, 128, 1]

						# Up to four positional arguments override N, NMAX, NINC and LOOPS in
						# that order; any further arguments are ignored.
						for pos, arg in enumerate(sys.argv[1:5]):
							params[pos] = int(arg)
						N, NMAX, NINC, LOOPS = params

						# A non-empty OPENBLAS_LOOPS environment variable takes precedence over
						# the command-line repetition count.
						p = os.environ.get('OPENBLAS_LOOPS')
						if p:
							LOOPS = int(p)

						print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
						print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")

						# The upper bound is NMAX inclusive, so no size beyond the announced
						# "To" value is run even when NMAX-N is not a multiple of NINC.
						for i in range(N, NMAX+1, NINC):
							run_deig(i,LOOPS)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,56 @@
 | 
				
			||||||
 | 
					#!/usr/bin/python
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import os
 | 
				
			||||||
 | 
					import sys
 | 
				
			||||||
 | 
					import time
 | 
				
			||||||
 | 
					import numpy
 | 
				
			||||||
 | 
					from numpy.random import randn
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def run_dgemm(N,l):
						"""Time l products of two random float64 N x N matrices and print the rate.

						Uses numpy.dot. Prints the size, the MFlops figure (counting
						2*N*N*N flops per product) and the elapsed seconds.
						"""
						A = randn(N,N).astype('float64')
						B = randn(N,N).astype('float64')

						start = time.time()
						for _ in range(l):
							numpy.dot(A,B)
						timediff = time.time() - start

						flops = (2*N*N*N) * l
						# A coarse wall clock can report zero elapsed time for small sizes;
						# the rate is then undefined, so report NaN instead of raising
						# ZeroDivisionError.
						if timediff > 0:
							mflops = flops / timediff * 1e-6
						else:
							mflops = float('nan')

						size = "%dx%d" % (N,N)
						print("%14s :\t%20f MFlops\t%20f sec" % (size,mflops,timediff))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if __name__ == "__main__":
						# Default sweep: sizes 128..2048 in steps of 128, one repetition per size.
						params = [128, 2048, 128, 1]

						# Up to four positional arguments override N, NMAX, NINC and LOOPS in
						# that order; any further arguments are ignored.
						for pos, arg in enumerate(sys.argv[1:5]):
							params[pos] = int(arg)
						N, NMAX, NINC, LOOPS = params

						# A non-empty OPENBLAS_LOOPS environment variable takes precedence over
						# the command-line repetition count.
						p = os.environ.get('OPENBLAS_LOOPS')
						if p:
							LOOPS = int(p)

						print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
						print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")

						# The upper bound is NMAX inclusive, so no size beyond the announced
						# "To" value is run even when NMAX-N is not a multiple of NINC.
						for i in range(N, NMAX+1, NINC):
							run_dgemm(i,LOOPS)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,56 @@
 | 
				
			||||||
 | 
					#!/usr/bin/python
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import os
 | 
				
			||||||
 | 
					import sys
 | 
				
			||||||
 | 
					import time
 | 
				
			||||||
 | 
					import numpy
 | 
				
			||||||
 | 
					from numpy.random import randn
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def run_dgemv(N,l):
						"""Time l products of a random float64 N x N matrix with an N-vector and print the rate.

						Uses numpy.dot. Prints the size, the MFlops figure (counting
						2*N*N flops per product) and the elapsed seconds.
						"""
						A = randn(N,N).astype('float64')
						B = randn(N).astype('float64')

						start = time.time()
						for _ in range(l):
							numpy.dot(A,B)
						timediff = time.time() - start

						flops = (2*N*N) * l
						# A coarse wall clock can report zero elapsed time for small sizes;
						# the rate is then undefined, so report NaN instead of raising
						# ZeroDivisionError.
						if timediff > 0:
							mflops = flops / timediff * 1e-6
						else:
							mflops = float('nan')

						size = "%dx%d" % (N,N)
						print("%14s :\t%20f MFlops\t%20f sec" % (size,mflops,timediff))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if __name__ == "__main__":
						# Default sweep: sizes 128..2048 in steps of 128, one repetition per size.
						params = [128, 2048, 128, 1]

						# Up to four positional arguments override N, NMAX, NINC and LOOPS in
						# that order; any further arguments are ignored.
						for pos, arg in enumerate(sys.argv[1:5]):
							params[pos] = int(arg)
						N, NMAX, NINC, LOOPS = params

						# A non-empty OPENBLAS_LOOPS environment variable takes precedence over
						# the command-line repetition count.
						p = os.environ.get('OPENBLAS_LOOPS')
						if p:
							LOOPS = int(p)

						print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
						print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")

						# The upper bound is NMAX inclusive, so no size beyond the announced
						# "To" value is run even when NMAX-N is not a multiple of NINC.
						for i in range(N, NMAX+1, NINC):
							run_dgemv(i,LOOPS)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,58 @@
 | 
				
			||||||
 | 
					#!/usr/bin/python
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import os
 | 
				
			||||||
 | 
					import sys
 | 
				
			||||||
 | 
					import time
 | 
				
			||||||
 | 
					import numpy
 | 
				
			||||||
 | 
					from numpy.random import randn
 | 
				
			||||||
 | 
					from scipy.linalg.lapack import dgesv
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def run_dgesv(N,l):
						"""Time l calls of scipy's LAPACK dgesv on random float64 N x N operands and print the rate.

						Each call is dgesv(a, b, 1, 1) with an N x N right-hand side.
						Prints the size, the MFlops figure (counting 2/3*N^3 + 2*N^3 flops
						per call) and the elapsed seconds.
						"""
						a = randn(N,N).astype('float64')
						b = randn(N,N).astype('float64')

						start = time.time()
						for _ in range(l):
							dgesv(a,b,1,1)
						timediff = time.time() - start

						flops = (2.0/3.0 *N*N*N + 2.0*N*N*N) * l
						# A coarse wall clock can report zero elapsed time for small sizes;
						# the rate is then undefined, so report NaN instead of raising
						# ZeroDivisionError.
						if timediff > 0:
							mflops = flops / timediff * 1e-6
						else:
							mflops = float('nan')

						size = "%dx%d" % (N,N)
						print("%14s :\t%20f MFlops\t%20f sec" % (size,mflops,timediff))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if __name__ == "__main__":
						# Default sweep: sizes 128..2048 in steps of 128, one repetition per size.
						params = [128, 2048, 128, 1]

						# Up to four positional arguments override N, NMAX, NINC and LOOPS in
						# that order; any further arguments are ignored.
						for pos, arg in enumerate(sys.argv[1:5]):
							params[pos] = int(arg)
						N, NMAX, NINC, LOOPS = params

						# A non-empty OPENBLAS_LOOPS environment variable takes precedence over
						# the command-line repetition count.
						p = os.environ.get('OPENBLAS_LOOPS')
						if p:
							LOOPS = int(p)

						print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
						print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")

						# The upper bound is NMAX inclusive, so no size beyond the announced
						# "To" value is run even when NMAX-N is not a multiple of NINC.
						for i in range(N, NMAX+1, NINC):
							run_dgesv(i,LOOPS)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,56 @@
 | 
				
			||||||
 | 
					#!/usr/bin/python
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import os
 | 
				
			||||||
 | 
					import sys
 | 
				
			||||||
 | 
					import time
 | 
				
			||||||
 | 
					import numpy
 | 
				
			||||||
 | 
					from numpy.random import randn
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def run_dsolve(N,l):
						"""Time l linear solves with random float64 N x N operands and print the rate.

						Uses numpy.linalg.solve(A, B) with an N x N right-hand side.
						Prints the size, the MFlops figure (counting 2/3*N^3 + 2*N^3 flops
						per solve) and the elapsed seconds.
						"""
						A = randn(N,N).astype('float64')
						B = randn(N,N).astype('float64')

						start = time.time()
						for _ in range(l):
							numpy.linalg.solve(A,B)
						timediff = time.time() - start

						flops = (2.0/3.0 *N*N*N + 2.0*N*N*N) * l
						# A coarse wall clock can report zero elapsed time for small sizes;
						# the rate is then undefined, so report NaN instead of raising
						# ZeroDivisionError.
						if timediff > 0:
							mflops = flops / timediff * 1e-6
						else:
							mflops = float('nan')

						size = "%dx%d" % (N,N)
						print("%14s :\t%20f MFlops\t%20f sec" % (size,mflops,timediff))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if __name__ == "__main__":
						# Default sweep: sizes 128..2048 in steps of 128, one repetition per size.
						params = [128, 2048, 128, 1]

						# Up to four positional arguments override N, NMAX, NINC and LOOPS in
						# that order; any further arguments are ignored.
						for pos, arg in enumerate(sys.argv[1:5]):
							params[pos] = int(arg)
						N, NMAX, NINC, LOOPS = params

						# A non-empty OPENBLAS_LOOPS environment variable takes precedence over
						# the command-line repetition count.
						p = os.environ.get('OPENBLAS_LOOPS')
						if p:
							LOOPS = int(p)

						print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
						print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")

						# The upper bound is NMAX inclusive, so no size beyond the announced
						# "To" value is run even when NMAX-N is not a multiple of NINC.
						for i in range(N, NMAX+1, NINC):
							run_dsolve(i,LOOPS)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,56 @@
 | 
				
			||||||
 | 
					#!/usr/bin/python
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import os
 | 
				
			||||||
 | 
					import sys
 | 
				
			||||||
 | 
					import time
 | 
				
			||||||
 | 
					import numpy
 | 
				
			||||||
 | 
					from numpy.random import randn
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def run_sdot(N,l):
						"""Time l dot products of two random float32 N-vectors and print the rate.

						Uses numpy.dot. Prints the size, the MFlops figure (counting 2*N
						flops per product) and the elapsed seconds.
						"""
						A = randn(N).astype('float32')
						B = randn(N).astype('float32')

						start = time.time()
						for _ in range(l):
							numpy.dot(A,B)
						timediff = time.time() - start

						flops = (2*N) * l
						# A coarse wall clock can report zero elapsed time for small sizes;
						# the rate is then undefined, so report NaN instead of raising
						# ZeroDivisionError.
						if timediff > 0:
							mflops = flops / timediff * 1e-6
						else:
							mflops = float('nan')

						size = "%d" % (N)
						print("%14s :\t%20f MFlops\t%20f sec" % (size,mflops,timediff))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if __name__ == "__main__":
						# Default sweep: sizes 128..2048 in steps of 128, one repetition per size.
						params = [128, 2048, 128, 1]

						# Up to four positional arguments override N, NMAX, NINC and LOOPS in
						# that order; any further arguments are ignored.
						for pos, arg in enumerate(sys.argv[1:5]):
							params[pos] = int(arg)
						N, NMAX, NINC, LOOPS = params

						# A non-empty OPENBLAS_LOOPS environment variable takes precedence over
						# the command-line repetition count.
						p = os.environ.get('OPENBLAS_LOOPS')
						if p:
							LOOPS = int(p)

						print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
						print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")

						# The upper bound is NMAX inclusive, so no size beyond the announced
						# "To" value is run even when NMAX-N is not a multiple of NINC.
						for i in range(N, NMAX+1, NINC):
							run_sdot(i,LOOPS)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,56 @@
 | 
				
			||||||
 | 
					#!/usr/bin/python
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import os
 | 
				
			||||||
 | 
					import sys
 | 
				
			||||||
 | 
					import time
 | 
				
			||||||
 | 
					import numpy
 | 
				
			||||||
 | 
					from numpy.random import randn
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def run_sgemm(N,l):
						"""Time l products of two random float32 N x N matrices and print the rate.

						Uses numpy.dot. Prints the size, the MFlops figure (counting
						2*N*N*N flops per product) and the elapsed seconds.
						"""
						A = randn(N,N).astype('float32')
						B = randn(N,N).astype('float32')

						start = time.time()
						for _ in range(l):
							numpy.dot(A,B)
						timediff = time.time() - start

						flops = (2*N*N*N) * l
						# A coarse wall clock can report zero elapsed time for small sizes;
						# the rate is then undefined, so report NaN instead of raising
						# ZeroDivisionError.
						if timediff > 0:
							mflops = flops / timediff * 1e-6
						else:
							mflops = float('nan')

						size = "%dx%d" % (N,N)
						print("%14s :\t%20f MFlops\t%20f sec" % (size,mflops,timediff))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if __name__ == "__main__":
						# Default sweep: sizes 128..2048 in steps of 128, one repetition per size.
						params = [128, 2048, 128, 1]

						# Up to four positional arguments override N, NMAX, NINC and LOOPS in
						# that order; any further arguments are ignored.
						for pos, arg in enumerate(sys.argv[1:5]):
							params[pos] = int(arg)
						N, NMAX, NINC, LOOPS = params

						# A non-empty OPENBLAS_LOOPS environment variable takes precedence over
						# the command-line repetition count.
						p = os.environ.get('OPENBLAS_LOOPS')
						if p:
							LOOPS = int(p)

						print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
						print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")

						# The upper bound is NMAX inclusive, so no size beyond the announced
						# "To" value is run even when NMAX-N is not a multiple of NINC.
						for i in range(N, NMAX+1, NINC):
							run_sgemm(i,LOOPS)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,56 @@
 | 
				
			||||||
 | 
					#!/usr/bin/python
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import os
 | 
				
			||||||
 | 
					import sys
 | 
				
			||||||
 | 
					import time
 | 
				
			||||||
 | 
					import numpy
 | 
				
			||||||
 | 
					from numpy.random import randn
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def run_sgemv(N,l):
						"""Time l products of a random float32 N x N matrix with an N-vector and print the rate.

						Uses numpy.dot. Prints the size, the MFlops figure (counting
						2*N*N flops per product) and the elapsed seconds.
						"""
						A = randn(N,N).astype('float32')
						B = randn(N).astype('float32')

						start = time.time()
						for _ in range(l):
							numpy.dot(A,B)
						timediff = time.time() - start

						flops = (2*N*N) * l
						# A coarse wall clock can report zero elapsed time for small sizes;
						# the rate is then undefined, so report NaN instead of raising
						# ZeroDivisionError.
						if timediff > 0:
							mflops = flops / timediff * 1e-6
						else:
							mflops = float('nan')

						size = "%dx%d" % (N,N)
						print("%14s :\t%20f MFlops\t%20f sec" % (size,mflops,timediff))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if __name__ == "__main__":
						# Default sweep: sizes 128..2048 in steps of 128, one repetition per size.
						params = [128, 2048, 128, 1]

						# Up to four positional arguments override N, NMAX, NINC and LOOPS in
						# that order; any further arguments are ignored.
						for pos, arg in enumerate(sys.argv[1:5]):
							params[pos] = int(arg)
						N, NMAX, NINC, LOOPS = params

						# A non-empty OPENBLAS_LOOPS environment variable takes precedence over
						# the command-line repetition count.
						p = os.environ.get('OPENBLAS_LOOPS')
						if p:
							LOOPS = int(p)

						print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
						print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")

						# The upper bound is NMAX inclusive, so no size beyond the announced
						# "To" value is run even when NMAX-N is not a multiple of NINC.
						for i in range(N, NMAX+1, NINC):
							run_sgemv(i,LOOPS)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,56 @@
 | 
				
			||||||
 | 
					#!/usr/bin/python
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import os
 | 
				
			||||||
 | 
					import sys
 | 
				
			||||||
 | 
					import time
 | 
				
			||||||
 | 
					import numpy
 | 
				
			||||||
 | 
					from numpy.random import randn
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def run_zgemm(N,l):
						"""Time l products of two random N x N complex matrices and print the rate.

						N -- number of rows and columns of both operands.
						l -- number of times numpy.dot(A, B) is executed inside the timed region.

						Prints one line with the size, the MFlops figure (8*N**3 operations
						are counted per product) and the elapsed wall-clock seconds.
						Returns None.
						"""
						A = randn(N,N).astype('float64') + randn(N,N).astype('float64') * 1j
						B = randn(N,N).astype('float64') + randn(N,N).astype('float64') * 1j

						# Only the products are timed; operand creation happens above.
						start = time.time()
						for _ in range(l):
							numpy.dot(A,B)
						end = time.time()

						timediff = end - start
						if timediff > 0:
							mflops = (8*N*N*N) * l / timediff
							mflops *= 1e-6
						else:
							# A coarse clock can report no elapsed time for a small problem;
							# report an infinite rate instead of raising ZeroDivisionError.
							mflops = float('inf')

						size = "%dx%d" % (N,N)
						print("%14s :\t%20f MFlops\t%20f sec" % (size,mflops,timediff))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if __name__ == "__main__":
						# Default sweep: sizes N, N+NINC, ... not exceeding NMAX, with
						# LOOPS timed products per size.
						N = 128
						NMAX = 2048
						NINC = 128
						LOOPS = 1

						# Positional arguments 1..4 override N, NMAX, NINC and LOOPS in
						# that order; sys.argv[0] and any extra arguments are ignored.
						for position, arg in enumerate(sys.argv):
							if position == 1:
								N = int(arg)
							elif position == 2:
								NMAX = int(arg)
							elif position == 3:
								NINC = int(arg)
							elif position == 4:
								LOOPS = int(arg)

						# A non-empty OPENBLAS_LOOPS environment variable overrides the
						# loop count, including one given on the command line.
						env_loops = os.environ.get('OPENBLAS_LOOPS')
						if env_loops:
							LOOPS = int(env_loops)

						print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
						print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")

						# The stop value is NMAX + 1 so that NMAX itself is included but
						# no size above the announced "To" bound is run.  A stop value of
						# NMAX + NINC would overshoot NMAX whenever NMAX - N is not a
						# multiple of NINC.
						for size in range(N, NMAX + 1, NINC):
							run_zgemm(size, LOOPS)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,56 @@
 | 
				
			||||||
 | 
					#!/usr/bin/python
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import os
 | 
				
			||||||
 | 
					import sys
 | 
				
			||||||
 | 
					import time
 | 
				
			||||||
 | 
					import numpy
 | 
				
			||||||
 | 
					from numpy.random import randn
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def run_zgemv(N,l):
						"""Time l products of a random N x N complex matrix with a complex vector.

						N -- matrix order and vector length.
						l -- number of times numpy.dot(A, B) is executed inside the timed region.

						Prints one line with the size, the MFlops figure (8*N**2 operations
						are counted per product) and the elapsed wall-clock seconds.
						Returns None.
						"""
						A = randn(N,N).astype('float64') + randn(N,N).astype('float64') * 1j
						B = randn(N).astype('float64') + randn(N).astype('float64') * 1j

						# Only the products are timed; operand creation happens above.
						start = time.time()
						for _ in range(l):
							numpy.dot(A,B)
						end = time.time()

						timediff = end - start
						if timediff > 0:
							mflops = (8*N*N) * l / timediff
							mflops *= 1e-6
						else:
							# A coarse clock can report no elapsed time for a small problem;
							# report an infinite rate instead of raising ZeroDivisionError.
							mflops = float('inf')

						size = "%dx%d" % (N,N)
						print("%14s :\t%20f MFlops\t%20f sec" % (size,mflops,timediff))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if __name__ == "__main__":
						# Default sweep: sizes N, N+NINC, ... not exceeding NMAX, with
						# LOOPS timed products per size.
						N = 128
						NMAX = 2048
						NINC = 128
						LOOPS = 1

						# Positional arguments 1..4 override N, NMAX, NINC and LOOPS in
						# that order; sys.argv[0] and any extra arguments are ignored.
						for position, arg in enumerate(sys.argv):
							if position == 1:
								N = int(arg)
							elif position == 2:
								NMAX = int(arg)
							elif position == 3:
								NINC = int(arg)
							elif position == 4:
								LOOPS = int(arg)

						# A non-empty OPENBLAS_LOOPS environment variable overrides the
						# loop count, including one given on the command line.
						env_loops = os.environ.get('OPENBLAS_LOOPS')
						if env_loops:
							LOOPS = int(env_loops)

						print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
						print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")

						# The stop value is NMAX + 1 so that NMAX itself is included but
						# no size above the announced "To" bound is run.  A stop value of
						# NMAX + NINC would overshoot NMAX whenever NMAX - N is not a
						# multiple of NINC.
						for size in range(N, NMAX + 1, NINC):
							run_zgemv(size, LOOPS)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,56 @@
 | 
				
			||||||
 | 
					#!/usr/bin/octave --silent 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Default sweep: n runs from nfrom to nto in steps of nstep, and the
					# product is repeated `loops` times for every n.
					nfrom = 128;
					nto   = 2048;
					nstep = 128;
					loops = 1;

					# Positional command-line arguments 1..4 override nfrom, nto, nstep and
					# loops in that order.  str2double is used rather than str2num because
					# str2num evaluates its argument with eval and would execute arbitrary
					# text given on the command line.
					arg_list = argv();
					for i = 1:nargin
						switch (i)
							case 1
								nfrom = str2double(arg_list{i});
							case 2
								nto   = str2double(arg_list{i});
							case 3
								nstep = str2double(arg_list{i});
							case 4
								loops = str2double(arg_list{i});
						endswitch
					endfor

					# A non-empty OPENBLAS_LOOPS environment variable overrides the loop count.
					p = getenv("OPENBLAS_LOOPS");
					if p
						loops = str2double(p);
					endif

					printf("From %d To %d Step=%d Loops=%d\n",nfrom, nto, nstep, loops);
					printf("        SIZE             FLOPS             TIME\n");

					n = nfrom;
					while n <= nto

						# Single-precision complex n x n operands, created before the clock
						# starts so that only the products are timed.
						A = single(rand(n,n)) + single(rand(n,n)) * 1i;
						B = single(rand(n,n)) + single(rand(n,n)) * 1i;
						start = clock();

						l = 0;
						while l < loops
							C = A * B;
							l = l + 1;
						endwhile

						timeg = etime(clock(), start);
						# The rate counts 4 * 2*n^3 operations per product.
						mflops = ( 4.0 * 2.0*n*n*n *loops ) / ( timeg * 1.0e6 );

						st1 = sprintf("%dx%d : ", n,n);
						printf("%20s %10.2f MFlops %10.6f sec\n", st1, mflops, timeg);
						n = n + nstep;

					endwhile
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,56 @@
 | 
				
			||||||
 | 
					#!/usr/bin/octave --silent 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Default sweep: n runs from nfrom to nto in steps of nstep, and the
					# product is repeated `loops` times for every n.
					nfrom = 128;
					nto   = 2048;
					nstep = 128;
					loops = 1;

					# Positional command-line arguments 1..4 override nfrom, nto, nstep and
					# loops in that order.  str2double is used rather than str2num because
					# str2num evaluates its argument with eval and would execute arbitrary
					# text given on the command line.
					arg_list = argv();
					for i = 1:nargin
						switch (i)
							case 1
								nfrom = str2double(arg_list{i});
							case 2
								nto   = str2double(arg_list{i});
							case 3
								nstep = str2double(arg_list{i});
							case 4
								loops = str2double(arg_list{i});
						endswitch
					endfor

					# A non-empty OPENBLAS_LOOPS environment variable overrides the loop count.
					p = getenv("OPENBLAS_LOOPS");
					if p
						loops = str2double(p);
					endif

					printf("From %d To %d Step=%d Loops=%d\n",nfrom, nto, nstep, loops);
					printf("        SIZE             FLOPS             TIME\n");

					n = nfrom;
					while n <= nto

						# Single-precision complex n x n matrix and n x 1 vector, created
						# before the clock starts so that only the products are timed.
						A = single(rand(n,n)) + single(rand(n,n)) * 1i;
						B = single(rand(n,1)) + single(rand(n,1)) * 1i;
						start = clock();

						l = 0;
						while l < loops
							C = A * B;
							l = l + 1;
						endwhile

						timeg = etime(clock(), start);
						# The rate counts 4 * 2*n^2 operations per product.
						mflops = ( 4.0 * 2.0*n*n *loops ) / ( timeg * 1.0e6 );

						st1 = sprintf("%dx%d : ", n,n);
						printf("%20s %10.2f MFlops %10.6f sec\n", st1, mflops, timeg);
						n = n + nstep;

					endwhile
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,56 @@
 | 
				
			||||||
 | 
					#!/usr/bin/octave --silent 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Default sweep: n runs from nfrom to nto in steps of nstep, and the
					# eigen-decomposition is repeated `loops` times for every n.
					nfrom = 128;
					nto   = 2048;
					nstep = 128;
					loops = 1;

					# Positional command-line arguments 1..4 override nfrom, nto, nstep and
					# loops in that order.  str2double is used rather than str2num because
					# str2num evaluates its argument with eval and would execute arbitrary
					# text given on the command line.
					arg_list = argv();
					for i = 1:nargin
						switch (i)
							case 1
								nfrom = str2double(arg_list{i});
							case 2
								nto   = str2double(arg_list{i});
							case 3
								nstep = str2double(arg_list{i});
							case 4
								loops = str2double(arg_list{i});
						endswitch
					endfor

					# A non-empty OPENBLAS_LOOPS environment variable overrides the loop count.
					p = getenv("OPENBLAS_LOOPS");
					if p
						loops = str2double(p);
					endif

					printf("From %d To %d Step=%d Loops=%d\n",nfrom, nto, nstep, loops);
					printf("        SIZE             FLOPS             TIME\n");

					n = nfrom;
					while n <= nto

						# Random n x n matrix, created before the clock starts so that only
						# the eig calls are timed.
						A = double(rand(n,n));
						start = clock();

						l = 0;
						while l < loops
							# Both eigenvectors (V) and eigenvalues (lambda) are requested.
							[V,lambda] = eig(A);
							l = l + 1;
						endwhile

						timeg = etime(clock(), start);
						# The rate counts 26.33 * n^3 operations per decomposition.
						mflops = ( 26.33 *n*n*n ) *loops / ( timeg * 1.0e6 );

						st1 = sprintf("%dx%d : ", n,n);
						printf("%20s %10.2f MFlops %10.6f sec\n", st1, mflops, timeg );
						n = n + nstep;

					endwhile
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,56 @@
 | 
				
			||||||
 | 
					#!/usr/bin/octave --silent 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Default sweep: n runs from nfrom to nto in steps of nstep, and the
					# product is repeated `loops` times for every n.
					nfrom = 128;
					nto   = 2048;
					nstep = 128;
					loops = 1;

					# Positional command-line arguments 1..4 override nfrom, nto, nstep and
					# loops in that order.  str2double is used rather than str2num because
					# str2num evaluates its argument with eval and would execute arbitrary
					# text given on the command line.
					arg_list = argv();
					for i = 1:nargin
						switch (i)
							case 1
								nfrom = str2double(arg_list{i});
							case 2
								nto   = str2double(arg_list{i});
							case 3
								nstep = str2double(arg_list{i});
							case 4
								loops = str2double(arg_list{i});
						endswitch
					endfor

					# A non-empty OPENBLAS_LOOPS environment variable overrides the loop count.
					p = getenv("OPENBLAS_LOOPS");
					if p
						loops = str2double(p);
					endif

					printf("From %d To %d Step=%d Loops=%d\n",nfrom, nto, nstep, loops);
					printf("        SIZE             FLOPS             TIME\n");

					n = nfrom;
					while n <= nto

						# Double-precision n x n operands, created before the clock starts
						# so that only the products are timed.
						A = double(rand(n,n));
						B = double(rand(n,n));
						start = clock();

						l = 0;
						while l < loops
							C = A * B;
							l = l + 1;
						endwhile

						timeg = etime(clock(), start);
						# The rate counts 2*n^3 operations per product.
						mflops = ( 2.0*n*n*n *loops ) / ( timeg * 1.0e6 );

						st1 = sprintf("%dx%d : ", n,n);
						printf("%20s %10.2f MFlops %10.6f sec\n", st1, mflops, timeg);
						n = n + nstep;

					endwhile
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,56 @@
 | 
				
			||||||
 | 
					#!/usr/bin/octave --silent 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Default sweep: n runs from nfrom to nto in steps of nstep, and the
					# product is repeated `loops` times for every n.
					nfrom = 128;
					nto   = 2048;
					nstep = 128;
					loops = 1;

					# Positional command-line arguments 1..4 override nfrom, nto, nstep and
					# loops in that order.  str2double is used rather than str2num because
					# str2num evaluates its argument with eval and would execute arbitrary
					# text given on the command line.
					arg_list = argv();
					for i = 1:nargin
						switch (i)
							case 1
								nfrom = str2double(arg_list{i});
							case 2
								nto   = str2double(arg_list{i});
							case 3
								nstep = str2double(arg_list{i});
							case 4
								loops = str2double(arg_list{i});
						endswitch
					endfor

					# A non-empty OPENBLAS_LOOPS environment variable overrides the loop count.
					p = getenv("OPENBLAS_LOOPS");
					if p
						loops = str2double(p);
					endif

					printf("From %d To %d Step=%d Loops=%d\n",nfrom, nto, nstep, loops);
					printf("        SIZE             FLOPS             TIME\n");

					n = nfrom;
					while n <= nto

						# Double-precision n x n matrix and n x 1 vector, created before the
						# clock starts so that only the products are timed.
						A = double(rand(n,n));
						B = double(rand(n,1));
						start = clock();

						l = 0;
						while l < loops
							C = A * B;
							l = l + 1;
						endwhile

						timeg = etime(clock(), start);
						# The rate counts 2*n^2 operations per product.
						mflops = ( 2.0*n*n *loops ) / ( timeg * 1.0e6 );

						st1 = sprintf("%dx%d : ", n,n);
						printf("%20s %10.2f MFlops %10.6f sec\n", st1, mflops, timeg);
						n = n + nstep;

					endwhile
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,59 @@
 | 
				
			||||||
 | 
					#!/usr/bin/octave --silent 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Default sweep: n runs from nfrom to nto in steps of nstep, and the
					# solve is repeated `loops` times for every n.
					nfrom = 128;
					nto   = 2048;
					nstep = 128;
					loops = 1;

					# Positional command-line arguments 1..4 override nfrom, nto, nstep and
					# loops in that order.  str2double is used rather than str2num because
					# str2num evaluates its argument with eval and would execute arbitrary
					# text given on the command line.
					arg_list = argv();
					for i = 1:nargin
						switch (i)
							case 1
								nfrom = str2double(arg_list{i});
							case 2
								nto   = str2double(arg_list{i});
							case 3
								nstep = str2double(arg_list{i});
							case 4
								loops = str2double(arg_list{i});
						endswitch
					endfor

					# A non-empty OPENBLAS_LOOPS environment variable overrides the loop count.
					p = getenv("OPENBLAS_LOOPS");
					if p
						loops = str2double(p);
					endif

					printf("From %d To %d Step=%d Loops=%d\n",nfrom, nto, nstep, loops);
					printf("        SIZE             FLOPS             TIME\n");

					n = nfrom;
					while n <= nto

						# Random n x n coefficient matrix and n x n right-hand side, created
						# before the clock starts so that only the solves are timed.
						A = double(rand(n,n));
						B = double(rand(n,n));
						start = clock();

						l = 0;
						while l < loops
							x = linsolve(A,B);
							#x = A / B;
							l = l + 1;
						endwhile

						timeg = etime(clock(), start);
						# Optional residual check, left disabled:
						#r = norm(A*x - B)/norm(B)
						# The rate counts 2/3*n^3 + 2*n^3 operations per solve.
						mflops = ( 2.0/3.0 *n*n*n + 2.0*n*n*n ) *loops / ( timeg * 1.0e6 );

						st1 = sprintf("%dx%d : ", n,n);
						printf("%20s %10.2f MFlops %10.6f sec\n", st1, mflops, timeg );
						n = n + nstep;

					endwhile
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,56 @@
 | 
				
			||||||
 | 
					#!/usr/bin/octave --silent 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Default sweep: n runs from nfrom to nto in steps of nstep, and the
					# product is repeated `loops` times for every n.
					nfrom = 128;
					nto   = 2048;
					nstep = 128;
					loops = 1;

					# Positional command-line arguments 1..4 override nfrom, nto, nstep and
					# loops in that order.  str2double is used rather than str2num because
					# str2num evaluates its argument with eval and would execute arbitrary
					# text given on the command line.
					arg_list = argv();
					for i = 1:nargin
						switch (i)
							case 1
								nfrom = str2double(arg_list{i});
							case 2
								nto   = str2double(arg_list{i});
							case 3
								nstep = str2double(arg_list{i});
							case 4
								loops = str2double(arg_list{i});
						endswitch
					endfor

					# A non-empty OPENBLAS_LOOPS environment variable overrides the loop count.
					p = getenv("OPENBLAS_LOOPS");
					if p
						loops = str2double(p);
					endif

					printf("From %d To %d Step=%d Loops=%d\n",nfrom, nto, nstep, loops);
					printf("        SIZE             FLOPS             TIME\n");

					n = nfrom;
					while n <= nto

						# Single-precision n x n operands, created before the clock starts
						# so that only the products are timed.
						A = single(rand(n,n));
						B = single(rand(n,n));
						start = clock();

						l = 0;
						while l < loops
							C = A * B;
							l = l + 1;
						endwhile

						timeg = etime(clock(), start);
						# The rate counts 2*n^3 operations per product.
						mflops = ( 2.0*n*n*n *loops ) / ( timeg * 1.0e6 );

						st1 = sprintf("%dx%d : ", n,n);
						printf("%20s %10.2f MFlops %10.6f sec\n", st1, mflops, timeg);
						n = n + nstep;

					endwhile
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,56 @@
 | 
				
			||||||
 | 
					#!/usr/bin/octave --silent 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Default sweep: n runs from nfrom to nto in steps of nstep, and the
					# product is repeated `loops` times for every n.
					nfrom = 128;
					nto   = 2048;
					nstep = 128;
					loops = 1;

					# Positional command-line arguments 1..4 override nfrom, nto, nstep and
					# loops in that order.  str2double is used rather than str2num because
					# str2num evaluates its argument with eval and would execute arbitrary
					# text given on the command line.
					arg_list = argv();
					for i = 1:nargin
						switch (i)
							case 1
								nfrom = str2double(arg_list{i});
							case 2
								nto   = str2double(arg_list{i});
							case 3
								nstep = str2double(arg_list{i});
							case 4
								loops = str2double(arg_list{i});
						endswitch
					endfor

					# A non-empty OPENBLAS_LOOPS environment variable overrides the loop count.
					p = getenv("OPENBLAS_LOOPS");
					if p
						loops = str2double(p);
					endif

					printf("From %d To %d Step=%d Loops=%d\n",nfrom, nto, nstep, loops);
					printf("        SIZE             FLOPS             TIME\n");

					n = nfrom;
					while n <= nto

						# Single-precision n x n matrix and n x 1 vector, created before the
						# clock starts so that only the products are timed.
						A = single(rand(n,n));
						B = single(rand(n,1));
						start = clock();

						l = 0;
						while l < loops
							C = A * B;
							l = l + 1;
						endwhile

						timeg = etime(clock(), start);
						# The rate counts 2*n^2 operations per product.
						mflops = ( 2.0*n*n *loops ) / ( timeg * 1.0e6 );

						st1 = sprintf("%dx%d : ", n,n);
						printf("%20s %10.2f MFlops %10.6f sec\n", st1, mflops, timeg);
						n = n + nstep;

					endwhile
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,56 @@
 | 
				
			||||||
 | 
					#!/usr/bin/octave --silent 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Default sweep: n runs from nfrom to nto in steps of nstep, and the
					# product is repeated `loops` times for every n.
					nfrom = 128;
					nto   = 2048;
					nstep = 128;
					loops = 1;

					# Positional command-line arguments 1..4 override nfrom, nto, nstep and
					# loops in that order.  str2double is used rather than str2num because
					# str2num evaluates its argument with eval and would execute arbitrary
					# text given on the command line.
					arg_list = argv();
					for i = 1:nargin
						switch (i)
							case 1
								nfrom = str2double(arg_list{i});
							case 2
								nto   = str2double(arg_list{i});
							case 3
								nstep = str2double(arg_list{i});
							case 4
								loops = str2double(arg_list{i});
						endswitch
					endfor

					# A non-empty OPENBLAS_LOOPS environment variable overrides the loop count.
					p = getenv("OPENBLAS_LOOPS");
					if p
						loops = str2double(p);
					endif

					printf("From %d To %d Step=%d Loops=%d\n",nfrom, nto, nstep, loops);
					printf("        SIZE             FLOPS             TIME\n");

					n = nfrom;
					while n <= nto

						# Double-precision complex n x n operands, created before the clock
						# starts so that only the products are timed.
						A = double(rand(n,n)) + double(rand(n,n)) * 1i;
						B = double(rand(n,n)) + double(rand(n,n)) * 1i;
						start = clock();

						l = 0;
						while l < loops
							C = A * B;
							l = l + 1;
						endwhile

						timeg = etime(clock(), start);
						# The rate counts 4 * 2*n^3 operations per product.
						mflops = ( 4.0 * 2.0*n*n*n *loops ) / ( timeg * 1.0e6 );

						st1 = sprintf("%dx%d : ", n,n);
						printf("%20s %10.2f MFlops %10.6f sec\n", st1, mflops, timeg);
						n = n + nstep;

					endwhile
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,56 @@
 | 
				
			||||||
 | 
					#!/usr/bin/octave --silent 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Default sweep: n runs from nfrom to nto in steps of nstep, and the
					# product is repeated `loops` times for every n.
					nfrom = 128;
					nto   = 2048;
					nstep = 128;
					loops = 1;

					# Positional command-line arguments 1..4 override nfrom, nto, nstep and
					# loops in that order.  str2double is used rather than str2num because
					# str2num evaluates its argument with eval and would execute arbitrary
					# text given on the command line.
					arg_list = argv();
					for i = 1:nargin
						switch (i)
							case 1
								nfrom = str2double(arg_list{i});
							case 2
								nto   = str2double(arg_list{i});
							case 3
								nstep = str2double(arg_list{i});
							case 4
								loops = str2double(arg_list{i});
						endswitch
					endfor

					# A non-empty OPENBLAS_LOOPS environment variable overrides the loop count.
					p = getenv("OPENBLAS_LOOPS");
					if p
						loops = str2double(p);
					endif

					printf("From %d To %d Step=%d Loops=%d\n",nfrom, nto, nstep, loops);
					printf("        SIZE             FLOPS             TIME\n");

					n = nfrom;
					while n <= nto

						# Double-precision complex n x n matrix and n x 1 vector, created
						# before the clock starts so that only the products are timed.
						A = double(rand(n,n)) + double(rand(n,n)) * 1i;
						B = double(rand(n,1)) + double(rand(n,1)) * 1i;
						start = clock();

						l = 0;
						while l < loops
							C = A * B;
							l = l + 1;
						endwhile

						timeg = etime(clock(), start);
						# The rate counts 4 * 2*n^2 operations per product.
						mflops = ( 4.0 * 2.0*n*n *loops ) / ( timeg * 1.0e6 );

						st1 = sprintf("%dx%d : ", n,n);
						printf("%20s %10.2f MFlops %10.6f sec\n", st1, mflops, timeg);
						n = n + nstep;

					endwhile
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,62 @@
 | 
				
			||||||
 | 
					#!/usr/bin/Rscript
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Arguments given after the script name on the command line.
					argv <- commandArgs(trailingOnly = TRUE)

					# Default sweep: n runs from nfrom to nto in steps of nstep, and the
					# eigen-decomposition is repeated `loops` times for every n.
					nfrom = 128
					nto = 2048
					nstep = 128
					loops = 1

					# Arguments 1..4 override nfrom, nto, nstep and loops in that order.
					# seq_along() yields an empty sequence when no arguments were given,
					# so no separate length check is needed.
					for ( z in seq_along(argv) ) {
						if ( z == 1 ) {
							nfrom <- as.numeric(argv[z])
						} else if ( z==2 ) {
							nto <- as.numeric(argv[z])
						} else if ( z==3 ) {
							nstep <- as.numeric(argv[z])
						} else if ( z==4 ) {
							loops <- as.numeric(argv[z])
						}
					}

					# A non-empty OPENBLAS_LOOPS environment variable overrides the loop count.
					p=Sys.getenv("OPENBLAS_LOOPS")
					if ( p != "" ) {
						loops <- as.numeric(p)
					}

					cat(sprintf("From %.0f To %.0f Step=%.0f Loops=%.0f\n",nfrom, nto, nstep, loops))
					cat(sprintf("      SIZE             Flops                   Time\n"))

					n = nfrom
					while ( n <= nto ) {

						# Random n x n matrix, created before timing starts so that only the
						# eigen() calls are measured.
						A <- matrix(runif(n*n), ncol = n, nrow = n, byrow = TRUE)

						l = 1

						# Element 3 of proc.time() is the elapsed (wall-clock) time.
						start <- proc.time()[3]

						while ( l <= loops ) {
							ev <- eigen(A)
							l = l + 1
						}

						end <- proc.time()[3]
						timeg = end - start
						# 26.33 * n^3 operations are counted per decomposition, the same
						# constant the Octave eigenvalue benchmark uses, so that rates from
						# the two scripts are directly comparable.
						mflops = (26.33 *n*n*n ) * loops / ( timeg * 1.0e6 )

						st = sprintf("%.0fx%.0f :",n , n)
						cat(sprintf("%20s %10.2f MFlops %10.6f sec\n", st, mflops, timeg))

						n = n + nstep

					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,63 @@
 | 
				
			||||||
 | 
					#!/usr/bin/Rscript
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Benchmark script: times the matrix product A %*% B on random square matrices.
					# Positional arguments (all optional): <from> <to> <step> <loops>.
					# The OPENBLAS_LOOPS environment variable, when non-empty, overrides <loops>.
					argv <- commandArgs(trailingOnly = TRUE)

					# Defaults used when the corresponding argument is not supplied.
					nfrom <- 128
					nto <- 2048
					nstep <- 128
					loops <- 1

					# Apply the positional overrides in order; arguments beyond the fourth are ignored.
					argc <- length(argv)
					if ( argc >= 1 ) nfrom <- as.numeric(argv[1])
					if ( argc >= 2 ) nto <- as.numeric(argv[2])
					if ( argc >= 3 ) nstep <- as.numeric(argv[3])
					if ( argc >= 4 ) loops <- as.numeric(argv[4])

					# The environment takes precedence over the command line for the loop count.
					env_loops <- Sys.getenv("OPENBLAS_LOOPS")
					if ( env_loops != "" ) {
						loops <- as.numeric(env_loops)
					}

					cat(sprintf("From %.0f To %.0f Step=%.0f Loops=%.0f\n",nfrom, nto, nstep, loops))
					cat(sprintf("      SIZE             Flops                   Time\n"))

					n <- nfrom
					while ( n <= nto ) {

						# Fresh random n x n operands for every size.
						A <- matrix(runif(n*n), ncol = n, nrow = n, byrow = TRUE)
						B <- matrix(runif(n*n), ncol = n, nrow = n, byrow = TRUE)

						l <- 1

						# Element 3 of proc.time() is sampled before and after the repetitions.
						start <- proc.time()[3]

						while ( l <= loops ) {
							C <- A %*% B
							l <- l + 1
						}

						end <- proc.time()[3]
						timeg <- end - start

						# Rate is derived from 2 * n^3 operations per product.
						mflops <- ( 2.0 *n*n*n ) * loops / ( timeg * 1.0e6 )

						st <- sprintf("%.0fx%.0f :",n , n)
						cat(sprintf("%20s %10.2f MFlops %10.6f sec\n", st, mflops, timeg))

						n <- n + nstep
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,63 @@
 | 
				
			||||||
 | 
					#!/usr/bin/Rscript
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Benchmark script: times solve(A, B) on random square matrices of growing size.
					# Positional arguments (all optional): <from> <to> <step> <loops>.
					# The OPENBLAS_LOOPS environment variable, when non-empty, overrides <loops>.
					argv <- commandArgs(trailingOnly = TRUE)

					# Defaults used when the corresponding argument is not supplied.
					nfrom <- 128
					nto <- 2048
					nstep <- 128
					loops <- 1

					# Apply the positional overrides in order; arguments beyond the fourth are ignored.
					argc <- length(argv)
					if ( argc >= 1 ) nfrom <- as.numeric(argv[1])
					if ( argc >= 2 ) nto <- as.numeric(argv[2])
					if ( argc >= 3 ) nstep <- as.numeric(argv[3])
					if ( argc >= 4 ) loops <- as.numeric(argv[4])

					# The environment takes precedence over the command line for the loop count.
					env_loops <- Sys.getenv("OPENBLAS_LOOPS")
					if ( env_loops != "" ) {
						loops <- as.numeric(env_loops)
					}

					cat(sprintf("From %.0f To %.0f Step=%.0f Loops=%.0f\n",nfrom, nto, nstep, loops))
					cat(sprintf("      SIZE             Flops                   Time\n"))

					n <- nfrom
					while ( n <= nto ) {

						# Fresh random n x n system matrix and right-hand sides for every size.
						A <- matrix(runif(n*n), ncol = n, nrow = n, byrow = TRUE)
						B <- matrix(runif(n*n), ncol = n, nrow = n, byrow = TRUE)

						l <- 1

						# Element 3 of proc.time() is sampled before and after the repetitions.
						start <- proc.time()[3]

						while ( l <= loops ) {
							solve(A,B)
							l <- l + 1
						}

						end <- proc.time()[3]
						timeg <- end - start

						# Rate is derived from the script's (2/3) * n^3 + 2 * n^3 operation estimate per call.
						mflops <- (2.0/3.0 *n*n*n + 2.0 *n*n*n ) * loops / ( timeg * 1.0e6 )

						st <- sprintf("%.0fx%.0f :",n , n)
						cat(sprintf("%20s %10.2f MFlops %10.6f sec\n", st, mflops, timeg))

						n <- n + nstep
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,201 @@
 | 
				
			||||||
 | 
					/***************************************************************************
 | 
				
			||||||
 | 
					Copyright (c) 2014, The OpenBLAS Project
 | 
				
			||||||
 | 
					All rights reserved.
 | 
				
			||||||
 | 
					Redistribution and use in source and binary forms, with or without
 | 
				
			||||||
 | 
					modification, are permitted provided that the following conditions are
 | 
				
			||||||
 | 
					met:
 | 
				
			||||||
 | 
					1. Redistributions of source code must retain the above copyright
 | 
				
			||||||
 | 
					notice, this list of conditions and the following disclaimer.
 | 
				
			||||||
 | 
					2. Redistributions in binary form must reproduce the above copyright
 | 
				
			||||||
 | 
					notice, this list of conditions and the following disclaimer in
 | 
				
			||||||
 | 
					the documentation and/or other materials provided with the
 | 
				
			||||||
 | 
					distribution.
 | 
				
			||||||
 | 
					3. Neither the name of the OpenBLAS project nor the names of
 | 
				
			||||||
 | 
					its contributors may be used to endorse or promote products
 | 
				
			||||||
 | 
					derived from this software without specific prior written permission.
 | 
				
			||||||
 | 
					THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 | 
				
			||||||
 | 
					AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 | 
				
			||||||
 | 
					IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 | 
				
			||||||
 | 
					ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
 | 
				
			||||||
 | 
					LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 | 
				
			||||||
 | 
					DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 | 
				
			||||||
 | 
					SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 | 
				
			||||||
 | 
					CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 | 
				
			||||||
 | 
					OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
 | 
				
			||||||
 | 
					USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 | 
				
			||||||
 | 
					*****************************************************************************/
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include <stdio.h>
 | 
				
			||||||
 | 
					#include <stdlib.h>
 | 
				
			||||||
 | 
					#ifdef __CYGWIN32__
 | 
				
			||||||
 | 
					#include <sys/time.h>
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					#include "common.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
					 * Select the BLAS swap routine under test from the COMPLEX / DOUBLE
					 * build macros: zswap, cswap, dswap or sswap.
					 */
					#undef SWAP

					#if defined(COMPLEX) && defined(DOUBLE)
					#define SWAP   BLASFUNC(zswap)
					#elif defined(COMPLEX)
					#define SWAP   BLASFUNC(cswap)
					#elif defined(DOUBLE)
					#define SWAP   BLASFUNC(dswap)
					#else
					#define SWAP   BLASFUNC(sswap)
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#if defined(__WIN32__) || defined(__WIN64__)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#ifndef DELTA_EPOCH_IN_MICROSECS
 | 
				
			||||||
 | 
					#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					int gettimeofday(struct timeval *tv, void *tz){
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  FILETIME ft;
 | 
				
			||||||
 | 
					  unsigned __int64 tmpres = 0;
 | 
				
			||||||
 | 
					  static int tzflag;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  if (NULL != tv)
 | 
				
			||||||
 | 
					    {
 | 
				
			||||||
 | 
					      GetSystemTimeAsFileTime(&ft);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      tmpres |= ft.dwHighDateTime;
 | 
				
			||||||
 | 
					      tmpres <<= 32;
 | 
				
			||||||
 | 
					      tmpres |= ft.dwLowDateTime;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      /*converting file time to unix epoch*/
 | 
				
			||||||
 | 
					      tmpres /= 10;  /*convert into microseconds*/
 | 
				
			||||||
 | 
					      tmpres -= DELTA_EPOCH_IN_MICROSECS;
 | 
				
			||||||
 | 
					      tv->tv_sec = (long)(tmpres / 1000000UL);
 | 
				
			||||||
 | 
					      tv->tv_usec = (long)(tmpres % 1000000UL);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  return 0;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
					 * Allocate `size` bytes from a SysV shared-memory segment requested with
					 * SHM_HUGETLB. The requested length is (size + HUGE_PAGESIZE) masked down
					 * to a multiple of HUGE_PAGESIZE. On any failure a message is printed and
					 * the process exits with status 1, so the function never returns NULL.
					 */
					static void *huge_malloc(BLASLONG size){
					  void *mapped;
					  int segment;

					#ifndef SHM_HUGETLB
					#define SHM_HUGETLB 04000
					#endif

					  segment = shmget(IPC_PRIVATE,
					                   (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
					                   SHM_HUGETLB | IPC_CREAT |0600);
					  if (segment < 0) {
					    printf( "Memory allocation failed(shmget).\n");
					    exit(1);
					  }

					  mapped = shmat(segment, NULL, SHM_RND);
					  if ((BLASLONG)mapped == -1){
					    printf( "Memory allocation failed(shmat).\n");
					    exit(1);
					  }

					  /* Issue IPC_RMID on the segment id immediately after attaching. */
					  shmctl(segment, IPC_RMID, 0);

					  return mapped;
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#define malloc huge_malloc
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					int main(int argc, char *argv[]){
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  FLOAT *x, *y;
 | 
				
			||||||
 | 
					  FLOAT alpha[2] = { 2.0, 2.0 };
 | 
				
			||||||
 | 
					  blasint m, i;
 | 
				
			||||||
 | 
					  blasint inc_x=1,inc_y=1;
 | 
				
			||||||
 | 
					  int loops = 1;
 | 
				
			||||||
 | 
					  int l;
 | 
				
			||||||
 | 
					  char *p;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  int from =   1;
 | 
				
			||||||
 | 
					  int to   = 200;
 | 
				
			||||||
 | 
					  int step =   1;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  struct timeval start, stop;
 | 
				
			||||||
 | 
					  double time1,timeg;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  argc--;argv++;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  if (argc > 0) { from     = atol(*argv);		argc--; argv++;}
 | 
				
			||||||
 | 
					  if (argc > 0) { to       = MAX(atol(*argv), from);	argc--; argv++;}
 | 
				
			||||||
 | 
					  if (argc > 0) { step     = atol(*argv);		argc--; argv++;}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  if ((p = getenv("OPENBLAS_LOOPS")))  loops = atoi(p);
 | 
				
			||||||
 | 
					  if ((p = getenv("OPENBLAS_INCX")))   inc_x = atoi(p);
 | 
				
			||||||
 | 
					  if ((p = getenv("OPENBLAS_INCY")))   inc_y = atoi(p);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  fprintf(stderr, "From : %3d  To : %3d Step = %3d Inc_x = %d Inc_y = %d Loops = %d\n", from, to, step,inc_x,inc_y,loops);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
 | 
				
			||||||
 | 
					    fprintf(stderr,"Out of Memory!!\n");exit(1);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL){
 | 
				
			||||||
 | 
					    fprintf(stderr,"Out of Memory!!\n");exit(1);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#ifdef linux
 | 
				
			||||||
 | 
					  srandom(getpid());
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  fprintf(stderr, "   SIZE       Flops\n");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  for(m = from; m <= to; m += step)
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   timeg=0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   fprintf(stderr, " %6d : ", (int)m);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   for (l=0; l<loops; l++)
 | 
				
			||||||
 | 
					   {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   	for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
 | 
				
			||||||
 | 
								x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
 | 
				
			||||||
 | 
					   	}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   	for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
 | 
				
			||||||
 | 
								y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
 | 
				
			||||||
 | 
					   	}
 | 
				
			||||||
 | 
					    	gettimeofday( &start, (struct timezone *)0);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    	SWAP (&m, x, &inc_x, y, &inc_y );
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    	gettimeofday( &stop, (struct timezone *)0);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    	time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						timeg += time1;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    timeg /= loops;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    fprintf(stderr,
 | 
				
			||||||
 | 
						    " %10.2f MBytes\n",
 | 
				
			||||||
 | 
						    COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  return 0;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
 | 
				
			||||||
| 
						 | 
					@ -130,11 +130,21 @@ int main(int argc, char *argv[]){
 | 
				
			||||||
  char trans='N';
 | 
					  char trans='N';
 | 
				
			||||||
  char diag ='U';
 | 
					  char diag ='U';
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  int l;
 | 
				
			||||||
 | 
					  int loops = 1;
 | 
				
			||||||
 | 
					  double timeg;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  if ((p = getenv("OPENBLAS_SIDE"))) side=*p; 
 | 
					  if ((p = getenv("OPENBLAS_SIDE"))) side=*p; 
 | 
				
			||||||
  if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p;
 | 
					  if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p;
 | 
				
			||||||
  if ((p = getenv("OPENBLAS_TRANS"))) trans=*p;
 | 
					  if ((p = getenv("OPENBLAS_TRANS"))) trans=*p;
 | 
				
			||||||
  if ((p = getenv("OPENBLAS_DIAG"))) diag=*p;
 | 
					  if ((p = getenv("OPENBLAS_DIAG"))) diag=*p;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  p = getenv("OPENBLAS_LOOPS");
 | 
				
			||||||
 | 
					  if ( p != NULL )
 | 
				
			||||||
 | 
					        loops = atoi(p);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  blasint m, i, j;
 | 
					  blasint m, i, j;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  int from =   1;
 | 
					  int from =   1;
 | 
				
			||||||
| 
						 | 
					@ -150,7 +160,7 @@ int main(int argc, char *argv[]){
 | 
				
			||||||
  if (argc > 0) { to       = MAX(atol(*argv), from);	argc--; argv++;}
 | 
					  if (argc > 0) { to       = MAX(atol(*argv), from);	argc--; argv++;}
 | 
				
			||||||
  if (argc > 0) { step     = atol(*argv);		argc--; argv++;}
 | 
					  if (argc > 0) { step     = atol(*argv);		argc--; argv++;}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  fprintf(stderr, "From : %3d  To : %3d Step = %3d Side = %c Uplo = %c Trans = %c Diag = %c\n", from, to, step,side,uplo,trans,diag);
 | 
					  fprintf(stderr, "From : %3d  To : %3d Step = %3d Side = %c Uplo = %c Trans = %c Diag = %c Loops = %d\n", from, to, step,side,uplo,trans,diag,loops);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
 | 
					  if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
 | 
				
			||||||
    fprintf(stderr,"Out of Memory!!\n");exit(1);
 | 
					    fprintf(stderr,"Out of Memory!!\n");exit(1);
 | 
				
			||||||
| 
						 | 
					@ -171,28 +181,35 @@ int main(int argc, char *argv[]){
 | 
				
			||||||
  for(m = from; m <= to; m += step)
 | 
					  for(m = from; m <= to; m += step)
 | 
				
			||||||
  {
 | 
					  {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    fprintf(stderr, " %6d : ", (int)m);
 | 
						timeg=0.0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    for(j = 0; j < m; j++){
 | 
					        fprintf(stderr, " %6d : ", (int)m);
 | 
				
			||||||
      for(i = 0; i < m * COMPSIZE; i++){
 | 
					 | 
				
			||||||
	a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
 | 
					 | 
				
			||||||
	b[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
 | 
					 | 
				
			||||||
      }
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    gettimeofday( &start, (struct timezone *)0);
 | 
						for (l=0; l<loops; l++)
 | 
				
			||||||
 | 
					    	{
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    TRSM (&side, &uplo, &trans, &diag, &m, &m, alpha, a, &m, b, &m);
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    gettimeofday( &stop, (struct timezone *)0);
 | 
					   		 for(j = 0; j < m; j++){
 | 
				
			||||||
 | 
					      			for(i = 0; i < m * COMPSIZE; i++){
 | 
				
			||||||
 | 
									a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
 | 
				
			||||||
 | 
									b[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
 | 
				
			||||||
 | 
					      		 	}
 | 
				
			||||||
 | 
					    		 }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
 | 
					    		gettimeofday( &start, (struct timezone *)0);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    gettimeofday( &start, (struct timezone *)0);
 | 
					    		TRSM (&side, &uplo, &trans, &diag, &m, &m, alpha, a, &m, b, &m);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    fprintf(stderr,
 | 
					    		gettimeofday( &stop, (struct timezone *)0);
 | 
				
			||||||
	    " %10.2f MFlops\n",
 | 
					
 | 
				
			||||||
	    COMPSIZE * COMPSIZE * 1. * (double)m * (double)m * (double)m / time1 * 1.e-6);
 | 
					    		time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							timeg += time1;
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						time1 = timeg/loops;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        fprintf(stderr, " %10.2f MFlops\n", COMPSIZE * COMPSIZE * 1. * (double)m * (double)m * (double)m / time1 * 1.e-6);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,196 @@
 | 
				
			||||||
 | 
					/***************************************************************************
 | 
				
			||||||
 | 
					Copyright (c) 2014, The OpenBLAS Project
 | 
				
			||||||
 | 
					All rights reserved.
 | 
				
			||||||
 | 
					Redistribution and use in source and binary forms, with or without
 | 
				
			||||||
 | 
					modification, are permitted provided that the following conditions are
 | 
				
			||||||
 | 
					met:
 | 
				
			||||||
 | 
					1. Redistributions of source code must retain the above copyright
 | 
				
			||||||
 | 
					notice, this list of conditions and the following disclaimer.
 | 
				
			||||||
 | 
					2. Redistributions in binary form must reproduce the above copyright
 | 
				
			||||||
 | 
					notice, this list of conditions and the following disclaimer in
 | 
				
			||||||
 | 
					the documentation and/or other materials provided with the
 | 
				
			||||||
 | 
					distribution.
 | 
				
			||||||
 | 
					3. Neither the name of the OpenBLAS project nor the names of
 | 
				
			||||||
 | 
					its contributors may be used to endorse or promote products
 | 
				
			||||||
 | 
					derived from this software without specific prior written permission.
 | 
				
			||||||
 | 
					THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 | 
				
			||||||
 | 
					AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 | 
				
			||||||
 | 
					IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 | 
				
			||||||
 | 
					ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
 | 
				
			||||||
 | 
					LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 | 
				
			||||||
 | 
					DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 | 
				
			||||||
 | 
					SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 | 
				
			||||||
 | 
					CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 | 
				
			||||||
 | 
					OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
 | 
				
			||||||
 | 
					USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 | 
				
			||||||
 | 
					*****************************************************************************/
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include <stdio.h>
 | 
				
			||||||
 | 
					#include <stdlib.h>
 | 
				
			||||||
 | 
					#ifdef __CYGWIN32__
 | 
				
			||||||
 | 
					#include <sys/time.h>
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					#define RETURN_BY_STACK 1
 | 
				
			||||||
 | 
					#include "common.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
					 * Select the complex dot-product routine under test: zdotu when DOUBLE
					 * is defined, cdotu otherwise.
					 */
					#undef DOT

					#ifndef DOUBLE
					#define DOT   BLASFUNC(cdotu)
					#else
					#define DOT   BLASFUNC(zdotu)
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#if defined(__WIN32__) || defined(__WIN64__)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#ifndef DELTA_EPOCH_IN_MICROSECS
 | 
				
			||||||
 | 
					#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					int gettimeofday(struct timeval *tv, void *tz){
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  FILETIME ft;
 | 
				
			||||||
 | 
					  unsigned __int64 tmpres = 0;
 | 
				
			||||||
 | 
					  static int tzflag;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  if (NULL != tv)
 | 
				
			||||||
 | 
					    {
 | 
				
			||||||
 | 
					      GetSystemTimeAsFileTime(&ft);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      tmpres |= ft.dwHighDateTime;
 | 
				
			||||||
 | 
					      tmpres <<= 32;
 | 
				
			||||||
 | 
					      tmpres |= ft.dwLowDateTime;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      /*converting file time to unix epoch*/
 | 
				
			||||||
 | 
					      tmpres /= 10;  /*convert into microseconds*/
 | 
				
			||||||
 | 
					      tmpres -= DELTA_EPOCH_IN_MICROSECS;
 | 
				
			||||||
 | 
					      tv->tv_sec = (long)(tmpres / 1000000UL);
 | 
				
			||||||
 | 
					      tv->tv_usec = (long)(tmpres % 1000000UL);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  return 0;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
					 * Allocate `size` bytes from a SysV shared-memory segment requested with
					 * SHM_HUGETLB. The requested length is (size + HUGE_PAGESIZE) masked down
					 * to a multiple of HUGE_PAGESIZE. On any failure a message is printed and
					 * the process exits with status 1, so the function never returns NULL.
					 */
					static void *huge_malloc(BLASLONG size){
					  void *mapped;
					  int segment;

					#ifndef SHM_HUGETLB
					#define SHM_HUGETLB 04000
					#endif

					  segment = shmget(IPC_PRIVATE,
					                   (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
					                   SHM_HUGETLB | IPC_CREAT |0600);
					  if (segment < 0) {
					    printf( "Memory allocation failed(shmget).\n");
					    exit(1);
					  }

					  mapped = shmat(segment, NULL, SHM_RND);
					  if ((BLASLONG)mapped == -1){
					    printf( "Memory allocation failed(shmat).\n");
					    exit(1);
					  }

					  /* Issue IPC_RMID on the segment id immediately after attaching. */
					  shmctl(segment, IPC_RMID, 0);

					  return mapped;
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#define malloc huge_malloc
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					int main(int argc, char *argv[]){
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  FLOAT *x, *y;
 | 
				
			||||||
 | 
					  FLOAT _Complex result;
 | 
				
			||||||
 | 
					  blasint m, i;
 | 
				
			||||||
 | 
					  blasint inc_x=1,inc_y=1;
 | 
				
			||||||
 | 
					  int loops = 1;
 | 
				
			||||||
 | 
					  int l;
 | 
				
			||||||
 | 
					  char *p;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  int from =   1;
 | 
				
			||||||
 | 
					  int to   = 200;
 | 
				
			||||||
 | 
					  int step =   1;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  struct timeval start, stop;
 | 
				
			||||||
 | 
					  double time1,timeg;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  argc--;argv++;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  if (argc > 0) { from     = atol(*argv);		argc--; argv++;}
 | 
				
			||||||
 | 
					  if (argc > 0) { to       = MAX(atol(*argv), from);	argc--; argv++;}
 | 
				
			||||||
 | 
					  if (argc > 0) { step     = atol(*argv);		argc--; argv++;}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  if ((p = getenv("OPENBLAS_LOOPS")))  loops = atoi(p);
 | 
				
			||||||
 | 
					  if ((p = getenv("OPENBLAS_INCX")))   inc_x = atoi(p);
 | 
				
			||||||
 | 
					  if ((p = getenv("OPENBLAS_INCY")))   inc_y = atoi(p);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  fprintf(stderr, "From : %3d  To : %3d Step = %3d Inc_x = %d Inc_y = %d Loops = %d\n", from, to, step,inc_x,inc_y,loops);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
 | 
				
			||||||
 | 
					    fprintf(stderr,"Out of Memory!!\n");exit(1);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL){
 | 
				
			||||||
 | 
					    fprintf(stderr,"Out of Memory!!\n");exit(1);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#ifdef linux
 | 
				
			||||||
 | 
					  srandom(getpid());
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  fprintf(stderr, "   SIZE       Flops\n");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  for(m = from; m <= to; m += step)
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   timeg=0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   fprintf(stderr, " %6d : ", (int)m);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   for (l=0; l<loops; l++)
 | 
				
			||||||
 | 
					   {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   	for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
 | 
				
			||||||
 | 
								x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
 | 
				
			||||||
 | 
					   	}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   	for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
 | 
				
			||||||
 | 
								y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
 | 
				
			||||||
 | 
					   	}
 | 
				
			||||||
 | 
					    	gettimeofday( &start, (struct timezone *)0);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    	DOT (&result, &m, x, &inc_x, y, &inc_y );
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    	gettimeofday( &stop, (struct timezone *)0);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    	time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						timeg += time1;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    timeg /= loops;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    fprintf(stderr,
 | 
				
			||||||
 | 
						    " %10.2f MFlops\n",
 | 
				
			||||||
 | 
						    COMPSIZE * COMPSIZE * 2. * (double)m / timeg * 1.e-6);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  return 0;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,195 @@
 | 
				
			||||||
 | 
					/***************************************************************************
 | 
				
			||||||
 | 
					Copyright (c) 2014, The OpenBLAS Project
 | 
				
			||||||
 | 
					All rights reserved.
 | 
				
			||||||
 | 
					Redistribution and use in source and binary forms, with or without
 | 
				
			||||||
 | 
					modification, are permitted provided that the following conditions are
 | 
				
			||||||
 | 
					met:
 | 
				
			||||||
 | 
					1. Redistributions of source code must retain the above copyright
 | 
				
			||||||
 | 
					notice, this list of conditions and the following disclaimer.
 | 
				
			||||||
 | 
					2. Redistributions in binary form must reproduce the above copyright
 | 
				
			||||||
 | 
					notice, this list of conditions and the following disclaimer in
 | 
				
			||||||
 | 
					the documentation and/or other materials provided with the
 | 
				
			||||||
 | 
					distribution.
 | 
				
			||||||
 | 
					3. Neither the name of the OpenBLAS project nor the names of
 | 
				
			||||||
 | 
					its contributors may be used to endorse or promote products
 | 
				
			||||||
 | 
					derived from this software without specific prior written permission.
 | 
				
			||||||
 | 
					THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 | 
				
			||||||
 | 
					AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 | 
				
			||||||
 | 
					IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 | 
				
			||||||
 | 
					ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
 | 
				
			||||||
 | 
					LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 | 
				
			||||||
 | 
					DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 | 
				
			||||||
 | 
					SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 | 
				
			||||||
 | 
					CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 | 
				
			||||||
 | 
					OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
 | 
				
			||||||
 | 
					USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 | 
				
			||||||
 | 
					*****************************************************************************/
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include <stdio.h>
 | 
				
			||||||
 | 
					#include <stdlib.h>
 | 
				
			||||||
 | 
					#ifdef __CYGWIN32__
 | 
				
			||||||
 | 
					#include <sys/time.h>
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					#include "common.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#undef DOT
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#ifdef DOUBLE
 | 
				
			||||||
 | 
					#define DOT   BLASFUNC(zdotu)
 | 
				
			||||||
 | 
					#else
 | 
				
			||||||
 | 
					#define DOT   BLASFUNC(cdotu)
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#if defined(__WIN32__) || defined(__WIN64__)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#ifndef DELTA_EPOCH_IN_MICROSECS
 | 
				
			||||||
 | 
					#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					int gettimeofday(struct timeval *tv, void *tz){
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  FILETIME ft;
 | 
				
			||||||
 | 
					  unsigned __int64 tmpres = 0;
 | 
				
			||||||
 | 
					  static int tzflag;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  if (NULL != tv)
 | 
				
			||||||
 | 
					    {
 | 
				
			||||||
 | 
					      GetSystemTimeAsFileTime(&ft);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      tmpres |= ft.dwHighDateTime;
 | 
				
			||||||
 | 
					      tmpres <<= 32;
 | 
				
			||||||
 | 
					      tmpres |= ft.dwLowDateTime;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      /*converting file time to unix epoch*/
 | 
				
			||||||
 | 
					      tmpres /= 10;  /*convert into microseconds*/
 | 
				
			||||||
 | 
					      tmpres -= DELTA_EPOCH_IN_MICROSECS;
 | 
				
			||||||
 | 
					      tv->tv_sec = (long)(tmpres / 1000000UL);
 | 
				
			||||||
 | 
					      tv->tv_usec = (long)(tmpres % 1000000UL);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  return 0;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
					 * Allocate `size` bytes from a SysV shared-memory segment requested
					 * with the SHM_HUGETLB flag.
					 *
					 * The requested length is (size + HUGE_PAGESIZE) with the low bits
					 * masked off, i.e. a multiple of HUGE_PAGESIZE.  The segment is attached
					 * with shmat() and IPC_RMID is issued on it before the attached address
					 * is returned.  On failure of shmget() or shmat() a message is printed
					 * and the process exits with status 1, so the function never returns
					 * NULL.
					 */
					static void *huge_malloc(BLASLONG size){
					  int segment;
					  void *mapped;

					#ifndef SHM_HUGETLB
					#define SHM_HUGETLB 04000
					#endif

					  segment = shmget(IPC_PRIVATE,
					                   (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
					                   SHM_HUGETLB | IPC_CREAT |0600);

					  if (segment < 0) {
					    printf( "Memory allocation failed(shmget).\n");
					    exit(1);
					  }

					  mapped = shmat(segment, NULL, SHM_RND);

					  /* shmat() signals failure with an all-ones pointer value */
					  if ((BLASLONG)mapped == -1){
					    printf( "Memory allocation failed(shmat).\n");
					    exit(1);
					  }

					  shmctl(segment, IPC_RMID, 0);

					  return mapped;
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#define malloc huge_malloc
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					int main(int argc, char *argv[]){
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  FLOAT *x, *y;
 | 
				
			||||||
 | 
					  FLOAT _Complex result;
 | 
				
			||||||
 | 
					  blasint m, i;
 | 
				
			||||||
 | 
					  blasint inc_x=1,inc_y=1;
 | 
				
			||||||
 | 
					  int loops = 1;
 | 
				
			||||||
 | 
					  int l;
 | 
				
			||||||
 | 
					  char *p;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  int from =   1;
 | 
				
			||||||
 | 
					  int to   = 200;
 | 
				
			||||||
 | 
					  int step =   1;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  struct timeval start, stop;
 | 
				
			||||||
 | 
					  double time1,timeg;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  argc--;argv++;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  if (argc > 0) { from     = atol(*argv);		argc--; argv++;}
 | 
				
			||||||
 | 
					  if (argc > 0) { to       = MAX(atol(*argv), from);	argc--; argv++;}
 | 
				
			||||||
 | 
					  if (argc > 0) { step     = atol(*argv);		argc--; argv++;}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  if ((p = getenv("OPENBLAS_LOOPS")))  loops = atoi(p);
 | 
				
			||||||
 | 
					  if ((p = getenv("OPENBLAS_INCX")))   inc_x = atoi(p);
 | 
				
			||||||
 | 
					  if ((p = getenv("OPENBLAS_INCY")))   inc_y = atoi(p);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  fprintf(stderr, "From : %3d  To : %3d Step = %3d Inc_x = %d Inc_y = %d Loops = %d\n", from, to, step,inc_x,inc_y,loops);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
 | 
				
			||||||
 | 
					    fprintf(stderr,"Out of Memory!!\n");exit(1);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL){
 | 
				
			||||||
 | 
					    fprintf(stderr,"Out of Memory!!\n");exit(1);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#ifdef linux
 | 
				
			||||||
 | 
					  srandom(getpid());
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  fprintf(stderr, "   SIZE       Flops\n");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  for(m = from; m <= to; m += step)
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   timeg=0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   fprintf(stderr, " %6d : ", (int)m);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   for (l=0; l<loops; l++)
 | 
				
			||||||
 | 
					   {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   	for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
 | 
				
			||||||
 | 
								x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
 | 
				
			||||||
 | 
					   	}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   	for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
 | 
				
			||||||
 | 
								y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
 | 
				
			||||||
 | 
					   	}
 | 
				
			||||||
 | 
					    	gettimeofday( &start, (struct timezone *)0);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    	result = DOT (&m, x, &inc_x, y, &inc_y );
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    	gettimeofday( &stop, (struct timezone *)0);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    	time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						timeg += time1;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    timeg /= loops;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    fprintf(stderr,
 | 
				
			||||||
 | 
						    " %10.2f MFlops\n",
 | 
				
			||||||
 | 
						    COMPSIZE * COMPSIZE * 2. * (double)m / timeg * 1.e-6);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  return 0;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
 | 
				
			||||||
							
								
								
									
										5
									
								
								c_check
								
								
								
								
							
							
						
						
									
										5
									
								
								c_check
								
								
								
								
							| 
						 | 
					@ -4,6 +4,8 @@
 | 
				
			||||||
$hostos   = `uname -s | sed -e s/\-.*//`;    chop($hostos);
 | 
					$hostos   = `uname -s | sed -e s/\-.*//`;    chop($hostos);
 | 
				
			||||||
$hostarch = `uname -m | sed -e s/i.86/x86/`;chop($hostarch);
 | 
					$hostarch = `uname -m | sed -e s/i.86/x86/`;chop($hostarch);
 | 
				
			||||||
$hostarch = "x86_64" if ($hostarch eq "amd64");
 | 
					$hostarch = "x86_64" if ($hostarch eq "amd64");
 | 
				
			||||||
 | 
					$hostarch = "arm" if ($hostarch =~ /^arm.*/);
 | 
				
			||||||
 | 
					$hostarch = "arm64" if ($hostarch eq "aarch64");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
$binary = $ENV{"BINARY"};
 | 
					$binary = $ENV{"BINARY"};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -28,7 +30,7 @@ if ($ARGV[0] =~ /(.*)(-[.\d]+)/) {
 | 
				
			||||||
	$cross_suffix = $1;
 | 
						$cross_suffix = $1;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
} else {
 | 
					} else {
 | 
				
			||||||
    if ($ARGV[0] =~ /(.*-)(.*)/) {
 | 
					    if ($ARGV[0] =~ /([^\/]*-)([^\/]*$)/) {
 | 
				
			||||||
	$cross_suffix = $1;
 | 
						$cross_suffix = $1;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
| 
						 | 
					@ -55,6 +57,7 @@ $os = osf             if ($data =~ /OS_OSF/);
 | 
				
			||||||
$os = WINNT           if ($data =~ /OS_WINNT/);
 | 
					$os = WINNT           if ($data =~ /OS_WINNT/);
 | 
				
			||||||
$os = CYGWIN_NT       if ($data =~ /OS_CYGWIN_NT/);
 | 
					$os = CYGWIN_NT       if ($data =~ /OS_CYGWIN_NT/);
 | 
				
			||||||
$os = Interix         if ($data =~ /OS_INTERIX/);
 | 
					$os = Interix         if ($data =~ /OS_INTERIX/);
 | 
				
			||||||
 | 
					$os = Android         if ($data =~ /OS_ANDROID/);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
$architecture = x86    if ($data =~ /ARCH_X86/);
 | 
					$architecture = x86    if ($data =~ /ARCH_X86/);
 | 
				
			||||||
$architecture = x86_64 if ($data =~ /ARCH_X86_64/);
 | 
					$architecture = x86_64 if ($data =~ /ARCH_X86_64/);
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
							
								
								
									
										350
									
								
								cblas_noconst.h
								
								
								
								
							
							
						
						
									
										350
									
								
								cblas_noconst.h
								
								
								
								
							| 
						 | 
					@ -1,350 +0,0 @@
 | 
				
			||||||
#ifndef CBLAS_H
 | 
					 | 
				
			||||||
#define CBLAS_H
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#include <stddef.h>
 | 
					 | 
				
			||||||
#include "common.h"
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#ifdef __cplusplus
 | 
					 | 
				
			||||||
extern "C" {
 | 
					 | 
				
			||||||
	/* Assume C declarations for C++ */
 | 
					 | 
				
			||||||
#endif  /* __cplusplus */
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
/*Set the number of threads on runtime.*/
 | 
					 | 
				
			||||||
void openblas_set_num_threads(int num_threads);
 | 
					 | 
				
			||||||
void goto_set_num_threads(int num_threads);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
/*Get the number of threads on runtime.*/
 | 
					 | 
				
			||||||
int openblas_get_num_threads(void);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
/*Get the number of physical processors (cores).*/
 | 
					 | 
				
			||||||
int openblas_get_num_procs(void);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
/*Get the build configure on runtime.*/
 | 
					 | 
				
			||||||
char* openblas_get_config(void);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
/* Get the parallelization type which is used by OpenBLAS */
 | 
					 | 
				
			||||||
int openblas_get_parallel(void);
 | 
					 | 
				
			||||||
/* OpenBLAS is compiled for sequential use  */
 | 
					 | 
				
			||||||
#define OPENBLAS_SEQUENTIAL  0
 | 
					 | 
				
			||||||
/* OpenBLAS is compiled using normal threading model */
 | 
					 | 
				
			||||||
#define OPENBLAS_THREAD  1
 | 
					 | 
				
			||||||
/* OpenBLAS is compiled using OpenMP threading model */
 | 
					 | 
				
			||||||
#define OPENBLAS_OPENMP 2
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#define CBLAS_INDEX size_t
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
typedef enum CBLAS_ORDER     {CblasRowMajor=101, CblasColMajor=102} CBLAS_ORDER;
 | 
					 | 
				
			||||||
typedef enum CBLAS_TRANSPOSE {CblasNoTrans=111, CblasTrans=112, CblasConjTrans=113, CblasConjNoTrans=114} CBLAS_TRANSPOSE;
 | 
					 | 
				
			||||||
typedef enum CBLAS_UPLO      {CblasUpper=121, CblasLower=122} CBLAS_UPLO;
 | 
					 | 
				
			||||||
typedef enum CBLAS_DIAG      {CblasNonUnit=131, CblasUnit=132} CBLAS_DIAG;
 | 
					 | 
				
			||||||
typedef enum CBLAS_SIDE      {CblasLeft=141, CblasRight=142} CBLAS_SIDE;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
float  cblas_sdsdot(blasint n, float alpha, float *x, blasint incx, float *y, blasint incy);
 | 
					 | 
				
			||||||
double cblas_dsdot (blasint n, float *x, blasint incx, float *y, blasint incy);
 | 
					 | 
				
			||||||
float  cblas_sdot(blasint n, float  *x, blasint incx, float  *y, blasint incy);
 | 
					 | 
				
			||||||
double cblas_ddot(blasint n, double *x, blasint incx, double *y, blasint incy);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
openblas_complex_float  cblas_cdotu(blasint n, float  *x, blasint incx, float  *y, blasint incy);
 | 
					 | 
				
			||||||
openblas_complex_float  cblas_cdotc(blasint n, float  *x, blasint incx, float  *y, blasint incy);
 | 
					 | 
				
			||||||
openblas_complex_double cblas_zdotu(blasint n, double *x, blasint incx, double *y, blasint incy);
 | 
					 | 
				
			||||||
openblas_complex_double cblas_zdotc(blasint n, double *x, blasint incx, double *y, blasint incy);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void  cblas_cdotu_sub(blasint n, float  *x, blasint incx, float  *y, blasint incy, openblas_complex_float  *ret);
 | 
					 | 
				
			||||||
void  cblas_cdotc_sub(blasint n, float  *x, blasint incx, float  *y, blasint incy, openblas_complex_float  *ret);
 | 
					 | 
				
			||||||
void  cblas_zdotu_sub(blasint n, double *x, blasint incx, double *y, blasint incy, openblas_complex_double *ret);
 | 
					 | 
				
			||||||
void  cblas_zdotc_sub(blasint n, double *x, blasint incx, double *y, blasint incy, openblas_complex_double *ret);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
float  cblas_sasum (blasint n, float  *x, blasint incx);
 | 
					 | 
				
			||||||
double cblas_dasum (blasint n, double *x, blasint incx);
 | 
					 | 
				
			||||||
float  cblas_scasum(blasint n, float  *x, blasint incx);
 | 
					 | 
				
			||||||
double cblas_dzasum(blasint n, double *x, blasint incx);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
float  cblas_snrm2 (blasint N, float  *X, blasint incX);
 | 
					 | 
				
			||||||
double cblas_dnrm2 (blasint N, double *X, blasint incX);
 | 
					 | 
				
			||||||
float  cblas_scnrm2(blasint N, float  *X, blasint incX);
 | 
					 | 
				
			||||||
double cblas_dznrm2(blasint N, double *X, blasint incX);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
CBLAS_INDEX cblas_isamax(blasint n, float  *x, blasint incx);
 | 
					 | 
				
			||||||
CBLAS_INDEX cblas_idamax(blasint n, double *x, blasint incx);
 | 
					 | 
				
			||||||
CBLAS_INDEX cblas_icamax(blasint n, float  *x, blasint incx);
 | 
					 | 
				
			||||||
CBLAS_INDEX cblas_izamax(blasint n, double *x, blasint incx);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void cblas_saxpy(blasint n, float alpha, float *x, blasint incx, float *y, blasint incy);
 | 
					 | 
				
			||||||
void cblas_daxpy(blasint n, double alpha, double *x, blasint incx, double *y, blasint incy);
 | 
					 | 
				
			||||||
void cblas_caxpy(blasint n, float *alpha, float *x, blasint incx, float *y, blasint incy);
 | 
					 | 
				
			||||||
void cblas_zaxpy(blasint n, double *alpha, double *x, blasint incx, double *y, blasint incy);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void cblas_scopy(blasint n, float *x, blasint incx, float *y, blasint incy);
 | 
					 | 
				
			||||||
void cblas_dcopy(blasint n, double *x, blasint incx, double *y, blasint incy);
 | 
					 | 
				
			||||||
void cblas_ccopy(blasint n, float *x, blasint incx, float *y, blasint incy);
 | 
					 | 
				
			||||||
void cblas_zcopy(blasint n, double *x, blasint incx, double *y, blasint incy);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void cblas_sswap(blasint n, float *x, blasint incx, float *y, blasint incy);
 | 
					 | 
				
			||||||
void cblas_dswap(blasint n, double *x, blasint incx, double *y, blasint incy);
 | 
					 | 
				
			||||||
void cblas_cswap(blasint n, float *x, blasint incx, float *y, blasint incy);
 | 
					 | 
				
			||||||
void cblas_zswap(blasint n, double *x, blasint incx, double *y, blasint incy);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void cblas_srot(blasint N, float *X, blasint incX, float *Y, blasint incY, float c, float s);
 | 
					 | 
				
			||||||
void cblas_drot(blasint N, double *X, blasint incX, double *Y, blasint incY, double c, double  s);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void cblas_srotg(float *a, float *b, float *c, float *s);
 | 
					 | 
				
			||||||
void cblas_drotg(double *a, double *b, double *c, double *s);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void cblas_srotm(blasint N, float *X, blasint incX, float *Y, blasint incY, float *P);
 | 
					 | 
				
			||||||
void cblas_drotm(blasint N, double *X, blasint incX, double *Y, blasint incY, double *P);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void cblas_srotmg(float *d1, float *d2, float *b1, float b2, float *P);
 | 
					 | 
				
			||||||
void cblas_drotmg(double *d1, double *d2, double *b1, double b2, double *P);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void cblas_sscal(blasint N, float alpha, float *X, blasint incX);
 | 
					 | 
				
			||||||
void cblas_dscal(blasint N, double alpha, double *X, blasint incX);
 | 
					 | 
				
			||||||
void cblas_cscal(blasint N, float *alpha, float *X, blasint incX);
 | 
					 | 
				
			||||||
void cblas_zscal(blasint N, double *alpha, double *X, blasint incX);
 | 
					 | 
				
			||||||
void cblas_csscal(blasint N, float alpha, float *X, blasint incX);
 | 
					 | 
				
			||||||
void cblas_zdscal(blasint N, double alpha, double *X, blasint incX);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void cblas_sgemv(enum CBLAS_ORDER order,  enum CBLAS_TRANSPOSE trans,  blasint m, blasint n,
 | 
					 | 
				
			||||||
		 float alpha, float  *a, blasint lda,  float  *x, blasint incx,  float beta,  float  *y, blasint incy);
 | 
					 | 
				
			||||||
void cblas_dgemv(enum CBLAS_ORDER order,  enum CBLAS_TRANSPOSE trans,  blasint m, blasint n,
 | 
					 | 
				
			||||||
		 double alpha, double  *a, blasint lda,  double  *x, blasint incx,  double beta,  double  *y, blasint incy);
 | 
					 | 
				
			||||||
void cblas_cgemv(enum CBLAS_ORDER order,  enum CBLAS_TRANSPOSE trans,  blasint m, blasint n,
 | 
					 | 
				
			||||||
		 float *alpha, float  *a, blasint lda,  float  *x, blasint incx,  float *beta,  float  *y, blasint incy);
 | 
					 | 
				
			||||||
void cblas_zgemv(enum CBLAS_ORDER order,  enum CBLAS_TRANSPOSE trans,  blasint m, blasint n,
 | 
					 | 
				
			||||||
		 double *alpha, double  *a, blasint lda,  double  *x, blasint incx,  double *beta,  double  *y, blasint incy);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void cblas_sger (enum CBLAS_ORDER order, blasint M, blasint N, float   alpha, float  *X, blasint incX, float  *Y, blasint incY, float  *A, blasint lda);
 | 
					 | 
				
			||||||
void cblas_dger (enum CBLAS_ORDER order, blasint M, blasint N, double  alpha, double *X, blasint incX, double *Y, blasint incY, double *A, blasint lda);
 | 
					 | 
				
			||||||
void cblas_cgeru(enum CBLAS_ORDER order, blasint M, blasint N, float  *alpha, float  *X, blasint incX, float  *Y, blasint incY, float  *A, blasint lda);
 | 
					 | 
				
			||||||
void cblas_cgerc(enum CBLAS_ORDER order, blasint M, blasint N, float  *alpha, float  *X, blasint incX, float  *Y, blasint incY, float  *A, blasint lda);
 | 
					 | 
				
			||||||
void cblas_zgeru(enum CBLAS_ORDER order, blasint M, blasint N, double *alpha, double *X, blasint incX, double *Y, blasint incY, double *A, blasint lda);
 | 
					 | 
				
			||||||
void cblas_zgerc(enum CBLAS_ORDER order, blasint M, blasint N, double *alpha, double *X, blasint incX, double *Y, blasint incY, double *A, blasint lda);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void cblas_strsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, float *A, blasint lda, float *X, blasint incX);
 | 
					 | 
				
			||||||
void cblas_dtrsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, double *A, blasint lda, double *X, blasint incX);
 | 
					 | 
				
			||||||
void cblas_ctrsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, float *A, blasint lda, float *X, blasint incX);
 | 
					 | 
				
			||||||
void cblas_ztrsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, double *A, blasint lda, double *X, blasint incX);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void cblas_strmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, float *A, blasint lda, float *X, blasint incX);
 | 
					 | 
				
			||||||
void cblas_dtrmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, double *A, blasint lda, double *X, blasint incX);
 | 
					 | 
				
			||||||
void cblas_ctrmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, float *A, blasint lda, float *X, blasint incX);
 | 
					 | 
				
			||||||
void cblas_ztrmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, double *A, blasint lda, double *X, blasint incX);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void cblas_ssyr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float alpha, float *X, blasint incX, float *A, blasint lda);
 | 
					 | 
				
			||||||
void cblas_dsyr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *X, blasint incX, double *A, blasint lda);
 | 
					 | 
				
			||||||
void cblas_cher(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float alpha, float *X, blasint incX, float *A, blasint lda);
 | 
					 | 
				
			||||||
void cblas_zher(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *X, blasint incX, double *A, blasint lda);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void cblas_ssyr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,blasint N, float alpha, float *X,
 | 
					 | 
				
			||||||
                blasint incX, float *Y, blasint incY, float *A, blasint lda);
 | 
					 | 
				
			||||||
void cblas_dsyr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *X,
 | 
					 | 
				
			||||||
                blasint incX, double *Y, blasint incY, double *A, blasint lda);
 | 
					 | 
				
			||||||
void cblas_cher2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float *alpha, float *X, blasint incX,
 | 
					 | 
				
			||||||
                float *Y, blasint incY, float *A, blasint lda);
 | 
					 | 
				
			||||||
void cblas_zher2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double *alpha, double *X, blasint incX,
 | 
					 | 
				
			||||||
                double *Y, blasint incY, double *A, blasint lda);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void cblas_sgbmv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, blasint M, blasint N,
 | 
					 | 
				
			||||||
                 blasint KL, blasint KU, float alpha, float *A, blasint lda, float *X, blasint incX, float beta, float *Y, blasint incY);
 | 
					 | 
				
			||||||
void cblas_dgbmv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, blasint M, blasint N,
 | 
					 | 
				
			||||||
                 blasint KL, blasint KU, double alpha, double *A, blasint lda, double *X, blasint incX, double beta, double *Y, blasint incY);
 | 
					 | 
				
			||||||
void cblas_cgbmv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, blasint M, blasint N,
 | 
					 | 
				
			||||||
                 blasint KL, blasint KU, float *alpha, float *A, blasint lda, float *X, blasint incX, float *beta, float *Y, blasint incY);
 | 
					 | 
				
			||||||
void cblas_zgbmv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, blasint M, blasint N,
 | 
					 | 
				
			||||||
                 blasint KL, blasint KU, double *alpha, double *A, blasint lda, double *X, blasint incX, double *beta, double *Y, blasint incY);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void cblas_ssbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, blasint K, float alpha, float *A,
 | 
					 | 
				
			||||||
                 blasint lda, float *X, blasint incX, float beta, float *Y, blasint incY);
 | 
					 | 
				
			||||||
void cblas_dsbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, blasint K, double alpha, double *A,
 | 
					 | 
				
			||||||
                 blasint lda, double *X, blasint incX, double beta, double *Y, blasint incY);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void cblas_stbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
 | 
					 | 
				
			||||||
                 blasint N, blasint K, float *A, blasint lda, float *X, blasint incX);
 | 
					 | 
				
			||||||
void cblas_dtbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
 | 
					 | 
				
			||||||
                 blasint N, blasint K, double *A, blasint lda, double *X, blasint incX);
 | 
					 | 
				
			||||||
void cblas_ctbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
 | 
					 | 
				
			||||||
                 blasint N, blasint K, float *A, blasint lda, float *X, blasint incX);
 | 
					 | 
				
			||||||
void cblas_ztbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
 | 
					 | 
				
			||||||
                 blasint N, blasint K, double *A, blasint lda, double *X, blasint incX);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void cblas_stbsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
 | 
					 | 
				
			||||||
                 blasint N, blasint K, float *A, blasint lda, float *X, blasint incX);
 | 
					 | 
				
			||||||
void cblas_dtbsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
 | 
					 | 
				
			||||||
                 blasint N, blasint K, double *A, blasint lda, double *X, blasint incX);
 | 
					 | 
				
			||||||
void cblas_ctbsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
 | 
					 | 
				
			||||||
                 blasint N, blasint K, float *A, blasint lda, float *X, blasint incX);
 | 
					 | 
				
			||||||
void cblas_ztbsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
 | 
					 | 
				
			||||||
                 blasint N, blasint K, double *A, blasint lda, double *X, blasint incX);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void cblas_stpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
 | 
					 | 
				
			||||||
                 blasint N, float *Ap, float *X, blasint incX);
 | 
					 | 
				
			||||||
void cblas_dtpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
 | 
					 | 
				
			||||||
                 blasint N, double *Ap, double *X, blasint incX);
 | 
					 | 
				
			||||||
void cblas_ctpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
 | 
					 | 
				
			||||||
                 blasint N, float *Ap, float *X, blasint incX);
 | 
					 | 
				
			||||||
void cblas_ztpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
 | 
					 | 
				
			||||||
                 blasint N, double *Ap, double *X, blasint incX);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void cblas_stpsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
 | 
					 | 
				
			||||||
                 blasint N, float *Ap, float *X, blasint incX);
 | 
					 | 
				
			||||||
void cblas_dtpsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
 | 
					 | 
				
			||||||
                 blasint N, double *Ap, double *X, blasint incX);
 | 
					 | 
				
			||||||
void cblas_ctpsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
 | 
					 | 
				
			||||||
                 blasint N, float *Ap, float *X, blasint incX);
 | 
					 | 
				
			||||||
void cblas_ztpsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
 | 
					 | 
				
			||||||
                 blasint N, double *Ap, double *X, blasint incX);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void cblas_ssymv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float alpha, float *A,
 | 
					 | 
				
			||||||
                 blasint lda, float *X, blasint incX, float beta, float *Y, blasint incY);
 | 
					 | 
				
			||||||
void cblas_dsymv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *A,
 | 
					 | 
				
			||||||
                 blasint lda, double *X, blasint incX, double beta, double *Y, blasint incY);
 | 
					 | 
				
			||||||
void cblas_chemv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float *alpha, float *A,
 | 
					 | 
				
			||||||
                 blasint lda, float *X, blasint incX, float *beta, float *Y, blasint incY);
 | 
					 | 
				
			||||||
void cblas_zhemv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double *alpha, double *A,
 | 
					 | 
				
			||||||
                 blasint lda, double *X, blasint incX, double *beta, double *Y, blasint incY);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void cblas_sspmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float alpha, float *Ap,
 | 
					 | 
				
			||||||
                 float *X, blasint incX, float beta, float *Y, blasint incY);
 | 
					 | 
				
			||||||
void cblas_dspmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *Ap,
 | 
					 | 
				
			||||||
                 double *X, blasint incX, double beta, double *Y, blasint incY);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void cblas_sspr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float alpha, float *X, blasint incX, float *Ap);
 | 
					 | 
				
			||||||
void cblas_dspr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *X, blasint incX, double *Ap);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void cblas_chpr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float alpha, float *X, blasint incX, float *A);
 | 
					 | 
				
			||||||
void cblas_zhpr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *X,blasint incX, double *A);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void cblas_sspr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float alpha, float *X, blasint incX, float *Y, blasint incY, float *A);
 | 
					 | 
				
			||||||
void cblas_dspr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *X, blasint incX, double *Y, blasint incY, double *A);
 | 
					 | 
				
			||||||
void cblas_chpr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float *alpha, float *X, blasint incX, float *Y, blasint incY, float *Ap);
 | 
					 | 
				
			||||||
void cblas_zhpr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double *alpha, double *X, blasint incX, double *Y, blasint incY, double *Ap);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void cblas_chbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, blasint K,
 | 
					 | 
				
			||||||
		 float *alpha, float *A, blasint lda, float *X, blasint incX, float *beta, float *Y, blasint incY);
 | 
					 | 
				
			||||||
void cblas_zhbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, blasint K,
 | 
					 | 
				
			||||||
		 double *alpha, double *A, blasint lda, double *X, blasint incX, double *beta, double *Y, blasint incY);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void cblas_chpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N,
 | 
					 | 
				
			||||||
		 float *alpha, float *Ap, float *X, blasint incX, float *beta, float *Y, blasint incY);
 | 
					 | 
				
			||||||
void cblas_zhpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N,
 | 
					 | 
				
			||||||
		 double *alpha, double *Ap, double *X, blasint incX, double *beta, double *Y, blasint incY);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void cblas_sgemm(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, blasint M, blasint N, blasint K,
 | 
					 | 
				
			||||||
		 float alpha, float *A, blasint lda, float *B, blasint ldb, float beta, float *C, blasint ldc);
 | 
					 | 
				
			||||||
void cblas_dgemm(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, blasint M, blasint N, blasint K,
 | 
					 | 
				
			||||||
		 double alpha, double *A, blasint lda, double *B, blasint ldb, double beta, double *C, blasint ldc);
 | 
					 | 
				
			||||||
void cblas_cgemm(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, blasint M, blasint N, blasint K,
 | 
					 | 
				
			||||||
		 float *alpha, float *A, blasint lda, float *B, blasint ldb, float *beta, float *C, blasint ldc);
 | 
					 | 
				
			||||||
void cblas_cgemm3m(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, blasint M, blasint N, blasint K,
 | 
					 | 
				
			||||||
		 float *alpha, float *A, blasint lda, float *B, blasint ldb, float *beta, float *C, blasint ldc);
 | 
					 | 
				
			||||||
void cblas_zgemm(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, blasint M, blasint N, blasint K,
 | 
					 | 
				
			||||||
		 double *alpha, double *A, blasint lda, double *B, blasint ldb, double *beta, double *C, blasint ldc);
 | 
					 | 
				
			||||||
void cblas_zgemm3m(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, blasint M, blasint N, blasint K,
 | 
					 | 
				
			||||||
		 double *alpha, double *A, blasint lda, double *B, blasint ldb, double *beta, double *C, blasint ldc);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void cblas_ssymm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, blasint M, blasint N,
 | 
					 | 
				
			||||||
                 float alpha, float *A, blasint lda, float *B, blasint ldb, float beta, float *C, blasint ldc);
 | 
					 | 
				
			||||||
void cblas_dsymm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, blasint M, blasint N,
 | 
					 | 
				
			||||||
                 double alpha, double *A, blasint lda, double *B, blasint ldb, double beta, double *C, blasint ldc);
 | 
					 | 
				
			||||||
void cblas_csymm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, blasint M, blasint N,
 | 
					 | 
				
			||||||
                 float *alpha, float *A, blasint lda, float *B, blasint ldb, float *beta, float *C, blasint ldc);
 | 
					 | 
				
			||||||
void cblas_zsymm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, blasint M, blasint N,
 | 
					 | 
				
			||||||
                 double *alpha, double *A, blasint lda, double *B, blasint ldb, double *beta, double *C, blasint ldc);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void cblas_ssyrk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans,
 | 
					 | 
				
			||||||
		 blasint N, blasint K, float alpha, float *A, blasint lda, float beta, float *C, blasint ldc);
 | 
					 | 
				
			||||||
void cblas_dsyrk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans,
 | 
					 | 
				
			||||||
		 blasint N, blasint K, double alpha, double *A, blasint lda, double beta, double *C, blasint ldc);
 | 
					 | 
				
			||||||
void cblas_csyrk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans,
 | 
					 | 
				
			||||||
		 blasint N, blasint K, float *alpha, float *A, blasint lda, float *beta, float *C, blasint ldc);
 | 
					 | 
				
			||||||
void cblas_zsyrk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans,
 | 
					 | 
				
			||||||
		 blasint N, blasint K, double *alpha, double *A, blasint lda, double *beta, double *C, blasint ldc);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void cblas_ssyr2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans,
 | 
					 | 
				
			||||||
		  blasint N, blasint K, float alpha, float *A, blasint lda, float *B, blasint ldb, float beta, float *C, blasint ldc);
 | 
					 | 
				
			||||||
void cblas_dsyr2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans,
 | 
					 | 
				
			||||||
		  blasint N, blasint K, double alpha, double *A, blasint lda, double *B, blasint ldb, double beta, double *C, blasint ldc);
 | 
					 | 
				
			||||||
void cblas_csyr2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans,
 | 
					 | 
				
			||||||
		  blasint N, blasint K, float *alpha, float *A, blasint lda, float *B, blasint ldb, float *beta, float *C, blasint ldc);
 | 
					 | 
				
			||||||
void cblas_zsyr2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans,
 | 
					 | 
				
			||||||
		  blasint N, blasint K, double *alpha, double *A, blasint lda, double *B, blasint ldb, double *beta, double *C, blasint ldc);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void cblas_strmm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA,
 | 
					 | 
				
			||||||
                 enum CBLAS_DIAG Diag, blasint M, blasint N, float alpha, float *A, blasint lda, float *B, blasint ldb);
 | 
					 | 
				
			||||||
void cblas_dtrmm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA,
 | 
					 | 
				
			||||||
                 enum CBLAS_DIAG Diag, blasint M, blasint N, double alpha, double *A, blasint lda, double *B, blasint ldb);
 | 
					 | 
				
			||||||
void cblas_ctrmm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA,
 | 
					 | 
				
			||||||
                 enum CBLAS_DIAG Diag, blasint M, blasint N, float *alpha, float *A, blasint lda, float *B, blasint ldb);
 | 
					 | 
				
			||||||
void cblas_ztrmm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA,
 | 
					 | 
				
			||||||
                 enum CBLAS_DIAG Diag, blasint M, blasint N, double *alpha, double *A, blasint lda, double *B, blasint ldb);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void cblas_strsm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA,
 | 
					 | 
				
			||||||
                 enum CBLAS_DIAG Diag, blasint M, blasint N, float alpha, float *A, blasint lda, float *B, blasint ldb);
 | 
					 | 
				
			||||||
void cblas_dtrsm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA,
 | 
					 | 
				
			||||||
                 enum CBLAS_DIAG Diag, blasint M, blasint N, double alpha, double *A, blasint lda, double *B, blasint ldb);
 | 
					 | 
				
			||||||
void cblas_ctrsm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA,
 | 
					 | 
				
			||||||
                 enum CBLAS_DIAG Diag, blasint M, blasint N, float *alpha, float *A, blasint lda, float *B, blasint ldb);
 | 
					 | 
				
			||||||
void cblas_ztrsm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA,
 | 
					 | 
				
			||||||
                 enum CBLAS_DIAG Diag, blasint M, blasint N, double *alpha, double *A, blasint lda, double *B, blasint ldb);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void cblas_chemm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, blasint M, blasint N,
 | 
					 | 
				
			||||||
                 float *alpha, float *A, blasint lda, float *B, blasint ldb, float *beta, float *C, blasint ldc);
 | 
					 | 
				
			||||||
void cblas_zhemm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, blasint M, blasint N,
 | 
					 | 
				
			||||||
                 double *alpha, double *A, blasint lda, double *B, blasint ldb, double *beta, double *C, blasint ldc);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void cblas_cherk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans, blasint N, blasint K,
 | 
					 | 
				
			||||||
                 float alpha, float *A, blasint lda, float beta, float *C, blasint ldc);
 | 
					 | 
				
			||||||
void cblas_zherk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans, blasint N, blasint K,
 | 
					 | 
				
			||||||
                 double alpha, double *A, blasint lda, double beta, double *C, blasint ldc);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void cblas_cher2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans, blasint N, blasint K,
 | 
					 | 
				
			||||||
                  float *alpha, float *A, blasint lda, float *B, blasint ldb, float beta, float *C, blasint ldc);
 | 
					 | 
				
			||||||
void cblas_zher2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans, blasint N, blasint K,
 | 
					 | 
				
			||||||
                  double *alpha, double *A, blasint lda, double *B, blasint ldb, double beta, double *C, blasint ldc);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void cblas_xerbla(blasint p, char *rout, char *form, ...);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
/*** BLAS extensions ***/
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void cblas_saxpby(blasint n, float alpha, float *x, blasint incx,float beta, float *y, blasint incy);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void cblas_daxpby(blasint n, double alpha, double *x, blasint incx,double beta, double *y, blasint incy);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void cblas_caxpby(blasint n, float *alpha, float *x, blasint incx,float *beta, float *y, blasint incy);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void cblas_zaxpby(blasint n, double *alpha, double *x, blasint incx,double *beta, double *y, blasint incy);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void cblas_somatcopy( enum CBLAS_ORDER CORDER,  enum CBLAS_TRANSPOSE CTRANS,  blasint crows,  blasint ccols,  float calpha,  float *a, 
 | 
					 | 
				
			||||||
		      blasint clda, float *b,  blasint cldb); 
 | 
					 | 
				
			||||||
void cblas_domatcopy( enum CBLAS_ORDER CORDER,  enum CBLAS_TRANSPOSE CTRANS,  blasint crows,  blasint ccols,  double calpha,  double *a,
 | 
					 | 
				
			||||||
		      blasint clda, double *b,  blasint cldb); 
 | 
					 | 
				
			||||||
void cblas_comatcopy( enum CBLAS_ORDER CORDER,  enum CBLAS_TRANSPOSE CTRANS,  blasint crows,  blasint ccols,  void* calpha,  void* a, 
 | 
					 | 
				
			||||||
		      blasint clda, void *b,  blasint cldb); 
 | 
					 | 
				
			||||||
void cblas_zomatcopy( enum CBLAS_ORDER CORDER,  enum CBLAS_TRANSPOSE CTRANS,  blasint crows,  blasint ccols,  void* calpha,  void* a, 
 | 
					 | 
				
			||||||
		      blasint clda,  void *b,  blasint cldb); 
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void cblas_simatcopy( enum CBLAS_ORDER CORDER,  enum CBLAS_TRANSPOSE CTRANS,  blasint crows,  blasint ccols,  float calpha, float *a, 
 | 
					 | 
				
			||||||
		      blasint clda,  blasint cldb); 
 | 
					 | 
				
			||||||
void cblas_dimatcopy( enum CBLAS_ORDER CORDER,  enum CBLAS_TRANSPOSE CTRANS,  blasint crows,  blasint ccols,  double calpha, double *a,
 | 
					 | 
				
			||||||
		      blasint clda,  blasint cldb); 
 | 
					 | 
				
			||||||
void cblas_cimatcopy( enum CBLAS_ORDER CORDER,  enum CBLAS_TRANSPOSE CTRANS,  blasint crows,  blasint ccols,  float* calpha, float* a, 
 | 
					 | 
				
			||||||
		      blasint clda,  blasint cldb); 
 | 
					 | 
				
			||||||
void cblas_zimatcopy( enum CBLAS_ORDER CORDER,  enum CBLAS_TRANSPOSE CTRANS,  blasint crows,  blasint ccols,  double* calpha, double* a, 
 | 
					 | 
				
			||||||
		      blasint clda,  blasint cldb); 
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void cblas_sgeadd( enum CBLAS_ORDER CORDER, blasint crows,  blasint ccols,  float calpha, float *a,  blasint clda,  float cbeta, 
 | 
					 | 
				
			||||||
		  float *c,  blasint cldc); 
 | 
					 | 
				
			||||||
void cblas_dgeadd( enum CBLAS_ORDER CORDER, blasint crows,  blasint ccols,  double calpha, double *a,  blasint clda,  double cbeta, 
 | 
					 | 
				
			||||||
		  double *c,  blasint cldc); 
 | 
					 | 
				
			||||||
void cblas_cgeadd( enum CBLAS_ORDER CORDER, blasint crows,  blasint ccols,  float *calpha, float *a,  blasint clda,  float *cbeta, 
 | 
					 | 
				
			||||||
		  float *c,  blasint cldc); 
 | 
					 | 
				
			||||||
void cblas_zgeadd( enum CBLAS_ORDER CORDER, blasint crows,  blasint ccols,  double *calpha, double *a,  blasint clda,  double *cbeta, 
 | 
					 | 
				
			||||||
		  double *c,  blasint cldc); 
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#ifdef __cplusplus
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
#endif  /* __cplusplus */
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#endif
 | 
					 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,115 @@
 | 
				
			||||||
 | 
					##
 | 
				
			||||||
 | 
					## Author: Hank Anderson <hank@statease.com>
 | 
				
			||||||
 | 
					## Description: Ported from portion of OpenBLAS/Makefile.system
 | 
				
			||||||
 | 
					##              Sets various variables based on architecture.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Binary-mode and extended-precision settings for x86 / x86_64.
					# Reads ARCH, BINARY, NO_EXPRECISION, F_COMPILER and the C compiler id;
					# may set NO_BINARY_MODE and EXPRECISION, and appends to CCOMMON_OPT / FCOMMON_OPT.
					# Expansions are quoted so that an empty or undefined variable compares as ""
					# instead of turning the if() into a syntax error.
					if ("${ARCH}" STREQUAL "x86" OR "${ARCH}" STREQUAL "x86_64")

					  # 32-bit x86 with no BINARY value: flag that no binary mode was chosen.
					  if ("${ARCH}" STREQUAL "x86" AND NOT BINARY)
					    set(NO_BINARY_MODE 1)
					  endif ()

					  if (NOT NO_EXPRECISION)
					    if ("${F_COMPILER}" MATCHES "GFORTRAN")
					      # Compare the compiler *id*: CMAKE_C_COMPILER holds the path to the
					      # compiler executable and would never equal "GNU" or "Clang".
					      # NOTE(review): "LSB" is carried over from Makefile.system; it does not look
					      # like an id CMake reports - confirm whether this branch is still needed.
					      if ("${CMAKE_C_COMPILER_ID}" STREQUAL "GNU" OR "${CMAKE_C_COMPILER_ID}" STREQUAL "LSB")
					        set(EXPRECISION 1)
					        set(CCOMMON_OPT "${CCOMMON_OPT} -DEXPRECISION -m128bit-long-double")
					        set(FCOMMON_OPT "${FCOMMON_OPT} -m128bit-long-double")
					      endif ()
					      # With Clang only the Fortran side receives -m128bit-long-double.
					      if ("${CMAKE_C_COMPILER_ID}" STREQUAL "Clang")
					        set(EXPRECISION 1)
					        set(CCOMMON_OPT "${CCOMMON_OPT} -DEXPRECISION")
					        set(FCOMMON_OPT "${FCOMMON_OPT} -m128bit-long-double")
					      endif ()
					    endif ()
					  endif ()
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Intel C compiler: append -wd981 to the common C options.
					# The check uses CMAKE_C_COMPILER_ID; CMAKE_C_COMPILER is the compiler's path
					# and would never compare equal to "Intel".
					if ("${CMAKE_C_COMPILER_ID}" STREQUAL "Intel")
					  set(CCOMMON_OPT "${CCOMMON_OPT} -wd981")
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# When USE_OPENMP is set, append the OpenMP switch for the active C compiler
					# to CCOMMON_OPT (and -lstdc++ to CEXTRALIB for Open64).
					# Branches are keyed on CMAKE_C_COMPILER_ID; CMAKE_C_COMPILER is the compiler's
					# path and never matches an id string. A compiler has a single id, so the
					# branches are mutually exclusive and written as one if/elseif chain.
					if (USE_OPENMP)

					  # NOTE(review): "LSB" and "OPEN64" come from Makefile.system and may not be
					  # ids that CMake reports - confirm.
					  if ("${CMAKE_C_COMPILER_ID}" STREQUAL "GNU" OR "${CMAKE_C_COMPILER_ID}" STREQUAL "LSB")
					    set(CCOMMON_OPT "${CCOMMON_OPT} -fopenmp")

					  elseif ("${CMAKE_C_COMPILER_ID}" STREQUAL "Clang")
					    message(WARNING "Clang doesn't support OpenMP yet.")
					    set(CCOMMON_OPT "${CCOMMON_OPT} -fopenmp")

					  elseif ("${CMAKE_C_COMPILER_ID}" STREQUAL "Intel")
					    set(CCOMMON_OPT "${CCOMMON_OPT} -openmp")

					  elseif ("${CMAKE_C_COMPILER_ID}" STREQUAL "PGI")
					    set(CCOMMON_OPT "${CCOMMON_OPT} -mp")

					  elseif ("${CMAKE_C_COMPILER_ID}" STREQUAL "OPEN64")
					    set(CCOMMON_OPT "${CCOMMON_OPT} -mp")
					    set(CEXTRALIB "${CEXTRALIB} -lstdc++")

					  # CMake spells this id "PathScale"; the upper-case form is kept for compatibility.
					  elseif ("${CMAKE_C_COMPILER_ID}" STREQUAL "PathScale" OR "${CMAKE_C_COMPILER_ID}" STREQUAL "PATHSCALE")
					    set(CCOMMON_OPT "${CCOMMON_OPT} -mp")
					  endif ()
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# When DYNAMIC_ARCH is requested, set DYNAMIC_CORE to the space-separated list
					# of core names for the target architecture. If DYNAMIC_CORE is still empty
					# afterwards, DYNAMIC_ARCH is unset.
					# ARCH is quoted so an empty value compares as "" instead of breaking if().
					if (DYNAMIC_ARCH)
					  if ("${ARCH}" STREQUAL "x86")
					    set(DYNAMIC_CORE "KATMAI COPPERMINE NORTHWOOD PRESCOTT BANIAS CORE2 PENRYN DUNNINGTON NEHALEM ATHLON OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO")

					  elseif ("${ARCH}" STREQUAL "x86_64")
					    set(DYNAMIC_CORE "PRESCOTT CORE2 PENRYN DUNNINGTON NEHALEM OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO")
					    # Extra cores are appended unless NO_AVX / NO_AVX2 is set.
					    if (NOT NO_AVX)
					      set(DYNAMIC_CORE "${DYNAMIC_CORE} SANDYBRIDGE BULLDOZER PILEDRIVER STEAMROLLER")
					    endif ()
					    if (NOT NO_AVX2)
					      set(DYNAMIC_CORE "${DYNAMIC_CORE} HASWELL")
					    endif ()
					  endif ()

					  if (NOT DYNAMIC_CORE)
					    unset(DYNAMIC_ARCH)
					  endif ()
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# ia64: sets NO_BINARY_MODE and BINARY_DEFINED.
					# Expansions are quoted so empty variables do not break if(), and the compiler
					# check uses CMAKE_C_COMPILER_ID (CMAKE_C_COMPILER is the compiler's path).
					if ("${ARCH}" STREQUAL "ia64")
					  set(NO_BINARY_MODE 1)
					  set(BINARY_DEFINED 1)

					  if ("${F_COMPILER}" MATCHES "GFORTRAN")
					    if ("${CMAKE_C_COMPILER_ID}" STREQUAL "GNU")
					      # The extended-precision settings are commented out here, so this
					      # branch currently changes nothing:
					      # EXPRECISION	= 1
					      # CCOMMON_OPT	+= -DEXPRECISION
					    endif ()
					  endif ()
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Architectures that set NO_BINARY_MODE.
					# ARCH is quoted so an empty value compares as "" instead of breaking if().

					# mips64 sets only NO_BINARY_MODE.
					if ("${ARCH}" STREQUAL "mips64")
					  set(NO_BINARY_MODE 1)
					endif ()

					# alpha, arm and arm64 additionally set BINARY_DEFINED.
					if ("${ARCH}" STREQUAL "alpha" OR "${ARCH}" STREQUAL "arm" OR "${ARCH}" STREQUAL "arm64")
					  set(NO_BINARY_MODE 1)
					  set(BINARY_DEFINED 1)
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,89 @@
 | 
				
			||||||
 | 
					##
 | 
				
			||||||
 | 
					## Author: Hank Anderson <hank@statease.com>
 | 
				
			||||||
 | 
					## Description: Ported from the OpenBLAS/c_check perl script.
 | 
				
			||||||
 | 
					##              This is triggered by prebuild.cmake and runs before any of the code is built.
 | 
				
			||||||
 | 
					##              Writes the config defines to ${TARGET_CONF}. (No Makefile.conf is written by this port.)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# CMake vars set by this file:
 | 
				
			||||||
 | 
					# OSNAME (use CMAKE_SYSTEM_NAME)
 | 
				
			||||||
 | 
					# ARCH
 | 
				
			||||||
 | 
					# C_COMPILER (use CMAKE_C_COMPILER)
 | 
				
			||||||
 | 
					# BINARY32
 | 
				
			||||||
 | 
					# BINARY64
 | 
				
			||||||
 | 
					# FU
 | 
				
			||||||
 | 
					# CROSS_SUFFIX
 | 
				
			||||||
 | 
					# CROSS
 | 
				
			||||||
 | 
					# CEXTRALIB
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Defines set by this file:
 | 
				
			||||||
 | 
					# OS_
 | 
				
			||||||
 | 
					# ARCH_
 | 
				
			||||||
 | 
					# C_
 | 
				
			||||||
 | 
					# __32BIT__
 | 
				
			||||||
 | 
					# __64BIT__
 | 
				
			||||||
 | 
					# FUNDERSCORE
 | 
				
			||||||
 | 
					# PTHREAD_CREATE_FUNC
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# N.B. c_check (and ctest.c) is not cross-platform, so instead try to use CMake variables.
 | 
				
			||||||
 | 
					# FU is the value later written out as FUNDERSCORE:
					# "_" for Apple and MSVC builds, empty for everything else.
					if (APPLE OR MSVC)
					  set(FU "_")
					else ()
					  set(FU "")
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Convert CMake vars into the format that OpenBLAS expects
 | 
				
			||||||
 | 
					# HOST_OS is the upper-cased CMAKE_SYSTEM_NAME, with "WINDOWS" renamed to "WINNT".
					# The expansions are quoted so that an empty system name yields an empty
					# HOST_OS instead of a malformed string()/if() call.
					string(TOUPPER "${CMAKE_SYSTEM_NAME}" HOST_OS)
					if ("${HOST_OS}" STREQUAL "WINDOWS")
					  set(HOST_OS WINNT)
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Unless the caller already defined BINARY, derive it from the pointer size:
					# 64 when void* is 8 bytes, 32 otherwise.
					if (NOT DEFINED BINARY)
					  if (CMAKE_SIZEOF_VOID_P EQUAL 8)
					    set(BINARY 64)
					  else ()
					    set(BINARY 32)
					  endif ()
					endif ()

					# Set exactly one of the BINARY32 / BINARY64 flags.
					if (NOT BINARY EQUAL 64)
					  set(BINARY32 1)
					else ()
					  set(BINARY64 1)
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# CMake docs define these:
 | 
				
			||||||
 | 
					# CMAKE_SYSTEM_PROCESSOR - The name of the CPU CMake is building for.
 | 
				
			||||||
 | 
					# CMAKE_HOST_SYSTEM_PROCESSOR - The name of the CPU CMake is running on.
 | 
				
			||||||
 | 
					#
 | 
				
			||||||
 | 
					# TODO: CMAKE_SYSTEM_PROCESSOR doesn't seem to be correct - instead get it from the compiler a la c_check
 | 
				
			||||||
 | 
					# Derive ARCH from CMAKE_SYSTEM_PROCESSOR and normalise its spelling.
					# Every comparison quotes ${ARCH}: if CMAKE_SYSTEM_PROCESSOR is empty, the
					# unquoted form expands to nothing and if() fails with a syntax error.
					set(ARCH ${CMAKE_SYSTEM_PROCESSOR})
					if ("${ARCH}" STREQUAL "AMD64")
					  set(ARCH "x86_64")
					endif ()

					# If you are using a 32-bit compiler on a 64-bit system CMAKE_SYSTEM_PROCESSOR will be wrong
					if ("${ARCH}" STREQUAL "x86_64" AND BINARY EQUAL 32)
					  set(ARCH x86)
					endif ()

					# Upper-case "X86" is folded to the lower-case spelling.
					if ("${ARCH}" STREQUAL "X86")
					  set(ARCH x86)
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# COMPILER_ID feeds the C_<id> define written to the config file, so it is taken
					# from the C compiler. The C++ compiler id is used only as a fallback when no
					# C compiler id is available, which keeps the previous behaviour in that case.
					if (NOT "${CMAKE_C_COMPILER_ID}" STREQUAL "")
					  set(COMPILER_ID "${CMAKE_C_COMPILER_ID}")
					else ()
					  set(COMPILER_ID "${CMAKE_CXX_COMPILER_ID}")
					endif ()

					# CMake's "GNU" id is written out as "GCC".
					if ("${COMPILER_ID}" STREQUAL "GNU")
					  set(COMPILER_ID "GCC")
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Write the OS / architecture / compiler / bitness / underscore defines to
					# ${TARGET_CONF}, replacing any previous content of that file.
					# ARCH and TARGET_CONF are quoted: an unquoted path containing spaces or ';'
					# would be split into several arguments, and an empty one would make the first
					# content string act as the file name.
					string(TOUPPER "${ARCH}" UC_ARCH)

					file(WRITE "${TARGET_CONF}"
					  "#define OS_${HOST_OS}\t1\n"
					  "#define ARCH_${UC_ARCH}\t1\n"
					  "#define C_${COMPILER_ID}\t1\n"
					  "#define __${BINARY}BIT__\t1\n"
					  "#define FUNDERSCORE\t${FU}\n")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,103 @@
 | 
				
			||||||
 | 
					##
 | 
				
			||||||
 | 
					## Author: Hank Anderson <hank@statease.com>
 | 
				
			||||||
 | 
					## Description: Ported from portion of OpenBLAS/Makefile.system
 | 
				
			||||||
 | 
					##              Sets C related variables.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Flags for GCC-compatible C compilers (GNU, LSB, Clang).
					# Reads QUIET_MAKE, NO_BINARY_MODE, ARCH, CORE, OSNAME, BINARY64 and BINARY_DEFINED;
					# appends to CCOMMON_OPT / FCOMMON_OPT / COMMON_PROF and may set BINARY_DEFINED.
					# The compiler is identified via CMAKE_C_COMPILER_ID; CMAKE_C_COMPILER is the
					# path of the compiler executable and never equals an id such as "GNU".
					# All expansions inside if() are quoted so unset variables compare as "".
					# NOTE(review): "LSB" comes from Makefile.system and may not be an id CMake reports - confirm.
					if ("${CMAKE_C_COMPILER_ID}" STREQUAL "GNU" OR "${CMAKE_C_COMPILER_ID}" STREQUAL "LSB" OR "${CMAKE_C_COMPILER_ID}" STREQUAL "Clang")

					  set(CCOMMON_OPT "${CCOMMON_OPT} -Wall")
					  set(COMMON_PROF "${COMMON_PROF} -fno-inline")
					  set(NO_UNINITIALIZED_WARN "-Wno-uninitialized")

					  if (QUIET_MAKE)
					    set(CCOMMON_OPT "${CCOMMON_OPT} ${NO_UNINITIALIZED_WARN} -Wno-unused")
					  endif ()

					  if (NO_BINARY_MODE)

					    # mips64 selects its ABI explicitly instead of using -m32/-m64.
					    if ("${ARCH}" STREQUAL "mips64")
					      if (BINARY64)
					        set(CCOMMON_OPT "${CCOMMON_OPT} -mabi=64")
					      else ()
					        set(CCOMMON_OPT "${CCOMMON_OPT} -mabi=n32")
					      endif ()
					      set(BINARY_DEFINED 1)
					    endif ()

					    # Both Loongson cores get the same -march for C and Fortran.
					    if ("${CORE}" STREQUAL "LOONGSON3A" OR "${CORE}" STREQUAL "LOONGSON3B")
					      set(CCOMMON_OPT "${CCOMMON_OPT} -march=mips64")
					      set(FCOMMON_OPT "${FCOMMON_OPT} -march=mips64")
					    endif ()

					    # NOTE(review): OSNAME is not set anywhere in the visible scripts - confirm
					    # whether CMAKE_SYSTEM_NAME (or HOST_OS) should be tested here instead.
					    if ("${OSNAME}" STREQUAL "AIX")
					      set(BINARY_DEFINED 1)
					    endif ()
					  endif ()

					  # No architecture-specific choice was made above: use -m64 / -m32.
					  if (NOT BINARY_DEFINED)
					    if (BINARY64)
					      set(CCOMMON_OPT "${CCOMMON_OPT} -m64")
					    else ()
					      set(CCOMMON_OPT "${CCOMMON_OPT} -m32")
					    endif ()
					  endif ()
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# PGI C compiler: select the target processor flag by word size.
					# The compiler ID is compared, since CMAKE_C_COMPILER is the executable path.
					if ("${CMAKE_C_COMPILER_ID}" STREQUAL "PGI")
					  if (BINARY64)
					    set(CCOMMON_OPT "${CCOMMON_OPT} -tp p7-64")
					  else ()
					    set(CCOMMON_OPT "${CCOMMON_OPT} -tp p7")
					  endif ()
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# PathScale C compiler: -m64/-m32 by word size.
					# The compiler ID is compared, since CMAKE_C_COMPILER is the executable path.
					# "PathScale" is the spelling CMake reports; "PATHSCALE" is kept for
					# callers that set the legacy spelling themselves.
					if ("${CMAKE_C_COMPILER_ID}" STREQUAL "PATHSCALE" OR "${CMAKE_C_COMPILER_ID}" STREQUAL "PathScale")
					  if (BINARY64)
					    set(CCOMMON_OPT "${CCOMMON_OPT} -m64")
					  else ()
					    set(CCOMMON_OPT "${CCOMMON_OPT} -m32")
					  endif ()
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Open64 C compiler flags.
					# The compiler ID is compared, since CMAKE_C_COMPILER is the executable path.
					# NOTE(review): CMake does not appear to define an "OPEN64" compiler ID, so
					# this branch may only fire when the ID is set externally - confirm.
					if ("${CMAKE_C_COMPILER_ID}" STREQUAL "OPEN64")

					  if ("${ARCH}" STREQUAL "mips64")

					    # mips64 uses the -n32/-n64 ABI switches.
					    if (BINARY64)
					      set(CCOMMON_OPT "${CCOMMON_OPT} -n64")
					    else ()
					      set(CCOMMON_OPT "${CCOMMON_OPT} -n32")
					    endif ()

					    # Both Loongson cores take the same extra options.
					    if ("${CORE}" STREQUAL "LOONGSON3A" OR "${CORE}" STREQUAL "LOONGSON3B")
					      set(CCOMMON_OPT "${CCOMMON_OPT} -loongson3 -static")
					    endif ()

					  else ()

					    # 64-bit builds get -m64 and 32-bit builds -m32. The two branches were
					    # swapped before, unlike every other compiler block in this file.
					    if (BINARY64)
					      set(CCOMMON_OPT "${CCOMMON_OPT} -m64")
					    else ()
					      set(CCOMMON_OPT "${CCOMMON_OPT} -m32")
					    endif ()
					  endif ()
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Sun C compiler: suppress warnings and pick the word size from ARCH.
					# The compiler ID is compared, since CMAKE_C_COMPILER is the executable path.
					# "SunPro" is the spelling CMake reports; "SUN" is kept for callers that
					# set the legacy spelling themselves.
					if ("${CMAKE_C_COMPILER_ID}" STREQUAL "SUN" OR "${CMAKE_C_COMPILER_ID}" STREQUAL "SunPro")
					  set(CCOMMON_OPT "${CCOMMON_OPT} -w")
					  if ("${ARCH}" STREQUAL "x86")
					    set(CCOMMON_OPT "${CCOMMON_OPT} -m32")
					  else ()
					    # This is the C compiler's flag set, so -m64 belongs in CCOMMON_OPT
					    # (it was previously appended to the Fortran flags by mistake).
					    set(CCOMMON_OPT "${CCOMMON_OPT} -m64")
					  endif ()
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,60 @@
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Only generate the .def file for the DLL on MSVC.
					# The file is produced by exports/gensymbol (perl) right before the library
					# target is linked; each *_IN variable is an argument passed to that script.
					if (MSVC)

					  # The .def file does not exist at configure time; mark it as generated.
					  set_source_files_properties(${OpenBLAS_DEF_FILE} PROPERTIES GENERATED 1)

					  # Architecture argument: defaults to x86_64 when ARCH was never defined.
					  if (DEFINED ARCH)
					    set(ARCH_IN ${ARCH})
					  else ()
					    set(ARCH_IN "x86_64")
					  endif ()

					  # Quoted so that an undefined CORE does not produce a malformed if().
					  if ("${CORE}" STREQUAL "generic")
					    set(ARCH_IN "GENERIC")
					  endif ()

					  # Every remaining option is forwarded as <name>_IN, using 0 when the
					  # option itself is not defined.
					  foreach (def_opt EXPRECISION NO_CBLAS NO_LAPACK NO_LAPACKE NEED2UNDERSCORES ONLY_CBLAS)
					    if (DEFINED ${def_opt})
					      set(${def_opt}_IN ${${def_opt}})
					    else ()
					      set(${def_opt}_IN 0)
					    endif ()
					  endforeach ()

					  add_custom_command(
					    TARGET ${OpenBLAS_LIBNAME} PRE_LINK
					    COMMAND perl
					    ARGS "${CMAKE_SOURCE_DIR}/exports/gensymbol" "win2k" "${ARCH_IN}" "dummy" "${EXPRECISION_IN}" "${NO_CBLAS_IN}" "${NO_LAPACK_IN}" "${NO_LAPACKE_IN}" "${NEED2UNDERSCORES_IN}" "${ONLY_CBLAS_IN}" "${SYMBOLPREFIX}" "${SYMBOLSUFFIX}" > "${PROJECT_BINARY_DIR}/openblas.def"
					    COMMENT "Create openblas.def file"
					    VERBATIM)

					endif ()
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,66 @@
 | 
				
			||||||
 | 
					##
 | 
				
			||||||
 | 
					## Author: Hank Anderson <hank@statease.com>
 | 
				
			||||||
 | 
					## Copyright: (c) Stat-Ease, Inc.
 | 
				
			||||||
 | 
					## Created: 12/29/14
 | 
				
			||||||
 | 
					## Last Modified: 12/29/14
 | 
				
			||||||
 | 
					## Description: Ported from the OpenBLAS/f_check perl script.
 | 
				
			||||||
 | 
					##              This is triggered by prebuild.cmake and runs before any of the code is built.
 | 
				
			||||||
 | 
					##              Appends Fortran information to config.h and Makefile.conf.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# CMake vars set by this file:
 | 
				
			||||||
 | 
					# F_COMPILER
 | 
				
			||||||
 | 
					# FC
 | 
				
			||||||
 | 
					# BU
 | 
				
			||||||
 | 
					# NOFORTRAN
 | 
				
			||||||
 | 
					# NEED2UNDERSCORES
 | 
				
			||||||
 | 
					# FEXTRALIB
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Defines set by this file:
 | 
				
			||||||
 | 
					# BUNDERSCORE
 | 
				
			||||||
 | 
					# NEEDBUNDERSCORE
 | 
				
			||||||
 | 
					# NEED2UNDERSCORES
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Pin the Fortran compiler to gfortran (without probing it) in two cases:
					#  - MSVC, where CMake would otherwise automatically assume ifort (-hpa);
					#  - NO_LAPACK builds, where the Fortran language is not enabled below.
					if (MSVC OR NO_LAPACK)
					  include(CMakeForceCompiler)
					  CMAKE_FORCE_Fortran_COMPILER(gfortran GNU)
					endif ()

					# Fortran is only enabled as a language when LAPACK is built.
					if (NOT NO_LAPACK)
					  enable_language(Fortran)
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# N.B. the perl f_check script is not cross-platform, so it is not run here;
					# fixed values are written instead.
					# TODO: detect whether underscore needed, set #defines and BU appropriately - use try_compile
					# TODO: set FEXTRALIB flags a la f_check?

					# Settings shared by both configurations: a single trailing underscore.
					set(BU "_")
					file(APPEND ${TARGET_CONF}
					  "#define BUNDERSCORE _\n"
					  "#define NEEDBUNDERSCORE 1\n")

					if (ONLY_CBLAS)
					  # When we only build CBLAS, we set NOFORTRAN=2 and disable the Fortran BLAS.
					  set(NOFORTRAN 2)
					  set(NO_FBLAS 1)
					else ()
					  # The regular build additionally records that no second underscore is used.
					  file(APPEND ${TARGET_CONF}
					    "#define NEED2UNDERSCORES 0\n")
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Derive F_COMPILER from the Fortran compiler's file name: directory and
					# extension are stripped and the result is upper-cased (gfortran -> GFORTRAN).
					# Both arguments are quoted so an empty compiler path yields an empty
					# F_COMPILER instead of a "wrong number of arguments" configure error.
					# NOTE(review): NAME_WE cuts at the first '.', so versioned executable
					# names may not map to a plain tag - confirm.
					get_filename_component(F_COMPILER "${CMAKE_Fortran_COMPILER}" NAME_WE)
					string(TOUPPER "${F_COMPILER}" F_COMPILER)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,200 @@
 | 
				
			||||||
 | 
					##
 | 
				
			||||||
 | 
					## Author: Hank Anderson <hank@statease.com>
 | 
				
			||||||
 | 
					## Description: Ported from portion of OpenBLAS/Makefile.system
 | 
				
			||||||
 | 
					##              Sets Fortran related variables.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# g77: interface define for the C side, warnings, and word-size flag.
					# F_COMPILER is quoted so an empty value cannot produce a malformed if().
					if ("${F_COMPILER}" STREQUAL "G77")
					  set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_G77")
					  set(FCOMMON_OPT "${FCOMMON_OPT} -Wall")
					  # The word-size flag is only added when NO_BINARY_MODE is not set.
					  if (NOT NO_BINARY_MODE)
					    if (BINARY64)
					      set(FCOMMON_OPT "${FCOMMON_OPT} -m64")
					    else ()
					      set(FCOMMON_OPT "${FCOMMON_OPT} -m32")
					    endif ()
					  endif ()
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# g95: interface define for the C side, warnings, and word-size flag.
					# F_COMPILER is quoted so an empty value cannot produce a malformed if().
					if ("${F_COMPILER}" STREQUAL "G95")
					  set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_G95")
					  set(FCOMMON_OPT "${FCOMMON_OPT} -Wall")
					  # The word-size flag is only added when NO_BINARY_MODE is not set.
					  if (NOT NO_BINARY_MODE)
					    if (BINARY64)
					      set(FCOMMON_OPT "${FCOMMON_OPT} -m64")
					    else ()
					      set(FCOMMON_OPT "${FCOMMON_OPT} -m32")
					    endif ()
					  endif ()
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# gfortran: interface define, warnings, runtime library, ABI/word-size
					# flags, 64-bit integer interface and OpenMP.
					# F_COMPILER/ARCH are quoted so empty values cannot break if().
					if ("${F_COMPILER}" STREQUAL "GFORTRAN")
					  set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_GFORT")
					  set(FCOMMON_OPT "${FCOMMON_OPT} -Wall")
					  # Don't include -lgfortran when NO_LAPACK is set.
					  # The previous value is appended to via ${EXTRALIB}; the missing '$'
					  # used to replace it with the literal text "{EXTRALIB}".
					  if (NOT NO_LAPACK)
					    set(EXTRALIB "${EXTRALIB} -lgfortran")
					  endif ()
					  if (NO_BINARY_MODE)
					    # In NO_BINARY_MODE only mips64 gets an explicit ABI flag.
					    if ("${ARCH}" STREQUAL "mips64")
					      if (BINARY64)
					        set(FCOMMON_OPT "${FCOMMON_OPT} -mabi=64")
					      else ()
					        set(FCOMMON_OPT "${FCOMMON_OPT} -mabi=n32")
					      endif ()
					    endif ()
					  else ()
					    if (BINARY64)
					      set(FCOMMON_OPT "${FCOMMON_OPT} -m64")
					      # 8-byte default integers are only requested on 64-bit builds.
					      if (INTERFACE64)
					        set(FCOMMON_OPT "${FCOMMON_OPT} -fdefault-integer-8")
					      endif ()
					    else ()
					      set(FCOMMON_OPT "${FCOMMON_OPT} -m32")
					    endif ()
					  endif ()

					  if (USE_OPENMP)
					    set(FCOMMON_OPT "${FCOMMON_OPT} -fopenmp")
					  endif ()
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Intel Fortran: interface define, 64-bit integer interface and OpenMP.
					# F_COMPILER is quoted so an empty value cannot produce a malformed if().
					if ("${F_COMPILER}" STREQUAL "INTEL")
					  set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_INTEL")
					  if (INTERFACE64)
					    set(FCOMMON_OPT "${FCOMMON_OPT} -i8")
					  endif ()
					  if (USE_OPENMP)
					    set(FCOMMON_OPT "${FCOMMON_OPT} -openmp")
					  endif ()
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Fujitsu Fortran: interface define and OpenMP.
					# F_COMPILER is quoted so an empty value cannot produce a malformed if().
					if ("${F_COMPILER}" STREQUAL "FUJITSU")
					  set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_FUJITSU")
					  if (USE_OPENMP)
					    set(FCOMMON_OPT "${FCOMMON_OPT} -openmp")
					  endif ()
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# IBM Fortran: interface define, word size, 64-bit integer interface, OpenMP.
					# F_COMPILER is quoted so an empty value cannot produce a malformed if().
					if ("${F_COMPILER}" STREQUAL "IBM")
					  set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_IBM")
					  # FCOMMON_OPT	+= -qarch=440
					  if (BINARY64)
					    set(FCOMMON_OPT "${FCOMMON_OPT} -q64")
					    # 8-byte integers are only requested on 64-bit builds.
					    if (INTERFACE64)
					      set(FCOMMON_OPT "${FCOMMON_OPT} -qintsize=8")
					    endif ()
					  else ()
					    set(FCOMMON_OPT "${FCOMMON_OPT} -q32")
					  endif ()
					  if (USE_OPENMP)
					    set(FCOMMON_OPT "${FCOMMON_OPT} -openmp")
					  endif ()
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# PGI Fortran: interface define, profiling define, target processor flag,
					# 64-bit integer interface and OpenMP.
					# F_COMPILER is quoted so an empty value cannot produce a malformed if().
					if ("${F_COMPILER}" STREQUAL "PGI")
					  set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_PGI")
					  set(COMMON_PROF "${COMMON_PROF} -DPGICOMPILER")
					  if (BINARY64)
					    # -i8 is added before the processor flag, and only on 64-bit builds.
					    if (INTERFACE64)
					      set(FCOMMON_OPT "${FCOMMON_OPT} -i8")
					    endif ()
					    set(FCOMMON_OPT "${FCOMMON_OPT} -tp p7-64")
					  else ()
					    set(FCOMMON_OPT "${FCOMMON_OPT} -tp p7")
					  endif ()
					  if (USE_OPENMP)
					    set(FCOMMON_OPT "${FCOMMON_OPT} -mp")
					  endif ()
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# PathScale Fortran: interface define, 64-bit integer interface,
					# word-size/ABI flag and OpenMP.
					# F_COMPILER/ARCH are quoted so empty values cannot break if().
					if ("${F_COMPILER}" STREQUAL "PATHSCALE")
					  set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_PATHSCALE")
					  # 8-byte integers are only requested on 64-bit builds.
					  if (BINARY64)
					    if (INTERFACE64)
					      set(FCOMMON_OPT "${FCOMMON_OPT} -i8")
					    endif ()
					  endif ()

					  # mips64 selects an ABI (-mabi=...); every other target uses -m32/-m64.
					  if (NOT "${ARCH}" STREQUAL "mips64")
					    if (NOT BINARY64)
					      set(FCOMMON_OPT "${FCOMMON_OPT} -m32")
					    else ()
					      set(FCOMMON_OPT "${FCOMMON_OPT} -m64")
					    endif ()
					  else ()
					    if (BINARY64)
					      set(FCOMMON_OPT "${FCOMMON_OPT} -mabi=64")
					    else ()
					      set(FCOMMON_OPT "${FCOMMON_OPT} -mabi=n32")
					    endif ()
					  endif ()

					  if (USE_OPENMP)
					    set(FCOMMON_OPT "${FCOMMON_OPT} -mp")
					  endif ()
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Open64 Fortran: interface define, 64-bit integer interface,
					# word-size/ABI flag, Loongson options and OpenMP.
					# F_COMPILER/ARCH/CORE are quoted so empty values cannot break if().
					if ("${F_COMPILER}" STREQUAL "OPEN64")

					  set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_OPEN64")
					  # 8-byte integers are only requested on 64-bit builds.
					  if (BINARY64)
					    if (INTERFACE64)
					      set(FCOMMON_OPT "${FCOMMON_OPT} -i8")
					    endif ()
					  endif ()

					  if ("${ARCH}" STREQUAL "mips64")

					    # mips64 uses the -n32/-n64 ABI switches.
					    if (NOT BINARY64)
					      set(FCOMMON_OPT "${FCOMMON_OPT} -n32")
					    else ()
					      set(FCOMMON_OPT "${FCOMMON_OPT} -n64")
					    endif ()

					    if ("${CORE}" STREQUAL "LOONGSON3A")
					      set(FCOMMON_OPT "${FCOMMON_OPT} -loongson3 -static")
					    endif ()

					    if ("${CORE}" STREQUAL "LOONGSON3B")
					      set(FCOMMON_OPT "${FCOMMON_OPT} -loongson3 -static")
					    endif ()
					  else ()
					    if (NOT BINARY64)
					      set(FCOMMON_OPT "${FCOMMON_OPT} -m32")
					    else ()
					      set(FCOMMON_OPT "${FCOMMON_OPT} -m64")
					    endif ()
					  endif ()

					  # OpenMP builds also add the C++ runtime to the Fortran extra libraries.
					  if (USE_OPENMP)
					    set(FEXTRALIB "${FEXTRALIB} -lstdc++")
					    set(FCOMMON_OPT "${FCOMMON_OPT} -mp")
					  endif ()
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Sun Fortran: interface define, word size chosen from ARCH, and OpenMP.
					# F_COMPILER/ARCH are quoted so empty values cannot break if().
					if ("${F_COMPILER}" STREQUAL "SUN")
					  set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_SUN")
					  if ("${ARCH}" STREQUAL "x86")
					    set(FCOMMON_OPT "${FCOMMON_OPT} -m32")
					  else ()
					    set(FCOMMON_OPT "${FCOMMON_OPT} -m64")
					  endif ()
					  if (USE_OPENMP)
					    set(FCOMMON_OPT "${FCOMMON_OPT} -xopenmp=parallel")
					  endif ()
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Compaq Fortran: interface define and OpenMP.
					# F_COMPILER is quoted so an empty value cannot produce a malformed if().
					if ("${F_COMPILER}" STREQUAL "COMPAQ")
					  set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_COMPAQ")
					  if (USE_OPENMP)
					    set(FCOMMON_OPT "${FCOMMON_OPT} -openmp")
					  endif ()
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# From the root Makefile: TIMER selects which ../INSTALL/second_<TIMER>.f
					# timing source lapack-netlib compiles.
					# F_COMPILER is quoted so an empty value falls through to "NONE" instead
					# of producing a malformed if().
					if ("${F_COMPILER}" STREQUAL "GFORTRAN")
					  set(TIMER "INT_ETIME")
					else ()
					  set(TIMER "NONE")
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,165 @@
 | 
				
			||||||
 | 
					# helper functions for the kernel CMakeLists.txt
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Set the default filenames for L1 objects. Most of these will be overridden by the appropriate KERNEL file.
 | 
				
			||||||
 | 
					# Assigns the default source file to every level-1 <PREC><OP>KERNEL
					# variable (plus the GEMV, CABS, LSAME and AXPBY defaults).
					# This is a macro, so all variables are set in the caller's scope.
					macro(SetDefaultL1)
					  # Real precisions (S, D, Q) share the plain assembly kernels.
					  foreach (l1_prec S D Q)
					    set(${l1_prec}AMAXKERNEL amax.S)
					    set(${l1_prec}AMINKERNEL amin.S)
					    set(${l1_prec}MAXKERNEL max.S)
					    set(${l1_prec}MINKERNEL min.S)
					    set(I${l1_prec}AMAXKERNEL iamax.S)
					    set(I${l1_prec}AMINKERNEL iamin.S)
					    set(I${l1_prec}MAXKERNEL iamax.S)
					    set(I${l1_prec}MINKERNEL iamin.S)
					    set(${l1_prec}ASUMKERNEL asum.S)
					    set(${l1_prec}AXPYKERNEL axpy.S)
					    set(${l1_prec}COPYKERNEL copy.S)
					    set(${l1_prec}DOTKERNEL dot.S)
					    set(${l1_prec}NRM2KERNEL nrm2.S)
					    set(${l1_prec}ROTKERNEL rot.S)
					    set(${l1_prec}SCALKERNEL scal.S)
					    set(${l1_prec}SWAPKERNEL swap.S)
					    set(${l1_prec}GEMVNKERNEL gemv_n.S)
					    set(${l1_prec}GEMVTKERNEL gemv_t.S)
					    set(${l1_prec}CABS_KERNEL ../generic/cabs.c)
					  endforeach ()

					  # Complex precisions (C, Z, X) share the z-prefixed assembly kernels.
					  foreach (l1_prec C Z X)
					    set(${l1_prec}AMAXKERNEL zamax.S)
					    set(${l1_prec}AMINKERNEL zamin.S)
					    set(I${l1_prec}AMAXKERNEL izamax.S)
					    set(I${l1_prec}AMINKERNEL izamin.S)
					    set(${l1_prec}ASUMKERNEL zasum.S)
					    set(${l1_prec}AXPYKERNEL zaxpy.S)
					    set(${l1_prec}COPYKERNEL zcopy.S)
					    set(${l1_prec}DOTKERNEL zdot.S)
					    set(${l1_prec}NRM2KERNEL znrm2.S)
					    set(${l1_prec}ROTKERNEL zrot.S)
					    set(${l1_prec}SCALKERNEL zscal.S)
					    set(${l1_prec}SWAPKERNEL zswap.S)
					    set(${l1_prec}GEMVNKERNEL zgemv_n.S)
					    set(${l1_prec}GEMVTKERNEL zgemv_t.S)
					  endforeach ()
					  unset(l1_prec)

					  # C implementations that are not per-precision assembly files.
					  set(LSAME_KERNEL ../generic/lsame.c)
					  set(SAXPBYKERNEL ../arm/axpby.c)
					  set(DAXPBYKERNEL ../arm/axpby.c)
					  set(CAXPBYKERNEL ../arm/zaxpby.c)
					  set(ZAXPBYKERNEL ../arm/zaxpby.c)
					endmacro ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Assigns the default source file to every level-2 kernel variable
					# (GEMV, GER, SYMV and HEMV). This is a macro, so all variables are set in
					# the caller's scope.
					macro(SetDefaultL2)
					  # Real precisions (S, D, Q).
					  foreach (l2_prec S D Q)
					    set(${l2_prec}GEMVNKERNEL gemv_n.S)
					    set(${l2_prec}GEMVTKERNEL gemv_t.S)
					    set(${l2_prec}GERKERNEL ../generic/ger.c)
					    set(${l2_prec}SYMV_U_KERNEL ../generic/symv_k.c)
					    set(${l2_prec}SYMV_L_KERNEL ../generic/symv_k.c)
					  endforeach ()

					  # Complex precisions (C, Z, X); only these have GERU/GERC and HEMV kernels.
					  foreach (l2_prec C Z X)
					    set(${l2_prec}GEMVNKERNEL zgemv_n.S)
					    set(${l2_prec}GEMVTKERNEL zgemv_t.S)
					    set(${l2_prec}GERUKERNEL ../generic/zger.c)
					    set(${l2_prec}GERCKERNEL ../generic/zger.c)
					    set(${l2_prec}SYMV_U_KERNEL ../generic/zsymv_k.c)
					    set(${l2_prec}SYMV_L_KERNEL ../generic/zsymv_k.c)
					    set(${l2_prec}HEMV_U_KERNEL ../generic/zhemv_k.c)
					    set(${l2_prec}HEMV_L_KERNEL ../generic/zhemv_k.c)
					    set(${l2_prec}HEMV_V_KERNEL ../generic/zhemv_k.c)
					    set(${l2_prec}HEMV_M_KERNEL ../generic/zhemv_k.c)
					  endforeach ()
					  unset(l2_prec)
					endmacro ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Assigns the default GEADD kernel sources. This is a macro, so the
					# variables are set in the caller's scope.
					macro(SetDefaultL3)
					  # Real single/double precision.
					  foreach (l3_prec S D)
					    set(${l3_prec}GEADD_KERNEL ../generic/geadd.c)
					  endforeach ()
					  # Complex single/double precision.
					  foreach (l3_prec C Z)
					    set(${l3_prec}GEADD_KERNEL ../generic/zgeadd.c)
					  endforeach ()
					  unset(l3_prec)
					endmacro ()
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,347 @@
 | 
				
			||||||
 | 
					# Sources for compiling lapack-netlib. Can't use CMakeLists.txt because lapack-netlib already has its own cmake files.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Source list stored in ALLAUX; the last two entries come from ../INSTALL.
					set(ALLAUX
					  ilaenv.f ieeeck.f lsamen.f
					  xerbla_array.f iparmq.f
					  ilaprec.f ilatrans.f ilauplo.f iladiag.f
					  chla_transtype.f
					  ../INSTALL/ilaver.f
					  ../INSTALL/slamch.f
					)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# SCLAUX: auxiliary sources appended to LA_REL_SRC when BUILD_SINGLE
					# or BUILD_COMPLEX is enabled. Entries are relative to
					# ${NETLIB_LAPACK_DIR}/SRC. The final entry depends on ${TIMER}, which
					# is not set in this file -- it must be defined before inclusion.
					set(SCLAUX
					  sbdsdc.f
					  sbdsqr.f sdisna.f slabad.f slacpy.f sladiv.f slae2.f slaebz.f
					  slaed0.f slaed1.f slaed2.f slaed3.f slaed4.f slaed5.f slaed6.f
					  slaed7.f slaed8.f slaed9.f slaeda.f slaev2.f slagtf.f
					  slagts.f slamrg.f slanst.f
					  slapy2.f slapy3.f slarnv.f
					  slarra.f slarrb.f slarrc.f slarrd.f slarre.f slarrf.f slarrj.f
					  slarrk.f slarrr.f slaneg.f
					  slartg.f slaruv.f slas2.f slascl.f
					  slasd0.f slasd1.f slasd2.f slasd3.f slasd4.f slasd5.f slasd6.f
					  slasd7.f slasd8.f slasda.f slasdq.f slasdt.f
					  slaset.f slasq1.f slasq2.f slasq3.f slasq4.f slasq5.f slasq6.f
					  slasr.f slasrt.f slassq.f slasv2.f spttrf.f sstebz.f sstedc.f
					  ssteqr.f ssterf.f slaisnan.f sisnan.f
					  slartgp.f slartgs.f
					  ../INSTALL/second_${TIMER}.f
					)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# DZLAUX: auxiliary sources appended to LA_REL_SRC when BUILD_DOUBLE
					# or BUILD_COMPLEX16 is enabled. Entries are relative to
					# ${NETLIB_LAPACK_DIR}/SRC. The final entry depends on ${TIMER}, which
					# is not set in this file -- it must be defined before inclusion.
					set(DZLAUX
					  dbdsdc.f
					  dbdsqr.f ddisna.f dlabad.f dlacpy.f dladiv.f dlae2.f dlaebz.f
					  dlaed0.f dlaed1.f dlaed2.f dlaed3.f dlaed4.f dlaed5.f dlaed6.f
					  dlaed7.f dlaed8.f dlaed9.f dlaeda.f dlaev2.f dlagtf.f
					  dlagts.f dlamrg.f dlanst.f
					  dlapy2.f dlapy3.f dlarnv.f
					  dlarra.f dlarrb.f dlarrc.f dlarrd.f dlarre.f dlarrf.f dlarrj.f
					  dlarrk.f dlarrr.f dlaneg.f
					  dlartg.f dlaruv.f dlas2.f dlascl.f
					  dlasd0.f dlasd1.f dlasd2.f dlasd3.f dlasd4.f dlasd5.f dlasd6.f
					  dlasd7.f dlasd8.f dlasda.f dlasdq.f dlasdt.f
					  dlaset.f dlasq1.f dlasq2.f dlasq3.f dlasq4.f dlasq5.f dlasq6.f
					  dlasr.f dlasrt.f dlassq.f dlasv2.f dpttrf.f dstebz.f dstedc.f
					  dsteqr.f dsterf.f dlaisnan.f disnan.f
					  dlartgp.f dlartgs.f
					  ../INSTALL/dlamch.f ../INSTALL/dsecnd_${TIMER}.f
					)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# SLASRC: sources appended to LA_REL_SRC when BUILD_SINGLE is enabled.
					# Entries are relative to ${NETLIB_LAPACK_DIR}/SRC.
					set(SLASRC
					  sgbbrd.f sgbcon.f sgbequ.f sgbrfs.f sgbsv.f
					  sgbsvx.f sgbtf2.f sgbtrf.f sgbtrs.f sgebak.f sgebal.f sgebd2.f
					  sgebrd.f sgecon.f sgeequ.f sgees.f sgeesx.f sgeev.f sgeevx.f
					  sgegs.f sgegv.f sgehd2.f sgehrd.f sgelq2.f sgelqf.f
					  sgels.f sgelsd.f sgelss.f sgelsx.f sgelsy.f sgeql2.f sgeqlf.f
					  sgeqp3.f sgeqpf.f sgeqr2.f sgeqr2p.f sgeqrf.f sgeqrfp.f sgerfs.f
					  sgerq2.f sgerqf.f sgesc2.f sgesdd.f sgesvd.f sgesvx.f
					  sgetc2.f sgetri.f
					  sggbak.f sggbal.f sgges.f sggesx.f sggev.f sggevx.f
					  sggglm.f sgghrd.f sgglse.f sggqrf.f
					  sggrqf.f sggsvd.f sggsvp.f sgtcon.f sgtrfs.f sgtsv.f
					  sgtsvx.f sgttrf.f sgttrs.f sgtts2.f shgeqz.f
					  shsein.f shseqr.f slabrd.f slacon.f slacn2.f
					  slaein.f slaexc.f slag2.f slags2.f slagtm.f slagv2.f slahqr.f
					  slahrd.f slahr2.f slaic1.f slaln2.f slals0.f slalsa.f slalsd.f
					  slangb.f slange.f slangt.f slanhs.f slansb.f slansp.f
					  slansy.f slantb.f slantp.f slantr.f slanv2.f
					  slapll.f slapmt.f
					  slaqgb.f slaqge.f slaqp2.f slaqps.f slaqsb.f slaqsp.f slaqsy.f
					  slaqr0.f slaqr1.f slaqr2.f slaqr3.f slaqr4.f slaqr5.f
					  slaqtr.f slar1v.f slar2v.f ilaslr.f ilaslc.f
					  slarf.f slarfb.f slarfg.f slarfgp.f slarft.f slarfx.f slargv.f
					  slarrv.f slartv.f
					  slarz.f slarzb.f slarzt.f slasy2.f slasyf.f slasyf_rook.f
					  slatbs.f slatdf.f slatps.f slatrd.f slatrs.f slatrz.f slatzm.f
					  sopgtr.f sopmtr.f sorg2l.f sorg2r.f
					  sorgbr.f sorghr.f sorgl2.f sorglq.f sorgql.f sorgqr.f sorgr2.f
					  sorgrq.f sorgtr.f sorm2l.f sorm2r.f
					  sormbr.f sormhr.f sorml2.f sormlq.f sormql.f sormqr.f sormr2.f
					  sormr3.f sormrq.f sormrz.f sormtr.f spbcon.f spbequ.f spbrfs.f
					  spbstf.f spbsv.f spbsvx.f
					  spbtf2.f spbtrf.f spbtrs.f spocon.f spoequ.f sporfs.f sposv.f
					  sposvx.f spstrf.f spstf2.f
					  sppcon.f sppequ.f
					  spprfs.f sppsv.f sppsvx.f spptrf.f spptri.f spptrs.f sptcon.f
					  spteqr.f sptrfs.f sptsv.f sptsvx.f spttrs.f sptts2.f srscl.f
					  ssbev.f ssbevd.f ssbevx.f ssbgst.f ssbgv.f ssbgvd.f ssbgvx.f
					  ssbtrd.f sspcon.f sspev.f sspevd.f sspevx.f sspgst.f
					  sspgv.f sspgvd.f sspgvx.f ssprfs.f sspsv.f sspsvx.f ssptrd.f
					  ssptrf.f ssptri.f ssptrs.f sstegr.f sstein.f sstev.f sstevd.f sstevr.f
					  sstevx.f
					  ssycon.f ssyev.f ssyevd.f ssyevr.f ssyevx.f ssygs2.f
					  ssygst.f ssygv.f ssygvd.f ssygvx.f ssyrfs.f ssysv.f ssysvx.f
					  ssytd2.f ssytf2.f ssytrd.f ssytrf.f ssytri.f ssytri2.f ssytri2x.f
					  ssyswapr.f ssytrs.f ssytrs2.f ssyconv.f
					  ssytf2_rook.f ssytrf_rook.f ssytrs_rook.f
					  ssytri_rook.f ssycon_rook.f ssysv_rook.f
					  stbcon.f
					  stbrfs.f stbtrs.f stgevc.f stgex2.f stgexc.f stgsen.f
					  stgsja.f stgsna.f stgsy2.f stgsyl.f stpcon.f stprfs.f stptri.f
					  stptrs.f
					  strcon.f strevc.f strexc.f strrfs.f strsen.f strsna.f strsyl.f
					  strtrs.f stzrqf.f stzrzf.f sstemr.f
					  slansf.f spftrf.f spftri.f spftrs.f ssfrk.f stfsm.f stftri.f stfttp.f
					  stfttr.f stpttf.f stpttr.f strttf.f strttp.f
					  sgejsv.f sgesvj.f sgsvj0.f sgsvj1.f
					  sgeequb.f ssyequb.f spoequb.f sgbequb.f
					  sbbcsd.f slapmr.f sorbdb.f sorbdb1.f sorbdb2.f sorbdb3.f sorbdb4.f
					  sorbdb5.f sorbdb6.f sorcsd.f sorcsd2by1.f
					  sgeqrt.f sgeqrt2.f sgeqrt3.f sgemqrt.f
					  stpqrt.f stpqrt2.f stpmqrt.f stprfb.f spotri.f
					)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# DSLASRC: appended to LA_REL_SRC by both the BUILD_SINGLE and the
					# BUILD_DOUBLE branches further down in this file.
					set(DSLASRC spotrs.f)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# CLASRC: sources appended to LA_REL_SRC when BUILD_COMPLEX is enabled.
					# Entries are relative to ${NETLIB_LAPACK_DIR}/SRC.
					set(CLASRC
					  cbdsqr.f cgbbrd.f cgbcon.f cgbequ.f cgbrfs.f cgbsv.f cgbsvx.f
					  cgbtf2.f cgbtrf.f cgbtrs.f cgebak.f cgebal.f cgebd2.f cgebrd.f
					  cgecon.f cgeequ.f cgees.f cgeesx.f cgeev.f cgeevx.f
					  cgegs.f cgegv.f cgehd2.f cgehrd.f cgelq2.f cgelqf.f
					  cgels.f cgelsd.f cgelss.f cgelsx.f cgelsy.f cgeql2.f cgeqlf.f cgeqp3.f
					  cgeqpf.f cgeqr2.f cgeqr2p.f cgeqrf.f cgeqrfp.f cgerfs.f
					  cgerq2.f cgerqf.f cgesc2.f cgesdd.f cgesvd.f
					  cgesvx.f cgetc2.f cgetri.f
					  cggbak.f cggbal.f cgges.f cggesx.f cggev.f cggevx.f cggglm.f
					  cgghrd.f cgglse.f cggqrf.f cggrqf.f
					  cggsvd.f cggsvp.f
					  cgtcon.f cgtrfs.f cgtsv.f cgtsvx.f cgttrf.f cgttrs.f cgtts2.f chbev.f
					  chbevd.f chbevx.f chbgst.f chbgv.f chbgvd.f chbgvx.f chbtrd.f
					  checon.f cheev.f cheevd.f cheevr.f cheevx.f chegs2.f chegst.f
					  chegv.f chegvd.f chegvx.f cherfs.f chesv.f chesvx.f chetd2.f
					  chetf2.f chetrd.f
					  chetrf.f chetri.f chetri2.f chetri2x.f cheswapr.f
					  chetrs.f chetrs2.f
					  chetf2_rook.f chetrf_rook.f chetri_rook.f chetrs_rook.f checon_rook.f chesv_rook.f
					  chgeqz.f chpcon.f chpev.f chpevd.f
					  chpevx.f chpgst.f chpgv.f chpgvd.f chpgvx.f chprfs.f chpsv.f
					  chpsvx.f
					  chptrd.f chptrf.f chptri.f chptrs.f chsein.f chseqr.f clabrd.f
					  clacgv.f clacon.f clacn2.f clacp2.f clacpy.f clacrm.f clacrt.f cladiv.f
					  claed0.f claed7.f claed8.f
					  claein.f claesy.f claev2.f clags2.f clagtm.f
					  clahef.f clahef_rook.f clahqr.f
					  clahrd.f clahr2.f claic1.f clals0.f clalsa.f clalsd.f clangb.f clange.f clangt.f
					  clanhb.f clanhe.f
					  clanhp.f clanhs.f clanht.f clansb.f clansp.f clansy.f clantb.f
					  clantp.f clantr.f clapll.f clapmt.f clarcm.f claqgb.f claqge.f
					  claqhb.f claqhe.f claqhp.f claqp2.f claqps.f claqsb.f
					  claqr0.f claqr1.f claqr2.f claqr3.f claqr4.f claqr5.f
					  claqsp.f claqsy.f clar1v.f clar2v.f ilaclr.f ilaclc.f
					  clarf.f clarfb.f clarfg.f clarft.f clarfgp.f
					  clarfx.f clargv.f clarnv.f clarrv.f clartg.f clartv.f
					  clarz.f clarzb.f clarzt.f clascl.f claset.f clasr.f classq.f
					  clasyf.f clasyf_rook.f clatbs.f clatdf.f clatps.f clatrd.f clatrs.f clatrz.f
					  clatzm.f cpbcon.f cpbequ.f cpbrfs.f cpbstf.f cpbsv.f
					  cpbsvx.f cpbtf2.f cpbtrf.f cpbtrs.f cpocon.f cpoequ.f cporfs.f
					  cposv.f cposvx.f cpstrf.f cpstf2.f
					  cppcon.f cppequ.f cpprfs.f cppsv.f cppsvx.f cpptrf.f cpptri.f cpptrs.f
					  cptcon.f cpteqr.f cptrfs.f cptsv.f cptsvx.f cpttrf.f cpttrs.f cptts2.f
					  crot.f cspcon.f csprfs.f cspsv.f
					  cspsvx.f csptrf.f csptri.f csptrs.f csrscl.f cstedc.f
					  cstegr.f cstein.f csteqr.f
					  csycon.f
					  csyrfs.f csysv.f csysvx.f csytf2.f csytrf.f csytri.f csytri2.f csytri2x.f
					  csyswapr.f csytrs.f csytrs2.f csyconv.f
					  csytf2_rook.f csytrf_rook.f csytrs_rook.f
					  csytri_rook.f csycon_rook.f csysv_rook.f
					  ctbcon.f ctbrfs.f ctbtrs.f ctgevc.f ctgex2.f
					  ctgexc.f ctgsen.f ctgsja.f ctgsna.f ctgsy2.f ctgsyl.f ctpcon.f
					  ctprfs.f ctptri.f
					  ctptrs.f ctrcon.f ctrevc.f ctrexc.f ctrrfs.f ctrsen.f ctrsna.f
					  ctrsyl.f ctrtrs.f ctzrqf.f ctzrzf.f cung2l.f cung2r.f
					  cungbr.f cunghr.f cungl2.f cunglq.f cungql.f cungqr.f cungr2.f
					  cungrq.f cungtr.f cunm2l.f cunm2r.f cunmbr.f cunmhr.f cunml2.f
					  cunmlq.f cunmql.f cunmqr.f cunmr2.f cunmr3.f cunmrq.f cunmrz.f
					  cunmtr.f cupgtr.f cupmtr.f icmax1.f scsum1.f cstemr.f
					  chfrk.f ctfttp.f clanhf.f cpftrf.f cpftri.f cpftrs.f ctfsm.f ctftri.f
					  ctfttr.f ctpttf.f ctpttr.f ctrttf.f ctrttp.f
					  cgeequb.f cgbequb.f csyequb.f cpoequb.f cheequb.f
					  cbbcsd.f clapmr.f cunbdb.f cunbdb1.f cunbdb2.f cunbdb3.f cunbdb4.f
					  cunbdb5.f cunbdb6.f cuncsd.f cuncsd2by1.f
					  cgeqrt.f cgeqrt2.f cgeqrt3.f cgemqrt.f
					  ctpqrt.f ctpqrt2.f ctpmqrt.f ctprfb.f cpotri.f
					)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# ZCLASRC: appended to LA_REL_SRC by both the BUILD_COMPLEX and the
					# BUILD_COMPLEX16 branches further down in this file.
					set(ZCLASRC cpotrs.f)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# DLASRC: sources appended to LA_REL_SRC when BUILD_DOUBLE is enabled.
					# Entries are relative to ${NETLIB_LAPACK_DIR}/SRC.
					set(DLASRC
					  dgbbrd.f dgbcon.f dgbequ.f dgbrfs.f dgbsv.f
					  dgbsvx.f dgbtf2.f dgbtrf.f dgbtrs.f dgebak.f dgebal.f dgebd2.f
					  dgebrd.f dgecon.f dgeequ.f dgees.f dgeesx.f dgeev.f dgeevx.f
					  dgegs.f dgegv.f dgehd2.f dgehrd.f dgelq2.f dgelqf.f
					  dgels.f dgelsd.f dgelss.f dgelsx.f dgelsy.f dgeql2.f dgeqlf.f
					  dgeqp3.f dgeqpf.f dgeqr2.f dgeqr2p.f dgeqrf.f dgeqrfp.f dgerfs.f
					  dgerq2.f dgerqf.f dgesc2.f dgesdd.f dgesvd.f dgesvx.f
					  dgetc2.f dgetri.f
					  dggbak.f dggbal.f dgges.f dggesx.f dggev.f dggevx.f
					  dggglm.f dgghrd.f dgglse.f dggqrf.f
					  dggrqf.f dggsvd.f dggsvp.f dgtcon.f dgtrfs.f dgtsv.f
					  dgtsvx.f dgttrf.f dgttrs.f dgtts2.f dhgeqz.f
					  dhsein.f dhseqr.f dlabrd.f dlacon.f dlacn2.f
					  dlaein.f dlaexc.f dlag2.f dlags2.f dlagtm.f dlagv2.f dlahqr.f
					  dlahrd.f dlahr2.f dlaic1.f dlaln2.f dlals0.f dlalsa.f dlalsd.f
					  dlangb.f dlange.f dlangt.f dlanhs.f dlansb.f dlansp.f
					  dlansy.f dlantb.f dlantp.f dlantr.f dlanv2.f
					  dlapll.f dlapmt.f
					  dlaqgb.f dlaqge.f dlaqp2.f dlaqps.f dlaqsb.f dlaqsp.f dlaqsy.f
					  dlaqr0.f dlaqr1.f dlaqr2.f dlaqr3.f dlaqr4.f dlaqr5.f
					  dlaqtr.f dlar1v.f dlar2v.f iladlr.f iladlc.f
					  dlarf.f dlarfb.f dlarfg.f dlarfgp.f dlarft.f dlarfx.f
					  dlargv.f dlarrv.f dlartv.f
					  dlarz.f dlarzb.f dlarzt.f dlasy2.f dlasyf.f dlasyf_rook.f
					  dlatbs.f dlatdf.f dlatps.f dlatrd.f dlatrs.f dlatrz.f dlatzm.f
					  dopgtr.f dopmtr.f dorg2l.f dorg2r.f
					  dorgbr.f dorghr.f dorgl2.f dorglq.f dorgql.f dorgqr.f dorgr2.f
					  dorgrq.f dorgtr.f dorm2l.f dorm2r.f
					  dormbr.f dormhr.f dorml2.f dormlq.f dormql.f dormqr.f dormr2.f
					  dormr3.f dormrq.f dormrz.f dormtr.f dpbcon.f dpbequ.f dpbrfs.f
					  dpbstf.f dpbsv.f dpbsvx.f
					  dpbtf2.f dpbtrf.f dpbtrs.f dpocon.f dpoequ.f dporfs.f dposv.f
					  dposvx.f dpotrs.f dpstrf.f dpstf2.f
					  dppcon.f dppequ.f
					  dpprfs.f dppsv.f dppsvx.f dpptrf.f dpptri.f dpptrs.f dptcon.f
					  dpteqr.f dptrfs.f dptsv.f dptsvx.f dpttrs.f dptts2.f drscl.f
					  dsbev.f dsbevd.f dsbevx.f dsbgst.f dsbgv.f dsbgvd.f dsbgvx.f
					  dsbtrd.f dspcon.f dspev.f dspevd.f dspevx.f dspgst.f
					  dspgv.f dspgvd.f dspgvx.f dsprfs.f dspsv.f dspsvx.f dsptrd.f
					  dsptrf.f dsptri.f dsptrs.f dstegr.f dstein.f dstev.f dstevd.f dstevr.f
					  dstevx.f
					  dsycon.f dsyev.f dsyevd.f dsyevr.f
					  dsyevx.f dsygs2.f dsygst.f dsygv.f dsygvd.f dsygvx.f dsyrfs.f
					  dsysv.f dsysvx.f
					  dsytd2.f dsytf2.f dsytrd.f dsytrf.f dsytri.f dsytri2.f dsytri2x.f
					  dsyswapr.f dsytrs.f dsytrs2.f dsyconv.f
					  dsytf2_rook.f dsytrf_rook.f dsytrs_rook.f
					  dsytri_rook.f dsycon_rook.f dsysv_rook.f
					  dtbcon.f dtbrfs.f dtbtrs.f dtgevc.f dtgex2.f dtgexc.f dtgsen.f
					  dtgsja.f dtgsna.f dtgsy2.f dtgsyl.f dtpcon.f dtprfs.f dtptri.f
					  dtptrs.f
					  dtrcon.f dtrevc.f dtrexc.f dtrrfs.f dtrsen.f dtrsna.f dtrsyl.f
					  dtrtrs.f dtzrqf.f dtzrzf.f dstemr.f
					  dsgesv.f dsposv.f dlag2s.f slag2d.f dlat2s.f
					  dlansf.f dpftrf.f dpftri.f dpftrs.f dsfrk.f dtfsm.f dtftri.f dtfttp.f
					  dtfttr.f dtpttf.f dtpttr.f dtrttf.f dtrttp.f
					  dgejsv.f dgesvj.f dgsvj0.f dgsvj1.f
					  dgeequb.f dsyequb.f dpoequb.f dgbequb.f
					  dbbcsd.f dlapmr.f dorbdb.f dorbdb1.f dorbdb2.f dorbdb3.f dorbdb4.f
					  dorbdb5.f dorbdb6.f dorcsd.f dorcsd2by1.f
					  dgeqrt.f dgeqrt2.f dgeqrt3.f dgemqrt.f
					  dtpqrt.f dtpqrt2.f dtpmqrt.f dtprfb.f dpotri.f
					)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# ZLASRC: sources appended to LA_REL_SRC when BUILD_COMPLEX16 is
					# enabled. Entries are relative to ${NETLIB_LAPACK_DIR}/SRC.
					set(ZLASRC
					  zbdsqr.f zgbbrd.f zgbcon.f zgbequ.f zgbrfs.f zgbsv.f zgbsvx.f
					  zgbtf2.f zgbtrf.f zgbtrs.f zgebak.f zgebal.f zgebd2.f zgebrd.f
					  zgecon.f zgeequ.f zgees.f zgeesx.f zgeev.f zgeevx.f
					  zgegs.f zgegv.f zgehd2.f zgehrd.f zgelq2.f zgelqf.f
					  zgels.f zgelsd.f zgelss.f zgelsx.f zgelsy.f zgeql2.f zgeqlf.f zgeqp3.f
					  zgeqpf.f zgeqr2.f zgeqr2p.f zgeqrf.f zgeqrfp.f zgerfs.f zgerq2.f zgerqf.f
					  zgesc2.f zgesdd.f zgesvd.f zgesvx.f zgetc2.f
					  zgetri.f
					  zggbak.f zggbal.f zgges.f zggesx.f zggev.f zggevx.f zggglm.f
					  zgghrd.f zgglse.f zggqrf.f zggrqf.f
					  zggsvd.f zggsvp.f
					  zgtcon.f zgtrfs.f zgtsv.f zgtsvx.f zgttrf.f zgttrs.f zgtts2.f zhbev.f
					  zhbevd.f zhbevx.f zhbgst.f zhbgv.f zhbgvd.f zhbgvx.f zhbtrd.f
					  zhecon.f zheev.f zheevd.f zheevr.f zheevx.f zhegs2.f zhegst.f
					  zhegv.f zhegvd.f zhegvx.f zherfs.f zhesv.f zhesvx.f zhetd2.f
					  zhetf2.f zhetrd.f
					  zhetrf.f zhetri.f zhetri2.f zhetri2x.f zheswapr.f
					  zhetrs.f zhetrs2.f
					  zhetf2_rook.f zhetrf_rook.f zhetri_rook.f zhetrs_rook.f zhecon_rook.f zhesv_rook.f
					  zhgeqz.f zhpcon.f zhpev.f zhpevd.f
					  zhpevx.f zhpgst.f zhpgv.f zhpgvd.f zhpgvx.f zhprfs.f zhpsv.f
					  zhpsvx.f
					  zhptrd.f zhptrf.f zhptri.f zhptrs.f zhsein.f zhseqr.f zlabrd.f
					  zlacgv.f zlacon.f zlacn2.f zlacp2.f zlacpy.f zlacrm.f zlacrt.f zladiv.f
					  zlaed0.f zlaed7.f zlaed8.f
					  zlaein.f zlaesy.f zlaev2.f zlags2.f zlagtm.f
					  zlahef.f zlahef_rook.f zlahqr.f
					  zlahrd.f zlahr2.f zlaic1.f zlals0.f zlalsa.f zlalsd.f zlangb.f zlange.f
					  zlangt.f zlanhb.f
					  zlanhe.f
					  zlanhp.f zlanhs.f zlanht.f zlansb.f zlansp.f zlansy.f zlantb.f
					  zlantp.f zlantr.f zlapll.f zlapmt.f zlaqgb.f zlaqge.f
					  zlaqhb.f zlaqhe.f zlaqhp.f zlaqp2.f zlaqps.f zlaqsb.f
					  zlaqr0.f zlaqr1.f zlaqr2.f zlaqr3.f zlaqr4.f zlaqr5.f
					  zlaqsp.f zlaqsy.f zlar1v.f zlar2v.f ilazlr.f ilazlc.f
					  zlarcm.f zlarf.f zlarfb.f
					  zlarfg.f zlarft.f zlarfgp.f
					  zlarfx.f zlargv.f zlarnv.f zlarrv.f zlartg.f zlartv.f
					  zlarz.f zlarzb.f zlarzt.f zlascl.f zlaset.f zlasr.f
					  zlassq.f zlasyf.f zlasyf_rook.f
					  zlatbs.f zlatdf.f zlatps.f zlatrd.f zlatrs.f zlatrz.f zlatzm.f
					  zpbcon.f zpbequ.f zpbrfs.f zpbstf.f zpbsv.f
					  zpbsvx.f zpbtf2.f zpbtrf.f zpbtrs.f zpocon.f zpoequ.f zporfs.f
					  zposv.f zposvx.f zpotrs.f zpstrf.f zpstf2.f
					  zppcon.f zppequ.f zpprfs.f zppsv.f zppsvx.f zpptrf.f zpptri.f zpptrs.f
					  zptcon.f zpteqr.f zptrfs.f zptsv.f zptsvx.f zpttrf.f zpttrs.f zptts2.f
					  zrot.f zspcon.f zsprfs.f zspsv.f
					  zspsvx.f zsptrf.f zsptri.f zsptrs.f zdrscl.f zstedc.f
					  zstegr.f zstein.f zsteqr.f
					  zsycon.f
					  zsyrfs.f zsysv.f zsysvx.f zsytf2.f zsytrf.f zsytri.f zsytri2.f zsytri2x.f
					  zsyswapr.f zsytrs.f zsytrs2.f zsyconv.f
					  zsytf2_rook.f zsytrf_rook.f zsytrs_rook.f
					  zsytri_rook.f zsycon_rook.f zsysv_rook.f
					  ztbcon.f ztbrfs.f ztbtrs.f ztgevc.f ztgex2.f
					  ztgexc.f ztgsen.f ztgsja.f ztgsna.f ztgsy2.f ztgsyl.f ztpcon.f
					  ztprfs.f ztptri.f
					  ztptrs.f ztrcon.f ztrevc.f ztrexc.f ztrrfs.f ztrsen.f ztrsna.f
					  ztrsyl.f ztrtrs.f ztzrqf.f ztzrzf.f zung2l.f
					  zung2r.f zungbr.f zunghr.f zungl2.f zunglq.f zungql.f zungqr.f zungr2.f
					  zungrq.f zungtr.f zunm2l.f zunm2r.f zunmbr.f zunmhr.f zunml2.f
					  zunmlq.f zunmql.f zunmqr.f zunmr2.f zunmr3.f zunmrq.f zunmrz.f
					  zunmtr.f zupgtr.f
					  zupmtr.f izmax1.f dzsum1.f zstemr.f
					  zcgesv.f zcposv.f zlag2c.f clag2z.f zlat2c.f
					  zhfrk.f ztfttp.f zlanhf.f zpftrf.f zpftri.f zpftrs.f ztfsm.f ztftri.f
					  ztfttr.f ztpttf.f ztpttr.f ztrttf.f ztrttp.f
					  zgeequb.f zgbequb.f zsyequb.f zpoequb.f zheequb.f
					  zbbcsd.f zlapmr.f zunbdb.f zunbdb1.f zunbdb2.f zunbdb3.f zunbdb4.f
					  zunbdb5.f zunbdb6.f zuncsd.f zuncsd2by1.f
					  zgeqrt.f zgeqrt2.f zgeqrt3.f zgemqrt.f
					  ztpqrt.f ztpqrt2.f ztpmqrt.f ztprfb.f zpotri.f
					)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Assemble the list of LAPACK sources to compile.
					#
					# LA_REL_SRC collects file names relative to ${NETLIB_LAPACK_DIR}/SRC:
					# ALLAUX is always included, then one group of lists per enabled
					# precision (BUILD_SINGLE / BUILD_DOUBLE / BUILD_COMPLEX / BUILD_COMPLEX16).
					set(LA_REL_SRC ${ALLAUX})

					if (BUILD_SINGLE)
					  list(APPEND LA_REL_SRC ${SLASRC} ${DSLASRC} ${SCLAUX})
					endif ()

					if (BUILD_DOUBLE)
					  list(APPEND LA_REL_SRC ${DLASRC} ${DSLASRC} ${DZLAUX})
					endif ()

					if (BUILD_COMPLEX)
					  list(APPEND LA_REL_SRC ${CLASRC} ${ZCLASRC} ${SCLAUX})
					endif ()

					if (BUILD_COMPLEX16)
					  list(APPEND LA_REL_SRC ${ZLASRC} ${ZCLASRC} ${DZLAUX})
					endif ()

					# DSLASRC, ZCLASRC, SCLAUX and DZLAUX are each appended by two of the
					# branches above, so they appear twice when both precisions of a pair
					# are enabled. Keep only the first occurrence of every file.
					list(REMOVE_DUPLICATES LA_REL_SRC)

					# add lapack-netlib folder to the sources
					set(LA_SOURCES "")
					foreach (LA_FILE ${LA_REL_SRC})
					  list(APPEND LA_SOURCES "${NETLIB_LAPACK_DIR}/SRC/${LA_FILE}")
					endforeach ()

					# Every LAPACK source is compiled with the flags in LAPACK_FFLAGS.
					set_source_files_properties(${LA_SOURCES} PROPERTIES COMPILE_FLAGS "${LAPACK_FFLAGS}")
 | 
				
			||||||
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							| 
						 | 
					@ -0,0 +1,104 @@
 | 
				
			||||||
 | 
					##
 | 
				
			||||||
 | 
					## Author: Hank Anderson <hank@statease.com>
 | 
				
			||||||
 | 
					## Description: Ported from portion of OpenBLAS/Makefile.system
 | 
				
			||||||
 | 
					##              Detects the OS and sets appropriate variables.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Darwin: pick the md5 command line.
					# The variable name is passed unexpanded so if() dereferences it
					# exactly once; an expanded, unquoted value could be dereferenced a
					# second time if it happened to name another variable.
					if (CMAKE_SYSTEM_NAME STREQUAL "Darwin")
					  # NOTE: set(ENV{...}) only changes the environment of the running
					  # CMake process; it is not visible to the later build step.
					  set(ENV{MACOSX_DEPLOYMENT_TARGET} "10.2") # TODO: should be exported as an env var
					  set(MD5SUM "md5 -r")
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# FreeBSD: pick the md5 command line.
					# The variable name is passed unexpanded so if() dereferences it once.
					if (CMAKE_SYSTEM_NAME STREQUAL "FreeBSD")
					  set(MD5SUM "md5 -r")
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# NetBSD: pick the md5 command line (note: -n here, not -r).
					# The variable name is passed unexpanded so if() dereferences it once.
					if (CMAKE_SYSTEM_NAME STREQUAL "NetBSD")
					  set(MD5SUM "md5 -n")
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Linux: add -lm to EXTRALIB and set NO_EXPRECISION.
					# The variable name is passed unexpanded so if() dereferences it once.
					if (CMAKE_SYSTEM_NAME STREQUAL "Linux")
					  set(EXTRALIB "${EXTRALIB} -lm")
					  set(NO_EXPRECISION 1)
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# AIX: add -lm to EXTRALIB.
					# The variable name is passed unexpanded so if() dereferences it once.
					if (CMAKE_SYSTEM_NAME STREQUAL "AIX")
					  set(EXTRALIB "${EXTRALIB} -lm")
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# TODO: this is probably meant for mingw, not other windows compilers
 | 
				
			||||||
 | 
					# Windows-specific settings: disables PIC and extended precision, adds
					# advapi32 to EXTRALIB, defines MS_ABI for Clang and for GCC >= 4.7, and
					# adds stack-alignment flags on x86.
					#
					# All if() operands are given as variable names (or quoted strings)
					# rather than unquoted ${...} expansions, so an empty or undefined
					# variable cannot turn a condition into a syntax error.
					if (CMAKE_SYSTEM_NAME STREQUAL "Windows")

					  set(NEED_PIC 0)
					  set(NO_EXPRECISION 1)

					  set(EXTRALIB "${EXTRALIB} -defaultlib:advapi32")

					  # probably not going to use these
					  set(SUFFIX "obj")
					  set(PSUFFIX "pobj")
					  set(LIBSUFFIX "a")

					  if (CMAKE_C_COMPILER_ID STREQUAL "Clang")
					    set(CCOMMON_OPT "${CCOMMON_OPT} -DMS_ABI")
					  endif ()

					  if (CMAKE_C_COMPILER_ID STREQUAL "GNU")

					    # Test for supporting MS_ABI
					    # removed string parsing in favor of CMake's version comparison -hpa
					    # The trailing newline printed by the compiler is stripped so that
					    # GCC_VERSION holds just the version string.
					    execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion
					                    OUTPUT_VARIABLE GCC_VERSION
					                    OUTPUT_STRIP_TRAILING_WHITESPACE)
					    # "NOT ... VERSION_LESS 4.7" is ">= 4.7". The quoted expansion keeps
					    # the condition well-formed even if the compiler printed nothing
					    # (an empty version compares as less than 4.7).
					    if (NOT "${GCC_VERSION}" VERSION_LESS 4.7)
					      # GCC Version >=4.7
					      # It is compatible with MSVC ABI.
					      set(CCOMMON_OPT "${CCOMMON_OPT} -DMS_ABI")
					    endif ()
					  endif ()

					  # Ensure the correct stack alignment on Win32
					  # http://permalink.gmane.org/gmane.comp.lib.openblas.general/97
					  if (ARCH STREQUAL "x86")
					    if (NOT MSVC AND NOT CMAKE_C_COMPILER_ID STREQUAL "Clang")
					      set(CCOMMON_OPT "${CCOMMON_OPT} -mincoming-stack-boundary=2")
					    endif ()
					    set(FCOMMON_OPT "${FCOMMON_OPT} -mincoming-stack-boundary=2")
					  endif ()

					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Interix: disable PIC and extended precision, and record where the
					# Interix GCC tools live.
					if (CMAKE_SYSTEM_NAME STREQUAL "Interix")
					  set(NEED_PIC 0)
					  set(NO_EXPRECISION 1)

					  # The path is the only value: a stray STREQUAL keyword here previously
					  # made INTERIX_TOOL_DIR the two-element list "STREQUAL;<path>".
					  set(INTERIX_TOOL_DIR "/opt/gcc.3.3/i586-pc-interix3/bin")
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Cygwin: no extended precision, no PIC.
					if (CYGWIN)
					  set(NO_EXPRECISION 1)
					  set(NEED_PIC 0)
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# On every system except Windows and Interix, SMP builds add -lpthread
					# to EXTRALIB.
					# The variable name is passed unexpanded so if() dereferences it once.
					if (NOT CMAKE_SYSTEM_NAME STREQUAL "Windows" AND NOT CMAKE_SYSTEM_NAME STREQUAL "Interix")
					  if (SMP)
					    set(EXTRALIB "${EXTRALIB} -lpthread")
					  endif ()
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# QUAD_PRECISION builds set NO_EXPRECISION and pass -DQUAD_PRECISION
					# to the C compiler.
					if (QUAD_PRECISION)
					  set(NO_EXPRECISION 1)
					  set(CCOMMON_OPT "${CCOMMON_OPT} -DQUAD_PRECISION")
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# x86 builds never use extended precision.
					# ARCH is passed by name: with an unquoted ${ARCH}, an empty or
					# undefined ARCH would leave if() without a left operand and abort
					# configuration.
					if (ARCH STREQUAL "x86")
					  set(NO_EXPRECISION 1)
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# UTEST_CHECK switches SANITY_CHECK on and passes -DUTEST_CHECK to
					# the C compiler.
					if (UTEST_CHECK)
					  set(SANITY_CHECK 1)
					  set(CCOMMON_OPT "${CCOMMON_OPT} -DUTEST_CHECK")
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# SANITY_CHECK adds two definitions to CCOMMON_OPT.
					if (SANITY_CHECK)
					  set(CCOMMON_OPT "${CCOMMON_OPT} -DSANITY_CHECK")
					  # TODO: need some way to get $(*F) (target filename)
					  # The $(*F) text is make syntax carried over from the Makefile.
					  set(CCOMMON_OPT "${CCOMMON_OPT} -DREFNAME=$(*F)f${BU}")
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,113 @@
 | 
				
			||||||
 | 
					##
 | 
				
			||||||
 | 
					## Author: Hank Anderson <hank@statease.com>
 | 
				
			||||||
 | 
					## Description: Ported from OpenBLAS/Makefile.prebuild
 | 
				
			||||||
 | 
					##              This is triggered by system.cmake and runs before any of the code is built.
 | 
				
			||||||
 | 
					##              Creates config.h and Makefile.conf by first running the c_check perl script (which creates those files).
 | 
				
			||||||
 | 
					##              Next it runs f_check and appends some fortran information to the files.
 | 
				
			||||||
 | 
					##              Finally it runs getarch and getarch_2nd for even more environment information.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# CMake vars set by this file:
 | 
				
			||||||
 | 
					# CORE
 | 
				
			||||||
 | 
					# LIBCORE
 | 
				
			||||||
 | 
					# NUM_CORES
 | 
				
			||||||
 | 
					# HAVE_MMX
 | 
				
			||||||
 | 
					# HAVE_SSE
 | 
				
			||||||
 | 
					# HAVE_SSE2
 | 
				
			||||||
 | 
					# HAVE_SSE3
 | 
				
			||||||
 | 
					# MAKE
 | 
				
			||||||
 | 
					# SGEMM_UNROLL_M
 | 
				
			||||||
 | 
					# SGEMM_UNROLL_N
 | 
				
			||||||
 | 
					# DGEMM_UNROLL_M
 | 
				
			||||||
 | 
					# DGEMM_UNROLL_N
 | 
				
			||||||
 | 
					# QGEMM_UNROLL_M
 | 
				
			||||||
 | 
					# QGEMM_UNROLL_N
 | 
				
			||||||
 | 
					# CGEMM_UNROLL_M
 | 
				
			||||||
 | 
					# CGEMM_UNROLL_N
 | 
				
			||||||
 | 
					# ZGEMM_UNROLL_M
 | 
				
			||||||
 | 
					# ZGEMM_UNROLL_N
 | 
				
			||||||
 | 
					# XGEMM_UNROLL_M
 | 
				
			||||||
 | 
					# XGEMM_UNROLL_N
 | 
				
			||||||
 | 
					# CGEMM3M_UNROLL_M
 | 
				
			||||||
 | 
					# CGEMM3M_UNROLL_N
 | 
				
			||||||
 | 
					# ZGEMM3M_UNROLL_M
 | 
				
			||||||
 | 
					# ZGEMM3M_UNROLL_N
 | 
				
			||||||
 | 
					# XGEMM3M_UNROLL_M
 | 
				
			||||||
 | 
					# XGEMM3M_UNROLL_N
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# CPUIDEMU = ../../cpuid/table.o
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if (DEFINED CPUIDEMU)
 | 
				
			||||||
 | 
					  set(EXFLAGS "-DCPUIDEMU -DVENDOR=99")
 | 
				
			||||||
 | 
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if (DEFINED TARGET_CORE)
 | 
				
			||||||
 | 
					  # set the C flags for just this file
 | 
				
			||||||
 | 
					  set(GETARCH2_FLAGS "-DBUILD_KERNEL")
 | 
				
			||||||
 | 
					  set(TARGET_MAKE "Makefile_kernel.conf")
 | 
				
			||||||
 | 
					  set(TARGET_CONF "config_kernel.h")
 | 
				
			||||||
 | 
					else()
 | 
				
			||||||
 | 
					  set(TARGET_MAKE "Makefile.conf")
 | 
				
			||||||
 | 
					  set(TARGET_CONF "config.h")
 | 
				
			||||||
 | 
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					include("${CMAKE_SOURCE_DIR}/cmake/c_check.cmake")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if (NOT NOFORTRAN)
 | 
				
			||||||
 | 
					  include("${CMAKE_SOURCE_DIR}/cmake/f_check.cmake")
 | 
				
			||||||
 | 
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# compile getarch
 | 
				
			||||||
 | 
					set(GETARCH_SRC
 | 
				
			||||||
 | 
					  ${CMAKE_SOURCE_DIR}/getarch.c
 | 
				
			||||||
 | 
					  ${CPUIDEMU}  # optional CPUID emulation table; same variable that enables -DCPUIDEMU above
 | 
				
			||||||
 | 
					)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if (NOT MSVC)
 | 
				
			||||||
 | 
					  list(APPEND GETARCH_SRC ${CMAKE_SOURCE_DIR}/cpuid.S)
 | 
				
			||||||
 | 
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if (MSVC)
 | 
				
			||||||
 | 
					#Use generic for MSVC now
 | 
				
			||||||
 | 
					set(GETARCH_FLAGS ${GETARCH_FLAGS} -DFORCE_GENERIC)
 | 
				
			||||||
 | 
					endif()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					set(GETARCH_DIR "${PROJECT_BINARY_DIR}/getarch_build")
 | 
				
			||||||
 | 
					set(GETARCH_BIN "getarch${CMAKE_EXECUTABLE_SUFFIX}")
 | 
				
			||||||
 | 
					file(MAKE_DIRECTORY ${GETARCH_DIR})
 | 
				
			||||||
 | 
					try_compile(GETARCH_RESULT ${GETARCH_DIR}
 | 
				
			||||||
 | 
					  SOURCES ${GETARCH_SRC}
 | 
				
			||||||
 | 
					  COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} -I${CMAKE_SOURCE_DIR}
 | 
				
			||||||
 | 
					  OUTPUT_VARIABLE GETARCH_LOG
 | 
				
			||||||
 | 
					  COPY_FILE ${PROJECT_BINARY_DIR}/${GETARCH_BIN}
 | 
				
			||||||
 | 
					)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					message(STATUS "Running getarch")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# run the freshly built getarch binary directly: mode 0 emits Makefile-style vars, mode 1 emits config header lines
 | 
				
			||||||
 | 
					execute_process(COMMAND ${PROJECT_BINARY_DIR}/${GETARCH_BIN} 0 OUTPUT_VARIABLE GETARCH_MAKE_OUT)
 | 
				
			||||||
 | 
					execute_process(COMMAND ${PROJECT_BINARY_DIR}/${GETARCH_BIN} 1 OUTPUT_VARIABLE GETARCH_CONF_OUT)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					message(STATUS "GETARCH results:\n${GETARCH_MAKE_OUT}")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# append config data from getarch to the TARGET file and read in CMake vars
 | 
				
			||||||
 | 
					file(APPEND ${TARGET_CONF} ${GETARCH_CONF_OUT})
 | 
				
			||||||
 | 
					ParseGetArchVars(${GETARCH_MAKE_OUT})
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					set(GETARCH2_DIR "${PROJECT_BINARY_DIR}/getarch2_build")
 | 
				
			||||||
 | 
					set(GETARCH2_BIN "getarch_2nd${CMAKE_EXECUTABLE_SUFFIX}")
 | 
				
			||||||
 | 
					file(MAKE_DIRECTORY ${GETARCH2_DIR})
 | 
				
			||||||
 | 
					try_compile(GETARCH2_RESULT ${GETARCH2_DIR}
 | 
				
			||||||
 | 
					  SOURCES ${CMAKE_SOURCE_DIR}/getarch_2nd.c
 | 
				
			||||||
 | 
					  COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} ${GETARCH2_FLAGS} -I${CMAKE_SOURCE_DIR}
 | 
				
			||||||
 | 
					  OUTPUT_VARIABLE GETARCH2_LOG
 | 
				
			||||||
 | 
					  COPY_FILE ${PROJECT_BINARY_DIR}/${GETARCH2_BIN}
 | 
				
			||||||
 | 
					)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# run the freshly built getarch_2nd binary directly: mode 0 emits Makefile-style vars, mode 1 emits config header lines
 | 
				
			||||||
 | 
					execute_process(COMMAND ${PROJECT_BINARY_DIR}/${GETARCH2_BIN} 0 OUTPUT_VARIABLE GETARCH2_MAKE_OUT)
 | 
				
			||||||
 | 
					execute_process(COMMAND ${PROJECT_BINARY_DIR}/${GETARCH2_BIN} 1 OUTPUT_VARIABLE GETARCH2_CONF_OUT)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# append config data from getarch_2nd to the TARGET file and read in CMake vars
 | 
				
			||||||
 | 
					file(APPEND ${TARGET_CONF} ${GETARCH2_CONF_OUT})
 | 
				
			||||||
 | 
					ParseGetArchVars(${GETARCH2_MAKE_OUT})
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,552 @@
 | 
				
			||||||
 | 
					##
 | 
				
			||||||
 | 
					## Author: Hank Anderson <hank@statease.com>
 | 
				
			||||||
 | 
					## Description: Ported from OpenBLAS/Makefile.system
 | 
				
			||||||
 | 
					##
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					set(NETLIB_LAPACK_DIR "${CMAKE_SOURCE_DIR}/lapack-netlib")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# TODO: Makefile.system detects Darwin (mac) and switches to clang here -hpa
 | 
				
			||||||
 | 
					# http://stackoverflow.com/questions/714100/os-detecting-makefile
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# TODO: Makefile.system sets HOSTCC = $(CC) here if not already set -hpa
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# TARGET_CORE will override TARGET which is used in DYNAMIC_ARCH=1.
 | 
				
			||||||
 | 
					if (DEFINED TARGET_CORE)
 | 
				
			||||||
 | 
					  set(TARGET ${TARGET_CORE})
 | 
				
			||||||
 | 
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Force fallbacks for 32bit
 | 
				
			||||||
 | 
					if (DEFINED BINARY AND DEFINED TARGET AND BINARY EQUAL 32)
 | 
				
			||||||
 | 
					  message(STATUS "Compiling a ${BINARY}-bit binary.")
 | 
				
			||||||
 | 
					  set(NO_AVX 1)
 | 
				
			||||||
 | 
					  if (${TARGET} STREQUAL "HASWELL" OR ${TARGET} STREQUAL "SANDYBRIDGE")
 | 
				
			||||||
 | 
					    set(TARGET "NEHALEM")
 | 
				
			||||||
 | 
					  endif ()
 | 
				
			||||||
 | 
					  if (${TARGET} STREQUAL "BULLDOZER" OR ${TARGET} STREQUAL "PILEDRIVER")
 | 
				
			||||||
 | 
					    set(TARGET "BARCELONA")
 | 
				
			||||||
 | 
					  endif ()
 | 
				
			||||||
 | 
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if (DEFINED TARGET)
 | 
				
			||||||
 | 
					  message(STATUS "Targetting the ${TARGET} architecture.")
 | 
				
			||||||
 | 
					  set(GETARCH_FLAGS "-DFORCE_${TARGET}")
 | 
				
			||||||
 | 
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if (INTERFACE64)
 | 
				
			||||||
 | 
					  message(STATUS "Using 64-bit integers.")
 | 
				
			||||||
 | 
					  set(GETARCH_FLAGS	"${GETARCH_FLAGS} -DUSE64BITINT")
 | 
				
			||||||
 | 
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if (NOT DEFINED GEMM_MULTITHREAD_THRESHOLD)
 | 
				
			||||||
 | 
					  set(GEMM_MULTITHREAD_THRESHOLD 4)
 | 
				
			||||||
 | 
					endif ()
 | 
				
			||||||
 | 
					message(STATUS "GEMM multithread threshold set to ${GEMM_MULTITHREAD_THRESHOLD}.")
 | 
				
			||||||
 | 
					set(GETARCH_FLAGS	"${GETARCH_FLAGS} -DGEMM_MULTITHREAD_THRESHOLD=${GEMM_MULTITHREAD_THRESHOLD}")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if (NO_AVX)
 | 
				
			||||||
 | 
					  message(STATUS "Disabling Advanced Vector Extensions (AVX).")
 | 
				
			||||||
 | 
					  set(GETARCH_FLAGS "${GETARCH_FLAGS} -DNO_AVX")
 | 
				
			||||||
 | 
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if (NO_AVX2)
 | 
				
			||||||
 | 
					  message(STATUS "Disabling Advanced Vector Extensions 2 (AVX2).")
 | 
				
			||||||
 | 
					  set(GETARCH_FLAGS "${GETARCH_FLAGS} -DNO_AVX2")
 | 
				
			||||||
 | 
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if (CMAKE_BUILD_TYPE STREQUAL Debug)
 | 
				
			||||||
 | 
					  set(GETARCH_FLAGS "${GETARCH_FLAGS} -g")
 | 
				
			||||||
 | 
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# TODO: let CMake handle this? -hpa
 | 
				
			||||||
 | 
					#if (${QUIET_MAKE})
 | 
				
			||||||
 | 
					#  set(MAKE "${MAKE} -s")
 | 
				
			||||||
 | 
					#endif()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if (NOT DEFINED NO_PARALLEL_MAKE)
 | 
				
			||||||
 | 
					  set(NO_PARALLEL_MAKE 0)
 | 
				
			||||||
 | 
					endif ()
 | 
				
			||||||
 | 
					set(GETARCH_FLAGS	"${GETARCH_FLAGS} -DNO_PARALLEL_MAKE=${NO_PARALLEL_MAKE}")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# getarch is built with the C compiler, and CMAKE_C_COMPILER is normally a full path,
					# so match on the compiler name instead of comparing the whole value.
					if ("${CMAKE_C_COMPILER}" MATCHES "loongcc")
 | 
				
			||||||
 | 
					  set(GETARCH_FLAGS	"${GETARCH_FLAGS} -static")
 | 
				
			||||||
 | 
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# If Fortran is not used, only CBLAS will be compiled.
 | 
				
			||||||
 | 
					if (ONLY_CBLAS)
 | 
				
			||||||
 | 
					  set(NO_LAPACK 1)
 | 
				
			||||||
 | 
					else ()
 | 
				
			||||||
 | 
					  set(ONLY_CBLAS 0)
 | 
				
			||||||
 | 
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					include("${CMAKE_SOURCE_DIR}/cmake/prebuild.cmake")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if (NOT DEFINED NUM_THREADS)
 | 
				
			||||||
 | 
					  set(NUM_THREADS ${NUM_CORES})
 | 
				
			||||||
 | 
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if (${NUM_THREADS} EQUAL 1)
 | 
				
			||||||
 | 
					  set(USE_THREAD 0)
 | 
				
			||||||
 | 
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if (DEFINED USE_THREAD)
 | 
				
			||||||
 | 
					  if (NOT ${USE_THREAD})
 | 
				
			||||||
 | 
					    unset(SMP)
 | 
				
			||||||
 | 
					  else ()
 | 
				
			||||||
 | 
					    set(SMP 1)
 | 
				
			||||||
 | 
					  endif ()
 | 
				
			||||||
 | 
					else ()
 | 
				
			||||||
 | 
					  # N.B. this is NUM_THREAD in Makefile.system which is probably a bug -hpa
 | 
				
			||||||
 | 
					  if (${NUM_THREADS} EQUAL 1)
 | 
				
			||||||
 | 
					    unset(SMP)
 | 
				
			||||||
 | 
					  else ()
 | 
				
			||||||
 | 
					    set(SMP 1)
 | 
				
			||||||
 | 
					  endif ()
 | 
				
			||||||
 | 
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if (${SMP})
 | 
				
			||||||
 | 
					  message(STATUS "SMP enabled.")
 | 
				
			||||||
 | 
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if (NOT DEFINED NEED_PIC)
 | 
				
			||||||
 | 
					  set(NEED_PIC 1)
 | 
				
			||||||
 | 
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# TODO: I think CMake should be handling all this stuff -hpa
 | 
				
			||||||
 | 
					unset(ARFLAGS)
 | 
				
			||||||
 | 
					set(CPP "${COMPILER} -E")
 | 
				
			||||||
 | 
					set(AR "${CROSS_SUFFIX}ar")
 | 
				
			||||||
 | 
					set(AS "${CROSS_SUFFIX}as")
 | 
				
			||||||
 | 
					set(LD "${CROSS_SUFFIX}ld")
 | 
				
			||||||
 | 
					set(RANLIB "${CROSS_SUFFIX}ranlib")
 | 
				
			||||||
 | 
					set(NM "${CROSS_SUFFIX}nm")
 | 
				
			||||||
 | 
					set(DLLWRAP "${CROSS_SUFFIX}dllwrap")
 | 
				
			||||||
 | 
					set(OBJCOPY "${CROSS_SUFFIX}objcopy")
 | 
				
			||||||
 | 
					set(OBJCONV "${CROSS_SUFFIX}objconv")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# OS dependent settings
 | 
				
			||||||
 | 
					include("${CMAKE_SOURCE_DIR}/cmake/os.cmake")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Architecture dependent settings
 | 
				
			||||||
 | 
					include("${CMAKE_SOURCE_DIR}/cmake/arch.cmake")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# C Compiler dependent settings
 | 
				
			||||||
 | 
					include("${CMAKE_SOURCE_DIR}/cmake/cc.cmake")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if (NOT NOFORTRAN)
 | 
				
			||||||
 | 
					  # Fortran Compiler dependent settings
 | 
				
			||||||
 | 
					  include("${CMAKE_SOURCE_DIR}/cmake/fc.cmake")
 | 
				
			||||||
 | 
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if (BINARY64)
 | 
				
			||||||
 | 
					  if (INTERFACE64)
 | 
				
			||||||
 | 
					    # CCOMMON_OPT += -DUSE64BITINT
 | 
				
			||||||
 | 
					  endif ()
 | 
				
			||||||
 | 
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if (NEED_PIC)
 | 
				
			||||||
 | 
					  if (${CMAKE_C_COMPILER} STREQUAL "IBM")
 | 
				
			||||||
 | 
					    set(CCOMMON_OPT "${CCOMMON_OPT} -qpic=large")
 | 
				
			||||||
 | 
					  else ()
 | 
				
			||||||
 | 
					    set(CCOMMON_OPT "${CCOMMON_OPT} -fPIC")
 | 
				
			||||||
 | 
					  endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  # Quote the expansion: F_COMPILER may be unset (e.g. NOFORTRAN), and an empty
					  # unquoted expansion would make this if() a configure-time syntax error.
					  if ("${F_COMPILER}" STREQUAL "SUN")
 | 
				
			||||||
 | 
					    set(FCOMMON_OPT "${FCOMMON_OPT} -pic")
 | 
				
			||||||
 | 
					  else ()
 | 
				
			||||||
 | 
					    set(FCOMMON_OPT "${FCOMMON_OPT} -fPIC")
 | 
				
			||||||
 | 
					  endif ()
 | 
				
			||||||
 | 
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if (DYNAMIC_ARCH)
 | 
				
			||||||
 | 
					  set(CCOMMON_OPT "${CCOMMON_OPT} -DDYNAMIC_ARCH")
 | 
				
			||||||
 | 
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if (NO_LAPACK)
 | 
				
			||||||
 | 
					  set(CCOMMON_OPT "${CCOMMON_OPT} -DNO_LAPACK")
 | 
				
			||||||
 | 
					  #Disable LAPACK C interface
 | 
				
			||||||
 | 
					  set(NO_LAPACKE 1)
 | 
				
			||||||
 | 
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if (NO_LAPACKE)
 | 
				
			||||||
 | 
					  set(CCOMMON_OPT "${CCOMMON_OPT} -DNO_LAPACKE")
 | 
				
			||||||
 | 
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if (NO_AVX)
 | 
				
			||||||
 | 
					  set(CCOMMON_OPT "${CCOMMON_OPT} -DNO_AVX")
 | 
				
			||||||
 | 
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if (${ARCH} STREQUAL "x86")
 | 
				
			||||||
 | 
					  set(CCOMMON_OPT "${CCOMMON_OPT} -DNO_AVX")
 | 
				
			||||||
 | 
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if (NO_AVX2)
 | 
				
			||||||
 | 
					  set(CCOMMON_OPT "${CCOMMON_OPT} -DNO_AVX2")
 | 
				
			||||||
 | 
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if (SMP)
 | 
				
			||||||
 | 
					  set(CCOMMON_OPT "${CCOMMON_OPT} -DSMP_SERVER")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  if (${ARCH} STREQUAL "mips64")
 | 
				
			||||||
 | 
					    if (NOT ${CORE} STREQUAL "LOONGSON3B")
 | 
				
			||||||
 | 
					      set(USE_SIMPLE_THREADED_LEVEL3 1)
 | 
				
			||||||
 | 
					    endif ()
 | 
				
			||||||
 | 
					  endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  if (USE_OPENMP)
 | 
				
			||||||
 | 
					    # USE_SIMPLE_THREADED_LEVEL3 = 1
 | 
				
			||||||
 | 
					    # NO_AFFINITY = 1
 | 
				
			||||||
 | 
					    set(CCOMMON_OPT "${CCOMMON_OPT} -DUSE_OPENMP")
 | 
				
			||||||
 | 
					  endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  if (BIGNUMA)
 | 
				
			||||||
 | 
					    set(CCOMMON_OPT "${CCOMMON_OPT} -DBIGNUMA")
 | 
				
			||||||
 | 
					  endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if (NO_WARMUP)
 | 
				
			||||||
 | 
					  set(CCOMMON_OPT "${CCOMMON_OPT} -DNO_WARMUP")
 | 
				
			||||||
 | 
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if (CONSISTENT_FPCSR)
 | 
				
			||||||
 | 
					  set(CCOMMON_OPT "${CCOMMON_OPT} -DCONSISTENT_FPCSR")
 | 
				
			||||||
 | 
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Only for development
 | 
				
			||||||
 | 
					# set(CCOMMON_OPT "${CCOMMON_OPT} -DPARAMTEST")
 | 
				
			||||||
 | 
					# set(CCOMMON_OPT "${CCOMMON_OPT} -DPREFETCHTEST")
 | 
				
			||||||
 | 
					# set(CCOMMON_OPT "${CCOMMON_OPT} -DNO_SWITCHING")
 | 
				
			||||||
 | 
					# set(USE_PAPI 1)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if (USE_PAPI)
 | 
				
			||||||
 | 
					  set(CCOMMON_OPT "${CCOMMON_OPT} -DUSE_PAPI")
 | 
				
			||||||
 | 
					  set(EXTRALIB "${EXTRALIB} -lpapi -lperfctr")
 | 
				
			||||||
 | 
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if (DYNAMIC_THREADS)
 | 
				
			||||||
 | 
					  set(CCOMMON_OPT "${CCOMMON_OPT} -DDYNAMIC_THREADS")
 | 
				
			||||||
 | 
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					set(CCOMMON_OPT "${CCOMMON_OPT} -DMAX_CPU_NUMBER=${NUM_THREADS}")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if (USE_SIMPLE_THREADED_LEVEL3)
 | 
				
			||||||
 | 
					  set(CCOMMON_OPT "${CCOMMON_OPT} -DUSE_SIMPLE_THREADED_LEVEL3")
 | 
				
			||||||
 | 
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if (DEFINED LIBNAMESUFFIX)
 | 
				
			||||||
 | 
					  set(LIBPREFIX "libopenblas_${LIBNAMESUFFIX}")
 | 
				
			||||||
 | 
					else ()
 | 
				
			||||||
 | 
					  set(LIBPREFIX "libopenblas")
 | 
				
			||||||
 | 
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if (NOT DEFINED SYMBOLPREFIX)
 | 
				
			||||||
 | 
					  set(SYMBOLPREFIX "")
 | 
				
			||||||
 | 
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if (NOT DEFINED SYMBOLSUFFIX)
 | 
				
			||||||
 | 
					  set(SYMBOLSUFFIX "")
 | 
				
			||||||
 | 
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					set(KERNELDIR	"${CMAKE_SOURCE_DIR}/kernel/${ARCH}")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# TODO: need to convert these Makefiles
 | 
				
			||||||
 | 
					# include ${CMAKE_SOURCE_DIR}/cmake/${ARCH}.cmake
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if (${CORE} STREQUAL "PPC440")
 | 
				
			||||||
 | 
					  set(CCOMMON_OPT "${CCOMMON_OPT} -DALLOC_QALLOC")
 | 
				
			||||||
 | 
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if (${CORE} STREQUAL "PPC440FP2")
 | 
				
			||||||
 | 
					  set(STATIC_ALLOCATION 1)
 | 
				
			||||||
 | 
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if (NOT ${CMAKE_SYSTEM_NAME} STREQUAL "Linux")
 | 
				
			||||||
 | 
					  set(NO_AFFINITY 1)
 | 
				
			||||||
 | 
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if (NOT ${ARCH} STREQUAL "x86_64" AND NOT ${ARCH} STREQUAL "x86" AND NOT ${CORE} STREQUAL "LOONGSON3B")
 | 
				
			||||||
 | 
					  set(NO_AFFINITY 1)
 | 
				
			||||||
 | 
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if (NO_AFFINITY)
 | 
				
			||||||
 | 
					  set(CCOMMON_OPT "${CCOMMON_OPT} -DNO_AFFINITY")
 | 
				
			||||||
 | 
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if (FUNCTION_PROFILE)
 | 
				
			||||||
 | 
					  set(CCOMMON_OPT "${CCOMMON_OPT} -DFUNCTION_PROFILE")
 | 
				
			||||||
 | 
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if (HUGETLB_ALLOCATION)
 | 
				
			||||||
 | 
					  set(CCOMMON_OPT "${CCOMMON_OPT} -DALLOC_HUGETLB")
 | 
				
			||||||
 | 
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if (DEFINED HUGETLBFILE_ALLOCATION)
 | 
				
			||||||
 | 
					  # Pass the hugetlb file name through unchanged (no trailing ')' in the define).
					  set(CCOMMON_OPT "${CCOMMON_OPT} -DALLOC_HUGETLBFILE -DHUGETLB_FILE_NAME=${HUGETLBFILE_ALLOCATION}")
 | 
				
			||||||
 | 
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if (STATIC_ALLOCATION)
 | 
				
			||||||
 | 
					  set(CCOMMON_OPT "${CCOMMON_OPT} -DALLOC_STATIC")
 | 
				
			||||||
 | 
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if (DEVICEDRIVER_ALLOCATION)
 | 
				
			||||||
 | 
					  set(CCOMMON_OPT "${CCOMMON_OPT} -DALLOC_DEVICEDRIVER -DDEVICEDRIVER_NAME=\"/dev/mapper\"")
 | 
				
			||||||
 | 
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if (MIXED_MEMORY_ALLOCATION)
 | 
				
			||||||
 | 
					  set(CCOMMON_OPT "${CCOMMON_OPT} -DMIXED_MEMORY_ALLOCATION")
 | 
				
			||||||
 | 
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if (${CMAKE_SYSTEM_NAME} STREQUAL "SunOS")
 | 
				
			||||||
 | 
					  set(TAR	gtar)
 | 
				
			||||||
 | 
					  set(PATCH	gpatch)
 | 
				
			||||||
 | 
					  set(GREP ggrep)
 | 
				
			||||||
 | 
					else ()
 | 
				
			||||||
 | 
					  set(TAR tar)
 | 
				
			||||||
 | 
					  set(PATCH patch)
 | 
				
			||||||
 | 
					  set(GREP grep)
 | 
				
			||||||
 | 
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if (NOT DEFINED MD5SUM)
 | 
				
			||||||
 | 
					  set(MD5SUM md5sum)
 | 
				
			||||||
 | 
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					set(AWK awk)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					set(REVISION "-r${OpenBLAS_VERSION}")
 | 
				
			||||||
 | 
					set(MAJOR_VERSION ${OpenBLAS_MAJOR_VERSION})
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if (DEBUG)
 | 
				
			||||||
 | 
					  set(COMMON_OPT "${COMMON_OPT} -g")
 | 
				
			||||||
 | 
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if (NOT DEFINED COMMON_OPT)
 | 
				
			||||||
 | 
					  set(COMMON_OPT "-O2")
 | 
				
			||||||
 | 
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#For x86 32-bit
 | 
				
			||||||
 | 
					if (DEFINED BINARY AND BINARY EQUAL 32)
 | 
				
			||||||
 | 
					if (NOT MSVC)
 | 
				
			||||||
 | 
					  set(COMMON_OPT "${COMMON_OPT} -m32")
 | 
				
			||||||
 | 
					endif()
 | 
				
			||||||
 | 
					endif()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${COMMON_OPT} ${CCOMMON_OPT}")
 | 
				
			||||||
 | 
					if(NOT MSVC)
 | 
				
			||||||
 | 
					set(CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} ${COMMON_OPT} ${CCOMMON_OPT}")
 | 
				
			||||||
 | 
					endif()
 | 
				
			||||||
 | 
					# TODO: not sure what PFLAGS is -hpa
 | 
				
			||||||
 | 
					set(PFLAGS "${PFLAGS} ${COMMON_OPT} ${CCOMMON_OPT} -I${TOPDIR} -DPROFILE ${COMMON_PROF}")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} ${COMMON_OPT} ${FCOMMON_OPT}")
 | 
				
			||||||
 | 
					# TODO: not sure what FPFLAGS is -hpa
 | 
				
			||||||
 | 
					set(FPFLAGS "${FPFLAGS} ${COMMON_OPT} ${FCOMMON_OPT} ${COMMON_PROF}")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#For LAPACK Fortran codes.
 | 
				
			||||||
 | 
					set(LAPACK_FFLAGS "${LAPACK_FFLAGS} ${CMAKE_Fortran_FLAGS}")
 | 
				
			||||||
 | 
					set(LAPACK_FPFLAGS "${LAPACK_FPFLAGS} ${FPFLAGS}")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#Disable -fopenmp for LAPACK Fortran codes on Windows.
 | 
				
			||||||
 | 
					if (${CMAKE_SYSTEM_NAME} STREQUAL "Windows")
 | 
				
			||||||
 | 
					  # OpenMP switches (several compiler spellings) to strip from the LAPACK Fortran flags.
					  set(FILTER_FLAGS "-fopenmp;-mp;-openmp;-xopenmp=parallel")
 | 
				
			||||||
 | 
					  foreach (FILTER_FLAG ${FILTER_FLAGS})
 | 
				
			||||||
 | 
					    string(REPLACE ${FILTER_FLAG} "" LAPACK_FFLAGS ${LAPACK_FFLAGS})
 | 
				
			||||||
 | 
					    string(REPLACE ${FILTER_FLAG} "" LAPACK_FPFLAGS ${LAPACK_FPFLAGS})
 | 
				
			||||||
 | 
					  endforeach ()
 | 
				
			||||||
 | 
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if ("${F_COMPILER}" STREQUAL "GFORTRAN")
 | 
				
			||||||
 | 
					  # lapack-netlib is rife with uninitialized warnings -hpa
 | 
				
			||||||
 | 
					  set(LAPACK_FFLAGS "${LAPACK_FFLAGS} -Wno-maybe-uninitialized")
 | 
				
			||||||
 | 
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# LAPACK C sources inherit the common C flags assembled above in CMAKE_C_FLAGS.
					set(LAPACK_CFLAGS "${CMAKE_C_FLAGS} -DHAVE_LAPACK_CONFIG_H")
 | 
				
			||||||
 | 
					if (INTERFACE64)
 | 
				
			||||||
 | 
					  set(LAPACK_CFLAGS "${LAPACK_CFLAGS} -DLAPACK_ILP64")
 | 
				
			||||||
 | 
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if (${CMAKE_SYSTEM_NAME} STREQUAL "Windows")
 | 
				
			||||||
 | 
					  set(LAPACK_CFLAGS "${LAPACK_CFLAGS} -DOPENBLAS_OS_WINDOWS")
 | 
				
			||||||
 | 
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if (${CMAKE_C_COMPILER} STREQUAL "LSB" OR ${CMAKE_SYSTEM_NAME} STREQUAL "Windows")
 | 
				
			||||||
 | 
					  set(LAPACK_CFLAGS "${LAPACK_CFLAGS} -DLAPACK_COMPLEX_STRUCTURE")
 | 
				
			||||||
 | 
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if (NOT DEFINED SUFFIX)
 | 
				
			||||||
 | 
					  set(SUFFIX o)
 | 
				
			||||||
 | 
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if (NOT DEFINED PSUFFIX)
 | 
				
			||||||
 | 
					  set(PSUFFIX po)
 | 
				
			||||||
 | 
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if (NOT DEFINED LIBSUFFIX)
 | 
				
			||||||
 | 
					  set(LIBSUFFIX a)
 | 
				
			||||||
 | 
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if (DYNAMIC_ARCH)
 | 
				
			||||||
 | 
					  if (DEFINED SMP)
 | 
				
			||||||
 | 
					    set(LIBNAME "${LIBPREFIX}p${REVISION}.${LIBSUFFIX}")
 | 
				
			||||||
 | 
					    set(LIBNAME_P	"${LIBPREFIX}p${REVISION}_p.${LIBSUFFIX}")
 | 
				
			||||||
 | 
					  else ()
 | 
				
			||||||
 | 
					    set(LIBNAME "${LIBPREFIX}${REVISION}.${LIBSUFFIX}")
 | 
				
			||||||
 | 
					    set(LIBNAME_P	"${LIBPREFIX}${REVISION}_p.${LIBSUFFIX}")
 | 
				
			||||||
 | 
					  endif ()
 | 
				
			||||||
 | 
					else ()
 | 
				
			||||||
 | 
					  if (DEFINED SMP)
 | 
				
			||||||
 | 
					    set(LIBNAME "${LIBPREFIX}_${LIBCORE}p${REVISION}.${LIBSUFFIX}")
 | 
				
			||||||
 | 
					    set(LIBNAME_P	"${LIBPREFIX}_${LIBCORE}p${REVISION}_p.${LIBSUFFIX}")
 | 
				
			||||||
 | 
					  else ()
 | 
				
			||||||
 | 
					    set(LIBNAME	"${LIBPREFIX}_${LIBCORE}${REVISION}.${LIBSUFFIX}")
 | 
				
			||||||
 | 
					    set(LIBNAME_P	"${LIBPREFIX}_${LIBCORE}${REVISION}_p.${LIBSUFFIX}")
 | 
				
			||||||
 | 
					  endif ()
 | 
				
			||||||
 | 
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					set(LIBDLLNAME "${LIBPREFIX}.dll")
 | 
				
			||||||
 | 
					# LIBNAME already ends in .${LIBSUFFIX}; swap that suffix for .so instead of appending to it.
					string(REGEX REPLACE "\\.${LIBSUFFIX}$" ".so" LIBSONAME "${LIBNAME}")
 | 
				
			||||||
 | 
					# LIBNAME already ends in .${LIBSUFFIX}; swap that suffix for .dylib instead of appending to it.
					string(REGEX REPLACE "\\.${LIBSUFFIX}$" ".dylib" LIBDYNNAME "${LIBNAME}")
 | 
				
			||||||
 | 
					# LIBNAME already ends in .${LIBSUFFIX}; swap that suffix for .def instead of appending to it.
					string(REGEX REPLACE "\\.${LIBSUFFIX}$" ".def" LIBDEFNAME "${LIBNAME}")
 | 
				
			||||||
 | 
					# LIBNAME already ends in .${LIBSUFFIX}; swap that suffix for .exp instead of appending to it.
					string(REGEX REPLACE "\\.${LIBSUFFIX}$" ".exp" LIBEXPNAME "${LIBNAME}")
 | 
				
			||||||
 | 
					# LIBNAME already ends in .${LIBSUFFIX}; swap that suffix for .zip instead of appending to it.
					string(REGEX REPLACE "\\.${LIBSUFFIX}$" ".zip" LIBZIPNAME "${LIBNAME}")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					set(LIBS "${CMAKE_SOURCE_DIR}/${LIBNAME}")
 | 
				
			||||||
 | 
					set(LIBS_P "${CMAKE_SOURCE_DIR}/${LIBNAME_P}")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					set(LIB_COMPONENTS BLAS)
 | 
				
			||||||
 | 
					if (NOT NO_CBLAS)
 | 
				
			||||||
 | 
					  set(LIB_COMPONENTS "${LIB_COMPONENTS} CBLAS")
 | 
				
			||||||
 | 
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if (NOT NO_LAPACK)
 | 
				
			||||||
 | 
					  set(LIB_COMPONENTS "${LIB_COMPONENTS} LAPACK")
 | 
				
			||||||
 | 
					  if (NOT NO_LAPACKE)
 | 
				
			||||||
 | 
					    set(LIB_COMPONENTS "${LIB_COMPONENTS} LAPACKE")
 | 
				
			||||||
 | 
					  endif ()
 | 
				
			||||||
 | 
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if (ONLY_CBLAS)
 | 
				
			||||||
 | 
					  set(LIB_COMPONENTS CBLAS)
 | 
				
			||||||
 | 
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# For GEMM3M
 | 
				
			||||||
 | 
					set(USE_GEMM3M 0)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if (DEFINED ARCH)
 | 
				
			||||||
 | 
					  if (${ARCH} STREQUAL "x86" OR ${ARCH} STREQUAL "x86_64" OR ${ARCH} STREQUAL "ia64" OR ${ARCH} STREQUAL "MIPS")
 | 
				
			||||||
 | 
					    set(USE_GEMM3M 1)
 | 
				
			||||||
 | 
					  endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  if (${CORE} STREQUAL "generic")
 | 
				
			||||||
 | 
					    set(USE_GEMM3M 0)
 | 
				
			||||||
 | 
					  endif ()
 | 
				
			||||||
 | 
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#export OSNAME
 | 
				
			||||||
 | 
					#export ARCH
 | 
				
			||||||
 | 
					#export CORE
 | 
				
			||||||
 | 
					#export LIBCORE
 | 
				
			||||||
 | 
					#export PGCPATH
 | 
				
			||||||
 | 
					#export CONFIG
 | 
				
			||||||
 | 
					#export CC
 | 
				
			||||||
 | 
					#export FC
 | 
				
			||||||
 | 
					#export BU
 | 
				
			||||||
 | 
					#export FU
 | 
				
			||||||
 | 
					#export NEED2UNDERSCORES
 | 
				
			||||||
 | 
					#export USE_THREAD
 | 
				
			||||||
 | 
					#export NUM_THREADS
 | 
				
			||||||
 | 
					#export NUM_CORES
 | 
				
			||||||
 | 
					#export SMP
 | 
				
			||||||
 | 
					#export MAKEFILE_RULE
 | 
				
			||||||
 | 
					#export NEED_PIC
 | 
				
			||||||
 | 
					#export BINARY
 | 
				
			||||||
 | 
					#export BINARY32
 | 
				
			||||||
 | 
					#export BINARY64
 | 
				
			||||||
 | 
					#export F_COMPILER
 | 
				
			||||||
 | 
					#export C_COMPILER
 | 
				
			||||||
 | 
					#export USE_OPENMP
 | 
				
			||||||
 | 
					#export CROSS
 | 
				
			||||||
 | 
					#export CROSS_SUFFIX
 | 
				
			||||||
 | 
					#export NOFORTRAN
 | 
				
			||||||
 | 
					#export NO_FBLAS
 | 
				
			||||||
 | 
					#export EXTRALIB
 | 
				
			||||||
 | 
					#export CEXTRALIB
 | 
				
			||||||
 | 
					#export FEXTRALIB
 | 
				
			||||||
 | 
					#export HAVE_SSE
 | 
				
			||||||
 | 
					#export HAVE_SSE2
 | 
				
			||||||
 | 
					#export HAVE_SSE3
 | 
				
			||||||
 | 
					#export HAVE_SSSE3
 | 
				
			||||||
 | 
					#export HAVE_SSE4_1
 | 
				
			||||||
 | 
					#export HAVE_SSE4_2
 | 
				
			||||||
 | 
					#export HAVE_SSE4A
 | 
				
			||||||
 | 
					#export HAVE_SSE5
 | 
				
			||||||
 | 
					#export HAVE_AVX
 | 
				
			||||||
 | 
					#export HAVE_VFP
 | 
				
			||||||
 | 
					#export HAVE_VFPV3
 | 
				
			||||||
 | 
					#export HAVE_VFPV4
 | 
				
			||||||
 | 
					#export HAVE_NEON
 | 
				
			||||||
 | 
					#export KERNELDIR
 | 
				
			||||||
 | 
					#export FUNCTION_PROFILE
 | 
				
			||||||
 | 
					#export TARGET_CORE
 | 
				
			||||||
 | 
					#
 | 
				
			||||||
 | 
					#export SGEMM_UNROLL_M
 | 
				
			||||||
 | 
					#export SGEMM_UNROLL_N
 | 
				
			||||||
 | 
					#export DGEMM_UNROLL_M
 | 
				
			||||||
 | 
					#export DGEMM_UNROLL_N
 | 
				
			||||||
 | 
					#export QGEMM_UNROLL_M
 | 
				
			||||||
 | 
					#export QGEMM_UNROLL_N
 | 
				
			||||||
 | 
					#export CGEMM_UNROLL_M
 | 
				
			||||||
 | 
					#export CGEMM_UNROLL_N
 | 
				
			||||||
 | 
					#export ZGEMM_UNROLL_M
 | 
				
			||||||
 | 
					#export ZGEMM_UNROLL_N
 | 
				
			||||||
 | 
					#export XGEMM_UNROLL_M
 | 
				
			||||||
 | 
					#export XGEMM_UNROLL_N
 | 
				
			||||||
 | 
					#export CGEMM3M_UNROLL_M
 | 
				
			||||||
 | 
					#export CGEMM3M_UNROLL_N
 | 
				
			||||||
 | 
					#export ZGEMM3M_UNROLL_M
 | 
				
			||||||
 | 
					#export ZGEMM3M_UNROLL_N
 | 
				
			||||||
 | 
					#export XGEMM3M_UNROLL_M
 | 
				
			||||||
 | 
					#export XGEMM3M_UNROLL_N
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#if (USE_CUDA)
 | 
				
			||||||
 | 
					#  export CUDADIR
 | 
				
			||||||
 | 
					#  export CUCC
 | 
				
			||||||
 | 
					#  export CUFLAGS
 | 
				
			||||||
 | 
					#  export CULIB
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#.SUFFIXES: .$(PSUFFIX) .$(SUFFIX) .f
 | 
				
			||||||
 | 
					#
 | 
				
			||||||
 | 
					#.f.$(SUFFIX):
 | 
				
			||||||
 | 
					#	$(FC) $(FFLAGS) -c $<  -o $(@F)
 | 
				
			||||||
 | 
					#
 | 
				
			||||||
 | 
					#.f.$(PSUFFIX):
 | 
				
			||||||
 | 
					#	$(FC) $(FPFLAGS) -pg -c $<  -o $(@F)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# these are not cross-platform
 | 
				
			||||||
 | 
					#ifdef BINARY64
 | 
				
			||||||
 | 
					#PATHSCALEPATH	= /opt/pathscale/lib/3.1
 | 
				
			||||||
 | 
					#PGIPATH		= /opt/pgi/linux86-64/7.1-5/lib
 | 
				
			||||||
 | 
					#else
 | 
				
			||||||
 | 
					#PATHSCALEPATH	= /opt/pathscale/lib/3.1/32
 | 
				
			||||||
 | 
					#PGIPATH		= /opt/pgi/linux86/7.1-5/lib
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#ACMLPATH	= /opt/acml/4.3.0
 | 
				
			||||||
 | 
					#ifneq ($(OSNAME), Darwin)
 | 
				
			||||||
 | 
					#MKLPATH         = /opt/intel/mkl/10.2.2.025/lib
 | 
				
			||||||
 | 
					#else
 | 
				
			||||||
 | 
					#MKLPATH         = /Library/Frameworks/Intel_MKL.framework/Versions/10.0.1.014/lib
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					#ATLASPATH	= /opt/atlas/3.9.17/opteron
 | 
				
			||||||
 | 
					#FLAMEPATH	= $(HOME)/flame/lib
 | 
				
			||||||
 | 
					#ifneq ($(OSNAME), SunOS)
 | 
				
			||||||
 | 
					#SUNPATH		= /opt/sunstudio12.1
 | 
				
			||||||
 | 
					#else
 | 
				
			||||||
 | 
					#SUNPATH		= /opt/SUNWspro
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,346 @@
 | 
				
			||||||
 | 
					# Functions to help with the OpenBLAS build
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Imports the output of getarch into CMake variables in the caller's scope.
					# Only tokens of the form VARNAME=VALUE are picked up, where both sides consist of letters, digits and underscores.
					# @param GETARCH_IN the text printed by getarch
					function(ParseGetArchVars GETARCH_IN)
					  # collect every VARNAME=VALUE token; text that does not have that shape is ignored
					  string(REGEX MATCHALL "[0-9_a-zA-Z]+=[0-9_a-zA-Z]+" getarch_pairs "${GETARCH_IN}")
					  foreach (getarch_pair ${getarch_pairs})
					    # everything before the '=' is the variable name, everything after it is the value
					    string(REGEX REPLACE "=.*$" "" pair_name "${getarch_pair}")
					    string(REGEX REPLACE "^[^=]*=" "" pair_value "${getarch_pair}")
					    set(${pair_name} ${pair_value} PARENT_SCOPE)
					  endforeach ()
					endfunction ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Reads a Makefile into CMake vars.
					# A line of the form NAME = VALUE sets the CMake variable NAME to VALUE, after every Makefile reference $(OTHER)
					# inside VALUE has been replaced with the current value of the CMake variable OTHER.
					# A line of the form "include $(KERNELDIR)/FILE" causes ${KERNELDIR}/FILE to be parsed the same way.
					# Makefile comment lines (first non-blank character is '#') are skipped, so commented-out assignments and includes have no effect.
					# N.B. This is a macro: the parsed variables, and also the working variables used below
					#      (makefile_contents, makefile_line, line_match, var_name, var_value, make_var_matches, make_var), end up in the caller's scope.
					# @param MAKEFILE_IN path of the Makefile to read
					macro(ParseMakefileVars MAKEFILE_IN)
					  message(STATUS "Reading vars from ${MAKEFILE_IN}...")
					  file(STRINGS "${MAKEFILE_IN}" makefile_contents)
					  foreach (makefile_line ${makefile_contents})
					    # make ignores comment lines, so they must not be parsed here either (otherwise "#FOO = bar" would still set FOO)
					    if (NOT "${makefile_line}" MATCHES "^[ \t]*#")
					      string(REGEX MATCH "([0-9_a-zA-Z]+)[ \t]*=[ \t]*(.+)$" line_match "${makefile_line}")
					      if (NOT "${line_match}" STREQUAL "")
					        set(var_name ${CMAKE_MATCH_1})
					        set(var_value ${CMAKE_MATCH_2})
					        # check for Makefile variables in the string, e.g. $(TSUFFIX)
					        string(REGEX MATCHALL "\\$\\(([0-9_a-zA-Z]+)\\)" make_var_matches "${var_value}")
					        foreach (make_var ${make_var_matches})
					          # strip out Makefile $() markup
					          string(REGEX REPLACE "\\$\\(([0-9_a-zA-Z]+)\\)" "\\1" make_var "${make_var}")
					          # now replace the instance of the Makefile variable with the value of the CMake variable (note the double dereference).
					          # var_value is quoted: it can already be empty here (e.g. the same undefined variable referenced twice),
					          # and an unquoted empty expansion would leave string(REPLACE) without an input argument.
					          string(REPLACE "$(${make_var})" "${${make_var}}" var_value "${var_value}")
					        endforeach ()
					        set(${var_name} ${var_value})
					      else ()
					        string(REGEX MATCH "include \\$\\(KERNELDIR\\)/(.+)$" line_match "${makefile_line}")
					        if (NOT "${line_match}" STREQUAL "")
					          ParseMakefileVars("${KERNELDIR}/${CMAKE_MATCH_1}")
					        endif ()
					      endif ()
					    endif ()
					  endforeach ()
					endmacro ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Returns all combinations (subsets) of the input list, as a list with colon-separated combinations.
					# Combination number c contains element i of the input when bit i of c is set, so the output is ordered by that number.
					# E.g. input of A B C returns " " A B A:B C A:C B:C A:B:C (the single space stands for the empty combination).
					# N.B. The input is meant to be a list, and to pass a list to a function in CMake you must quote it (e.g. AllCombinations("${LIST_VAR}" "${CODES_VAR}")).
					# @param list_in the list of items to combine
					# @param absent_codes_in codes to use when an element is absent from a combination, one per element of list_in.
					#                        For example, if you have TRANS;UNIT;UPPER you may want the code to be NNL when nothing is present.
					# @returns LIST_OUT a list of combinations
					#          CODES_OUT a list of codes corresponding to each combination: for each input item, its first letter if it is present, otherwise its absent code
					function(AllCombinations list_in absent_codes_in)
					  list(LENGTH list_in list_count)
					  # there are 2^list_count combinations; subtract 1 since we will iterate from 0 to num_combos inclusive
					  math(EXPR num_combos "(1 << ${list_count}) - 1")
					  # foreach(RANGE) is inclusive as well, so the last valid index is needed (computed once, it does not depend on the combination)
					  math(EXPR last_list_index "${list_count} - 1")
					  set(LIST_OUT "")
					  set(CODES_OUT "")
					  foreach (c RANGE 0 ${num_combos})

					    set(current_combo "")
					    set(current_code "")

					    foreach (list_index RANGE 0 ${last_list_index})
					      math(EXPR bit "1 << ${list_index}")
					      math(EXPR combo_has_bit "${c} & ${bit}")
					      list(GET list_in ${list_index} list_elem)
					      if (combo_has_bit)
					        # compare with the empty string instead of testing truthiness: an element called N, NO, OFF, 0, ...
					        # is a false constant in CMake and would otherwise be overwritten by the next element
					        if (current_combo STREQUAL "")
					          set(current_combo "${list_elem}")
					        else ()
					          set(current_combo "${current_combo}:${list_elem}")
					        endif ()
					        string(SUBSTRING "${list_elem}" 0 1 code_char)
					      else ()
					        list(GET absent_codes_in ${list_index} code_char)
					      endif ()
					      set(current_code "${current_code}${code_char}")
					    endforeach ()

					    if (current_combo STREQUAL "")
					      list(APPEND LIST_OUT " ") # Empty set is a valid combination, but CMake isn't appending the empty string for some reason, use a space
					    else ()
					      list(APPEND LIST_OUT ${current_combo})
					    endif ()
					    list(APPEND CODES_OUT ${current_code})

					  endforeach ()

					  set(LIST_OUT ${LIST_OUT} PARENT_SCOPE)
					  set(CODES_OUT ${CODES_OUT} PARENT_SCOPE)
					endfunction ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# generates object files for each of the sources, using the BLAS naming scheme to pass the function name as a preprocessor definition
					# For every float type / source pair a small wrapper source is written to ${CMAKE_CURRENT_BINARY_DIR}${CMAKE_FILES_DIRECTORY}.
					# The wrapper #defines the names and then #includes the real source; its path is appended to OPENBLAS_SRC in the caller's scope.
					# The float types are taken from FLOAT_TYPES unless no_float_type or complex_filename_scheme say otherwise.
					# @param sources_in the source files to build from
					# @param defines_in (optional) preprocessor definitions that will be applied to all objects
					# @param name_in (optional) if this is set this name will be used instead of the filename. Use a * to indicate where the float character should go, if no star the character will be prepended.
					#                           e.g. with DOUBLE set, "i*max" will generate the name "idmax", and "max" will be "dmax"
					# @param use_cblas (optional) if true, "cblas_" is prepended to the object name and CBLAS is added to the definitions
					# @param replace_last_with replaces the last character in the filename with this string (e.g. symm_k should be symm_TU)
					# @param append_with appends the filename with this string (e.g. trmm_R should be trmm_RTUU or some other combination of characters)
					# @param no_float_type turns off the float type define for this build (e.g. SINGLE/DOUBLE/etc)
					# @param complex_filename_scheme some routines have separate source files for complex and non-complex float types.
					#                               0 - compiles for all types
					#                               1 - compiles the sources for non-complex types only (SINGLE/DOUBLE)
					#                               2 - compiles for complex types only (COMPLEX/DOUBLE COMPLEX)
					#                               3 - compiles for all types, but changes source names for complex by prepending z (e.g. axpy.c becomes zaxpy.c)
					#                               4 - compiles for complex types only, but changes source names for complex by prepending z (e.g. hemv.c becomes zhemv.c)
					#                               STRING - compiles only the given type (e.g. DOUBLE)
					function(GenerateNamedObjects sources_in)

					  # Optional arguments are detected through ARGC. ARGV<n> variables of an enclosing function call remain visible
					  # in here, so "DEFINED ARGV<n>" can pick up an argument that was never passed to this call. For the same reason
					  # every optional value gets an explicit default instead of inheriting a same-named variable from the caller.
					  set(defines_in "")
					  if (ARGC GREATER 1)
					    set(defines_in ${ARGV1})
					  endif ()

					  set(name_in "")
					  if (ARGC GREATER 2 AND NOT "${ARGV2}" STREQUAL "")
					    # strip off extension for kernel files that pass in the object name.
					    get_filename_component(name_in "${ARGV2}" NAME_WE)
					  endif ()

					  set(use_cblas false)
					  if (ARGC GREATER 3)
					    set(use_cblas ${ARGV3})
					  endif ()

					  set(replace_last_with "")
					  if (ARGC GREATER 4)
					    set(replace_last_with ${ARGV4})
					  endif ()

					  set(append_with "")
					  if (ARGC GREATER 5)
					    set(append_with ${ARGV5})
					  endif ()

					  set(no_float_type false)
					  if (ARGC GREATER 6)
					    set(no_float_type ${ARGV6})
					  endif ()

					  if (no_float_type)
					    set(float_list "DUMMY") # still need to loop once
					  else ()
					    set(float_list "${FLOAT_TYPES}")
					  endif ()

					  set(real_only false)
					  set(complex_only false)
					  set(mangle_complex_sources false)
					  if (ARGC GREATER 7 AND NOT "${ARGV7}" STREQUAL "")
					    if ("${ARGV7}" EQUAL 1)
					      set(real_only true)
					    elseif ("${ARGV7}" EQUAL 2)
					      set(complex_only true)
					    elseif ("${ARGV7}" EQUAL 3)
					      set(mangle_complex_sources true)
					    elseif ("${ARGV7}" EQUAL 4)
					      set(mangle_complex_sources true)
					      set(complex_only true)
					    elseif (NOT "${ARGV7}" EQUAL 0)
					      # anything that is not one of the numeric schemes is taken as the float type to build
					      set(float_list ${ARGV7})
					    endif ()
					  endif ()

					  if (complex_only)
					    list(REMOVE_ITEM float_list "SINGLE")
					    list(REMOVE_ITEM float_list "DOUBLE")
					  elseif (real_only)
					    list(REMOVE_ITEM float_list "COMPLEX")
					    list(REMOVE_ITEM float_list "ZCOMPLEX")
					  endif ()

					  set(float_char "")
					  set(SRC_LIST_OUT "") # start empty so that a SRC_LIST_OUT of the caller cannot leak into OPENBLAS_SRC
					  foreach (float_type ${float_list})

					    # the float character is the lower-cased first letter of the type (s/d/c/z); it stays empty without float types
					    if (NOT no_float_type)
					      string(SUBSTRING "${float_type}" 0 1 float_char)
					      string(TOLOWER "${float_char}" float_char)
					    endif ()

					    foreach (source_file ${sources_in})

					      if (NOT name_in)
					        get_filename_component(source_name "${source_file}" NAME_WE)
					        set(obj_name "${float_char}${source_name}")
					      else ()
					        # replace * with float_char (quoted, float_char is empty when no_float_type is set)
					        if ("${name_in}" MATCHES "\\*")
					          string(REPLACE "*" "${float_char}" obj_name "${name_in}")
					        else ()
					          set(obj_name "${float_char}${name_in}")
					        endif ()
					      endif ()

					      # NOTE(review): this is a truthiness test, so a replacement string that is a CMake false constant
					      # (e.g. a lone "N") is treated as "not given" - confirm that no caller relies on such a code
					      if (replace_last_with)
					        string(REGEX REPLACE ".$" "${replace_last_with}" obj_name "${obj_name}")
					      else ()
					        set(obj_name "${obj_name}${append_with}")
					      endif ()

					      # now add the object and set the defines
					      set(obj_defines ${defines_in})

					      if (use_cblas)
					        set(obj_name "cblas_${obj_name}")
					        list(APPEND obj_defines "CBLAS")
					      endif ()

					      list(APPEND obj_defines "ASMNAME=${FU}${obj_name};ASMFNAME=${FU}${obj_name}${BU};NAME=${obj_name}${BU};CNAME=${obj_name};CHAR_NAME=\"${obj_name}${BU}\";CHAR_CNAME=\"${obj_name}\"")
					      if ("${float_type}" STREQUAL "DOUBLE" OR "${float_type}" STREQUAL "ZCOMPLEX")
					        list(APPEND obj_defines "DOUBLE")
					      endif ()
					      if ("${float_type}" STREQUAL "COMPLEX" OR "${float_type}" STREQUAL "ZCOMPLEX")
					        list(APPEND obj_defines "COMPLEX")
					        if (mangle_complex_sources)
					          # add a z to the filename
					          get_filename_component(source_name "${source_file}" NAME)
					          string(REPLACE "${source_name}" "z${source_name}" source_file "${source_file}")
					        endif ()
					      endif ()

					      if (VERBOSE_GEN)
					        message(STATUS "${obj_name}:${source_file}")
					        message(STATUS "${obj_defines}")
					      endif ()

					      # create a copy of the source to avoid duplicate obj filename problem with ar.exe
					      get_filename_component(source_extension "${source_file}" EXT)
					      set(new_source_file "${CMAKE_CURRENT_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/${obj_name}${source_extension}")
					      if (IS_ABSOLUTE "${source_file}")
					        set(old_source_file "${source_file}")
					      else ()
					        set(old_source_file "${CMAKE_CURRENT_LIST_DIR}/${source_file}")
					      endif ()

					      # turn the list of NAME=VALUE definitions into "#define NAME VALUE" lines
					      string(REPLACE ";" "\n#define " define_source "${obj_defines}")
					      string(REPLACE "=" " " define_source "${define_source}")
					      set(new_source_contents "#define ${define_source}\n#include \"${old_source_file}\"")

					      # only touch the wrapper when its contents change, so that re-running CMake does not force a rebuild of every object
					      set(existing_source_contents "")
					      if (EXISTS "${new_source_file}")
					        file(READ "${new_source_file}" existing_source_contents)
					      endif ()
					      if (NOT "${existing_source_contents}" STREQUAL "${new_source_contents}")
					        file(WRITE "${new_source_file}" "${new_source_contents}")
					      endif ()
					      list(APPEND SRC_LIST_OUT ${new_source_file})

					    endforeach ()
					  endforeach ()

					  list(APPEND OPENBLAS_SRC ${SRC_LIST_OUT})
					  set(OPENBLAS_SRC ${OPENBLAS_SRC} PARENT_SCOPE)
					endfunction ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# generates object files for each of the sources for each of the combinations of the preprocessor definitions passed in
					# The combinations come from AllCombinations; every (combination, source) pair is handed to GenerateNamedObjects,
					# and the resulting OPENBLAS_SRC is passed on to the caller's scope.
					# @param sources_in the source files to build from
					# @param defines_in the preprocessor definitions that will be combined to create the object files
					# @param absent_codes_in the code letters to use in the object name when a definition is absent from a combination, one per entry of defines_in
					# @param all_defines_in preprocessor definitions that will be applied to all objects (may be empty)
					# @param replace_scheme If 1, replace the "k" in the filename with the define combo letters. E.g. symm_k.c with TRANS and UNIT defined will be symm_TU.
					#                  If 0, it will simply append the code, e.g. symm_L.c with TRANS and UNIT will be symm_LTU.
					#                  If 2, it will append the code with an underscore, e.g. symm.c with TRANS and UNIT will be symm_TU.
					#                  If 3, it will insert the code *around* the last character with an underscore, e.g. symm_L.c with TRANS and UNIT will be symm_TLU (required by BLAS level2 objects).
					#                  If 4, it will insert the code before the last underscore. E.g. trtri_U_parallel with TRANS will be trtri_UT_parallel
					# @param alternate_name (optional) replaces the source name as the object name (define codes are still appended)
					# @param no_float_type (optional) turns off the float type define for this build (e.g. SINGLE/DOUBLE/etc)
					# @param complex_filename_scheme (optional) see GenerateNamedObjects
					function(GenerateCombinationObjects sources_in defines_in absent_codes_in all_defines_in replace_scheme)

					  # optional arguments are detected through ARGC, since ARGV<n> of an enclosing function call would still be visible here
					  set(alternate_name_in "")
					  if (ARGC GREATER 5)
					    set(alternate_name_in ${ARGV5})
					  endif ()

					  set(no_float_type false)
					  if (ARGC GREATER 6)
					    set(no_float_type ${ARGV6})
					  endif ()

					  set(complex_filename_scheme "")
					  if (ARGC GREATER 7)
					    set(complex_filename_scheme ${ARGV7})
					  endif ()

					  AllCombinations("${defines_in}" "${absent_codes_in}")
					  set(define_combos ${LIST_OUT})
					  set(define_codes ${CODES_OUT})

					  list(LENGTH define_combos num_combos)
					  math(EXPR num_combos "${num_combos} - 1")

					  foreach (c RANGE 0 ${num_combos})

					    list(GET define_combos ${c} define_combo)
					    list(GET define_codes ${c} define_code)

					    # replace colon separated list with semicolons, this turns it into a CMake list that we can use foreach with.
					    # This is done once per combination and with a quoted input: splitting the already split list again,
					    # unquoted, would glue its elements together (A;B would become AB) for every source after the first.
					    string(REPLACE ":" ";" define_combo "${define_combo}")

					    # the defines of this combination plus the ones common to all objects (a single space marks the empty combination)
					    set(cur_defines ${define_combo})
					    if ("${cur_defines}" STREQUAL " ")
					      set(cur_defines ${all_defines_in})
					    else ()
					      list(APPEND cur_defines ${all_defines_in})
					    endif ()

					    foreach (source_file ${sources_in})

					      set(alternate_name ${alternate_name_in})

					      set(replace_code "")
					      set(append_code "")
					      if (replace_scheme EQUAL 1)
					        set(replace_code ${define_code})
					      else ()
					        if (replace_scheme EQUAL 2)
					          set(append_code "_${define_code}")
					        elseif (replace_scheme EQUAL 3)
					          if ("${alternate_name}" STREQUAL "")
					            string(REGEX MATCH "[a-zA-Z]\\." last_letter "${source_file}")
					          else ()
					            string(REGEX MATCH "[a-zA-Z]$" last_letter "${alternate_name}")
					          endif ()
					          # first extract the last letter
					          string(SUBSTRING "${last_letter}" 0 1 last_letter) # remove period from match
					          # break the code up into the first letter and the remaining (should only be 2 anyway)
					          string(SUBSTRING "${define_code}" 0 1 define_code_first)
					          string(SUBSTRING "${define_code}" 1 -1 define_code_second)
					          set(replace_code "${define_code_first}${last_letter}${define_code_second}")
					        elseif (replace_scheme EQUAL 4)
					          # insert code before the last underscore and pass that in as the alternate_name
					          if ("${alternate_name}" STREQUAL "")
					            get_filename_component(alternate_name "${source_file}" NAME_WE)
					          endif ()
					          set(extra_underscore "")
					          # check if filename has two underscores, insert another if not (e.g. getrs_parallel needs to become getrs_U_parallel not getrsU_parallel)
					          string(REGEX MATCH "_[a-zA-Z]+_" underscores "${alternate_name}")
					          string(LENGTH "${underscores}" underscores)
					          if (underscores EQUAL 0)
					            set(extra_underscore "_")
					          endif ()
					          string(REGEX REPLACE "(.+)(_[^_]+)$" "\\1${extra_underscore}${define_code}\\2" alternate_name "${alternate_name}")
					        else()
					          set(append_code ${define_code}) # replace_scheme should be 0
					        endif ()
					      endif ()

					      GenerateNamedObjects("${source_file}" "${cur_defines}" "${alternate_name}" false "${replace_code}" "${append_code}" "${no_float_type}" "${complex_filename_scheme}")
					    endforeach ()
					  endforeach ()

					  set(OPENBLAS_SRC ${OPENBLAS_SRC} PARENT_SCOPE)
					endfunction ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
							
								
								
									
										113
									
								
								common.h
								
								
								
								
							
							
						
						
									
										113
									
								
								common.h
								
								
								
								
							| 
						 | 
					@ -82,7 +82,10 @@ extern "C" {
 | 
				
			||||||
#include <stdio.h>
 | 
					#include <stdio.h>
 | 
				
			||||||
#include <stdlib.h>
 | 
					#include <stdlib.h>
 | 
				
			||||||
#include <string.h>
 | 
					#include <string.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#if !defined(_MSC_VER)
 | 
				
			||||||
#include <unistd.h>
 | 
					#include <unistd.h>
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#ifdef OS_LINUX
 | 
					#ifdef OS_LINUX
 | 
				
			||||||
#include <malloc.h>
 | 
					#include <malloc.h>
 | 
				
			||||||
| 
						 | 
					@ -93,6 +96,14 @@ extern "C" {
 | 
				
			||||||
#include <sched.h>
 | 
					#include <sched.h>
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#ifdef OS_ANDROID
 | 
				
			||||||
 | 
					#define NO_SYSV_IPC
 | 
				
			||||||
 | 
					//Android NDK only supports complex.h since Android 5.0
 | 
				
			||||||
 | 
					#if __ANDROID_API__ < 21
 | 
				
			||||||
 | 
					#define FORCE_OPENBLAS_COMPLEX_STRUCT
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#ifdef OS_WINDOWS
 | 
					#ifdef OS_WINDOWS
 | 
				
			||||||
#ifdef  ATOM
 | 
					#ifdef  ATOM
 | 
				
			||||||
#define GOTO_ATOM ATOM
 | 
					#define GOTO_ATOM ATOM
 | 
				
			||||||
| 
						 | 
					@ -106,8 +117,11 @@ extern "C" {
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
#else
 | 
					#else
 | 
				
			||||||
#include <sys/mman.h>
 | 
					#include <sys/mman.h>
 | 
				
			||||||
 | 
					#ifndef NO_SYSV_IPC
 | 
				
			||||||
#include <sys/shm.h>
 | 
					#include <sys/shm.h>
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
#include <sys/time.h>
 | 
					#include <sys/time.h>
 | 
				
			||||||
 | 
					#include <time.h>
 | 
				
			||||||
#include <unistd.h>
 | 
					#include <unistd.h>
 | 
				
			||||||
#include <math.h>
 | 
					#include <math.h>
 | 
				
			||||||
#ifdef SMP
 | 
					#ifdef SMP
 | 
				
			||||||
| 
						 | 
					@ -287,13 +301,6 @@ typedef int blasint;
 | 
				
			||||||
#define COMPSIZE  2
 | 
					#define COMPSIZE  2
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#if defined(C_PGI) || defined(C_SUN)
 | 
					 | 
				
			||||||
#define CREAL(X)	(*((FLOAT *)&X + 0))
 | 
					 | 
				
			||||||
#define CIMAG(X)	(*((FLOAT *)&X + 1))
 | 
					 | 
				
			||||||
#else
 | 
					 | 
				
			||||||
#define CREAL	__real__
 | 
					 | 
				
			||||||
#define CIMAG	__imag__
 | 
					 | 
				
			||||||
#endif
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
#define Address_H(x) (((x)+(1<<15))>>16)
 | 
					#define Address_H(x) (((x)+(1<<15))>>16)
 | 
				
			||||||
#define Address_L(x) ((x)-((Address_H(x))<<16))
 | 
					#define Address_L(x) ((x)-((Address_H(x))<<16))
 | 
				
			||||||
| 
						 | 
					@ -307,8 +314,12 @@ typedef int blasint;
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#if defined(OS_WINDOWS)
 | 
					#if defined(OS_WINDOWS)
 | 
				
			||||||
 | 
					#if defined(_MSC_VER) && !defined(__clang__)
 | 
				
			||||||
 | 
					#define YIELDING    YieldProcessor()
 | 
				
			||||||
 | 
					#else
 | 
				
			||||||
#define YIELDING	SwitchToThread()
 | 
					#define YIELDING	SwitchToThread()
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#if defined(ARMV7) || defined(ARMV6) || defined(ARMV8) || defined(ARMV5)
 | 
					#if defined(ARMV7) || defined(ARMV6) || defined(ARMV8) || defined(ARMV5)
 | 
				
			||||||
#define YIELDING        asm volatile ("nop;nop;nop;nop;nop;nop;nop;nop; \n");
 | 
					#define YIELDING        asm volatile ("nop;nop;nop;nop;nop;nop;nop;nop; \n");
 | 
				
			||||||
| 
						 | 
					@ -404,7 +415,51 @@ typedef char env_var_t[MAX_PATH];
 | 
				
			||||||
typedef char* env_var_t;
 | 
					typedef char* env_var_t;
 | 
				
			||||||
#define readenv(p, n) ((p)=getenv(n))
 | 
					#define readenv(p, n) ((p)=getenv(n))
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#if !defined(RPCC_DEFINED) && !defined(OS_WINDOWS)
 | 
				
			||||||
 | 
					#ifdef _POSIX_MONOTONIC_CLOCK
 | 
				
			||||||
 | 
					#if defined(__GLIBC_PREREQ) // cut the if condition if two lines, otherwise will fail at __GLIBC_PREREQ(2, 17)
 | 
				
			||||||
 | 
					#if __GLIBC_PREREQ(2, 17) // don't require -lrt
 | 
				
			||||||
 | 
					#define USE_MONOTONIC
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					#elif defined(OS_ANDROID)
 | 
				
			||||||
 | 
					#define USE_MONOTONIC
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					/* use similar scale as x86 rdtsc for timeouts to work correctly */
 | 
				
			||||||
 | 
					static inline unsigned long long rpcc(void){
 | 
				
			||||||
 | 
					#ifdef USE_MONOTONIC
 | 
				
			||||||
 | 
					  struct timespec ts;
 | 
				
			||||||
 | 
					  clock_gettime(CLOCK_MONOTONIC, &ts);
 | 
				
			||||||
 | 
					  return (unsigned long long)ts.tv_sec * 1000000000ull + ts.tv_nsec;
 | 
				
			||||||
 | 
					#else
 | 
				
			||||||
 | 
					  struct timeval tv;
 | 
				
			||||||
 | 
					  gettimeofday(&tv,NULL);
 | 
				
			||||||
 | 
					  return (unsigned long long)tv.tv_sec * 1000000000ull + tv.tv_usec * 1000;
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					#define RPCC_DEFINED
 | 
				
			||||||
 | 
					#define RPCC64BIT
 | 
				
			||||||
 | 
					#endif // !RPCC_DEFINED
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#if !defined(BLAS_LOCK_DEFINED) && defined(__GNUC__)
 | 
				
			||||||
 | 
					static void __inline blas_lock(volatile BLASULONG *address){
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  do {
 | 
				
			||||||
 | 
					    while (*address) {YIELDING;};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  } while (!__sync_bool_compare_and_swap(address, 0, 1));
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					#define BLAS_LOCK_DEFINED
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#ifndef RPCC_DEFINED
 | 
				
			||||||
 | 
					#error "rpcc() implementation is missing for your platform"
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					#ifndef BLAS_LOCK_DEFINED
 | 
				
			||||||
 | 
					#error "blas_lock() implementation is missing for your platform"
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					#endif // !ASSEMBLER
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#ifdef OS_LINUX
 | 
					#ifdef OS_LINUX
 | 
				
			||||||
#include "common_linux.h"
 | 
					#include "common_linux.h"
 | 
				
			||||||
| 
						 | 
					@ -450,18 +505,52 @@ typedef char* env_var_t;
 | 
				
			||||||
/* C99 supports complex floating numbers natively, which GCC also offers as an
 | 
					/* C99 supports complex floating numbers natively, which GCC also offers as an
 | 
				
			||||||
   extension since version 3.0.  If neither are available, use a compatible
 | 
					   extension since version 3.0.  If neither are available, use a compatible
 | 
				
			||||||
   structure as fallback (see Clause 6.2.5.13 of the C99 standard). */
 | 
					   structure as fallback (see Clause 6.2.5.13 of the C99 standard). */
 | 
				
			||||||
#if (defined(__STDC_IEC_559_COMPLEX__) || __STDC_VERSION__ >= 199901L || \
 | 
					#if ((defined(__STDC_IEC_559_COMPLEX__) || __STDC_VERSION__ >= 199901L || \
 | 
				
			||||||
     (__GNUC__ >= 3 && !defined(__cplusplus)))
 | 
					      (__GNUC__ >= 3 && !defined(__cplusplus))) && !(defined(FORCE_OPENBLAS_COMPLEX_STRUCT)))
 | 
				
			||||||
  #define OPENBLAS_COMPLEX_C99
 | 
					  #define OPENBLAS_COMPLEX_C99
 | 
				
			||||||
 | 
					  #ifndef __cplusplus
 | 
				
			||||||
 | 
					    #include <complex.h>
 | 
				
			||||||
 | 
					  #endif
 | 
				
			||||||
  typedef float _Complex openblas_complex_float;
 | 
					  typedef float _Complex openblas_complex_float;
 | 
				
			||||||
  typedef double _Complex openblas_complex_double;
 | 
					  typedef double _Complex openblas_complex_double;
 | 
				
			||||||
  typedef xdouble _Complex openblas_complex_xdouble;
 | 
					  typedef xdouble _Complex openblas_complex_xdouble;
 | 
				
			||||||
 | 
					  #define openblas_make_complex_float(real, imag)    ((real) + ((imag) * _Complex_I))
 | 
				
			||||||
 | 
					  #define openblas_make_complex_double(real, imag)   ((real) + ((imag) * _Complex_I))
 | 
				
			||||||
 | 
					  #define openblas_make_complex_xdouble(real, imag)  ((real) + ((imag) * _Complex_I))
 | 
				
			||||||
#else
 | 
					#else
 | 
				
			||||||
  #define OPENBLAS_COMPLEX_STRUCT
 | 
					  #define OPENBLAS_COMPLEX_STRUCT
 | 
				
			||||||
  typedef struct { float real, imag; } openblas_complex_float;
 | 
					  typedef struct { float real, imag; } openblas_complex_float;
 | 
				
			||||||
  typedef struct { double real, imag; } openblas_complex_double;
 | 
					  typedef struct { double real, imag; } openblas_complex_double;
 | 
				
			||||||
  typedef struct { xdouble real, imag; } openblas_complex_xdouble;
 | 
					  typedef struct { xdouble real, imag; } openblas_complex_xdouble;
 | 
				
			||||||
 | 
					  #define openblas_make_complex_float(real, imag)    {(real), (imag)}
 | 
				
			||||||
 | 
					  #define openblas_make_complex_double(real, imag)   {(real), (imag)}
 | 
				
			||||||
 | 
					  #define openblas_make_complex_xdouble(real, imag)  {(real), (imag)}
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#ifdef XDOUBLE
 | 
				
			||||||
 | 
					#define OPENBLAS_COMPLEX_FLOAT openblas_complex_xdouble
 | 
				
			||||||
 | 
					#define OPENBLAS_MAKE_COMPLEX_FLOAT(r,i) openblas_make_complex_xdouble(r,i)
 | 
				
			||||||
 | 
					#elif defined(DOUBLE)
 | 
				
			||||||
 | 
					#define OPENBLAS_COMPLEX_FLOAT openblas_complex_double
 | 
				
			||||||
 | 
					#define OPENBLAS_MAKE_COMPLEX_FLOAT(r,i) openblas_make_complex_double(r,i)
 | 
				
			||||||
 | 
					#else
 | 
				
			||||||
 | 
					#define OPENBLAS_COMPLEX_FLOAT openblas_complex_float
 | 
				
			||||||
 | 
					#define OPENBLAS_MAKE_COMPLEX_FLOAT(r,i) openblas_make_complex_float(r,i)
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#if defined(C_PGI) || defined(C_SUN)
 | 
				
			||||||
 | 
					#define CREAL(X)	(*((FLOAT *)&X + 0))
 | 
				
			||||||
 | 
					#define CIMAG(X)	(*((FLOAT *)&X + 1))
 | 
				
			||||||
 | 
					#else
 | 
				
			||||||
 | 
					#ifdef OPENBLAS_COMPLEX_STRUCT
 | 
				
			||||||
 | 
					#define CREAL(Z)	((Z).real)
 | 
				
			||||||
 | 
					#define CIMAG(Z)	((Z).imag)
 | 
				
			||||||
 | 
					#else
 | 
				
			||||||
 | 
					#define CREAL	__real__
 | 
				
			||||||
 | 
					#define CIMAG	__imag__
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#endif  // ASSEMBLER
 | 
					#endif  // ASSEMBLER
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#ifndef IFLUSH
 | 
					#ifndef IFLUSH
 | 
				
			||||||
| 
						 | 
					@ -478,6 +567,10 @@ typedef char* env_var_t;
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#if defined(C_MSVC)
 | 
				
			||||||
 | 
					#define inline __inline
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#ifndef ASSEMBLER
 | 
					#ifndef ASSEMBLER
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#ifndef MIN
 | 
					#ifndef MIN
 | 
				
			||||||
| 
						 | 
					@ -499,6 +592,8 @@ void  blas_set_parameter(void);
 | 
				
			||||||
int   blas_get_cpu_number(void);
 | 
					int   blas_get_cpu_number(void);
 | 
				
			||||||
void *blas_memory_alloc  (int);
 | 
					void *blas_memory_alloc  (int);
 | 
				
			||||||
void  blas_memory_free   (void *);
 | 
					void  blas_memory_free   (void *);
 | 
				
			||||||
 | 
					void *blas_memory_alloc_nolock  (int); //use malloc without blas_lock
 | 
				
			||||||
 | 
					void  blas_memory_free_nolock   (void *);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
int  get_num_procs (void);
 | 
					int  get_num_procs (void);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -76,6 +76,7 @@ static void __inline blas_lock(unsigned long *address){
 | 
				
			||||||
    "30:", address);
 | 
					    "30:", address);
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					#define BLAS_LOCK_DEFINED
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static __inline unsigned int rpcc(void){
 | 
					static __inline unsigned int rpcc(void){
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -89,6 +90,7 @@ static __inline unsigned int rpcc(void){
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  return r0;
 | 
					  return r0;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					#define RPCC_DEFINED
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#define HALT 	ldq	$0, 0($0)
 | 
					#define HALT 	ldq	$0, 0($0)
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
							
								
								
									
										89
									
								
								common_arm.h
								
								
								
								
							
							
						
						
									
										89
									
								
								common_arm.h
								
								
								
								
							| 
						 | 
					@ -1,5 +1,5 @@
 | 
				
			||||||
/*****************************************************************************
 | 
					/*****************************************************************************
 | 
				
			||||||
Copyright (c) 2011-2014, The OpenBLAS Project
 | 
					Copyright (c) 2011-2015, The OpenBLAS Project
 | 
				
			||||||
All rights reserved.
 | 
					All rights reserved.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Redistribution and use in source and binary forms, with or without
 | 
					Redistribution and use in source and binary forms, with or without
 | 
				
			||||||
| 
						 | 
					@ -30,56 +30,29 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
 | 
				
			||||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 | 
					USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 | 
				
			||||||
**********************************************************************************/
 | 
					**********************************************************************************/
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/*********************************************************************/
 | 
					 | 
				
			||||||
/* Copyright 2009, 2010 The University of Texas at Austin.           */
 | 
					 | 
				
			||||||
/* All rights reserved.                                              */
 | 
					 | 
				
			||||||
/*                                                                   */
 | 
					 | 
				
			||||||
/* Redistribution and use in source and binary forms, with or        */
 | 
					 | 
				
			||||||
/* without modification, are permitted provided that the following   */
 | 
					 | 
				
			||||||
/* conditions are met:                                               */
 | 
					 | 
				
			||||||
/*                                                                   */
 | 
					 | 
				
			||||||
/*   1. Redistributions of source code must retain the above         */
 | 
					 | 
				
			||||||
/*      copyright notice, this list of conditions and the following  */
 | 
					 | 
				
			||||||
/*      disclaimer.                                                  */
 | 
					 | 
				
			||||||
/*                                                                   */
 | 
					 | 
				
			||||||
/*   2. Redistributions in binary form must reproduce the above      */
 | 
					 | 
				
			||||||
/*      copyright notice, this list of conditions and the following  */
 | 
					 | 
				
			||||||
/*      disclaimer in the documentation and/or other materials       */
 | 
					 | 
				
			||||||
/*      provided with the distribution.                              */
 | 
					 | 
				
			||||||
/*                                                                   */
 | 
					 | 
				
			||||||
/*    THIS  SOFTWARE IS PROVIDED  BY THE  UNIVERSITY OF  TEXAS AT    */
 | 
					 | 
				
			||||||
/*    AUSTIN  ``AS IS''  AND ANY  EXPRESS OR  IMPLIED WARRANTIES,    */
 | 
					 | 
				
			||||||
/*    INCLUDING, BUT  NOT LIMITED  TO, THE IMPLIED  WARRANTIES OF    */
 | 
					 | 
				
			||||||
/*    MERCHANTABILITY  AND FITNESS FOR  A PARTICULAR  PURPOSE ARE    */
 | 
					 | 
				
			||||||
/*    DISCLAIMED.  IN  NO EVENT SHALL THE UNIVERSITY  OF TEXAS AT    */
 | 
					 | 
				
			||||||
/*    AUSTIN OR CONTRIBUTORS BE  LIABLE FOR ANY DIRECT, INDIRECT,    */
 | 
					 | 
				
			||||||
/*    INCIDENTAL,  SPECIAL, EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES    */
 | 
					 | 
				
			||||||
/*    (INCLUDING, BUT  NOT LIMITED TO,  PROCUREMENT OF SUBSTITUTE    */
 | 
					 | 
				
			||||||
/*    GOODS  OR  SERVICES; LOSS  OF  USE,  DATA,  OR PROFITS;  OR    */
 | 
					 | 
				
			||||||
/*    BUSINESS INTERRUPTION) HOWEVER CAUSED  AND ON ANY THEORY OF    */
 | 
					 | 
				
			||||||
/*    LIABILITY, WHETHER  IN CONTRACT, STRICT  LIABILITY, OR TORT    */
 | 
					 | 
				
			||||||
/*    (INCLUDING NEGLIGENCE OR OTHERWISE)  ARISING IN ANY WAY OUT    */
 | 
					 | 
				
			||||||
/*    OF  THE  USE OF  THIS  SOFTWARE,  EVEN  IF ADVISED  OF  THE    */
 | 
					 | 
				
			||||||
/*    POSSIBILITY OF SUCH DAMAGE.                                    */
 | 
					 | 
				
			||||||
/*                                                                   */
 | 
					 | 
				
			||||||
/* The views and conclusions contained in the software and           */
 | 
					 | 
				
			||||||
/* documentation are those of the authors and should not be          */
 | 
					 | 
				
			||||||
/* interpreted as representing official policies, either expressed   */
 | 
					 | 
				
			||||||
/* or implied, of The University of Texas at Austin.                 */
 | 
					 | 
				
			||||||
/*********************************************************************/
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#ifndef COMMON_ARM
 | 
					#ifndef COMMON_ARM
 | 
				
			||||||
#define COMMON_ARM
 | 
					#define COMMON_ARM
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#if defined(ARMV5) || defined(ARMV6)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#define MB
 | 
					#define MB
 | 
				
			||||||
#define WMB
 | 
					#define WMB
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#else
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#define MB   __asm__ __volatile__ ("dmb  ish" : : : "memory")
 | 
				
			||||||
 | 
					#define WMB  __asm__ __volatile__ ("dmb  ishst" : : : "memory")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#define INLINE inline
 | 
					#define INLINE inline
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#define RETURN_BY_COMPLEX
 | 
					#define RETURN_BY_COMPLEX
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#ifndef ASSEMBLER
 | 
					#ifndef ASSEMBLER
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#if defined(ARMV6) || defined(ARMV7) || defined(ARMV8)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static void __inline blas_lock(volatile BLASULONG *address){
 | 
					static void __inline blas_lock(volatile BLASULONG *address){
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  int register ret;
 | 
					  int register ret;
 | 
				
			||||||
| 
						 | 
					@ -88,37 +61,29 @@ static void __inline blas_lock(volatile BLASULONG *address){
 | 
				
			||||||
    while (*address) {YIELDING;};
 | 
					    while (*address) {YIELDING;};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    __asm__ __volatile__(
 | 
					    __asm__ __volatile__(
 | 
				
			||||||
                         "ldrex r2, [%1]                                                \n\t"
 | 
					                         "ldrex r2, [%1]      \n\t"
 | 
				
			||||||
                         "mov   r2, #0                                                  \n\t"
 | 
					                         "strex %0, %2, [%1]  \n\t"
 | 
				
			||||||
                         "strex r3, r2, [%1]                                            \n\t"
 | 
					                         "orr   %0, r2        \n\t"
 | 
				
			||||||
			 "mov	%0 , r3							\n\t"
 | 
					                         : "=&r"(ret)
 | 
				
			||||||
                         : "=r"(ret), "=r"(address)
 | 
					                         : "r"(address), "r"(1)
 | 
				
			||||||
                         : "1"(address)
 | 
					                         : "memory", "r2"
 | 
				
			||||||
                         : "memory", "r2" , "r3"
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    );
 | 
					    );
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  } while (ret);
 | 
					  } while (ret);
 | 
				
			||||||
 | 
					  MB;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#define BLAS_LOCK_DEFINED
 | 
				
			||||||
static inline unsigned long long rpcc(void){
 | 
					#endif
 | 
				
			||||||
  unsigned long long ret=0;
 | 
					 | 
				
			||||||
  double v;
 | 
					 | 
				
			||||||
  struct timeval tv;
 | 
					 | 
				
			||||||
  gettimeofday(&tv,NULL);
 | 
					 | 
				
			||||||
  v=(double) tv.tv_sec + (double) tv.tv_usec * 1e-6;
 | 
					 | 
				
			||||||
  ret = (unsigned long long) ( v * 1000.0d );
 | 
					 | 
				
			||||||
  return ret;
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
static inline int blas_quickdivide(blasint x, blasint y){
 | 
					static inline int blas_quickdivide(blasint x, blasint y){
 | 
				
			||||||
  return x / y;
 | 
					  return x / y;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#if defined(DOUBLE)
 | 
					#if !defined(HAVE_VFP)
 | 
				
			||||||
 | 
					/* no FPU, soft float */
 | 
				
			||||||
 | 
					#define GET_IMAGE(res)
 | 
				
			||||||
 | 
					#elif defined(DOUBLE)
 | 
				
			||||||
#define GET_IMAGE(res)  __asm__ __volatile__("vstr.f64 d1, %0" : "=m"(res) : : "memory")
 | 
					#define GET_IMAGE(res)  __asm__ __volatile__("vstr.f64 d1, %0" : "=m"(res) : : "memory")
 | 
				
			||||||
#else
 | 
					#else
 | 
				
			||||||
#define GET_IMAGE(res)  __asm__ __volatile__("vstr.f32 s1, %0" : "=m"(res) : : "memory")
 | 
					#define GET_IMAGE(res)  __asm__ __volatile__("vstr.f32 s1, %0" : "=m"(res) : : "memory")
 | 
				
			||||||
| 
						 | 
					@ -166,4 +131,8 @@ REALNAME:
 | 
				
			||||||
#define MAP_ANONYMOUS MAP_ANON
 | 
					#define MAP_ANONYMOUS MAP_ANON
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#if !defined(ARMV5) && !defined(ARMV6) && !defined(ARMV7) && !defined(ARMV8)
 | 
				
			||||||
 | 
					#error "you must define ARMV5, ARMV6, ARMV7 or ARMV8"
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1,5 +1,5 @@
 | 
				
			||||||
/*****************************************************************************
 | 
					/*****************************************************************************
 | 
				
			||||||
Copyright (c) 2011-2014, The OpenBLAS Project
 | 
					Copyright (c) 2011-2015, The OpenBLAS Project
 | 
				
			||||||
All rights reserved.
 | 
					All rights reserved.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Redistribution and use in source and binary forms, with or without
 | 
					Redistribution and use in source and binary forms, with or without
 | 
				
			||||||
| 
						 | 
					@ -30,49 +30,12 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
 | 
				
			||||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 | 
					USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 | 
				
			||||||
**********************************************************************************/
 | 
					**********************************************************************************/
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/*********************************************************************/
 | 
					 | 
				
			||||||
/* Copyright 2009, 2010 The University of Texas at Austin.           */
 | 
					 | 
				
			||||||
/* All rights reserved.                                              */
 | 
					 | 
				
			||||||
/*                                                                   */
 | 
					 | 
				
			||||||
/* Redistribution and use in source and binary forms, with or        */
 | 
					 | 
				
			||||||
/* without modification, are permitted provided that the following   */
 | 
					 | 
				
			||||||
/* conditions are met:                                               */
 | 
					 | 
				
			||||||
/*                                                                   */
 | 
					 | 
				
			||||||
/*   1. Redistributions of source code must retain the above         */
 | 
					 | 
				
			||||||
/*      copyright notice, this list of conditions and the following  */
 | 
					 | 
				
			||||||
/*      disclaimer.                                                  */
 | 
					 | 
				
			||||||
/*                                                                   */
 | 
					 | 
				
			||||||
/*   2. Redistributions in binary form must reproduce the above      */
 | 
					 | 
				
			||||||
/*      copyright notice, this list of conditions and the following  */
 | 
					 | 
				
			||||||
/*      disclaimer in the documentation and/or other materials       */
 | 
					 | 
				
			||||||
/*      provided with the distribution.                              */
 | 
					 | 
				
			||||||
/*                                                                   */
 | 
					 | 
				
			||||||
/*    THIS  SOFTWARE IS PROVIDED  BY THE  UNIVERSITY OF  TEXAS AT    */
 | 
					 | 
				
			||||||
/*    AUSTIN  ``AS IS''  AND ANY  EXPRESS OR  IMPLIED WARRANTIES,    */
 | 
					 | 
				
			||||||
/*    INCLUDING, BUT  NOT LIMITED  TO, THE IMPLIED  WARRANTIES OF    */
 | 
					 | 
				
			||||||
/*    MERCHANTABILITY  AND FITNESS FOR  A PARTICULAR  PURPOSE ARE    */
 | 
					 | 
				
			||||||
/*    DISCLAIMED.  IN  NO EVENT SHALL THE UNIVERSITY  OF TEXAS AT    */
 | 
					 | 
				
			||||||
/*    AUSTIN OR CONTRIBUTORS BE  LIABLE FOR ANY DIRECT, INDIRECT,    */
 | 
					 | 
				
			||||||
/*    INCIDENTAL,  SPECIAL, EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES    */
 | 
					 | 
				
			||||||
/*    (INCLUDING, BUT  NOT LIMITED TO,  PROCUREMENT OF SUBSTITUTE    */
 | 
					 | 
				
			||||||
/*    GOODS  OR  SERVICES; LOSS  OF  USE,  DATA,  OR PROFITS;  OR    */
 | 
					 | 
				
			||||||
/*    BUSINESS INTERRUPTION) HOWEVER CAUSED  AND ON ANY THEORY OF    */
 | 
					 | 
				
			||||||
/*    LIABILITY, WHETHER  IN CONTRACT, STRICT  LIABILITY, OR TORT    */
 | 
					 | 
				
			||||||
/*    (INCLUDING NEGLIGENCE OR OTHERWISE)  ARISING IN ANY WAY OUT    */
 | 
					 | 
				
			||||||
/*    OF  THE  USE OF  THIS  SOFTWARE,  EVEN  IF ADVISED  OF  THE    */
 | 
					 | 
				
			||||||
/*    POSSIBILITY OF SUCH DAMAGE.                                    */
 | 
					 | 
				
			||||||
/*                                                                   */
 | 
					 | 
				
			||||||
/* The views and conclusions contained in the software and           */
 | 
					 | 
				
			||||||
/* documentation are those of the authors and should not be          */
 | 
					 | 
				
			||||||
/* interpreted as representing official policies, either expressed   */
 | 
					 | 
				
			||||||
/* or implied, of The University of Texas at Austin.                 */
 | 
					 | 
				
			||||||
/*********************************************************************/
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#ifndef COMMON_ARM64
 | 
					#ifndef COMMON_ARM64
 | 
				
			||||||
#define COMMON_ARM64
 | 
					#define COMMON_ARM64
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#define MB
 | 
					#define MB   __asm__ __volatile__ ("dmb  ish" : : : "memory")
 | 
				
			||||||
#define WMB
 | 
					#define WMB  __asm__ __volatile__ ("dmb  ishst" : : : "memory")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#define INLINE inline
 | 
					#define INLINE inline
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -81,39 +44,27 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 | 
				
			||||||
#ifndef ASSEMBLER
 | 
					#ifndef ASSEMBLER
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static void __inline blas_lock(volatile BLASULONG *address){
 | 
					static void __inline blas_lock(volatile BLASULONG *address){
 | 
				
			||||||
/*
 | 
					
 | 
				
			||||||
  int register ret;
 | 
					  long register ret;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  do {
 | 
					  do {
 | 
				
			||||||
    while (*address) {YIELDING;};
 | 
					    while (*address) {YIELDING;};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    __asm__ __volatile__(
 | 
					    __asm__ __volatile__(
 | 
				
			||||||
                         "ldrex r2, [%1]                                                \n\t"
 | 
					                         "ldaxr %0, [%1]      \n\t"
 | 
				
			||||||
                         "mov   r2, #0                                                  \n\t"
 | 
					                         "stlxr w2, %2, [%1]  \n\t"
 | 
				
			||||||
                         "strex r3, r2, [%1]                                            \n\t"
 | 
					                         "orr   %0, %0, x2    \n\t"
 | 
				
			||||||
			 "mov	%0 , r3							\n\t"
 | 
					                         : "=r"(ret)
 | 
				
			||||||
                         : "=r"(ret), "=r"(address)
 | 
					                         : "r"(address), "r"(1l)
 | 
				
			||||||
                         : "1"(address)
 | 
					                         : "memory", "x2"
 | 
				
			||||||
                         : "memory", "r2" , "r3"
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    );
 | 
					    );
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  } while (ret);
 | 
					  } while (ret);
 | 
				
			||||||
*/
 | 
					  MB;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					#define BLAS_LOCK_DEFINED
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static inline unsigned long long rpcc(void){
 | 
					 | 
				
			||||||
  unsigned long long ret=0;
 | 
					 | 
				
			||||||
  double v;
 | 
					 | 
				
			||||||
  struct timeval tv;
 | 
					 | 
				
			||||||
  gettimeofday(&tv,NULL);
 | 
					 | 
				
			||||||
  v=(double) tv.tv_sec + (double) tv.tv_usec * 1e-6;
 | 
					 | 
				
			||||||
  ret = (unsigned long long) ( v * 1000.0d );
 | 
					 | 
				
			||||||
  return ret;
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
static inline int blas_quickdivide(blasint x, blasint y){
 | 
					static inline int blas_quickdivide(blasint x, blasint y){
 | 
				
			||||||
  return x / y;
 | 
					  return x / y;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
| 
						 | 
					@ -166,3 +117,4 @@ REALNAME:
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
							
								
								
									
										19
									
								
								common_c.h
								
								
								
								
							
							
						
						
									
										19
									
								
								common_c.h
								
								
								
								
							| 
						 | 
					@ -220,6 +220,15 @@
 | 
				
			||||||
#define COMATCOPY_K_CTC         comatcopy_k_ctc
 | 
					#define COMATCOPY_K_CTC         comatcopy_k_ctc
 | 
				
			||||||
#define COMATCOPY_K_RTC         comatcopy_k_rtc
 | 
					#define COMATCOPY_K_RTC         comatcopy_k_rtc
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#define CIMATCOPY_K_CN          cimatcopy_k_cn
 | 
				
			||||||
 | 
					#define CIMATCOPY_K_RN          cimatcopy_k_rn
 | 
				
			||||||
 | 
					#define CIMATCOPY_K_CT          cimatcopy_k_ct
 | 
				
			||||||
 | 
					#define CIMATCOPY_K_RT          cimatcopy_k_rt
 | 
				
			||||||
 | 
					#define CIMATCOPY_K_CNC         cimatcopy_k_cnc
 | 
				
			||||||
 | 
					#define CIMATCOPY_K_RNC         cimatcopy_k_rnc
 | 
				
			||||||
 | 
					#define CIMATCOPY_K_CTC         cimatcopy_k_ctc
 | 
				
			||||||
 | 
					#define CIMATCOPY_K_RTC         cimatcopy_k_rtc
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#define CGEADD_K                cgeadd_k 
 | 
					#define CGEADD_K                cgeadd_k 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#else
 | 
					#else
 | 
				
			||||||
| 
						 | 
					@ -403,6 +412,16 @@
 | 
				
			||||||
#define COMATCOPY_K_RNC         gotoblas -> comatcopy_k_rnc
 | 
					#define COMATCOPY_K_RNC         gotoblas -> comatcopy_k_rnc
 | 
				
			||||||
#define COMATCOPY_K_CTC         gotoblas -> comatcopy_k_ctc
 | 
					#define COMATCOPY_K_CTC         gotoblas -> comatcopy_k_ctc
 | 
				
			||||||
#define COMATCOPY_K_RTC         gotoblas -> comatcopy_k_rtc
 | 
					#define COMATCOPY_K_RTC         gotoblas -> comatcopy_k_rtc
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#define CIMATCOPY_K_CN          gotoblas -> cimatcopy_k_cn
 | 
				
			||||||
 | 
					#define CIMATCOPY_K_RN          gotoblas -> cimatcopy_k_rn
 | 
				
			||||||
 | 
					#define CIMATCOPY_K_CT          gotoblas -> cimatcopy_k_ct
 | 
				
			||||||
 | 
					#define CIMATCOPY_K_RT          gotoblas -> cimatcopy_k_rt
 | 
				
			||||||
 | 
					#define CIMATCOPY_K_CNC         gotoblas -> cimatcopy_k_cnc
 | 
				
			||||||
 | 
					#define CIMATCOPY_K_RNC         gotoblas -> cimatcopy_k_rnc
 | 
				
			||||||
 | 
					#define CIMATCOPY_K_CTC         gotoblas -> cimatcopy_k_ctc
 | 
				
			||||||
 | 
					#define CIMATCOPY_K_RTC         gotoblas -> cimatcopy_k_rtc
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#define CGEADD_K                gotoblas -> cgeadd_k 
 | 
					#define CGEADD_K                gotoblas -> cgeadd_k 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -149,6 +149,11 @@
 | 
				
			||||||
#define DOMATCOPY_K_RN		domatcopy_k_rn
 | 
					#define DOMATCOPY_K_RN		domatcopy_k_rn
 | 
				
			||||||
#define DOMATCOPY_K_CT		domatcopy_k_ct
 | 
					#define DOMATCOPY_K_CT		domatcopy_k_ct
 | 
				
			||||||
#define DOMATCOPY_K_RT		domatcopy_k_rt
 | 
					#define DOMATCOPY_K_RT		domatcopy_k_rt
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#define DIMATCOPY_K_CN		dimatcopy_k_cn
 | 
				
			||||||
 | 
					#define DIMATCOPY_K_RN		dimatcopy_k_rn
 | 
				
			||||||
 | 
					#define DIMATCOPY_K_CT      dimatcopy_k_ct
 | 
				
			||||||
 | 
					#define DIMATCOPY_K_RT      dimatcopy_k_rt
 | 
				
			||||||
#define DGEADD_K                dgeadd_k 
 | 
					#define DGEADD_K                dgeadd_k 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#else
 | 
					#else
 | 
				
			||||||
| 
						 | 
					@ -267,6 +272,10 @@
 | 
				
			||||||
#define DOMATCOPY_K_RN		gotoblas -> domatcopy_k_rn
 | 
					#define DOMATCOPY_K_RN		gotoblas -> domatcopy_k_rn
 | 
				
			||||||
#define DOMATCOPY_K_CT		gotoblas -> domatcopy_k_ct
 | 
					#define DOMATCOPY_K_CT		gotoblas -> domatcopy_k_ct
 | 
				
			||||||
#define DOMATCOPY_K_RT		gotoblas -> domatcopy_k_rt
 | 
					#define DOMATCOPY_K_RT		gotoblas -> domatcopy_k_rt
 | 
				
			||||||
 | 
					#define DIMATCOPY_K_CN		gotoblas -> dimatcopy_k_cn
 | 
				
			||||||
 | 
					#define DIMATCOPY_K_RN		gotoblas -> dimatcopy_k_rn
 | 
				
			||||||
 | 
					#define DIMATCOPY_K_CT		gotoblas -> dimatcopy_k_ct
 | 
				
			||||||
 | 
					#define DIMATCOPY_K_RT		gotoblas -> dimatcopy_k_rt
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#define DGEADD_K                gotoblas -> dgeadd_k 
 | 
					#define DGEADD_K                gotoblas -> dgeadd_k 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -68,6 +68,7 @@ static __inline void blas_lock(volatile unsigned long *address){
 | 
				
			||||||
			  : "ar.ccv", "memory");
 | 
								  : "ar.ccv", "memory");
 | 
				
			||||||
  } while (ret);
 | 
					  } while (ret);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					#define BLAS_LOCK_DEFINED
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static __inline unsigned long rpcc(void) {
 | 
					static __inline unsigned long rpcc(void) {
 | 
				
			||||||
  unsigned long clocks;
 | 
					  unsigned long clocks;
 | 
				
			||||||
| 
						 | 
					@ -75,6 +76,7 @@ static __inline unsigned long rpcc(void) {
 | 
				
			||||||
  __asm__ __volatile__ ("mov %0=ar.itc" : "=r"(clocks));
 | 
					  __asm__ __volatile__ ("mov %0=ar.itc" : "=r"(clocks));
 | 
				
			||||||
  return clocks;
 | 
					  return clocks;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					#define RPCC_DEFINED
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static __inline unsigned long stmxcsr(void){
 | 
					static __inline unsigned long stmxcsr(void){
 | 
				
			||||||
| 
						 | 
					@ -99,10 +101,12 @@ static __inline void blas_lock(volatile unsigned long *address){
 | 
				
			||||||
  while (*address || _InterlockedCompareExchange((volatile int *) address,1,0))
 | 
					  while (*address || _InterlockedCompareExchange((volatile int *) address,1,0))
 | 
				
			||||||
    ;
 | 
					    ;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					#define BLAS_LOCK_DEFINED
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static __inline unsigned int rpcc(void) {
 | 
					static __inline unsigned int rpcc(void) {
 | 
				
			||||||
  return __getReg(_IA64_REG_AR_ITC);
 | 
					  return __getReg(_IA64_REG_AR_ITC);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					#define RPCC_DEFINED
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static __inline unsigned int stmxcsr(void) {
 | 
					static __inline unsigned int stmxcsr(void) {
 | 
				
			||||||
  return __getReg(_IA64_REG_AR_FPSR);
 | 
					  return __getReg(_IA64_REG_AR_FPSR);
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -47,12 +47,12 @@ double dsdot_k(BLASLONG, float   *, BLASLONG, float *, BLASLONG);
 | 
				
			||||||
double  ddot_k(BLASLONG, double  *, BLASLONG, double  *, BLASLONG);
 | 
					double  ddot_k(BLASLONG, double  *, BLASLONG, double  *, BLASLONG);
 | 
				
			||||||
xdouble qdot_k(BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
 | 
					xdouble qdot_k(BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
float   _Complex cdotc_k (BLASLONG, float  *, BLASLONG, float  *, BLASLONG);
 | 
					openblas_complex_float cdotc_k (BLASLONG, float  *, BLASLONG, float  *, BLASLONG);
 | 
				
			||||||
float   _Complex cdotu_k (BLASLONG, float  *, BLASLONG, float  *, BLASLONG);
 | 
					openblas_complex_float cdotu_k (BLASLONG, float  *, BLASLONG, float  *, BLASLONG);
 | 
				
			||||||
double  _Complex zdotc_k (BLASLONG, double *, BLASLONG, double *, BLASLONG);
 | 
					openblas_complex_double zdotc_k (BLASLONG, double *, BLASLONG, double *, BLASLONG);
 | 
				
			||||||
double  _Complex zdotu_k (BLASLONG, double *, BLASLONG, double *, BLASLONG);
 | 
					openblas_complex_double zdotu_k (BLASLONG, double *, BLASLONG, double *, BLASLONG);
 | 
				
			||||||
xdouble _Complex xdotc_k (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
 | 
					openblas_complex_xdouble xdotc_k (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
 | 
				
			||||||
xdouble _Complex xdotu_k (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
 | 
					openblas_complex_xdouble xdotu_k (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
int    saxpy_k (BLASLONG, BLASLONG, BLASLONG, float,
 | 
					int    saxpy_k (BLASLONG, BLASLONG, BLASLONG, float,
 | 
				
			||||||
	       float  *, BLASLONG, float  *, BLASLONG, float  *, BLASLONG);
 | 
						       float  *, BLASLONG, float  *, BLASLONG, float  *, BLASLONG);
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1736,31 +1736,55 @@ int somatcopy_k_cn(BLASLONG, BLASLONG,  float, float *, BLASLONG, float  *, BLAS
 | 
				
			||||||
int somatcopy_k_rn(BLASLONG, BLASLONG,  float, float *, BLASLONG, float  *, BLASLONG);
 | 
					int somatcopy_k_rn(BLASLONG, BLASLONG,  float, float *, BLASLONG, float  *, BLASLONG);
 | 
				
			||||||
int somatcopy_k_ct(BLASLONG, BLASLONG,  float, float *, BLASLONG, float  *, BLASLONG);
 | 
					int somatcopy_k_ct(BLASLONG, BLASLONG,  float, float *, BLASLONG, float  *, BLASLONG);
 | 
				
			||||||
int somatcopy_k_rt(BLASLONG, BLASLONG,  float, float *, BLASLONG, float  *, BLASLONG);
 | 
					int somatcopy_k_rt(BLASLONG, BLASLONG,  float, float *, BLASLONG, float  *, BLASLONG);
 | 
				
			||||||
 | 
					int simatcopy_k_cn(BLASLONG, BLASLONG,  float, float *, BLASLONG);
 | 
				
			||||||
 | 
					int simatcopy_k_rn(BLASLONG, BLASLONG,  float, float *, BLASLONG);
 | 
				
			||||||
 | 
					int simatcopy_k_ct(BLASLONG, BLASLONG,  float, float *, BLASLONG);
 | 
				
			||||||
 | 
					int simatcopy_k_rt(BLASLONG, BLASLONG,  float, float *, BLASLONG);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
int domatcopy_k_cn(BLASLONG, BLASLONG,  double, double *, BLASLONG, double  *, BLASLONG);
 | 
					int domatcopy_k_cn(BLASLONG, BLASLONG,  double, double *, BLASLONG, double  *, BLASLONG);
 | 
				
			||||||
int domatcopy_k_rn(BLASLONG, BLASLONG,  double, double *, BLASLONG, double  *, BLASLONG);
 | 
					int domatcopy_k_rn(BLASLONG, BLASLONG,  double, double *, BLASLONG, double  *, BLASLONG);
 | 
				
			||||||
int domatcopy_k_ct(BLASLONG, BLASLONG,  double, double *, BLASLONG, double  *, BLASLONG);
 | 
					int domatcopy_k_ct(BLASLONG, BLASLONG,  double, double *, BLASLONG, double  *, BLASLONG);
 | 
				
			||||||
int domatcopy_k_rt(BLASLONG, BLASLONG,  double, double *, BLASLONG, double  *, BLASLONG);
 | 
					int domatcopy_k_rt(BLASLONG, BLASLONG,  double, double *, BLASLONG, double  *, BLASLONG);
 | 
				
			||||||
 | 
					int dimatcopy_k_cn(BLASLONG, BLASLONG,  double, double *, BLASLONG);
 | 
				
			||||||
 | 
					int dimatcopy_k_rn(BLASLONG, BLASLONG,  double, double *, BLASLONG);
 | 
				
			||||||
 | 
					int dimatcopy_k_ct(BLASLONG, BLASLONG,  double, double *, BLASLONG);
 | 
				
			||||||
 | 
					int dimatcopy_k_rt(BLASLONG, BLASLONG,  double, double *, BLASLONG);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
int comatcopy_k_cn(BLASLONG, BLASLONG,  float, float, float *, BLASLONG, float  *, BLASLONG);
 | 
					int comatcopy_k_cn(BLASLONG, BLASLONG,  float, float, float *, BLASLONG, float  *, BLASLONG);
 | 
				
			||||||
int comatcopy_k_rn(BLASLONG, BLASLONG,  float, float, float *, BLASLONG, float  *, BLASLONG);
 | 
					int comatcopy_k_rn(BLASLONG, BLASLONG,  float, float, float *, BLASLONG, float  *, BLASLONG);
 | 
				
			||||||
int comatcopy_k_ct(BLASLONG, BLASLONG,  float, float, float *, BLASLONG, float  *, BLASLONG);
 | 
					int comatcopy_k_ct(BLASLONG, BLASLONG,  float, float, float *, BLASLONG, float  *, BLASLONG);
 | 
				
			||||||
int comatcopy_k_rt(BLASLONG, BLASLONG,  float, float, float *, BLASLONG, float  *, BLASLONG);
 | 
					int comatcopy_k_rt(BLASLONG, BLASLONG,  float, float, float *, BLASLONG, float  *, BLASLONG);
 | 
				
			||||||
 | 
					int cimatcopy_k_cn(BLASLONG, BLASLONG,  float, float, float *, BLASLONG);
 | 
				
			||||||
 | 
					int cimatcopy_k_rn(BLASLONG, BLASLONG,  float, float, float *, BLASLONG);
 | 
				
			||||||
 | 
					int cimatcopy_k_ct(BLASLONG, BLASLONG,  float, float, float *, BLASLONG);
 | 
				
			||||||
 | 
					int cimatcopy_k_rt(BLASLONG, BLASLONG,  float, float, float *, BLASLONG);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
int comatcopy_k_cnc(BLASLONG, BLASLONG,  float, float, float *, BLASLONG, float  *, BLASLONG);
 | 
					int comatcopy_k_cnc(BLASLONG, BLASLONG,  float, float, float *, BLASLONG, float  *, BLASLONG);
 | 
				
			||||||
int comatcopy_k_rnc(BLASLONG, BLASLONG,  float, float, float *, BLASLONG, float  *, BLASLONG);
 | 
					int comatcopy_k_rnc(BLASLONG, BLASLONG,  float, float, float *, BLASLONG, float  *, BLASLONG);
 | 
				
			||||||
int comatcopy_k_ctc(BLASLONG, BLASLONG,  float, float, float *, BLASLONG, float  *, BLASLONG);
 | 
					int comatcopy_k_ctc(BLASLONG, BLASLONG,  float, float, float *, BLASLONG, float  *, BLASLONG);
 | 
				
			||||||
int comatcopy_k_rtc(BLASLONG, BLASLONG,  float, float, float *, BLASLONG, float  *, BLASLONG);
 | 
					int comatcopy_k_rtc(BLASLONG, BLASLONG,  float, float, float *, BLASLONG, float  *, BLASLONG);
 | 
				
			||||||
 | 
					int cimatcopy_k_cnc(BLASLONG, BLASLONG,  float, float, float *, BLASLONG);
 | 
				
			||||||
 | 
					int cimatcopy_k_rnc(BLASLONG, BLASLONG,  float, float, float *, BLASLONG);
 | 
				
			||||||
 | 
					int cimatcopy_k_ctc(BLASLONG, BLASLONG,  float, float, float *, BLASLONG);
 | 
				
			||||||
 | 
					int cimatcopy_k_rtc(BLASLONG, BLASLONG,  float, float, float *, BLASLONG);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
int zomatcopy_k_cn(BLASLONG, BLASLONG,  double, double, double *, BLASLONG, double  *, BLASLONG);
 | 
					int zomatcopy_k_cn(BLASLONG, BLASLONG,  double, double, double *, BLASLONG, double  *, BLASLONG);
 | 
				
			||||||
int zomatcopy_k_rn(BLASLONG, BLASLONG,  double, double, double *, BLASLONG, double  *, BLASLONG);
 | 
					int zomatcopy_k_rn(BLASLONG, BLASLONG,  double, double, double *, BLASLONG, double  *, BLASLONG);
 | 
				
			||||||
int zomatcopy_k_ct(BLASLONG, BLASLONG,  double, double, double *, BLASLONG, double  *, BLASLONG);
 | 
					int zomatcopy_k_ct(BLASLONG, BLASLONG,  double, double, double *, BLASLONG, double  *, BLASLONG);
 | 
				
			||||||
int zomatcopy_k_rt(BLASLONG, BLASLONG,  double, double, double *, BLASLONG, double  *, BLASLONG);
 | 
					int zomatcopy_k_rt(BLASLONG, BLASLONG,  double, double, double *, BLASLONG, double  *, BLASLONG);
 | 
				
			||||||
 | 
					int zimatcopy_k_cn(BLASLONG, BLASLONG,  double, double, double *, BLASLONG);
 | 
				
			||||||
 | 
					int zimatcopy_k_rn(BLASLONG, BLASLONG,  double, double, double *, BLASLONG);
 | 
				
			||||||
 | 
					int zimatcopy_k_ct(BLASLONG, BLASLONG,  double, double, double *, BLASLONG);
 | 
				
			||||||
 | 
					int zimatcopy_k_rt(BLASLONG, BLASLONG,  double, double, double *, BLASLONG);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
int zomatcopy_k_cnc(BLASLONG, BLASLONG,  double, double, double *, BLASLONG, double  *, BLASLONG);
 | 
					int zomatcopy_k_cnc(BLASLONG, BLASLONG,  double, double, double *, BLASLONG, double  *, BLASLONG);
 | 
				
			||||||
int zomatcopy_k_rnc(BLASLONG, BLASLONG,  double, double, double *, BLASLONG, double  *, BLASLONG);
 | 
					int zomatcopy_k_rnc(BLASLONG, BLASLONG,  double, double, double *, BLASLONG, double  *, BLASLONG);
 | 
				
			||||||
int zomatcopy_k_ctc(BLASLONG, BLASLONG,  double, double, double *, BLASLONG, double  *, BLASLONG);
 | 
					int zomatcopy_k_ctc(BLASLONG, BLASLONG,  double, double, double *, BLASLONG, double  *, BLASLONG);
 | 
				
			||||||
int zomatcopy_k_rtc(BLASLONG, BLASLONG,  double, double, double *, BLASLONG, double  *, BLASLONG);
 | 
					int zomatcopy_k_rtc(BLASLONG, BLASLONG,  double, double, double *, BLASLONG, double  *, BLASLONG);
 | 
				
			||||||
 | 
					int zimatcopy_k_cnc(BLASLONG, BLASLONG,  double, double, double *, BLASLONG);
 | 
				
			||||||
 | 
					int zimatcopy_k_rnc(BLASLONG, BLASLONG,  double, double, double *, BLASLONG);
 | 
				
			||||||
 | 
					int zimatcopy_k_ctc(BLASLONG, BLASLONG,  double, double, double *, BLASLONG);
 | 
				
			||||||
 | 
					int zimatcopy_k_rtc(BLASLONG, BLASLONG,  double, double, double *, BLASLONG);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
int sgeadd_k(BLASLONG, BLASLONG, float, float*, BLASLONG, float, float *, BLASLONG); 
 | 
					int sgeadd_k(BLASLONG, BLASLONG, float, float*, BLASLONG, float, float *, BLASLONG); 
 | 
				
			||||||
int dgeadd_k(BLASLONG, BLASLONG, double, double*, BLASLONG, double, double *, BLASLONG); 
 | 
					int dgeadd_k(BLASLONG, BLASLONG, double, double*, BLASLONG, double, double *, BLASLONG); 
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -634,6 +634,11 @@
 | 
				
			||||||
#define OMATCOPY_K_RN		DOMATCOPY_K_RN
 | 
					#define OMATCOPY_K_RN		DOMATCOPY_K_RN
 | 
				
			||||||
#define OMATCOPY_K_CT		DOMATCOPY_K_CT
 | 
					#define OMATCOPY_K_CT		DOMATCOPY_K_CT
 | 
				
			||||||
#define OMATCOPY_K_RT		DOMATCOPY_K_RT
 | 
					#define OMATCOPY_K_RT		DOMATCOPY_K_RT
 | 
				
			||||||
 | 
					#define IMATCOPY_K_CN		DIMATCOPY_K_CN
 | 
				
			||||||
 | 
					#define IMATCOPY_K_RN		DIMATCOPY_K_RN
 | 
				
			||||||
 | 
					#define IMATCOPY_K_CT		DIMATCOPY_K_CT
 | 
				
			||||||
 | 
					#define IMATCOPY_K_RT		DIMATCOPY_K_RT
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#define GEADD_K                 DGEADD_K 
 | 
					#define GEADD_K                 DGEADD_K 
 | 
				
			||||||
#else
 | 
					#else
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -931,6 +936,10 @@
 | 
				
			||||||
#define OMATCOPY_K_RN		SOMATCOPY_K_RN
 | 
					#define OMATCOPY_K_RN		SOMATCOPY_K_RN
 | 
				
			||||||
#define OMATCOPY_K_CT		SOMATCOPY_K_CT
 | 
					#define OMATCOPY_K_CT		SOMATCOPY_K_CT
 | 
				
			||||||
#define OMATCOPY_K_RT		SOMATCOPY_K_RT
 | 
					#define OMATCOPY_K_RT		SOMATCOPY_K_RT
 | 
				
			||||||
 | 
					#define IMATCOPY_K_CN		SIMATCOPY_K_CN
 | 
				
			||||||
 | 
					#define IMATCOPY_K_RN		SIMATCOPY_K_RN
 | 
				
			||||||
 | 
					#define IMATCOPY_K_CT		SIMATCOPY_K_CT
 | 
				
			||||||
 | 
					#define IMATCOPY_K_RT		SIMATCOPY_K_RT
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#define GEADD_K 		SGEADD_K 
 | 
					#define GEADD_K 		SGEADD_K 
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
| 
						 | 
					@ -1747,6 +1756,15 @@
 | 
				
			||||||
#define OMATCOPY_K_RNC		ZOMATCOPY_K_RNC
 | 
					#define OMATCOPY_K_RNC		ZOMATCOPY_K_RNC
 | 
				
			||||||
#define OMATCOPY_K_CTC		ZOMATCOPY_K_CTC
 | 
					#define OMATCOPY_K_CTC		ZOMATCOPY_K_CTC
 | 
				
			||||||
#define OMATCOPY_K_RTC		ZOMATCOPY_K_RTC
 | 
					#define OMATCOPY_K_RTC		ZOMATCOPY_K_RTC
 | 
				
			||||||
 | 
					#define IMATCOPY_K_CN		ZIMATCOPY_K_CN
 | 
				
			||||||
 | 
					#define IMATCOPY_K_RN		ZIMATCOPY_K_RN
 | 
				
			||||||
 | 
					#define IMATCOPY_K_CT		ZIMATCOPY_K_CT
 | 
				
			||||||
 | 
					#define IMATCOPY_K_RT		ZIMATCOPY_K_RT
 | 
				
			||||||
 | 
					#define IMATCOPY_K_CNC		ZIMATCOPY_K_CNC
 | 
				
			||||||
 | 
					#define IMATCOPY_K_RNC		ZIMATCOPY_K_RNC
 | 
				
			||||||
 | 
					#define IMATCOPY_K_CTC		ZIMATCOPY_K_CTC
 | 
				
			||||||
 | 
					#define IMATCOPY_K_RTC		ZIMATCOPY_K_RTC
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#define GEADD_K                 ZGEADD_K 
 | 
					#define GEADD_K                 ZGEADD_K 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#else
 | 
					#else
 | 
				
			||||||
| 
						 | 
					@ -2160,6 +2178,14 @@
 | 
				
			||||||
#define OMATCOPY_K_RNC		COMATCOPY_K_RNC
 | 
					#define OMATCOPY_K_RNC		COMATCOPY_K_RNC
 | 
				
			||||||
#define OMATCOPY_K_CTC		COMATCOPY_K_CTC
 | 
					#define OMATCOPY_K_CTC		COMATCOPY_K_CTC
 | 
				
			||||||
#define OMATCOPY_K_RTC		COMATCOPY_K_RTC
 | 
					#define OMATCOPY_K_RTC		COMATCOPY_K_RTC
 | 
				
			||||||
 | 
					#define IMATCOPY_K_CN		CIMATCOPY_K_CN
 | 
				
			||||||
 | 
					#define IMATCOPY_K_RN		CIMATCOPY_K_RN
 | 
				
			||||||
 | 
					#define IMATCOPY_K_CT		CIMATCOPY_K_CT
 | 
				
			||||||
 | 
					#define IMATCOPY_K_RT		CIMATCOPY_K_RT
 | 
				
			||||||
 | 
					#define IMATCOPY_K_CNC		CIMATCOPY_K_CNC
 | 
				
			||||||
 | 
					#define IMATCOPY_K_RNC		CIMATCOPY_K_RNC
 | 
				
			||||||
 | 
					#define IMATCOPY_K_CTC		CIMATCOPY_K_CTC
 | 
				
			||||||
 | 
					#define IMATCOPY_K_RTC		CIMATCOPY_K_RTC
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#define GEADD_K                 CGEADD_K 
 | 
					#define GEADD_K                 CGEADD_K 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -98,6 +98,7 @@ static void INLINE blas_lock(volatile unsigned long *address){
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  } while (ret);
 | 
					  } while (ret);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					#define BLAS_LOCK_DEFINED
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static inline unsigned int rpcc(void){
 | 
					static inline unsigned int rpcc(void){
 | 
				
			||||||
  unsigned long ret;
 | 
					  unsigned long ret;
 | 
				
			||||||
| 
						 | 
					@ -118,6 +119,7 @@ static inline unsigned int rpcc(void){
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
  return ret;
 | 
					  return ret;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					#define RPCC_DEFINED
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#if defined(LOONGSON3A) || defined(LOONGSON3B)
 | 
					#if defined(LOONGSON3A) || defined(LOONGSON3B)
 | 
				
			||||||
#ifndef NO_AFFINITY
 | 
					#ifndef NO_AFFINITY
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -855,6 +855,36 @@ BLASLONG (*ixamin_k)(BLASLONG, xdouble *, BLASLONG);
 | 
				
			||||||
  int    (*zomatcopy_k_rnc)	(BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
 | 
					  int    (*zomatcopy_k_rnc)	(BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
 | 
				
			||||||
  int    (*zomatcopy_k_rtc)	(BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
 | 
					  int    (*zomatcopy_k_rtc)	(BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  int    (*simatcopy_k_cn)	(BLASLONG, BLASLONG, float, float*, BLASLONG);
 | 
				
			||||||
 | 
					  int    (*simatcopy_k_ct)	(BLASLONG, BLASLONG, float, float*, BLASLONG);
 | 
				
			||||||
 | 
					  int    (*simatcopy_k_rn)	(BLASLONG, BLASLONG, float, float*, BLASLONG);
 | 
				
			||||||
 | 
					  int    (*simatcopy_k_rt)	(BLASLONG, BLASLONG, float, float*, BLASLONG);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  int    (*dimatcopy_k_cn)	(BLASLONG, BLASLONG, double, double*, BLASLONG);
 | 
				
			||||||
 | 
					  int    (*dimatcopy_k_ct)	(BLASLONG, BLASLONG, double, double*, BLASLONG);
 | 
				
			||||||
 | 
					  int    (*dimatcopy_k_rn)	(BLASLONG, BLASLONG, double, double*, BLASLONG);
 | 
				
			||||||
 | 
					  int    (*dimatcopy_k_rt)	(BLASLONG, BLASLONG, double, double*, BLASLONG);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  int    (*cimatcopy_k_cn)	(BLASLONG, BLASLONG, float, float, float*, BLASLONG);
 | 
				
			||||||
 | 
					  int    (*cimatcopy_k_ct)	(BLASLONG, BLASLONG, float, float, float*, BLASLONG);
 | 
				
			||||||
 | 
					  int    (*cimatcopy_k_rn)	(BLASLONG, BLASLONG, float, float, float*, BLASLONG);
 | 
				
			||||||
 | 
					  int    (*cimatcopy_k_rt)	(BLASLONG, BLASLONG, float, float, float*, BLASLONG);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  int    (*cimatcopy_k_cnc)	(BLASLONG, BLASLONG, float, float, float*, BLASLONG);
 | 
				
			||||||
 | 
					  int    (*cimatcopy_k_ctc)	(BLASLONG, BLASLONG, float, float, float*, BLASLONG);
 | 
				
			||||||
 | 
					  int    (*cimatcopy_k_rnc)	(BLASLONG, BLASLONG, float, float, float*, BLASLONG);
 | 
				
			||||||
 | 
					  int    (*cimatcopy_k_rtc)	(BLASLONG, BLASLONG, float, float, float*, BLASLONG);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  int    (*zimatcopy_k_cn)	(BLASLONG, BLASLONG, double, double, double*, BLASLONG);
 | 
				
			||||||
 | 
					  int    (*zimatcopy_k_ct)	(BLASLONG, BLASLONG, double, double, double*, BLASLONG);
 | 
				
			||||||
 | 
					  int    (*zimatcopy_k_rn)	(BLASLONG, BLASLONG, double, double, double*, BLASLONG);
 | 
				
			||||||
 | 
					  int    (*zimatcopy_k_rt)	(BLASLONG, BLASLONG, double, double, double*, BLASLONG);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  int    (*zimatcopy_k_cnc)	(BLASLONG, BLASLONG, double, double, double*, BLASLONG);
 | 
				
			||||||
 | 
					  int    (*zimatcopy_k_ctc)	(BLASLONG, BLASLONG, double, double, double*, BLASLONG);
 | 
				
			||||||
 | 
					  int    (*zimatcopy_k_rnc)	(BLASLONG, BLASLONG, double, double, double*, BLASLONG);
 | 
				
			||||||
 | 
					  int    (*zimatcopy_k_rtc)	(BLASLONG, BLASLONG, double, double, double*, BLASLONG);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  int    (*sgeadd_k) (BLASLONG, BLASLONG, float, float *, BLASLONG, float, float *, BLASLONG); 
 | 
					  int    (*sgeadd_k) (BLASLONG, BLASLONG, float, float *, BLASLONG, float, float *, BLASLONG); 
 | 
				
			||||||
  int    (*dgeadd_k) (BLASLONG, BLASLONG, double, double *, BLASLONG, double, double *, BLASLONG); 
 | 
					  int    (*dgeadd_k) (BLASLONG, BLASLONG, double, double *, BLASLONG, double, double *, BLASLONG); 
 | 
				
			||||||
  int    (*cgeadd_k) (BLASLONG, BLASLONG, float, float,  float *,  BLASLONG, float, float, float *, BLASLONG); 
 | 
					  int    (*cgeadd_k) (BLASLONG, BLASLONG, float, float,  float *,  BLASLONG, float, float, float *, BLASLONG); 
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -87,6 +87,7 @@ static void INLINE blas_lock(volatile unsigned long *address){
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
  } while (ret);
 | 
					  } while (ret);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					#define BLAS_LOCK_DEFINED
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static inline unsigned long rpcc(void){
 | 
					static inline unsigned long rpcc(void){
 | 
				
			||||||
  unsigned long ret;
 | 
					  unsigned long ret;
 | 
				
			||||||
| 
						 | 
					@ -103,6 +104,7 @@ static inline unsigned long rpcc(void){
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					#define RPCC_DEFINED
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#ifdef __64BIT__
 | 
					#ifdef __64BIT__
 | 
				
			||||||
#define RPCC64BIT
 | 
					#define RPCC64BIT
 | 
				
			||||||
| 
						 | 
					@ -495,6 +497,15 @@ static inline int blas_quickdivide(blasint x, blasint y){
 | 
				
			||||||
REALNAME:
 | 
					REALNAME:
 | 
				
			||||||
#define EPILOGUE	.size	REALNAME, .-REALNAME
 | 
					#define EPILOGUE	.size	REALNAME, .-REALNAME
 | 
				
			||||||
#else
 | 
					#else
 | 
				
			||||||
 | 
					#if _CALL_ELF == 2
 | 
				
			||||||
 | 
					#define PROLOGUE \
 | 
				
			||||||
 | 
						.section .text;\
 | 
				
			||||||
 | 
						.align 6;\
 | 
				
			||||||
 | 
						.globl	REALNAME;\
 | 
				
			||||||
 | 
						.type	REALNAME, @function;\
 | 
				
			||||||
 | 
					REALNAME:
 | 
				
			||||||
 | 
					#define EPILOGUE	.size	REALNAME, .-REALNAME
 | 
				
			||||||
 | 
					#else
 | 
				
			||||||
#define PROLOGUE \
 | 
					#define PROLOGUE \
 | 
				
			||||||
	.section .text;\
 | 
						.section .text;\
 | 
				
			||||||
	.align 5;\
 | 
						.align 5;\
 | 
				
			||||||
| 
						 | 
					@ -514,6 +525,7 @@ REALNAME:;\
 | 
				
			||||||
	.size	.REALNAME, .-.REALNAME; \
 | 
						.size	.REALNAME, .-.REALNAME; \
 | 
				
			||||||
	.section	.note.GNU-stack,"",@progbits
 | 
						.section	.note.GNU-stack,"",@progbits
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#ifdef PROFILE
 | 
					#ifdef PROFILE
 | 
				
			||||||
#ifndef __64BIT__
 | 
					#ifndef __64BIT__
 | 
				
			||||||
| 
						 | 
					@ -792,4 +804,25 @@ Lmcount$lazy_ptr:
 | 
				
			||||||
#ifndef MAP_ANONYMOUS
 | 
					#ifndef MAP_ANONYMOUS
 | 
				
			||||||
#define MAP_ANONYMOUS MAP_ANON
 | 
					#define MAP_ANONYMOUS MAP_ANON
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#ifdef OS_LINUX
 | 
				
			||||||
 | 
					#ifndef __64BIT__
 | 
				
			||||||
 | 
					#define FRAMESLOT(X) (((X) * 4) + 8)
 | 
				
			||||||
 | 
					#else
 | 
				
			||||||
 | 
					#if _CALL_ELF == 2
 | 
				
			||||||
 | 
					#define FRAMESLOT(X) (((X) * 8) + 96)
 | 
				
			||||||
 | 
					#else
 | 
				
			||||||
 | 
					#define FRAMESLOT(X) (((X) * 8) + 112)
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#if defined(OS_AIX) || defined(OS_DARWIN)
 | 
				
			||||||
 | 
					#ifndef __64BIT__
 | 
				
			||||||
 | 
					#define FRAMESLOT(X) (((X) * 4) + 56)
 | 
				
			||||||
 | 
					#else
 | 
				
			||||||
 | 
					#define FRAMESLOT(X) (((X) * 8) + 112)
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -152,6 +152,10 @@
 | 
				
			||||||
#define SOMATCOPY_K_RN          somatcopy_k_rn
 | 
					#define SOMATCOPY_K_RN          somatcopy_k_rn
 | 
				
			||||||
#define SOMATCOPY_K_CT          somatcopy_k_ct
 | 
					#define SOMATCOPY_K_CT          somatcopy_k_ct
 | 
				
			||||||
#define SOMATCOPY_K_RT          somatcopy_k_rt
 | 
					#define SOMATCOPY_K_RT          somatcopy_k_rt
 | 
				
			||||||
 | 
					#define SIMATCOPY_K_CN          simatcopy_k_cn
 | 
				
			||||||
 | 
					#define SIMATCOPY_K_RN          simatcopy_k_rn
 | 
				
			||||||
 | 
					#define SIMATCOPY_K_CT          simatcopy_k_ct
 | 
				
			||||||
 | 
					#define SIMATCOPY_K_RT          simatcopy_k_rt
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#define SGEADD_K                sgeadd_k 
 | 
					#define SGEADD_K                sgeadd_k 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -274,6 +278,10 @@
 | 
				
			||||||
#define SOMATCOPY_K_RN          gotoblas -> somatcopy_k_rn
 | 
					#define SOMATCOPY_K_RN          gotoblas -> somatcopy_k_rn
 | 
				
			||||||
#define SOMATCOPY_K_CT          gotoblas -> somatcopy_k_ct
 | 
					#define SOMATCOPY_K_CT          gotoblas -> somatcopy_k_ct
 | 
				
			||||||
#define SOMATCOPY_K_RT          gotoblas -> somatcopy_k_rt
 | 
					#define SOMATCOPY_K_RT          gotoblas -> somatcopy_k_rt
 | 
				
			||||||
 | 
					#define SIMATCOPY_K_CN          gotoblas -> simatcopy_k_cn
 | 
				
			||||||
 | 
					#define SIMATCOPY_K_RN          gotoblas -> simatcopy_k_rn
 | 
				
			||||||
 | 
					#define SIMATCOPY_K_CT          gotoblas -> simatcopy_k_ct
 | 
				
			||||||
 | 
					#define SIMATCOPY_K_RT          gotoblas -> simatcopy_k_rt
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#define SGEADD_K                gotoblas -> sgeadd_k 
 | 
					#define SGEADD_K                gotoblas -> sgeadd_k 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -58,6 +58,7 @@ static void __inline blas_lock(volatile unsigned long *address){
 | 
				
			||||||
			 : "memory");
 | 
								 : "memory");
 | 
				
			||||||
  } while (ret);
 | 
					  } while (ret);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					#define BLAS_LOCK_DEFINED
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static __inline unsigned long rpcc(void){
 | 
					static __inline unsigned long rpcc(void){
 | 
				
			||||||
  unsigned long clocks;
 | 
					  unsigned long clocks;
 | 
				
			||||||
| 
						 | 
					@ -66,6 +67,7 @@ static __inline unsigned long rpcc(void){
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  return clocks;
 | 
					  return clocks;
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					#define RPCC_DEFINED
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#ifdef __64BIT__
 | 
					#ifdef __64BIT__
 | 
				
			||||||
#define RPCC64BIT
 | 
					#define RPCC64BIT
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
							
								
								
									
										39
									
								
								common_x86.h
								
								
								
								
							
							
						
						
									
										39
									
								
								common_x86.h
								
								
								
								
							| 
						 | 
					@ -56,41 +56,67 @@ static void __inline blas_lock(volatile BLASULONG *address){
 | 
				
			||||||
  do {
 | 
					  do {
 | 
				
			||||||
    while (*address) {YIELDING;};
 | 
					    while (*address) {YIELDING;};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#if defined(_MSC_VER) && !defined(__clang__)
 | 
				
			||||||
 | 
						// use intrinsic instead of inline assembly
 | 
				
			||||||
 | 
						ret = _InterlockedExchange(address, 1);
 | 
				
			||||||
 | 
						// inline assembly
 | 
				
			||||||
 | 
						/*__asm {
 | 
				
			||||||
 | 
							mov eax, address
 | 
				
			||||||
 | 
							mov ebx, 1
 | 
				
			||||||
 | 
							xchg [eax], ebx
 | 
				
			||||||
 | 
							mov ret, ebx
 | 
				
			||||||
 | 
						}*/
 | 
				
			||||||
 | 
					#else
 | 
				
			||||||
    __asm__ __volatile__(
 | 
					    __asm__ __volatile__(
 | 
				
			||||||
			 "xchgl %0, %1\n"
 | 
								 "xchgl %0, %1\n"
 | 
				
			||||||
			 : "=r"(ret), "=m"(*address)
 | 
								 : "=r"(ret), "=m"(*address)
 | 
				
			||||||
			 : "0"(1), "m"(*address)
 | 
								 : "0"(1), "m"(*address)
 | 
				
			||||||
			 : "memory");
 | 
								 : "memory");
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  } while (ret);
 | 
					  } while (ret);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					#define BLAS_LOCK_DEFINED
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static __inline unsigned long long rpcc(void){
 | 
					static __inline unsigned long long rpcc(void){
 | 
				
			||||||
 | 
					#if defined(_MSC_VER) && !defined(__clang__)
 | 
				
			||||||
 | 
					  return __rdtsc(); // use MSVC intrinsic
 | 
				
			||||||
 | 
					#else
 | 
				
			||||||
  unsigned int a, d;
 | 
					  unsigned int a, d;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  __asm__ __volatile__ ("rdtsc" : "=a" (a), "=d" (d));
 | 
					  __asm__ __volatile__ ("rdtsc" : "=a" (a), "=d" (d));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  return ((unsigned long long)a + ((unsigned long long)d << 32));
 | 
					  return ((unsigned long long)a + ((unsigned long long)d << 32));
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					#define RPCC_DEFINED
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static __inline unsigned long getstackaddr(void){
 | 
					static __inline unsigned long getstackaddr(void){
 | 
				
			||||||
 | 
					#if defined(_MSC_VER) && !defined(__clang__)
 | 
				
			||||||
 | 
					  return (unsigned long)_ReturnAddress(); // use MSVC intrinsic
 | 
				
			||||||
 | 
					#else
 | 
				
			||||||
  unsigned long addr;
 | 
					  unsigned long addr;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  __asm__ __volatile__ ("mov %%esp, %0"
 | 
					  __asm__ __volatile__ ("mov %%esp, %0"
 | 
				
			||||||
			 : "=r"(addr) : : "memory");
 | 
								 : "=r"(addr) : : "memory");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  return addr;
 | 
					  return addr;
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static __inline long double sqrt_long(long double val) {
 | 
					static __inline long double sqrt_long(long double val) {
 | 
				
			||||||
 | 
					#if defined(_MSC_VER) && !defined(__clang__)
 | 
				
			||||||
 | 
					  return sqrt(val); // not sure if this will use fsqrt
 | 
				
			||||||
 | 
					#else
 | 
				
			||||||
  long double result;
 | 
					  long double result;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  __asm__ __volatile__ ("fldt %1\n"
 | 
					  __asm__ __volatile__ ("fldt %1\n"
 | 
				
			||||||
		    "fsqrt\n"
 | 
							    "fsqrt\n"
 | 
				
			||||||
		    "fstpt %0\n" : "=m" (result) : "m"(val));
 | 
							    "fstpt %0\n" : "=m" (result) : "m"(val));
 | 
				
			||||||
  return result;
 | 
					  return result;
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#define SQRT(a)  sqrt_long(a)
 | 
					#define SQRT(a)  sqrt_long(a)
 | 
				
			||||||
| 
						 | 
					@ -100,7 +126,7 @@ void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#define WHEREAMI
 | 
					#define WHEREAMI
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static inline int WhereAmI(void){
 | 
					static __inline int WhereAmI(void){
 | 
				
			||||||
  int eax, ebx, ecx, edx;
 | 
					  int eax, ebx, ecx, edx;
 | 
				
			||||||
  int apicid;
 | 
					  int apicid;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -146,9 +172,14 @@ static __inline int blas_quickdivide(unsigned int x, unsigned int y){
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  y = blas_quick_divide_table[y];
 | 
					  y = blas_quick_divide_table[y];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#if defined(_MSC_VER) && !defined(__clang__)
 | 
				
			||||||
 | 
					  (void*)result;
 | 
				
			||||||
 | 
					  return x*y;
 | 
				
			||||||
 | 
					#else
 | 
				
			||||||
  __asm__ __volatile__  ("mull %0" :"=d" (result) :"a"(x), "0" (y));
 | 
					  __asm__ __volatile__  ("mull %0" :"=d" (result) :"a"(x), "0" (y));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  return result;
 | 
					  return result;
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -171,7 +202,7 @@ static __inline int blas_quickdivide(unsigned int x, unsigned int y){
 | 
				
			||||||
#define MMXSTORE	movd
 | 
					#define MMXSTORE	movd
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#if defined(PILEDRIVER) || defined(BULLDOZER) || defined(STEAMROLLER)
 | 
					#if defined(PILEDRIVER) || defined(BULLDOZER) || defined(STEAMROLLER) || defined(EXCAVATOR)
 | 
				
			||||||
//Enable some optimazation for barcelona.
 | 
					//Enable some optimazation for barcelona.
 | 
				
			||||||
#define BARCELONA_OPTIMIZATION
 | 
					#define BARCELONA_OPTIMIZATION
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
| 
						 | 
					@ -284,8 +315,12 @@ REALNAME:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#define PROFCODE
 | 
					#define PROFCODE
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#ifdef __clang__
 | 
				
			||||||
 | 
					#define EPILOGUE .end
 | 
				
			||||||
 | 
					#else
 | 
				
			||||||
#define EPILOGUE .end	 REALNAME
 | 
					#define EPILOGUE .end	 REALNAME
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#if defined(OS_LINUX) || defined(OS_FREEBSD) || defined(OS_NETBSD) || defined(__ELF__)
 | 
					#if defined(OS_LINUX) || defined(OS_FREEBSD) || defined(OS_NETBSD) || defined(__ELF__)
 | 
				
			||||||
#define PROLOGUE \
 | 
					#define PROLOGUE \
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -41,6 +41,10 @@
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#ifndef ASSEMBLER
 | 
					#ifndef ASSEMBLER
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#ifdef C_MSVC
 | 
				
			||||||
 | 
					#include <intrin.h>
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#ifdef C_SUN
 | 
					#ifdef C_SUN
 | 
				
			||||||
#define	__asm__ __asm
 | 
					#define	__asm__ __asm
 | 
				
			||||||
#define	__volatile__
 | 
					#define	__volatile__
 | 
				
			||||||
| 
						 | 
					@ -61,30 +65,45 @@
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static void __inline blas_lock(volatile BLASULONG *address){
 | 
					static void __inline blas_lock(volatile BLASULONG *address){
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#ifndef C_MSVC
 | 
				
			||||||
  int ret;
 | 
					  int ret;
 | 
				
			||||||
 | 
					#else
 | 
				
			||||||
 | 
					  BLASULONG ret;
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  do {
 | 
					  do {
 | 
				
			||||||
    while (*address) {YIELDING;};
 | 
					    while (*address) {YIELDING;};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#ifndef C_MSVC
 | 
				
			||||||
    __asm__ __volatile__(
 | 
					    __asm__ __volatile__(
 | 
				
			||||||
			 "xchgl %0, %1\n"
 | 
								 "xchgl %0, %1\n"
 | 
				
			||||||
			 : "=r"(ret), "=m"(*address)
 | 
								 : "=r"(ret), "=m"(*address)
 | 
				
			||||||
			 : "0"(1), "m"(*address)
 | 
								 : "0"(1), "m"(*address)
 | 
				
			||||||
			 : "memory");
 | 
								 : "memory");
 | 
				
			||||||
 | 
					#else
 | 
				
			||||||
 | 
					    ret=InterlockedExchange64((volatile LONG64 *)(address), 1);
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
  } while (ret);
 | 
					  } while (ret);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					#define BLAS_LOCK_DEFINED
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static __inline BLASULONG rpcc(void){
 | 
					static __inline BLASULONG rpcc(void){
 | 
				
			||||||
 | 
					#ifdef C_MSVC
 | 
				
			||||||
 | 
					  return __rdtsc();
 | 
				
			||||||
 | 
					#else
 | 
				
			||||||
  BLASULONG a, d;
 | 
					  BLASULONG a, d;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  __asm__ __volatile__ ("rdtsc" : "=a" (a), "=d" (d));
 | 
					  __asm__ __volatile__ ("rdtsc" : "=a" (a), "=d" (d));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  return ((BLASULONG)a + ((BLASULONG)d << 32));
 | 
					  return ((BLASULONG)a + ((BLASULONG)d << 32));
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					#define RPCC_DEFINED
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#define RPCC64BIT
 | 
					#define RPCC64BIT
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#ifndef C_MSVC
 | 
				
			||||||
static __inline BLASULONG getstackaddr(void){
 | 
					static __inline BLASULONG getstackaddr(void){
 | 
				
			||||||
  BLASULONG addr;
 | 
					  BLASULONG addr;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -93,22 +112,32 @@ static __inline BLASULONG getstackaddr(void){
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  return addr;
 | 
					  return addr;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static __inline void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx){
 | 
					static __inline void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx){
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#ifdef C_MSVC
 | 
				
			||||||
 | 
					  int cpuinfo[4];
 | 
				
			||||||
 | 
					  __cpuid(cpuinfo, op);
 | 
				
			||||||
 | 
					  *eax=cpuinfo[0];
 | 
				
			||||||
 | 
					  *ebx=cpuinfo[1];
 | 
				
			||||||
 | 
					  *ecx=cpuinfo[2];
 | 
				
			||||||
 | 
					  *edx=cpuinfo[3];
 | 
				
			||||||
 | 
					#else
 | 
				
			||||||
        __asm__ __volatile__("cpuid"
 | 
					        __asm__ __volatile__("cpuid"
 | 
				
			||||||
			     : "=a" (*eax),
 | 
								     : "=a" (*eax),
 | 
				
			||||||
			     "=b" (*ebx),
 | 
								     "=b" (*ebx),
 | 
				
			||||||
			     "=c" (*ecx),
 | 
								     "=c" (*ecx),
 | 
				
			||||||
			     "=d" (*edx)
 | 
								     "=d" (*edx)
 | 
				
			||||||
			     : "0" (op));
 | 
								     : "0" (op));
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/*
 | 
					/*
 | 
				
			||||||
#define WHEREAMI
 | 
					#define WHEREAMI
 | 
				
			||||||
*/
 | 
					*/
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static inline int WhereAmI(void){
 | 
					static __inline int WhereAmI(void){
 | 
				
			||||||
  int eax, ebx, ecx, edx;
 | 
					  int eax, ebx, ecx, edx;
 | 
				
			||||||
  int apicid;
 | 
					  int apicid;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -150,10 +179,14 @@ static inline int WhereAmI(void){
 | 
				
			||||||
#define GET_IMAGE_CANCEL
 | 
					#define GET_IMAGE_CANCEL
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#ifdef SMP
 | 
					#ifdef SMP
 | 
				
			||||||
#ifdef USE64BITINT
 | 
					#if defined(USE64BITINT)
 | 
				
			||||||
static __inline blasint blas_quickdivide(blasint x, blasint y){
 | 
					static __inline blasint blas_quickdivide(blasint x, blasint y){
 | 
				
			||||||
  return x / y;
 | 
					  return x / y;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					#elif defined (C_MSVC)
 | 
				
			||||||
 | 
					static __inline BLASLONG blas_quickdivide(BLASLONG x, BLASLONG y){
 | 
				
			||||||
 | 
					  return x / y;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
#else
 | 
					#else
 | 
				
			||||||
extern unsigned int blas_quick_divide_table[];
 | 
					extern unsigned int blas_quick_divide_table[];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -226,7 +259,7 @@ static __inline int blas_quickdivide(unsigned int x, unsigned int y){
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#ifdef ASSEMBLER
 | 
					#ifdef ASSEMBLER
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#if defined(PILEDRIVER) || defined(BULLDOZER) || defined(STEAMROLLER)
 | 
					#if defined(PILEDRIVER) || defined(BULLDOZER) || defined(STEAMROLLER) || defined(EXCAVATOR)
 | 
				
			||||||
//Enable some optimazation for barcelona.
 | 
					//Enable some optimazation for barcelona.
 | 
				
			||||||
#define BARCELONA_OPTIMIZATION
 | 
					#define BARCELONA_OPTIMIZATION
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
							
								
								
									
										18
									
								
								common_z.h
								
								
								
								
							
							
						
						
									
										18
									
								
								common_z.h
								
								
								
								
							| 
						 | 
					@ -220,6 +220,15 @@
 | 
				
			||||||
#define ZOMATCOPY_K_CTC         zomatcopy_k_ctc
 | 
					#define ZOMATCOPY_K_CTC         zomatcopy_k_ctc
 | 
				
			||||||
#define ZOMATCOPY_K_RTC         zomatcopy_k_rtc
 | 
					#define ZOMATCOPY_K_RTC         zomatcopy_k_rtc
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#define ZIMATCOPY_K_CN          zimatcopy_k_cn
 | 
				
			||||||
 | 
					#define ZIMATCOPY_K_RN          zimatcopy_k_rn
 | 
				
			||||||
 | 
					#define ZIMATCOPY_K_CT          zimatcopy_k_ct
 | 
				
			||||||
 | 
					#define ZIMATCOPY_K_RT          zimatcopy_k_rt
 | 
				
			||||||
 | 
					#define ZIMATCOPY_K_CNC         zimatcopy_k_cnc
 | 
				
			||||||
 | 
					#define ZIMATCOPY_K_RNC         zimatcopy_k_rnc
 | 
				
			||||||
 | 
					#define ZIMATCOPY_K_CTC         zimatcopy_k_ctc
 | 
				
			||||||
 | 
					#define ZIMATCOPY_K_RTC         zimatcopy_k_rtc
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#define ZGEADD_K                zgeadd_k 
 | 
					#define ZGEADD_K                zgeadd_k 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#else
 | 
					#else
 | 
				
			||||||
| 
						 | 
					@ -404,6 +413,15 @@
 | 
				
			||||||
#define ZOMATCOPY_K_CTC         gotoblas -> zomatcopy_k_ctc
 | 
					#define ZOMATCOPY_K_CTC         gotoblas -> zomatcopy_k_ctc
 | 
				
			||||||
#define ZOMATCOPY_K_RTC         gotoblas -> zomatcopy_k_rtc
 | 
					#define ZOMATCOPY_K_RTC         gotoblas -> zomatcopy_k_rtc
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#define ZIMATCOPY_K_CN          gotoblas -> zimatcopy_k_cn
 | 
				
			||||||
 | 
					#define ZIMATCOPY_K_RN          gotoblas -> zimatcopy_k_rn
 | 
				
			||||||
 | 
					#define ZIMATCOPY_K_CT          gotoblas -> zimatcopy_k_ct
 | 
				
			||||||
 | 
					#define ZIMATCOPY_K_RT          gotoblas -> zimatcopy_k_rt
 | 
				
			||||||
 | 
					#define ZIMATCOPY_K_CNC         gotoblas -> zimatcopy_k_cnc
 | 
				
			||||||
 | 
					#define ZIMATCOPY_K_RNC         gotoblas -> zimatcopy_k_rnc
 | 
				
			||||||
 | 
					#define ZIMATCOPY_K_CTC         gotoblas -> zimatcopy_k_ctc
 | 
				
			||||||
 | 
					#define ZIMATCOPY_K_RTC         gotoblas -> zimatcopy_k_rtc
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#define ZGEADD_K                gotoblas -> zgeadd_k
 | 
					#define ZGEADD_K                gotoblas -> zgeadd_k
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
							
								
								
									
										8
									
								
								cpuid.h
								
								
								
								
							
							
						
						
									
										8
									
								
								cpuid.h
								
								
								
								
							| 
						 | 
					@ -39,6 +39,10 @@
 | 
				
			||||||
#ifndef CPUID_H
 | 
					#ifndef CPUID_H
 | 
				
			||||||
#define CPUID_H
 | 
					#define CPUID_H
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64)
 | 
				
			||||||
 | 
					#define INTEL_AMD
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#define VENDOR_INTEL      1
 | 
					#define VENDOR_INTEL      1
 | 
				
			||||||
#define VENDOR_UMC        2
 | 
					#define VENDOR_UMC        2
 | 
				
			||||||
#define VENDOR_AMD        3
 | 
					#define VENDOR_AMD        3
 | 
				
			||||||
| 
						 | 
					@ -59,7 +63,7 @@
 | 
				
			||||||
#define FAMILY_PM     7
 | 
					#define FAMILY_PM     7
 | 
				
			||||||
#define FAMILY_IA64   8
 | 
					#define FAMILY_IA64   8
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#if defined(__i386__) || defined(__x86_64__)
 | 
					#ifdef INTEL_AMD
 | 
				
			||||||
#define GET_EXFAMILY  1
 | 
					#define GET_EXFAMILY  1
 | 
				
			||||||
#define GET_EXMODEL   2
 | 
					#define GET_EXMODEL   2
 | 
				
			||||||
#define GET_TYPE      3
 | 
					#define GET_TYPE      3
 | 
				
			||||||
| 
						 | 
					@ -109,6 +113,7 @@
 | 
				
			||||||
#define CORE_PILEDRIVER  23
 | 
					#define CORE_PILEDRIVER  23
 | 
				
			||||||
#define CORE_HASWELL     24
 | 
					#define CORE_HASWELL     24
 | 
				
			||||||
#define CORE_STEAMROLLER 25
 | 
					#define CORE_STEAMROLLER 25
 | 
				
			||||||
 | 
					#define CORE_EXCAVATOR   26
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#define HAVE_SSE      (1 <<  0)
 | 
					#define HAVE_SSE      (1 <<  0)
 | 
				
			||||||
#define HAVE_SSE2     (1 <<  1)
 | 
					#define HAVE_SSE2     (1 <<  1)
 | 
				
			||||||
| 
						 | 
					@ -203,5 +208,6 @@ typedef struct {
 | 
				
			||||||
#define CPUTYPE_PILEDRIVER              47
 | 
					#define CPUTYPE_PILEDRIVER              47
 | 
				
			||||||
#define CPUTYPE_HASWELL 		48
 | 
					#define CPUTYPE_HASWELL 		48
 | 
				
			||||||
#define CPUTYPE_STEAMROLLER 		49
 | 
					#define CPUTYPE_STEAMROLLER 		49
 | 
				
			||||||
 | 
					#define CPUTYPE_EXCAVATOR 		50
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -192,6 +192,7 @@ void get_cpuconfig(void)
 | 
				
			||||||
	{
 | 
						{
 | 
				
			||||||
	       case CPU_CORTEXA9:
 | 
						       case CPU_CORTEXA9:
 | 
				
			||||||
    			printf("#define CORTEXA9\n");
 | 
					    			printf("#define CORTEXA9\n");
 | 
				
			||||||
 | 
					    			printf("#define ARMV7\n");
 | 
				
			||||||
    			printf("#define HAVE_VFP\n");
 | 
					    			printf("#define HAVE_VFP\n");
 | 
				
			||||||
    			printf("#define HAVE_VFPV3\n");
 | 
					    			printf("#define HAVE_VFPV3\n");
 | 
				
			||||||
			if ( get_feature("neon"))	printf("#define HAVE_NEON\n");
 | 
								if ( get_feature("neon"))	printf("#define HAVE_NEON\n");
 | 
				
			||||||
| 
						 | 
					@ -207,6 +208,7 @@ void get_cpuconfig(void)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	       case CPU_CORTEXA15:
 | 
						       case CPU_CORTEXA15:
 | 
				
			||||||
    			printf("#define CORTEXA15\n");
 | 
					    			printf("#define CORTEXA15\n");
 | 
				
			||||||
 | 
					    			printf("#define ARMV7\n");
 | 
				
			||||||
    			printf("#define HAVE_VFP\n");
 | 
					    			printf("#define HAVE_VFP\n");
 | 
				
			||||||
    			printf("#define HAVE_VFPV3\n");
 | 
					    			printf("#define HAVE_VFPV3\n");
 | 
				
			||||||
			if ( get_feature("neon"))	printf("#define HAVE_NEON\n");
 | 
								if ( get_feature("neon"))	printf("#define HAVE_NEON\n");
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -115,6 +115,7 @@ int detect(void){
 | 
				
			||||||
  if (!strncasecmp(p, "POWER5", 6)) return CPUTYPE_POWER5;
 | 
					  if (!strncasecmp(p, "POWER5", 6)) return CPUTYPE_POWER5;
 | 
				
			||||||
  if (!strncasecmp(p, "POWER6", 6)) return CPUTYPE_POWER6;
 | 
					  if (!strncasecmp(p, "POWER6", 6)) return CPUTYPE_POWER6;
 | 
				
			||||||
  if (!strncasecmp(p, "POWER7", 6)) return CPUTYPE_POWER6;
 | 
					  if (!strncasecmp(p, "POWER7", 6)) return CPUTYPE_POWER6;
 | 
				
			||||||
 | 
					  if (!strncasecmp(p, "POWER8", 6)) return CPUTYPE_POWER6;
 | 
				
			||||||
  if (!strncasecmp(p, "Cell",   4)) return CPUTYPE_CELL;
 | 
					  if (!strncasecmp(p, "Cell",   4)) return CPUTYPE_CELL;
 | 
				
			||||||
  if (!strncasecmp(p, "7447",   4)) return CPUTYPE_PPCG4;
 | 
					  if (!strncasecmp(p, "7447",   4)) return CPUTYPE_PPCG4;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
							
								
								
									
										185
									
								
								cpuid_x86.c
								
								
								
								
							
							
						
						
									
										185
									
								
								cpuid_x86.c
								
								
								
								
							| 
						 | 
					@ -40,6 +40,12 @@
 | 
				
			||||||
#include <string.h>
 | 
					#include <string.h>
 | 
				
			||||||
#include "cpuid.h"
 | 
					#include "cpuid.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#if defined(_MSC_VER) && !defined(__clang__)
 | 
				
			||||||
 | 
					#define C_INLINE __inline
 | 
				
			||||||
 | 
					#else
 | 
				
			||||||
 | 
					#define C_INLINE inline
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/*
 | 
					/*
 | 
				
			||||||
#ifdef NO_AVX
 | 
					#ifdef NO_AVX
 | 
				
			||||||
#define CPUTYPE_HASWELL CPUTYPE_NEHALEM
 | 
					#define CPUTYPE_HASWELL CPUTYPE_NEHALEM
 | 
				
			||||||
| 
						 | 
					@ -53,12 +59,26 @@
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
*/
 | 
					*/
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#if defined(_MSC_VER) && !defined(__clang__)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  int cpuInfo[4] = {-1};
 | 
				
			||||||
 | 
					  __cpuid(cpuInfo, op);
 | 
				
			||||||
 | 
					  *eax = cpuInfo[0];
 | 
				
			||||||
 | 
					  *ebx = cpuInfo[1];
 | 
				
			||||||
 | 
					  *ecx = cpuInfo[2];
 | 
				
			||||||
 | 
					  *edx = cpuInfo[3];
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#else
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#ifndef CPUIDEMU
 | 
					#ifndef CPUIDEMU
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#if defined(__APPLE__) && defined(__i386__)
 | 
					#if defined(__APPLE__) && defined(__i386__)
 | 
				
			||||||
void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx);
 | 
					void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx);
 | 
				
			||||||
#else
 | 
					#else
 | 
				
			||||||
static inline void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx){
 | 
					static C_INLINE void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx){
 | 
				
			||||||
#if defined(__i386__) && defined(__PIC__)
 | 
					#if defined(__i386__) && defined(__PIC__)
 | 
				
			||||||
  __asm__ __volatile__
 | 
					  __asm__ __volatile__
 | 
				
			||||||
    ("mov %%ebx, %%edi;"
 | 
					    ("mov %%ebx, %%edi;"
 | 
				
			||||||
| 
						 | 
					@ -115,14 +135,16 @@ void cpuid(unsigned int op, unsigned int *eax, unsigned int *ebx, unsigned int *
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static inline int have_cpuid(void){
 | 
					#endif // _MSC_VER
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static C_INLINE int have_cpuid(void){
 | 
				
			||||||
  int eax, ebx, ecx, edx;
 | 
					  int eax, ebx, ecx, edx;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  cpuid(0, &eax, &ebx, &ecx, &edx);
 | 
					  cpuid(0, &eax, &ebx, &ecx, &edx);
 | 
				
			||||||
  return eax;
 | 
					  return eax;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static inline int have_excpuid(void){
 | 
					static C_INLINE int have_excpuid(void){
 | 
				
			||||||
  int eax, ebx, ecx, edx;
 | 
					  int eax, ebx, ecx, edx;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  cpuid(0x80000000, &eax, &ebx, &ecx, &edx);
 | 
					  cpuid(0x80000000, &eax, &ebx, &ecx, &edx);
 | 
				
			||||||
| 
						 | 
					@ -130,10 +152,14 @@ static inline int have_excpuid(void){
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#ifndef NO_AVX
 | 
					#ifndef NO_AVX
 | 
				
			||||||
static inline void xgetbv(int op, int * eax, int * edx){
 | 
					static C_INLINE void xgetbv(int op, int * eax, int * edx){
 | 
				
			||||||
  //Use binary code for xgetbv
 | 
					  //Use binary code for xgetbv
 | 
				
			||||||
 | 
					#if defined(_MSC_VER) && !defined(__clang__)
 | 
				
			||||||
 | 
					  *eax = __xgetbv(op);
 | 
				
			||||||
 | 
					#else
 | 
				
			||||||
  __asm__ __volatile__
 | 
					  __asm__ __volatile__
 | 
				
			||||||
    (".byte 0x0f, 0x01, 0xd0": "=a" (*eax), "=d" (*edx) : "c" (op) : "cc");
 | 
					    (".byte 0x0f, 0x01, 0xd0": "=a" (*eax), "=d" (*edx) : "c" (op) : "cc");
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1098,6 +1124,16 @@ int get_cpuname(void){
 | 
				
			||||||
            return CPUTYPE_HASWELL;
 | 
					            return CPUTYPE_HASWELL;
 | 
				
			||||||
#else
 | 
					#else
 | 
				
			||||||
	    return CPUTYPE_SANDYBRIDGE;
 | 
						    return CPUTYPE_SANDYBRIDGE;
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					          else
 | 
				
			||||||
 | 
						    return CPUTYPE_NEHALEM;
 | 
				
			||||||
 | 
						case 13:
 | 
				
			||||||
 | 
						  //Broadwell
 | 
				
			||||||
 | 
					          if(support_avx())
 | 
				
			||||||
 | 
					#ifndef NO_AVX2
 | 
				
			||||||
 | 
					            return CPUTYPE_HASWELL;
 | 
				
			||||||
 | 
					#else
 | 
				
			||||||
 | 
						    return CPUTYPE_SANDYBRIDGE;
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
          else
 | 
					          else
 | 
				
			||||||
	    return CPUTYPE_NEHALEM;
 | 
						    return CPUTYPE_NEHALEM;
 | 
				
			||||||
| 
						 | 
					@ -1112,11 +1148,57 @@ int get_cpuname(void){
 | 
				
			||||||
            return CPUTYPE_HASWELL;
 | 
					            return CPUTYPE_HASWELL;
 | 
				
			||||||
#else
 | 
					#else
 | 
				
			||||||
	    return CPUTYPE_SANDYBRIDGE;
 | 
						    return CPUTYPE_SANDYBRIDGE;
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					          else
 | 
				
			||||||
 | 
						    return CPUTYPE_NEHALEM;
 | 
				
			||||||
 | 
						case 7:
 | 
				
			||||||
 | 
						case 15:
 | 
				
			||||||
 | 
						  //Broadwell
 | 
				
			||||||
 | 
					          if(support_avx())
 | 
				
			||||||
 | 
					#ifndef NO_AVX2
 | 
				
			||||||
 | 
					            return CPUTYPE_HASWELL;
 | 
				
			||||||
 | 
					#else
 | 
				
			||||||
 | 
						    return CPUTYPE_SANDYBRIDGE;
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					          else
 | 
				
			||||||
 | 
						    return CPUTYPE_NEHALEM;
 | 
				
			||||||
 | 
						case 14:
 | 
				
			||||||
 | 
						  //Skylake
 | 
				
			||||||
 | 
					          if(support_avx())
 | 
				
			||||||
 | 
					#ifndef NO_AVX2
 | 
				
			||||||
 | 
					            return CPUTYPE_HASWELL;
 | 
				
			||||||
 | 
					#else
 | 
				
			||||||
 | 
						    return CPUTYPE_SANDYBRIDGE;
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
          else
 | 
					          else
 | 
				
			||||||
	    return CPUTYPE_NEHALEM;
 | 
						    return CPUTYPE_NEHALEM;
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
        break;
 | 
					        break;
 | 
				
			||||||
 | 
					      case 5:
 | 
				
			||||||
 | 
					        switch (model) {
 | 
				
			||||||
 | 
						case 6:
 | 
				
			||||||
 | 
						  //Broadwell
 | 
				
			||||||
 | 
					          if(support_avx())
 | 
				
			||||||
 | 
					#ifndef NO_AVX2
 | 
				
			||||||
 | 
					            return CPUTYPE_HASWELL;
 | 
				
			||||||
 | 
					#else
 | 
				
			||||||
 | 
						    return CPUTYPE_SANDYBRIDGE;
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					          else
 | 
				
			||||||
 | 
						    return CPUTYPE_NEHALEM;
 | 
				
			||||||
 | 
						case 5:
 | 
				
			||||||
 | 
					        case 14:
 | 
				
			||||||
 | 
						  // Skylake
 | 
				
			||||||
 | 
					          if(support_avx())
 | 
				
			||||||
 | 
					#ifndef NO_AVX2
 | 
				
			||||||
 | 
					            return CPUTYPE_HASWELL;
 | 
				
			||||||
 | 
					#else
 | 
				
			||||||
 | 
						    return CPUTYPE_SANDYBRIDGE;
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					          else
 | 
				
			||||||
 | 
						    return CPUTYPE_NEHALEM;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						break;
 | 
				
			||||||
      }
 | 
					      }
 | 
				
			||||||
      break;
 | 
					      break;
 | 
				
			||||||
    case 0x7:
 | 
					    case 0x7:
 | 
				
			||||||
| 
						 | 
					@ -1163,11 +1245,20 @@ int get_cpuname(void){
 | 
				
			||||||
	  else
 | 
						  else
 | 
				
			||||||
	    return CPUTYPE_BARCELONA; //OS don't support AVX.
 | 
						    return CPUTYPE_BARCELONA; //OS don't support AVX.
 | 
				
			||||||
	case 0:
 | 
						case 0:
 | 
				
			||||||
	  if(support_avx())
 | 
						  switch(exmodel){
 | 
				
			||||||
	    return CPUTYPE_STEAMROLLER;
 | 
						  case 3:
 | 
				
			||||||
	  else
 | 
						    if(support_avx())
 | 
				
			||||||
	    return CPUTYPE_BARCELONA; //OS don't support AVX.
 | 
						      return CPUTYPE_STEAMROLLER;
 | 
				
			||||||
 | 
						    else
 | 
				
			||||||
 | 
						      return CPUTYPE_BARCELONA; //OS don't support AVX.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						  case 6:
 | 
				
			||||||
 | 
						    if(support_avx())
 | 
				
			||||||
 | 
						      return CPUTYPE_EXCAVATOR;
 | 
				
			||||||
 | 
						    else
 | 
				
			||||||
 | 
						      return CPUTYPE_BARCELONA; //OS don't support AVX.
 | 
				
			||||||
 | 
						  }
 | 
				
			||||||
 | 
						  break;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
	break;
 | 
						break;
 | 
				
			||||||
      case  5:
 | 
					      case  5:
 | 
				
			||||||
| 
						 | 
					@ -1297,6 +1388,7 @@ static char *cpuname[] = {
 | 
				
			||||||
  "PILEDRIVER",
 | 
					  "PILEDRIVER",
 | 
				
			||||||
  "HASWELL",
 | 
					  "HASWELL",
 | 
				
			||||||
  "STEAMROLLER",
 | 
					  "STEAMROLLER",
 | 
				
			||||||
 | 
					  "EXCAVATOR",
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static char *lowercpuname[] = {
 | 
					static char *lowercpuname[] = {
 | 
				
			||||||
| 
						 | 
					@ -1349,6 +1441,7 @@ static char *lowercpuname[] = {
 | 
				
			||||||
  "piledriver",
 | 
					  "piledriver",
 | 
				
			||||||
  "haswell",
 | 
					  "haswell",
 | 
				
			||||||
  "steamroller",
 | 
					  "steamroller",
 | 
				
			||||||
 | 
					  "excavator",
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static char *corename[] = {
 | 
					static char *corename[] = {
 | 
				
			||||||
| 
						 | 
					@ -1378,6 +1471,7 @@ static char *corename[] = {
 | 
				
			||||||
  "PILEDRIVER",
 | 
					  "PILEDRIVER",
 | 
				
			||||||
  "HASWELL",
 | 
					  "HASWELL",
 | 
				
			||||||
  "STEAMROLLER",
 | 
					  "STEAMROLLER",
 | 
				
			||||||
 | 
					  "EXCAVATOR",
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static char *corename_lower[] = {
 | 
					static char *corename_lower[] = {
 | 
				
			||||||
| 
						 | 
					@ -1407,6 +1501,7 @@ static char *corename_lower[] = {
 | 
				
			||||||
  "piledriver",
 | 
					  "piledriver",
 | 
				
			||||||
  "haswell",
 | 
					  "haswell",
 | 
				
			||||||
  "steamroller",
 | 
					  "steamroller",
 | 
				
			||||||
 | 
					  "excavator",
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1525,6 +1620,16 @@ int get_coretype(void){
 | 
				
			||||||
            return CORE_HASWELL;
 | 
					            return CORE_HASWELL;
 | 
				
			||||||
#else
 | 
					#else
 | 
				
			||||||
	    return CORE_SANDYBRIDGE;
 | 
						    return CORE_SANDYBRIDGE;
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					          else
 | 
				
			||||||
 | 
						    return CORE_NEHALEM;
 | 
				
			||||||
 | 
						case 13:
 | 
				
			||||||
 | 
						  //broadwell
 | 
				
			||||||
 | 
					          if(support_avx())
 | 
				
			||||||
 | 
					#ifndef NO_AVX2
 | 
				
			||||||
 | 
					            return CORE_HASWELL;
 | 
				
			||||||
 | 
					#else
 | 
				
			||||||
 | 
						    return CORE_SANDYBRIDGE;
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
          else
 | 
					          else
 | 
				
			||||||
	    return CORE_NEHALEM;
 | 
						    return CORE_NEHALEM;
 | 
				
			||||||
| 
						 | 
					@ -1539,11 +1644,57 @@ int get_coretype(void){
 | 
				
			||||||
            return CORE_HASWELL;
 | 
					            return CORE_HASWELL;
 | 
				
			||||||
#else
 | 
					#else
 | 
				
			||||||
	    return CORE_SANDYBRIDGE;
 | 
						    return CORE_SANDYBRIDGE;
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					          else
 | 
				
			||||||
 | 
						    return CORE_NEHALEM;
 | 
				
			||||||
 | 
						case 7:
 | 
				
			||||||
 | 
						case 15:
 | 
				
			||||||
 | 
						  //broadwell
 | 
				
			||||||
 | 
					          if(support_avx())
 | 
				
			||||||
 | 
					#ifndef NO_AVX2
 | 
				
			||||||
 | 
					            return CORE_HASWELL;
 | 
				
			||||||
 | 
					#else
 | 
				
			||||||
 | 
						    return CORE_SANDYBRIDGE;
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					          else
 | 
				
			||||||
 | 
						    return CORE_NEHALEM;
 | 
				
			||||||
 | 
						case 14:
 | 
				
			||||||
 | 
						  //Skylake
 | 
				
			||||||
 | 
					          if(support_avx())
 | 
				
			||||||
 | 
					#ifndef NO_AVX2
 | 
				
			||||||
 | 
					            return CORE_HASWELL;
 | 
				
			||||||
 | 
					#else
 | 
				
			||||||
 | 
						    return CORE_SANDYBRIDGE;
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
          else
 | 
					          else
 | 
				
			||||||
	    return CORE_NEHALEM;
 | 
						    return CORE_NEHALEM;
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
        break;
 | 
					        break;
 | 
				
			||||||
 | 
					      case 5:
 | 
				
			||||||
 | 
					        switch (model) {
 | 
				
			||||||
 | 
						case 6:
 | 
				
			||||||
 | 
						  //broadwell
 | 
				
			||||||
 | 
					          if(support_avx())
 | 
				
			||||||
 | 
					#ifndef NO_AVX2
 | 
				
			||||||
 | 
					            return CORE_HASWELL;
 | 
				
			||||||
 | 
					#else
 | 
				
			||||||
 | 
						    return CORE_SANDYBRIDGE;
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					          else
 | 
				
			||||||
 | 
						    return CORE_NEHALEM;
 | 
				
			||||||
 | 
						case 5:
 | 
				
			||||||
 | 
						case 14:
 | 
				
			||||||
 | 
						  // Skylake
 | 
				
			||||||
 | 
					          if(support_avx())
 | 
				
			||||||
 | 
					#ifndef NO_AVX2
 | 
				
			||||||
 | 
					            return CORE_HASWELL;
 | 
				
			||||||
 | 
					#else
 | 
				
			||||||
 | 
						    return CORE_SANDYBRIDGE;
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					          else
 | 
				
			||||||
 | 
						    return CORE_NEHALEM;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						break;
 | 
				
			||||||
      }
 | 
					      }
 | 
				
			||||||
      break;
 | 
					      break;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1574,10 +1725,20 @@ int get_coretype(void){
 | 
				
			||||||
	    return CORE_BARCELONA; //OS don't support AVX.
 | 
						    return CORE_BARCELONA; //OS don't support AVX.
 | 
				
			||||||
	
 | 
						
 | 
				
			||||||
	case 0:
 | 
						case 0:
 | 
				
			||||||
	  if(support_avx())
 | 
						  switch(exmodel){
 | 
				
			||||||
	    return CORE_STEAMROLLER;
 | 
						  case 3:
 | 
				
			||||||
	  else
 | 
						    if(support_avx())
 | 
				
			||||||
	    return CORE_BARCELONA; //OS don't support AVX.
 | 
						      return CORE_STEAMROLLER;
 | 
				
			||||||
 | 
						    else
 | 
				
			||||||
 | 
						      return CORE_BARCELONA; //OS don't support AVX.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						  case 6:
 | 
				
			||||||
 | 
						    if(support_avx())
 | 
				
			||||||
 | 
						      return CORE_EXCAVATOR;
 | 
				
			||||||
 | 
						    else
 | 
				
			||||||
 | 
						      return CORE_BARCELONA; //OS don't support AVX.
 | 
				
			||||||
 | 
						  }
 | 
				
			||||||
 | 
						  break;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
							
								
								
									
										4
									
								
								ctest.c
								
								
								
								
							
							
						
						
									
										4
									
								
								ctest.c
								
								
								
								
							| 
						 | 
					@ -44,6 +44,10 @@ COMPILER_DEC
 | 
				
			||||||
COMPILER_GNU
 | 
					COMPILER_GNU
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#if defined(__ANDROID__)
 | 
				
			||||||
 | 
					OS_ANDROID
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#if defined(__linux__)
 | 
					#if defined(__linux__)
 | 
				
			||||||
OS_LINUX
 | 
					OS_LINUX
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,46 @@
 | 
				
			||||||
 | 
					include_directories(${CMAKE_SOURCE_DIR})
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					enable_language(Fortran)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DADD${BU} -DCBLAS")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					FILE(WRITE ${CMAKE_CURRENT_BINARY_DIR}/test_cblas_helper.sh
 | 
				
			||||||
 | 
					"$1 < $2\n"
 | 
				
			||||||
 | 
					)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					foreach(float_type ${FLOAT_TYPES})
 | 
				
			||||||
 | 
					  string(SUBSTRING ${float_type} 0 1 float_char_upper)
 | 
				
			||||||
 | 
					  string(TOLOWER ${float_char_upper} float_char)
 | 
				
			||||||
 | 
					  #level1
 | 
				
			||||||
 | 
					  add_executable(x${float_char}cblat1
 | 
				
			||||||
 | 
					    c_${float_char}blat1.f
 | 
				
			||||||
 | 
					    c_${float_char}blas1.c)
 | 
				
			||||||
 | 
					  target_link_libraries(x${float_char}cblat1 ${OpenBLAS_LIBNAME}_static)
 | 
				
			||||||
 | 
					  add_test(NAME "x${float_char}cblat1"
 | 
				
			||||||
 | 
					    COMMAND "${CMAKE_CURRENT_BINARY_DIR}/x${float_char}cblat1")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  #level2
 | 
				
			||||||
 | 
					  add_executable(x${float_char}cblat2
 | 
				
			||||||
 | 
					    c_${float_char}blat2.f
 | 
				
			||||||
 | 
					    c_${float_char}blas2.c
 | 
				
			||||||
 | 
					    c_${float_char}2chke.c
 | 
				
			||||||
 | 
					    auxiliary.c
 | 
				
			||||||
 | 
					    c_xerbla.c
 | 
				
			||||||
 | 
					    constant.c)
 | 
				
			||||||
 | 
					  target_link_libraries(x${float_char}cblat2 ${OpenBLAS_LIBNAME}_static)
 | 
				
			||||||
 | 
					  add_test(NAME "x${float_char}cblat2"
 | 
				
			||||||
 | 
					    COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_cblas_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/x${float_char}cblat2" "${PROJECT_SOURCE_DIR}/ctest/${float_char}in2")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  #level3
 | 
				
			||||||
 | 
					  add_executable(x${float_char}cblat3
 | 
				
			||||||
 | 
					    c_${float_char}blat3.f
 | 
				
			||||||
 | 
					    c_${float_char}blas3.c
 | 
				
			||||||
 | 
					    c_${float_char}3chke.c
 | 
				
			||||||
 | 
					    auxiliary.c
 | 
				
			||||||
 | 
					    c_xerbla.c
 | 
				
			||||||
 | 
					    constant.c)
 | 
				
			||||||
 | 
					  target_link_libraries(x${float_char}cblat3 ${OpenBLAS_LIBNAME}_static)
 | 
				
			||||||
 | 
					  add_test(NAME "x${float_char}cblat3"
 | 
				
			||||||
 | 
					    COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_cblas_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/x${float_char}cblat3" "${PROJECT_SOURCE_DIR}/ctest/${float_char}in3")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					endforeach()
 | 
				
			||||||
| 
						 | 
					@ -27,12 +27,18 @@ ctestl2o = c_cblas2.o c_c2chke.o auxiliary.o c_xerbla.o constant.o
 | 
				
			||||||
 | 
					
 | 
				
			||||||
ctestl3o = c_cblas3.o c_c3chke.o auxiliary.o c_xerbla.o constant.o
 | 
					ctestl3o = c_cblas3.o c_c3chke.o auxiliary.o c_xerbla.o constant.o
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					ctestl3o_3m = c_cblas3_3m.o c_c3chke_3m.o auxiliary.o c_xerbla.o constant.o
 | 
				
			||||||
 | 
					
 | 
				
			||||||
ztestl1o = c_zblas1.o
 | 
					ztestl1o = c_zblas1.o
 | 
				
			||||||
 | 
					
 | 
				
			||||||
ztestl2o = c_zblas2.o c_z2chke.o auxiliary.o c_xerbla.o constant.o
 | 
					ztestl2o = c_zblas2.o c_z2chke.o auxiliary.o c_xerbla.o constant.o
 | 
				
			||||||
 | 
					
 | 
				
			||||||
ztestl3o = c_zblas3.o c_z3chke.o auxiliary.o c_xerbla.o constant.o
 | 
					ztestl3o = c_zblas3.o c_z3chke.o auxiliary.o c_xerbla.o constant.o
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					ztestl3o_3m = c_zblas3_3m.o c_z3chke_3m.o auxiliary.o c_xerbla.o constant.o
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
all :: all1 all2 all3
 | 
					all :: all1 all2 all3
 | 
				
			||||||
 | 
					
 | 
				
			||||||
all1: xscblat1 xdcblat1 xccblat1 xzcblat1
 | 
					all1: xscblat1 xdcblat1 xccblat1 xzcblat1
 | 
				
			||||||
| 
						 | 
					@ -115,8 +121,8 @@ xccblat2: $(ctestl2o) c_cblat2.o $(TOPDIR)/$(LIBNAME)
 | 
				
			||||||
xccblat3: $(ctestl3o) c_cblat3.o $(TOPDIR)/$(LIBNAME)
 | 
					xccblat3: $(ctestl3o) c_cblat3.o $(TOPDIR)/$(LIBNAME)
 | 
				
			||||||
	$(FC) $(FLDFLAGS) -o xccblat3 c_cblat3.o $(ctestl3o) $(LIB) $(EXTRALIB) $(CEXTRALIB)
 | 
						$(FC) $(FLDFLAGS) -o xccblat3 c_cblat3.o $(ctestl3o) $(LIB) $(EXTRALIB) $(CEXTRALIB)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
xccblat3_3m: $(ctestl3o) c_cblat3_3m.o $(TOPDIR)/$(LIBNAME)
 | 
					xccblat3_3m: $(ctestl3o_3m) c_cblat3_3m.o $(TOPDIR)/$(LIBNAME)
 | 
				
			||||||
	$(FC) $(FLDFLAGS) -o xccblat3_3m c_cblat3_3m.o $(ctestl3o) $(LIB) $(EXTRALIB) $(CEXTRALIB)
 | 
						$(FC) $(FLDFLAGS) -o xccblat3_3m c_cblat3_3m.o $(ctestl3o_3m) $(LIB) $(EXTRALIB) $(CEXTRALIB)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# Double complex
 | 
					# Double complex
 | 
				
			||||||
xzcblat1: $(ztestl1o) c_zblat1.o $(TOPDIR)/$(LIBNAME)
 | 
					xzcblat1: $(ztestl1o) c_zblat1.o $(TOPDIR)/$(LIBNAME)
 | 
				
			||||||
| 
						 | 
					@ -127,8 +133,8 @@ xzcblat3: $(ztestl3o) c_zblat3.o $(TOPDIR)/$(LIBNAME)
 | 
				
			||||||
	$(FC) $(FLDFLAGS) -o xzcblat3 c_zblat3.o $(ztestl3o) $(LIB) $(EXTRALIB) $(CEXTRALIB)
 | 
						$(FC) $(FLDFLAGS) -o xzcblat3 c_zblat3.o $(ztestl3o) $(LIB) $(EXTRALIB) $(CEXTRALIB)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
xzcblat3_3m: $(ztestl3o) c_zblat3_3m.o $(TOPDIR)/$(LIBNAME)
 | 
					xzcblat3_3m: $(ztestl3o_3m) c_zblat3_3m.o $(TOPDIR)/$(LIBNAME)
 | 
				
			||||||
	$(FC) $(FLDFLAGS) -o xzcblat3_3m c_zblat3_3m.o $(ztestl3o) $(LIB) $(EXTRALIB) $(CEXTRALIB)
 | 
						$(FC) $(FLDFLAGS) -o xzcblat3_3m c_zblat3_3m.o $(ztestl3o_3m) $(LIB) $(EXTRALIB) $(CEXTRALIB)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
include $(TOPDIR)/Makefile.tail
 | 
					include $(TOPDIR)/Makefile.tail
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
							
								
								
									
										230
									
								
								ctest/c_c3chke.c
								
								
								
								
							
							
						
						
									
										230
									
								
								ctest/c_c3chke.c
								
								
								
								
							| 
						 | 
					@ -46,235 +46,7 @@ void  F77_c3chke(char *  rout) {
 | 
				
			||||||
   }
 | 
					   }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
   if (strncmp( sf,"cblas_cgemm3m"   ,13)==0) {
 | 
					   if (strncmp( sf,"cblas_cgemm"   ,11)==0) {
 | 
				
			||||||
      cblas_rout = "cblas_cgemm3"   ;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
      cblas_info = 1;
 | 
					 | 
				
			||||||
      cblas_cgemm3m( INVALID,  CblasNoTrans, CblasNoTrans, 0, 0, 0,
 | 
					 | 
				
			||||||
                   ALPHA, A, 1, B, 1, BETA, C, 1 );
 | 
					 | 
				
			||||||
      chkxer();
 | 
					 | 
				
			||||||
      cblas_info = 1;
 | 
					 | 
				
			||||||
      cblas_cgemm3m( INVALID,  CblasNoTrans, CblasTrans, 0, 0, 0,
 | 
					 | 
				
			||||||
                   ALPHA, A, 1, B, 1, BETA, C, 1 );
 | 
					 | 
				
			||||||
      chkxer();
 | 
					 | 
				
			||||||
      cblas_info = 1;
 | 
					 | 
				
			||||||
      cblas_cgemm3m( INVALID,  CblasTrans, CblasNoTrans, 0, 0, 0,
 | 
					 | 
				
			||||||
                   ALPHA, A, 1, B, 1, BETA, C, 1 );
 | 
					 | 
				
			||||||
      chkxer();
 | 
					 | 
				
			||||||
      cblas_info = 1;
 | 
					 | 
				
			||||||
      cblas_cgemm3m( INVALID,  CblasTrans, CblasTrans, 0, 0, 0,
 | 
					 | 
				
			||||||
                   ALPHA, A, 1, B, 1, BETA, C, 1 );
 | 
					 | 
				
			||||||
      chkxer();
 | 
					 | 
				
			||||||
      cblas_info = 2; RowMajorStrg = FALSE;
 | 
					 | 
				
			||||||
      cblas_cgemm3m( CblasColMajor,  INVALID, CblasNoTrans, 0, 0, 0,
 | 
					 | 
				
			||||||
                   ALPHA, A, 1, B, 1, BETA, C, 1 );
 | 
					 | 
				
			||||||
      chkxer();
 | 
					 | 
				
			||||||
      cblas_info = 2; RowMajorStrg = FALSE;
 | 
					 | 
				
			||||||
      cblas_cgemm3m( CblasColMajor,  INVALID, CblasTrans, 0, 0, 0,
 | 
					 | 
				
			||||||
                   ALPHA, A, 1, B, 1, BETA, C, 1 );
 | 
					 | 
				
			||||||
      chkxer();
 | 
					 | 
				
			||||||
      cblas_info = 3; RowMajorStrg = FALSE;
 | 
					 | 
				
			||||||
      cblas_cgemm3m( CblasColMajor,  CblasNoTrans, INVALID, 0, 0, 0,
 | 
					 | 
				
			||||||
                   ALPHA, A, 1, B, 1, BETA, C, 1 );
 | 
					 | 
				
			||||||
      chkxer();
 | 
					 | 
				
			||||||
      cblas_info = 3; RowMajorStrg = FALSE;
 | 
					 | 
				
			||||||
      cblas_cgemm3m( CblasColMajor,  CblasTrans, INVALID, 0, 0, 0,
 | 
					 | 
				
			||||||
                   ALPHA, A, 1, B, 1, BETA, C, 1 );
 | 
					 | 
				
			||||||
      chkxer();
 | 
					 | 
				
			||||||
      cblas_info = 4; RowMajorStrg = FALSE;
 | 
					 | 
				
			||||||
      cblas_cgemm3m( CblasColMajor,  CblasNoTrans, CblasNoTrans, INVALID, 0, 0,
 | 
					 | 
				
			||||||
                   ALPHA, A, 1, B, 1, BETA, C, 1 );
 | 
					 | 
				
			||||||
      chkxer();
 | 
					 | 
				
			||||||
      cblas_info = 4; RowMajorStrg = FALSE;
 | 
					 | 
				
			||||||
      cblas_cgemm3m( CblasColMajor,  CblasNoTrans, CblasTrans, INVALID, 0, 0,
 | 
					 | 
				
			||||||
                   ALPHA, A, 1, B, 1, BETA, C, 1 );
 | 
					 | 
				
			||||||
      chkxer();
 | 
					 | 
				
			||||||
      cblas_info = 4; RowMajorStrg = FALSE;
 | 
					 | 
				
			||||||
      cblas_cgemm3m( CblasColMajor,  CblasTrans, CblasNoTrans, INVALID, 0, 0,
 | 
					 | 
				
			||||||
                   ALPHA, A, 1, B, 1, BETA, C, 1 );
 | 
					 | 
				
			||||||
      chkxer();
 | 
					 | 
				
			||||||
      cblas_info = 4; RowMajorStrg = FALSE;
 | 
					 | 
				
			||||||
      cblas_cgemm3m( CblasColMajor,  CblasTrans, CblasTrans, INVALID, 0, 0,
 | 
					 | 
				
			||||||
                   ALPHA, A, 1, B, 1, BETA, C, 1 );
 | 
					 | 
				
			||||||
      chkxer();
 | 
					 | 
				
			||||||
      cblas_info = 5; RowMajorStrg = FALSE;
 | 
					 | 
				
			||||||
      cblas_cgemm3m( CblasColMajor,  CblasNoTrans, CblasNoTrans, 0, INVALID, 0,
 | 
					 | 
				
			||||||
                   ALPHA, A, 1, B, 1, BETA, C, 1 );
 | 
					 | 
				
			||||||
      chkxer();
 | 
					 | 
				
			||||||
      cblas_info = 5; RowMajorStrg = FALSE;
 | 
					 | 
				
			||||||
      cblas_cgemm3m( CblasColMajor,  CblasNoTrans, CblasTrans, 0, INVALID, 0,
 | 
					 | 
				
			||||||
                   ALPHA, A, 1, B, 1, BETA, C, 1 );
 | 
					 | 
				
			||||||
      chkxer();
 | 
					 | 
				
			||||||
      cblas_info = 5; RowMajorStrg = FALSE;
 | 
					 | 
				
			||||||
      cblas_cgemm3m( CblasColMajor,  CblasTrans, CblasNoTrans, 0, INVALID, 0,
 | 
					 | 
				
			||||||
                   ALPHA, A, 1, B, 1, BETA, C, 1 );
 | 
					 | 
				
			||||||
      chkxer();
 | 
					 | 
				
			||||||
      cblas_info = 5; RowMajorStrg = FALSE;
 | 
					 | 
				
			||||||
      cblas_cgemm3m( CblasColMajor,  CblasTrans, CblasTrans, 0, INVALID, 0,
 | 
					 | 
				
			||||||
                   ALPHA, A, 1, B, 1, BETA, C, 1 );
 | 
					 | 
				
			||||||
      chkxer();
 | 
					 | 
				
			||||||
      cblas_info = 6; RowMajorStrg = FALSE;
 | 
					 | 
				
			||||||
      cblas_cgemm3m( CblasColMajor,  CblasNoTrans, CblasNoTrans, 0, 0, INVALID,
 | 
					 | 
				
			||||||
                   ALPHA, A, 1, B, 1, BETA, C, 1 );
 | 
					 | 
				
			||||||
      chkxer();
 | 
					 | 
				
			||||||
      cblas_info = 6; RowMajorStrg = FALSE;
 | 
					 | 
				
			||||||
      cblas_cgemm3m( CblasColMajor,  CblasNoTrans, CblasTrans, 0, 0, INVALID,
 | 
					 | 
				
			||||||
                   ALPHA, A, 1, B, 1, BETA, C, 1 );
 | 
					 | 
				
			||||||
      chkxer();
 | 
					 | 
				
			||||||
      cblas_info = 6; RowMajorStrg = FALSE;
 | 
					 | 
				
			||||||
      cblas_cgemm3m( CblasColMajor,  CblasTrans, CblasNoTrans, 0, 0, INVALID,
 | 
					 | 
				
			||||||
                   ALPHA, A, 1, B, 1, BETA, C, 1 );
 | 
					 | 
				
			||||||
      chkxer();
 | 
					 | 
				
			||||||
      cblas_info = 6; RowMajorStrg = FALSE;
 | 
					 | 
				
			||||||
      cblas_cgemm3m( CblasColMajor,  CblasTrans, CblasTrans, 0, 0, INVALID,
 | 
					 | 
				
			||||||
                   ALPHA, A, 1, B, 1, BETA, C, 1 );
 | 
					 | 
				
			||||||
      chkxer();
 | 
					 | 
				
			||||||
      cblas_info = 9; RowMajorStrg = FALSE;
 | 
					 | 
				
			||||||
      cblas_cgemm3m( CblasColMajor,  CblasNoTrans, CblasNoTrans, 2, 0, 0,
 | 
					 | 
				
			||||||
                   ALPHA, A, 1, B, 1, BETA, C, 2 );
 | 
					 | 
				
			||||||
      chkxer();
 | 
					 | 
				
			||||||
      cblas_info = 9; RowMajorStrg = FALSE;
 | 
					 | 
				
			||||||
      cblas_cgemm3m( CblasColMajor,  CblasNoTrans, CblasTrans, 2, 0, 0,
 | 
					 | 
				
			||||||
                   ALPHA, A, 1, B, 1, BETA, C, 2 );
 | 
					 | 
				
			||||||
      chkxer();
 | 
					 | 
				
			||||||
      cblas_info = 9; RowMajorStrg = FALSE;
 | 
					 | 
				
			||||||
      cblas_cgemm3m( CblasColMajor,  CblasTrans, CblasNoTrans, 0, 0, 2,
 | 
					 | 
				
			||||||
                   ALPHA, A, 1, B, 2, BETA, C, 1 );
 | 
					 | 
				
			||||||
      chkxer();
 | 
					 | 
				
			||||||
      cblas_info = 9; RowMajorStrg = FALSE;
 | 
					 | 
				
			||||||
      cblas_cgemm3m( CblasColMajor,  CblasTrans, CblasTrans, 0, 0, 2,
 | 
					 | 
				
			||||||
                   ALPHA, A, 1, B, 1, BETA, C, 1 );
 | 
					 | 
				
			||||||
      chkxer();
 | 
					 | 
				
			||||||
      cblas_info = 11; RowMajorStrg = FALSE;
 | 
					 | 
				
			||||||
      cblas_cgemm3m( CblasColMajor,  CblasNoTrans, CblasNoTrans, 0, 0, 2,
 | 
					 | 
				
			||||||
                   ALPHA, A, 1, B, 1, BETA, C, 1 );
 | 
					 | 
				
			||||||
      chkxer();
 | 
					 | 
				
			||||||
      cblas_info = 11; RowMajorStrg = FALSE;
 | 
					 | 
				
			||||||
      cblas_cgemm3m( CblasColMajor,  CblasTrans, CblasNoTrans, 0, 0, 2,
 | 
					 | 
				
			||||||
                   ALPHA, A, 2, B, 1, BETA, C, 1 );
 | 
					 | 
				
			||||||
      chkxer();
 | 
					 | 
				
			||||||
      cblas_info = 11; RowMajorStrg = FALSE;
 | 
					 | 
				
			||||||
      cblas_cgemm3m( CblasColMajor,  CblasNoTrans, CblasTrans, 0, 2, 0,
 | 
					 | 
				
			||||||
                   ALPHA, A, 1, B, 1, BETA, C, 1 );
 | 
					 | 
				
			||||||
      chkxer();
 | 
					 | 
				
			||||||
      cblas_info = 11; RowMajorStrg = FALSE;
 | 
					 | 
				
			||||||
      cblas_cgemm3m( CblasColMajor,  CblasTrans, CblasTrans, 0, 2, 0,
 | 
					 | 
				
			||||||
                   ALPHA, A, 1, B, 1, BETA, C, 1 );
 | 
					 | 
				
			||||||
      chkxer();
 | 
					 | 
				
			||||||
      cblas_info = 14; RowMajorStrg = FALSE;
 | 
					 | 
				
			||||||
      cblas_cgemm3m( CblasColMajor,  CblasNoTrans, CblasNoTrans, 2, 0, 0,
 | 
					 | 
				
			||||||
                   ALPHA, A, 2, B, 1, BETA, C, 1 );
 | 
					 | 
				
			||||||
      chkxer();
 | 
					 | 
				
			||||||
      cblas_info = 14; RowMajorStrg = FALSE;
 | 
					 | 
				
			||||||
      cblas_cgemm3m( CblasColMajor,  CblasNoTrans, CblasTrans, 2, 0, 0,
 | 
					 | 
				
			||||||
                   ALPHA, A, 2, B, 1, BETA, C, 1 );
 | 
					 | 
				
			||||||
      chkxer();
 | 
					 | 
				
			||||||
      cblas_info = 14; RowMajorStrg = FALSE;
 | 
					 | 
				
			||||||
      cblas_cgemm3m( CblasColMajor,  CblasTrans, CblasNoTrans, 2, 0, 0,
 | 
					 | 
				
			||||||
                   ALPHA, A, 1, B, 1, BETA, C, 1 );
 | 
					 | 
				
			||||||
      chkxer();
 | 
					 | 
				
			||||||
      cblas_info = 14; RowMajorStrg = FALSE;
 | 
					 | 
				
			||||||
      cblas_cgemm3m( CblasColMajor,  CblasTrans, CblasTrans, 2, 0, 0,
 | 
					 | 
				
			||||||
                   ALPHA, A, 1, B, 1, BETA, C, 1 );
 | 
					 | 
				
			||||||
      chkxer();
 | 
					 | 
				
			||||||
      cblas_info = 4; RowMajorStrg = TRUE;
 | 
					 | 
				
			||||||
      cblas_cgemm3m( CblasRowMajor,  CblasNoTrans, CblasNoTrans, INVALID, 0, 0,
 | 
					 | 
				
			||||||
                   ALPHA, A, 1, B, 1, BETA, C, 1 );
 | 
					 | 
				
			||||||
      chkxer();
 | 
					 | 
				
			||||||
      cblas_info = 4; RowMajorStrg = TRUE;
 | 
					 | 
				
			||||||
      cblas_cgemm3m( CblasRowMajor,  CblasNoTrans, CblasTrans, INVALID, 0, 0,
 | 
					 | 
				
			||||||
                   ALPHA, A, 1, B, 1, BETA, C, 1 );
 | 
					 | 
				
			||||||
      chkxer();
 | 
					 | 
				
			||||||
      cblas_info = 4; RowMajorStrg = TRUE;
 | 
					 | 
				
			||||||
      cblas_cgemm3m( CblasRowMajor,  CblasTrans, CblasNoTrans, INVALID, 0, 0,
 | 
					 | 
				
			||||||
                   ALPHA, A, 1, B, 1, BETA, C, 1 );
 | 
					 | 
				
			||||||
      chkxer();
 | 
					 | 
				
			||||||
      cblas_info = 4; RowMajorStrg = TRUE;
 | 
					 | 
				
			||||||
      cblas_cgemm3m( CblasRowMajor,  CblasTrans, CblasTrans, INVALID, 0, 0,
 | 
					 | 
				
			||||||
                   ALPHA, A, 1, B, 1, BETA, C, 1 );
 | 
					 | 
				
			||||||
      chkxer();
 | 
					 | 
				
			||||||
      cblas_info = 5; RowMajorStrg = TRUE;
 | 
					 | 
				
			||||||
      cblas_cgemm3m( CblasRowMajor,  CblasNoTrans, CblasNoTrans, 0, INVALID, 0,
 | 
					 | 
				
			||||||
                   ALPHA, A, 1, B, 1, BETA, C, 1 );
 | 
					 | 
				
			||||||
      chkxer();
 | 
					 | 
				
			||||||
      cblas_info = 5; RowMajorStrg = TRUE;
 | 
					 | 
				
			||||||
      cblas_cgemm3m( CblasRowMajor,  CblasNoTrans, CblasTrans, 0, INVALID, 0,
 | 
					 | 
				
			||||||
                   ALPHA, A, 1, B, 1, BETA, C, 1 );
 | 
					 | 
				
			||||||
      chkxer();
 | 
					 | 
				
			||||||
      cblas_info = 5; RowMajorStrg = TRUE;
 | 
					 | 
				
			||||||
      cblas_cgemm3m( CblasRowMajor,  CblasTrans, CblasNoTrans, 0, INVALID, 0,
 | 
					 | 
				
			||||||
                   ALPHA, A, 1, B, 1, BETA, C, 1 );
 | 
					 | 
				
			||||||
      chkxer();
 | 
					 | 
				
			||||||
      cblas_info = 5; RowMajorStrg = TRUE;
 | 
					 | 
				
			||||||
      cblas_cgemm3m( CblasRowMajor,  CblasTrans, CblasTrans, 0, INVALID, 0,
 | 
					 | 
				
			||||||
                   ALPHA, A, 1, B, 1, BETA, C, 1 );
 | 
					 | 
				
			||||||
      chkxer();
 | 
					 | 
				
			||||||
      cblas_info = 6; RowMajorStrg = TRUE;
 | 
					 | 
				
			||||||
      cblas_cgemm3m( CblasRowMajor,  CblasNoTrans, CblasNoTrans, 0, 0, INVALID,
 | 
					 | 
				
			||||||
                   ALPHA, A, 1, B, 1, BETA, C, 1 );
 | 
					 | 
				
			||||||
      chkxer();
 | 
					 | 
				
			||||||
      cblas_info = 6; RowMajorStrg = TRUE;
 | 
					 | 
				
			||||||
      cblas_cgemm3m( CblasRowMajor,  CblasNoTrans, CblasTrans, 0, 0, INVALID,
 | 
					 | 
				
			||||||
                   ALPHA, A, 1, B, 1, BETA, C, 1 );
 | 
					 | 
				
			||||||
      chkxer();
 | 
					 | 
				
			||||||
      cblas_info = 6; RowMajorStrg = TRUE;
 | 
					 | 
				
			||||||
      cblas_cgemm3m( CblasRowMajor,  CblasTrans, CblasNoTrans, 0, 0, INVALID,
 | 
					 | 
				
			||||||
                   ALPHA, A, 1, B, 1, BETA, C, 1 );
 | 
					 | 
				
			||||||
      chkxer();
 | 
					 | 
				
			||||||
      cblas_info = 6; RowMajorStrg = TRUE;
 | 
					 | 
				
			||||||
      cblas_cgemm3m( CblasRowMajor,  CblasTrans, CblasTrans, 0, 0, INVALID,
 | 
					 | 
				
			||||||
                   ALPHA, A, 1, B, 1, BETA, C, 1 );
 | 
					 | 
				
			||||||
      chkxer();
 | 
					 | 
				
			||||||
      cblas_info = 9;  RowMajorStrg = TRUE;
 | 
					 | 
				
			||||||
      cblas_cgemm3m( CblasRowMajor,  CblasNoTrans, CblasNoTrans, 0, 0, 2,
 | 
					 | 
				
			||||||
                   ALPHA, A, 1, B, 1, BETA, C, 2 );
 | 
					 | 
				
			||||||
      chkxer();
 | 
					 | 
				
			||||||
      cblas_info = 9; RowMajorStrg = TRUE;
 | 
					 | 
				
			||||||
      cblas_cgemm3m( CblasRowMajor,  CblasNoTrans, CblasTrans, 0, 0, 2,
 | 
					 | 
				
			||||||
                   ALPHA, A, 1, B, 2, BETA, C, 2 );
 | 
					 | 
				
			||||||
      chkxer();
 | 
					 | 
				
			||||||
      cblas_info = 9; RowMajorStrg = TRUE;
 | 
					 | 
				
			||||||
      cblas_cgemm3m( CblasRowMajor,  CblasTrans, CblasNoTrans, 2, 0, 0,
 | 
					 | 
				
			||||||
                   ALPHA, A, 1, B, 2, BETA, C, 1 );
 | 
					 | 
				
			||||||
      chkxer();
 | 
					 | 
				
			||||||
      cblas_info = 9; RowMajorStrg = TRUE;
 | 
					 | 
				
			||||||
      cblas_cgemm3m( CblasRowMajor,  CblasTrans, CblasTrans, 2, 0, 0,
 | 
					 | 
				
			||||||
                   ALPHA, A, 1, B, 1, BETA, C, 1 );
 | 
					 | 
				
			||||||
      chkxer();
 | 
					 | 
				
			||||||
      cblas_info = 11; RowMajorStrg = TRUE;
 | 
					 | 
				
			||||||
      cblas_cgemm3m( CblasRowMajor,  CblasNoTrans, CblasNoTrans, 0, 2, 0,
 | 
					 | 
				
			||||||
                   ALPHA, A, 1, B, 1, BETA, C, 1 );
 | 
					 | 
				
			||||||
      chkxer();
 | 
					 | 
				
			||||||
      cblas_info = 11; RowMajorStrg = TRUE;
 | 
					 | 
				
			||||||
      cblas_cgemm3m( CblasRowMajor,  CblasTrans, CblasNoTrans, 0, 2, 0,
 | 
					 | 
				
			||||||
                   ALPHA, A, 2, B, 1, BETA, C, 1 );
 | 
					 | 
				
			||||||
      chkxer();
 | 
					 | 
				
			||||||
      cblas_info = 11; RowMajorStrg = TRUE;
 | 
					 | 
				
			||||||
      cblas_cgemm3m( CblasRowMajor,  CblasNoTrans, CblasTrans, 0, 0, 2,
 | 
					 | 
				
			||||||
                   ALPHA, A, 2, B, 1, BETA, C, 1 );
 | 
					 | 
				
			||||||
      chkxer();
 | 
					 | 
				
			||||||
      cblas_info = 11; RowMajorStrg = TRUE;
 | 
					 | 
				
			||||||
      cblas_cgemm3m( CblasRowMajor,  CblasTrans, CblasTrans, 0, 0, 2,
 | 
					 | 
				
			||||||
                   ALPHA, A, 1, B, 1, BETA, C, 1 );
 | 
					 | 
				
			||||||
      chkxer();
 | 
					 | 
				
			||||||
      cblas_info = 14; RowMajorStrg = TRUE;
 | 
					 | 
				
			||||||
      cblas_cgemm3m( CblasRowMajor,  CblasNoTrans, CblasNoTrans, 0, 2, 0,
 | 
					 | 
				
			||||||
                   ALPHA, A, 1, B, 2, BETA, C, 1 );
 | 
					 | 
				
			||||||
      chkxer();
 | 
					 | 
				
			||||||
      cblas_info = 14; RowMajorStrg = TRUE;
 | 
					 | 
				
			||||||
      cblas_cgemm3m( CblasRowMajor,  CblasNoTrans, CblasTrans, 0, 2, 0,
 | 
					 | 
				
			||||||
                   ALPHA, A, 1, B, 1, BETA, C, 1 );
 | 
					 | 
				
			||||||
      chkxer();
 | 
					 | 
				
			||||||
      cblas_info = 14; RowMajorStrg = TRUE;
 | 
					 | 
				
			||||||
      cblas_cgemm3m( CblasRowMajor,  CblasTrans, CblasNoTrans, 0, 2, 0,
 | 
					 | 
				
			||||||
                   ALPHA, A, 1, B, 2, BETA, C, 1 );
 | 
					 | 
				
			||||||
      chkxer();
 | 
					 | 
				
			||||||
      cblas_info = 14; RowMajorStrg = TRUE;
 | 
					 | 
				
			||||||
      cblas_cgemm3m( CblasRowMajor,  CblasTrans, CblasTrans, 0, 2, 0,
 | 
					 | 
				
			||||||
                   ALPHA, A, 1, B, 1, BETA, C, 1 );
 | 
					 | 
				
			||||||
      chkxer();
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
   } else if (strncmp( sf,"cblas_cgemm"   ,11)==0) {
 | 
					 | 
				
			||||||
            cblas_rout = "cblas_cgemm"   ;
 | 
					            cblas_rout = "cblas_cgemm"   ;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							| 
						 | 
					@ -567,81 +567,3 @@ void F77_ctrsm(int *order, char *rtlf, char *uplow, char *transp, char *diagn,
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/*
 * F77_cgemm3m -- test-driver shim that forwards a complex matrix-multiply
 * request to cblas_cgemm3m.
 *
 *   order             TEST_ROW_MJR, TEST_COL_MJR, or any other value
 *   transpa, transpb  characters decoded through get_transpose_type()
 *   m, n, k           problem dimensions, passed by pointer
 *   alpha, beta       complex scalars, forwarded untouched
 *   a, b, c           caller matrices; lda, ldb, ldc their leading dimensions
 *
 * TEST_ROW_MJR: a, b and c are copied element by element into malloc'd
 * buffers (buf[r*ld + s] = src[s*(*ldx) + r], with ld one larger than the
 * buffer's column count), cblas_cgemm3m runs with CblasRowMajor on the
 * buffers, the C buffer is copied back into c and all buffers are freed.
 * The malloc results are used without a NULL check.
 * TEST_COL_MJR: the caller's arrays are passed through with CblasColMajor.
 * Anything else: the call is issued with UNDEFINED as the order argument.
 */
void F77_cgemm3m(int *order, char *transpa, char *transpb, int *m, int *n,
     int *k, CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a, int *lda,
     CBLAS_TEST_COMPLEX *b, int *ldb, CBLAS_TEST_COMPLEX *beta,
     CBLAS_TEST_COMPLEX *c, int *ldc ) {

  enum CBLAS_TRANSPOSE transa, transb;
  CBLAS_TEST_COMPLEX *rmA, *rmB, *rmC;
  int r, s, a_rows, a_cols, b_rows, b_cols, ld_a, ld_b, ld_c;

  get_transpose_type(transpa, &transa);
  get_transpose_type(transpb, &transb);

  /* Layouts that need no repacking are dispatched first. */
  if (*order == TEST_COL_MJR) {
     cblas_cgemm3m( CblasColMajor, transa, transb, *m, *n, *k, alpha,
                    a, *lda, b, *ldb, beta, c, *ldc );
     return;
  }
  if (*order != TEST_ROW_MJR) {
     cblas_cgemm3m( UNDEFINED, transa, transb, *m, *n, *k, alpha,
                    a, *lda, b, *ldb, beta, c, *ldc );
     return;
  }

  /* Stored shape of A: m x k when not transposed, k x m otherwise. */
  if (transa == CblasNoTrans) {
     a_rows = *m;
     a_cols = *k;
  } else {
     a_rows = *k;
     a_cols = *m;
  }
  ld_a = a_cols + 1;
  rmA = (CBLAS_TEST_COMPLEX *)malloc(a_rows * ld_a * sizeof(CBLAS_TEST_COMPLEX));
  for (r = 0; r < a_rows; r++) {
     for (s = 0; s < a_cols; s++) {
        rmA[r * ld_a + s].real = a[s * (*lda) + r].real;
        rmA[r * ld_a + s].imag = a[s * (*lda) + r].imag;
     }
  }

  /* Stored shape of B: k x n when not transposed, n x k otherwise. */
  if (transb == CblasNoTrans) {
     b_rows = *k;
     b_cols = *n;
  } else {
     b_rows = *n;
     b_cols = *k;
  }
  ld_b = b_cols + 1;
  rmB = (CBLAS_TEST_COMPLEX *)malloc(b_rows * ld_b * sizeof(CBLAS_TEST_COMPLEX));
  for (r = 0; r < b_rows; r++) {
     for (s = 0; s < b_cols; s++) {
        rmB[r * ld_b + s].real = b[s * (*ldb) + r].real;
        rmB[r * ld_b + s].imag = b[s * (*ldb) + r].imag;
     }
  }

  /* C is always m x n. */
  ld_c = *n + 1;
  rmC = (CBLAS_TEST_COMPLEX *)malloc((*m) * ld_c * sizeof(CBLAS_TEST_COMPLEX));
  for (r = 0; r < *m; r++) {
     for (s = 0; s < *n; s++) {
        rmC[r * ld_c + s].real = c[s * (*ldc) + r].real;
        rmC[r * ld_c + s].imag = c[s * (*ldc) + r].imag;
     }
  }

  cblas_cgemm3m( CblasRowMajor, transa, transb, *m, *n, *k, alpha,
                 rmA, ld_a, rmB, ld_b, beta, rmC, ld_c );

  /* Hand the result back in the caller's layout. */
  for (r = 0; r < *m; r++) {
     for (s = 0; s < *n; s++) {
        c[s * (*ldc) + r].real = rmC[r * ld_c + s].real;
        c[s * (*ldc) + r].imag = rmC[r * ld_c + s].imag;
     }
  }

  free(rmA);
  free(rmB);
  free(rmC);
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,647 @@
 | 
				
			||||||
 | 
					/*
 | 
				
			||||||
 | 
					 *     Written by D.P. Manley, Digital Equipment Corporation.
 | 
				
			||||||
 | 
					 *     Prefixed "C_" to BLAS routines and their declarations.
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 *     Modified by T. H. Do, 4/15/98, SGI/CRAY Research.
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					#include <stdlib.h>
 | 
				
			||||||
 | 
					#include "common.h"
 | 
				
			||||||
 | 
					#include "cblas_test.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
					 * Values compared against the wrappers' `order` argument.
					 * TEST_ROW_MJR makes a wrapper repack its operands and call CBLAS
					 * with CblasRowMajor; TEST_COL_MJR passes the caller's arrays through
					 * with CblasColMajor.  UNDEFINED is what the wrappers hand to CBLAS as
					 * the order argument when `order` matches neither selector.  It is
					 * parenthesized so the negative literal expands as a single operand.
					 */
					#define  TEST_COL_MJR	0
					#define  TEST_ROW_MJR	1
					#define  UNDEFINED     (-1)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
					 * F77_cgemm -- test-driver shim that forwards a complex matrix-multiply
					 * request to cblas_cgemm.
					 *
					 *   order             TEST_ROW_MJR, TEST_COL_MJR, or any other value
					 *   transpa, transpb  characters decoded through get_transpose_type()
					 *   m, n, k           problem dimensions, passed by pointer
					 *   alpha, beta       complex scalars, forwarded untouched
					 *   a, b, c           caller matrices; lda, ldb, ldc their leading dimensions
					 *
					 * TEST_ROW_MJR: a, b and c are copied element by element into malloc'd
					 * buffers (buf[r*ld + s] = src[s*(*ldx) + r], with ld one larger than the
					 * buffer's column count), cblas_cgemm runs with CblasRowMajor on the
					 * buffers, the C buffer is copied back into c and all buffers are freed.
					 * The malloc results are used without a NULL check.
					 * TEST_COL_MJR: the caller's arrays are passed through with CblasColMajor.
					 * Anything else: the call is issued with UNDEFINED as the order argument.
					 */
					void F77_cgemm(int *order, char *transpa, char *transpb, int *m, int *n,
					     int *k, CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a, int *lda,
					     CBLAS_TEST_COMPLEX *b, int *ldb, CBLAS_TEST_COMPLEX *beta,
					     CBLAS_TEST_COMPLEX *c, int *ldc ) {

					  enum CBLAS_TRANSPOSE transa, transb;
					  CBLAS_TEST_COMPLEX *rmA, *rmB, *rmC;
					  int r, s, a_rows, a_cols, b_rows, b_cols, ld_a, ld_b, ld_c;

					  get_transpose_type(transpa, &transa);
					  get_transpose_type(transpb, &transb);

					  /* Layouts that need no repacking are dispatched first. */
					  if (*order == TEST_COL_MJR) {
					     cblas_cgemm( CblasColMajor, transa, transb, *m, *n, *k, alpha,
					                  a, *lda, b, *ldb, beta, c, *ldc );
					     return;
					  }
					  if (*order != TEST_ROW_MJR) {
					     cblas_cgemm( UNDEFINED, transa, transb, *m, *n, *k, alpha,
					                  a, *lda, b, *ldb, beta, c, *ldc );
					     return;
					  }

					  /* Stored shape of A: m x k when not transposed, k x m otherwise. */
					  if (transa == CblasNoTrans) {
					     a_rows = *m;
					     a_cols = *k;
					  } else {
					     a_rows = *k;
					     a_cols = *m;
					  }
					  ld_a = a_cols + 1;
					  rmA = (CBLAS_TEST_COMPLEX *)malloc(a_rows * ld_a * sizeof(CBLAS_TEST_COMPLEX));
					  for (r = 0; r < a_rows; r++) {
					     for (s = 0; s < a_cols; s++) {
					        rmA[r * ld_a + s].real = a[s * (*lda) + r].real;
					        rmA[r * ld_a + s].imag = a[s * (*lda) + r].imag;
					     }
					  }

					  /* Stored shape of B: k x n when not transposed, n x k otherwise. */
					  if (transb == CblasNoTrans) {
					     b_rows = *k;
					     b_cols = *n;
					  } else {
					     b_rows = *n;
					     b_cols = *k;
					  }
					  ld_b = b_cols + 1;
					  rmB = (CBLAS_TEST_COMPLEX *)malloc(b_rows * ld_b * sizeof(CBLAS_TEST_COMPLEX));
					  for (r = 0; r < b_rows; r++) {
					     for (s = 0; s < b_cols; s++) {
					        rmB[r * ld_b + s].real = b[s * (*ldb) + r].real;
					        rmB[r * ld_b + s].imag = b[s * (*ldb) + r].imag;
					     }
					  }

					  /* C is always m x n. */
					  ld_c = *n + 1;
					  rmC = (CBLAS_TEST_COMPLEX *)malloc((*m) * ld_c * sizeof(CBLAS_TEST_COMPLEX));
					  for (r = 0; r < *m; r++) {
					     for (s = 0; s < *n; s++) {
					        rmC[r * ld_c + s].real = c[s * (*ldc) + r].real;
					        rmC[r * ld_c + s].imag = c[s * (*ldc) + r].imag;
					     }
					  }

					  cblas_cgemm( CblasRowMajor, transa, transb, *m, *n, *k, alpha,
					               rmA, ld_a, rmB, ld_b, beta, rmC, ld_c );

					  /* Hand the result back in the caller's layout. */
					  for (r = 0; r < *m; r++) {
					     for (s = 0; s < *n; s++) {
					        c[s * (*ldc) + r].real = rmC[r * ld_c + s].real;
					        c[s * (*ldc) + r].imag = rmC[r * ld_c + s].imag;
					     }
					  }

					  free(rmA);
					  free(rmB);
					  free(rmC);
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
					 * F77_chemm -- test-driver shim that forwards a request to cblas_chemm.
					 *
					 *   order        TEST_ROW_MJR, TEST_COL_MJR, or any other value
					 *   rtlf, uplow  characters decoded through get_side_type() and
					 *                get_uplo_type()
					 *   m, n         dimensions of b and c, passed by pointer
					 *   alpha, beta  complex scalars, forwarded untouched
					 *   a, b, c      caller matrices; lda, ldb, ldc their leading dimensions
					 *
					 * TEST_ROW_MJR: a (m x m when side is CblasLeft, n x n otherwise), b and
					 * c (both m x n) are copied element by element into malloc'd buffers
					 * (buf[r*ld + s] = src[s*(*ldx) + r], ld = column count + 1),
					 * cblas_chemm runs with CblasRowMajor on the buffers, the C buffer is
					 * copied back into c and the buffers are freed.  The malloc results are
					 * used without a NULL check.
					 * TEST_COL_MJR: the caller's arrays are passed through with CblasColMajor.
					 * Anything else: the call is issued with UNDEFINED as the order argument.
					 */
					void F77_chemm(int *order, char *rtlf, char *uplow, int *m, int *n,
					        CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a, int *lda,
					        CBLAS_TEST_COMPLEX *b, int *ldb, CBLAS_TEST_COMPLEX *beta,
					        CBLAS_TEST_COMPLEX *c, int *ldc ) {

					  enum CBLAS_UPLO uplo;
					  enum CBLAS_SIDE side;
					  CBLAS_TEST_COMPLEX *rmA, *rmB, *rmC;
					  int r, s, a_dim, ld_a, ld_b, ld_c;

					  get_uplo_type(uplow, &uplo);
					  get_side_type(rtlf, &side);

					  /* Layouts that need no repacking are dispatched first. */
					  if (*order == TEST_COL_MJR) {
					     cblas_chemm( CblasColMajor, side, uplo, *m, *n, alpha, a, *lda,
					                  b, *ldb, beta, c, *ldc );
					     return;
					  }
					  if (*order != TEST_ROW_MJR) {
					     cblas_chemm( UNDEFINED, side, uplo, *m, *n, alpha, a, *lda,
					                  b, *ldb, beta, c, *ldc );
					     return;
					  }

					  /* A is square; its order follows the side it multiplies from. */
					  a_dim = (side == CblasLeft) ? *m : *n;
					  ld_a = a_dim + 1;
					  rmA = (CBLAS_TEST_COMPLEX *)malloc(a_dim * ld_a * sizeof(CBLAS_TEST_COMPLEX));
					  for (r = 0; r < a_dim; r++) {
					     for (s = 0; s < a_dim; s++) {
					        rmA[r * ld_a + s].real = a[s * (*lda) + r].real;
					        rmA[r * ld_a + s].imag = a[s * (*lda) + r].imag;
					     }
					  }

					  ld_b = *n + 1;
					  rmB = (CBLAS_TEST_COMPLEX *)malloc((*m) * ld_b * sizeof(CBLAS_TEST_COMPLEX));
					  for (r = 0; r < *m; r++) {
					     for (s = 0; s < *n; s++) {
					        rmB[r * ld_b + s].real = b[s * (*ldb) + r].real;
					        rmB[r * ld_b + s].imag = b[s * (*ldb) + r].imag;
					     }
					  }

					  ld_c = *n + 1;
					  rmC = (CBLAS_TEST_COMPLEX *)malloc((*m) * ld_c * sizeof(CBLAS_TEST_COMPLEX));
					  for (r = 0; r < *m; r++) {
					     for (s = 0; s < *n; s++) {
					        rmC[r * ld_c + s].real = c[s * (*ldc) + r].real;
					        rmC[r * ld_c + s].imag = c[s * (*ldc) + r].imag;
					     }
					  }

					  cblas_chemm( CblasRowMajor, side, uplo, *m, *n, alpha, rmA, ld_a,
					               rmB, ld_b, beta, rmC, ld_c );

					  /* Hand the result back in the caller's layout. */
					  for (r = 0; r < *m; r++) {
					     for (s = 0; s < *n; s++) {
					        c[s * (*ldc) + r].real = rmC[r * ld_c + s].real;
					        c[s * (*ldc) + r].imag = rmC[r * ld_c + s].imag;
					     }
					  }

					  free(rmA);
					  free(rmB);
					  free(rmC);
					}
 | 
				
			||||||
 | 
					/*
					 * F77_csymm -- test-driver shim that forwards a request to cblas_csymm.
					 *
					 *   order        TEST_ROW_MJR, TEST_COL_MJR, or any other value
					 *   rtlf, uplow  characters decoded through get_side_type() and
					 *                get_uplo_type()
					 *   m, n         dimensions of b and c, passed by pointer
					 *   alpha, beta  complex scalars, forwarded untouched
					 *   a, b, c      caller matrices; lda, ldb, ldc their leading dimensions
					 *
					 * TEST_ROW_MJR: a (m x m when side is CblasLeft, n x n otherwise), b and
					 * c (both m x n) are copied, one whole element per assignment, into
					 * malloc'd buffers (buf[r*ld + s] = src[s*(*ldx) + r], ld = column
					 * count + 1), cblas_csymm runs with CblasRowMajor on the buffers, the C
					 * buffer is copied back into c and the buffers are freed.  The malloc
					 * results are used without a NULL check.
					 * TEST_COL_MJR: the caller's arrays are passed through with CblasColMajor.
					 * Anything else: the call is issued with UNDEFINED as the order argument.
					 */
					void F77_csymm(int *order, char *rtlf, char *uplow, int *m, int *n,
					          CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a, int *lda,
					          CBLAS_TEST_COMPLEX *b, int *ldb, CBLAS_TEST_COMPLEX *beta,
					          CBLAS_TEST_COMPLEX *c, int *ldc ) {

					  enum CBLAS_UPLO uplo;
					  enum CBLAS_SIDE side;
					  CBLAS_TEST_COMPLEX *rmA, *rmB, *rmC;
					  int r, s, a_dim, ld_a, ld_b, ld_c;

					  get_uplo_type(uplow, &uplo);
					  get_side_type(rtlf, &side);

					  /* Layouts that need no repacking are dispatched first. */
					  if (*order == TEST_COL_MJR) {
					     cblas_csymm( CblasColMajor, side, uplo, *m, *n, alpha, a, *lda,
					                  b, *ldb, beta, c, *ldc );
					     return;
					  }
					  if (*order != TEST_ROW_MJR) {
					     cblas_csymm( UNDEFINED, side, uplo, *m, *n, alpha, a, *lda,
					                  b, *ldb, beta, c, *ldc );
					     return;
					  }

					  /* A is square; its order follows the side it multiplies from. */
					  a_dim = (side == CblasLeft) ? *m : *n;
					  ld_a = a_dim + 1;
					  rmA = (CBLAS_TEST_COMPLEX *)malloc(a_dim * ld_a * sizeof(CBLAS_TEST_COMPLEX));
					  for (r = 0; r < a_dim; r++) {
					     for (s = 0; s < a_dim; s++) {
					        rmA[r * ld_a + s] = a[s * (*lda) + r];
					     }
					  }

					  ld_b = *n + 1;
					  rmB = (CBLAS_TEST_COMPLEX *)malloc((*m) * ld_b * sizeof(CBLAS_TEST_COMPLEX));
					  for (r = 0; r < *m; r++) {
					     for (s = 0; s < *n; s++) {
					        rmB[r * ld_b + s] = b[s * (*ldb) + r];
					     }
					  }

					  ld_c = *n + 1;
					  rmC = (CBLAS_TEST_COMPLEX *)malloc((*m) * ld_c * sizeof(CBLAS_TEST_COMPLEX));
					  for (r = 0; r < *m; r++) {
					     for (s = 0; s < *n; s++) {
					        rmC[r * ld_c + s] = c[s * (*ldc) + r];
					     }
					  }

					  cblas_csymm( CblasRowMajor, side, uplo, *m, *n, alpha, rmA, ld_a,
					               rmB, ld_b, beta, rmC, ld_c );

					  /* Hand the result back in the caller's layout. */
					  for (r = 0; r < *m; r++) {
					     for (s = 0; s < *n; s++) {
					        c[s * (*ldc) + r] = rmC[r * ld_c + s];
					     }
					  }

					  free(rmA);
					  free(rmB);
					  free(rmC);
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
					 * F77_cherk -- test-driver shim that forwards a request to cblas_cherk.
					 *
					 *   order          TEST_ROW_MJR, TEST_COL_MJR, or any other value
					 *   uplow, transp  characters decoded through get_uplo_type() and
					 *                  get_transpose_type()
					 *   n, k           problem dimensions, passed by pointer
					 *   alpha, beta    float scalars, dereferenced and passed by value
					 *   a, c           caller matrices; lda, ldc their leading dimensions
					 *
					 * TEST_ROW_MJR: a (n x k when trans is CblasNoTrans, k x n otherwise) and
					 * c (n x n) are copied element by element into malloc'd buffers
					 * (buf[r*ld + s] = src[s*(*ldx) + r], ld = column count + 1),
					 * cblas_cherk runs with CblasRowMajor on the buffers, the C buffer is
					 * copied back into c and the buffers are freed.  The malloc results are
					 * used without a NULL check.
					 * TEST_COL_MJR: the caller's arrays are passed through with CblasColMajor.
					 * Anything else: the call is issued with UNDEFINED as the order argument.
					 */
					void F77_cherk(int *order, char *uplow, char *transp, int *n, int *k,
					     float *alpha, CBLAS_TEST_COMPLEX *a, int *lda,
					     float *beta, CBLAS_TEST_COMPLEX *c, int *ldc ) {

					  enum CBLAS_UPLO uplo;
					  enum CBLAS_TRANSPOSE trans;
					  CBLAS_TEST_COMPLEX *rmA, *rmC;
					  int r, s, a_rows, a_cols, ld_a, ld_c;

					  get_uplo_type(uplow, &uplo);
					  get_transpose_type(transp, &trans);

					  /* Layouts that need no repacking are dispatched first. */
					  if (*order == TEST_COL_MJR) {
					     cblas_cherk( CblasColMajor, uplo, trans, *n, *k, *alpha, a, *lda,
					                  *beta, c, *ldc );
					     return;
					  }
					  if (*order != TEST_ROW_MJR) {
					     cblas_cherk( UNDEFINED, uplo, trans, *n, *k, *alpha, a, *lda,
					                  *beta, c, *ldc );
					     return;
					  }

					  /* Stored shape of A: n x k when not transposed, k x n otherwise. */
					  if (trans == CblasNoTrans) {
					     a_rows = *n;
					     a_cols = *k;
					  } else {
					     a_rows = *k;
					     a_cols = *n;
					  }
					  ld_a = a_cols + 1;
					  rmA = (CBLAS_TEST_COMPLEX *)malloc(a_rows * ld_a * sizeof(CBLAS_TEST_COMPLEX));
					  for (r = 0; r < a_rows; r++) {
					     for (s = 0; s < a_cols; s++) {
					        rmA[r * ld_a + s].real = a[s * (*lda) + r].real;
					        rmA[r * ld_a + s].imag = a[s * (*lda) + r].imag;
					     }
					  }

					  /* C is n x n. */
					  ld_c = *n + 1;
					  rmC = (CBLAS_TEST_COMPLEX *)malloc((*n) * ld_c * sizeof(CBLAS_TEST_COMPLEX));
					  for (r = 0; r < *n; r++) {
					     for (s = 0; s < *n; s++) {
					        rmC[r * ld_c + s].real = c[s * (*ldc) + r].real;
					        rmC[r * ld_c + s].imag = c[s * (*ldc) + r].imag;
					     }
					  }

					  cblas_cherk( CblasRowMajor, uplo, trans, *n, *k, *alpha, rmA, ld_a,
					               *beta, rmC, ld_c );

					  /* Hand the result back in the caller's layout. */
					  for (r = 0; r < *n; r++) {
					     for (s = 0; s < *n; s++) {
					        c[s * (*ldc) + r].real = rmC[r * ld_c + s].real;
					        c[s * (*ldc) + r].imag = rmC[r * ld_c + s].imag;
					     }
					  }

					  free(rmA);
					  free(rmC);
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
					 * F77_csyrk -- test-driver shim that forwards a request to cblas_csyrk.
					 *
					 *   order          TEST_ROW_MJR, TEST_COL_MJR, or any other value
					 *   uplow, transp  characters decoded through get_uplo_type() and
					 *                  get_transpose_type()
					 *   n, k           problem dimensions, passed by pointer
					 *   alpha, beta    complex scalars, forwarded untouched
					 *   a, c           caller matrices; lda, ldc their leading dimensions
					 *
					 * TEST_ROW_MJR: a (n x k when trans is CblasNoTrans, k x n otherwise) and
					 * c (n x n) are copied element by element into malloc'd buffers
					 * (buf[r*ld + s] = src[s*(*ldx) + r], ld = column count + 1),
					 * cblas_csyrk runs with CblasRowMajor on the buffers, the C buffer is
					 * copied back into c and the buffers are freed.  The malloc results are
					 * used without a NULL check.
					 * TEST_COL_MJR: the caller's arrays are passed through with CblasColMajor.
					 * Anything else: the call is issued with UNDEFINED as the order argument.
					 */
					void F77_csyrk(int *order, char *uplow, char *transp, int *n, int *k,
					     CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a, int *lda,
					     CBLAS_TEST_COMPLEX *beta, CBLAS_TEST_COMPLEX *c, int *ldc ) {

					  enum CBLAS_UPLO uplo;
					  enum CBLAS_TRANSPOSE trans;
					  CBLAS_TEST_COMPLEX *rmA, *rmC;
					  int r, s, a_rows, a_cols, ld_a, ld_c;

					  get_uplo_type(uplow, &uplo);
					  get_transpose_type(transp, &trans);

					  /* Layouts that need no repacking are dispatched first. */
					  if (*order == TEST_COL_MJR) {
					     cblas_csyrk( CblasColMajor, uplo, trans, *n, *k, alpha, a, *lda,
					                  beta, c, *ldc );
					     return;
					  }
					  if (*order != TEST_ROW_MJR) {
					     cblas_csyrk( UNDEFINED, uplo, trans, *n, *k, alpha, a, *lda,
					                  beta, c, *ldc );
					     return;
					  }

					  /* Stored shape of A: n x k when not transposed, k x n otherwise. */
					  if (trans == CblasNoTrans) {
					     a_rows = *n;
					     a_cols = *k;
					  } else {
					     a_rows = *k;
					     a_cols = *n;
					  }
					  ld_a = a_cols + 1;
					  rmA = (CBLAS_TEST_COMPLEX *)malloc(a_rows * ld_a * sizeof(CBLAS_TEST_COMPLEX));
					  for (r = 0; r < a_rows; r++) {
					     for (s = 0; s < a_cols; s++) {
					        rmA[r * ld_a + s].real = a[s * (*lda) + r].real;
					        rmA[r * ld_a + s].imag = a[s * (*lda) + r].imag;
					     }
					  }

					  /* C is n x n. */
					  ld_c = *n + 1;
					  rmC = (CBLAS_TEST_COMPLEX *)malloc((*n) * ld_c * sizeof(CBLAS_TEST_COMPLEX));
					  for (r = 0; r < *n; r++) {
					     for (s = 0; s < *n; s++) {
					        rmC[r * ld_c + s].real = c[s * (*ldc) + r].real;
					        rmC[r * ld_c + s].imag = c[s * (*ldc) + r].imag;
					     }
					  }

					  cblas_csyrk( CblasRowMajor, uplo, trans, *n, *k, alpha, rmA, ld_a,
					               beta, rmC, ld_c );

					  /* Hand the result back in the caller's layout. */
					  for (r = 0; r < *n; r++) {
					     for (s = 0; s < *n; s++) {
					        c[s * (*ldc) + r].real = rmC[r * ld_c + s].real;
					        c[s * (*ldc) + r].imag = rmC[r * ld_c + s].imag;
					     }
					  }

					  free(rmA);
					  free(rmC);
					}
 | 
				
			||||||
 | 
					/*
					 * F77_cher2k -- test-driver shim that forwards a request to cblas_cher2k.
					 *
					 *   order          TEST_ROW_MJR, TEST_COL_MJR, or any other value
					 *   uplow, transp  characters decoded through get_uplo_type() and
					 *                  get_transpose_type()
					 *   n, k           problem dimensions, passed by pointer
					 *   alpha          complex scalar, forwarded untouched
					 *   beta           float scalar, dereferenced and passed by value
					 *   a, b, c        caller matrices; lda, ldb, ldc their leading dimensions
					 *
					 * TEST_ROW_MJR: a and b (both n x k when trans is CblasNoTrans, k x n
					 * otherwise) and c (n x n) are copied element by element into malloc'd
					 * buffers (buf[r*ld + s] = src[s*(*ldx) + r], ld = column count + 1),
					 * cblas_cher2k runs with CblasRowMajor on the buffers, the C buffer is
					 * copied back into c and the buffers are freed.  The malloc results are
					 * used without a NULL check.
					 * TEST_COL_MJR: the caller's arrays are passed through with CblasColMajor.
					 * Anything else: the call is issued with UNDEFINED as the order argument.
					 */
					void F77_cher2k(int *order, char *uplow, char *transp, int *n, int *k,
					        CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a, int *lda,
					        CBLAS_TEST_COMPLEX *b, int *ldb, float *beta,
					        CBLAS_TEST_COMPLEX *c, int *ldc ) {

					  enum CBLAS_UPLO uplo;
					  enum CBLAS_TRANSPOSE trans;
					  CBLAS_TEST_COMPLEX *rmA, *rmB, *rmC;
					  int r, s, ab_rows, ab_cols, ld_a, ld_b, ld_c;

					  get_uplo_type(uplow, &uplo);
					  get_transpose_type(transp, &trans);

					  /* Layouts that need no repacking are dispatched first. */
					  if (*order == TEST_COL_MJR) {
					     cblas_cher2k( CblasColMajor, uplo, trans, *n, *k, alpha, a, *lda,
					                   b, *ldb, *beta, c, *ldc );
					     return;
					  }
					  if (*order != TEST_ROW_MJR) {
					     cblas_cher2k( UNDEFINED, uplo, trans, *n, *k, alpha, a, *lda,
					                   b, *ldb, *beta, c, *ldc );
					     return;
					  }

					  /* A and B share one stored shape: n x k, or k x n when transposed. */
					  if (trans == CblasNoTrans) {
					     ab_rows = *n;
					     ab_cols = *k;
					  } else {
					     ab_rows = *k;
					     ab_cols = *n;
					  }
					  ld_a = ab_cols + 1;
					  ld_b = ab_cols + 1;
					  rmA = (CBLAS_TEST_COMPLEX *)malloc(ab_rows * ld_a * sizeof(CBLAS_TEST_COMPLEX));
					  rmB = (CBLAS_TEST_COMPLEX *)malloc(ab_rows * ld_b * sizeof(CBLAS_TEST_COMPLEX));
					  for (r = 0; r < ab_rows; r++) {
					     for (s = 0; s < ab_cols; s++) {
					        rmA[r * ld_a + s].real = a[s * (*lda) + r].real;
					        rmA[r * ld_a + s].imag = a[s * (*lda) + r].imag;
					        rmB[r * ld_b + s].real = b[s * (*ldb) + r].real;
					        rmB[r * ld_b + s].imag = b[s * (*ldb) + r].imag;
					     }
					  }

					  /* C is n x n. */
					  ld_c = *n + 1;
					  rmC = (CBLAS_TEST_COMPLEX *)malloc((*n) * ld_c * sizeof(CBLAS_TEST_COMPLEX));
					  for (r = 0; r < *n; r++) {
					     for (s = 0; s < *n; s++) {
					        rmC[r * ld_c + s].real = c[s * (*ldc) + r].real;
					        rmC[r * ld_c + s].imag = c[s * (*ldc) + r].imag;
					     }
					  }

					  cblas_cher2k( CblasRowMajor, uplo, trans, *n, *k, alpha, rmA, ld_a,
					                rmB, ld_b, *beta, rmC, ld_c );

					  /* Hand the result back in the caller's layout. */
					  for (r = 0; r < *n; r++) {
					     for (s = 0; s < *n; s++) {
					        c[s * (*ldc) + r].real = rmC[r * ld_c + s].real;
					        c[s * (*ldc) + r].imag = rmC[r * ld_c + s].imag;
					     }
					  }

					  free(rmA);
					  free(rmB);
					  free(rmC);
					}
 | 
				
			||||||
 | 
					/*
					 * F77_csyr2k -- test-driver shim that forwards a request to cblas_csyr2k.
					 *
					 *   order          TEST_ROW_MJR, TEST_COL_MJR, or any other value
					 *   uplow, transp  characters decoded through get_uplo_type() and
					 *                  get_transpose_type()
					 *   n, k           problem dimensions, passed by pointer
					 *   alpha, beta    complex scalars, forwarded untouched
					 *   a, b, c        caller matrices; lda, ldb, ldc their leading dimensions
					 *
					 * TEST_ROW_MJR: a and b (both n x k when trans is CblasNoTrans, k x n
					 * otherwise) and c (n x n) are copied element by element into malloc'd
					 * buffers (buf[r*ld + s] = src[s*(*ldx) + r], ld = column count + 1),
					 * cblas_csyr2k runs with CblasRowMajor on the buffers, the C buffer is
					 * copied back into c and the buffers are freed.  The malloc results are
					 * used without a NULL check.
					 * TEST_COL_MJR: the caller's arrays are passed through with CblasColMajor.
					 * Anything else: the call is issued with UNDEFINED as the order argument.
					 */
					void F77_csyr2k(int *order, char *uplow, char *transp, int *n, int *k,
					         CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a, int *lda,
					         CBLAS_TEST_COMPLEX *b, int *ldb, CBLAS_TEST_COMPLEX *beta,
					         CBLAS_TEST_COMPLEX *c, int *ldc ) {

					  enum CBLAS_UPLO uplo;
					  enum CBLAS_TRANSPOSE trans;
					  CBLAS_TEST_COMPLEX *rmA, *rmB, *rmC;
					  int r, s, ab_rows, ab_cols, ld_a, ld_b, ld_c;

					  get_uplo_type(uplow, &uplo);
					  get_transpose_type(transp, &trans);

					  /* Layouts that need no repacking are dispatched first. */
					  if (*order == TEST_COL_MJR) {
					     cblas_csyr2k( CblasColMajor, uplo, trans, *n, *k, alpha, a, *lda,
					                   b, *ldb, beta, c, *ldc );
					     return;
					  }
					  if (*order != TEST_ROW_MJR) {
					     cblas_csyr2k( UNDEFINED, uplo, trans, *n, *k, alpha, a, *lda,
					                   b, *ldb, beta, c, *ldc );
					     return;
					  }

					  /* A and B share one stored shape: n x k, or k x n when transposed. */
					  if (trans == CblasNoTrans) {
					     ab_rows = *n;
					     ab_cols = *k;
					  } else {
					     ab_rows = *k;
					     ab_cols = *n;
					  }
					  ld_a = ab_cols + 1;
					  ld_b = ab_cols + 1;
					  rmA = (CBLAS_TEST_COMPLEX *)malloc(ab_rows * ld_a * sizeof(CBLAS_TEST_COMPLEX));
					  rmB = (CBLAS_TEST_COMPLEX *)malloc(ab_rows * ld_b * sizeof(CBLAS_TEST_COMPLEX));
					  for (r = 0; r < ab_rows; r++) {
					     for (s = 0; s < ab_cols; s++) {
					        rmA[r * ld_a + s].real = a[s * (*lda) + r].real;
					        rmA[r * ld_a + s].imag = a[s * (*lda) + r].imag;
					        rmB[r * ld_b + s].real = b[s * (*ldb) + r].real;
					        rmB[r * ld_b + s].imag = b[s * (*ldb) + r].imag;
					     }
					  }

					  /* C is n x n. */
					  ld_c = *n + 1;
					  rmC = (CBLAS_TEST_COMPLEX *)malloc((*n) * ld_c * sizeof(CBLAS_TEST_COMPLEX));
					  for (r = 0; r < *n; r++) {
					     for (s = 0; s < *n; s++) {
					        rmC[r * ld_c + s].real = c[s * (*ldc) + r].real;
					        rmC[r * ld_c + s].imag = c[s * (*ldc) + r].imag;
					     }
					  }

					  cblas_csyr2k( CblasRowMajor, uplo, trans, *n, *k, alpha, rmA, ld_a,
					                rmB, ld_b, beta, rmC, ld_c );

					  /* Hand the result back in the caller's layout. */
					  for (r = 0; r < *n; r++) {
					     for (s = 0; s < *n; s++) {
					        c[s * (*ldc) + r].real = rmC[r * ld_c + s].real;
					        c[s * (*ldc) + r].imag = rmC[r * ld_c + s].imag;
					     }
					  }

					  free(rmA);
					  free(rmB);
					  free(rmC);
					}
 | 
				
			||||||
 | 
					/*
					 * F77_ctrmm -- test shim that forwards a TRMM problem to cblas_ctrmm.
					 *
					 * The character flags rtlf/uplow/transp/diagn are translated into CBLAS
					 * enums by the get_*_type helpers (defined elsewhere), then *order selects
					 * the call path:
					 *   TEST_ROW_MJR  a and b are read as a[col*lda + row] and copied into
					 *                 freshly allocated buffers indexed A[row*LDA + col]
					 *                 (leading dimension = column count + 1). Those buffers
					 *                 are passed to cblas_ctrmm with CblasRowMajor and the
					 *                 contents of B are copied back into b afterwards.
					 *   TEST_COL_MJR  a and b are passed through unchanged with CblasColMajor.
					 *   otherwise     the call is made with UNDEFINED as the order
					 *                 (presumably to exercise the library's bad-argument
					 *                 handling -- confirm against the test driver).
					 *
					 * All integer arguments are passed by pointer. The signature offers no
					 * error channel, so a failed allocation on the row-major path terminates
					 * the program with abort() instead of dereferencing a NULL pointer.
					 */
					void F77_ctrmm(int *order, char *rtlf, char *uplow, char *transp, char *diagn,
					       int *m, int *n, CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a,
					       int *lda, CBLAS_TEST_COMPLEX *b, int *ldb) {
					  int i,j,LDA,LDB;
					  int ka;                   /* order of the square matrix a: *m or *n */
					  size_t a_bytes, b_bytes;  /* allocation sizes, computed in size_t  */
					  CBLAS_TEST_COMPLEX *A, *B;
					  enum CBLAS_SIDE side;
					  enum CBLAS_DIAG diag;
					  enum CBLAS_UPLO uplo;
					  enum CBLAS_TRANSPOSE trans;

					  get_uplo_type(uplow,&uplo);
					  get_transpose_type(transp,&trans);
					  get_diag_type(diagn,&diag);
					  get_side_type(rtlf,&side);

					  if (*order == TEST_ROW_MJR) {
					     /* a is copied as m-by-m for CblasLeft and n-by-n otherwise. */
					     ka  = (side == CblasLeft) ? *m : *n;
					     LDA = ka+1;
					     LDB = *n+1;

					     /* Widen before multiplying so the products are not formed in int. */
					     a_bytes = (size_t)ka*(size_t)LDA*sizeof(CBLAS_TEST_COMPLEX);
					     b_bytes = (size_t)(*m)*(size_t)LDB*sizeof(CBLAS_TEST_COMPLEX);
					     A = (CBLAS_TEST_COMPLEX *)malloc(a_bytes);
					     B = (CBLAS_TEST_COMPLEX *)malloc(b_bytes);

					     /* malloc(0) may legitimately return NULL, so only a non-empty
					      * request that comes back NULL is treated as a failure. */
					     if ((A == NULL && a_bytes != 0) || (B == NULL && b_bytes != 0)) {
					        free(A);
					        free(B);
					        abort();
					     }

					     /* Transposing copy of a into A. */
					     for( i=0; i<ka; i++ )
					        for( j=0; j<ka; j++ ) {
					           A[i*LDA+j].real=a[j*(*lda)+i].real;
					           A[i*LDA+j].imag=a[j*(*lda)+i].imag;
					        }

					     /* Transposing copy of b into B. */
					     for( i=0; i<*m; i++ )
					        for( j=0; j<*n; j++ ) {
					           B[i*LDB+j].real=b[j*(*ldb)+i].real;
					           B[i*LDB+j].imag=b[j*(*ldb)+i].imag;
					        }

					     cblas_ctrmm(CblasRowMajor, side, uplo, trans, diag, *m, *n, alpha,
					                 A, LDA, B, LDB );

					     /* Copy the result held in B back into the caller's b. */
					     for( j=0; j<*n; j++ )
					        for( i=0; i<*m; i++ ) {
					           b[j*(*ldb)+i].real=B[i*LDB+j].real;
					           b[j*(*ldb)+i].imag=B[i*LDB+j].imag;
					        }

					     free(A);
					     free(B);
					  }
					  else if (*order == TEST_COL_MJR)
					     cblas_ctrmm(CblasColMajor, side, uplo, trans, diag, *m, *n, alpha,
					                 a, *lda, b, *ldb);
					  else
					     cblas_ctrmm(UNDEFINED, side, uplo, trans, diag, *m, *n, alpha,
					                 a, *lda, b, *ldb);
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
					 * F77_ctrsm -- test shim that forwards a TRSM problem to cblas_ctrsm.
					 *
					 * The character flags rtlf/uplow/transp/diagn are translated into CBLAS
					 * enums by the get_*_type helpers (defined elsewhere), then *order selects
					 * the call path:
					 *   TEST_ROW_MJR  a and b are read as a[col*lda + row] and copied into
					 *                 freshly allocated buffers indexed A[row*LDA + col]
					 *                 (leading dimension = column count + 1). Those buffers
					 *                 are passed to cblas_ctrsm with CblasRowMajor and the
					 *                 contents of B are copied back into b afterwards.
					 *   TEST_COL_MJR  a and b are passed through unchanged with CblasColMajor.
					 *   otherwise     the call is made with UNDEFINED as the order
					 *                 (presumably to exercise the library's bad-argument
					 *                 handling -- confirm against the test driver).
					 *
					 * All integer arguments are passed by pointer. The signature offers no
					 * error channel, so a failed allocation on the row-major path terminates
					 * the program with abort() instead of dereferencing a NULL pointer.
					 */
					void F77_ctrsm(int *order, char *rtlf, char *uplow, char *transp, char *diagn,
					         int *m, int *n, CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a,
					         int *lda, CBLAS_TEST_COMPLEX *b, int *ldb) {
					  int i,j,LDA,LDB;
					  int ka;                   /* order of the square matrix a: *m or *n */
					  size_t a_bytes, b_bytes;  /* allocation sizes, computed in size_t  */
					  CBLAS_TEST_COMPLEX *A, *B;
					  enum CBLAS_SIDE side;
					  enum CBLAS_DIAG diag;
					  enum CBLAS_UPLO uplo;
					  enum CBLAS_TRANSPOSE trans;

					  get_uplo_type(uplow,&uplo);
					  get_transpose_type(transp,&trans);
					  get_diag_type(diagn,&diag);
					  get_side_type(rtlf,&side);

					  if (*order == TEST_ROW_MJR) {
					     /* a is copied as m-by-m for CblasLeft and n-by-n otherwise. */
					     ka  = (side == CblasLeft) ? *m : *n;
					     LDA = ka+1;
					     LDB = *n+1;

					     /* Widen before multiplying so the products are not formed in int. */
					     a_bytes = (size_t)ka*(size_t)LDA*sizeof(CBLAS_TEST_COMPLEX);
					     b_bytes = (size_t)(*m)*(size_t)LDB*sizeof(CBLAS_TEST_COMPLEX);
					     A = (CBLAS_TEST_COMPLEX *)malloc(a_bytes);
					     B = (CBLAS_TEST_COMPLEX *)malloc(b_bytes);

					     /* malloc(0) may legitimately return NULL, so only a non-empty
					      * request that comes back NULL is treated as a failure. */
					     if ((A == NULL && a_bytes != 0) || (B == NULL && b_bytes != 0)) {
					        free(A);
					        free(B);
					        abort();
					     }

					     /* Transposing copy of a into A. */
					     for( i=0; i<ka; i++ )
					        for( j=0; j<ka; j++ ) {
					           A[i*LDA+j].real=a[j*(*lda)+i].real;
					           A[i*LDA+j].imag=a[j*(*lda)+i].imag;
					        }

					     /* Transposing copy of b into B. */
					     for( i=0; i<*m; i++ )
					        for( j=0; j<*n; j++ ) {
					           B[i*LDB+j].real=b[j*(*ldb)+i].real;
					           B[i*LDB+j].imag=b[j*(*ldb)+i].imag;
					        }

					     cblas_ctrsm(CblasRowMajor, side, uplo, trans, diag, *m, *n, alpha,
					                 A, LDA, B, LDB );

					     /* Copy the result held in B back into the caller's b. */
					     for( j=0; j<*n; j++ )
					        for( i=0; i<*m; i++ ) {
					           b[j*(*ldb)+i].real=B[i*LDB+j].real;
					           b[j*(*ldb)+i].imag=B[i*LDB+j].imag;
					        }

					     free(A);
					     free(B);
					  }
					  else if (*order == TEST_COL_MJR)
					     cblas_ctrsm(CblasColMajor, side, uplo, trans, diag, *m, *n, alpha,
					                 a, *lda, b, *ldb);
					  else
					     cblas_ctrsm(UNDEFINED, side, uplo, trans, diag, *m, *n, alpha,
					                 a, *lda, b, *ldb);
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
					 * F77_cgemm3m -- test shim that forwards a GEMM problem to cblas_cgemm3m.
					 *
					 * transpa/transpb are translated into CBLAS_TRANSPOSE values by
					 * get_transpose_type (defined elsewhere), then *order selects the path:
					 *   TEST_ROW_MJR  a, b and c are read as x[col*ldx + row] and copied into
					 *                 freshly allocated buffers indexed X[row*LDX + col]
					 *                 (leading dimension = column count + 1). The copied
					 *                 shape of a is m-by-k when transa is CblasNoTrans and
					 *                 k-by-m otherwise; b is k-by-n or n-by-k likewise;
					 *                 c is always m-by-n. After the CblasRowMajor call the
					 *                 contents of C are copied back into c.
					 *   TEST_COL_MJR  a, b and c are passed through unchanged with
					 *                 CblasColMajor.
					 *   otherwise     the call is made with UNDEFINED as the order
					 *                 (presumably to exercise the library's bad-argument
					 *                 handling -- confirm against the test driver).
					 *
					 * All integer arguments are passed by pointer. The signature offers no
					 * error channel, so a failed allocation on the row-major path terminates
					 * the program with abort() instead of dereferencing a NULL pointer.
					 */
					void F77_cgemm3m(int *order, char *transpa, char *transpb, int *m, int *n,
					     int *k, CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a, int *lda,
					     CBLAS_TEST_COMPLEX *b, int *ldb, CBLAS_TEST_COMPLEX *beta,
					     CBLAS_TEST_COMPLEX *c, int *ldc ) {

					  CBLAS_TEST_COMPLEX *A, *B, *C;
					  int i,j,LDA, LDB, LDC;
					  int a_rows, a_cols;                /* shape of the copy of a */
					  int b_rows, b_cols;                /* shape of the copy of b */
					  size_t a_bytes, b_bytes, c_bytes;  /* allocation sizes       */
					  enum CBLAS_TRANSPOSE transa, transb;

					  get_transpose_type(transpa, &transa);
					  get_transpose_type(transpb, &transb);

					  if (*order == TEST_ROW_MJR) {
					     if (transa == CblasNoTrans) { a_rows = *m; a_cols = *k; }
					     else                        { a_rows = *k; a_cols = *m; }

					     if (transb == CblasNoTrans) { b_rows = *k; b_cols = *n; }
					     else                        { b_rows = *n; b_cols = *k; }

					     LDA = a_cols+1;
					     LDB = b_cols+1;
					     LDC = *n+1;

					     /* Widen before multiplying so the products are not formed in int. */
					     a_bytes = (size_t)a_rows*(size_t)LDA*sizeof(CBLAS_TEST_COMPLEX);
					     b_bytes = (size_t)b_rows*(size_t)LDB*sizeof(CBLAS_TEST_COMPLEX);
					     c_bytes = (size_t)(*m)*(size_t)LDC*sizeof(CBLAS_TEST_COMPLEX);
					     A = (CBLAS_TEST_COMPLEX *)malloc(a_bytes);
					     B = (CBLAS_TEST_COMPLEX *)malloc(b_bytes);
					     C = (CBLAS_TEST_COMPLEX *)malloc(c_bytes);

					     /* malloc(0) may legitimately return NULL, so only a non-empty
					      * request that comes back NULL is treated as a failure. */
					     if ((A == NULL && a_bytes != 0) ||
					         (B == NULL && b_bytes != 0) ||
					         (C == NULL && c_bytes != 0)) {
					        free(A);
					        free(B);
					        free(C);
					        abort();
					     }

					     /* Transposing copy of a into A. */
					     for( i=0; i<a_rows; i++ )
					        for( j=0; j<a_cols; j++ ) {
					           A[i*LDA+j].real=a[j*(*lda)+i].real;
					           A[i*LDA+j].imag=a[j*(*lda)+i].imag;
					        }

					     /* Transposing copy of b into B. */
					     for( i=0; i<b_rows; i++ )
					        for( j=0; j<b_cols; j++ ) {
					           B[i*LDB+j].real=b[j*(*ldb)+i].real;
					           B[i*LDB+j].imag=b[j*(*ldb)+i].imag;
					        }

					     /* Transposing copy of c into C. */
					     for( j=0; j<*n; j++ )
					        for( i=0; i<*m; i++ ) {
					           C[i*LDC+j].real=c[j*(*ldc)+i].real;
					           C[i*LDC+j].imag=c[j*(*ldc)+i].imag;
					        }

					     cblas_cgemm3m( CblasRowMajor, transa, transb, *m, *n, *k, alpha, A, LDA,
					                  B, LDB, beta, C, LDC );

					     /* Copy the result held in C back into the caller's c. */
					     for( j=0; j<*n; j++ )
					        for( i=0; i<*m; i++ ) {
					           c[j*(*ldc)+i].real=C[i*LDC+j].real;
					           c[j*(*ldc)+i].imag=C[i*LDC+j].imag;
					        }

					     free(A);
					     free(B);
					     free(C);
					  }
					  else if (*order == TEST_COL_MJR)
					     cblas_cgemm3m( CblasColMajor, transa, transb, *m, *n, *k, alpha, a, *lda,
					                  b, *ldb, beta, c, *ldc );
					  else
					     cblas_cgemm3m( UNDEFINED, transa, transb, *m, *n, *k, alpha, a, *lda,
					                  b, *ldb, beta, c, *ldc );
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Some files were not shown because too many files have changed in this diff Show More
		Loading…
	
		Reference in New Issue