Compare commits

..

1 Commits

Author SHA1 Message Date
Zhang Xianyi
12ab1804b6 Merge branch 'develop' 2016-04-12 15:29:19 -04:00
4413 changed files with 75130 additions and 335153 deletions

18
.gitignore vendored
View File

@@ -14,21 +14,6 @@ lapack-3.4.2.tgz
lapack-netlib/make.inc
lapack-netlib/lapacke/include/lapacke_mangling.h
lapack-netlib/TESTING/testing_results.txt
lapack-netlib/INSTALL/test*
lapack-netlib/TESTING/xeigtstc
lapack-netlib/TESTING/xeigtstd
lapack-netlib/TESTING/xeigtsts
lapack-netlib/TESTING/xeigtstz
lapack-netlib/TESTING/xlintstc
lapack-netlib/TESTING/xlintstd
lapack-netlib/TESTING/xlintstds
lapack-netlib/TESTING/xlintstrfc
lapack-netlib/TESTING/xlintstrfd
lapack-netlib/TESTING/xlintstrfs
lapack-netlib/TESTING/xlintstrfz
lapack-netlib/TESTING/xlintsts
lapack-netlib/TESTING/xlintstz
lapack-netlib/TESTING/xlintstzc
*.so
*.so.*
*.a
@@ -84,6 +69,3 @@ test/zblat3
build
build.*
*.swp
benchmark/*.goto
benchmark/smallscaling

View File

@@ -2,19 +2,16 @@
## Author: Hank Anderson <hank@statease.com>
##
cmake_minimum_required(VERSION 2.8.5)
cmake_minimum_required(VERSION 2.8.4)
project(OpenBLAS)
set(OpenBLAS_MAJOR_VERSION 0)
set(OpenBLAS_MINOR_VERSION 2)
set(OpenBLAS_PATCH_VERSION 20.dev)
set(OpenBLAS_PATCH_VERSION 18)
set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}")
enable_language(ASM)
enable_language(C)
# Adhere to GNU filesystem layout conventions
include(GNUInstallDirs)
if(MSVC)
set(OpenBLAS_LIBNAME libopenblas)
else()
@@ -33,20 +30,10 @@ set(NO_LAPACK 1)
set(NO_LAPACKE 1)
endif()
if(CMAKE_CONFIGURATION_TYPES) # multiconfig generator?
set(CMAKE_CONFIGURATION_TYPES "Debug;Release" CACHE STRING "" FORCE)
set(CMAKE_BUILD_TYPE
Debug Debug
Release Release
)
if(BUILD_DEBUG)
set(CMAKE_BUILD_TYPE Debug)
else()
if( NOT CMAKE_BUILD_TYPE )
if(BUILD_DEBUG)
set(CMAKE_BUILD_TYPE Debug)
else()
set(CMAKE_BUILD_TYPE Release)
endif()
endif()
set(CMAKE_BUILD_TYPE Release)
endif()
if(BUILD_WITHOUT_CBLAS)
@@ -58,8 +45,8 @@ endif()
message(WARNING "CMake support is experimental. This will not produce the same Makefiles that OpenBLAS ships with. Only x86 support is currently available.")
include("${PROJECT_SOURCE_DIR}/cmake/utils.cmake")
include("${PROJECT_SOURCE_DIR}/cmake/system.cmake")
include("${CMAKE_SOURCE_DIR}/cmake/utils.cmake")
include("${CMAKE_SOURCE_DIR}/cmake/system.cmake")
set(BLASDIRS interface driver/level2 driver/level3 driver/others)
@@ -120,12 +107,9 @@ if (${NO_STATIC} AND ${NO_SHARED})
endif ()
#Set default output directory
set( CMAKE_LIBRARY_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib)
set( CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib)
if(MSVC)
set( CMAKE_LIBRARY_OUTPUT_DIRECTORY_DEBUG ${PROJECT_BINARY_DIR}/lib/Debug)
set( CMAKE_ARCHIVE_OUTPUT_DIRECTORY_RELEASE ${PROJECT_BINARY_DIR}/lib/Release)
endif ()
set( CMAKE_LIBRARY_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib )
set( CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib )
# get obj vars into the format that add_library likes: $<TARGET_OBJECTS:objlib> (see http://www.cmake.org/cmake/help/v3.0/command/add_library.html)
set(TARGET_OBJS "")
foreach (SUBDIR ${SUBDIRS})
@@ -139,50 +123,43 @@ endforeach ()
# Can't just use lapack-netlib's CMake files, since they are set up to search for BLAS, build and install a binary. We just want to build a couple of lib files out of lapack and lapacke.
# Not using add_subdirectory here because lapack-netlib already has its own CMakeLists.txt. Instead include a cmake script with the sources we want.
if (NOT NOFORTRAN AND NOT NO_LAPACK)
include("${PROJECT_SOURCE_DIR}/cmake/lapack.cmake")
include("${CMAKE_SOURCE_DIR}/cmake/lapack.cmake")
if (NOT NO_LAPACKE)
include("${PROJECT_SOURCE_DIR}/cmake/lapacke.cmake")
include("${CMAKE_SOURCE_DIR}/cmake/lapacke.cmake")
endif ()
endif ()
# Only generate .def for dll on MSVC and always produce pdb files for debug and release
#Only generate .def for dll on MSVC
if(MSVC)
set(OpenBLAS_DEF_FILE "${PROJECT_BINARY_DIR}/openblas.def")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /Zi")
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /Zi")
set(CMAKE_SHARED_LINKER_FLAGS_RELEASE "${CMAKE_SHARED_LINKER_FLAGS_RELEASE} /DEBUG /OPT:REF /OPT:ICF")
endif()
# add objects to the openblas lib
add_library(${OpenBLAS_LIBNAME} SHARED ${LA_SOURCES} ${LAPACKE_SOURCES} ${TARGET_OBJS} ${OpenBLAS_DEF_FILE})
include("${PROJECT_SOURCE_DIR}/cmake/export.cmake")
include("${CMAKE_SOURCE_DIR}/cmake/export.cmake")
# Set output for libopenblas
set_target_properties( ${OpenBLAS_LIBNAME} PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib)
set_target_properties( ${OpenBLAS_LIBNAME} PROPERTIES LIBRARY_OUTPUT_NAME_DEBUG "${OpenBLAS_LIBNAME}_d")
foreach (OUTPUTCONFIG ${CMAKE_CONFIGURATION_TYPES})
string( TOUPPER ${OUTPUTCONFIG} OUTPUTCONFIG )
set_target_properties( ${OpenBLAS_LIBNAME} PROPERTIES RUNTIME_OUTPUT_DIRECTORY_${OUTPUTCONFIG} ${PROJECT_BINARY_DIR}/lib/${OUTPUTCONFIG} )
set_target_properties( ${OpenBLAS_LIBNAME} PROPERTIES LIBRARY_OUTPUT_DIRECTORY_${OUTPUTCONFIG} ${PROJECT_BINARY_DIR}/lib/${OUTPUTCONFIG} )
set_target_properties( ${OpenBLAS_LIBNAME} PROPERTIES ARCHIVE_OUTPUT_DIRECTORY_${OUTPUTCONFIG} ${PROJECT_BINARY_DIR}/lib/${OUTPUTCONFIG} )
set_target_properties( ${OpenBLAS_LIBNAME} PROPERTIES RUNTIME_OUTPUT_DIRECTORY_${OUTPUTCONFIG} ${PROJECT_BINARY_DIR}/lib)
set_target_properties( ${OpenBLAS_LIBNAME} PROPERTIES LIBRARY_OUTPUT_DIRECTORY_${OUTPUTCONFIG} ${PROJECT_BINARY_DIR}/lib)
set_target_properties( ${OpenBLAS_LIBNAME} PROPERTIES ARCHIVE_OUTPUT_DIRECTORY_${OUTPUTCONFIG} ${PROJECT_BINARY_DIR}/lib)
endforeach()
enable_testing()
add_subdirectory(utest)
if (NOT MSVC)
# MSVC builds only the shared library; the static library is built for all other toolchains
if(NOT MSVC)
# MSVC builds only the shared library; the static library is built for all other toolchains
add_library(${OpenBLAS_LIBNAME}_static STATIC ${LA_SOURCES} ${LAPACKE_SOURCES} ${TARGET_OBJS})
set_target_properties(${OpenBLAS_LIBNAME}_static PROPERTIES OUTPUT_NAME ${OpenBLAS_LIBNAME})
set_target_properties(${OpenBLAS_LIBNAME}_static PROPERTIES CLEAN_DIRECT_OUTPUT 1)
add_library(${OpenBLAS_LIBNAME}_static STATIC ${LA_SOURCES} ${LAPACKE_SOURCES} ${TARGET_OBJS})
set_target_properties(${OpenBLAS_LIBNAME}_static PROPERTIES OUTPUT_NAME ${OpenBLAS_LIBNAME})
set_target_properties(${OpenBLAS_LIBNAME}_static PROPERTIES CLEAN_DIRECT_OUTPUT 1)
if(SMP)
target_link_libraries(${OpenBLAS_LIBNAME} pthread)
target_link_libraries(${OpenBLAS_LIBNAME}_static pthread)
if(SMP)
target_link_libraries(${OpenBLAS_LIBNAME} pthread)
target_link_libraries(${OpenBLAS_LIBNAME}_static pthread)
endif()
#build test and ctest
@@ -221,73 +198,3 @@ set_target_properties(${OpenBLAS_LIBNAME} PROPERTIES
#endif
# @touch lib.grd
# Install project
# Install libraries
install(TARGETS ${OpenBLAS_LIBNAME}
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} )
# Install include files
set (GENCONFIG_BIN ${CMAKE_BINARY_DIR}/gen_config_h${CMAKE_EXECUTABLE_SUFFIX})
ADD_CUSTOM_COMMAND(
OUTPUT ${CMAKE_BINARY_DIR}/openblas_config.h
DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/config.h
COMMAND ${GENCONFIG_BIN} ${CMAKE_CURRENT_SOURCE_DIR}/config.h ${CMAKE_CURRENT_SOURCE_DIR}/openblas_config_template.h > ${CMAKE_BINARY_DIR}/openblas_config.h
)
ADD_CUSTOM_TARGET(genconfig
ALL
DEPENDS openblas_config.h
)
add_dependencies(genconfig ${OpenBLAS_LIBNAME})
install (FILES ${CMAKE_BINARY_DIR}/openblas_config.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
message(STATUS "Generating f77blas.h in ${CMAKE_INSTALL_INCLUDEDIR}")
ADD_CUSTOM_TARGET(genf77blas
ALL
COMMAND ${AWK} 'BEGIN{print \"\#ifndef OPENBLAS_F77BLAS_H\" \; print \"\#define OPENBLAS_F77BLAS_H\" \; print \"\#include \\"openblas_config.h\\" \"}; NF {print}; END{print \"\#endif\"}' ${CMAKE_CURRENT_SOURCE_DIR}/common_interface.h > ${CMAKE_BINARY_DIR}/f77blas.h
DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/config.h
)
add_dependencies(genf77blas ${OpenBLAS_LIBNAME})
install (FILES ${CMAKE_BINARY_DIR}/f77blas.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
if(NOT NO_CBLAS)
message (STATUS "Generating cblas.h in ${CMAKE_INSTALL_INCLUDEDIR}")
ADD_CUSTOM_TARGET(gencblas
ALL
COMMAND ${SED} 's/common/openblas_config/g' ${CMAKE_CURRENT_SOURCE_DIR}/cblas.h > "${CMAKE_BINARY_DIR}/cblas.tmp"
COMMAND cp "${CMAKE_BINARY_DIR}/cblas.tmp" "${CMAKE_BINARY_DIR}/cblas.h"
DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/cblas.h
)
add_dependencies(gencblas ${OpenBLAS_LIBNAME})
install (FILES ${CMAKE_BINARY_DIR}/cblas.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
endif()
if(NOT NO_LAPACKE)
message (STATUS "Copying LAPACKE header files to ${CMAKE_INSTALL_INCLUDEDIR}")
add_dependencies( ${OpenBLAS_LIBNAME} genlapacke)
FILE(GLOB_RECURSE INCLUDE_FILES "${CMAKE_CURRENT_SOURCE_DIR}/lapack-netlib/LAPACKE/*.h")
install (FILES ${INCLUDE_FILES} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
ADD_CUSTOM_TARGET(genlapacke
COMMAND cp ${CMAKE_CURRENT_SOURCE_DIR}/lapack-netlib/LAPACKE/include/lapacke_mangling_with_flags.h.in "${CMAKE_BINARY_DIR}/lapacke_mangling.h"
)
install (FILES ${CMAKE_BINARY_DIR}/lapacke_mangling.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
endif()
if(NOT MSVC)
install (TARGETS ${OpenBLAS_LIBNAME}_static DESTINATION ${CMAKE_INSTALL_LIBDIR})
endif()
include(FindPkgConfig QUIET)
if(PKG_CONFIG_FOUND)
configure_file(${PROJECT_SOURCE_DIR}/cmake/openblas.pc.in ${PROJECT_BINARY_DIR}/openblas.pc @ONLY)
install (FILES ${PROJECT_BINARY_DIR}/openblas.pc DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig/)
endif()

View File

@@ -150,21 +150,3 @@ In chronological order:
* theoractice <https://github.com/theoractice/>
* [2016-03-20] Fix compiler error in VisualStudio with CMake
* [2016-03-22] Fix access violation on Windows while static linking
* Paul Mustière <https://github.com/buffer51/>
* [2016-02-04] Fix Android build on ARMV7
* [2016-04-26] Android build with LAPACK for ARMV7 & ARMV8
* Shivraj Patil <https://github.com/sva-img/>
* [2016-05-03] DGEMM optimization for MIPS P5600 and I6400 using MSA
* Kaustubh Raste <https://github.com/ksraste/>
* [2016-05-09] DTRSM optimization for MIPS P5600 and I6400 using MSA
* [2016-05-20] STRSM optimization for MIPS P5600 and I6400 using MSA
* Abdelrauf <https://github.com/quickwritereader>
* [2017-01-01] dgemm and dtrmm kernels for IBM z13
* [2017-02-26] ztrmm kernel for IBM z13
* [2017-03-13] strmm and ctrmm kernel for IBM z13

View File

@@ -1,22 +1,4 @@
OpenBLAS ChangeLog
====================================================================
Version 0.2.19
1-Sep-2016
common:
* Improved cross compiling.
* Fix the bug on musl libc.
POWER:
* Optimize BLAS on Power8
* Fixed Julia+OpenBLAS bugs on Power8
MIPS:
* Optimize BLAS on MIPS P5600 and I6400 (Thanks, Shivraj Patil, Kaustubh Raste)
ARM:
* Improved on ARM Cortex-A57. (Thanks, Ashwin Sekhar T K)
====================================================================
Version 0.2.18
12-Apr-2016

View File

@@ -81,7 +81,7 @@ endif
shared :
ifndef NO_SHARED
ifeq ($(OSNAME), $(filter $(OSNAME),Linux SunOS Android))
ifeq ($(OSNAME), $(filter $(OSNAME),Linux SunOS))
@$(MAKE) -C exports so
@ln -fs $(LIBSONAME) $(LIBPREFIX).so
@ln -fs $(LIBSONAME) $(LIBPREFIX).so.$(MAJOR_VERSION)
@@ -108,6 +108,8 @@ endif
tests :
ifndef NOFORTRAN
ifndef TARGET
ifndef CROSS
touch $(LIBNAME)
ifndef NO_FBLAS
$(MAKE) -C test all
@@ -117,6 +119,8 @@ ifndef NO_CBLAS
$(MAKE) -C ctest all
endif
endif
endif
endif
libs :
ifeq ($(CORE), UNKOWN)
@@ -278,13 +282,13 @@ lapack-timing : large.tgz timing.tgz
ifndef NOFORTRAN
(cd $(NETLIB_LAPACK_DIR); $(TAR) zxf ../timing.tgz TIMING)
(cd $(NETLIB_LAPACK_DIR)/TIMING; $(TAR) zxf ../../large.tgz )
$(MAKE) -C $(NETLIB_LAPACK_DIR)/TIMING
make -C $(NETLIB_LAPACK_DIR)/TIMING
endif
lapack-test :
(cd $(NETLIB_LAPACK_DIR)/TESTING && rm -f x* *.out)
$(MAKE) -j 1 -C $(NETLIB_LAPACK_DIR)/TESTING xeigtstc xeigtstd xeigtsts xeigtstz xlintstc xlintstd xlintstds xlintstrfd xlintstrfz xlintsts xlintstz xlintstzc xlintstrfs xlintstrfc
make -j 1 -C $(NETLIB_LAPACK_DIR)/TESTING xeigtstc xeigtstd xeigtsts xeigtstz xlintstc xlintstd xlintstds xlintstrfd xlintstrfz xlintsts xlintstz xlintstzc xlintstrfs xlintstrfc
ifneq ($(CROSS), 1)
( cd $(NETLIB_LAPACK_DIR)/INSTALL; ./testlsame; ./testslamch; ./testdlamch; \
./testsecond; ./testdsecnd; ./testieee; ./testversion )
@@ -299,7 +303,7 @@ lapack-runtest:
blas-test:
(cd $(NETLIB_LAPACK_DIR)/BLAS && rm -f x* *.out)
$(MAKE) -j 1 -C $(NETLIB_LAPACK_DIR) blas_testing
make -j 1 -C $(NETLIB_LAPACK_DIR) blas_testing
(cd $(NETLIB_LAPACK_DIR)/BLAS && cat *.out)
@@ -329,8 +333,3 @@ endif
@rm -f *.grd Makefile.conf_last config_last.h
@(cd $(NETLIB_LAPACK_DIR)/TESTING && rm -f x* *.out testing_results.txt)
@echo Done.
# Makefile debugging trick:
# call print-VARIABLE to see the runtime value of any variable
print-%:
@echo '$*=$($*)'

View File

@@ -1,19 +1,31 @@
ifeq ($(CORE), $(filter $(CORE),ARMV7 CORTEXA9 CORTEXA15))
# ifeq combined with $(filter ...) acts as a logical OR over the listed cores
ifeq ($(CORE), $(filter $(CORE),CORTEXA9 CORTEXA15))
ifeq ($(OSNAME), Android)
CCOMMON_OPT += -mfpu=neon -march=armv7-a
FCOMMON_OPT += -mfpu=neon -march=armv7-a
CCOMMON_OPT += -marm -mfpu=neon -mfloat-abi=hard -march=armv7-a
FCOMMON_OPT += -marm -mfpu=neon -mfloat-abi=hard -march=armv7-a
else
CCOMMON_OPT += -mfpu=vfpv3 -march=armv7-a
FCOMMON_OPT += -mfpu=vfpv3 -march=armv7-a
CCOMMON_OPT += -marm -mfpu=vfpv3 -mfloat-abi=hard -march=armv7-a
FCOMMON_OPT += -marm -mfpu=vfpv3 -mfloat-abi=hard -march=armv7-a
endif
endif
ifeq ($(CORE), ARMV7)
ifeq ($(OSNAME), Android)
CCOMMON_OPT += -marm -mfpu=neon -mfloat-abi=hard -march=armv7-a -Wl,--no-warn-mismatch
FCOMMON_OPT += -marm -mfpu=neon -mfloat-abi=hard -march=armv7-a -Wl,--no-warn-mismatch
else
CCOMMON_OPT += -marm -mfpu=vfpv3 -mfloat-abi=hard -march=armv7-a
FCOMMON_OPT += -marm -mfpu=vfpv3 -mfloat-abi=hard -march=armv7-a
endif
endif
ifeq ($(CORE), ARMV6)
CCOMMON_OPT += -mfpu=vfp -march=armv6
FCOMMON_OPT += -mfpu=vfp -march=armv6
CCOMMON_OPT += -marm -mfpu=vfp -mfloat-abi=hard -march=armv6
FCOMMON_OPT += -marm -mfpu=vfp -mfloat-abi=hard -march=armv6
endif
ifeq ($(CORE), ARMV5)
CCOMMON_OPT += -march=armv5
FCOMMON_OPT += -march=armv5
CCOMMON_OPT += -marm -march=armv5
FCOMMON_OPT += -marm -march=armv5
endif

View File

@@ -9,17 +9,3 @@ CCOMMON_OPT += -march=armv8-a+crc+crypto+fp+simd -mtune=cortex-a57
FCOMMON_OPT += -march=armv8-a+crc+crypto+fp+simd -mtune=cortex-a57
endif
ifeq ($(CORE), VULCAN)
CCOMMON_OPT += -mtune=vulcan -mcpu=vulcan
FCOMMON_OPT += -mtune=vulcan -mcpu=vulcan
endif
ifeq ($(CORE), THUNDERX)
CCOMMON_OPT += -mtune=thunderx -mcpu=thunderx
FCOMMON_OPT += -mtune=thunderx -mcpu=thunderx
endif
ifeq ($(CORE), THUNDERX2T99)
CCOMMON_OPT += -mtune=thunderx2t99 -mcpu=thunderx2t99
FCOMMON_OPT += -mtune=thunderx2t99 -mcpu=thunderx2t99
endif

View File

@@ -12,7 +12,6 @@ OPENBLAS_BUILD_DIR := $(CURDIR)
OPENBLAS_CMAKE_DIR := $(OPENBLAS_LIBRARY_DIR)/cmake/openblas
OPENBLAS_CMAKE_CONFIG := OpenBLASConfig.cmake
OPENBLAS_CMAKE_CONFIG_VERSION := OpenBLASConfigVersion.cmake
OPENBLAS_PKGCONFIG_DIR := $(OPENBLAS_LIBRARY_DIR)/pkgconfig
.PHONY : install
.NOTPARALLEL : install
@@ -21,122 +20,110 @@ lib.grd :
$(error OpenBLAS: Please run "make" firstly)
install : lib.grd
@-mkdir -p "$(DESTDIR)$(PREFIX)"
@-mkdir -p "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)"
@-mkdir -p "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
@-mkdir -p "$(DESTDIR)$(OPENBLAS_BINARY_DIR)"
@-mkdir -p "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)"
@-mkdir -p "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)"
@-mkdir -p $(DESTDIR)$(PREFIX)
@-mkdir -p $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)
@-mkdir -p $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
@-mkdir -p $(DESTDIR)$(OPENBLAS_BINARY_DIR)
@-mkdir -p $(DESTDIR)$(OPENBLAS_CMAKE_DIR)
@echo Generating openblas_config.h in $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)
#for inc
@echo \#ifndef OPENBLAS_CONFIG_H > "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h"
@echo \#define OPENBLAS_CONFIG_H >> "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h"
@$(AWK) 'NF {print $$1, "OPENBLAS_"$$2, $$3}' config_last.h >> "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h"
@echo \#define OPENBLAS_VERSION \" OpenBLAS $(VERSION) \" >> "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h"
@cat openblas_config_template.h >> "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h"
@echo \#endif \/\* OPENBLAS_CONFIG_H \*\/ >> "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h"
@echo \#ifndef OPENBLAS_CONFIG_H > $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h
@echo \#define OPENBLAS_CONFIG_H >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h
@$(AWK) 'NF {print $$1, "OPENBLAS_"$$2, $$3}' config_last.h >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h
@echo \#define OPENBLAS_VERSION \" OpenBLAS $(VERSION) \" >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h
@cat openblas_config_template.h >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h
@echo \#endif \/\* OPENBLAS_CONFIG_H \*\/ >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h
@echo Generating f77blas.h in $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)
@echo \#ifndef OPENBLAS_F77BLAS_H > "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/f77blas.h"
@echo \#define OPENBLAS_F77BLAS_H >> "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/f77blas.h"
@echo \#include \"openblas_config.h\" >> "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/f77blas.h"
@cat common_interface.h >> "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/f77blas.h"
@echo \#endif >> "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/f77blas.h"
@echo \#ifndef OPENBLAS_F77BLAS_H > $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/f77blas.h
@echo \#define OPENBLAS_F77BLAS_H >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/f77blas.h
@echo \#include \"openblas_config.h\" >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/f77blas.h
@cat common_interface.h >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/f77blas.h
@echo \#endif >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/f77blas.h
ifndef NO_CBLAS
@echo Generating cblas.h in $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)
@sed 's/common/openblas_config/g' cblas.h > "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/cblas.h"
@sed 's/common/openblas_config/g' cblas.h > $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/cblas.h
endif
ifndef NO_LAPACKE
@echo Copying LAPACKE header files to $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)
@-install -pm644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke.h"
@-install -pm644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke_config.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_config.h"
@-install -pm644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke_mangling_with_flags.h.in "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_mangling.h"
@-install -pm644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke_utils.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_utils.h"
@-install -pm644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke.h
@-install -pm644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke_config.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_config.h
@-install -pm644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke_mangling_with_flags.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_mangling.h
@-install -pm644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke_utils.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_utils.h
endif
#for install static library
ifndef NO_STATIC
@echo Copying the static library to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
@install -pm644 $(LIBNAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
@cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \
@install -pm644 $(LIBNAME) $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
@cd $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) ; \
ln -fs $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX)
endif
#for install shared library
ifndef NO_SHARED
@echo Copying the shared library to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
ifeq ($(OSNAME), $(filter $(OSNAME),Linux SunOS Android))
@install -pm755 $(LIBSONAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
@cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \
ifeq ($(OSNAME), $(filter $(OSNAME),Linux SunOS))
@install -pm755 $(LIBSONAME) $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
@cd $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) ; \
ln -fs $(LIBSONAME) $(LIBPREFIX).so ; \
ln -fs $(LIBSONAME) $(LIBPREFIX).so.$(MAJOR_VERSION)
endif
ifeq ($(OSNAME), FreeBSD)
@cp $(LIBSONAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
@cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \
@cp $(LIBSONAME) $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
@cd $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) ; \
ln -fs $(LIBSONAME) $(LIBPREFIX).so
endif
ifeq ($(OSNAME), NetBSD)
@cp $(LIBSONAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
@cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \
@cp $(LIBSONAME) $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
@cd $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) ; \
ln -fs $(LIBSONAME) $(LIBPREFIX).so
endif
ifeq ($(OSNAME), Darwin)
@-cp $(LIBDYNNAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
@-install_name_tool -id "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)/$(LIBDYNNAME)" "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)/$(LIBDYNNAME)"
@cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \
@-cp $(LIBDYNNAME) $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
@-install_name_tool -id $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)/$(LIBDYNNAME) $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)/$(LIBDYNNAME)
@cd $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) ; \
ln -fs $(LIBDYNNAME) $(LIBPREFIX).dylib
endif
ifeq ($(OSNAME), WINNT)
@-cp $(LIBDLLNAME) "$(DESTDIR)$(OPENBLAS_BINARY_DIR)"
@-cp $(LIBDLLNAME).a "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
@-cp $(LIBDLLNAME) $(DESTDIR)$(OPENBLAS_BINARY_DIR)
@-cp $(LIBDLLNAME).a $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
endif
ifeq ($(OSNAME), CYGWIN_NT)
@-cp $(LIBDLLNAME) "$(DESTDIR)$(OPENBLAS_BINARY_DIR)"
@-cp $(LIBDLLNAME) $(OPENBLAS_BINARY_DIR)
endif
endif
#Generating openblas.pc
@echo Generating openblas.pc in $(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)
@echo 'libdir='$(OPENBLAS_LIBRARY_DIR) >> $(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc
@echo 'includedir='$(OPENBLAS_INCLUDE_DIR) >> $(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc
@echo 'version='$(VERSION) >> $(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc
@echo 'extralib='$(EXTRALIB) >> $(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc
@cat openblas.pc.in >> $(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc
#Generating OpenBLASConfig.cmake
@echo Generating $(OPENBLAS_CMAKE_CONFIG) in $(DESTDIR)$(OPENBLAS_CMAKE_DIR)
@echo "SET(OpenBLAS_VERSION \"${VERSION}\")" > "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)"
@echo "SET(OpenBLAS_INCLUDE_DIRS ${OPENBLAS_INCLUDE_DIR})" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)"
@echo "SET(OpenBLAS_VERSION \"${VERSION}\")" > $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)
@echo "SET(OpenBLAS_INCLUDE_DIRS ${OPENBLAS_INCLUDE_DIR})" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)
ifndef NO_SHARED
# ifeq combined with $(filter ...) acts as a logical OR over the listed OS names
ifeq ($(OSNAME), $(filter $(OSNAME),Linux FreeBSD NetBSD))
@echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_LIBRARY_DIR}/$(LIBPREFIX).so)" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)"
@echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_LIBRARY_DIR}/$(LIBPREFIX).so)" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)
endif
ifeq ($(OSNAME), $(filter $(OSNAME),WINNT CYGWIN_NT))
@echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_BINARY_DIR}/$(LIBDLLNAME))" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)"
@echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_BINARY_DIR}/$(LIBDLLNAME))" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)
endif
ifeq ($(OSNAME), Darwin)
@echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_LIBRARY_DIR}/$(LIBPREFIX).dylib)" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)"
@echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_LIBRARY_DIR}/$(LIBPREFIX).dylib)" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)
endif
else
#only static
@echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_LIBRARY_DIR}/$(LIBPREFIX).$(LIBSUFFIX))" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)"
@echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_LIBRARY_DIR}/$(LIBPREFIX).$(LIBSUFFIX))" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)
endif
#Generating OpenBLASConfigVersion.cmake
@echo Generating $(OPENBLAS_CMAKE_CONFIG_VERSION) in $(DESTDIR)$(OPENBLAS_CMAKE_DIR)
@echo "set (PACKAGE_VERSION \"${VERSION}\")" > "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)"
@echo "if (PACKAGE_VERSION VERSION_LESS PACKAGE_FIND_VERSION)" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)"
@echo " set (PACKAGE_VERSION_COMPATIBLE FALSE)" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)"
@echo "else ()" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)"
@echo " set (PACKAGE_VERSION_COMPATIBLE TRUE)" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)"
@echo " if (PACKAGE_FIND_VERSION STREQUAL PACKAGE_VERSION)" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)"
@echo " set (PACKAGE_VERSION_EXACT TRUE)" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)"
@echo " endif ()" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)"
@echo "endif ()" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)"
@echo "set (PACKAGE_VERSION \"${VERSION}\")" > $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)
@echo "if (PACKAGE_VERSION VERSION_LESS PACKAGE_FIND_VERSION)" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)
@echo " set (PACKAGE_VERSION_COMPATIBLE FALSE)" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)
@echo "else ()" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)
@echo " set (PACKAGE_VERSION_COMPATIBLE TRUE)" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)
@echo " if (PACKAGE_FIND_VERSION STREQUAL PACKAGE_VERSION)" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)
@echo " set (PACKAGE_VERSION_EXACT TRUE)" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)
@echo " endif ()" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)
@echo "endif ()" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)
@echo Install OK!

View File

@@ -1,3 +0,0 @@
ifdef BINARY64
else
endif

View File

@@ -1,26 +1,4 @@
ifdef USE_THREAD
ifeq ($(USE_THREAD), 0)
USE_OPENMP = 0
else
USE_OPENMP = 1
endif
else
USE_OPENMP = 1
endif
ifeq ($(CORE), POWER8)
ifeq ($(USE_OPENMP), 1)
COMMON_OPT += -Ofast -mcpu=power8 -mtune=power8 -mvsx -malign-power -DUSE_OPENMP -fno-fast-math -fopenmp
FCOMMON_OPT += -O2 -frecursive -mcpu=power8 -mtune=power8 -malign-power -DUSE_OPENMP -fno-fast-math -fopenmp
else
COMMON_OPT += -Ofast -mcpu=power8 -mtune=power8 -mvsx -malign-power -fno-fast-math
FCOMMON_OPT += -O2 -frecursive -mcpu=power8 -mtune=power8 -malign-power -fno-fast-math
endif
endif
# CCOMMON_OPT += -DALLOC_SHM
FLAMEPATH = $(HOME)/flame/lib
@@ -38,16 +16,6 @@ else
endif
endif
#Either uncomment below line or run make with `USE_MASS=1` to enable support of MASS library
#USE_MASS = 1
ifeq ($(USE_MASS), 1)
# Path to MASS libs, change it if the libs are installed at any other location
MASSPATH = /opt/ibm/xlmass/8.1.5/lib
COMMON_OPT += -mveclibabi=mass -ftree-vectorize -funsafe-math-optimizations -DUSE_MASS
EXTRALIB += -L$(MASSPATH) -lmass -lmassvp8 -lmass_simdp8
endif
ifdef BINARY64

View File

@@ -17,26 +17,14 @@ ifdef CPUIDEMU
EXFLAGS = -DCPUIDEMU -DVENDOR=99
endif
ifeq ($(TARGET), P5600)
TARGET_FLAGS = -mips32r5
endif
ifeq ($(TARGET), I6400)
TARGET_FLAGS = -mips64r6
endif
ifeq ($(TARGET), P6600)
TARGET_FLAGS = -mips64r6
endif
all: getarch_2nd
./getarch_2nd 0 >> $(TARGET_MAKE)
./getarch_2nd 1 >> $(TARGET_CONF)
config.h : c_check f_check getarch
perl ./c_check $(TARGET_MAKE) $(TARGET_CONF) $(CC) $(TARGET_FLAGS)
perl ./c_check $(TARGET_MAKE) $(TARGET_CONF) $(CC)
ifneq ($(ONLY_CBLAS), 1)
perl ./f_check $(TARGET_MAKE) $(TARGET_CONF) $(FC) $(TARGET_FLAGS)
perl ./f_check $(TARGET_MAKE) $(TARGET_CONF) $(FC)
else
#When we only build CBLAS, we set NOFORTRAN=2
echo "NOFORTRAN=2" >> $(TARGET_MAKE)

View File

@@ -3,7 +3,7 @@
#
# This library's version
VERSION = 0.2.20.dev
VERSION = 0.2.18
# If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a
# and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library
@@ -52,7 +52,6 @@ VERSION = 0.2.20.dev
# USE_THREAD = 0
# If you're going to use this library with OpenMP, please comment it in.
# This flag is always set for POWER8. Don't modify the flag
# USE_OPENMP = 1
# You can define maximum number of threads. Basically it should be
@@ -154,12 +153,10 @@ NO_AFFINITY = 1
# Common Optimization Flag;
# The default -O2 is enough.
# Flags for POWER8 are defined in Makefile.power. Don't modify COMMON_OPT
# COMMON_OPT = -O2
# gfortran option for LAPACK
# enable this flag only on 64bit Linux and if you need a thread safe lapack library
# Flags for POWER8 are defined in Makefile.power. Don't modify FCOMMON_OPT
# FCOMMON_OPT = -frecursive
# Profiling flags

View File

@@ -68,9 +68,6 @@ endif
ifeq ($(TARGET), EXCAVATOR)
GETARCH_FLAGS := -DFORCE_BARCELONA
endif
ifeq ($(TARGET), ZEN)
GETARCH_FLAGS := -DFORCE_BARCELONA
endif
endif
@@ -101,9 +98,6 @@ endif
ifeq ($(TARGET_CORE), EXCAVATOR)
GETARCH_FLAGS := -DFORCE_BARCELONA
endif
ifeq ($(TARGET_CORE), ZEN)
GETARCH_FLAGS := -DFORCE_BARCELONA
endif
endif
@@ -165,7 +159,7 @@ ifndef GOTOBLAS_MAKEFILE
export GOTOBLAS_MAKEFILE = 1
# Generating Makefile.conf and config.h
DUMMY := $(shell $(MAKE) -C $(TOPDIR) -f Makefile.prebuild CC="$(CC)" FC="$(FC)" HOSTCC="$(HOSTCC)" CFLAGS="$(GETARCH_FLAGS)" BINARY=$(BINARY) USE_OPENMP=$(USE_OPENMP) TARGET_CORE=$(TARGET_CORE) ONLY_CBLAS=$(ONLY_CBLAS) TARGET=$(TARGET) all)
DUMMY := $(shell $(MAKE) -C $(TOPDIR) -f Makefile.prebuild CC="$(CC)" FC="$(FC)" HOSTCC="$(HOSTCC)" CFLAGS="$(GETARCH_FLAGS)" BINARY=$(BINARY) USE_OPENMP=$(USE_OPENMP) TARGET_CORE=$(TARGET_CORE) ONLY_CBLAS=$(ONLY_CBLAS) all)
ifndef TARGET_CORE
include $(TOPDIR)/Makefile.conf
@@ -223,9 +217,7 @@ endif
#
ifeq ($(OSNAME), Darwin)
ifndef MACOSX_DEPLOYMENT_TARGET
export MACOSX_DEPLOYMENT_TARGET=10.6
endif
MD5SUM = md5 -r
endif
@@ -242,10 +234,6 @@ EXTRALIB += -lm
NO_EXPRECISION = 1
endif
ifeq ($(OSNAME), Android)
EXTRALIB += -lm
endif
ifeq ($(OSNAME), AIX)
EXTRALIB += -lm
endif
@@ -418,6 +406,7 @@ CCOMMON_OPT += -fopenmp
endif
ifeq ($(C_COMPILER), CLANG)
$(error OpenBLAS: Clang didn't support OpenMP yet.)
CCOMMON_OPT += -fopenmp
endif
@@ -452,13 +441,12 @@ ifneq ($(NO_AVX), 1)
DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER STEAMROLLER EXCAVATOR
endif
ifneq ($(NO_AVX2), 1)
DYNAMIC_CORE += HASWELL ZEN
DYNAMIC_CORE += HASWELL
endif
endif
# If DYNAMIC_CORE is not set, DYNAMIC_ARCH cannot do anything, so force it to empty
ifndef DYNAMIC_CORE
override DYNAMIC_ARCH=
DYNAMIC_ARCH =
endif
endif
@@ -474,7 +462,7 @@ endif
endif
endif
ifeq ($(ARCH), $(filter $(ARCH),mips64 mips))
ifeq ($(ARCH), mips64)
NO_BINARY_MODE = 1
endif
@@ -486,23 +474,6 @@ endif
ifeq ($(ARCH), arm)
NO_BINARY_MODE = 1
BINARY_DEFINED = 1
CCOMMON_OPT += -marm
FCOMMON_OPT += -marm
# If softfp abi is mentioned on the command line, force it.
ifeq ($(ARM_SOFTFP_ABI), 1)
CCOMMON_OPT += -mfloat-abi=softfp
FCOMMON_OPT += -mfloat-abi=softfp
endif
ifeq ($(OSNAME), Android)
ifeq ($(ARM_SOFTFP_ABI), 1)
EXTRALIB += -lm
else
EXTRALIB += -Wl,-lm_hard
endif
endif
endif
ifeq ($(ARCH), arm64)
@@ -531,16 +502,13 @@ endif
ifdef NO_BINARY_MODE
ifeq ($(ARCH), $(filter $(ARCH),mips64))
ifeq ($(ARCH), mips64)
ifdef BINARY64
CCOMMON_OPT += -mabi=64
else
CCOMMON_OPT += -mabi=n32
endif
BINARY_DEFINED = 1
else ifeq ($(ARCH), $(filter $(ARCH),mips))
CCOMMON_OPT += -mabi=32
BINARY_DEFINED = 1
endif
ifeq ($(CORE), LOONGSON3A)
@@ -553,21 +521,6 @@ CCOMMON_OPT += -march=mips64
FCOMMON_OPT += -march=mips64
endif
ifeq ($(CORE), P5600)
CCOMMON_OPT += -mips32r5 -mnan=2008 -mtune=p5600 $(MSA_FLAGS)
FCOMMON_OPT += -mips32r5 -mnan=2008 -mtune=p5600 $(MSA_FLAGS)
endif
ifeq ($(CORE), I6400)
CCOMMON_OPT += -mips64r6 -mnan=2008 -mtune=i6400 $(MSA_FLAGS)
FCOMMON_OPT += -mips64r6 -mnan=2008 -mtune=i6400 $(MSA_FLAGS)
endif
ifeq ($(CORE), P6600)
CCOMMON_OPT += -mips64r6 -mnan=2008 -mtune=p6600 $(MSA_FLAGS)
FCOMMON_OPT += -mips64r6 -mnan=2008 -mtune=p6600 $(MSA_FLAGS)
endif
ifeq ($(OSNAME), AIX)
BINARY_DEFINED = 1
endif
@@ -604,23 +557,6 @@ endif
# Fortran Compiler dependent settings
#
ifeq ($(F_COMPILER), FLANG)
CCOMMON_OPT += -DF_INTERFACE_FLANG
ifdef BINARY64
ifdef INTERFACE64
ifneq ($(INTERFACE64), 0)
FCOMMON_OPT += -i8
endif
endif
FCOMMON_OPT += -Wall
else
FCOMMON_OPT += -Wall
endif
ifeq ($(USE_OPENMP), 1)
FCOMMON_OPT += -fopenmp
endif
endif
ifeq ($(F_COMPILER), G77)
CCOMMON_OPT += -DF_INTERFACE_G77
FCOMMON_OPT += -Wall
@@ -653,14 +589,12 @@ ifneq ($(NO_LAPACK), 1)
EXTRALIB += -lgfortran
endif
ifdef NO_BINARY_MODE
ifeq ($(ARCH), $(filter $(ARCH),mips64))
ifeq ($(ARCH), mips64)
ifdef BINARY64
FCOMMON_OPT += -mabi=64
else
FCOMMON_OPT += -mabi=n32
endif
else ifeq ($(ARCH), $(filter $(ARCH),mips))
FCOMMON_OPT += -mabi=32
endif
else
ifdef BINARY64
@@ -743,7 +677,21 @@ FCOMMON_OPT += -i8
endif
endif
endif
ifneq ($(ARCH), mips64)
ifndef BINARY64
FCOMMON_OPT += -m32
else
FCOMMON_OPT += -m64
endif
else
ifdef BINARY64
FCOMMON_OPT += -mabi=64
else
FCOMMON_OPT += -mabi=n32
endif
endif
ifeq ($(USE_OPENMP), 1)
FCOMMON_OPT += -mp
endif
@@ -759,7 +707,7 @@ endif
endif
endif
ifeq ($(ARCH), $(filter $(ARCH),mips64 mips))
ifeq ($(ARCH), mips64)
ifndef BINARY64
FCOMMON_OPT += -n32
else
@@ -789,7 +737,7 @@ endif
ifeq ($(C_COMPILER), OPEN64)
ifeq ($(ARCH), $(filter $(ARCH),mips64 mips))
ifeq ($(ARCH), mips64)
ifndef BINARY64
CCOMMON_OPT += -n32
else
@@ -1048,7 +996,7 @@ endif
override CFLAGS += $(COMMON_OPT) $(CCOMMON_OPT) -I$(TOPDIR)
override PFLAGS += $(COMMON_OPT) $(CCOMMON_OPT) -I$(TOPDIR) -DPROFILE $(COMMON_PROF)
override FFLAGS += $(COMMON_OPT) $(FCOMMON_OPT)
override FFLAGS += $(FCOMMON_OPT)
override FPFLAGS += $(FCOMMON_OPT) $(COMMON_PROF)
#MAKEOVERRIDES =
@@ -1178,8 +1126,6 @@ export HAVE_VFP
export HAVE_VFPV3
export HAVE_VFPV4
export HAVE_NEON
export HAVE_MSA
export MSA_FLAGS
export KERNELDIR
export FUNCTION_PROFILE
export TARGET_CORE

View File

@@ -1,6 +0,0 @@
ifeq ($(CORE), Z13)
CCOMMON_OPT += -march=z13 -mzvector
FCOMMON_OPT += -march=z13 -mzvector
endif

View File

@@ -43,35 +43,6 @@ On X86 box, compile this library for loongson3a CPU with loongcc (based on Open6
make DEBUG=1
### Compile with MASS Support on Power CPU (Optional dependency)
The [IBM MASS](http://www-01.ibm.com/software/awdtools/mass/linux/mass-linux.html) library consists of a set of mathematical functions for C, C++, and
Fortran-language applications that are tuned for optimum performance on POWER architectures. OpenBLAS with MASS requires a 64-bit, little-endian OS on POWER.
The library can be installed as follows:
* On Ubuntu:
wget -q http://public.dhe.ibm.com/software/server/POWER/Linux/xl-compiler/eval/ppc64le/ubuntu/public.gpg -O- | sudo apt-key add -</br>
echo "deb http://public.dhe.ibm.com/software/server/POWER/Linux/xl-compiler/eval/ppc64le/ubuntu/ trusty main" | sudo tee /etc/apt/sources.list.d/ibm-xl-compiler-eval.list</br>
sudo apt-get update</br>
sudo apt-get install libxlmass-devel.8.1.5</br>
* On RHEL/CentOS:
wget http://public.dhe.ibm.com/software/server/POWER/Linux/xl-compiler/eval/ppc64le/rhel7/repodata/repomd.xml.key</br>
sudo rpm --import repomd.xml.key</br>
wget http://public.dhe.ibm.com/software/server/POWER/Linux/xl-compiler/eval/ppc64le/rhel7/ibm-xl-compiler-eval.repo</br>
sudo cp ibm-xl-compiler-eval.repo /etc/yum.repos.d/</br>
sudo yum install libxlmass-devel.8.1.5</br>
After installing the MASS library, compile OpenBLAS with USE_MASS=1.
Example:
Compiling on Power8 with MASS support -
make USE_MASS=1 TARGET=POWER8
### Install to the directory (optional)
Example:
@@ -106,16 +77,11 @@ Please read GotoBLAS_01Readme.txt
- **ARMV8**: Experimental
- **ARM Cortex-A57**: Experimental
#### IBM zEnterprise System:
- **Z13**: Optimized Level-3 BLAS
### Support OS:
- **GNU/Linux**
- **MingWin or Visual Studio(CMake)/Windows**: Please read <https://github.com/xianyi/OpenBLAS/wiki/How-to-use-OpenBLAS-in-Microsoft-Visual-Studio>.
- **Darwin/Mac OS X**: Experimental. Although GotoBLAS2 supports Darwin, we are beginners on Mac OS X.
- **FreeBSD**: Supported by the community. We have not tested the library on this OS.
- **Android**: Supported by community. Please read <https://github.com/xianyi/OpenBLAS/wiki/How-to-build-OpenBLAS-for-Android>.
## Usages
Link with libopenblas.a or -lopenblas for shared library.

View File

@@ -34,7 +34,6 @@ BULLDOZER
PILEDRIVER
STEAMROLLER
EXCAVATOR
ZEN
c)VIA CPU:
SSE_GENERIC
@@ -54,34 +53,26 @@ PPC440
PPC440FP2
CELL
3.MIPS CPU:
P5600
4.MIPS64 CPU:
3.MIPS64 CPU:
SICORTEX
LOONGSON3A
LOONGSON3B
I6400
P6600
5.IA64 CPU:
4.IA64 CPU:
ITANIUM2
6.SPARC CPU:
5.SPARC CPU:
SPARC
SPARCV7
7.ARM CPU:
6.ARM CPU:
CORTEXA15
CORTEXA9
ARMV7
ARMV6
ARMV5
8.ARM 64-bit CPU:
7.ARM 64-bit CPU:
ARMV8
CORTEXA57
VULCAN
THUNDERX
THUNDERX2T99

View File

@@ -1,4 +1,4 @@
version: 0.2.19.{build}
version: 0.2.18.{build}
#environment:

View File

@@ -37,18 +37,6 @@ ESSL=/opt/ibm/lib
#LIBESSL = -lesslsmp $(ESSL)/libxlomp_ser.so.1 $(ESSL)/libxlf90_r.so.1 $(ESSL)/libxlfmath.so.1 $(ESSL)/libxlsmp.so.1 /opt/ibm/xlC/13.1.3/lib/libxl.a
LIBESSL = -lesslsmp $(ESSL)/libxlf90_r.so.1 $(ESSL)/libxlfmath.so.1 $(ESSL)/libxlsmp.so.1 /opt/ibm/xlC/13.1.3/lib/libxl.a
ifneq ($(NO_LAPACK), 1)
GOTO_LAPACK_TARGETS=slinpack.goto dlinpack.goto clinpack.goto zlinpack.goto \
scholesky.goto dcholesky.goto ccholesky.goto zcholesky.goto \
sgesv.goto dgesv.goto cgesv.goto zgesv.goto \
sgeev.goto dgeev.goto cgeev.goto zgeev.goto \
csymv.goto zsymv.goto \
sgetri.goto dgetri.goto cgetri.goto zgetri.goto \
spotrf.goto dpotrf.goto cpotrf.goto zpotrf.goto
else
GOTO_LAPACK_TARGETS=
endif
ifeq ($(OSNAME), WINNT)
goto :: slinpack.goto dlinpack.goto clinpack.goto zlinpack.goto \
@@ -159,7 +147,9 @@ mkl :: slinpack.mkl dlinpack.mkl clinpack.mkl zlinpack.mkl \
else
goto :: sgemm.goto dgemm.goto cgemm.goto zgemm.goto \
goto :: slinpack.goto dlinpack.goto clinpack.goto zlinpack.goto \
scholesky.goto dcholesky.goto ccholesky.goto zcholesky.goto \
sgemm.goto dgemm.goto cgemm.goto zgemm.goto \
strmm.goto dtrmm.goto ctrmm.goto ztrmm.goto \
strsm.goto dtrsm.goto ctrsm.goto ztrsm.goto \
ssyrk.goto dsyrk.goto csyrk.goto zsyrk.goto \
@@ -172,16 +162,18 @@ goto :: sgemm.goto dgemm.goto cgemm.goto zgemm.goto \
sswap.goto dswap.goto cswap.goto zswap.goto \
sscal.goto dscal.goto cscal.goto zscal.goto \
sasum.goto dasum.goto casum.goto zasum.goto \
ssymv.goto dsymv.goto \
ssymv.goto dsymv.goto csymv.goto zsymv.goto \
chemv.goto zhemv.goto \
chemm.goto zhemm.goto \
cherk.goto zherk.goto \
cher2k.goto zher2k.goto \
sgemv.goto dgemv.goto cgemv.goto zgemv.goto \
sgesv.goto dgesv.goto cgesv.goto zgesv.goto \
sgeev.goto dgeev.goto cgeev.goto zgeev.goto \
sgetri.goto dgetri.goto cgetri.goto zgetri.goto \
spotrf.goto dpotrf.goto cpotrf.goto zpotrf.goto \
ssymm.goto dsymm.goto csymm.goto zsymm.goto \
smallscaling \
isamax.goto idamax.goto icamax.goto izamax.goto \
snrm2.goto dnrm2.goto scnrm2.goto dznrm2.goto $(GOTO_LAPACK_TARGETS)
smallscaling
acml :: slinpack.acml dlinpack.acml clinpack.acml zlinpack.acml \
scholesky.acml dcholesky.acml ccholesky.acml zcholesky.acml \
@@ -234,9 +226,7 @@ atlas :: slinpack.atlas dlinpack.atlas clinpack.atlas zlinpack.atlas \
sgesv.atlas dgesv.atlas cgesv.atlas zgesv.atlas \
sgetri.atlas dgetri.atlas cgetri.atlas zgetri.atlas \
spotrf.atlas dpotrf.atlas cpotrf.atlas zpotrf.atlas \
ssymm.atlas dsymm.atlas csymm.atlas zsymm.atlas \
isamax.atlas idamax.atlas icamax.atlas izamax.atlas \
snrm2.goto dnrm2.goto scnrm2.goto dznrm2.goto
ssymm.atlas dsymm.atlas csymm.atlas zsymm.atlas
mkl :: slinpack.mkl dlinpack.mkl clinpack.mkl zlinpack.mkl \
scholesky.mkl dcholesky.mkl ccholesky.mkl zcholesky.mkl \
@@ -271,9 +261,7 @@ endif
essl :: sgemm.essl strmm.essl dgemm.essl dtrmm.essl \
cgemm.essl ctrmm.essl zgemm.essl ztrmm.essl \
slinpack.essl clinpack.essl dlinpack.essl zlinpack.essl \
scholesky.essl ccholesky.essl dcholesky.essl zcholesky.essl \
strsm.essl dtrsm.essl ctrsm.essl ztrsm.essl
slinpack.essl clinpack.essl dlinpack.essl zlinpack.essl
veclib :: slinpack.veclib dlinpack.veclib clinpack.veclib zlinpack.veclib \
scholesky.veclib dcholesky.veclib ccholesky.veclib zcholesky.veclib \
@@ -405,9 +393,6 @@ scholesky.mkl : scholesky.$(SUFFIX)
scholesky.veclib : scholesky.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
scholesky.essl : scholesky.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
##################################### Dcholesky ###################################################
dcholesky.goto : dcholesky.$(SUFFIX) ../$(LIBNAME)
@@ -425,9 +410,6 @@ dcholesky.mkl : dcholesky.$(SUFFIX)
dcholesky.veclib : dcholesky.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
dcholesky.essl : dcholesky.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
##################################### Ccholesky ###################################################
ccholesky.goto : ccholesky.$(SUFFIX) ../$(LIBNAME)
@@ -445,9 +427,6 @@ ccholesky.mkl : ccholesky.$(SUFFIX)
ccholesky.veclib : ccholesky.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
ccholesky.essl : ccholesky.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
##################################### Zcholesky ###################################################
@@ -466,9 +445,6 @@ zcholesky.mkl : zcholesky.$(SUFFIX)
zcholesky.veclib : zcholesky.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
zcholesky.essl : zcholesky.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
##################################### Sgemm ####################################################
sgemm.goto : sgemm.$(SUFFIX) ../$(LIBNAME)
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
@@ -707,9 +683,6 @@ strsm.mkl : strsm.$(SUFFIX)
strsm.veclib : strsm.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
strsm.essl : strsm.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
##################################### Dtrsm ####################################################
dtrsm.goto : dtrsm.$(SUFFIX) ../$(LIBNAME)
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
@@ -726,9 +699,6 @@ dtrsm.mkl : dtrsm.$(SUFFIX)
dtrsm.veclib : dtrsm.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
dtrsm.essl : dtrsm.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
##################################### Ctrsm ####################################################
ctrsm.goto : ctrsm.$(SUFFIX) ../$(LIBNAME)
@@ -746,9 +716,6 @@ ctrsm.mkl : ctrsm.$(SUFFIX)
ctrsm.veclib : ctrsm.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
ctrsm.essl : ctrsm.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
##################################### Ztrsm ####################################################
ztrsm.goto : ztrsm.$(SUFFIX) ../$(LIBNAME)
@@ -766,9 +733,6 @@ ztrsm.mkl : ztrsm.$(SUFFIX)
ztrsm.veclib : ztrsm.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
ztrsm.essl : ztrsm.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
##################################### Ssyrk ####################################################
ssyrk.goto : ssyrk.$(SUFFIX) ../$(LIBNAME)
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
@@ -1947,63 +1911,6 @@ zgemm3m.mkl : zgemm3m.$(SUFFIX)
zgemm3m.veclib : zgemm3m.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
############################################## ISAMAX ##############################################
isamax.goto : isamax.$(SUFFIX) ../$(LIBNAME)
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
isamax.atlas : isamax.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
############################################## IDAMAX ##############################################
idamax.goto : idamax.$(SUFFIX) ../$(LIBNAME)
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
idamax.atlas : idamax.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
############################################## ICAMAX ##############################################
icamax.goto : icamax.$(SUFFIX) ../$(LIBNAME)
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
icamax.atlas : icamax.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
############################################## IZAMAX ##############################################
izamax.goto : izamax.$(SUFFIX) ../$(LIBNAME)
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
izamax.atlas : izamax.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
############################################## SNRM2 ##############################################
snrm2.goto : snrm2.$(SUFFIX) ../$(LIBNAME)
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
snrm2.atlas : snrm2.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
############################################## DNRM2 ##############################################
dnrm2.goto : dnrm2.$(SUFFIX) ../$(LIBNAME)
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
dnrm2.atlas : dnrm2.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
############################################## Sscnrm2 ##############################################
scnrm2.goto : scnrm2.$(SUFFIX) ../$(LIBNAME)
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
scnrm2.atlas : scnrm2.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
############################################## Ddznrm2 ##############################################
dznrm2.goto : dznrm2.$(SUFFIX) ../$(LIBNAME)
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
dznrm2.atlas : dznrm2.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
###################################################################################################
slinpack.$(SUFFIX) : linpack.c
@@ -2310,38 +2217,11 @@ cgemm3m.$(SUFFIX) : gemm3m.c
zgemm3m.$(SUFFIX) : gemm3m.c
$(CC) $(CFLAGS) -c -DCOMPLEX -DDOUBLE -o $(@F) $^
isamax.$(SUFFIX) : iamax.c
$(CC) $(CFLAGS) -c -UCOMPLEX -UDOUBLE -o $(@F) $^
idamax.$(SUFFIX) : iamax.c
$(CC) $(CFLAGS) -c -UCOMPLEX -DDOUBLE -o $(@F) $^
icamax.$(SUFFIX) : iamax.c
$(CC) $(CFLAGS) -c -DCOMPLEX -UDOUBLE -o $(@F) $^
izamax.$(SUFFIX) : iamax.c
$(CC) $(CFLAGS) -c -DCOMPLEX -DDOUBLE -o $(@F) $^
snrm2.$(SUFFIX) : nrm2.c
$(CC) $(CFLAGS) -c -UCOMPLEX -UDOUBLE -o $(@F) $^
dnrm2.$(SUFFIX) : nrm2.c
$(CC) $(CFLAGS) -c -UCOMPLEX -DDOUBLE -o $(@F) $^
scnrm2.$(SUFFIX) : nrm2.c
$(CC) $(CFLAGS) -c -DCOMPLEX -UDOUBLE -o $(@F) $^
dznrm2.$(SUFFIX) : nrm2.c
$(CC) $(CFLAGS) -c -DCOMPLEX -DDOUBLE -o $(@F) $^
smallscaling: smallscaling.c ../$(LIBNAME)
$(CC) $(CFLAGS) -o $(@F) $^ $(EXTRALIB) -fopenmp -lm -lpthread
$(CC) $(CFLAGS) -o $(@F) $^ $(EXTRALIB) -fopenmp -lm
clean ::
@rm -f *.goto *.mkl *.acml *.atlas *.veclib *.essl smallscaling
@rm -f *.goto *.mkl *.acml *.atlas *.veclib *.essl
include $(TOPDIR)/Makefile.tail

View File

@@ -183,9 +183,9 @@ int main(int argc, char *argv[]){
timeg /= loops;
#ifdef COMPLEX
fprintf(stderr, " %10.2f MFlops %10.6f sec\n", 4. * (double)m / timeg * 1.e-6, timeg);
fprintf(stderr, " %10.2f MFlops\n", 4. * (double)m / timeg * 1.e-6);
#else
fprintf(stderr, " %10.2f MFlops %10.6f sec\n", 2. * (double)m / timeg * 1.e-6, timeg);
fprintf(stderr, " %10.2f MFlops\n", 2. * (double)m / timeg * 1.e-6);
#endif
}

View File

@@ -190,8 +190,8 @@ int main(int argc, char *argv[]){
timeg /= loops;
fprintf(stderr,
" %10.2f MFlops %10.6f sec\n",
COMPSIZE * COMPSIZE * 2. * (double)m / timeg * 1.e-6, timeg);
" %10.2f MFlops\n",
COMPSIZE * COMPSIZE * 2. * (double)m / timeg * 1.e-6);
}

View File

@@ -190,8 +190,8 @@ int main(int argc, char *argv[]){
timeg /= loops;
fprintf(stderr,
" %10.2f MBytes %10.6f sec\n",
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg);
" %10.2f MBytes\n",
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6);
}

View File

@@ -184,8 +184,8 @@ int main(int argc, char *argv[]){
timeg /= loops;
fprintf(stderr,
" %10.2f MFlops %10.6f sec\n",
COMPSIZE * COMPSIZE * 2. * (double)m / timeg * 1.e-6, timeg);
" %10.2f MFlops\n",
COMPSIZE * COMPSIZE * 2. * (double)m / timeg * 1.e-6);
}

View File

@@ -221,7 +221,7 @@ int main(int argc, char *argv[]){
timeg /= loops;
fprintf(stderr, " %10.2f MFlops %10.6f sec\n", COMPSIZE * COMPSIZE * 2. * (double)m * (double)n / timeg * 1.e-6, timeg);
fprintf(stderr, " %10.2f MFlops\n", COMPSIZE * COMPSIZE * 2. * (double)m * (double)n / timeg * 1.e-6);
}
}
@@ -258,7 +258,7 @@ int main(int argc, char *argv[]){
timeg /= loops;
fprintf(stderr, " %10.2f MFlops %10.6f sec\n", COMPSIZE * COMPSIZE * 2. * (double)m * (double)n / timeg * 1.e-6, timeg);
fprintf(stderr, " %10.2f MFlops\n", COMPSIZE * COMPSIZE * 2. * (double)m * (double)n / timeg * 1.e-6);
}
}

View File

@@ -1,192 +0,0 @@
/***************************************************************************
Copyright (c) 2016, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#undef IAMAX
#ifdef COMPLEX
#ifdef DOUBLE
#define IAMAX BLASFUNC(izamax)
#else
#define IAMAX BLASFUNC(icamax)
#endif
#else
#ifdef DOUBLE
#define IAMAX BLASFUNC(idamax)
#else
#define IAMAX BLASFUNC(isamax)
#endif
#endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
int gettimeofday(struct timeval *tv, void *tz){
FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;
if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);
tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;
/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}
return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
/*
 * Allocation helper that obtains memory from a private System V shared
 * memory segment requested with the SHM_HUGETLB flag.
 *
 * The requested size has HUGE_PAGESIZE added and is then masked with
 * ~(HUGE_PAGESIZE - 1) (presumably HUGE_PAGESIZE is a power of two, so
 * this yields a multiple of the page size -- TODO confirm).
 *
 * Prints a message and exits with status 1 if either shmget() or shmat()
 * fails; otherwise returns the attached address.
 */
static void *huge_malloc(BLASLONG size){
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif
  void *mapped;
  int   segment;

  segment = shmget(IPC_PRIVATE,
                   (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
                   SHM_HUGETLB | IPC_CREAT | 0600);
  if (segment < 0) {
    printf( "Memory allocation failed(shmget).\n");
    exit(1);
  }

  mapped = shmat(segment, NULL, SHM_RND);
  if ((BLASLONG)mapped == -1) {
    printf( "Memory allocation failed(shmat).\n");
    exit(1);
  }

  /* Issue IPC_RMID right after attaching, before handing the address out. */
  shmctl(segment, IPC_RMID, 0);

  return mapped;
}
#define malloc huge_malloc
#endif
/*
 * Benchmark driver for the IAMAX routine selected by the COMPLEX/DOUBLE
 * macros.
 *
 * Command line:  [from [to [step]]]   vector sizes to sweep (default 1..200, step 1)
 * Environment:   OPENBLAS_LOOPS       timed repetitions per size (default 1)
 *                OPENBLAS_INCX        stride passed to IAMAX (default 1)
 *
 * For every size the input vector is refilled with random values, the call
 * is timed with gettimeofday(), and the average time per call is reported
 * on stderr. Returns 0 on completion; exits with status 1 if the buffer
 * cannot be allocated.
 */
int main(int argc, char *argv[]){

  FLOAT *x;
  blasint m, i;
  blasint inc_x=1;
  int loops = 1;
  int l;
  char *p;

  int from = 1;
  int to = 200;
  int step = 1;

  struct timeval start, stop;
  double time1,timeg;

  argc--;argv++;

  if (argc > 0) { from = atol(*argv); argc--; argv++;}
  if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
  if (argc > 0) { step = atol(*argv); argc--; argv++;}

  if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
  if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);

  /* A non-positive step would keep the size loop below from ever reaching
     `to`; fall back to the default stride of 1. */
  if (step < 1) step = 1;

  /* The accumulated time is divided by `loops`, so it must be at least 1. */
  if (loops < 1) loops = 1;

  fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step,inc_x,loops);

  /* One buffer sized for the largest vector, reused for every size. */
  if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
    fprintf(stderr,"Out of Memory!!\n");exit(1);
  }

#ifdef linux
  srandom(getpid());
#endif

  fprintf(stderr, " SIZE Flops\n");

  for(m = from; m <= to; m += step)
    {
      timeg=0;

      fprintf(stderr, " %6d : ", (int)m);

      for (l=0; l<loops; l++)
	{
	  /* Fresh random data in [-0.5, 0.5] for each repetition. */
	  for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
	    x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
	  }

	  gettimeofday( &start, (struct timezone *)0);

	  IAMAX (&m, x, &inc_x);

	  gettimeofday( &stop, (struct timezone *)0);

	  time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;

	  timeg += time1;
	}

      timeg /= loops;

      /* NOTE(review): the figure below is derived from a byte count
	 (COMPSIZE * sizeof(FLOAT) * m) although it is labelled MFlops --
	 confirm which unit is intended. */
      fprintf(stderr,
	      " %10.2f MFlops %10.6f sec\n",
	      COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg);
    }

  return 0;
}
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

View File

@@ -1,193 +0,0 @@
/***************************************************************************
Copyright (c) 2016, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#undef NRM2
#ifdef COMPLEX
#ifdef DOUBLE
#define NRM2 BLASFUNC(dznrm2)
#else
#define NRM2 BLASFUNC(scnrm2)
#endif
#else
#ifdef DOUBLE
#define NRM2 BLASFUNC(dnrm2)
#else
#define NRM2 BLASFUNC(snrm2)
#endif
#endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
int gettimeofday(struct timeval *tv, void *tz){
FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;
if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);
tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;
/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}
return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
/*
 * Allocation helper that obtains memory from a private System V shared
 * memory segment requested with the SHM_HUGETLB flag.
 *
 * The requested size has HUGE_PAGESIZE added and is then masked with
 * ~(HUGE_PAGESIZE - 1) (presumably HUGE_PAGESIZE is a power of two, so
 * this yields a multiple of the page size -- TODO confirm).
 *
 * Prints a message and exits with status 1 if either shmget() or shmat()
 * fails; otherwise returns the attached address.
 */
static void *huge_malloc(BLASLONG size){
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif
  void *mapped;
  int   segment;

  segment = shmget(IPC_PRIVATE,
                   (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
                   SHM_HUGETLB | IPC_CREAT | 0600);
  if (segment < 0) {
    printf( "Memory allocation failed(shmget).\n");
    exit(1);
  }

  mapped = shmat(segment, NULL, SHM_RND);
  if ((BLASLONG)mapped == -1) {
    printf( "Memory allocation failed(shmat).\n");
    exit(1);
  }

  /* Issue IPC_RMID right after attaching, before handing the address out. */
  shmctl(segment, IPC_RMID, 0);

  return mapped;
}
#define malloc huge_malloc
#endif
/*
 * Benchmark driver for the NRM2 routine selected by the COMPLEX/DOUBLE
 * macros.
 *
 * Command line:  [from [to [step]]]   vector sizes to sweep (default 1..200, step 1)
 * Environment:   OPENBLAS_LOOPS       timed repetitions per size (default 1)
 *                OPENBLAS_INCX        stride passed to NRM2 (default 1)
 *
 * For every size the input vector is refilled with random values, the call
 * is timed with gettimeofday(), and the average rate and time per call are
 * reported on stderr. Returns 0 on completion; exits with status 1 if the
 * buffer cannot be allocated.
 */
int main(int argc, char *argv[]){

  FLOAT *x;
  blasint m, i;
  blasint inc_x=1;
  int loops = 1;
  int l;
  char *p;

  int from = 1;
  int to = 200;
  int step = 1;

  struct timeval start, stop;
  double time1,timeg;

  argc--;argv++;

  if (argc > 0) { from = atol(*argv); argc--; argv++;}
  if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
  if (argc > 0) { step = atol(*argv); argc--; argv++;}

  if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
  if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);

  /* A non-positive step would keep the size loop below from ever reaching
     `to`; fall back to the default stride of 1. */
  if (step < 1) step = 1;

  /* The accumulated time is divided by `loops`, so it must be at least 1. */
  if (loops < 1) loops = 1;

  fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step,inc_x,loops);

  /* One buffer sized for the largest vector, reused for every size. */
  if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
    fprintf(stderr,"Out of Memory!!\n");exit(1);
  }

#ifdef linux
  srandom(getpid());
#endif

  fprintf(stderr, " SIZE Flops\n");

  for(m = from; m <= to; m += step)
    {
      timeg=0;

      fprintf(stderr, " %6d : ", (int)m);

      for (l=0; l<loops; l++)
	{
	  /* Fresh random data in [-0.5, 0.5] for each repetition. */
	  for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
	    x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
	  }

	  gettimeofday( &start, (struct timezone *)0);

	  NRM2 (&m, x, &inc_x);

	  gettimeofday( &stop, (struct timezone *)0);

	  time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;

	  timeg += time1;
	}

      timeg /= loops;

      fprintf(stderr,
	      " %10.2f MFlops %10.6f sec\n",
	      COMPSIZE * COMPSIZE * 2. * (double)m / timeg * 1.e-6, timeg);
    }

  return 0;
}
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

View File

@@ -186,8 +186,8 @@ int main(int argc, char *argv[]){
timeg /= loops;
fprintf(stderr,
" %10.2f MFlops %10.6f sec\n",
COMPSIZE * COMPSIZE * 6. * (double)m / timeg * 1.e-6, timeg);
" %10.2f MFlops\n",
COMPSIZE * COMPSIZE * 6. * (double)m / timeg * 1.e-6);
}

View File

@@ -189,9 +189,9 @@ int main(int argc, char *argv[]){
timeg /= loops;
#ifdef COMPLEX
fprintf(stderr, " %10.2f MFlops %10.6f sec\n", 6. * (double)m / timeg * 1.e-6, timeg);
fprintf(stderr, " %10.2f MFlops\n", 6. * (double)m / timeg * 1.e-6);
#else
fprintf(stderr, " %10.2f MFlops %10.6f sec\n", 1. * (double)m / timeg * 1.e-6, timeg);
fprintf(stderr, " %10.2f MFlops\n", 1. * (double)m / timeg * 1.e-6);
#endif
}

View File

@@ -2,54 +2,61 @@
argv <- commandArgs(trailingOnly = TRUE)
nfrom <- 128
nto <- 2048
nstep <- 128
loops <- 1
nfrom = 128
nto = 2048
nstep = 128
loops = 1
if (length(argv) > 0) {
for (z in 1:length(argv)) {
if (z == 1) {
nfrom <- as.numeric(argv[z])
} else if (z == 2) {
nto <- as.numeric(argv[z])
} else if (z == 3) {
nstep <- as.numeric(argv[z])
} else if (z == 4) {
loops <- as.numeric(argv[z])
}
}
if ( length(argv) > 0 ) {
for ( z in 1:length(argv) ) {
if ( z == 1 ) {
nfrom <- as.numeric(argv[z])
} else if ( z==2 ) {
nto <- as.numeric(argv[z])
} else if ( z==3 ) {
nstep <- as.numeric(argv[z])
} else if ( z==4 ) {
loops <- as.numeric(argv[z])
}
}
}
p <- Sys.getenv("OPENBLAS_LOOPS")
if (p != "") {
loops <- as.numeric(p)
}
p=Sys.getenv("OPENBLAS_LOOPS")
if ( p != "" ) {
loops <- as.numeric(p)
}
cat(sprintf(
"From %.0f To %.0f Step=%.0f Loops=%.0f\n",
nfrom,
nto,
nstep,
loops
))
cat(sprintf("From %.0f To %.0f Step=%.0f Loops=%.0f\n",nfrom, nto, nstep, loops))
cat(sprintf(" SIZE Flops Time\n"))
n <- nfrom
while (n <= nto) {
A <- matrix(rnorm(n * n), ncol = n, nrow = n)
ev <- 0
z <- system.time(for (l in 1:loops) {
ev <- eigen(A)
})
n = nfrom
while ( n <= nto ) {
mflops <- (26.66 * n * n * n) * loops / (z[3] * 1.0e6)
A <- matrix(runif(n*n), ncol = n, nrow = n, byrow = TRUE)
l = 1
st <- sprintf("%.0fx%.0f :", n, n)
cat(sprintf("%20s %10.2f MFlops %10.6f sec\n", st, mflops, z[3]))
start <- proc.time()[3]
n <- n + nstep
while ( l <= loops ) {
ev <- eigen(A)
l = l + 1
}
end <- proc.time()[3]
timeg = end - start
mflops = (26.66 *n*n*n ) * loops / ( timeg * 1.0e6 )
st = sprintf("%.0fx%.0f :",n , n)
cat(sprintf("%20s %10.2f MFlops %10.6f sec\n", st, mflops, timeg))
n = n + nstep
}

View File

@@ -2,63 +2,62 @@
argv <- commandArgs(trailingOnly = TRUE)
nfrom <- 128
nto <- 2048
nstep <- 128
loops <- 1
nfrom = 128
nto = 2048
nstep = 128
loops = 1
if (length(argv) > 0) {
for (z in 1:length(argv)) {
if (z == 1) {
nfrom <- as.numeric(argv[z])
} else if (z == 2) {
nto <- as.numeric(argv[z])
} else if (z == 3) {
nstep <- as.numeric(argv[z])
} else if (z == 4) {
loops <- as.numeric(argv[z])
}
}
if ( length(argv) > 0 ) {
for ( z in 1:length(argv) ) {
if ( z == 1 ) {
nfrom <- as.numeric(argv[z])
} else if ( z==2 ) {
nto <- as.numeric(argv[z])
} else if ( z==3 ) {
nstep <- as.numeric(argv[z])
} else if ( z==4 ) {
loops <- as.numeric(argv[z])
}
}
}
p <- Sys.getenv("OPENBLAS_LOOPS")
if (p != "") {
loops <- as.numeric(p)
}
p=Sys.getenv("OPENBLAS_LOOPS")
if ( p != "" ) {
loops <- as.numeric(p)
}
cat(sprintf(
"From %.0f To %.0f Step=%.0f Loops=%.0f\n",
nfrom,
nto,
nstep,
loops
))
cat(sprintf("From %.0f To %.0f Step=%.0f Loops=%.0f\n",nfrom, nto, nstep, loops))
cat(sprintf(" SIZE Flops Time\n"))
n <- nfrom
while (n <= nto) {
A <- matrix(runif(n * n),
ncol = n,
nrow = n,
byrow = TRUE)
B <- matrix(runif(n * n),
ncol = n,
nrow = n,
byrow = TRUE)
C <- 1
n = nfrom
while ( n <= nto ) {
z <- system.time(for (l in 1:loops) {
C <- A %*% B
l <- l + 1
})
A <- matrix(runif(n*n), ncol = n, nrow = n, byrow = TRUE)
B <- matrix(runif(n*n), ncol = n, nrow = n, byrow = TRUE)
l = 1
mflops <- (2.0 * n * n * n) * loops / (z[3] * 1.0e6)
start <- proc.time()[3]
st <- sprintf("%.0fx%.0f :", n, n)
cat(sprintf("%20s %10.2f MFlops %10.6f sec\n", st, mflops, z[3]))
while ( l <= loops ) {
n <- n + nstep
C <- A %*% B
l = l + 1
}
end <- proc.time()[3]
timeg = end - start
mflops = ( 2.0 *n*n*n ) * loops / ( timeg * 1.0e6 )
st = sprintf("%.0fx%.0f :",n , n)
cat(sprintf("%20s %10.2f MFlops %10.6f sec\n", st, mflops, timeg))
n = n + nstep
}

View File

@@ -2,56 +2,62 @@
argv <- commandArgs(trailingOnly = TRUE)
nfrom <- 128
nto <- 2048
nstep <- 128
loops <- 1
nfrom = 128
nto = 2048
nstep = 128
loops = 1
if (length(argv) > 0) {
for (z in 1:length(argv)) {
if (z == 1) {
nfrom <- as.numeric(argv[z])
} else if (z == 2) {
nto <- as.numeric(argv[z])
} else if (z == 3) {
nstep <- as.numeric(argv[z])
} else if (z == 4) {
loops <- as.numeric(argv[z])
}
}
if ( length(argv) > 0 ) {
for ( z in 1:length(argv) ) {
if ( z == 1 ) {
nfrom <- as.numeric(argv[z])
} else if ( z==2 ) {
nto <- as.numeric(argv[z])
} else if ( z==3 ) {
nstep <- as.numeric(argv[z])
} else if ( z==4 ) {
loops <- as.numeric(argv[z])
}
}
}
p <- Sys.getenv("OPENBLAS_LOOPS")
if (p != "") {
loops <- as.numeric(p)
}
p=Sys.getenv("OPENBLAS_LOOPS")
if ( p != "" ) {
loops <- as.numeric(p)
}
cat(sprintf(
"From %.0f To %.0f Step=%.0f Loops=%.0f\n",
nfrom,
nto,
nstep,
loops
))
cat(sprintf("From %.0f To %.0f Step=%.0f Loops=%.0f\n",nfrom, nto, nstep, loops))
cat(sprintf(" SIZE Flops Time\n"))
n <- nfrom
while (n <= nto) {
A <- matrix(rnorm(n * n), ncol = n, nrow = n)
B <- matrix(rnorm(n * n), ncol = n, nrow = n)
n = nfrom
while ( n <= nto ) {
z <- system.time(for (l in 1:loops) {
solve(A, B)
})
A <- matrix(runif(n*n), ncol = n, nrow = n, byrow = TRUE)
B <- matrix(runif(n*n), ncol = n, nrow = n, byrow = TRUE)
l = 1
mflops <-
(2.0 / 3.0 * n * n * n + 2.0 * n * n * n) * loops / (z[3] * 1.0e6)
start <- proc.time()[3]
st <- sprintf("%.0fx%.0f :", n, n)
cat(sprintf("%20s %10.2f MFlops %10.6f sec\n", st, mflops, z[3]))
while ( l <= loops ) {
n <- n + nstep
solve(A,B)
l = l + 1
}
end <- proc.time()[3]
timeg = end - start
mflops = (2.0/3.0 *n*n*n + 2.0 *n*n*n ) * loops / ( timeg * 1.0e6 )
st = sprintf("%.0fx%.0f :",n , n)
cat(sprintf("%20s %10.2f MFlops %10.6f sec\n", st, mflops, timeg))
n = n + nstep
}

View File

@@ -5,7 +5,6 @@
#include <time.h>
#include <cblas.h>
#include <omp.h>
#include <pthread.h>
#define MIN_SIZE 5
#define MAX_SIZE 60
#define NB_SIZE 10

View File

@@ -190,8 +190,8 @@ int main(int argc, char *argv[]){
timeg /= loops;
fprintf(stderr,
" %10.2f MBytes %10.6f sec\n",
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg);
" %10.2f MBytes\n",
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6);
}

View File

@@ -191,8 +191,8 @@ int main(int argc, char *argv[]){
gettimeofday( &start, (struct timezone *)0);
fprintf(stderr,
" %10.2f MFlops %10.6f sec\n",
COMPSIZE * COMPSIZE * 1. * (double)m * (double)m * (double)m / time1 * 1.e-6, time1);
" %10.2f MFlops\n",
COMPSIZE * COMPSIZE * 1. * (double)m * (double)m * (double)m / time1 * 1.e-6);
}

View File

@@ -184,8 +184,8 @@ int main(int argc, char *argv[]){
timeg /= loops;
fprintf(stderr,
" %10.2f MFlops %10.6f sec\n",
COMPSIZE * COMPSIZE * 2. * (double)m / timeg * 1.e-6, timeg);
" %10.2f MFlops\n",
COMPSIZE * COMPSIZE * 2. * (double)m / timeg * 1.e-6);
}

65
c_check
View File

@@ -1,8 +1,5 @@
#!/usr/bin/perl
use File::Basename;
use File::Temp qw(tempfile);
# Checking cross compile
$hostos = `uname -s | sed -e s/\-.*//`; chop($hostos);
$hostarch = `uname -m | sed -e s/i.86/x86/`;chop($hostarch);
@@ -10,9 +7,7 @@ $hostarch = "x86_64" if ($hostarch eq "amd64");
$hostarch = "arm" if ($hostarch =~ /^arm.*/);
$hostarch = "arm64" if ($hostarch eq "aarch64");
$hostarch = "power" if ($hostarch =~ /^(powerpc|ppc).*/);
$hostarch = "zarch" if ($hostarch eq "s390x");
$tmpf = new File::Temp( UNLINK => 1 );
$binary = $ENV{"BINARY"};
$makefile = shift(@ARGV);
@@ -31,12 +26,14 @@ if ($?) {
$cross_suffix = "";
if (dirname($compiler_name) ne ".") {
$cross_suffix .= dirname($compiler_name) . "/";
}
if (basename($compiler_name) =~ /([^\s]*-)(.*)/) {
$cross_suffix .= $1;
if ($ARGV[0] =~ /(.*)(-[.\d]+)/) {
if ($1 =~ /(.*-)(.*)/) {
$cross_suffix = $1;
}
} else {
if ($ARGV[0] =~ /([^\/]*-)([^\/]*$)/) {
$cross_suffix = $1;
}
}
$compiler = "";
@@ -66,14 +63,13 @@ $os = Android if ($data =~ /OS_ANDROID/);
$architecture = x86 if ($data =~ /ARCH_X86/);
$architecture = x86_64 if ($data =~ /ARCH_X86_64/);
$architecture = power if ($data =~ /ARCH_POWER/);
$architecture = mips if ($data =~ /ARCH_MIPS/);
$architecture = mips32 if ($data =~ /ARCH_MIPS32/);
$architecture = mips64 if ($data =~ /ARCH_MIPS64/);
$architecture = alpha if ($data =~ /ARCH_ALPHA/);
$architecture = sparc if ($data =~ /ARCH_SPARC/);
$architecture = ia64 if ($data =~ /ARCH_IA64/);
$architecture = arm if ($data =~ /ARCH_ARM/);
$architecture = arm64 if ($data =~ /ARCH_ARM64/);
$architecture = zarch if ($data =~ /ARCH_ZARCH/);
$defined = 0;
@@ -83,12 +79,7 @@ if ($os eq "AIX") {
$defined = 1;
}
if ($architecture eq "mips") {
$compiler_name .= " -mabi=32";
$defined = 1;
}
if ($architecture eq "mips64") {
if (($architecture eq "mips32") || ($architecture eq "mips64")) {
$compiler_name .= " -mabi=n32" if ($binary eq "32");
$compiler_name .= " -mabi=64" if ($binary eq "64");
$defined = 1;
@@ -98,11 +89,6 @@ if (($architecture eq "arm") || ($architecture eq "arm64")) {
$defined = 1;
}
if ($architecture eq "zarch") {
$defined = 1;
$binary = 64;
}
if ($architecture eq "alpha") {
$defined = 1;
$binary = 64;
@@ -166,35 +152,16 @@ if ($?) {
die 1;
}
$have_msa = 0;
if (($architecture eq "mips") || ($architecture eq "mips64")) {
$code = '"addvi.b $w0, $w1, 1"';
$msa_flags = "-mmsa -mfp64 -msched-weight -mload-store-pairs";
print $tmpf "#include <msa.h>\n\n";
print $tmpf "void main(void){ __asm__ volatile($code); }\n";
$args = "$msa_flags -o $tmpf.o -x c $tmpf";
my @cmd = ("$compiler_name $args");
system(@cmd) == 0;
if ($? != 0) {
$have_msa = 0;
} else {
$have_msa = 1;
}
unlink("$tmpf.o");
}
$architecture = x86 if ($data =~ /ARCH_X86/);
$architecture = x86_64 if ($data =~ /ARCH_X86_64/);
$architecture = power if ($data =~ /ARCH_POWER/);
$architecture = mips if ($data =~ /ARCH_MIPS/);
$architecture = mips32 if ($data =~ /ARCH_MIPS32/);
$architecture = mips64 if ($data =~ /ARCH_MIPS64/);
$architecture = alpha if ($data =~ /ARCH_ALPHA/);
$architecture = sparc if ($data =~ /ARCH_SPARC/);
$architecture = ia64 if ($data =~ /ARCH_IA64/);
$architecture = arm if ($data =~ /ARCH_ARM/);
$architecture = arm64 if ($data =~ /ARCH_ARM64/);
$architecture = zarch if ($data =~ /ARCH_ZARCH/);
$binformat = bin32;
$binformat = bin64 if ($data =~ /BINARY_64/);
@@ -242,11 +209,6 @@ $linker_a = "";
$linker_L .= "-Wl,". $flags . " "
}
if ($flags =~ /^\--exclude-libs/) {
$linker_L .= "-Wl,". $flags . " ";
$flags="";
}
if (
($flags =~ /^\-l/)
&& ($flags !~ /gfortranbegin/)
@@ -281,11 +243,9 @@ print MAKEFILE "BINARY64=\n" if $binformat ne bin64;
print MAKEFILE "BINARY32=1\n" if $binformat eq bin32;
print MAKEFILE "BINARY64=1\n" if $binformat eq bin64;
print MAKEFILE "FU=$need_fu\n" if $need_fu ne "";
print MAKEFILE "CROSS_SUFFIX=$cross_suffix\n" if $cross != 0 && $cross_suffix ne "";
print MAKEFILE "CROSS_SUFFIX=$cross_suffix\n" if $cross_suffix ne "";
print MAKEFILE "CROSS=1\n" if $cross != 0;
print MAKEFILE "CEXTRALIB=$linker_L $linker_l $linker_a\n";
print MAKEFILE "HAVE_MSA=1\n" if $have_msa eq 1;
print MAKEFILE "MSA_FLAGS=$msa_flags\n" if $have_msa eq 1;
$os =~ tr/[a-z]/[A-Z]/;
$architecture =~ tr/[a-z]/[A-Z]/;
@@ -297,7 +257,6 @@ print CONFFILE "#define C_$compiler\t1\n";
print CONFFILE "#define __32BIT__\t1\n" if $binformat eq bin32;
print CONFFILE "#define __64BIT__\t1\n" if $binformat eq bin64;
print CONFFILE "#define FUNDERSCORE\t$need_fu\n" if $need_fu ne "";
print CONFFILE "#define HAVE_MSA\t1\n" if $have_msa eq 1;
if ($os eq "LINUX") {

View File

@@ -73,7 +73,7 @@ if (DYNAMIC_ARCH)
set(DYNAMIC_CORE "${DYNAMIC_CORE} SANDYBRIDGE BULLDOZER PILEDRIVER STEAMROLLER")
endif ()
if (NOT NO_AVX2)
set(DYNAMIC_CORE "${DYNAMIC_CORE} HASWELL ZEN")
set(DYNAMIC_CORE "${DYNAMIC_CORE} HASWELL")
endif ()
endif ()

View File

@@ -73,10 +73,6 @@ if (${ARCH} STREQUAL "X86")
set(ARCH x86)
endif ()
if (${ARCH} MATCHES "ppc")
set(ARCH power)
endif ()
set(COMPILER_ID ${CMAKE_CXX_COMPILER_ID})
if (${COMPILER_ID} STREQUAL "GNU")
set(COMPILER_ID "GCC")
@@ -91,8 +87,3 @@ file(WRITE ${TARGET_CONF}
"#define __${BINARY}BIT__\t1\n"
"#define FUNDERSCORE\t${FU}\n")
if (${HOST_OS} STREQUAL "WINDOWSSTORE")
file(APPEND ${TARGET_CONF}
"#define OS_WINNT\t1\n")
endif ()

View File

@@ -53,7 +53,7 @@ endif()
add_custom_command(
TARGET ${OpenBLAS_LIBNAME} PRE_LINK
COMMAND perl
ARGS "${PROJECT_SOURCE_DIR}/exports/gensymbol" "win2k" "${ARCH_IN}" "dummy" "${EXPRECISION_IN}" "${NO_CBLAS_IN}" "${NO_LAPACK_IN}" "${NO_LAPACKE_IN}" "${NEED2UNDERSCORES_IN}" "${ONLY_CBLAS_IN}" "${SYMBOLPREFIX}" "${SYMBOLSUFFIX}" > "${PROJECT_BINARY_DIR}/openblas.def"
ARGS "${CMAKE_SOURCE_DIR}/exports/gensymbol" "win2k" "${ARCH_IN}" "dummy" "${EXPRECISION_IN}" "${NO_CBLAS_IN}" "${NO_LAPACK_IN}" "${NO_LAPACKE_IN}" "${NEED2UNDERSCORES_IN}" "${ONLY_CBLAS_IN}" "${SYMBOLPREFIX}" "${SYMBOLSUFFIX}" > "${PROJECT_BINARY_DIR}/openblas.def"
COMMENT "Create openblas.def file"
VERBATIM)

View File

@@ -3,21 +3,6 @@
## Description: Ported from portion of OpenBLAS/Makefile.system
## Sets Fortran related variables.
if (${F_COMPILER} STREQUAL "FLANG")
set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_FLANG")
if (BINARY64)
if (INTERFACE64)
set(FCOMMON_OPT "${FCOMMON_OPT} -i8")
endif ()
set(FCOMMON_OPT "${FCOMMON_OPT} -Wall")
else ()
set(FCOMMON_OPT "${FCOMMON_OPT} -Wall")
endif ()
if (USE_OPENMP)
set(FCOMMON_OPT "${FCOMMON_OPT} -fopenmp")
endif ()
endif ()
if (${F_COMPILER} STREQUAL "G77")
set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_G77")
set(FCOMMON_OPT "${FCOMMON_OPT} -Wall")

View File

@@ -2,7 +2,7 @@
set(ALLAUX
ilaenv.f ieeeck.f lsamen.f xerbla_array.f iparmq.f
ilaprec.f ilatrans.f ilauplo.f iladiag.f iparam2stage.F chla_transtype.f
ilaprec.f ilatrans.f ilauplo.f iladiag.f chla_transtype.f
../INSTALL/ilaver.f ../INSTALL/slamch.f
)
@@ -26,7 +26,7 @@ set(SCLAUX
)
set(DZLAUX
dbdsdc.f dbdsvdx.f
dbdsdc.f
dbdsqr.f ddisna.f dlabad.f dlacpy.f dladiv.f dlae2.f dlaebz.f
dlaed0.f dlaed1.f dlaed2.f dlaed3.f dlaed4.f dlaed5.f dlaed6.f
dlaed7.f dlaed8.f dlaed9.f dlaeda.f dlaev2.f dlagtf.f
@@ -42,28 +42,20 @@ set(DZLAUX
dsteqr.f dsterf.f dlaisnan.f disnan.f
dlartgp.f dlartgs.f
../INSTALL/dlamch.f ../INSTALL/dsecnd_${TIMER}.f
dgelq.f dgelqt.f dgelqt3.f dgemlq.f dgemlqt.f dgemqr.f dgeqr.f
dgetsls.f dlamswlq.f dlamtsqr.f dlaswlq.f dlatsqr.f dtplqt.f
dtplqt2.f dtpmlqt.f dsysv_aa.f dsytrf_aa.f dsytrs_aa.f dlasyf_aa.f
dsytf2_rk.f dlasyf_rk.f dsytrf_rk.f dsytrs_3.f dsycon_3.f dsytri_3.f
dsytri_3x.f dsysv_rk.f dsb2st_kernels.f dsbev_2stage.f dsbevd_2stage.f
dsbevx_2stage.f dsyev_2stage.f dsyevd_2stage.f dsyevr_2stage.f
dsyevx_2stage.f dsygv_2stage.f dsytrd_2stage.f dsytrd_sb2st.F
dsytrd_sy2sb.f dlarfy.f
)
set(SLASRC
sbdsvdx.f sgbbrd.f sgbcon.f sgbequ.f sgbrfs.f sgbsv.f
sgbbrd.f sgbcon.f sgbequ.f sgbrfs.f sgbsv.f
sgbsvx.f sgbtf2.f sgbtrf.f sgbtrs.f sgebak.f sgebal.f sgebd2.f
sgebrd.f sgecon.f sgeequ.f sgees.f sgeesx.f sgeev.f sgeevx.f
DEPRECATED/sgegs.f DEPRECATED/sgegv.f sgehd2.f sgehrd.f sgelq2.f sgelqf.f
sgels.f sgelsd.f sgelss.f DEPRECATED/sgelsx.f sgelsy.f sgeql2.f sgeqlf.f
sgeqp3.f DEPRECATED/sgeqpf.f sgeqr2.f sgeqr2p.f sgeqrf.f sgeqrfp.f sgerfs.f
sgerq2.f sgerqf.f sgesc2.f sgesdd.f sgesvd.f sgesvdx.f sgesvx.f
sgetc2.f sgetri.f sgetrf2.f
sggbak.f sggbal.f sgghd3.f sgges.f sgges3.f sggesx.f sggev.f sggev3.f sggevx.f
sgerq2.f sgerqf.f sgesc2.f sgesdd.f sgesvd.f sgesvx.f
sgetc2.f sgetri.f
sggbak.f sggbal.f sgges.f sggesx.f sggev.f sggevx.f
sggglm.f sgghrd.f sgglse.f sggqrf.f
sggrqf.f DEPRECATED/sggsvd.f sggsvd3.f DEPRECATED/sggsvp.f sggsvp3.f sgtcon.f sgtrfs.f sgtsv.f
sggrqf.f DEPRECATED/sggsvd.f DEPRECATED/sggsvp.f sgtcon.f sgtrfs.f sgtsv.f
sgtsvx.f sgttrf.f sgttrs.f sgtts2.f shgeqz.f
shsein.f shseqr.f slabrd.f slacon.f slacn2.f
slaein.f slaexc.f slag2.f slags2.f slagtm.f slagv2.f slahqr.f
@@ -80,7 +72,7 @@ set(SLASRC
slatbs.f slatdf.f slatps.f slatrd.f slatrs.f slatrz.f DEPRECATED/slatzm.f
sopgtr.f sopmtr.f sorg2l.f sorg2r.f
sorgbr.f sorghr.f sorgl2.f sorglq.f sorgql.f sorgqr.f sorgr2.f
sorgrq.f sorgtr.f sorm2l.f sorm2r.f sorm22.f
sorgrq.f sorgtr.f sorm2l.f sorm2r.f
sormbr.f sormhr.f sorml2.f sormlq.f sormql.f sormqr.f sormr2.f
sormr3.f sormrq.f sormrz.f sormtr.f spbcon.f spbequ.f spbrfs.f
spbstf.f spbsv.f spbsvx.f
@@ -104,7 +96,7 @@ set(SLASRC
stbrfs.f stbtrs.f stgevc.f stgex2.f stgexc.f stgsen.f
stgsja.f stgsna.f stgsy2.f stgsyl.f stpcon.f stprfs.f stptri.f
stptrs.f
strcon.f strevc.f strevc3.f strexc.f strrfs.f strsen.f strsna.f strsyl.f
strcon.f strevc.f strexc.f strrfs.f strsen.f strsna.f strsyl.f
strtrs.f DEPRECATED/stzrqf.f stzrzf.f sstemr.f
slansf.f spftrf.f spftri.f spftrs.f ssfrk.f stfsm.f stftri.f stfttp.f
stfttr.f stpttf.f stpttr.f strttf.f strttp.f
@@ -114,16 +106,9 @@ set(SLASRC
sorbdb5.f sorbdb6.f sorcsd.f sorcsd2by1.f
sgeqrt.f sgeqrt2.f sgeqrt3.f sgemqrt.f
stpqrt.f stpqrt2.f stpmqrt.f stprfb.f spotri.f
sgelq.f sgelqt.f sgelqt3.f sgemlq.f sgemlqt.f sgemqr.f sgeqr.f sgetsls.f
slamswlq.f slamtsqr.f slaswlq.f slatsqr.f stplqt.f stplqt2.f stpmlqt.f
ssysv_aa.f ssytrf_aa.f ssytrs_aa.f slasyf_aa.f ssytf2_rk.f slasyf_rk.f
ssytrf_rk.f ssytrs_3.f ssycon_3.f ssytri_3.f ssytri_3x.f ssysv_rk.f
ssb2st_kernels.f ssbev_2stage.f ssbevd_2stage.f ssbevx_2stage.f
ssyev_2stage.f ssyevd_2stage.f ssyevr_2stage.f ssyevx_2stage.f
ssygv_2stage.f ssytrd_2stage.f ssytrd_sb2st.F ssytrd_sy2sb.f slarfy.f
)
set(DSLASRC spotrs.f spotrf2.f)
set(DSLASRC spotrs.f)
set(CLASRC
cbdsqr.f cgbbrd.f cgbcon.f cgbequ.f cgbrfs.f cgbsv.f cgbsvx.f
@@ -180,7 +165,7 @@ set(CLASRC
ctbcon.f ctbrfs.f ctbtrs.f ctgevc.f ctgex2.f
ctgexc.f ctgsen.f ctgsja.f ctgsna.f ctgsy2.f ctgsyl.f ctpcon.f
ctprfs.f ctptri.f
ctptrs.f ctrcon.f ctrevc.f ctrevc3.f ctrexc.f ctrrfs.f ctrsen.f ctrsna.f
ctptrs.f ctrcon.f ctrevc.f ctrexc.f ctrrfs.f ctrsen.f ctrsna.f
ctrsyl.f ctrtrs.f DEPRECATED/ctzrqf.f ctzrzf.f cung2l.f cung2r.f
cungbr.f cunghr.f cungl2.f cunglq.f cungql.f cungqr.f cungr2.f
cungrq.f cungtr.f cunm2l.f cunm2r.f cunmbr.f cunmhr.f cunml2.f
@@ -193,14 +178,6 @@ set(CLASRC
cunbdb5.f cunbdb6.f cuncsd.f cuncsd2by1.f
cgeqrt.f cgeqrt2.f cgeqrt3.f cgemqrt.f
ctpqrt.f ctpqrt2.f ctpmqrt.f ctprfb.f cpotri.f
cgelq.f cgelqt.f cgelqt3.f cgemlq.f cgemlqt.f cgemqr.f cgeqr.f cgetsls.f
clamswlq.f clamtsqr.f claswlq.f clatsqr.f ctplqt.f ctplqt2.f ctpmlqt.f
chesv_aa.f chetrf_aa.f chetrs_aa.f clahef_aa.f csytf2_rk.f clasyf_rk.f
csytrf_rk.f csytrs_3.f csycon_3.f csytri_3.f csytri_3x.f csysv_rk.f
chetf2_rk.f clahef_rk.f chetrf_rk.f chetrs_3.f checon_3.f chetri_3.f
chetri_3x.f chesv_rk.f chb2st_kernels.f chbev_2stage.f chbevd_2stage.f
chbevx_2stage.f cheev_2stage.f cheevd_2stage.f cheevr_2stage.f cheevx_2stage.f
chegv_2stage.f chetrd_2stage.f chetrd_hb2st.F chetrd_he2hb.f clarfy.f
)
set(ZCLASRC cpotrs.f)
@@ -212,11 +189,11 @@ set(DLASRC
DEPRECATED/dgegs.f DEPRECATED/dgegv.f dgehd2.f dgehrd.f dgelq2.f dgelqf.f
dgels.f dgelsd.f dgelss.f DEPRECATED/dgelsx.f dgelsy.f dgeql2.f dgeqlf.f
dgeqp3.f DEPRECATED/dgeqpf.f dgeqr2.f dgeqr2p.f dgeqrf.f dgeqrfp.f dgerfs.f
dgerq2.f dgerqf.f dgesc2.f dgesdd.f dgesvd.f dgesvdx.f dgesvx.f
dgetc2.f dgetri.f dgetrf2.f
dggbak.f dggbal.f dgges.f dgges3.f dggesx.f dggev.f dggev3.f dggevx.f
dggglm.f dgghd3.f dgghrd.f dgglse.f dggqrf.f
dggrqf.f dggsvd3.f dggsvp3.f DEPRECATED/dggsvd.f DEPRECATED/dggsvp.f dgtcon.f dgtrfs.f dgtsv.f
dgerq2.f dgerqf.f dgesc2.f dgesdd.f dgesvd.f dgesvx.f
dgetc2.f dgetri.f
dggbak.f dggbal.f dgges.f dggesx.f dggev.f dggevx.f
dggglm.f dgghrd.f dgglse.f dggqrf.f
dggrqf.f DEPRECATED/dggsvd.f DEPRECATED/dggsvp.f dgtcon.f dgtrfs.f dgtsv.f
dgtsvx.f dgttrf.f dgttrs.f dgtts2.f dhgeqz.f
dhsein.f dhseqr.f dlabrd.f dlacon.f dlacn2.f
dlaein.f dlaexc.f dlag2.f dlags2.f dlagtm.f dlagv2.f dlahqr.f
@@ -233,12 +210,12 @@ set(DLASRC
dlatbs.f dlatdf.f dlatps.f dlatrd.f dlatrs.f dlatrz.f DEPRECATED/dlatzm.f
dopgtr.f dopmtr.f dorg2l.f dorg2r.f
dorgbr.f dorghr.f dorgl2.f dorglq.f dorgql.f dorgqr.f dorgr2.f
dorgrq.f dorgtr.f dorm2l.f dorm2r.f dorm22.f
dorgrq.f dorgtr.f dorm2l.f dorm2r.f
dormbr.f dormhr.f dorml2.f dormlq.f dormql.f dormqr.f dormr2.f
dormr3.f dormrq.f dormrz.f dormtr.f dpbcon.f dpbequ.f dpbrfs.f
dpbstf.f dpbsv.f dpbsvx.f
dpbtf2.f dpbtrf.f dpbtrs.f dpocon.f dpoequ.f dporfs.f dposv.f
dposvx.f dpotrf2.f dpotrs.f dpstrf.f dpstf2.f
dposvx.f dpotrs.f dpstrf.f dpstf2.f
dppcon.f dppequ.f
dpprfs.f dppsv.f dppsvx.f dpptrf.f dpptri.f dpptrs.f dptcon.f
dpteqr.f dptrfs.f dptsv.f dptsvx.f dpttrs.f dptts2.f drscl.f
@@ -257,7 +234,7 @@ set(DLASRC
dtbcon.f dtbrfs.f dtbtrs.f dtgevc.f dtgex2.f dtgexc.f dtgsen.f
dtgsja.f dtgsna.f dtgsy2.f dtgsyl.f dtpcon.f dtprfs.f dtptri.f
dtptrs.f
dtrcon.f dtrevc.f dtrevc3.f dtrexc.f dtrrfs.f dtrsen.f dtrsna.f dtrsyl.f
dtrcon.f dtrevc.f dtrexc.f dtrrfs.f dtrsen.f dtrsna.f dtrsyl.f
dtrtrs.f DEPRECATED/dtzrqf.f dtzrzf.f dstemr.f
dsgesv.f dsposv.f dlag2s.f slag2d.f dlat2s.f
dlansf.f dpftrf.f dpftri.f dpftrs.f dsfrk.f dtfsm.f dtftri.f dtfttp.f
@@ -268,28 +245,20 @@ set(DLASRC
dorbdb5.f dorbdb6.f dorcsd.f dorcsd2by1.f
dgeqrt.f dgeqrt2.f dgeqrt3.f dgemqrt.f
dtpqrt.f dtpqrt2.f dtpmqrt.f dtprfb.f dpotri.f
dgelq.f dgelqt.f dgelqt3.f dgemlq.f dgemlqt.f dgemqr.f dgeqr.f dgetsls.f
dlamswlq.f dlamtsqr.f dlaswlq.f dlatsqr.f dtplqt.f dtplqt2.f dtpmlqt.f
dsysv_aa.f dsytrf_aa.f dsytrs_aa.f dlasyf_aa.f dsytf2_rk.f dlasyf_rk.f
dsytrf_rk.f dsytrs_3.f dsycon_3.f dsytri_3.f dsytri_3x.f dsysv_rk.f
dsb2st_kernels.f dsbev_2stage.f dsbevd_2stage.f dsbevx_2stage.f
dsyev_2stage.f dsyevd_2stage.f dsyevr_2stage.f dsyevx_2stage.f
dsygv_2stage.f dsytrd_2stage.f dsytrd_sb2st.F dsytrd_sy2sb.f dlarfy.f
)
set(ZLASRC
zbdsqr.f zgbbrd.f zgbcon.f zgbequ.f zgbrfs.f zgbsv.f zgbsvx.f
zgbtf2.f zgbtrf.f zgbtrs.f zgebak.f zgebal.f zgebd2.f zgebrd.f
zgecon.f zgeequ.f zgees.f zgeesx.f zgeev.f zgeevx.f
DEPRECATED/zgegs.f DEPRECATED/zgegv.f zgehd2.f zgehrd.f zgejsv.f zgelq2.f zgelqf.f
DEPRECATED/zgegs.f DEPRECATED/zgegv.f zgehd2.f zgehrd.f zgelq2.f zgelqf.f
zgels.f zgelsd.f zgelss.f DEPRECATED/zgelsx.f zgelsy.f zgeql2.f zgeqlf.f zgeqp3.f
DEPRECATED/zgeqpf.f zgeqr2.f zgeqr2p.f zgeqrf.f zgeqrfp.f zgerfs.f zgerq2.f zgerqf.f
zgesc2.f zgesdd.f zgesvd.f zgesvdx.f zgesvj.f zgesvx.f zgetc2.f
zgetri.f zgetrf2.f
zggbak.f zggbal.f zgges.f zgges3.f zggesx.f zggev.f zggev3.f zggevx.f zggglm.f
zgghd3.f zgghrd.f zgglse.f zggqrf.f zggrqf.f
DEPRECATED/zggsvd.f zggsvd3.f DEPRECATED/zggsvp.f zggsvp3.f
zgsvj0.f zgsvj1.f
zgesc2.f zgesdd.f zgesvd.f zgesvx.f zgetc2.f
zgetri.f
zggbak.f zggbal.f zgges.f zggesx.f zggev.f zggevx.f zggglm.f
zgghrd.f zgglse.f zggqrf.f zggrqf.f
DEPRECATED/zggsvd.f DEPRECATED/zggsvp.f
zgtcon.f zgtrfs.f zgtsv.f zgtsvx.f zgttrf.f zgttrs.f zgtts2.f zhbev.f
zhbevd.f zhbevx.f zhbgst.f zhbgv.f zhbgvd.f zhbgvx.f zhbtrd.f
zhecon.f zheev.f zheevd.f zheevr.f zheevx.f zhegs2.f zhegst.f
@@ -318,28 +287,28 @@ set(ZLASRC
zlarfg.f zlarft.f zlarfgp.f
zlarfx.f zlargv.f zlarnv.f zlarrv.f zlartg.f zlartv.f
zlarz.f zlarzb.f zlarzt.f zlascl.f zlaset.f zlasr.f
zlassq.f zlasyf.f zlasyf_rook.f zlasyf_aa.f
zlassq.f zlasyf.f zlasyf_rook.f
zlatbs.f zlatdf.f zlatps.f zlatrd.f zlatrs.f zlatrz.f DEPRECATED/zlatzm.f
zpbcon.f zpbequ.f zpbrfs.f zpbstf.f zpbsv.f
zpbsvx.f zpbtf2.f zpbtrf.f zpbtrs.f zpocon.f zpoequ.f zporfs.f
zposv.f zposvx.f zpotrf2.f zpotrs.f zpstrf.f zpstf2.f
zposv.f zposvx.f zpotrs.f zpstrf.f zpstf2.f
zppcon.f zppequ.f zpprfs.f zppsv.f zppsvx.f zpptrf.f zpptri.f zpptrs.f
zptcon.f zpteqr.f zptrfs.f zptsv.f zptsvx.f zpttrf.f zpttrs.f zptts2.f
zrot.f zspcon.f zsprfs.f zspsv.f
zspsvx.f zsptrf.f zsptri.f zsptrs.f zdrscl.f zstedc.f
zstegr.f zstein.f zsteqr.f
zsycon.f zsysv_aa.f
zsycon.f
zsyrfs.f zsysv.f zsysvx.f zsytf2.f zsytrf.f zsytri.f zsytri2.f zsytri2x.f
zsyswapr.f zsytrs.f zsytrs_aa.f zsytrs2.f zsyconv.f
zsyswapr.f zsytrs.f zsytrs2.f zsyconv.f
zsytf2_rook.f zsytrf_rook.f zsytrs_rook.f
zsytri_rook.f zsycon_rook.f zsysv_rook.f
ztbcon.f ztbrfs.f ztbtrs.f ztgevc.f ztgex2.f
ztgexc.f ztgsen.f ztgsja.f ztgsna.f ztgsy2.f ztgsyl.f ztpcon.f
ztprfs.f ztptri.f
ztptrs.f ztrcon.f ztrevc.f ztrevc3.f ztrexc.f ztrrfs.f ztrsen.f ztrsna.f
ztptrs.f ztrcon.f ztrevc.f ztrexc.f ztrrfs.f ztrsen.f ztrsna.f
ztrsyl.f ztrtrs.f DEPRECATED/ztzrqf.f ztzrzf.f zung2l.f
zung2r.f zungbr.f zunghr.f zungl2.f zunglq.f zungql.f zungqr.f zungr2.f
zungrq.f zungtr.f zunm2l.f zunm2r.f zunmbr.f zunmhr.f zunm22.f zunml2.f
zungrq.f zungtr.f zunm2l.f zunm2r.f zunmbr.f zunmhr.f zunml2.f
zunmlq.f zunmql.f zunmqr.f zunmr2.f zunmr3.f zunmrq.f zunmrz.f
zunmtr.f zupgtr.f
zupmtr.f izmax1.f dzsum1.f zstemr.f
@@ -351,15 +320,6 @@ set(ZLASRC
zunbdb5.f zunbdb6.f zuncsd.f zuncsd2by1.f
zgeqrt.f zgeqrt2.f zgeqrt3.f zgemqrt.f
ztpqrt.f ztpqrt2.f ztpmqrt.f ztprfb.f zpotri.f
zgelq.f zgelqt.f zgelqt3.f zgemlq.f zgemlqt.f zgemqr.f zgeqr.f zgetsls.f
zlamswlq.f zlamtsqr.f zlaswlq.f zlatsqr.f ztplqt.f ztplqt2.f ztpmlqt.f
zhesv_aa.f zhetrf_aa.f zhetrs_aa.f zlahef_aa.f zsytf2_rk.f zlasyf_rk.f
zsytrf_aa.f zsytrf_rk.f zsytrs_3.f zsycon_3.f zsytri_3.f zsytri_3x.f zsysv_rk.f
zhetf2_rk.f zlahef_rk.f zhetrf_rk.f zhetrs_3.f zhecon_3.f zhetri_3.f
zhetri_3x.f zhesv_rk.f zhb2st_kernels.f zhbev_2stage.f zhbevd_2stage.f
zhbevx_2stage.f zheev_2stage.f zheevd_2stage.f zheevr_2stage.f
zheevx_2stage.f zhegv_2stage.f zhetrd_2stage.f zhetrd_hb2st.F zhetrd_he2hb.f
zlarfy.f
)
set(LA_REL_SRC ${ALLAUX})

File diff suppressed because it is too large Load Diff

View File

@@ -1,9 +0,0 @@
libdir=@CMAKE_INSTALL_FULL_LIBDIR@
includedir=@CMAKE_INSTALL_FULL_INCLUDEDIR@
Name: OpenBLAS
Description: OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version
Version: @OPENBLAS_VERSION@
URL: https://github.com/xianyi/OpenBLAS
Libs: -L${libdir} -lopenblas
Cflags: -I${includedir}

View File

@@ -77,7 +77,7 @@ if (CYGWIN)
set(NO_EXPRECISION 1)
endif ()
if (NOT ${CMAKE_SYSTEM_NAME} STREQUAL "Windows" AND NOT ${CMAKE_SYSTEM_NAME} STREQUAL "Interix" AND NOT ${CMAKE_SYSTEM_NAME} STREQUAL "Android")
if (NOT ${CMAKE_SYSTEM_NAME} STREQUAL "Windows" AND NOT ${CMAKE_SYSTEM_NAME} STREQUAL "Interix")
if (SMP)
set(EXTRALIB "${EXTRALIB} -lpthread")
endif ()

View File

@@ -4,8 +4,7 @@
## This is triggered by system.cmake and runs before any of the code is built.
## Creates config.h and Makefile.conf by first running the c_check perl script (which creates those files).
## Next it runs f_check and appends some fortran information to the files.
## Then it runs getarch and getarch_2nd for even more environment information.
## Finally it builds gen_config_h for use at build time to generate config.h.
## Finally it runs getarch and getarch_2nd for even more environment information.
# CMake vars set by this file:
# CORE
@@ -51,20 +50,20 @@ else()
set(TARGET_CONF "config.h")
endif ()
include("${PROJECT_SOURCE_DIR}/cmake/c_check.cmake")
include("${CMAKE_SOURCE_DIR}/cmake/c_check.cmake")
if (NOT NOFORTRAN)
include("${PROJECT_SOURCE_DIR}/cmake/f_check.cmake")
include("${CMAKE_SOURCE_DIR}/cmake/f_check.cmake")
endif ()
# compile getarch
set(GETARCH_SRC
${PROJECT_SOURCE_DIR}/getarch.c
${CMAKE_SOURCE_DIR}/getarch.c
${CPUIDEMO}
)
if (NOT MSVC)
list(APPEND GETARCH_SRC ${PROJECT_SOURCE_DIR}/cpuid.S)
list(APPEND GETARCH_SRC ${CMAKE_SOURCE_DIR}/cpuid.S)
endif ()
if (MSVC)
@@ -72,26 +71,16 @@ if (MSVC)
set(GETARCH_FLAGS ${GETARCH_FLAGS} -DFORCE_GENERIC)
endif()
if ("${CMAKE_SYSTEM_NAME}" STREQUAL "WindowsStore")
# disable WindowsStore strict CRT checks
set(GETARCH_FLAGS ${GETARCH_FLAGS} -D_CRT_SECURE_NO_WARNINGS)
endif ()
set(GETARCH_DIR "${PROJECT_BINARY_DIR}/getarch_build")
set(GETARCH_BIN "getarch${CMAKE_EXECUTABLE_SUFFIX}")
file(MAKE_DIRECTORY ${GETARCH_DIR})
if (NOT "${CMAKE_SYSTEM_NAME}" STREQUAL "WindowsStore")
try_compile(GETARCH_RESULT ${GETARCH_DIR}
SOURCES ${GETARCH_SRC}
COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} -I${PROJECT_SOURCE_DIR}
OUTPUT_VARIABLE GETARCH_LOG
COPY_FILE ${PROJECT_BINARY_DIR}/${GETARCH_BIN}
)
try_compile(GETARCH_RESULT ${GETARCH_DIR}
SOURCES ${GETARCH_SRC}
COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} -I${CMAKE_SOURCE_DIR}
OUTPUT_VARIABLE GETARCH_LOG
COPY_FILE ${PROJECT_BINARY_DIR}/${GETARCH_BIN}
)
if (NOT ${GETARCH_RESULT})
MESSAGE(FATAL_ERROR "Compiling getarch failed ${GETARCH_LOG}")
endif ()
endif ()
message(STATUS "Running getarch")
# use the cmake binary w/ the -E param to run a shell command in a cross-platform way
@@ -107,18 +96,12 @@ ParseGetArchVars(${GETARCH_MAKE_OUT})
set(GETARCH2_DIR "${PROJECT_BINARY_DIR}/getarch2_build")
set(GETARCH2_BIN "getarch_2nd${CMAKE_EXECUTABLE_SUFFIX}")
file(MAKE_DIRECTORY ${GETARCH2_DIR})
if (NOT "${CMAKE_SYSTEM_NAME}" STREQUAL "WindowsStore")
try_compile(GETARCH2_RESULT ${GETARCH2_DIR}
SOURCES ${PROJECT_SOURCE_DIR}/getarch_2nd.c
COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} ${GETARCH2_FLAGS} -I${PROJECT_SOURCE_DIR}
OUTPUT_VARIABLE GETARCH2_LOG
COPY_FILE ${PROJECT_BINARY_DIR}/${GETARCH2_BIN}
)
if (NOT ${GETARCH2_RESULT})
MESSAGE(FATAL_ERROR "Compiling getarch_2nd failed ${GETARCH2_LOG}")
endif ()
endif ()
try_compile(GETARCH2_RESULT ${GETARCH2_DIR}
SOURCES ${CMAKE_SOURCE_DIR}/getarch_2nd.c
COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} ${GETARCH2_FLAGS} -I${CMAKE_SOURCE_DIR}
OUTPUT_VARIABLE GETARCH2_LOG
COPY_FILE ${PROJECT_BINARY_DIR}/${GETARCH2_BIN}
)
# use the cmake binary w/ the -E param to run a shell command in a cross-platform way
execute_process(COMMAND ${PROJECT_BINARY_DIR}/${GETARCH2_BIN} 0 OUTPUT_VARIABLE GETARCH2_MAKE_OUT)
@@ -128,21 +111,3 @@ execute_process(COMMAND ${PROJECT_BINARY_DIR}/${GETARCH2_BIN} 1 OUTPUT_VARIABLE
file(APPEND ${TARGET_CONF} ${GETARCH2_CONF_OUT})
ParseGetArchVars(${GETARCH2_MAKE_OUT})
# compile gen_config_h
set(GEN_CONFIG_H_DIR "${PROJECT_BINARY_DIR}/genconfig_h_build")
set(GEN_CONFIG_H_BIN "gen_config_h${CMAKE_EXECUTABLE_SUFFIX}")
set(GEN_CONFIG_H_FLAGS "-DVERSION=\"${OpenBLAS_VERSION}\"")
file(MAKE_DIRECTORY ${GEN_CONFIG_H_DIR})
if (NOT "${CMAKE_SYSTEM_NAME}" STREQUAL "WindowsStore")
try_compile(GEN_CONFIG_H_RESULT ${GEN_CONFIG_H_DIR}
SOURCES ${PROJECT_SOURCE_DIR}/gen_config_h.c
COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} ${GEN_CONFIG_H_FLAGS} -I${PROJECT_SOURCE_DIR}
OUTPUT_VARIABLE GEN_CONFIG_H_LOG
COPY_FILE ${PROJECT_BINARY_DIR}/${GEN_CONFIG_H_BIN}
)
if (NOT ${GEN_CONFIG_H_RESULT})
MESSAGE(FATAL_ERROR "Compiling gen_config_h failed ${GEN_CONFIG_H_LOG}")
endif ()
endif ()

View File

@@ -3,7 +3,7 @@
## Description: Ported from OpenBLAS/Makefile.system
##
set(NETLIB_LAPACK_DIR "${PROJECT_SOURCE_DIR}/lapack-netlib")
set(NETLIB_LAPACK_DIR "${CMAKE_SOURCE_DIR}/lapack-netlib")
# TODO: Makefile.system detects Darwin (mac) and switches to clang here -hpa
# http://stackoverflow.com/questions/714100/os-detecting-makefile
@@ -22,7 +22,7 @@ if (DEFINED BINARY AND DEFINED TARGET AND BINARY EQUAL 32)
if (${TARGET} STREQUAL "HASWELL" OR ${TARGET} STREQUAL "SANDYBRIDGE")
set(TARGET "NEHALEM")
endif ()
if (${TARGET} STREQUAL "BULLDOZER" OR ${TARGET} STREQUAL "PILEDRIVER" OR ${TARGET} STREQUAL "ZEN")
if (${TARGET} STREQUAL "BULLDOZER" OR ${TARGET} STREQUAL "PILEDRIVER")
set(TARGET "BARCELONA")
endif ()
endif ()
@@ -78,7 +78,7 @@ else ()
set(ONLY_CBLAS 0)
endif ()
include("${PROJECT_SOURCE_DIR}/cmake/prebuild.cmake")
include("${CMAKE_SOURCE_DIR}/cmake/prebuild.cmake")
if (NOT DEFINED NUM_THREADS)
set(NUM_THREADS ${NUM_CORES})
@@ -124,17 +124,17 @@ set(OBJCOPY "${CROSS_SUFFIX}objcopy")
set(OBJCONV "${CROSS_SUFFIX}objconv")
# OS dependent settings
include("${PROJECT_SOURCE_DIR}/cmake/os.cmake")
include("${CMAKE_SOURCE_DIR}/cmake/os.cmake")
# Architecture dependent settings
include("${PROJECT_SOURCE_DIR}/cmake/arch.cmake")
include("${CMAKE_SOURCE_DIR}/cmake/arch.cmake")
# C Compiler dependent settings
include("${PROJECT_SOURCE_DIR}/cmake/cc.cmake")
include("${CMAKE_SOURCE_DIR}/cmake/cc.cmake")
if (NOT NOFORTRAN)
# Fortran Compiler dependent settings
include("${PROJECT_SOURCE_DIR}/cmake/fc.cmake")
include("${CMAKE_SOURCE_DIR}/cmake/fc.cmake")
endif ()
if (BINARY64)
@@ -247,10 +247,10 @@ if (NOT DEFINED SYMBOLSUFFIX)
set(SYMBOLSUFFIX "")
endif ()
set(KERNELDIR "${PROJECT_SOURCE_DIR}/kernel/${ARCH}")
set(KERNELDIR "${CMAKE_SOURCE_DIR}/kernel/${ARCH}")
# TODO: need to convert these Makefiles
# include ${PROJECT_SOURCE_DIR}/cmake/${ARCH}.cmake
# include ${CMAKE_SOURCE_DIR}/cmake/${ARCH}.cmake
if (${CORE} STREQUAL "PPC440")
set(CCOMMON_OPT "${CCOMMON_OPT} -DALLOC_QALLOC")
@@ -312,8 +312,6 @@ endif ()
set(AWK awk)
set(SED sed)
set(REVISION "-r${OpenBLAS_VERSION}")
set(MAJOR_VERSION ${OpenBLAS_MAJOR_VERSION})
@@ -412,8 +410,8 @@ set(LIBDEFNAME "${LIBNAME}.${LIBSUFFIX}.def")
set(LIBEXPNAME "${LIBNAME}.${LIBSUFFIX}.exp")
set(LIBZIPNAME "${LIBNAME}.${LIBSUFFIX}.zip")
set(LIBS "${PROJECT_SOURCE_DIR}/${LIBNAME}")
set(LIBS_P "${PROJECT_SOURCE_DIR}/${LIBNAME_P}")
set(LIBS "${CMAKE_SOURCE_DIR}/${LIBNAME}")
set(LIBS_P "${CMAKE_SOURCE_DIR}/${LIBNAME_P}")
set(LIB_COMPONENTS BLAS)

View File

@@ -332,13 +332,6 @@ typedef int blasint;
#endif
#endif
#ifdef POWER8
#ifndef YIELDING
#define YIELDING __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop;\n");
#endif
#endif
/*
#ifdef PILEDRIVER
#ifndef YIELDING
@@ -404,10 +397,6 @@ please https://github.com/xianyi/OpenBLAS/issues/246
#include "common_sparc.h"
#endif
#ifdef ARCH_MIPS
#include "common_mips.h"
#endif
#ifdef ARCH_MIPS64
#include "common_mips64.h"
#endif
@@ -420,15 +409,7 @@ please https://github.com/xianyi/OpenBLAS/issues/246
#include "common_arm64.h"
#endif
#ifdef ARCH_ZARCH
#include "common_zarch.h"
#endif
#ifndef ASSEMBLER
#ifdef OS_WINDOWSSTORE
typedef char env_var_t[MAX_PATH];
#define readenv(p, n) 0
#else
#ifdef OS_WINDOWS
typedef char env_var_t[MAX_PATH];
#define readenv(p, n) GetEnvironmentVariable((LPCTSTR)(n), (LPTSTR)(p), sizeof(p))
@@ -436,7 +417,6 @@ typedef char env_var_t[MAX_PATH];
typedef char* env_var_t;
#define readenv(p, n) ((p)=getenv(n))
#endif
#endif
#if !defined(RPCC_DEFINED) && !defined(OS_WINDOWS)
#ifdef _POSIX_MONOTONIC_CLOCK
@@ -561,13 +541,8 @@ static void __inline blas_lock(volatile BLASULONG *address){
#endif
#if defined(C_PGI) || defined(C_SUN)
#if defined(__STDC_IEC_559_COMPLEX__)
#define CREAL(X) creal(X)
#define CIMAG(X) cimag(X)
#else
#define CREAL(X) (*((FLOAT *)&X + 0))
#define CIMAG(X) (*((FLOAT *)&X + 1))
#endif
#define CREAL(X) (*((FLOAT *)&X + 0))
#define CIMAG(X) (*((FLOAT *)&X + 1))
#else
#ifdef OPENBLAS_COMPLEX_STRUCT
#define CREAL(Z) ((Z).real)
@@ -640,14 +615,9 @@ void gotoblas_profile_init(void);
void gotoblas_profile_quit(void);
#ifdef USE_OPENMP
#ifndef C_MSVC
int omp_in_parallel(void);
int omp_get_num_procs(void);
#else
__declspec(dllimport) int __cdecl omp_in_parallel(void);
__declspec(dllimport) int __cdecl omp_get_num_procs(void);
#endif
#else
#ifdef __ELF__
int omp_in_parallel (void) __attribute__ ((weak));
int omp_get_num_procs(void) __attribute__ ((weak));
@@ -659,11 +629,7 @@ static __inline void blas_unlock(volatile BLASULONG *address){
*address = 0;
}
#ifdef OS_WINDOWSSTORE
static __inline int readenv_atoi(char *env) {
return 0;
}
#else
#ifdef OS_WINDOWS
static __inline int readenv_atoi(char *env) {
env_var_t p;
@@ -678,7 +644,7 @@ static __inline int readenv_atoi(char *env) {
return(0);
}
#endif
#endif
#if !defined(XDOUBLE) || !defined(QUAD_PRECISION)

View File

@@ -105,6 +105,7 @@ static inline int blas_quickdivide(blasint x, blasint y){
#define PROLOGUE \
.arm ;\
.global REALNAME ;\
.func REALNAME ;\
REALNAME:
#define EPILOGUE

View File

@@ -39,11 +39,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define INLINE inline
#ifdef F_INTERFACE_FLANG
#define RETURN_BY_STACK
#else
#define RETURN_BY_COMPLEX
#endif
#ifndef ASSEMBLER

View File

@@ -70,7 +70,7 @@ extern long int syscall (long int __sysno, ...);
static inline int my_mbind(void *addr, unsigned long len, int mode,
unsigned long *nodemask, unsigned long maxnode,
unsigned flags) {
#if defined (__LSB_VERSION__) || defined(ARCH_ZARCH)
#if defined (__LSB_VERSION__)
// So far, LSB (Linux Standard Base) don't support syscall().
// https://lsbbugs.linuxfoundation.org/show_bug.cgi?id=3482
return 0;
@@ -90,7 +90,7 @@ static inline int my_mbind(void *addr, unsigned long len, int mode,
}
static inline int my_set_mempolicy(int mode, const unsigned long *addr, unsigned long flag) {
#if defined (__LSB_VERSION__) || defined(ARCH_ZARCH)
#if defined (__LSB_VERSION__)
// So far, LSB (Linux Standard Base) don't support syscall().
// https://lsbbugs.linuxfoundation.org/show_bug.cgi?id=3482
return 0;

View File

@@ -2193,7 +2193,7 @@
#endif
#ifndef ASSEMBLER
#if defined(ARCH_X86) || defined(ARCH_X86_64) || defined(ARCH_IA64) || defined(ARCH_MIPS64) || defined(ARCH_ARM64)
#if defined(ARCH_X86) || defined(ARCH_X86_64) || defined(ARCH_IA64) || defined(ARCH_MIPS64)
extern BLASLONG gemm_offset_a;
extern BLASLONG gemm_offset_b;
extern BLASLONG sgemm_p;

View File

@@ -1,103 +0,0 @@
/*****************************************************************************
Copyright (c) 2016, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************************/
#ifndef COMMON_MIPS
#define COMMON_MIPS
#define MB __sync_synchronize()
#define WMB __sync_synchronize()
#define INLINE inline
#define RETURN_BY_COMPLEX
#ifndef ASSEMBLER
/*
 * Returns the value that the MIPS `rdhwr` instruction reads from hardware
 * register $30. The value is read into an unsigned long and is narrowed to
 * unsigned int by the return statement.
 * NOTE(review): the meaning of hardware register $30 is platform-specific;
 * confirm what it counts on the intended target before relying on its units.
 */
static inline unsigned int rpcc(void){
unsigned long ret;
/* .set push / .set pop save and restore the assembler option state
   around the single rdhwr instruction. */
__asm__ __volatile__(".set push \n"
"rdhwr %0, $30 \n"
".set pop" : "=r"(ret) : : "memory");
return ret;
}
#define RPCC_DEFINED
/*
 * Integer division helper: returns x divided by y using the plain C
 * division operator. The quotient is converted to int on return.
 * The caller must not pass y == 0.
 */
static inline int blas_quickdivide(blasint x, blasint y){
  const blasint quotient = x / y;

  return quotient;
}
#define GET_IMAGE(res)
#define GET_IMAGE_CANCEL
#endif
#ifndef F_INTERFACE
#define REALNAME ASMNAME
#else
#define REALNAME ASMFNAME
#endif
#if defined(ASSEMBLER) && !defined(NEEDPARAM)

/*
 * Function entry boilerplate for assembly kernels: exports REALNAME as a
 * global symbol and places its label.
 *
 * The previous definition began with the `.arm` directive, which selects the
 * ARM instruction set and is only defined by the ARM assembler. This header
 * is the MIPS one (COMMON_MIPS), so the directive has been dropped; the rest
 * of the macro is unchanged.
 */
#define PROLOGUE \
	.global	REALNAME ;\
REALNAME:

/* No function epilogue or profiling hook is emitted on this target. */
#define EPILOGUE

#define PROFCODE

#endif
/* Defined with no value; only its presence can be tested by other code. */
#define SEEK_ADDRESS
/* Default page size: 4 << 10 = 4096, unless PAGESIZE was already defined. */
#ifndef PAGESIZE
#define PAGESIZE ( 4 << 10)
#endif
/* 4 << 20 = 4194304 (presumably bytes, i.e. 4 MiB -- confirm at use site). */
#define HUGE_PAGESIZE ( 4 << 20)
/* 16 << 20 = 16777216 (presumably bytes, i.e. 16 MiB per buffer). */
#define BUFFER_SIZE (16 << 20)
/* One BUFFER_SIZE slot per CPU below START_ADDRESS. START_ADDRESS and
   MAX_CPU_NUMBER are not defined in this header. */
#define BASE_ADDRESS (START_ADDRESS - BUFFER_SIZE * MAX_CPU_NUMBER)
/* Fall back to the MAP_ANON spelling when MAP_ANONYMOUS is not provided. */
#ifndef MAP_ANONYMOUS
#define MAP_ANONYMOUS MAP_ANON
#endif
#endif

View File

@@ -71,16 +71,38 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifndef COMMON_MIPS64
#define COMMON_MIPS64
#define MB __sync_synchronize()
#define WMB __sync_synchronize()
#define MB
#define WMB
#define INLINE inline
#ifndef ASSEMBLER
static void INLINE blas_lock(volatile unsigned long *address){
long int ret, val = 1;
do {
while (*address) {YIELDING;};
__asm__ __volatile__(
"1: ll %0, %3\n"
" ori %2, %0, 1\n"
" sc %2, %1\n"
" beqz %2, 1b\n"
" andi %2, %0, 1\n"
" sync\n"
: "=&r" (val), "=m" (address), "=&r" (ret)
: "m" (address)
: "memory");
} while (ret);
}
#define BLAS_LOCK_DEFINED
static inline unsigned int rpcc(void){
unsigned long ret;
#if defined(LOONGSON3A) || defined(LOONGSON3B)
// unsigned long long tmp;
//__asm__ __volatile__("dmfc0 %0, $25, 1": "=r"(tmp):: "memory");
//ret=tmp;
@@ -89,10 +111,17 @@ static inline unsigned int rpcc(void){
"rdhwr %0, $2\n"
".set pop": "=r"(ret):: "memory");
#else
__asm__ __volatile__(".set push \n"
".set mips32r2\n"
"rdhwr %0, $30 \n"
".set pop" : "=r"(ret) : : "memory");
#endif
return ret;
}
#define RPCC_DEFINED
#if defined(LOONGSON3A) || defined(LOONGSON3B)
#ifndef NO_AFFINITY
#define WHEREAMI
static inline int WhereAmI(void){
@@ -105,6 +134,7 @@ static inline int WhereAmI(void){
}
#endif
#endif
static inline int blas_quickdivide(blasint x, blasint y){
return x / y;

View File

@@ -39,13 +39,8 @@
#ifndef COMMON_POWER
#define COMMON_POWER
#if defined(POWER8)
#define MB __asm__ __volatile__ ("eieio":::"memory")
#define WMB __asm__ __volatile__ ("eieio":::"memory")
#else
#define MB __asm__ __volatile__ ("sync")
#define WMB __asm__ __volatile__ ("sync")
#endif
#define INLINE inline
@@ -803,7 +798,7 @@ Lmcount$lazy_ptr:
#elif defined(PPC440FP2)
#define BUFFER_SIZE ( 16 << 20)
#elif defined(POWER8)
#define BUFFER_SIZE ( 64 << 20)
#define BUFFER_SIZE ( 32 << 20)
#else
#define BUFFER_SIZE ( 16 << 20)
#endif

View File

@@ -245,10 +245,6 @@ static __inline int blas_quickdivide(unsigned int x, unsigned int y){
#define RETURN_BY_STACK
#endif
#ifdef F_INTERFACE_FLANG
#define RETURN_BY_STACK
#endif
#ifdef F_INTERFACE_PGI
#define RETURN_BY_STACK
#endif

View File

@@ -1,140 +0,0 @@
/*****************************************************************************
Copyright (c) 2011-2016, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************************/
#ifndef COMMON_ZARCH
#define COMMON_ZARCH
/* MB and WMB are defined empty in this header, so using them emits no
   barrier instruction. The commented-out "dmb" lines that follow each
   definition are kept from the original text.
   NOTE(review): "dmb ish"/"dmb ishst" look like ARM barrier instructions
   left over from another port -- confirm whether this target needs real
   barriers here. */
#define MB
//__asm__ __volatile__ ("dmb ish" : : : "memory")
#define WMB
//__asm__ __volatile__ ("dmb ishst" : : : "memory")
/* INLINE maps to the C `inline` keyword. */
#define INLINE inline
/* Defined with no value; only its presence can be tested by other code. */
#define RETURN_BY_COMPLEX
#ifndef ASSEMBLER
/*
static void __inline blas_lock(volatile BLASULONG *address){
BLASULONG ret;
do {
while (*address) {YIELDING;};
__asm__ __volatile__(
"mov x4, #1 \n\t"
"1: \n\t"
"ldaxr x2, [%1] \n\t"
"cbnz x2, 1b \n\t"
"2: \n\t"
"stxr w3, x4, [%1] \n\t"
"cbnz w3, 1b \n\t"
"mov %0, #0 \n\t"
: "=r"(ret), "=r"(address)
: "1"(address)
: "memory", "x2" , "x3", "x4"
);
} while (ret);
}
*/
//#define BLAS_LOCK_DEFINED
/*
 * Integer division helper: returns x divided by y using the plain C
 * division operator. The quotient is converted to int on return.
 * The caller must not pass y == 0.
 */
static inline int blas_quickdivide(blasint x, blasint y){
  const blasint quotient = x / y;

  return quotient;
}
/*
 * GET_IMAGE(res) stores a floating-point register into `res` with an inline
 * "str" instruction: register d1 when DOUBLE is defined, s1 otherwise.
 * NOTE(review): this header is guarded by COMMON_ZARCH, but "str d1"/"str s1"
 * look like they were carried over from an ARM header -- confirm that this
 * macro is valid (or unused) on the z/Architecture target.
 */
#if defined(DOUBLE)
#define GET_IMAGE(res) __asm__ __volatile__("str d1, %0" : "=m"(res) : : "memory")
#else
#define GET_IMAGE(res) __asm__ __volatile__("str s1, %0" : "=m"(res) : : "memory")
#endif
/* Defined with no value, so it expands to nothing where it is used. */
#define GET_IMAGE_CANCEL
#endif
#ifndef F_INTERFACE
#define REALNAME ASMNAME
#else
#define REALNAME ASMFNAME
#endif
/* Only available to assembly sources that did not request NEEDPARAM. */
#if defined(ASSEMBLER) && !defined(NEEDPARAM)
/*
 * Function entry boilerplate for assembly kernels: switches to the .text
 * section, applies ".align 256", exports REALNAME as a global symbol of type
 * function, and places its label.
 * NOTE(review): the unit of the .align argument (bytes vs. power of two) is
 * assembler/target dependent -- confirm 256 is the intended alignment.
 */
#define PROLOGUE \
.text ;\
.align 256 ;\
.global REALNAME ;\
.type REALNAME, %function ;\
REALNAME:
/* No function epilogue or profiling hook is emitted on this target. */
#define EPILOGUE
#define PROFCODE
#endif
/* Defined with no value; only its presence can be tested by other code. */
#define SEEK_ADDRESS
/* Default page size: 4 << 10 = 4096, unless PAGESIZE was already defined. */
#ifndef PAGESIZE
#define PAGESIZE ( 4 << 10)
#endif
/* 4 << 20 = 4194304 (presumably bytes, i.e. 4 MiB -- confirm at use site). */
#define HUGE_PAGESIZE ( 4 << 20)
/* BUFFER_SIZE is 20 << 20 when CORTEXA57 is defined and 16 << 20 otherwise.
   NOTE(review): CORTEXA57 is an ARM core name; in this COMMON_ZARCH header
   the test looks like a leftover from another port -- confirm it can ever be
   true here. */
#if defined(CORTEXA57)
#define BUFFER_SIZE (20 << 20)
#else
#define BUFFER_SIZE (16 << 20)
#endif
/* One BUFFER_SIZE slot per CPU below START_ADDRESS. START_ADDRESS and
   MAX_CPU_NUMBER are not defined in this header. */
#define BASE_ADDRESS (START_ADDRESS - BUFFER_SIZE * MAX_CPU_NUMBER)
/* Fall back to the MAP_ANON spelling when MAP_ANONYMOUS is not provided. */
#ifndef MAP_ANONYMOUS
#define MAP_ANONYMOUS MAP_ANON
#endif
#endif

View File

@@ -114,7 +114,6 @@
#define CORE_HASWELL 24
#define CORE_STEAMROLLER 25
#define CORE_EXCAVATOR 26
#define CORE_ZEN 27
#define HAVE_SSE (1 << 0)
#define HAVE_SSE2 (1 << 1)
@@ -210,6 +209,5 @@ typedef struct {
#define CPUTYPE_HASWELL 48
#define CPUTYPE_STEAMROLLER 49
#define CPUTYPE_EXCAVATOR 50
#define CPUTYPE_ZEN 51
#endif

View File

@@ -74,7 +74,7 @@ int get_feature(char *search)
fclose(infile);
if( p == NULL ) return 0;
if( p == NULL ) return;
t = strtok(p," ");
while( t = strtok(NULL," "))

View File

@@ -30,26 +30,17 @@
#define CPU_UNKNOWN 0
#define CPU_ARMV8 1
#define CPU_CORTEXA57 2
#define CPU_VULCAN 3
#define CPU_THUNDERX 4
#define CPU_THUNDERX2T99 5
static char *cpuname[] = {
"UNKNOWN",
"ARMV8" ,
"CORTEXA57",
"VULCAN",
"THUNDERX",
"THUNDERX2T99"
"CORTEXA57"
};
static char *cpuname_lower[] = {
"unknown",
"armv8" ,
"cortexa57",
"vulcan",
"thunderx",
"thunderx2t99"
"cortexa57"
};
int get_feature(char *search)
@@ -94,34 +85,25 @@ int detect(void)
#ifdef linux
FILE *infile;
char buffer[512], *p, *cpu_part = NULL, *cpu_implementer = NULL;
p = (char *) NULL ;
char buffer[512], *p;
p = (char *) NULL ;
infile = fopen("/proc/cpuinfo", "r");
while (fgets(buffer, sizeof(buffer), infile)) {
if ((cpu_part != NULL) && (cpu_implementer != NULL)) {
infile = fopen("/proc/cpuinfo", "r");
while (fgets(buffer, sizeof(buffer), infile))
{
if (!strncmp("CPU part", buffer, 8))
{
p = strchr(buffer, ':') + 2;
break;
}
if ((cpu_part == NULL) && !strncmp("CPU part", buffer, 8)) {
cpu_part = strchr(buffer, ':') + 2;
cpu_part = strdup(cpu_part);
} else if ((cpu_implementer == NULL) && !strncmp("CPU implementer", buffer, 15)) {
cpu_implementer = strchr(buffer, ':') + 2;
cpu_implementer = strdup(cpu_implementer);
}
}
fclose(infile);
if(cpu_part != NULL && cpu_implementer != NULL) {
if (strstr(cpu_part, "0xd07") && strstr(cpu_implementer, "0x41"))
return CPU_CORTEXA57;
else if (strstr(cpu_part, "0x516") && strstr(cpu_implementer, "0x42"))
return CPU_VULCAN;
else if (strstr(cpu_part, "0x0a1") && strstr(cpu_implementer, "0x43"))
return CPU_THUNDERX;
else if (strstr(cpu_part, "0xFFF") && strstr(cpu_implementer, "0x43")) /* TODO */
return CPU_THUNDERX2T99;
if(p != NULL) {
if (strstr(p, "0xd07")) {
return CPU_CORTEXA57;
}
}
p = (char *) NULL ;
@@ -194,28 +176,6 @@ void get_cpuconfig(void)
printf("#define L2_ASSOCIATIVE 4\n");
break;
case CPU_VULCAN:
printf("#define VULCAN \n");
printf("#define HAVE_VFP \n");
printf("#define HAVE_VFPV3 \n");
printf("#define HAVE_NEON \n");
printf("#define HAVE_VFPV4 \n");
printf("#define L1_CODE_SIZE 32768 \n");
printf("#define L1_CODE_LINESIZE 64 \n");
printf("#define L1_CODE_ASSOCIATIVE 8 \n");
printf("#define L1_DATA_SIZE 32768 \n");
printf("#define L1_DATA_LINESIZE 64 \n");
printf("#define L1_DATA_ASSOCIATIVE 8 \n");
printf("#define L2_SIZE 262144 \n");
printf("#define L2_LINESIZE 64 \n");
printf("#define L2_ASSOCIATIVE 8 \n");
printf("#define L3_SIZE 33554432 \n");
printf("#define L3_LINESIZE 64 \n");
printf("#define L3_ASSOCIATIVE 32 \n");
printf("#define DTB_DEFAULT_ENTRIES 64 \n");
printf("#define DTB_SIZE 4096 \n");
break;
case CPU_CORTEXA57:
printf("#define CORTEXA57\n");
printf("#define HAVE_VFP\n");
@@ -231,42 +191,8 @@ void get_cpuconfig(void)
printf("#define L2_SIZE 2097152\n");
printf("#define L2_LINESIZE 64\n");
printf("#define L2_ASSOCIATIVE 16\n");
printf("#define DTB_DEFAULT_ENTRIES 64\n");
printf("#define DTB_SIZE 4096\n");
break;
case CPU_THUNDERX:
printf("#define ARMV8\n");
printf("#define THUNDERX\n");
printf("#define L1_DATA_SIZE 32768\n");
printf("#define L1_DATA_LINESIZE 128\n");
printf("#define L2_SIZE 16777216\n");
printf("#define L2_LINESIZE 128\n");
printf("#define DTB_DEFAULT_ENTRIES 64\n");
printf("#define DTB_SIZE 4096\n");
printf("#define L2_ASSOCIATIVE 16\n");
break;
case CPU_THUNDERX2T99:
printf("#define VULCAN \n");
printf("#define HAVE_VFP \n");
printf("#define HAVE_VFPV3 \n");
printf("#define HAVE_NEON \n");
printf("#define HAVE_VFPV4 \n");
printf("#define L1_CODE_SIZE 32768 \n");
printf("#define L1_CODE_LINESIZE 64 \n");
printf("#define L1_CODE_ASSOCIATIVE 8 \n");
printf("#define L1_DATA_SIZE 32768 \n");
printf("#define L1_DATA_LINESIZE 64 \n");
printf("#define L1_DATA_ASSOCIATIVE 8 \n");
printf("#define L2_SIZE 262144 \n");
printf("#define L2_LINESIZE 64 \n");
printf("#define L2_ASSOCIATIVE 8 \n");
printf("#define L3_SIZE 33554432 \n");
printf("#define L3_LINESIZE 64 \n");
printf("#define L3_ASSOCIATIVE 32 \n");
printf("#define DTB_DEFAULT_ENTRIES 64 \n");
printf("#define DTB_SIZE 4096 \n");
printf("#define DTB_DEFAULT_ENTRIES 64\n");
printf("#define DTB_SIZE 4096\n");
break;
}
}

View File

@@ -71,11 +71,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
/*********************************************************************/
#define CPU_UNKNOWN 0
#define CPU_P5600 1
#define CPU_SICORTEX 1
#define CPU_LOONGSON3A 2
#define CPU_LOONGSON3B 3
static char *cpuname[] = {
"UNKOWN",
"P5600"
"SICORTEX",
"LOONGSON3A",
"LOONGSON3B"
};
int detect(void){
@@ -116,7 +120,7 @@ int detect(void){
if (strstr(p, "loongson3a"))
return CPU_LOONGSON3A;
}else{
return CPU_UNKNOWN;
return CPU_SICORTEX;
}
}
//Check model name for Loongson3
@@ -145,40 +149,64 @@ char *get_corename(void){
}
void get_architecture(void){
printf("MIPS");
printf("MIPS64");
}
void get_subarchitecture(void){
if(detect()==CPU_P5600){
printf("P5600");
if(detect()==CPU_LOONGSON3A) {
printf("LOONGSON3A");
}else if(detect()==CPU_LOONGSON3B){
printf("LOONGSON3B");
}else{
printf("UNKNOWN");
printf("SICORTEX");
}
}
void get_subdirname(void){
printf("mips");
printf("mips64");
}
void get_cpuconfig(void){
if(detect()==CPU_P5600){
printf("#define P5600\n");
if(detect()==CPU_LOONGSON3A) {
printf("#define LOONGSON3A\n");
printf("#define L1_DATA_SIZE 65536\n");
printf("#define L1_DATA_LINESIZE 32\n");
printf("#define L2_SIZE 1048576\n");
printf("#define L2_SIZE 512488\n");
printf("#define L2_LINESIZE 32\n");
printf("#define DTB_DEFAULT_ENTRIES 64\n");
printf("#define DTB_SIZE 4096\n");
printf("#define L2_ASSOCIATIVE 8\n");
printf("#define L2_ASSOCIATIVE 4\n");
}else if(detect()==CPU_LOONGSON3B){
printf("#define LOONGSON3B\n");
printf("#define L1_DATA_SIZE 65536\n");
printf("#define L1_DATA_LINESIZE 32\n");
printf("#define L2_SIZE 512488\n");
printf("#define L2_LINESIZE 32\n");
printf("#define DTB_DEFAULT_ENTRIES 64\n");
printf("#define DTB_SIZE 4096\n");
printf("#define L2_ASSOCIATIVE 4\n");
}else{
printf("#define UNKNOWN\n");
printf("#define SICORTEX\n");
printf("#define L1_DATA_SIZE 32768\n");
printf("#define L1_DATA_LINESIZE 32\n");
printf("#define L2_SIZE 512488\n");
printf("#define L2_LINESIZE 32\n");
printf("#define DTB_DEFAULT_ENTRIES 32\n");
printf("#define DTB_SIZE 4096\n");
printf("#define L2_ASSOCIATIVE 8\n");
}
}
void get_libname(void){
if(detect()==CPU_P5600) {
printf("p5600\n");
if(detect()==CPU_LOONGSON3A) {
printf("loongson3a\n");
}else if(detect()==CPU_LOONGSON3B) {
printf("loongson3b\n");
}else{
printf("mips\n");
#ifdef __mips64
printf("mips64\n");
#else
printf("mips32\n");
#endif
}
}

View File

@@ -1,238 +0,0 @@
/*****************************************************************************
Copyright (c) 2011-2014, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************************/
/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin. */
/* All rights reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the following */
/* conditions are met: */
/* */
/* 1. Redistributions of source code must retain the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer. */
/* */
/* 2. Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/* POSSIBILITY OF SUCH DAMAGE. */
/* */
/* The views and conclusions contained in the software and */
/* documentation are those of the authors and should not be */
/* interpreted as representing official policies, either expressed */
/* or implied, of The University of Texas at Austin. */
/*********************************************************************/
/* CPU identifiers returned by detect(). Each value is also the index of the
   matching entry in cpuname[] below, so the two lists must stay in the same
   order. */
#define CPU_UNKNOWN 0
#define CPU_SICORTEX 1
#define CPU_LOONGSON3A 2
#define CPU_LOONGSON3B 3
#define CPU_I6400 4
#define CPU_P6600 5
/* Core names indexed by the CPU_* identifiers above.
   NOTE(review): entry 0 is spelled "UNKOWN"; it is a runtime string returned
   by get_corename(), so check every consumer before correcting the spelling. */
static char *cpuname[] = {
"UNKOWN",
"SICORTEX",
"LOONGSON3A",
"LOONGSON3B",
"I6400",
"P6600"
};
#ifdef linux
/*
 * Scan /proc/cpuinfo for the first line that starts with `key` and return a
 * pointer (into `buffer`) to the text following ": " on that line.
 *
 * Returns NULL when the file cannot be opened, when no line starts with
 * `key`, or when the matching line contains no ':' separator. The file is
 * always closed before returning.
 */
static char *cpuinfo_value(const char *key, char *buffer, int size){
  FILE *infile;
  char *colon;
  char *value = NULL;
  size_t keylen = strlen(key);

  infile = fopen("/proc/cpuinfo", "r");
  if (infile == NULL) return NULL;

  while (fgets(buffer, size, infile)){
    if (!strncmp(key, buffer, keylen)){
      colon = strchr(buffer, ':');
      if (colon != NULL){
        /* Skip the ':' and, when present, the single character after it
           (normally a blank) without stepping past the terminator. */
        value = colon + 1;
        if (*value != '\0') value++;
      }
      break;
    }
  }

  fclose(infile);
  return value;
}
#endif

/*
 * Identify the CPU by parsing /proc/cpuinfo (only when `linux` is defined).
 *
 * Returns one of the CPU_* identifiers:
 *   - CPU_LOONGSON3A / CPU_LOONGSON3B when the "cpu" line, the "system type"
 *     line or the "model name" line names that core;
 *   - CPU_SICORTEX when a "cpu" line exists but does not mention Loongson-3;
 *   - CPU_UNKNOWN otherwise, including when /proc/cpuinfo cannot be read.
 *
 * Unlike the earlier version, a missing file, a missing field, or a field
 * line without ':' no longer leads to a NULL pointer being dereferenced.
 */
int detect(void){

#ifdef linux
  char buffer[512];
  char *p;

  p = cpuinfo_value("cpu", buffer, (int)sizeof(buffer));
  if (p != NULL){
    if (strstr(p, "Loongson-3A")) return CPU_LOONGSON3A;
    if (strstr(p, "Loongson-3B")) return CPU_LOONGSON3B;
    if (!strstr(p, "Loongson-3")) return CPU_SICORTEX;

    /* Generic "Loongson-3": the system type line may tell 3A apart. */
    p = cpuinfo_value("system type", buffer, (int)sizeof(buffer));
    if (p != NULL && strstr(p, "loongson3a")) return CPU_LOONGSON3A;
  }

  /* Check model name for Loongson3 */
  p = cpuinfo_value("model name", buffer, (int)sizeof(buffer));
  if (p != NULL){
    if (strstr(p, "Loongson-3A")) return CPU_LOONGSON3A;
    if (strstr(p, "Loongson-3B")) return CPU_LOONGSON3B;
  }
#endif

  return CPU_UNKNOWN;
}
/* Returns the cpuname[] entry selected by the identifier detect() reports. */
char *get_corename(void){
  const int cpu = detect();

  return cpuname[cpu];
}
/* Writes the architecture name "MIPS64" to stdout, with no trailing newline. */
void get_architecture(void){
  fputs("MIPS64", stdout);
}
/*
 * Writes the sub-architecture name for the detected CPU to stdout, with no
 * trailing newline. Any identifier without its own case prints "SICORTEX".
 *
 * detect() is called once; the earlier if/else chain called it again for
 * every comparison, re-reading /proc/cpuinfo each time.
 */
void get_subarchitecture(void){
  switch (detect()){
  case CPU_LOONGSON3A:
    printf("LOONGSON3A");
    break;
  case CPU_LOONGSON3B:
    printf("LOONGSON3B");
    break;
  case CPU_I6400:
    printf("I6400");
    break;
  case CPU_P6600:
    printf("P6600");
    break;
  default:
    printf("SICORTEX");
    break;
  }
}
/* Writes the directory name "mips64" to stdout, with no trailing newline. */
void get_subdirname(void){
  fputs("mips64", stdout);
}
/*
 * Prints the cache/TLB configuration for the detected CPU to stdout as a
 * series of "#define NAME value" lines. Any identifier without its own case
 * gets the SICORTEX settings.
 *
 * detect() is called once; the earlier if/else chain called it again for
 * every comparison, re-reading /proc/cpuinfo each time.
 *
 * NOTE(review): L2_SIZE 512488 is not a power of two; 524288 (512 * 1024)
 * may have been intended. The value is kept as-is -- confirm before changing.
 */
void get_cpuconfig(void){
  switch (detect()){
  case CPU_LOONGSON3A:
    printf("#define LOONGSON3A\n");
    printf("#define L1_DATA_SIZE 65536\n");
    printf("#define L1_DATA_LINESIZE 32\n");
    printf("#define L2_SIZE 512488\n");
    printf("#define L2_LINESIZE 32\n");
    printf("#define DTB_DEFAULT_ENTRIES 64\n");
    printf("#define DTB_SIZE 4096\n");
    printf("#define L2_ASSOCIATIVE 4\n");
    break;
  case CPU_LOONGSON3B:
    printf("#define LOONGSON3B\n");
    printf("#define L1_DATA_SIZE 65536\n");
    printf("#define L1_DATA_LINESIZE 32\n");
    printf("#define L2_SIZE 512488\n");
    printf("#define L2_LINESIZE 32\n");
    printf("#define DTB_DEFAULT_ENTRIES 64\n");
    printf("#define DTB_SIZE 4096\n");
    printf("#define L2_ASSOCIATIVE 4\n");
    break;
  case CPU_I6400:
    printf("#define I6400\n");
    printf("#define L1_DATA_SIZE 65536\n");
    printf("#define L1_DATA_LINESIZE 32\n");
    printf("#define L2_SIZE 1048576\n");
    printf("#define L2_LINESIZE 32\n");
    printf("#define DTB_DEFAULT_ENTRIES 64\n");
    printf("#define DTB_SIZE 4096\n");
    printf("#define L2_ASSOCIATIVE 8\n");
    break;
  case CPU_P6600:
    printf("#define P6600\n");
    printf("#define L1_DATA_SIZE 65536\n");
    printf("#define L1_DATA_LINESIZE 32\n");
    printf("#define L2_SIZE 1048576\n");
    printf("#define L2_LINESIZE 32\n");
    printf("#define DTB_DEFAULT_ENTRIES 64\n");
    printf("#define DTB_SIZE 4096\n");
    printf("#define L2_ASSOCIATIVE 8\n");
    break;
  default:
    printf("#define SICORTEX\n");
    printf("#define L1_DATA_SIZE 32768\n");
    printf("#define L1_DATA_LINESIZE 32\n");
    printf("#define L2_SIZE 512488\n");
    printf("#define L2_LINESIZE 32\n");
    printf("#define DTB_DEFAULT_ENTRIES 32\n");
    printf("#define DTB_SIZE 4096\n");
    printf("#define L2_ASSOCIATIVE 8\n");
    break;
  }
}
/*
 * Writes the lower-case library name for the detected CPU to stdout,
 * followed by a newline. Any identifier without its own case prints
 * "mips64".
 *
 * detect() is called once; the earlier if/else chain called it again for
 * every comparison, re-reading /proc/cpuinfo each time.
 */
void get_libname(void){
  switch (detect()){
  case CPU_LOONGSON3A:
    printf("loongson3a\n");
    break;
  case CPU_LOONGSON3B:
    printf("loongson3b\n");
    break;
  case CPU_I6400:
    printf("i6400\n");
    break;
  case CPU_P6600:
    printf("p6600\n");
    break;
  default:
    printf("mips64\n");
    break;
  }
}

View File

@@ -636,13 +636,6 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){
LD1.associative = 8;
LD1.linesize = 64;
break;
case 0x63 :
DTB.size = 2048;
DTB.associative = 4;
DTB.linesize = 32;
LDTB.size = 4096;
LDTB.associative= 4;
LDTB.linesize = 32;
case 0x66 :
LD1.size = 8;
LD1.associative = 4;
@@ -674,13 +667,6 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){
LC1.size = 64;
LC1.associative = 8;
break;
case 0x76 :
ITB.size = 2048;
ITB.associative = 0;
ITB.linesize = 8;
LITB.size = 4096;
LITB.associative= 0;
LITB.linesize = 8;
case 0x77 :
LC1.size = 16;
LC1.associative = 4;
@@ -1124,9 +1110,6 @@ int get_cpuname(void){
break;
case 3:
switch (model) {
case 7:
// Bay Trail
return CPUTYPE_ATOM;
case 10:
case 14:
// Ivy Bridge
@@ -1189,8 +1172,6 @@ int get_cpuname(void){
#endif
else
return CPUTYPE_NEHALEM;
case 12:
// Braswell
case 13:
// Avoton
return CPUTYPE_NEHALEM;
@@ -1219,35 +1200,8 @@ int get_cpuname(void){
#endif
else
return CPUTYPE_NEHALEM;
case 7:
// Xeon Phi Knights Landing
if(support_avx())
#ifndef NO_AVX2
return CPUTYPE_HASWELL;
#else
return CPUTYPE_SANDYBRIDGE;
#endif
else
return CPUTYPE_NEHALEM;
case 12:
// Apollo Lake
return CPUTYPE_NEHALEM;
}
break;
case 9:
case 8:
switch (model) {
case 14: // Kaby Lake
if(support_avx())
#ifndef NO_AVX2
return CPUTYPE_HASWELL;
#else
return CPUTYPE_SANDYBRIDGE;
#endif
else
return CPUTYPE_NEHALEM;
}
break;
}
break;
case 0x7:
@@ -1279,11 +1233,8 @@ int get_cpuname(void){
return CPUTYPE_OPTERON;
case 1:
case 3:
case 7:
case 10:
return CPUTYPE_BARCELONA;
case 5:
return CPUTYPE_BOBCAT;
case 6:
switch (model) {
case 1:
@@ -1298,13 +1249,7 @@ int get_cpuname(void){
return CPUTYPE_PILEDRIVER;
else
return CPUTYPE_BARCELONA; //OS don't support AVX.
case 5: // New EXCAVATOR CPUS
if(support_avx())
return CPUTYPE_EXCAVATOR;
else
return CPUTYPE_BARCELONA; //OS don't support AVX.
case 0:
case 8:
switch(exmodel){
case 1: //AMD Trinity
if(support_avx())
@@ -1326,19 +1271,8 @@ int get_cpuname(void){
break;
}
break;
case 8:
switch (model) {
case 1:
// AMD Ryzen
if(support_avx())
#ifndef NO_AVX2
return CPUTYPE_ZEN;
#else
return CPUTYPE_SANDYBRIDGE; // Zen is closer in architecture to Sandy Bridge than to Excavator
#endif
else
return CPUTYPE_BARCELONA;
}
case 5:
return CPUTYPE_BOBCAT;
}
break;
}
@@ -1465,7 +1399,6 @@ static char *cpuname[] = {
"HASWELL",
"STEAMROLLER",
"EXCAVATOR",
"ZEN",
};
static char *lowercpuname[] = {
@@ -1519,7 +1452,6 @@ static char *lowercpuname[] = {
"haswell",
"steamroller",
"excavator",
"zen",
};
static char *corename[] = {
@@ -1550,7 +1482,6 @@ static char *corename[] = {
"HASWELL",
"STEAMROLLER",
"EXCAVATOR",
"ZEN",
};
static char *corename_lower[] = {
@@ -1581,7 +1512,6 @@ static char *corename_lower[] = {
"haswell",
"steamroller",
"excavator",
"zen",
};
@@ -1748,8 +1678,6 @@ int get_coretype(void){
#endif
else
return CORE_NEHALEM;
case 12:
// Braswell
case 13:
// Avoton
return CORE_NEHALEM;
@@ -1778,33 +1706,8 @@ int get_coretype(void){
#endif
else
return CORE_NEHALEM;
case 7:
// Phi Knights Landing
if(support_avx())
#ifndef NO_AVX2
return CORE_HASWELL;
#else
return CORE_SANDYBRIDGE;
#endif
else
return CORE_NEHALEM;
case 12:
// Apollo Lake
return CORE_NEHALEM;
}
break;
case 9:
case 8:
if (model == 14) { // Kaby Lake
if(support_avx())
#ifndef NO_AVX2
return CORE_HASWELL;
#else
return CORE_SANDYBRIDGE;
#endif
else
return CORE_NEHALEM;
}
break;
}
break;
@@ -1834,13 +1737,8 @@ int get_coretype(void){
return CORE_PILEDRIVER;
else
return CORE_BARCELONA; //OS don't support AVX.
case 5: // New EXCAVATOR
if(support_avx())
return CORE_EXCAVATOR;
else
return CORE_BARCELONA; //OS don't support AVX.
case 0:
case 8:
switch(exmodel){
case 1: //AMD Trinity
if(support_avx())
@@ -1862,22 +1760,9 @@ int get_coretype(void){
}
break;
}
} else if (exfamily == 8) {
switch (model) {
case 1:
// AMD Ryzen
if(support_avx())
#ifndef NO_AVX2
return CORE_ZEN;
#else
return CORE_SANDYBRIDGE; // Zen is closer in architecture to Sandy Bridge than to Excavator
#endif
else
return CORE_BARCELONA;
}
} else {
return CORE_BARCELONA;
}
}else return CORE_BARCELONA;
}
}

View File

@@ -1,111 +0,0 @@
/**************************************************************************
Copyright (c) 2016, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
#include <string.h>
/* CPU identifiers returned by detect(). Each value is also the index of the
   matching entry in cpuname[] and cpuname_lower[] below, so all three lists
   must stay in the same order. */
#define CPU_GENERIC 0
#define CPU_Z13 1
/* Upper-case core names, indexed by the CPU_* identifiers above. */
static char *cpuname[] = {
"ZARCH_GENERIC",
"Z13"
};
/* Lower-case spellings of the same names, in the same order. */
static char *cpuname_lower[] = {
"zarch_generic",
"z13"
};
/*
 * Identify the CPU from the "Type" line of /proc/sysinfo.
 *
 * Returns CPU_Z13 when the text after the ':' on the first line starting
 * with "Type" contains "2964" or "2965", and CPU_GENERIC otherwise.
 *
 * CPU_GENERIC is also returned when /proc/sysinfo cannot be opened, when it
 * has no "Type" line, or when that line has no ':' separator. The earlier
 * version dereferenced a NULL pointer in each of those cases.
 */
int detect(void)
{
  FILE *infile;
  char buffer[512];
  char *colon;
  char *type = NULL;

  infile = fopen("/proc/sysinfo", "r");
  if (infile == NULL) return CPU_GENERIC;

  while (fgets(buffer, sizeof(buffer), infile)){
    if (!strncmp("Type", buffer, 4)){
      colon = strchr(buffer, ':');
      if (colon != NULL){
        /* Skip the ':' and, when present, the single character after it
           (normally a blank) without stepping past the terminator. */
        type = colon + 1;
        if (*type != '\0') type++;
      }
      break;
    }
  }

  fclose(infile);

  if (type == NULL) return CPU_GENERIC;

  if (strstr(type, "2964") || strstr(type, "2965")) return CPU_Z13;

  return CPU_GENERIC;
}
/* Writes the lower-case name of the detected CPU to stdout, with no
   trailing newline. */
void get_libname(void)
{
  const char *name = cpuname_lower[detect()];

  printf("%s", name);
}
/* Returns the cpuname[] entry selected by the identifier detect() reports. */
char *get_corename(void)
{
  const int cpu = detect();

  return cpuname[cpu];
}
/* Writes the architecture name "ZARCH" to stdout, with no trailing newline. */
void get_architecture(void)
{
  fputs("ZARCH", stdout);
}
void get_subarchitecture(void)
{
int d = detect();
printf("%s", cpuname[d]);
}
/* Writes the directory name "zarch" to stdout, with no trailing newline. */
void get_subdirname(void)
{
  fputs("zarch", stdout);
}
/*
 * Prints the configuration for the detected CPU to stdout as "#define" lines:
 * the core's own macro followed by DTB_DEFAULT_ENTRIES. Nothing is printed
 * for an identifier other than CPU_GENERIC or CPU_Z13.
 */
void get_cpuconfig(void)
{
  const int cpu = detect();

  if (cpu == CPU_GENERIC) {
    printf("#define ZARCH_GENERIC\n");
    printf("#define DTB_DEFAULT_ENTRIES 64\n");
  } else if (cpu == CPU_Z13) {
    printf("#define Z13\n");
    printf("#define DTB_DEFAULT_ENTRIES 64\n");
  }
}

View File

@@ -105,16 +105,12 @@ ARCH_X86_64
ARCH_POWER
#endif
#if defined(__s390x__) || defined(__zarch__)
ARCH_ZARCH
#endif
#ifdef __mips64
ARCH_MIPS64
#endif
#if defined(__mips32) || defined(__mips)
ARCH_MIPS
ARCH_MIPS32
#endif
#ifdef __alpha

View File

@@ -1,4 +1,4 @@
include_directories(${PROJECT_SOURCE_DIR})
include_directories(${CMAKE_SOURCE_DIR})
enable_language(Fortran)

View File

@@ -42,7 +42,6 @@ ztestl3o_3m = c_zblas3_3m.o c_z3chke_3m.o auxiliary.o c_xerbla.o constant.o
all :: all1 all2 all3
all1: xscblat1 xdcblat1 xccblat1 xzcblat1
ifndef CROSS
ifeq ($(USE_OPENMP), 1)
OMP_NUM_THREADS=2 ./xscblat1
OMP_NUM_THREADS=2 ./xdcblat1
@@ -54,10 +53,8 @@ else
OPENBLAS_NUM_THREADS=2 ./xccblat1
OPENBLAS_NUM_THREADS=2 ./xzcblat1
endif
endif
all2: xscblat2 xdcblat2 xccblat2 xzcblat2
ifndef CROSS
ifeq ($(USE_OPENMP), 1)
OMP_NUM_THREADS=2 ./xscblat2 < sin2
OMP_NUM_THREADS=2 ./xdcblat2 < din2
@@ -69,10 +66,8 @@ else
OPENBLAS_NUM_THREADS=2 ./xccblat2 < cin2
OPENBLAS_NUM_THREADS=2 ./xzcblat2 < zin2
endif
endif
all3: xscblat3 xdcblat3 xccblat3 xzcblat3
ifndef CROSS
ifeq ($(USE_OPENMP), 1)
OMP_NUM_THREADS=2 ./xscblat3 < sin3
OMP_NUM_THREADS=2 ./xdcblat3 < din3
@@ -93,7 +88,6 @@ else
OPENBLAS_NUM_THREADS=2 ./xccblat3_3m < cin3_3m
OPENBLAS_NUM_THREADS=2 ./xzcblat3_3m < zin3_3m
endif
endif

View File

@@ -1,5 +1,5 @@
include_directories(${PROJECT_SOURCE_DIR})
include_directories(${CMAKE_SOURCE_DIR})
# sources that need to be compiled twice, once with no flags and once with LOWER
set(UL_SOURCES

View File

@@ -1,4 +1,4 @@
include_directories(${PROJECT_SOURCE_DIR})
include_directories(${CMAKE_SOURCE_DIR})
# N.B. In the original makefile there was a BLOCKS define used in the compilation of these files but I don't see any evidence of it being set anywhere. -hpa

View File

@@ -316,7 +316,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
if (min_l > GEMM3M_Q) {
min_l = (min_l + 1) / 2;
#ifdef UNROLL_X
min_l = ((min_l + UNROLL_X - 1)/UNROLL_X) * UNROLL_X;
min_l = (min_l + UNROLL_X - 1) & ~(UNROLL_X - 1);
#endif
}
}
@@ -326,7 +326,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
min_i = GEMM3M_P;
} else {
if (min_i > GEMM3M_P) {
min_i = ((min_i / 2 + GEMM3M_UNROLL_M - 1)/GEMM3M_UNROLL_M) * GEMM3M_UNROLL_M;
min_i = (min_i / 2 + GEMM3M_UNROLL_M - 1) & ~(GEMM3M_UNROLL_M - 1);
}
}
@@ -365,7 +365,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
min_i = GEMM3M_P;
} else
if (min_i > GEMM3M_P) {
min_i = ((min_i / 2 + GEMM3M_UNROLL_M - 1)/GEMM3M_UNROLL_M) * GEMM3M_UNROLL_M;
min_i = (min_i / 2 + GEMM3M_UNROLL_M - 1) & ~(GEMM3M_UNROLL_M - 1);
}
START_RPCC();
@@ -386,7 +386,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
min_i = GEMM3M_P;
} else {
if (min_i > GEMM3M_P) {
min_i = ((min_i / 2 + GEMM3M_UNROLL_M - 1)/GEMM3M_UNROLL_M) * GEMM3M_UNROLL_M;
min_i = (min_i / 2 + GEMM3M_UNROLL_M - 1) & ~(GEMM3M_UNROLL_M - 1);
}
}
@@ -429,7 +429,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
min_i = GEMM3M_P;
} else
if (min_i > GEMM3M_P) {
min_i = ((min_i / 2 + GEMM3M_UNROLL_M - 1)/GEMM3M_UNROLL_M) * GEMM3M_UNROLL_M;
min_i = (min_i / 2 + GEMM3M_UNROLL_M - 1) & ~(GEMM3M_UNROLL_M - 1);
}
START_RPCC();
@@ -451,7 +451,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
min_i = GEMM3M_P;
} else {
if (min_i > GEMM3M_P) {
min_i = ((min_i / 2 + GEMM3M_UNROLL_M - 1)/GEMM3M_UNROLL_M) * GEMM3M_UNROLL_M;
min_i = (min_i / 2 + GEMM3M_UNROLL_M - 1) & ~(GEMM3M_UNROLL_M - 1);
}
}
@@ -494,7 +494,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
min_i = GEMM3M_P;
} else
if (min_i > GEMM3M_P) {
min_i = ((min_i / 2 + GEMM3M_UNROLL_M - 1)/GEMM3M_UNROLL_M) * GEMM3M_UNROLL_M;
min_i = (min_i / 2 + GEMM3M_UNROLL_M - 1) & ~(GEMM3M_UNROLL_M - 1);
}
START_RPCC();

View File

@@ -297,9 +297,9 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
min_l = GEMM_Q;
} else {
if (min_l > GEMM_Q) {
min_l = ((min_l / 2 + GEMM_UNROLL_M - 1)/GEMM_UNROLL_M) * GEMM_UNROLL_M;
min_l = (min_l / 2 + GEMM_UNROLL_M - 1) & ~(GEMM_UNROLL_M - 1);
}
gemm_p = ((l2size / min_l + GEMM_UNROLL_M - 1)/GEMM_UNROLL_M) * GEMM_UNROLL_M;
gemm_p = ((l2size / min_l + GEMM_UNROLL_M - 1) & ~(GEMM_UNROLL_M - 1));
while (gemm_p * min_l > l2size) gemm_p -= GEMM_UNROLL_M;
}
@@ -311,7 +311,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
min_i = GEMM_P;
} else {
if (min_i > GEMM_P) {
min_i = ((min_i / 2 + GEMM_UNROLL_M - 1)/GEMM_UNROLL_M) * GEMM_UNROLL_M;
min_i = (min_i / 2 + GEMM_UNROLL_M - 1) & ~(GEMM_UNROLL_M - 1);
} else {
l1stride = 0;
}
@@ -369,7 +369,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
min_i = GEMM_P;
} else
if (min_i > GEMM_P) {
min_i = ((min_i / 2 + GEMM_UNROLL_M - 1)/GEMM_UNROLL_M) * GEMM_UNROLL_M;
min_i = (min_i / 2 + GEMM_UNROLL_M - 1) & ~(GEMM_UNROLL_M - 1);
}
START_RPCC();

View File

@@ -365,7 +365,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
buffer[0] = sb;
for (i = 1; i < DIVIDE_RATE; i++) {
buffer[i] = buffer[i - 1] + GEMM3M_Q * (((div_n + GEMM3M_UNROLL_N - 1)/GEMM3M_UNROLL_N) * GEMM3M_UNROLL_N);
buffer[i] = buffer[i - 1] + GEMM3M_Q * ((div_n + GEMM3M_UNROLL_N - 1) & ~(GEMM3M_UNROLL_N - 1));
}
for(ls = 0; ls < k; ls += min_l){
@@ -384,7 +384,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
min_i = GEMM3M_P;
} else {
if (min_i > GEMM3M_P) {
min_i = ((min_i / 2 + GEMM3M_UNROLL_M - 1)/GEMM3M_UNROLL_M) * GEMM3M_UNROLL_M;
min_i = (min_i / 2 + GEMM3M_UNROLL_M - 1) & ~(GEMM3M_UNROLL_M - 1);
}
}
@@ -482,7 +482,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
min_i = GEMM3M_P;
} else
if (min_i > GEMM3M_P) {
min_i = (((min_i + 1) / 2 + GEMM3M_UNROLL_M - 1)/GEMM3M_UNROLL_M) * GEMM3M_UNROLL_M;
min_i = ((min_i + 1) / 2 + GEMM3M_UNROLL_M - 1) & ~(GEMM3M_UNROLL_M - 1);
}
START_RPCC();
@@ -618,7 +618,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
min_i = GEMM3M_P;
} else
if (min_i > GEMM3M_P) {
min_i = (((min_i + 1) / 2 + GEMM3M_UNROLL_M - 1)/GEMM3M_UNROLL_M) * GEMM3M_UNROLL_M;
min_i = ((min_i + 1) / 2 + GEMM3M_UNROLL_M - 1) & ~(GEMM3M_UNROLL_M - 1);
}
START_RPCC();
@@ -754,7 +754,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
min_i = GEMM3M_P;
} else
if (min_i > GEMM3M_P) {
min_i = (((min_i + 1) / 2 + GEMM3M_UNROLL_M - 1)/GEMM3M_UNROLL_M) * GEMM3M_UNROLL_M;
min_i = ((min_i + 1) / 2 + GEMM3M_UNROLL_M - 1) & ~(GEMM3M_UNROLL_M - 1);
}
START_RPCC();

View File

@@ -189,7 +189,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
min_i = GEMM_P;
} else
if (min_i > GEMM_P) {
min_i = ((min_i / 2 + GEMM_UNROLL_MN - 1)/GEMM_UNROLL_MN) * GEMM_UNROLL_MN;
min_i = (min_i / 2 + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1);
}
#ifndef LOWER
@@ -230,7 +230,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
min_i = GEMM_P;
} else
if (min_i > GEMM_P) {
min_i = ((min_i / 2 + GEMM_UNROLL_MN - 1)/GEMM_UNROLL_MN) * GEMM_UNROLL_MN;
min_i = (min_i / 2 + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1);
}
ICOPY_OPERATION(min_l, min_i, a, lda, ls, is, sa);
@@ -245,7 +245,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
min_i = GEMM_P;
} else
if (min_i > GEMM_P) {
min_i = ((min_i / 2 + GEMM_UNROLL_MN - 1)/GEMM_UNROLL_MN) * GEMM_UNROLL_MN;
min_i = (min_i / 2 + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1);
}
if (m_start >= js) {
@@ -284,7 +284,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
min_i = GEMM_P;
} else
if (min_i > GEMM_P) {
min_i = ((min_i / 2 + GEMM_UNROLL_MN - 1)/GEMM_UNROLL_MN) * GEMM_UNROLL_MN;
min_i = (min_i / 2 + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1);
}
ICOPY_OPERATION(min_l, min_i, b, ldb, ls, is, sa);
@@ -322,7 +322,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
min_i = GEMM_P;
} else
if (min_i > GEMM_P) {
min_i = ((min_i / 2 + GEMM_UNROLL_MN - 1)/GEMM_UNROLL_MN) * GEMM_UNROLL_MN;
min_i = (min_i / 2 + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1);
}
aa = sb + min_l * (is - js) * COMPSIZE;
@@ -353,7 +353,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
min_i = GEMM_P;
} else
if (min_i > GEMM_P) {
min_i = ((min_i / 2 + GEMM_UNROLL_MN - 1)/GEMM_UNROLL_MN) * GEMM_UNROLL_MN;
min_i = (min_i / 2 + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1);
}
aa = sb + min_l * (m_start - js) * COMPSIZE;
@@ -383,7 +383,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
min_i = GEMM_P;
} else
if (min_i > GEMM_P) {
min_i = ((min_i / 2 + GEMM_UNROLL_MN - 1)/GEMM_UNROLL_MN) * GEMM_UNROLL_MN;
min_i = (min_i / 2 + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1);
}
aa = sb + min_l * (is - js) * COMPSIZE;

View File

@@ -198,7 +198,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
min_i = GEMM_P;
} else
if (min_i > GEMM_P) {
min_i = ((min_i / 2 + GEMM_UNROLL_MN - 1)/GEMM_UNROLL_MN) * GEMM_UNROLL_MN;
min_i = (min_i / 2 + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1);
}
#ifndef LOWER
@@ -239,7 +239,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
min_i = GEMM_P;
} else
if (min_i > GEMM_P) {
min_i = ((min_i / 2 + GEMM_UNROLL_MN - 1)/GEMM_UNROLL_MN) * GEMM_UNROLL_MN;
min_i = (min_i / 2 + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1);
}
aa = sb + min_l * (is - js) * COMPSIZE;
@@ -303,7 +303,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
min_i = GEMM_P;
} else
if (min_i > GEMM_P) {
min_i = ((min_i / 2 + GEMM_UNROLL_MN - 1)/GEMM_UNROLL_MN) * GEMM_UNROLL_MN;
min_i = (min_i / 2 + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1);
}
START_RPCC();
@@ -375,7 +375,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
min_i = GEMM_P;
} else
if (min_i > GEMM_P) {
min_i = ((min_i / 2 + GEMM_UNROLL_MN - 1)/GEMM_UNROLL_MN) * GEMM_UNROLL_MN;
min_i = (min_i / 2 + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1);
}
if (is < js + min_j) {
@@ -460,7 +460,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
min_i = GEMM_P;
} else
if (min_i > GEMM_P) {
min_i = ((min_i / 2 + GEMM_UNROLL_MN - 1)/GEMM_UNROLL_MN) * GEMM_UNROLL_MN;
min_i = (min_i / 2 + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1);
}
START_RPCC();

View File

@@ -210,7 +210,8 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
fprintf(stderr, "Thread[%ld] m_from : %ld m_to : %ld n_from : %ld n_to : %ld\n", mypos, m_from, m_to, n_from, n_to);
#endif
div_n = (((m_to - m_from + DIVIDE_RATE - 1) / DIVIDE_RATE + GEMM_UNROLL_MN - 1)/GEMM_UNROLL_MN) * GEMM_UNROLL_MN;
div_n = ((m_to - m_from + DIVIDE_RATE - 1) / DIVIDE_RATE
+ GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1);
buffer[0] = sb;
for (i = 1; i < DIVIDE_RATE; i++) {
@@ -232,7 +233,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
min_i = GEMM_P;
} else {
if (min_i > GEMM_P) {
min_i = ((min_i / 2 + GEMM_UNROLL_MN - 1)/GEMM_UNROLL_MN) * GEMM_UNROLL_MN;
min_i = (min_i / 2 + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1);
}
}
@@ -252,7 +253,8 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
STOP_RPCC(copy_A);
div_n = (((m_to - m_from + DIVIDE_RATE - 1) / DIVIDE_RATE + GEMM_UNROLL_MN - 1)/GEMM_UNROLL_MN) * GEMM_UNROLL_MN;
div_n = ((m_to - m_from + DIVIDE_RATE - 1) / DIVIDE_RATE
+ GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1);
for (xxx = m_from, bufferside = 0; xxx < m_to; xxx += div_n, bufferside ++) {
@@ -351,8 +353,9 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
while (current >= 0) {
#endif
div_n = (((range_n[current + 1] - range_n[current] + DIVIDE_RATE - 1) / DIVIDE_RATE + GEMM_UNROLL_MN - 1)/GEMM_UNROLL_MN) * GEMM_UNROLL_MN;
div_n = ((range_n[current + 1] - range_n[current] + DIVIDE_RATE - 1) / DIVIDE_RATE
+ GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1);
for (xxx = range_n[current], bufferside = 0; xxx < range_n[current + 1]; xxx += div_n, bufferside ++) {
START_RPCC();
@@ -409,7 +412,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
min_i = GEMM_P;
} else
if (min_i > GEMM_P) {
min_i = (((min_i + 1) / 2 + GEMM_UNROLL_MN - 1)/GEMM_UNROLL_MN) * GEMM_UNROLL_MN;
min_i = ((min_i + 1) / 2 + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1);
}
START_RPCC();
@@ -422,7 +425,8 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
do {
div_n = (((range_n[current + 1] - range_n[current] + DIVIDE_RATE - 1) / DIVIDE_RATE + GEMM_UNROLL_MN - 1)/GEMM_UNROLL_MN) * GEMM_UNROLL_MN;
div_n = ((range_n[current + 1] - range_n[current] + DIVIDE_RATE - 1) / DIVIDE_RATE
+ GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1);
for (xxx = range_n[current], bufferside = 0; xxx < range_n[current + 1]; xxx += div_n, bufferside ++) {
@@ -598,9 +602,9 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
double di = (double)i;
width = (((BLASLONG)((sqrt(di * di + dnum) - di) + mask)/(mask+1)) * (mask+1) );
width = (((BLASLONG)(sqrt(di * di + dnum) - di) + mask) & ~mask);
if (num_cpu == 0) width = n - (((n - width)/(mask+1)) * (mask+1) );
if (num_cpu == 0) width = n - ((n - width) & ~mask);
if ((width > n - i) || (width < mask)) width = n - i;
@@ -640,7 +644,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
double di = (double)i;
width = (((BLASLONG)((sqrt(di * di + dnum) - di) + mask)/(mask+1)) * (mask+1));
width = (((BLASLONG)(sqrt(di * di + dnum) - di) + mask) & ~mask);
if ((width > n - i) || (width < mask)) width = n - i;

View File

@@ -310,7 +310,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
buffer[0] = sb;
for (i = 1; i < DIVIDE_RATE; i++) {
buffer[i] = buffer[i - 1] + GEMM_Q * ((div_n + GEMM_UNROLL_N - 1)/GEMM_UNROLL_N) * GEMM_UNROLL_N * COMPSIZE;
buffer[i] = buffer[i - 1] + GEMM_Q * ((div_n + GEMM_UNROLL_N - 1) & ~(GEMM_UNROLL_N - 1)) * COMPSIZE;
}
@@ -331,7 +331,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
min_i = GEMM_P;
} else {
if (min_i > GEMM_P) {
min_i = ((min_i / 2 + GEMM_UNROLL_M - 1)/GEMM_UNROLL_M) * GEMM_UNROLL_M;
min_i = (min_i / 2 + GEMM_UNROLL_M - 1) & ~(GEMM_UNROLL_M - 1);
} else {
if (args -> nthreads == 1) l1stride = 0;
}
@@ -443,7 +443,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
min_i = GEMM_P;
} else
if (min_i > GEMM_P) {
min_i = (((min_i + 1) / 2 + GEMM_UNROLL_M - 1)/GEMM_UNROLL_M) * GEMM_UNROLL_M;
min_i = ((min_i + 1) / 2 + GEMM_UNROLL_M - 1) & ~(GEMM_UNROLL_M - 1);
}
START_RPCC();

View File

@@ -158,7 +158,7 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha_r,
int mm, nn;
mm = (loop/GEMM_UNROLL_MN) * GEMM_UNROLL_MN;
mm = (loop & ~(GEMM_UNROLL_MN - 1));
nn = MIN(GEMM_UNROLL_MN, n - loop);
#ifndef LOWER

View File

@@ -109,7 +109,7 @@ int CNAME(int mode, blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, int (
if (nthreads - num_cpu > 1) {
di = (double)i;
width = (BLASLONG)(( sqrt(di * di + dnum) - di + mask)/(mask+1)) * (mask+1);
width = ((BLASLONG)( sqrt(di * di + dnum) - di) + mask) & ~mask;
if ((width <= 0) || (width > n_to - i)) width = n_to - i;
@@ -149,7 +149,7 @@ int CNAME(int mode, blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, int (
if (nthreads - num_cpu > 1) {
di = (double)(arg -> n - i);
width = ((BLASLONG)((-sqrt(di * di + dnum) + di) + mask)/(mask+1)) * (mask+1);
width = ((BLASLONG)(-sqrt(di * di + dnum) + di) + mask) & ~mask;
if ((width <= 0) || (width > n_to - i)) width = n_to - i;

View File

@@ -149,7 +149,7 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i,
int mm, nn;
mm = (loop/GEMM_UNROLL_MN) * GEMM_UNROLL_MN;
mm = (loop & ~(GEMM_UNROLL_MN - 1));
nn = MIN(GEMM_UNROLL_MN, n - loop);
#ifndef LOWER

View File

@@ -132,7 +132,7 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha_r,
int mm, nn;
mm = (loop/GEMM_UNROLL_MN) * GEMM_UNROLL_MN;
mm = (loop & ~(GEMM_UNROLL_MN - 1));
nn = MIN(GEMM_UNROLL_MN, n - loop);
#ifndef LOWER

View File

@@ -1,4 +1,4 @@
include_directories(${PROJECT_SOURCE_DIR})
include_directories(${CMAKE_SOURCE_DIR})
if (${CORE} STREQUAL "PPC440")
set(MEMORY memory_qalloc.c)
@@ -12,8 +12,6 @@ if (SMP)
set(BLAS_SERVER blas_server_omp.c)
elseif (${CMAKE_SYSTEM_NAME} STREQUAL "Windows")
set(BLAS_SERVER blas_server_win32.c)
elseif (${CMAKE_SYSTEM_NAME} STREQUAL "WindowsStore")
set(BLAS_SERVER blas_server_win32.c)
endif ()
if (NOT DEFINED BLAS_SERVER)

View File

@@ -110,74 +110,3 @@ int blas_level1_thread(int mode, BLASLONG m, BLASLONG n, BLASLONG k, void *alpha
return 0;
}
int blas_level1_thread_with_return_value(int mode, BLASLONG m, BLASLONG n, BLASLONG k, void *alpha,
void *a, BLASLONG lda,
void *b, BLASLONG ldb,
void *c, BLASLONG ldc, int (*function)(), int nthreads){
blas_queue_t queue[MAX_CPU_NUMBER];
blas_arg_t args [MAX_CPU_NUMBER];
BLASLONG i, width, astride, bstride;
int num_cpu, calc_type;
calc_type = (mode & BLAS_PREC) + ((mode & BLAS_COMPLEX) != 0) + 2;
mode |= BLAS_LEGACY;
for (i = 0; i < nthreads; i++) blas_queue_init(&queue[i]);
num_cpu = 0;
i = m;
while (i > 0){
/* Adjust Parameters */
width = blas_quickdivide(i + nthreads - num_cpu - 1,
nthreads - num_cpu);
i -= width;
if (i < 0) width = width + i;
astride = width * lda;
if (!(mode & BLAS_TRANSB_T)) {
bstride = width * ldb;
} else {
bstride = width;
}
astride <<= calc_type;
bstride <<= calc_type;
args[num_cpu].m = width;
args[num_cpu].n = n;
args[num_cpu].k = k;
args[num_cpu].a = (void *)a;
args[num_cpu].b = (void *)b;
args[num_cpu].c = (void *)((char *)c + num_cpu * sizeof(double)*2);
args[num_cpu].lda = lda;
args[num_cpu].ldb = ldb;
args[num_cpu].ldc = ldc;
args[num_cpu].alpha = alpha;
queue[num_cpu].mode = mode;
queue[num_cpu].routine = function;
queue[num_cpu].args = &args[num_cpu];
queue[num_cpu].next = &queue[num_cpu + 1];
a = (void *)((BLASULONG)a + astride);
b = (void *)((BLASULONG)b + bstride);
num_cpu ++;
}
if (num_cpu) {
queue[num_cpu - 1].next = NULL;
exec_blas(num_cpu, queue);
}
return 0;
}

View File

@@ -70,7 +70,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
/*********************************************************************/
#include "common.h"
#if defined(OS_LINUX) || defined(OS_NETBSD) || defined(OS_DARWIN) || defined(OS_ANDROID) || defined(OS_SUNOS) || defined(OS_FREEBSD)
#if defined(OS_LINUX) || defined(OS_NETBSD) || defined(OS_DARWIN) || defined(OS_ANDROID) || defined(OS_SUNOS)
#include <dlfcn.h>
#include <signal.h>
#include <sys/resource.h>
@@ -276,9 +276,6 @@ static void* blas_thread_server(void *arg){
unsigned int last_tick;
void *buffer, *sa, *sb;
blas_queue_t *queue;
blas_queue_t *tscq;
#ifdef TIMING_DEBUG
unsigned long start, stop;
#endif
@@ -312,11 +309,8 @@ blas_queue_t *tscq;
last_tick = (unsigned int)rpcc();
pthread_mutex_lock (&thread_status[cpu].lock);
tscq=thread_status[cpu].queue;
pthread_mutex_unlock (&thread_status[cpu].lock);
while (!thread_status[cpu].queue) {
while(!tscq) {
YIELDING;
if ((unsigned int)rpcc() - last_tick > thread_timeout) {
@@ -339,9 +333,6 @@ blas_queue_t *tscq;
last_tick = (unsigned int)rpcc();
}
pthread_mutex_lock (&thread_status[cpu].lock);
tscq=thread_status[cpu].queue;
pthread_mutex_unlock (&thread_status[cpu].lock);
}
@@ -360,9 +351,7 @@ blas_queue_t *tscq;
if (queue) {
int (*routine)(blas_arg_t *, void *, void *, void *, void *, BLASLONG) = queue -> routine;
pthread_mutex_lock (&thread_status[cpu].lock);
thread_status[cpu].queue = (blas_queue_t *)1;
pthread_mutex_unlock (&thread_status[cpu].lock);
sa = queue -> sa;
sb = queue -> sb;
@@ -444,10 +433,7 @@ blas_queue_t *tscq;
// thread is marked as done and other threads use them
WMB;
pthread_mutex_lock (&thread_status[cpu].lock);
thread_status[cpu].queue = (blas_queue_t * volatile) ((long)thread_status[cpu].queue & 0); /* Need a trick */
pthread_mutex_unlock (&thread_status[cpu].lock);
WMB;
}
@@ -627,7 +613,6 @@ int exec_blas_async(BLASLONG pos, blas_queue_t *queue){
#endif
BLASLONG i = 0;
blas_queue_t *current = queue;
blas_queue_t *tsiq,*tspq;
#if defined(OS_LINUX) && !defined(NO_AFFINITY) && !defined(PARAMTEST)
int node = get_node();
int nodes = get_num_nodes();
@@ -675,23 +660,15 @@ int exec_blas_async(BLASLONG pos, blas_queue_t *queue){
}
}
#else
pthread_mutex_lock (&thread_status[i].lock);
tsiq=thread_status[i].queue ;
pthread_mutex_unlock (&thread_status[i].lock);
while(tsiq) {
while(thread_status[i].queue) {
i ++;
if (i >= blas_num_threads - 1) i = 0;
pthread_mutex_lock (&thread_status[i].lock);
tsiq=thread_status[i].queue ;
pthread_mutex_unlock (&thread_status[i].lock);
}
#endif
queue -> assigned = i;
WMB;
pthread_mutex_lock (&thread_status[i].lock);
thread_status[i].queue = queue;
pthread_mutex_unlock (&thread_status[i].lock);
WMB;
queue = queue -> next;
@@ -712,15 +689,11 @@ int exec_blas_async(BLASLONG pos, blas_queue_t *queue){
pos = current -> assigned;
pthread_mutex_lock (&thread_status[pos].lock);
tspq=thread_status[pos].queue;
pthread_mutex_unlock (&thread_status[pos].lock);
if ((BLASULONG)tspq > 1) {
pthread_mutex_lock (&thread_status[pos].lock);
if ((BLASULONG)thread_status[pos].queue > 1) {
if (thread_status[pos].status == THREAD_STATUS_SLEEP) {
pthread_mutex_lock (&thread_status[pos].lock);
#ifdef MONITOR
num_suspend ++;
@@ -730,9 +703,8 @@ int exec_blas_async(BLASLONG pos, blas_queue_t *queue){
thread_status[pos].status = THREAD_STATUS_WAKEUP;
pthread_cond_signal(&thread_status[pos].wakeup);
}
}
pthread_mutex_unlock(&thread_status[pos].lock);
}
}
current = current -> next;
@@ -742,22 +714,11 @@ int exec_blas_async(BLASLONG pos, blas_queue_t *queue){
}
int exec_blas_async_wait(BLASLONG num, blas_queue_t *queue){
blas_queue_t * tsqq;
while ((num > 0) && queue) {
pthread_mutex_lock(&thread_status[queue->assigned].lock);
tsqq=thread_status[queue -> assigned].queue;
pthread_mutex_unlock(&thread_status[queue->assigned].lock);
while(tsqq) {
while(thread_status[queue -> assigned].queue) {
YIELDING;
pthread_mutex_lock(&thread_status[queue->assigned].lock);
tsqq=thread_status[queue -> assigned].queue;
pthread_mutex_unlock(&thread_status[queue->assigned].lock);
};
queue = queue -> next;

View File

@@ -443,11 +443,8 @@ int BLASFUNC(blas_thread_shutdown)(void){
SetEvent(pool.killed);
for(i = 0; i < blas_num_threads - 1; i++){
WaitForSingleObject(blas_threads[i], 5); //INFINITE);
#ifndef OS_WINDOWSSTORE
// TerminateThread is only available with WINAPI_DESKTOP and WINAPI_SYSTEM not WINAPI_APP in UWP
TerminateThread(blas_threads[i],0);
#endif
WaitForSingleObject(blas_threads[i], 5); //INFINITE);
TerminateThread(blas_threads[i],0);
}
blas_server_avail = 0;

View File

@@ -70,10 +70,8 @@ extern gotoblas_t gotoblas_STEAMROLLER;
extern gotoblas_t gotoblas_EXCAVATOR;
#ifdef NO_AVX2
#define gotoblas_HASWELL gotoblas_SANDYBRIDGE
#define gotoblas_ZEN gotoblas_SANDYBRIDGE
#else
extern gotoblas_t gotoblas_HASWELL;
extern gotoblas_t gotoblas_ZEN;
#endif
#else
//Use NEHALEM kernels for sandy bridge
@@ -83,7 +81,6 @@ extern gotoblas_t gotoblas_ZEN;
#define gotoblas_PILEDRIVER gotoblas_BARCELONA
#define gotoblas_STEAMROLLER gotoblas_BARCELONA
#define gotoblas_EXCAVATOR gotoblas_BARCELONA
#define gotoblas_ZEN gotoblas_BARCELONA
#endif
@@ -235,7 +232,6 @@ static gotoblas_t *get_coretype(void){
return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
}
}
if (model == 7) return &gotoblas_ATOM; //Bay Trail
return NULL;
case 4:
//Intel Haswell
@@ -265,8 +261,9 @@ static gotoblas_t *get_coretype(void){
return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
}
}
//Intel Braswell / Avoton
if (model == 12 || model == 13) {
//Intel Avoton
if (model == 13) {
openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK);
return &gotoblas_NEHALEM;
}
return NULL;
@@ -289,30 +286,6 @@ static gotoblas_t *get_coretype(void){
return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
}
}
//Intel Phi Knights Landing
if (model == 7) {
if(support_avx())
return &gotoblas_HASWELL;
else{
openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK);
return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
}
}
//Apollo Lake
if (model == 12) {
return &gotoblas_NEHALEM;
}
return NULL;
case 9:
case 8:
if (model == 14 ) { // Kaby Lake
if(support_avx())
return &gotoblas_HASWELL;
else{
openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK);
return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
}
}
return NULL;
}
case 0xf:
@@ -358,14 +331,7 @@ static gotoblas_t *get_coretype(void){
openblas_warning(FALLBACK_VERBOSE, BARCELONA_FALLBACK);
return &gotoblas_BARCELONA; //OS doesn't support AVX. Use old kernels.
}
}else if(model == 5){
if(support_avx())
return &gotoblas_EXCAVATOR;
else{
openblas_warning(FALLBACK_VERBOSE, BARCELONA_FALLBACK);
return &gotoblas_BARCELONA; //OS doesn't support AVX. Use old kernels.
}
}else if(model == 0 || model == 8){
}else if(model == 0){
if (exmodel == 1) {
//AMD Trinity
if(support_avx())
@@ -392,16 +358,9 @@ static gotoblas_t *get_coretype(void){
}
}
} else if (exfamily == 8) {
if (model == 1) {
if(support_avx())
return &gotoblas_ZEN;
else{
openblas_warning(FALLBACK_VERBOSE, BARCELONA_FALLBACK);
return &gotoblas_BARCELONA; //OS doesn't support AVX. Use old kernels.
}
}
}else {
} else {
return &gotoblas_BARCELONA;
}
}
@@ -411,6 +370,7 @@ static gotoblas_t *get_coretype(void){
switch (family) {
case 0x6:
return &gotoblas_NANO;
break;
}
}
@@ -441,7 +401,6 @@ static char *corename[] = {
"Haswell",
"Steamroller",
"Excavator",
"Zen"
};
char *gotoblas_corename(void) {
@@ -468,7 +427,6 @@ char *gotoblas_corename(void) {
if (gotoblas == &gotoblas_HASWELL) return corename[20];
if (gotoblas == &gotoblas_STEAMROLLER) return corename[21];
if (gotoblas == &gotoblas_EXCAVATOR) return corename[22];
if (gotoblas == &gotoblas_ZEN) return corename[23];
return corename[0];
}
@@ -481,7 +439,7 @@ static gotoblas_t *force_coretype(char *coretype){
char message[128];
//char mname[20];
for ( i=1 ; i <= 23; i++)
for ( i=1 ; i <= 21; i++)
{
if (!strncasecmp(coretype,corename[i],20))
{
@@ -499,7 +457,6 @@ static gotoblas_t *force_coretype(char *coretype){
switch (found)
{
case 23: return (&gotoblas_ZEN);
case 22: return (&gotoblas_EXCAVATOR);
case 21: return (&gotoblas_STEAMROLLER);
case 20: return (&gotoblas_HASWELL);

View File

@@ -354,24 +354,6 @@ static int numa_check(void) {
return common -> num_nodes;
}
#if defined(__GLIBC_PREREQ)
#if !__GLIBC_PREREQ(2, 6)
int sched_getcpu(void)
{
int cpu;
FILE *fp = NULL;
if ( (fp = fopen("/proc/self/stat", "r")) == NULL)
return -1;
if ( fscanf( fp, "%*s%*s%*s%*s%*s%*s%*s%*s%*s%*s%*s%*s%*s%*s%*s%*s%*s%*s%*s%*s%*s%*s%*s%*s%*s%*s%*s%*s%*s%*s%*s%*s%*s%*s%*s%*s%*s%*s%d", &cpu) != 1) {
fclose (fp);
return -1;
}
fclose (fp);
return(cpu);
}
#endif
#endif
static void numa_mapping(void) {
int node, cpu, core;
@@ -379,9 +361,6 @@ static void numa_mapping(void) {
unsigned long work, bit;
int count = 0;
int bitmask_idx = 0;
int current_cpu;
int current_node = 0;
int cpu_count = 0;
for (node = 0; node < common -> num_nodes; node ++) {
core = 0;
@@ -403,84 +382,33 @@ static void numa_mapping(void) {
fprintf(stderr, "CPU (%2d) : %08lx\n", cpu, common -> cpu_info[cpu]);
#endif
current_cpu = sched_getcpu();
for (cpu = 0; cpu < count; cpu++) {
if (READ_CPU(common -> cpu_info[cpu]) == current_cpu) {
current_node = READ_NODE(common -> cpu_info[cpu]);
break;
}
}
for (i = 0; i < MAX_BITMASK_LEN; i++)
cpu_count += popcount(common -> node_info[current_node][i] & common -> avail[i]);
h = 1;
/*
* If all the processes can be accommodated in the
* in the current node itself, then bind to cores
* from the current node only
*/
if (numprocs <= cpu_count) {
/*
* First sort all the cores in order from the current node.
* Then take remaining nodes one by one in order,
* and sort their cores in order.
*/
for (i = 0; i < count; i++) {
for (j = 0; j < count - 1; j++) {
int node_1, node_2;
int core_1, core_2;
int swap = 0;
while (h < count) h = 2 * h + 1;
node_1 = READ_NODE(common -> cpu_info[j]);
node_2 = READ_NODE(common -> cpu_info[j + 1]);
core_1 = READ_CORE(common -> cpu_info[j]);
core_2 = READ_CORE(common -> cpu_info[j + 1]);
if (node_1 == node_2) {
if (core_1 > core_2)
swap = 1;
} else {
if ((node_2 == current_node) ||
((node_1 != current_node) && (node_1 > node_2)))
swap = 1;
}
if (swap) {
unsigned long temp;
temp = common->cpu_info[j];
common->cpu_info[j] = common->cpu_info[j + 1];
common->cpu_info[j + 1] = temp;
}
while (h > 1) {
h /= 2;
for (i = h; i < count; i++) {
work = common -> cpu_info[i];
bit = CPU_ISSET(i, &cpu_orig_mask[0]);
j = i - h;
while (work < common -> cpu_info[j]) {
common -> cpu_info[j + h] = common -> cpu_info[j];
if (CPU_ISSET(j, &cpu_orig_mask[0])) {
CPU_SET(j + h, &cpu_orig_mask[0]);
} else {
CPU_CLR(j + h, &cpu_orig_mask[0]);
}
j -= h;
if (j < 0) break;
}
}
} else {
h = 1;
while (h < count) h = 2 * h + 1;
while (h > 1) {
h /= 2;
for (i = h; i < count; i++) {
work = common -> cpu_info[i];
bit = CPU_ISSET(i, &cpu_orig_mask[0]);
j = i - h;
while (work < common -> cpu_info[j]) {
common -> cpu_info[j + h] = common -> cpu_info[j];
if (CPU_ISSET(j, &cpu_orig_mask[0])) {
CPU_SET(j + h, &cpu_orig_mask[0]);
} else {
CPU_CLR(j + h, &cpu_orig_mask[0]);
}
j -= h;
if (j < 0) break;
}
common -> cpu_info[j + h] = work;
if (bit) {
CPU_SET(j + h, &cpu_orig_mask[0]);
} else {
CPU_CLR(j + h, &cpu_orig_mask[0]);
}
common -> cpu_info[j + h] = work;
if (bit) {
CPU_SET(j + h, &cpu_orig_mask[0]);
} else {
CPU_CLR(j + h, &cpu_orig_mask[0]);
}
}
}
@@ -488,10 +416,7 @@ static void numa_mapping(void) {
fprintf(stderr, "\nSorting ...\n\n");
for (cpu = 0; cpu < count; cpu++)
fprintf(stderr, "CPUINFO (%2d) : %08lx (CPU=%3lu CORE=%3lu NODE=%3lu)\n", cpu, common -> cpu_info[cpu],
READ_CPU(common -> cpu_info[cpu]),
READ_CORE(common -> cpu_info[cpu]),
READ_NODE(common -> cpu_info[cpu]));
fprintf(stderr, "CPU (%2d) : %08lx\n", cpu, common -> cpu_info[cpu]);
#endif
}
@@ -826,12 +751,13 @@ void gotoblas_affinity_init(void) {
common -> shmid = pshmid;
if (common -> magic != SH_MAGIC) {
#ifdef DEBUG
fprintf(stderr, "Shared Memory Initialization.\n");
#endif
//returns the number of processors which are currently online
common -> num_procs = sysconf(_SC_NPROCESSORS_CONF);;
common -> num_procs = sysconf(_SC_NPROCESSORS_ONLN);;
if(common -> num_procs > MAX_CPUS) {
fprintf(stderr, "\nOpenBLAS Warining : The number of CPU/Cores(%d) is beyond the limit(%d). Terminated.\n", common->num_procs, MAX_CPUS);
@@ -847,7 +773,7 @@ void gotoblas_affinity_init(void) {
if (common -> num_nodes > 1) numa_mapping();
common -> final_num_procs = 0;
for(i = 0; i < common -> avail_count; i++) common -> final_num_procs += rcount(common -> avail[i]) + 1; //Make the max cpu number.
for(i = 0; i < common -> avail_count; i++) common -> final_num_procs += rcount(common -> avail[i]) + 1; //Make the max cpu number.
for (cpu = 0; cpu < common -> final_num_procs; cpu ++) common -> cpu_use[cpu] = 0;
@@ -940,7 +866,7 @@ void gotoblas_set_affinity2(int threads) {};
void gotoblas_affinity_reschedule(void) {};
int get_num_procs(void) { return sysconf(_SC_NPROCESSORS_CONF); }
int get_num_procs(void) { return sysconf(_SC_NPROCESSORS_ONLN); }
int get_num_nodes(void) { return 1; }

View File

@@ -169,13 +169,13 @@ void goto_set_num_threads(int num_threads) {};
#else
#if defined(OS_LINUX) || defined(OS_SUNOS) || defined(OS_NETBSD)
#if defined(OS_LINUX) || defined(OS_SUNOS)
#ifndef NO_AFFINITY
int get_num_procs(void);
#else
int get_num_procs(void) {
static int nums = 0;
if (!nums) nums = sysconf(_SC_NPROCESSORS_CONF);
if (!nums) nums = sysconf(_SC_NPROCESSORS_ONLN);
return nums;
}
#endif
@@ -184,7 +184,7 @@ int get_num_procs(void) {
#ifdef OS_ANDROID
int get_num_procs(void) {
static int nums = 0;
if (!nums) nums = sysconf(_SC_NPROCESSORS_CONF);
if (!nums) nums = sysconf(_SC_NPROCESSORS_ONLN);
return nums;
}
#endif
@@ -381,16 +381,6 @@ static int release_pos = 0;
static int hot_alloc = 0;
#endif
/* Global lock for memory allocation */
#if defined(USE_PTHREAD_LOCK)
static pthread_mutex_t alloc_lock = PTHREAD_MUTEX_INITIALIZER;
#elif defined(USE_PTHREAD_SPINLOCK)
static pthread_spinlock_t alloc_lock = 0;
#else
static BLASULONG alloc_lock = 0UL;
#endif
#ifdef ALLOC_MMAP
static void alloc_mmap_free(struct release_t *release){
@@ -400,8 +390,6 @@ static void alloc_mmap_free(struct release_t *release){
}
}
#ifdef NO_WARMUP
static void *alloc_mmap(void *address){
@@ -418,11 +406,9 @@ static void *alloc_mmap(void *address){
}
if (map_address != (void *)-1) {
LOCK_COMMAND(&alloc_lock);
release_info[release_pos].address = map_address;
release_info[release_pos].func = alloc_mmap_free;
release_pos ++;
UNLOCK_COMMAND(&alloc_lock);
}
#ifdef OS_LINUX
@@ -564,14 +550,12 @@ static void *alloc_mmap(void *address){
#if defined(OS_LINUX) && !defined(NO_WARMUP)
}
#endif
LOCK_COMMAND(&alloc_lock);
if (map_address != (void *)-1) {
release_info[release_pos].address = map_address;
release_info[release_pos].func = alloc_mmap_free;
release_pos ++;
}
UNLOCK_COMMAND(&alloc_lock);
return map_address;
}
@@ -905,6 +889,15 @@ static void *alloc_hugetlbfile(void *address){
}
#endif
/* Global lock for memory allocation */
#if defined(USE_PTHREAD_LOCK)
static pthread_mutex_t alloc_lock = PTHREAD_MUTEX_INITIALIZER;
#elif defined(USE_PTHREAD_SPINLOCK)
static pthread_spinlock_t alloc_lock = 0;
#else
static BLASULONG alloc_lock = 0UL;
#endif
#ifdef SEEK_ADDRESS
static BLASULONG base_address = 0UL;
@@ -970,41 +963,45 @@ void *blas_memory_alloc(int procpos){
NULL,
};
void *(**func)(void *address);
LOCK_COMMAND(&alloc_lock);
if (!memory_initialized) {
LOCK_COMMAND(&alloc_lock);
if (!memory_initialized) {
#if defined(WHEREAMI) && !defined(USE_OPENMP)
for (position = 0; position < NUM_BUFFERS; position ++){
memory[position].addr = (void *)0;
memory[position].pos = -1;
memory[position].used = 0;
memory[position].lock = 0;
}
for (position = 0; position < NUM_BUFFERS; position ++){
memory[position].addr = (void *)0;
memory[position].pos = -1;
memory[position].used = 0;
memory[position].lock = 0;
}
#endif
#ifdef DYNAMIC_ARCH
gotoblas_dynamic_init();
gotoblas_dynamic_init();
#endif
#if defined(SMP) && defined(OS_LINUX) && !defined(NO_AFFINITY)
gotoblas_affinity_init();
gotoblas_affinity_init();
#endif
#ifdef SMP
if (!blas_num_threads) blas_cpu_number = blas_get_cpu_number();
if (!blas_num_threads) blas_cpu_number = blas_get_cpu_number();
#endif
#if defined(ARCH_X86) || defined(ARCH_X86_64) || defined(ARCH_IA64) || defined(ARCH_MIPS64) || defined(ARCH_ARM64)
#if defined(ARCH_X86) || defined(ARCH_X86_64) || defined(ARCH_IA64) || defined(ARCH_MIPS64)
#ifndef DYNAMIC_ARCH
blas_set_parameter();
blas_set_parameter();
#endif
#endif
memory_initialized = 1;
memory_initialized = 1;
}
UNLOCK_COMMAND(&alloc_lock);
}
UNLOCK_COMMAND(&alloc_lock);
#ifdef DEBUG
printf("Alloc Start ...\n");
@@ -1015,7 +1012,7 @@ void *blas_memory_alloc(int procpos){
mypos = WhereAmI();
position = mypos;
while (position >= NUM_BUFFERS) position >>= 1;
while (position > NUM_BUFFERS) position >>= 1;
do {
if (!memory[position].used && (memory[position].pos == mypos)) {
@@ -1037,14 +1034,14 @@ void *blas_memory_alloc(int procpos){
position = 0;
do {
/* if (!memory[position].used) { */
if (!memory[position].used) {
blas_lock(&memory[position].lock);
if (!memory[position].used) goto allocation;
blas_unlock(&memory[position].lock);
/* } */
}
position ++;
@@ -1106,9 +1103,7 @@ void *blas_memory_alloc(int procpos){
} while ((BLASLONG)map_address == -1);
LOCK_COMMAND(&alloc_lock);
memory[position].addr = map_address;
UNLOCK_COMMAND(&alloc_lock);
#ifdef DEBUG
printf(" Mapping Succeeded. %p(%d)\n", (void *)memory[position].addr, position);
@@ -1162,10 +1157,9 @@ void blas_memory_free(void *free_area){
#endif
position = 0;
LOCK_COMMAND(&alloc_lock);
while ((position < NUM_BUFFERS) && (memory[position].addr != free_area))
position++;
while ((memory[position].addr != free_area)
&& (position < NUM_BUFFERS)) position++;
if (memory[position].addr != free_area) goto error;
@@ -1177,7 +1171,6 @@ void blas_memory_free(void *free_area){
WMB;
memory[position].used = 0;
UNLOCK_COMMAND(&alloc_lock);
#ifdef DEBUG
printf("Unmap Succeeded.\n\n");
@@ -1192,7 +1185,6 @@ void blas_memory_free(void *free_area){
for (position = 0; position < NUM_BUFFERS; position++)
printf("%4ld %p : %d\n", position, memory[position].addr, memory[position].used);
#endif
UNLOCK_COMMAND(&alloc_lock);
return;
}
@@ -1479,30 +1471,12 @@ static int on_process_term(void)
#else
#pragma comment(linker, "/INCLUDE:__tls_used")
#endif
#ifdef _WIN64
#pragma const_seg(".CRT$XLB")
#else
#pragma data_seg(push, old_seg)
#pragma data_seg(".CRT$XLB")
#endif
static void (APIENTRY *dll_callback)(HINSTANCE h, DWORD ul_reason_for_call, PVOID pv) = DllMain;
#ifdef _WIN64
#pragma const_seg()
#else
#pragma data_seg()
#endif
#ifdef _WIN64
#pragma const_seg(".CRT$XTU")
#else
#pragma data_seg(".CRT$XTU")
#endif
static int(*p_process_term)(void) = on_process_term;
#ifdef _WIN64
#pragma const_seg()
#else
#pragma data_seg()
#endif
#pragma data_seg(pop, old_seg)
#endif
#if (defined(C_PGI) || (!defined(C_SUN) && defined(F_INTERFACE_SUN))) && (defined(ARCH_X86) || defined(ARCH_X86_64))

View File

@@ -167,7 +167,7 @@ int get_L2_size(void){
#if defined(ATHLON) || defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) || \
defined(CORE_PRESCOTT) || defined(CORE_CORE2) || defined(PENRYN) || defined(DUNNINGTON) || \
defined(CORE_NEHALEM) || defined(CORE_SANDYBRIDGE) || defined(ATOM) || defined(GENERIC) || \
defined(PILEDRIVER) || defined(HASWELL) || defined(STEAMROLLER) || defined(EXCAVATOR) || defined(ZEN)
defined(PILEDRIVER) || defined(HASWELL) || defined(STEAMROLLER)
cpuid(0x80000006, &eax, &ebx, &ecx, &edx);
@@ -251,7 +251,7 @@ int get_L2_size(void){
void blas_set_parameter(void){
int factor;
#if defined(BULLDOZER) || defined(PILEDRIVER) || defined(SANDYBRIDGE) || defined(NEHALEM) || defined(HASWELL) || defined(STEAMROLLER) || defined(EXCAVATOR) || defined(ZEN)
#if defined(BULLDOZER) || defined(PILEDRIVER) || defined(SANDYBRIDGE) || defined(NEHALEM) || defined(HASWELL) || defined(STEAMROLLER)
int size = 16;
#else
int size = get_L2_size();
@@ -497,13 +497,13 @@ void blas_set_parameter(void){
if (xgemm_p == 0) xgemm_p = 64;
#endif
sgemm_p = ((sgemm_p + SGEMM_UNROLL_M - 1)/SGEMM_UNROLL_M) * SGEMM_UNROLL_M;
dgemm_p = ((dgemm_p + DGEMM_UNROLL_M - 1)/DGEMM_UNROLL_M) * DGEMM_UNROLL_M;
cgemm_p = ((cgemm_p + CGEMM_UNROLL_M - 1)/CGEMM_UNROLL_M) * CGEMM_UNROLL_M;
zgemm_p = ((zgemm_p + ZGEMM_UNROLL_M - 1)/ZGEMM_UNROLL_M) * ZGEMM_UNROLL_M;
sgemm_p = (sgemm_p + SGEMM_UNROLL_M - 1) & ~(SGEMM_UNROLL_M - 1);
dgemm_p = (dgemm_p + DGEMM_UNROLL_M - 1) & ~(DGEMM_UNROLL_M - 1);
cgemm_p = (cgemm_p + CGEMM_UNROLL_M - 1) & ~(CGEMM_UNROLL_M - 1);
zgemm_p = (zgemm_p + ZGEMM_UNROLL_M - 1) & ~(ZGEMM_UNROLL_M - 1);
#ifdef QUAD_PRECISION
qgemm_p = ((qgemm_p + QGEMM_UNROLL_M - 1)/QGEMM_UNROLL_M) * QGEMM_UNROLL_M;
xgemm_p = ((xgemm_p + XGEMM_UNROLL_M - 1)/XGEMM_UNROLL_M) * XGEMM_UNROLL_M;
qgemm_p = (qgemm_p + QGEMM_UNROLL_M - 1) & ~(QGEMM_UNROLL_M - 1);
xgemm_p = (xgemm_p + XGEMM_UNROLL_M - 1) & ~(XGEMM_UNROLL_M - 1);
#endif
sgemm_r = (((BUFFER_SIZE - ((SGEMM_P * SGEMM_Q * 4 + GEMM_OFFSET_A + GEMM_ALIGN) & ~GEMM_ALIGN)) / (SGEMM_Q * 4)) - 15) & ~15;
@@ -727,38 +727,3 @@ void blas_set_parameter(void){
}
#endif
#if defined(ARCH_ARM64)
#if defined(VULCAN) || defined(THUNDERX2T99)
unsigned long dgemm_prefetch_size_a;
unsigned long dgemm_prefetch_size_b;
unsigned long dgemm_prefetch_size_c;
#endif
void blas_set_parameter(void)
{
#if defined(VULCAN) || defined(THUNDERX2T99)
dgemm_p = 160;
dgemm_q = 128;
dgemm_r = 4096;
sgemm_p = 128;
sgemm_q = 352;
sgemm_r = 4096;
cgemm_p = 128;
cgemm_q = 224;
cgemm_r = 4096;
zgemm_p = 128;
zgemm_q = 112;
zgemm_r = 4096;
dgemm_prefetch_size_a = 3584;
dgemm_prefetch_size_b = 512;
dgemm_prefetch_size_c = 128;
#endif
}
#endif

View File

@@ -46,16 +46,10 @@
#define printf _cprintf
#endif
#ifdef INTERFACE64
#define MSGFMT " ** On entry to %6s parameter number %2ld had an illegal value\n"
#else
#define MSGFMT " ** On entry to %6s parameter number %2d had an illegal value\n"
#endif
#ifdef __ELF__
int __xerbla(char *message, blasint *info, blasint length){
printf(MSGFMT,
printf(" ** On entry to %6s parameter number %2d had an illegal value\n",
message, *info);
return 0;
@@ -67,7 +61,7 @@ int BLASFUNC(xerbla)(char *, blasint *, blasint) __attribute__ ((weak, alias ("_
int BLASFUNC(xerbla)(char *message, blasint *info, blasint length){
printf(MSGFMT,
printf(" ** On entry to %6s parameter number %2d had an illegal value\n",
message, *info);
return 0;

View File

@@ -110,24 +110,18 @@ $(LIBDYNNAME) : ../$(LIBNAME).osx.renamed osx.def
endif
ifeq ($(NOFORTRAN), $(filter $(NOFORTRAN),1 2))
#only build without Fortran
$(CC) $(CFLAGS) -all_load -headerpad_max_install_names -install_name "$(CURDIR)/../$(LIBDYNNAME)" -dynamiclib -o ../$(LIBDYNNAME) $< -Wl,-exported_symbols_list,osx.def $(FEXTRALIB)
$(CC) $(CFLAGS) -all_load -headerpad_max_install_names -install_name $(CURDIR)/../$(LIBDYNNAME) -dynamiclib -o ../$(LIBDYNNAME) $< -Wl,-exported_symbols_list,osx.def $(FEXTRALIB)
else
$(FC) $(FFLAGS) -all_load -headerpad_max_install_names -install_name "$(CURDIR)/../$(LIBDYNNAME)" -dynamiclib -o ../$(LIBDYNNAME) $< -Wl,-exported_symbols_list,osx.def $(FEXTRALIB)
$(FC) $(FFLAGS) -all_load -headerpad_max_install_names -install_name $(CURDIR)/../$(LIBDYNNAME) -dynamiclib -o ../$(LIBDYNNAME) $< -Wl,-exported_symbols_list,osx.def $(FEXTRALIB)
endif
dllinit.$(SUFFIX) : dllinit.c
$(CC) $(CFLAGS) -c -o $(@F) -s $<
ifeq ($(OSNAME), $(filter $(OSNAME),Linux SunOS Android))
ifeq ($(OSNAME), $(filter $(OSNAME),Linux SunOS))
so : ../$(LIBSONAME)
ifeq ($(OSNAME), Android)
INTERNALNAME = $(LIBPREFIX).so
else
INTERNALNAME = $(LIBPREFIX).so.$(MAJOR_VERSION)
endif
ifeq (, $(SYMBOLPREFIX)$(SYMBOLSUFFIX))
../$(LIBSONAME) : ../$(LIBNAME) linktest.c
else
@@ -138,13 +132,13 @@ endif
ifneq ($(C_COMPILER), LSB)
$(CC) $(CFLAGS) $(LDFLAGS) -shared -o ../$(LIBSONAME) \
-Wl,--whole-archive $< -Wl,--no-whole-archive \
-Wl,-soname,$(INTERNALNAME) $(EXTRALIB)
-Wl,-soname,$(LIBPREFIX).so.$(MAJOR_VERSION) $(EXTRALIB)
$(CC) $(CFLAGS) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) $(FEXTRALIB) && echo OK.
else
#for LSB
env LSBCC_SHAREDLIBS=gfortran $(CC) $(CFLAGS) $(LDFLAGS) -shared -o ../$(LIBSONAME) \
-Wl,--whole-archive $< -Wl,--no-whole-archive \
-Wl,-soname,$(INTERNALNAME) $(EXTRALIB)
-Wl,-soname,$(LIBPREFIX).so.$(MAJOR_VERSION) $(EXTRALIB)
$(FC) $(CFLAGS) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) $(FEXTRALIB) && echo OK.
endif
rm -f linktest

View File

@@ -1,61 +0,0 @@
#!/bin/bash
# Reads symbol names from stdin, one per line, and prints every symbol
# that is NOT mentioned (as a whole word) in exports/gensymbol.
#
# Only these classes of symbols are examined; everything else is ignored:
#   - names ending in "_"  (looked up with the trailing "_" removed)
#   - names starting with "cblas", "LAPACKE" or "lapack" (looked up as-is)
# caxpyc_, zaxpyc_ and blas_thread_shutdown_ are always skipped.

# Print "$1" unless grep finds it as a whole word in exports/gensymbol.
report_if_missing() {
    grep -w "$1" exports/gensymbol >/dev/null || echo "$1"
}

while read sym; do
    case "$sym" in
        caxpyc_|zaxpyc_|blas_thread_shutdown_)
            # explicitly exempt from the check
            ;;
        *_)
            # trailing-underscore rule wins over the prefix rules below
            report_if_missing "${sym%_}"
            ;;
        cblas*|LAPACKE*|lapack*)
            report_if_missing "$sym"
            ;;
    esac
done

File diff suppressed because it is too large Load Diff

35
f_check
View File

@@ -33,7 +33,6 @@ if ($compiler eq "") {
"ppuf77", "ppuf95", "ppuf90", "ppuxlf",
"pathf90", "pathf95",
"pgf95", "pgf90", "pgf77",
"flang",
"ifort");
OUTER:
@@ -79,13 +78,8 @@ if ($compiler eq "") {
$vendor = GFORTRAN;
$openmp = "-fopenmp";
} else {
if ($compiler =~ /flang/) {
$vendor = FLANG;
$openmp = "-fopenmp";
} else {
$vendor = G77;
$openmp = "";
}
$vendor = G77;
$openmp = "";
}
}
@@ -120,7 +114,7 @@ if ($compiler eq "") {
$openmp = "-mp";
}
if ($data =~ /IBM XL/) {
if ($data =~ /IBM/) {
$vendor = IBM;
$openmp = "-openmp";
}
@@ -203,12 +197,6 @@ if ($compiler eq "") {
$openmp = "-mp";
}
if ($compiler =~ /flang/) {
$vendor = FLANG;
$bu = "_";
$openmp = "-fopenmp";
}
if ($vendor eq "") {
$nofortran = 1;
$compiler = "gfortran";
@@ -235,12 +223,7 @@ if (!$?) {
}
#For gfortran MIPS
if ($?) {
$mips_data = `$compiler_bin -E -dM - < /dev/null`;
if ($mips_data =~ /_MIPS_ISA_MIPS64/) {
$link = `$compiler $openmp -mabi=n32 -v ftest2.f 2>&1 && rm -f a.out a.exe`;
} else {
$link = `$compiler $openmp -mabi=32 -v ftest2.f 2>&1 && rm -f a.out a.exe`;
}
$link = `$compiler $openmp -mabi=n32 -v ftest2.f 2>&1 && rm -f a.out a.exe`;
}
$binary = "" if ($?);
}
@@ -295,12 +278,6 @@ if ($link ne "") {
$linker_L .= "-Wl,". $flags . " ";
}
if ($flags =~ /^\--exclude-libs/) {
$linker_L .= "-Wl,". $flags . " ";
$flags="";
}
if ($flags =~ /^\-rpath\@/) {
$flags =~ s/\@/\,/g;
if ($vendor eq "PGI") {
@@ -343,10 +320,6 @@ if ($vendor eq "INTEL"){
$linker_a .= "-lgfortran"
}
if ($vendor eq "FLANG"){
$linker_a .= "-lflang"
}
open(MAKEFILE, ">> $makefile") || die "Can't append $makefile";
open(CONFFILE, ">> $config" ) || die "Can't append $config";

View File

@@ -1,36 +0,0 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Writes a prefixed configuration header to stdout.
 *
 *   argv[1]  file whose "#define X ..." lines are re-emitted as
 *            "#define OPENBLAS_X ..."; every other line becomes a blank line.
 *   argv[2]  file whose contents are copied verbatim after the defines.
 *
 * The output is wrapped in an OPENBLAS_CONFIG_H include guard and also
 * defines OPENBLAS_VERSION from the VERSION macro (expected to be supplied
 * on the compiler command line).
 *
 * Exits with status 0 on success, 1 if an argument is missing or a file
 * cannot be opened.
 */
int main(int argc, char**argv) {
  FILE *fp;
  char line[100];
  char line2[80];
  char *s;
  int i;

  /* Both input paths are mandatory; without this check argv[1]/argv[2]
     would be read past the end of the argument vector. */
  if (argc < 3) {
    fprintf(stderr, "usage: %s <defines-file> <template-file>\n",
            argc > 0 ? argv[0] : "gen_config_h");
    exit(1);
  }

  fprintf(stdout,"#ifndef OPENBLAS_CONFIG_H\n");
  fprintf(stdout,"#define OPENBLAS_CONFIG_H\n");

  fp=fopen(argv[1],"r");
  if (fp == NULL) {
    perror(argv[1]);
    exit(1);
  }
  do{
    s=fgets(line,80,fp);
    if (s== NULL) break;
    /* line2 is zero-filled so the characters stored by %70c (which does not
       append a terminator) always end up NUL-terminated. */
    memset(line2,0,80);
    i=sscanf(line,"#define %70c",line2);
    /* Require an actual conversion: sscanf returns EOF (-1), not 0, when the
       line ends right after "#define", and that must not count as a match. */
    if (i==1) {
      fprintf(stdout,"#define OPENBLAS_%s",line2);
    } else {
      fprintf(stdout,"\n");
    }
  } while (1);
  fclose(fp);

  fprintf(stdout,"#define OPENBLAS_VERSION \"OpenBLAS %s\"\n", VERSION);

  fp=fopen(argv[2],"r");
  if (fp == NULL) {
    perror(argv[2]);
    exit(1);
  }
  do{
    s=fgets(line,100,fp);
    if (s== NULL) break;
    fprintf(stdout,"%s",line);
  } while(1);
  fclose(fp);

  fprintf(stdout,"#endif /* OPENBLAS_CONFIG_H */\n");
  exit(0);
}

131
getarch.c
View File

@@ -131,9 +131,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
/* #define FORCE_SICORTEX */
/* #define FORCE_LOONGSON3A */
/* #define FORCE_LOONGSON3B */
/* #define FORCE_I6400 */
/* #define FORCE_P6600 */
/* #define FORCE_P5600 */
/* #define FORCE_ITANIUM2 */
/* #define FORCE_SPARC */
/* #define FORCE_SPARCV7 */
@@ -473,25 +470,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define CORENAME "EXCAVATOR"
#endif
#if defined (FORCE_ZEN)
#define FORCE
#define FORCE_INTEL
#define ARCHITECTURE "X86"
#define SUBARCHITECTURE "ZEN"
#define ARCHCONFIG "-DZEN " \
"-DL1_CODE_SIZE=32768 -DL1_CODE_LINESIZE=64 -DL1_CODE_ASSOCIATIVE=8 " \
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 -DL2_CODE_ASSOCIATIVE=8 " \
"-DL2_SIZE=524288 -DL2_LINESIZE=64 -DL2_ASSOCIATIVE=8 " \
"-DL3_SIZE=16777216 -DL3_LINESIZE=64 -DL3_ASSOCIATIVE=8 " \
"-DITB_DEFAULT_ENTRIES=64 -DITB_SIZE=4096 " \
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
"-DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2 " \
"-DHAVE_SSE4A -DHAVE_MISALIGNSSE -DHAVE_128BITFPU -DHAVE_FASTMOVU -DHAVE_CFLUSH " \
"-DHAVE_AVX -DHAVE_FMA3 -DFMA3"
#define LIBNAME "zen"
#define CORENAME "ZEN"
#endif
#ifdef FORCE_SSE_GENERIC
#define FORCE
@@ -721,48 +699,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#else
#endif
#ifdef FORCE_I6400
#define FORCE
#define ARCHITECTURE "MIPS"
#define SUBARCHITECTURE "I6400"
#define SUBDIRNAME "mips64"
#define ARCHCONFIG "-DI6400 " \
"-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \
"-DL2_SIZE=1048576 -DL2_LINESIZE=32 " \
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 "
#define LIBNAME "i6400"
#define CORENAME "I6400"
#else
#endif
#ifdef FORCE_P6600
#define FORCE
#define ARCHITECTURE "MIPS"
#define SUBARCHITECTURE "P6600"
#define SUBDIRNAME "mips64"
#define ARCHCONFIG "-DP6600 " \
"-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \
"-DL2_SIZE=1048576 -DL2_LINESIZE=32 " \
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 "
#define LIBNAME "p6600"
#define CORENAME "P6600"
#else
#endif
#ifdef FORCE_P5600
#define FORCE
#define ARCHITECTURE "MIPS"
#define SUBARCHITECTURE "P5600"
#define SUBDIRNAME "mips"
#define ARCHCONFIG "-DP5600 " \
"-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \
"-DL2_SIZE=1048576 -DL2_LINESIZE=32 " \
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 "
#define LIBNAME "p5600"
#define CORENAME "P5600"
#else
#endif
#ifdef FORCE_ITANIUM2
#define FORCE
#define ARCHITECTURE "IA64"
@@ -903,7 +839,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifdef FORCE_CORTEXA57
#define FORCE
#define ARCHITECTURE "ARM64"
#define SUBARCHITECTURE "CORTEXA57"
#define SUBARCHITECTURE "ARMV8"
#define SUBDIRNAME "arm64"
#define ARCHCONFIG "-DCORTEXA57 " \
"-DL1_CODE_SIZE=49152 -DL1_CODE_LINESIZE=64 -DL1_CODE_ASSOCIATIVE=3 " \
@@ -916,54 +852,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#else
#endif
#ifdef FORCE_VULCAN
#define FORCE
#define ARCHITECTURE "ARM64"
#define SUBARCHITECTURE "VULCAN"
#define SUBDIRNAME "arm64"
#define ARCHCONFIG "-DVULCAN " \
"-DL1_CODE_SIZE=32768 -DL1_CODE_LINESIZE=64 -DL1_CODE_ASSOCIATIVE=8 " \
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 -DL1_DATA_ASSOCIATIVE=8 " \
"-DL2_SIZE=262144 -DL2_LINESIZE=64 -DL2_ASSOCIATIVE=8 " \
"-DL3_SIZE=33554432 -DL3_LINESIZE=64 -DL3_ASSOCIATIVE=32 " \
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON"
#define LIBNAME "vulcan"
#define CORENAME "VULCAN"
#else
#endif
#ifdef FORCE_THUNDERX
#define FORCE
#define ARCHITECTURE "ARM64"
#define SUBARCHITECTURE "THUNDERX"
#define SUBDIRNAME "arm64"
#define ARCHCONFIG "-DTHUNDERX " \
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=128 " \
"-DL2_SIZE=16777216 -DL2_LINESIZE=128 -DL2_ASSOCIATIVE=16 " \
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 "
#define LIBNAME "thunderx"
#define CORENAME "THUNDERX"
#else
#endif
#ifdef FORCE_THUNDERX2T99
#define FORCE
#define ARCHITECTURE "ARM64"
#define SUBARCHITECTURE "THUNDERX2T99"
#define SUBDIRNAME "arm64"
#define ARCHCONFIG "-DTHUNDERX2T99 " \
"-DL1_CODE_SIZE=32768 -DL1_CODE_LINESIZE=64 -DL1_CODE_ASSOCIATIVE=8 " \
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 -DL1_DATA_ASSOCIATIVE=8 " \
"-DL2_SIZE=262144 -DL2_LINESIZE=64 -DL2_ASSOCIATIVE=8 " \
"-DL3_SIZE=33554432 -DL3_LINESIZE=64 -DL3_ASSOCIATIVE=32 " \
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON"
#define LIBNAME "thunderx2t99"
#define CORENAME "THUNDERX2T99"
#else
#endif
#ifndef FORCE
#if defined(__powerpc__) || defined(__powerpc) || defined(powerpc) || \
@@ -974,12 +862,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define OPENBLAS_SUPPORTED
#endif
#if defined(__zarch__) || defined(__s390x__)
#define ZARCH
#include "cpuid_zarch.c"
#define OPENBLAS_SUPPORTED
#endif
#ifdef INTEL_AMD
#include "cpuid_x86.c"
#define OPENBLAS_SUPPORTED
@@ -1006,11 +888,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif
#ifdef __mips__
#ifdef __mips64
#include "cpuid_mips64.c"
#else
#include "cpuid_mips.c"
#endif
#define OPENBLAS_SUPPORTED
#endif
@@ -1044,7 +922,7 @@ static int get_num_cores(void) {
#if defined(linux) || defined(__sun__)
//returns the number of processors which are currently online
return sysconf(_SC_NPROCESSORS_CONF);
return sysconf(_SC_NPROCESSORS_ONLN);
#elif defined(OS_WINDOWS)
@@ -1079,7 +957,7 @@ int main(int argc, char *argv[]){
#ifdef FORCE
printf("CORE=%s\n", CORENAME);
#else
#if defined(INTEL_AMD) || defined(POWER) || defined(__mips__) || defined(__arm__) || defined(__aarch64__) || defined(ZARCH)
#if defined(INTEL_AMD) || defined(POWER) || defined(__mips__) || defined(__arm__) || defined(__aarch64__)
printf("CORE=%s\n", get_corename());
#endif
#endif
@@ -1171,7 +1049,6 @@ int main(int argc, char *argv[]){
p ++;
}
} else {
if (*p != '\n')
printf("%c", *p);
p ++;
}
@@ -1187,7 +1064,7 @@ int main(int argc, char *argv[]){
#ifdef FORCE
printf("#define CHAR_CORENAME \"%s\"\n", CORENAME);
#else
#if defined(INTEL_AMD) || defined(POWER) || defined(__mips__) || defined(__arm__) || defined(__aarch64__) || defined(ZARCH)
#if defined(INTEL_AMD) || defined(POWER) || defined(__mips__) || defined(__arm__) || defined(__aarch64__)
printf("#define CHAR_CORENAME \"%s\"\n", get_corename());
#endif
#endif

View File

@@ -1,5 +1,5 @@
include_directories(${PROJECT_SOURCE_DIR})
include_directories(${CMAKE_SOURCE_DIR})
set(BLAS1_SOURCES

View File

@@ -84,10 +84,10 @@ CBLAS1OBJS = \
CBLAS2OBJS = \
cgemv.$(SUFFIX) cgeru.$(SUFFIX) cgerc.$(SUFFIX) \
ctrsv.$(SUFFIX) ctrmv.$(SUFFIX) \
csyr2.$(SUFFIX) cgbmv.$(SUFFIX) \
csbmv.$(SUFFIX) \
cspr2.$(SUFFIX) \
ctrsv.$(SUFFIX) ctrmv.$(SUFFIX) csymv.$(SUFFIX) \
csyr.$(SUFFIX) csyr2.$(SUFFIX) cgbmv.$(SUFFIX) \
csbmv.$(SUFFIX) cspmv.$(SUFFIX) \
cspr.$(SUFFIX) cspr2.$(SUFFIX) \
ctbsv.$(SUFFIX) ctbmv.$(SUFFIX) \
ctpsv.$(SUFFIX) ctpmv.$(SUFFIX) \
chemv.$(SUFFIX) chbmv.$(SUFFIX) \
@@ -113,10 +113,10 @@ ZBLAS1OBJS = \
ZBLAS2OBJS = \
zgemv.$(SUFFIX) zgeru.$(SUFFIX) zgerc.$(SUFFIX) \
ztrsv.$(SUFFIX) ztrmv.$(SUFFIX) \
zsyr2.$(SUFFIX) zgbmv.$(SUFFIX) \
zsbmv.$(SUFFIX) \
zspr2.$(SUFFIX) \
ztrsv.$(SUFFIX) ztrmv.$(SUFFIX) zsymv.$(SUFFIX) \
zsyr.$(SUFFIX) zsyr2.$(SUFFIX) zgbmv.$(SUFFIX) \
zsbmv.$(SUFFIX) zspmv.$(SUFFIX) \
zspr.$(SUFFIX) zspr2.$(SUFFIX) \
ztbsv.$(SUFFIX) ztbmv.$(SUFFIX) \
ztpsv.$(SUFFIX) ztpmv.$(SUFFIX) \
zhemv.$(SUFFIX) zhbmv.$(SUFFIX) \
@@ -315,7 +315,7 @@ CCBLAS3OBJS = \
cblas_csyrk.$(SUFFIX) cblas_csyr2k.$(SUFFIX) \
cblas_chemm.$(SUFFIX) cblas_cherk.$(SUFFIX) cblas_cher2k.$(SUFFIX) \
cblas_comatcopy.$(SUFFIX) cblas_cimatcopy.$(SUFFIX)\
cblas_cgeadd.$(SUFFIX) cblas_xerbla.$(SUFFIX)
cblas_cgeadd.$(SUFFIX)
@@ -2137,5 +2137,3 @@ cblas_cgeadd.$(SUFFIX) cblas_cgeadd.$(PSUFFIX) : zgeadd.c
cblas_zgeadd.$(SUFFIX) cblas_zgeadd.$(PSUFFIX) : zgeadd.c
$(CC) -c $(CFLAGS) -DCBLAS $< -o $(@F)
cblas_xerbla.$(SUFFIX) cblas_xerbla.$(PSUFFIX) : xerbla.c
$(CC) -c $(CFLAGS) -DCBLAS $< -o $(@F)

File diff suppressed because it is too large Load Diff

View File

@@ -42,14 +42,6 @@
#include "functable.h"
#endif
#if defined(THUNDERX2T99) || defined(VULCAN)
// Multithreaded swap gives performance benefits in ThunderX2T99
#else
// Disable multi-threading as it does not show any performance
// benefits. Keep the multi-threading code for the record.
#undef SMP
#endif
#ifndef CBLAS
void NAME(blasint *N, FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY){
@@ -85,6 +77,7 @@ void CNAME(blasint n, FLOAT *x, blasint incx, FLOAT *y, blasint incy){
if (incy < 0) y -= (n - 1) * incy;
#ifdef SMP
//disable multi-thread when incx==0 or incy==0
//In that case, the threads would be dependent.
if (incx == 0 || incy == 0 || n < 2097152 * GEMM_MULTITHREAD_THRESHOLD / sizeof(FLOAT))

View File

@@ -1,22 +0,0 @@
#ifdef CBLAS
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdarg.h>
#include "common.h"
/*
 * CBLAS error handler.
 *
 *   p     if non-zero, reported as the number of the offending parameter
 *   rout  name of the routine reporting the error
 *   form  printf-style format for an additional message, followed by its
 *         arguments
 *
 * Writes the message(s) to stderr and terminates the process with
 * exit(-1); it never returns to the caller.
 */
void CNAME(blasint p, char *rout, char *form, ...)
{
  va_list args;

  va_start(args, form);

  if (p) {
    /* blasint can be wider than int (the Fortran xerbla uses %ld under
       INTERFACE64), so cast explicitly to match the %d conversion. */
    fprintf(stderr, "Parameter %d to routine %s was incorrect\n", (int)p, rout);
  }
  vfprintf(stderr, form, args);
  va_end(args);
  exit(-1);
}
#endif

View File

@@ -160,10 +160,9 @@ OPENBLAS_COMPLEX_FLOAT CNAME(blasint n, FLOAT *x, blasint incx, FLOAT *y, blasin
if (n <= 0) {
#ifdef FORCE_USE_STACK
OPENBLAS_COMPLEX_FLOAT zero=OPENBLAS_MAKE_COMPLEX_FLOAT(0.0, 0.0);
*result = zero;
// CREAL(*result) = 0.0;
// CIMAG(*result) = 0.0;
//*result = OPENBLAS_MAKE_COMPLEX_FLOAT(0.0, 0.0);
CREAL(*result) = 0.0;
CIMAG(*result) = 0.0;
return;
#else
return zero;

View File

@@ -251,7 +251,7 @@ void CNAME( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows,
}
}
free(b);
return;
}

Some files were not shown because too many files have changed in this diff Show More