Merge pull request #2101 from luzpaz/misc-typos
Misc. typo fixes in comments and documentation
This commit is contained in:
commit
a387a23518
|
@ -181,17 +181,17 @@ NO_AFFINITY = 1
|
||||||
# time out to improve performance. This number should be from 4 to 30
|
# time out to improve performance. This number should be from 4 to 30
|
||||||
# which corresponds to (1 << n) cycles. For example, if you set to 26,
|
# which corresponds to (1 << n) cycles. For example, if you set to 26,
|
||||||
# thread will be running for (1 << 26) cycles(about 25ms on 3.0GHz
|
# thread will be running for (1 << 26) cycles(about 25ms on 3.0GHz
|
||||||
# system). Also you can control this mumber by THREAD_TIMEOUT
|
# system). Also you can control this number by THREAD_TIMEOUT
|
||||||
# CCOMMON_OPT += -DTHREAD_TIMEOUT=26
|
# CCOMMON_OPT += -DTHREAD_TIMEOUT=26
|
||||||
|
|
||||||
# Using special device driver for mapping physically contigous memory
|
# Using special device driver for mapping physically contiguous memory
|
||||||
# to the user space. If bigphysarea is enabled, it will use it.
|
# to the user space. If bigphysarea is enabled, it will use it.
|
||||||
# DEVICEDRIVER_ALLOCATION = 1
|
# DEVICEDRIVER_ALLOCATION = 1
|
||||||
|
|
||||||
# If you need to synchronize FP CSR between threads (for x86/x86_64 only).
|
# If you need to synchronize FP CSR between threads (for x86/x86_64 only).
|
||||||
# CONSISTENT_FPCSR = 1
|
# CONSISTENT_FPCSR = 1
|
||||||
|
|
||||||
# If any gemm arguement m, n or k is less or equal this threshold, gemm will be execute
|
# If any gemm argument m, n or k is less than or equal to this threshold, gemm will be executed
|
||||||
# with single thread. (Actually in recent versions this is a factor proportional to the
|
# with single thread. (Actually in recent versions this is a factor proportional to the
|
||||||
# number of floating point operations necessary for the given problem size, no longer
|
# number of floating point operations necessary for the given problem size, no longer
|
||||||
# an individual dimension). You can use this setting to avoid the overhead of multi-
|
# an individual dimension). You can use this setting to avoid the overhead of multi-
|
||||||
|
|
|
@ -133,7 +133,7 @@ Please read `GotoBLAS_01Readme.txt`.
|
||||||
|
|
||||||
#### PPC/PPC64
|
#### PPC/PPC64
|
||||||
|
|
||||||
- **POWER8**: Optmized Level-3 BLAS and some Level-1, only with `USE_OPENMP=1`
|
- **POWER8**: Optimized Level-3 BLAS and some Level-1, only with `USE_OPENMP=1`
|
||||||
|
|
||||||
#### IBM zEnterprise System
|
#### IBM zEnterprise System
|
||||||
|
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
# helper functions for the kernel CMakeLists.txt
|
# helper functions for the kernel CMakeLists.txt
|
||||||
|
|
||||||
|
|
||||||
# Set the default filenames for L1 objects. Most of these will be overriden by the appropriate KERNEL file.
|
# Set the default filenames for L1 objects. Most of these will be overridden by the appropriate KERNEL file.
|
||||||
macro(SetDefaultL1)
|
macro(SetDefaultL1)
|
||||||
set(SAMAXKERNEL amax.S)
|
set(SAMAXKERNEL amax.S)
|
||||||
set(DAMAXKERNEL amax.S)
|
set(DAMAXKERNEL amax.S)
|
||||||
|
|
|
@ -283,7 +283,7 @@ endif ()
|
||||||
|
|
||||||
set(KERNELDIR "${PROJECT_SOURCE_DIR}/kernel/${ARCH}")
|
set(KERNELDIR "${PROJECT_SOURCE_DIR}/kernel/${ARCH}")
|
||||||
|
|
||||||
# TODO: nead to convert these Makefiles
|
# TODO: need to convert these Makefiles
|
||||||
# include ${PROJECT_SOURCE_DIR}/cmake/${ARCH}.cmake
|
# include ${PROJECT_SOURCE_DIR}/cmake/${ARCH}.cmake
|
||||||
|
|
||||||
if (${CORE} STREQUAL "PPC440")
|
if (${CORE} STREQUAL "PPC440")
|
||||||
|
|
|
@ -89,7 +89,7 @@ function(AllCombinations list_in absent_codes_in)
|
||||||
set(CODES_OUT ${CODES_OUT} PARENT_SCOPE)
|
set(CODES_OUT ${CODES_OUT} PARENT_SCOPE)
|
||||||
endfunction ()
|
endfunction ()
|
||||||
|
|
||||||
# generates object files for each of the sources, using the BLAS naming scheme to pass the funciton name as a preprocessor definition
|
# generates object files for each of the sources, using the BLAS naming scheme to pass the function name as a preprocessor definition
|
||||||
# @param sources_in the source files to build from
|
# @param sources_in the source files to build from
|
||||||
# @param defines_in (optional) preprocessor definitions that will be applied to all objects
|
# @param defines_in (optional) preprocessor definitions that will be applied to all objects
|
||||||
# @param name_in (optional) if this is set this name will be used instead of the filename. Use a * to indicate where the float character should go, if no star the character will be prepended.
|
# @param name_in (optional) if this is set this name will be used instead of the filename. Use a * to indicate where the float character should go, if no star the character will be prepended.
|
||||||
|
|
|
@ -45,7 +45,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
* SIZE must be carefully chosen to be:
|
* SIZE must be carefully chosen to be:
|
||||||
* - as small as possible to maximize the number of stack allocation
|
* - as small as possible to maximize the number of stack allocation
|
||||||
* - large enough to support all architectures and kernel
|
* - large enough to support all architectures and kernel
|
||||||
* Chosing a too small SIZE will lead to a stack smashing.
|
* Choosing a SIZE that is too small will lead to stack smashing.
|
||||||
*/
|
*/
|
||||||
#define STACK_ALLOC(SIZE, TYPE, BUFFER) \
|
#define STACK_ALLOC(SIZE, TYPE, BUFFER) \
|
||||||
/* make it volatile because some function (ex: dgemv_n.S) */ \
|
/* make it volatile because some function (ex: dgemv_n.S) */ \
|
||||||
|
|
|
@ -214,7 +214,7 @@ static __inline int blas_quickdivide(unsigned int x, unsigned int y){
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(PILEDRIVER) || defined(BULLDOZER) || defined(STEAMROLLER) || defined(EXCAVATOR)
|
#if defined(PILEDRIVER) || defined(BULLDOZER) || defined(STEAMROLLER) || defined(EXCAVATOR)
|
||||||
//Enable some optimazation for barcelona.
|
//Enable some optimizations for Barcelona.
|
||||||
#define BARCELONA_OPTIMIZATION
|
#define BARCELONA_OPTIMIZATION
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -276,7 +276,7 @@ static __inline int blas_quickdivide(unsigned int x, unsigned int y){
|
||||||
#ifdef ASSEMBLER
|
#ifdef ASSEMBLER
|
||||||
|
|
||||||
#if defined(PILEDRIVER) || defined(BULLDOZER) || defined(STEAMROLLER) || defined(EXCAVATOR)
|
#if defined(PILEDRIVER) || defined(BULLDOZER) || defined(STEAMROLLER) || defined(EXCAVATOR)
|
||||||
//Enable some optimazation for barcelona.
|
//Enable some optimizations for Barcelona.
|
||||||
#define BARCELONA_OPTIMIZATION
|
#define BARCELONA_OPTIMIZATION
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -577,7 +577,7 @@
|
||||||
SUBROUTINE STEST1(SCOMP1,STRUE1,SSIZE,SFAC)
|
SUBROUTINE STEST1(SCOMP1,STRUE1,SSIZE,SFAC)
|
||||||
* ************************* STEST1 *****************************
|
* ************************* STEST1 *****************************
|
||||||
*
|
*
|
||||||
* THIS IS AN INTERFACE SUBROUTINE TO ACCOMODATE THE FORTRAN
|
* THIS IS AN INTERFACE SUBROUTINE TO ACCOMMODATE THE FORTRAN
|
||||||
* REQUIREMENT THAT WHEN A DUMMY ARGUMENT IS AN ARRAY, THE
|
* REQUIREMENT THAT WHEN A DUMMY ARGUMENT IS AN ARRAY, THE
|
||||||
* ACTUAL ARGUMENT MUST ALSO BE AN ARRAY OR AN ARRAY ELEMENT.
|
* ACTUAL ARGUMENT MUST ALSO BE AN ARRAY OR AN ARRAY ELEMENT.
|
||||||
*
|
*
|
||||||
|
|
|
@ -653,7 +653,7 @@
|
||||||
SUBROUTINE STEST1(SCOMP1,STRUE1,SSIZE,SFAC)
|
SUBROUTINE STEST1(SCOMP1,STRUE1,SSIZE,SFAC)
|
||||||
* ************************* STEST1 *****************************
|
* ************************* STEST1 *****************************
|
||||||
*
|
*
|
||||||
* THIS IS AN INTERFACE SUBROUTINE TO ACCOMODATE THE FORTRAN
|
* THIS IS AN INTERFACE SUBROUTINE TO ACCOMMODATE THE FORTRAN
|
||||||
* REQUIREMENT THAT WHEN A DUMMY ARGUMENT IS AN ARRAY, THE
|
* REQUIREMENT THAT WHEN A DUMMY ARGUMENT IS AN ARRAY, THE
|
||||||
* ACTUAL ARGUMENT MUST ALSO BE AN ARRAY OR AN ARRAY ELEMENT.
|
* ACTUAL ARGUMENT MUST ALSO BE AN ARRAY OR AN ARRAY ELEMENT.
|
||||||
*
|
*
|
||||||
|
|
|
@ -653,7 +653,7 @@
|
||||||
SUBROUTINE STEST1(SCOMP1,STRUE1,SSIZE,SFAC)
|
SUBROUTINE STEST1(SCOMP1,STRUE1,SSIZE,SFAC)
|
||||||
* ************************* STEST1 *****************************
|
* ************************* STEST1 *****************************
|
||||||
*
|
*
|
||||||
* THIS IS AN INTERFACE SUBROUTINE TO ACCOMODATE THE FORTRAN
|
* THIS IS AN INTERFACE SUBROUTINE TO ACCOMMODATE THE FORTRAN
|
||||||
* REQUIREMENT THAT WHEN A DUMMY ARGUMENT IS AN ARRAY, THE
|
* REQUIREMENT THAT WHEN A DUMMY ARGUMENT IS AN ARRAY, THE
|
||||||
* ACTUAL ARGUMENT MUST ALSO BE AN ARRAY OR AN ARRAY ELEMENT.
|
* ACTUAL ARGUMENT MUST ALSO BE AN ARRAY OR AN ARRAY ELEMENT.
|
||||||
*
|
*
|
||||||
|
|
|
@ -577,7 +577,7 @@
|
||||||
SUBROUTINE STEST1(SCOMP1,STRUE1,SSIZE,SFAC)
|
SUBROUTINE STEST1(SCOMP1,STRUE1,SSIZE,SFAC)
|
||||||
* ************************* STEST1 *****************************
|
* ************************* STEST1 *****************************
|
||||||
*
|
*
|
||||||
* THIS IS AN INTERFACE SUBROUTINE TO ACCOMODATE THE FORTRAN
|
* THIS IS AN INTERFACE SUBROUTINE TO ACCOMMODATE THE FORTRAN
|
||||||
* REQUIREMENT THAT WHEN A DUMMY ARGUMENT IS AN ARRAY, THE
|
* REQUIREMENT THAT WHEN A DUMMY ARGUMENT IS AN ARRAY, THE
|
||||||
* ACTUAL ARGUMENT MUST ALSO BE AN ARRAY OR AN ARRAY ELEMENT.
|
* ACTUAL ARGUMENT MUST ALSO BE AN ARRAY OR AN ARRAY ELEMENT.
|
||||||
*
|
*
|
||||||
|
|
|
@ -109,7 +109,7 @@ extern unsigned int openblas_thread_timeout();
|
||||||
/* equal to "OMP_NUM_THREADS - 1" and thread only wakes up when */
|
/* equal to "OMP_NUM_THREADS - 1" and thread only wakes up when */
|
||||||
/* jobs is queued. */
|
/* jobs is queued. */
|
||||||
|
|
||||||
/* We need this grobal for cheking if initialization is finished. */
|
/* We need this global for checking if initialization is finished. */
|
||||||
int blas_server_avail __attribute__((aligned(ATTRIBUTE_SIZE))) = 0;
|
int blas_server_avail __attribute__((aligned(ATTRIBUTE_SIZE))) = 0;
|
||||||
|
|
||||||
/* Local Variables */
|
/* Local Variables */
|
||||||
|
@ -150,8 +150,8 @@ static unsigned int thread_timeout = (1U << (THREAD_TIMEOUT));
|
||||||
|
|
||||||
#ifdef MONITOR
|
#ifdef MONITOR
|
||||||
|
|
||||||
/* Monitor is a function to see thread's status for every seconds. */
|
/* Monitor is a function to see each thread's status every second. */
|
||||||
/* Usually it turns off and it's for debugging. */
|
/* Usually it turns off and it's for debugging. */
|
||||||
|
|
||||||
static pthread_t monitor_thread;
|
static pthread_t monitor_thread;
|
||||||
static int main_status[MAX_CPU_NUMBER];
|
static int main_status[MAX_CPU_NUMBER];
|
||||||
|
|
|
@ -50,7 +50,7 @@
|
||||||
|
|
||||||
/* This is a thread implementation for Win32 lazy implementation */
|
/* This is a thread implementation for Win32 lazy implementation */
|
||||||
|
|
||||||
/* Thread server common infomation */
|
/* Thread server common information */
|
||||||
typedef struct{
|
typedef struct{
|
||||||
CRITICAL_SECTION lock;
|
CRITICAL_SECTION lock;
|
||||||
HANDLE filled;
|
HANDLE filled;
|
||||||
|
@ -61,7 +61,7 @@ typedef struct{
|
||||||
|
|
||||||
} blas_pool_t;
|
} blas_pool_t;
|
||||||
|
|
||||||
/* We need this global for cheking if initialization is finished. */
|
/* We need this global for checking if initialization is finished. */
|
||||||
int blas_server_avail = 0;
|
int blas_server_avail = 0;
|
||||||
|
|
||||||
/* Local Variables */
|
/* Local Variables */
|
||||||
|
|
|
@ -765,7 +765,7 @@ int gotoblas_set_affinity(int pos) {
|
||||||
|
|
||||||
int mynode = 1;
|
int mynode = 1;
|
||||||
|
|
||||||
/* if number of threads is larger than inital condition */
|
/* if number of threads is larger than initial condition */
|
||||||
if (pos < 0) {
|
if (pos < 0) {
|
||||||
sched_setaffinity(0, sizeof(cpu_orig_mask), &cpu_orig_mask[0]);
|
sched_setaffinity(0, sizeof(cpu_orig_mask), &cpu_orig_mask[0]);
|
||||||
return 0;
|
return 0;
|
||||||
|
|
|
@ -2751,7 +2751,7 @@ void *blas_memory_alloc(int procpos){
|
||||||
|
|
||||||
#ifdef ALLOC_DEVICEDRIVER
|
#ifdef ALLOC_DEVICEDRIVER
|
||||||
if ((*func == alloc_devicedirver) && (map_address == (void *)-1)) {
|
if ((*func == alloc_devicedirver) && (map_address == (void *)-1)) {
|
||||||
fprintf(stderr, "OpenBLAS Warning ... Physically contigous allocation was failed.\n");
|
fprintf(stderr, "OpenBLAS Warning ... Physically contiguous allocation was failed.\n");
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
2
f_check
2
f_check
|
@ -125,7 +125,7 @@ if ($compiler eq "") {
|
||||||
$openmp = "-openmp";
|
$openmp = "-openmp";
|
||||||
}
|
}
|
||||||
|
|
||||||
# for embeded underscore name, e.g. zho_ge, it may append 2 underscores.
|
# For a name with an embedded underscore, e.g. zho_ge, the compiler may append 2 underscores.
|
||||||
$data = `$compiler -O2 -S ftest3.f > /dev/null 2>&1 && cat ftest3.s && rm -f ftest3.s`;
|
$data = `$compiler -O2 -S ftest3.f > /dev/null 2>&1 && cat ftest3.s && rm -f ftest3.s`;
|
||||||
if ($data =~ / zho_ge__/) {
|
if ($data =~ / zho_ge__/) {
|
||||||
$need2bu = 1;
|
$need2bu = 1;
|
||||||
|
|
|
@ -24,7 +24,7 @@ set(BLAS1_MANGLED_SOURCES
|
||||||
axpby.c
|
axpby.c
|
||||||
)
|
)
|
||||||
|
|
||||||
# TODO: USE_NETLIB_GEMV shoudl switch gemv.c to netlib/*gemv.f
|
# TODO: USE_NETLIB_GEMV should switch gemv.c to netlib/*gemv.f
|
||||||
# these all have 'z' sources for complex versions
|
# these all have 'z' sources for complex versions
|
||||||
set(BLAS2_SOURCES
|
set(BLAS2_SOURCES
|
||||||
gemv.c ger.c
|
gemv.c ger.c
|
||||||
|
|
|
@ -91,7 +91,7 @@ void CNAME(blasint n, FLOAT alpha, FLOAT *x, blasint incx, FLOAT *y, blasint inc
|
||||||
//disable multi-thread when incx==0 or incy==0
|
//disable multi-thread when incx==0 or incy==0
|
||||||
//In that case, the threads would be dependent.
|
//In that case, the threads would be dependent.
|
||||||
//
|
//
|
||||||
//Temporarily work-around the low performance issue with small imput size &
|
//Temporarily work-around the low performance issue with small input size &
|
||||||
//multithreads.
|
//multithreads.
|
||||||
if (incx == 0 || incy == 0 || n <= MULTI_THREAD_MINIMAL)
|
if (incx == 0 || incy == 0 || n <= MULTI_THREAD_MINIMAL)
|
||||||
nthreads = 1;
|
nthreads = 1;
|
||||||
|
|
|
@ -99,7 +99,7 @@ void CNAME(blasint n, FLOAT *ALPHA, FLOAT *x, blasint incx, FLOAT *y, blasint in
|
||||||
//disable multi-thread when incx==0 or incy==0
|
//disable multi-thread when incx==0 or incy==0
|
||||||
//In that case, the threads would be dependent.
|
//In that case, the threads would be dependent.
|
||||||
//
|
//
|
||||||
//Temporarily work-around the low performance issue with small imput size &
|
//Temporarily work-around the low performance issue with small input size &
|
||||||
//multithreads.
|
//multithreads.
|
||||||
if (incx == 0 || incy == 0 || n <= MULTI_THREAD_MINIMAL)
|
if (incx == 0 || incy == 0 || n <= MULTI_THREAD_MINIMAL)
|
||||||
nthreads = 1;
|
nthreads = 1;
|
||||||
|
|
|
@ -576,7 +576,7 @@
|
||||||
SUBROUTINE STEST1(SCOMP1,STRUE1,SSIZE,SFAC)
|
SUBROUTINE STEST1(SCOMP1,STRUE1,SSIZE,SFAC)
|
||||||
* ************************* STEST1 *****************************
|
* ************************* STEST1 *****************************
|
||||||
*
|
*
|
||||||
* THIS IS AN INTERFACE SUBROUTINE TO ACCOMODATE THE FORTRAN
|
* THIS IS AN INTERFACE SUBROUTINE TO ACCOMMODATE THE FORTRAN
|
||||||
* REQUIREMENT THAT WHEN A DUMMY ARGUMENT IS AN ARRAY, THE
|
* REQUIREMENT THAT WHEN A DUMMY ARGUMENT IS AN ARRAY, THE
|
||||||
* ACTUAL ARGUMENT MUST ALSO BE AN ARRAY OR AN ARRAY ELEMENT.
|
* ACTUAL ARGUMENT MUST ALSO BE AN ARRAY OR AN ARRAY ELEMENT.
|
||||||
*
|
*
|
||||||
|
|
|
@ -991,7 +991,7 @@
|
||||||
SUBROUTINE STEST1(SCOMP1,STRUE1,SSIZE,SFAC)
|
SUBROUTINE STEST1(SCOMP1,STRUE1,SSIZE,SFAC)
|
||||||
* ************************* STEST1 *****************************
|
* ************************* STEST1 *****************************
|
||||||
*
|
*
|
||||||
* THIS IS AN INTERFACE SUBROUTINE TO ACCOMODATE THE FORTRAN
|
* THIS IS AN INTERFACE SUBROUTINE TO ACCOMMODATE THE FORTRAN
|
||||||
* REQUIREMENT THAT WHEN A DUMMY ARGUMENT IS AN ARRAY, THE
|
* REQUIREMENT THAT WHEN A DUMMY ARGUMENT IS AN ARRAY, THE
|
||||||
* ACTUAL ARGUMENT MUST ALSO BE AN ARRAY OR AN ARRAY ELEMENT.
|
* ACTUAL ARGUMENT MUST ALSO BE AN ARRAY OR AN ARRAY ELEMENT.
|
||||||
*
|
*
|
||||||
|
|
|
@ -946,7 +946,7 @@
|
||||||
SUBROUTINE STEST1(SCOMP1,STRUE1,SSIZE,SFAC)
|
SUBROUTINE STEST1(SCOMP1,STRUE1,SSIZE,SFAC)
|
||||||
* ************************* STEST1 *****************************
|
* ************************* STEST1 *****************************
|
||||||
*
|
*
|
||||||
* THIS IS AN INTERFACE SUBROUTINE TO ACCOMODATE THE FORTRAN
|
* THIS IS AN INTERFACE SUBROUTINE TO ACCOMMODATE THE FORTRAN
|
||||||
* REQUIREMENT THAT WHEN A DUMMY ARGUMENT IS AN ARRAY, THE
|
* REQUIREMENT THAT WHEN A DUMMY ARGUMENT IS AN ARRAY, THE
|
||||||
* ACTUAL ARGUMENT MUST ALSO BE AN ARRAY OR AN ARRAY ELEMENT.
|
* ACTUAL ARGUMENT MUST ALSO BE AN ARRAY OR AN ARRAY ELEMENT.
|
||||||
*
|
*
|
||||||
|
|
|
@ -576,7 +576,7 @@
|
||||||
SUBROUTINE STEST1(SCOMP1,STRUE1,SSIZE,SFAC)
|
SUBROUTINE STEST1(SCOMP1,STRUE1,SSIZE,SFAC)
|
||||||
* ************************* STEST1 *****************************
|
* ************************* STEST1 *****************************
|
||||||
*
|
*
|
||||||
* THIS IS AN INTERFACE SUBROUTINE TO ACCOMODATE THE FORTRAN
|
* THIS IS AN INTERFACE SUBROUTINE TO ACCOMMODATE THE FORTRAN
|
||||||
* REQUIREMENT THAT WHEN A DUMMY ARGUMENT IS AN ARRAY, THE
|
* REQUIREMENT THAT WHEN A DUMMY ARGUMENT IS AN ARRAY, THE
|
||||||
* ACTUAL ARGUMENT MUST ALSO BE AN ARRAY OR AN ARRAY ELEMENT.
|
* ACTUAL ARGUMENT MUST ALSO BE AN ARRAY OR AN ARRAY ELEMENT.
|
||||||
*
|
*
|
||||||
|
|
Loading…
Reference in New Issue