From 591865926b98922f09e4bc4f882172a7fb26c372 Mon Sep 17 00:00:00 2001 From: Rohit Goswami Date: Sat, 2 Mar 2024 18:04:43 +0000 Subject: [PATCH] BLD: Rework lapack-netlib completely --- lapack-netlib/BLAS/SRC/meson.build | 439 ++++++++++++++++++----------- lapack-netlib/INSTALL/meson.build | 27 ++ lapack-netlib/SRC/meson.build | 5 +- lapack-netlib/meson.build | 27 +- meson.build | 49 ++++ meson_options.txt | 31 ++ 6 files changed, 393 insertions(+), 185 deletions(-) create mode 100644 lapack-netlib/INSTALL/meson.build create mode 100644 meson.build create mode 100644 meson_options.txt diff --git a/lapack-netlib/BLAS/SRC/meson.build b/lapack-netlib/BLAS/SRC/meson.build index 855b20219..2e5fe2944 100644 --- a/lapack-netlib/BLAS/SRC/meson.build +++ b/lapack-netlib/BLAS/SRC/meson.build @@ -1,175 +1,292 @@ -SBLAS1 = files( - 'isamax.f', - 'sasum.f', - 'saxpy.f', - 'scopy.f', - 'sdot.f', - 'snrm2.f', - 'srot.f', - 'srotg.f', - 'sscal.f', - 'sswap.f', - 'sdsdot.f', - 'srotmg.f', - 'srotm.f', -) +# Conventions: +# _ implies that the variables are not meant to be used outside here +# Optionals are applied from the top-level meson_options.txt +# They are declared at the top +# +# Derived from the CMakeLists.txt +# +# Relevant groups and variables: +# +# _allblas -- Auxiliary routines for Level 2 and 3 BLAS +# +# Level 1 BLAS # +# +# _dblas1 -- Double precision real BLAS 1 routines +# _zblas1 -- Double precision complex BLAS 1 routines +# _zb1aux -- D.P. real BLAS 1 routines called by d.p. 
complex routines +# +# _sblas1 -- Single precision real BLAS routines +# _cblas1 -- Single precision complex BLAS routines +# _cb1aux -- Real BLAS routines called by complex routines +# +# Level 2 BLAS # +# +# _dblas2 -- Double precision real BLAS 2 routines +# _zblas2 -- Double precision complex BLAS 2 routines +# +# _sblas2 -- Single precision real BLAS2 routines +# _cblas2 -- Single precision complex BLAS2 routines +# +# Level 3 BLAS # +# +# _dblas3 -- Double precision real BLAS 3 routines +# _zblas3 -- Double precision complex BLAS 3 routines +# +# _sblas3 -- Single precision real BLAS3 routines +# _cblas3 -- Single precision complex BLAS3 routines -CBLAS1 = files( - 'scabs1.f', - 'scasum.f', - 'scnrm2.f', - 'icamax.f', - 'caxpy.f', - 'ccopy.f', - 'cdotc.f', - 'cdotu.f', - 'csscal.f', - 'crotg.f', - 'cscal.f', - 'cswap.f', - 'csrot.f', -) +prec = get_option('realkind') +build_single = get_option('build_single') +build_double = get_option('build_double') +build_complex = get_option('build_complex') +build_complex16 = get_option('build_complex16') -DBLAS1 = files( - 'idamax.f', - 'dasum.f', - 'daxpy.f', - 'dcopy.f', - 'ddot.f', - 'dnrm2.f', - 'drot.f', - 'drotg.f', - 'dscal.f', - 'dsdot.f', - 'dswap.f', - 'drotmg.f', - 'drotm.f', -) +# _allblas -- Auxiliary routines for Level 2 and 3 BLAS +_allblas = library('_allblas', + sources: [ 'lsame.f', 'xerbla.f', 'xerbla_array.f' ]) -ZBLAS1 = files( - 'dcabs1.f', - 'dzasum.f', - 'dznrm2.f', - 'izamax.f', - 'zaxpy.f', - 'zcopy.f', - 'zdotc.f', - 'zdotu.f', - 'zdscal.f', - 'zrotg.f', - 'zscal.f', - 'zswap.f', - 'zdrot.f', -) -CB1AUX = files('isamax.f', 'sasum.f', 'saxpy.f', 'scopy.f', 'snrm2.f', 'sscal.f') +# All other sources +_blas_netlib_srcs = [] -ZB1AUX = files('idamax.f', 'dasum.f', 'daxpy.f', 'dcopy.f', 'dnrm2.f', 'dscal.f') +# Level 1 BLAS +# _dblas1 -- Double precision real BLAS 1 routines +_dblas1 = [ + 'idamax.f', + 'dasum.f', + 'daxpy.f', + 'dcopy.f', + 'ddot.f', + 'dnrm2.f', + 'drot.f', + 'drotg.f', + 
'dscal.f', + 'dsdot.f', + 'dswap.f', + 'drotmg.f', + 'drotm.f', +] -ALLBLAS = files('lsame.f', 'xerbla.f', 'xerbla_array.f') +# _zblas1 -- Double precision complex BLAS 1 routines +_zblas1 = [ + 'dcabs1.f', + 'dzasum.f', + 'dznrm2.f', + 'izamax.f', + 'zaxpy.f', + 'zcopy.f', + 'zdotc.f', + 'zdotu.f', + 'zdscal.f', + 'zrotg.f', + 'zscal.f', + 'zswap.f', + 'zdrot.f', +] -SBLAS2 = files( - 'sgemv.f', - 'sgbmv.f', - 'ssymv.f', - 'ssbmv.f', - 'sspmv.f', - 'strmv.f', - 'stbmv.f', - 'stpmv.f', - 'strsv.f', - 'stbsv.f', - 'stpsv.f', - 'sger.f', - 'ssyr.f', - 'sspr.f', - 'ssyr2.f', - 'sspr2.f', -) +# _zb1aux -- D.P. real BLAS routines called by d.p. complex routines +_zb1aux = [ + 'idamax.f', + 'dasum.f', + 'daxpy.f', + 'dcopy.f', + 'dnrm2.f', + 'dscal.f', +] -CBLAS2 = files( - 'cgemv.f', - 'cgbmv.f', - 'chemv.f', - 'chbmv.f', - 'chpmv.f', - 'ctrmv.f', - 'ctbmv.f', - 'ctpmv.f', - 'ctrsv.f', - 'ctbsv.f', - 'ctpsv.f', - 'cgerc.f', - 'cgeru.f', - 'cher.f', - 'chpr.f', - 'cher2.f', - 'chpr2.f', -) +# _sblas1 -- Single precision real BLAS routines +_sblas1 = [ + 'isamax.f', + 'sasum.f', + 'saxpy.f', + 'scopy.f', + 'sdot.f', + 'snrm2.f', + 'srot.f', + 'srotg.f', + 'sscal.f', + 'sswap.f', + 'sdsdot.f', + 'srotmg.f', + 'srotm.f', +] -DBLAS2 = files( - 'dgemv.f', - 'dgbmv.f', - 'dsymv.f', - 'dsbmv.f', - 'dspmv.f', - 'dtrmv.f', - 'dtbmv.f', - 'dtpmv.f', - 'dtrsv.f', - 'dtbsv.f', - 'dtpsv.f', - 'dger.f', - 'dsyr.f', - 'dspr.f', - 'dsyr2.f', - 'dspr2.f', -) +# _cblas1 -- Single precision complex BLAS routines +_cblas1 = [ + 'scabs1.f', + 'scasum.f', + 'scnrm2.f', + 'icamax.f', + 'caxpy.f', + 'ccopy.f', + 'cdotc.f', + 'cdotu.f', + 'csscal.f', + 'crotg.f', + 'cscal.f', + 'cswap.f', + 'csrot.f', +] -ZBLAS2 = files( - 'zgemv.f', - 'zgbmv.f', - 'zhemv.f', - 'zhbmv.f', - 'zhpmv.f', - 'ztrmv.f', - 'ztbmv.f', - 'ztpmv.f', - 'ztrsv.f', - 'ztbsv.f', - 'ztpsv.f', - 'zgerc.f', - 'zgeru.f', - 'zher.f', - 'zhpr.f', - 'zher2.f', - 'zhpr2.f', -) +# _cb1aux -- Real BLAS routines called by complex 
routines +_cb1aux = [ + 'isamax.f', + 'sasum.f', + 'saxpy.f', + 'scopy.f', + 'snrm2.f', + 'sscal.f' +] -SBLAS3 = files('sgemm.f', 'ssymm.f', 'ssyrk.f', 'ssyr2k.f', 'strmm.f', 'strsm.f') +# Level 2 BLAS +# _dblas2 -- Double precision real BLAS2 routines +_dblas2 = [ + 'dgemv.f', + 'dgbmv.f', + 'dsymv.f', + 'dsbmv.f', + 'dspmv.f', + 'dtrmv.f', + 'dtbmv.f', + 'dtpmv.f', + 'dtrsv.f', + 'dtbsv.f', + 'dtpsv.f', + 'dger.f', + 'dsyr.f', + 'dspr.f', + 'dsyr2.f', + 'dspr2.f', +] -CBLAS3 = files( - 'cgemm.f', - 'csymm.f', - 'csyrk.f', - 'csyr2k.f', - 'ctrmm.f', - 'ctrsm.f', - 'chemm.f', - 'cherk.f', - 'cher2k.f', -) +# _zblas2 -- Double precision complex BLAS2 routines +_zblas2 = [ + 'zgemv.f', + 'zgbmv.f', + 'zhemv.f', + 'zhbmv.f', + 'zhpmv.f', + 'ztrmv.f', + 'ztbmv.f', + 'ztpmv.f', + 'ztrsv.f', + 'ztbsv.f', + 'ztpsv.f', + 'zgerc.f', + 'zgeru.f', + 'zher.f', + 'zhpr.f', + 'zher2.f', + 'zhpr2.f', +] -DBLAS3 = files('dgemm.f', 'dsymm.f', 'dsyrk.f', 'dsyr2k.f', 'dtrmm.f', 'dtrsm.f') +# _sblas2 -- Single precision real BLAS2 routines +_sblas2 = [ + 'sgemv.f', + 'sgbmv.f', + 'ssymv.f', + 'ssbmv.f', + 'sspmv.f', + 'strmv.f', + 'stbmv.f', + 'stpmv.f', + 'strsv.f', + 'stbsv.f', + 'stpsv.f', + 'sger.f', + 'ssyr.f', + 'sspr.f', + 'ssyr2.f', + 'sspr2.f', +] -ZBLAS3 = files( - 'zgemm.f', - 'zsymm.f', - 'zsyrk.f', - 'zsyr2k.f', - 'ztrmm.f', - 'ztrsm.f', - 'zhemm.f', - 'zherk.f', - 'zher2k.f', -) +# _cblas2 -- Single precision complex BLAS2 routines +_cblas2 = [ + 'cgemv.f', + 'cgbmv.f', + 'chemv.f', + 'chbmv.f', + 'chpmv.f', + 'ctrmv.f', + 'ctbmv.f', + 'ctpmv.f', + 'ctrsv.f', + 'ctbsv.f', + 'ctpsv.f', + 'cgerc.f', + 'cgeru.f', + 'cher.f', + 'chpr.f', + 'cher2.f', + 'chpr2.f', +] + +# Level 3 BLAS + +# _dblas3 -- Double precision real BLAS3 routines +_dblas3 = [ + 'dgemm.f', + 'dsymm.f', + 'dsyrk.f', + 'dsyr2k.f', + 'dtrmm.f', + 'dtrsm.f', +] + +# _zblas3 -- Double precision complex BLAS3 routines +_zblas3 = [ + 'zgemm.f', + 'zsymm.f', + 'zsyrk.f', + 'zsyr2k.f', + 'ztrmm.f', + 'ztrsm.f', 
+ 'zhemm.f', + 'zherk.f', + 'zher2k.f', +] + +# _sblas3 -- Single precision real BLAS3 routines +_sblas3 =[ + 'sgemm.f', + 'ssymm.f', + 'ssyrk.f', + 'ssyr2k.f', + 'strmm.f', + 'strsm.f', +] + +# _cblas3 -- Single precision complex BLAS3 routines +_cblas3 = [ + 'cgemm.f', + 'csymm.f', + 'csyrk.f', + 'csyr2k.f', + 'ctrmm.f', + 'ctrsm.f', + 'chemm.f', + 'cherk.f', + 'cher2k.f', +] + +# Start making the blas target +if build_complex + _blas_netlib_srcs += _cblas1 + _cb1aux + _cblas2 + _cblas3 +endif +if build_complex16 + _blas_netlib_srcs += _zblas1 + _zb1aux + _zblas2 + _zblas3 +endif + +if prec == 'd' or build_double + _blas_netlib_srcs += _dblas1 + _dblas2 + _dblas3 +elif prec == 's' or build_single + _blas_netlib_srcs += _sblas1 + _sblas2 + _sblas3 +endif + +# Create the blas library +blas = library('blas', + sources: _blas_netlib_srcs, + link_with: _allblas, + version: lapack_version, + soversion: lapack_major_version, + install: true) diff --git a/lapack-netlib/INSTALL/meson.build b/lapack-netlib/INSTALL/meson.build new file mode 100644 index 000000000..a2595a018 --- /dev/null +++ b/lapack-netlib/INSTALL/meson.build @@ -0,0 +1,27 @@ +prec = get_option('realkind') +if prec == 'd' + if not get_option('use_c_lapack') + LINSTALL = files( + 'droundup_lwork.f', + 'lsame.f', + ) + else + LINSTALL = files( + 'dlamch.c', + 'lsame.c', + ) + endif +elif prec == 's' + if not get_option('use_c_lapack') + LINSTALL = files( + 'slamch.f', + 'sroundup_lwork.f', + 'lsame.f', + ) + else + LINSTALL = files( + 'slamch.c', + 'lsame.c', + ) + endif +endif diff --git a/lapack-netlib/SRC/meson.build b/lapack-netlib/SRC/meson.build index c7d0e661e..ca0cd4ae8 100644 --- a/lapack-netlib/SRC/meson.build +++ b/lapack-netlib/SRC/meson.build @@ -13,7 +13,6 @@ ALLAUX = files( 'iladiag.f', 'chla_transtype.f', '../INSTALL/ilaver.f', - '../INSTALL/lsame.f', '../INSTALL/slamch.f', ) @@ -171,7 +170,6 @@ DZLAUX = files( 'disnan.f', 'dlartgp.f', 'dlartgs.f', - '../INSTALL/dlamch.f', 
'../INSTALL/dsecnd_INT_CPU_TIME.f', ) @@ -533,6 +531,7 @@ SLASRC = files( 'sgelqt3.f', 'sgemlqt.f', 'sgetsls.f', + 'sgetrf.f', 'sgeqr.f', 'slatsqr.f', 'slamtsqr.f', @@ -560,4 +559,4 @@ SLASRC = files( 'scombssq.f', ) -DSLASRC = files('spotrs.f', 'sgetrs.f', 'spotrf.f', 'sgetrf.f') +DSLASRC = files('spotrs.f', 'sgetrs.f', 'spotrf.f') diff --git a/lapack-netlib/meson.build b/lapack-netlib/meson.build index 2cf00bdf4..1796d11be 100644 --- a/lapack-netlib/meson.build +++ b/lapack-netlib/meson.build @@ -2,26 +2,11 @@ # meson compile -C build # meson install --prefix=$HOME/.local/lapack -project('LAPACK', 'fortran', - default_options : ['default_library=static', 'libdir=lib/'], - version : '3.8.0') +lapack_major_version = 3 # soversion +lapack_minor_version = 12 +lapack_patch_version = 0 +lapack_version = f'@lapack_major_version@.@lapack_minor_version@.@lapack_patch_version@' -subdir('BLAS/SRC') +subdir('BLAS/SRC') # Defines blas subdir('SRC') - -prec = get_option('realkind') - - -if prec == 'd' - bsrc = DBLAS1 + DBLAS2 + DBLAS3 - lsrc = DZLAUX + DSLASRC -elif prec == 's' - bsrc = SBLAS1 + SBLAS2 + SBLAS3 - lsrc = SCLAUX + SLASRC -endif - -blas = library('blas', bsrc, - install : true) - -lapack = library('lapack', lsrc, ALLAUX, - install : true) +subdir('INSTALL') diff --git a/meson.build b/meson.build new file mode 100644 index 000000000..f12622df3 --- /dev/null +++ b/meson.build @@ -0,0 +1,49 @@ +project('OpenBLAS', ['c', 'fortran'], default_options: ['c_std=c99']) + +# Skip the check for valid CC +cc = meson.get_compiler('c') + +# Conditional? 
+add_languages('fortran', native: false) +ff = meson.get_compiler('fortran') +if ff.has_argument('-Wno-conversion') + add_project_arguments('-Wno-conversion', language: 'fortran') +endif + +# Makefile.system +# Ignoring all the hostarch checks and conflicts for arch in BSD for now +subdir('lapack-netlib') + +# System configuration +build_single = get_option('build_single') +build_double = get_option('build_double') +build_complex = get_option('build_complex') +build_complex16 = get_option('build_complex16') + +# Options from CMakeLists +build_without_lapack = get_option('build_without_lapack') +build_lapack_deprecated = get_option('build_lapack_deprecated') +build_testing = get_option('build_testing') +use_c_lapack = get_option('use_c_lapack') +build_without_cblas = get_option('build_without_cblas') +dynamic_arch = get_option('dynamic_arch') +dynamic_older = get_option('dynamic_older') +build_relapack = get_option('build_relapack') +use_locking = get_option('use_locking') +use_perl = get_option('use_perl') +no_warmup = get_option('no_warmup') +no_affinity = get_option('no_affinity') +build_cpp_thread_safety_test = get_option('build_cpp_thread_safety_test') +build_cpp_thread_safety_gemv = get_option('build_cpp_thread_safety_gemv') +build_static_libs = get_option('build_static_libs') + +if host_machine.system() == 'linux' + no_affinity = true +else + no_affinity = false +endif + +# Example for handling options: +if build_without_lapack + # configure build to exclude LAPACK and LAPACKE +endif diff --git a/meson_options.txt b/meson_options.txt new file mode 100644 index 000000000..bf6dc8783 --- /dev/null +++ b/meson_options.txt @@ -0,0 +1,31 @@ +# From CMakeLists +option('build_without_lapack', type: 'boolean', value: false, description: 'Do not build LAPACK and LAPACKE (Only BLAS or CBLAS)') +option('build_lapack_deprecated', type: 'boolean', value: true, description: 'When building LAPACK, include also some older, deprecated routines') +option('build_testing', type: 
'boolean', value: true, description: 'Build LAPACK testsuite when building LAPACK') +option('use_c_lapack', type: 'boolean', value: false, description: 'Build LAPACK from C sources instead of the original Fortran') +option('build_without_cblas', type: 'boolean', value: false, description: 'Do not build the C interface (CBLAS) to the BLAS functions') +option('use_locking', type: 'boolean', value: false, description: 'Use locks even in single-threaded builds to make them callable from multiple threads') +option('use_perl', type: 'boolean', value: false, description: 'Use the older PERL scripts for build preparation instead of universal shell scripts') +option('no_warmup', type: 'boolean', value: true, description: 'Do not run a benchmark on each startup just to find the best location for the memory buffer') +option('no_affinity', type: 'boolean', value: true, description: 'Disable support for CPU affinity masks to avoid binding processes from e.g. R or numpy/scipy to a single core') +option('build_cpp_thread_safety_test', type: 'boolean', value: false, description: 'Run a massively parallel DGEMM test to confirm thread safety of the library (requires OpenMP and about 1.3GB of RAM)') +option('build_cpp_thread_safety_gemv', type: 'boolean', value: false, description: 'Run a massively parallel DGEMV test to confirm thread safety of the library (requires OpenMP)') +option('build_static_libs', type: 'boolean', value: false, description: 'Build static library') + +# From Makefile +option('dynamic_arch', type: 'boolean', value: false, description: 'Include support for multiple CPU targets, with automatic selection at runtime (x86/x86_64, aarch64 or ppc only)') +option('dynamic_older', type: 'boolean', value: false, description: 'Include specific support for older x86 cpu models (Penryn,Dunnington,Atom,Nano,Opteron) with DYNAMIC_ARCH') +option('no_lapack', type: 'boolean', value: false, description: 'Disable LAPACK') +option('build_relapack', type: 'boolean', value: false, 
description: 'Build with ReLAPACK (recursive implementation of several LAPACK functions on top of standard LAPACK)') +option('no_fortran', type: 'boolean', value: false, description: 'Disable Fortran compiler usage') + +# From Makefile.system +option('relapack_replace', type: 'boolean', value: false, description: 'Replace RELAPACK functions') +option('build_single', type: 'boolean', value: true, description: 'Build single precision') +option('build_double', type: 'boolean', value: true, description: 'Build double precision') +option('build_complex', type: 'boolean', value: true, description: 'Build complex precision') +option('build_complex16', type: 'boolean', value: true, description: 'Build double complex precision') + +# From lapack-netlib +option('realkind', type : 'string', value : 'd', + description : 's: real32 d: real64 c: complex32 z: complex64')