BENCH: add benchmarks using codspeed.io
This commit is contained in:
parent
f034745ce6
commit
9f28161837
|
@ -0,0 +1,150 @@
|
||||||
|
name: Run codspeed benchmarks
|
||||||
|
|
||||||
|
on: [push, pull_request]
|
||||||
|
|
||||||
|
concurrency:
|
||||||
|
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
|
||||||
|
cancel-in-progress: true
|
||||||
|
|
||||||
|
permissions:
|
||||||
|
contents: read # to fetch code (actions/checkout)
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
benchmarks:
|
||||||
|
if: "github.repository == 'OpenMathLib/OpenBLAS'"
|
||||||
|
strategy:
|
||||||
|
fail-fast: false
|
||||||
|
matrix:
|
||||||
|
os: [ubuntu-latest]
|
||||||
|
fortran: [gfortran]
|
||||||
|
build: [make]
|
||||||
|
pyver: ["3.12"]
|
||||||
|
runs-on: ${{ matrix.os }}
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v3
|
||||||
|
- uses: actions/setup-python@v3
|
||||||
|
with:
|
||||||
|
python-version: ${{ matrix.pyver }}
|
||||||
|
|
||||||
|
- name: Print system information
|
||||||
|
run: |
|
||||||
|
if [ "$RUNNER_OS" == "Linux" ]; then
|
||||||
|
cat /proc/cpuinfo
|
||||||
|
fi
|
||||||
|
|
||||||
|
- name: Install Dependencies
|
||||||
|
run: |
|
||||||
|
if [ "$RUNNER_OS" == "Linux" ]; then
|
||||||
|
sudo apt-get update
|
||||||
|
sudo apt-get install -y gfortran cmake ccache libtinfo5
|
||||||
|
else
|
||||||
|
echo "::error::$RUNNER_OS not supported"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
- name: Compilation cache
|
||||||
|
uses: actions/cache@v3
|
||||||
|
with:
|
||||||
|
path: ~/.ccache
|
||||||
|
# We include the commit sha in the cache key, as new cache entries are
|
||||||
|
# only created if there is no existing entry for the key yet.
|
||||||
|
# GNU make and cmake call the compilers differently. It looks like
|
||||||
|
# that causes the cache to mismatch. Keep the ccache for both build
|
||||||
|
# tools separate to avoid polluting each other.
|
||||||
|
key: ccache-${{ runner.os }}-${{ matrix.build }}-${{ matrix.fortran }}-${{ github.ref }}-${{ github.sha }}
|
||||||
|
# Restore a matching ccache cache entry. Prefer same branch and same Fortran compiler.
|
||||||
|
restore-keys: |
|
||||||
|
ccache-${{ runner.os }}-${{ matrix.build }}-${{ matrix.fortran }}-${{ github.ref }}
|
||||||
|
ccache-${{ runner.os }}-${{ matrix.build }}-${{ matrix.fortran }}
|
||||||
|
ccache-${{ runner.os }}-${{ matrix.build }}
|
||||||
|
|
||||||
|
- name: Write out the .pc
|
||||||
|
run: |
|
||||||
|
cd benchmark/pybench
|
||||||
|
cat > openblas.pc << EOF
|
||||||
|
libdir=${{ github.workspace }}
|
||||||
|
includedir= ${{ github.workspace }}
|
||||||
|
openblas_config= OpenBLAS 0.3.27 DYNAMIC_ARCH NO_AFFINITY Haswell MAX_THREADS=64
|
||||||
|
version=0.0.99
|
||||||
|
extralib=-lm -lpthread -lgfortran -lquadmath -L${{ github.workspace }} -lopenblas
|
||||||
|
Name: openblas
|
||||||
|
Description: OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version
|
||||||
|
Version: ${version}
|
||||||
|
URL: https://github.com/xianyi/OpenBLAS
|
||||||
|
Libs: ${{ github.workspace }}/libopenblas.so -Wl,-rpath,${{ github.workspace }}
|
||||||
|
Libs.private: -lm -lpthread -lgfortran -lquadmath -L${{ github.workspace }} -lopenblas
|
||||||
|
Cflags: -I${{ github.workspace}}
|
||||||
|
EOF
|
||||||
|
cat openblas.pc
|
||||||
|
|
||||||
|
- name: Configure ccache
|
||||||
|
run: |
|
||||||
|
if [ "${{ matrix.build }}" = "make" ]; then
|
||||||
|
# Add ccache to path
|
||||||
|
if [ "$RUNNER_OS" = "Linux" ]; then
|
||||||
|
echo "/usr/lib/ccache" >> $GITHUB_PATH
|
||||||
|
elif [ "$RUNNER_OS" = "macOS" ]; then
|
||||||
|
echo "$(brew --prefix)/opt/ccache/libexec" >> $GITHUB_PATH
|
||||||
|
else
|
||||||
|
echo "::error::$RUNNER_OS not supported"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
# Limit the maximum size and switch on compression to avoid exceeding the total disk or cache quota (5 GB).
|
||||||
|
test -d ~/.ccache || mkdir -p ~/.ccache
|
||||||
|
echo "max_size = 300M" > ~/.ccache/ccache.conf
|
||||||
|
echo "compression = true" >> ~/.ccache/ccache.conf
|
||||||
|
ccache -s
|
||||||
|
|
||||||
|
- name: Build OpenBLAS
|
||||||
|
run: |
|
||||||
|
case "${{ matrix.build }}" in
|
||||||
|
"make")
|
||||||
|
make -j$(nproc) DYNAMIC_ARCH=1 USE_OPENMP=0 FC="ccache ${{ matrix.fortran }}"
|
||||||
|
;;
|
||||||
|
"cmake")
|
||||||
|
mkdir build && cd build
|
||||||
|
cmake -DDYNAMIC_ARCH=1 \
|
||||||
|
-DNOFORTRAN=0 \
|
||||||
|
-DBUILD_WITHOUT_LAPACK=0 \
|
||||||
|
-DCMAKE_VERBOSE_MAKEFILE=ON \
|
||||||
|
-DCMAKE_BUILD_TYPE=Release \
|
||||||
|
-DCMAKE_Fortran_COMPILER=${{ matrix.fortran }} \
|
||||||
|
-DCMAKE_C_COMPILER_LAUNCHER=ccache \
|
||||||
|
-DCMAKE_Fortran_COMPILER_LAUNCHER=ccache \
|
||||||
|
..
|
||||||
|
cmake --build .
|
||||||
|
;;
|
||||||
|
*)
|
||||||
|
echo "::error::Configuration not supported"
|
||||||
|
exit 1
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
|
||||||
|
- name: Show ccache status
|
||||||
|
continue-on-error: true
|
||||||
|
run: ccache -s
|
||||||
|
|
||||||
|
- name: Install benchmark dependencies
|
||||||
|
run: pip install meson ninja numpy pytest pytest-codspeed --user
|
||||||
|
|
||||||
|
- name: Build the wrapper
|
||||||
|
run: |
|
||||||
|
cd benchmark/pybench
|
||||||
|
export PKG_CONFIG_PATH=$PWD
|
||||||
|
meson setup build --prefix=$PWD/build-install
|
||||||
|
meson install -C build
|
||||||
|
#
|
||||||
|
# sanity check
|
||||||
|
cd build/openblas_wrap
|
||||||
|
python -c'import _flapack; print(dir(_flapack))'
|
||||||
|
|
||||||
|
- name: Run benchmarks
|
||||||
|
uses: CodSpeedHQ/action@v2
|
||||||
|
with:
|
||||||
|
token: ${{ secrets.CODSPEED_TOKEN }}
|
||||||
|
run: |
|
||||||
|
cd benchmark/pybench
|
||||||
|
export PYTHONPATH=$PWD/build-install/lib/python${{matrix.pyver}}/site-packages/
|
||||||
|
OPENBLAS_NUM_THREADS=1 pytest benchmarks/bench_blas.py --codspeed
|
||||||
|
|
|
@ -109,3 +109,4 @@ benchmark/smallscaling
|
||||||
CMakeCache.txt
|
CMakeCache.txt
|
||||||
CMakeFiles/*
|
CMakeFiles/*
|
||||||
.vscode
|
.vscode
|
||||||
|
**/__pycache__
|
||||||
|
|
|
@ -0,0 +1,185 @@
|
||||||
|
import pytest
|
||||||
|
import numpy as np
|
||||||
|
from openblas_wrap import (
|
||||||
|
# level 1
|
||||||
|
dnrm2, ddot, daxpy,
|
||||||
|
# level 3
|
||||||
|
dgemm, dsyrk,
|
||||||
|
# lapack
|
||||||
|
dgesv, # linalg.solve
|
||||||
|
dgesdd, dgesdd_lwork, # linalg.svd
|
||||||
|
dsyev, dsyev_lwork, # linalg.eigh
|
||||||
|
)
|
||||||
|
|
||||||
|
# ### BLAS level 1 ###
|
||||||
|
|
||||||
|
# dnrm2
|
||||||
|
|
||||||
|
dnrm2_sizes = [100, 1000]
|
||||||
|
|
||||||
|
def run_dnrm2(n, x, incx):
|
||||||
|
res = dnrm2(x, n, incx=incx)
|
||||||
|
return res
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize('n', dnrm2_sizes)
|
||||||
|
def test_nrm2(benchmark, n):
|
||||||
|
rndm = np.random.RandomState(1234)
|
||||||
|
x = np.array(rndm.uniform(size=(n,)), dtype=float)
|
||||||
|
result = benchmark(run_dnrm2, n, x, 1)
|
||||||
|
|
||||||
|
|
||||||
|
# ddot
|
||||||
|
|
||||||
|
ddot_sizes = [100, 1000]
|
||||||
|
|
||||||
|
def run_ddot(x, y,):
|
||||||
|
res = ddot(x, y)
|
||||||
|
return res
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize('n', ddot_sizes)
|
||||||
|
def test_dot(benchmark, n):
|
||||||
|
rndm = np.random.RandomState(1234)
|
||||||
|
x = np.array(rndm.uniform(size=(n,)), dtype=float)
|
||||||
|
y = np.array(rndm.uniform(size=(n,)), dtype=float)
|
||||||
|
result = benchmark(run_ddot, x, y)
|
||||||
|
|
||||||
|
|
||||||
|
# daxpy
|
||||||
|
|
||||||
|
daxpy_sizes = [100, 1000]
|
||||||
|
|
||||||
|
def run_daxpy(x, y,):
|
||||||
|
res = daxpy(x, y, a=2.0)
|
||||||
|
return res
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize('n', daxpy_sizes)
|
||||||
|
def test_daxpy(benchmark, n):
|
||||||
|
rndm = np.random.RandomState(1234)
|
||||||
|
x = np.array(rndm.uniform(size=(n,)), dtype=float)
|
||||||
|
y = np.array(rndm.uniform(size=(n,)), dtype=float)
|
||||||
|
result = benchmark(run_daxpy, x, y)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# ### BLAS level 3 ###
|
||||||
|
|
||||||
|
# dgemm
|
||||||
|
|
||||||
|
gemm_sizes = [100, 1000]
|
||||||
|
|
||||||
|
def run_gemm(a, b, c):
|
||||||
|
alpha = 1.0
|
||||||
|
res = dgemm(alpha, a, b, c=c, overwrite_c=True)
|
||||||
|
return res
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize('n', gemm_sizes)
|
||||||
|
def test_gemm(benchmark, n):
|
||||||
|
rndm = np.random.RandomState(1234)
|
||||||
|
a = np.array(rndm.uniform(size=(n, n)), dtype=float, order='F')
|
||||||
|
b = np.array(rndm.uniform(size=(n, n)), dtype=float, order='F')
|
||||||
|
c = np.empty((n, n), dtype=float, order='F')
|
||||||
|
result = benchmark(run_gemm, a, b, c)
|
||||||
|
assert result is c
|
||||||
|
|
||||||
|
|
||||||
|
# dsyrk
|
||||||
|
|
||||||
|
syrk_sizes = [100, 1000]
|
||||||
|
|
||||||
|
|
||||||
|
def run_syrk(a, c):
|
||||||
|
res = dsyrk(1.0, a, c=c, overwrite_c=True)
|
||||||
|
return res
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize('n', syrk_sizes)
|
||||||
|
def test_syrk(benchmark, n):
|
||||||
|
rndm = np.random.RandomState(1234)
|
||||||
|
a = np.array(rndm.uniform(size=(n, n)), dtype=float, order='F')
|
||||||
|
c = np.empty((n, n), dtype=float, order='F')
|
||||||
|
result = benchmark(run_syrk, a, c)
|
||||||
|
assert result is c
|
||||||
|
|
||||||
|
|
||||||
|
# ### LAPACK ###
|
||||||
|
|
||||||
|
# linalg.solve
|
||||||
|
|
||||||
|
gesv_sizes = [100, 1000]
|
||||||
|
|
||||||
|
|
||||||
|
def run_gesv(a, b):
|
||||||
|
res = dgesv(a, b, overwrite_a=True, overwrite_b=True)
|
||||||
|
return res
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize('n', gesv_sizes)
|
||||||
|
def test_gesv(benchmark, n):
|
||||||
|
rndm = np.random.RandomState(1234)
|
||||||
|
a = (np.array(rndm.uniform(size=(n, n)), dtype=float, order='F') +
|
||||||
|
np.eye(n, order='F'))
|
||||||
|
b = np.array(rndm.uniform(size=(n, 1)), order='F')
|
||||||
|
lu, piv, x, info = benchmark(run_gesv, a, b)
|
||||||
|
assert lu is a
|
||||||
|
assert x is b
|
||||||
|
assert info == 0
|
||||||
|
|
||||||
|
|
||||||
|
# linalg.svd
|
||||||
|
|
||||||
|
gesdd_sizes = [(100, 5), (1000, 222)]
|
||||||
|
|
||||||
|
|
||||||
|
def run_gesdd(a, lwork):
|
||||||
|
res = dgesdd(a, lwork=lwork, full_matrices=False, overwrite_a=False)
|
||||||
|
return res
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize('mn', gesdd_sizes)
|
||||||
|
def test_gesdd(benchmark, mn):
|
||||||
|
m, n = mn
|
||||||
|
rndm = np.random.RandomState(1234)
|
||||||
|
a = np.array(rndm.uniform(size=(m, n)), dtype=float, order='F')
|
||||||
|
|
||||||
|
lwork, info = dgesdd_lwork(m, n)
|
||||||
|
lwork = int(lwork)
|
||||||
|
assert info == 0
|
||||||
|
|
||||||
|
u, s, vt, info = benchmark(run_gesdd, a, lwork)
|
||||||
|
|
||||||
|
assert info == 0
|
||||||
|
np.testing.assert_allclose(u @ np.diag(s) @ vt, a, atol=1e-13)
|
||||||
|
|
||||||
|
|
||||||
|
# linalg.eigh
|
||||||
|
|
||||||
|
syev_sizes = [50, 200]
|
||||||
|
|
||||||
|
|
||||||
|
def run_syev(a, lwork):
|
||||||
|
res = dsyev(a, lwork=lwork, overwrite_a=True)
|
||||||
|
return res
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize('n', syev_sizes)
|
||||||
|
def test_syev(benchmark, n):
|
||||||
|
rndm = np.random.RandomState(1234)
|
||||||
|
a = rndm.uniform(size=(n, n))
|
||||||
|
a = np.asarray(a + a.T, dtype=float, order='F')
|
||||||
|
a_ = a.copy()
|
||||||
|
|
||||||
|
lwork, info = dsyev_lwork(n)
|
||||||
|
lwork = int(lwork)
|
||||||
|
assert info == 0
|
||||||
|
|
||||||
|
w, v, info = benchmark(run_syev, a, lwork)
|
||||||
|
|
||||||
|
assert info == 0
|
||||||
|
assert a is v # overwrite_a=True
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,48 @@
|
||||||
|
#
|
||||||
|
# Taken from SciPy (of course)
|
||||||
|
#
|
||||||
|
project(
|
||||||
|
'openblas-wrap',
|
||||||
|
'c', 'fortran',
|
||||||
|
version: '0.1',
|
||||||
|
license: 'BSD-3',
|
||||||
|
meson_version: '>= 1.1.0',
|
||||||
|
default_options: [
|
||||||
|
'buildtype=debugoptimized',
|
||||||
|
'b_ndebug=if-release',
|
||||||
|
'c_std=c17',
|
||||||
|
'fortran_std=legacy',
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
|
py3 = import('python').find_installation(pure: false)
|
||||||
|
py3_dep = py3.dependency()
|
||||||
|
|
||||||
|
cc = meson.get_compiler('c')
|
||||||
|
|
||||||
|
_global_c_args = cc.get_supported_arguments(
|
||||||
|
'-Wno-unused-but-set-variable',
|
||||||
|
'-Wno-unused-function',
|
||||||
|
'-Wno-conversion',
|
||||||
|
'-Wno-misleading-indentation',
|
||||||
|
)
|
||||||
|
add_project_arguments(_global_c_args, language : 'c')
|
||||||
|
|
||||||
|
# We need -lm for all C code (assuming it uses math functions, which is safe to
|
||||||
|
# assume for SciPy). For C++ it isn't needed, because libstdc++/libc++ is
|
||||||
|
# guaranteed to depend on it. For Fortran code, Meson already adds `-lm`.
|
||||||
|
m_dep = cc.find_library('m', required : false)
|
||||||
|
if m_dep.found()
|
||||||
|
add_project_link_arguments('-lm', language : 'c')
|
||||||
|
endif
|
||||||
|
|
||||||
|
generate_f2pymod = find_program('openblas_wrap/generate_f2pymod.py')
|
||||||
|
|
||||||
|
openblas = dependency('openblas', method: 'pkg-config', required: true)
|
||||||
|
openblas_dep = declare_dependency(
|
||||||
|
dependencies: openblas,
|
||||||
|
compile_args: []
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
subdir('openblas_wrap')
|
|
@ -0,0 +1,28 @@
|
||||||
|
"""
|
||||||
|
Trampoline to hide the LAPACK details (scipy.lapack.linalg or scipy_openblas32 or...)
|
||||||
|
from benchmarking.
|
||||||
|
"""
|
||||||
|
|
||||||
|
__version__ = "0.1"
|
||||||
|
|
||||||
|
|
||||||
|
#from scipy.linalg.blas import (
|
||||||
|
from ._flapack import (
|
||||||
|
# level 1
|
||||||
|
dnrm2 as dnrm2,
|
||||||
|
ddot as ddot,
|
||||||
|
daxpy as daxpy,
|
||||||
|
# level 3
|
||||||
|
dgemm as dgemm,
|
||||||
|
dsyrk as dsyrk,
|
||||||
|
)
|
||||||
|
|
||||||
|
#from scipy.linalg.lapack import (
|
||||||
|
from openblas_wrap._flapack import (
|
||||||
|
# linalg.solve
|
||||||
|
dgesv as dgesv,
|
||||||
|
# linalg.svd
|
||||||
|
dgesdd as dgesdd, dgesdd_lwork as dgesdd_lwork,
|
||||||
|
# linalg.eigh
|
||||||
|
dsyev as dsyev, dsyev_lwork as dsyev_lwork
|
||||||
|
)
|
|
@ -0,0 +1,326 @@
|
||||||
|
!
|
||||||
|
! Taken from scipy/linalg
|
||||||
|
!
|
||||||
|
! Shorthand notations
|
||||||
|
!
|
||||||
|
! <tchar=s,d,cs,zd>
|
||||||
|
! <tchar2c=cs,zd>
|
||||||
|
!
|
||||||
|
! <prefix2=s,d>
|
||||||
|
! <prefix2c=c,z>
|
||||||
|
! <prefix3=s,sc>
|
||||||
|
! <prefix4=d,dz>
|
||||||
|
! <prefix6=s,d,c,z,c,z>
|
||||||
|
!
|
||||||
|
! <ftype2=real,double precision>
|
||||||
|
! <ftype2c=complex,double complex>
|
||||||
|
! <ftype3=real,complex>
|
||||||
|
! <ftype4=double precision,double complex>
|
||||||
|
! <ftypereal3=real,real>
|
||||||
|
! <ftypereal4=double precision,double precision>
|
||||||
|
! <ftype6=real,double precision,complex,double complex,\2,\3>
|
||||||
|
! <ftype6creal=real,double precision,complex,double complex,\0,\1>
|
||||||
|
!
|
||||||
|
! <ctype2=float,double>
|
||||||
|
! <ctype2c=complex_float,complex_double>
|
||||||
|
! <ctype3=float,complex_float>
|
||||||
|
! <ctype4=double,complex_double>
|
||||||
|
! <ctypereal3=float,float>
|
||||||
|
! <ctypereal4=double,double>
|
||||||
|
! <ctype6=float,double,complex_float,complex_double,\2,\3>
|
||||||
|
! <ctype6creal=float,double,complex_float,complex_double,\0,\1>
|
||||||
|
!
|
||||||
|
!
|
||||||
|
! Level 1 BLAS
|
||||||
|
!
|
||||||
|
|
||||||
|
|
||||||
|
python module _flapack
|
||||||
|
usercode '''
|
||||||
|
#define F_INT int
|
||||||
|
'''
|
||||||
|
|
||||||
|
interface
|
||||||
|
|
||||||
|
|
||||||
|
subroutine <prefix>axpy(n,a,x,offx,incx,y,offy,incy)
|
||||||
|
! Calculate z = a*x+y, where a is scalar.
|
||||||
|
|
||||||
|
callstatement (*f2py_func)(&n,&a,x+offx,&incx,y+offy,&incy)
|
||||||
|
callprotoargument F_INT*,<ctype>*,<ctype>*,F_INT*,<ctype>*,F_INT*
|
||||||
|
|
||||||
|
<ftype> dimension(*), intent(in) :: x
|
||||||
|
<ftype> dimension(*), intent(in,out,out=z) :: y
|
||||||
|
<ftype> optional, intent(in):: a=<1.0,\0,(1.0\,0.0),\2>
|
||||||
|
integer optional, intent(in),check(incx>0||incx<0) :: incx = 1
|
||||||
|
integer optional, intent(in),check(incy>0||incy<0) :: incy = 1
|
||||||
|
integer optional, intent(in),depend(x) :: offx=0
|
||||||
|
integer optional, intent(in),depend(y) :: offy=0
|
||||||
|
check(offx>=0 && offx<len(x)) :: offx
|
||||||
|
check(offy>=0 && offy<len(y)) :: offy
|
||||||
|
integer optional, intent(in),depend(x,incx,offx,y,incy,offy) :: &
|
||||||
|
n = (len(x)-offx)/abs(incx)
|
||||||
|
check(len(x)-offx>(n-1)*abs(incx)) :: n
|
||||||
|
check(len(y)-offy>(n-1)*abs(incy)) :: n
|
||||||
|
|
||||||
|
end subroutine <prefix>axpy
|
||||||
|
|
||||||
|
function ddot(n,x,offx,incx,y,offy,incy) result (xy)
|
||||||
|
! Computes a vector-vector dot product.
|
||||||
|
|
||||||
|
callstatement ddot_return_value = (*f2py_func)(&n,x+offx,&incx,y+offy,&incy)
|
||||||
|
callprotoargument F_INT*,double*,F_INT*,double*,F_INT*
|
||||||
|
intent(c) ddot
|
||||||
|
fortranname F_FUNC(ddot,DDOT)
|
||||||
|
|
||||||
|
double precision dimension(*), intent(in) :: x
|
||||||
|
double precision dimension(*), intent(in) :: y
|
||||||
|
double precision ddot,xy
|
||||||
|
integer optional, intent(in),check(incx>0||incx<0) :: incx = 1
|
||||||
|
integer optional, intent(in),check(incy>0||incy<0) :: incy = 1
|
||||||
|
integer optional, intent(in),depend(x) :: offx=0
|
||||||
|
integer optional, intent(in),depend(y) :: offy=0
|
||||||
|
check(offx>=0 && offx<len(x)) :: offx
|
||||||
|
check(offy>=0 && offy<len(y)) :: offy
|
||||||
|
integer optional, intent(in),depend(x,incx,offx,y,incy,offy) :: &
|
||||||
|
n = (len(x)-offx)/abs(incx)
|
||||||
|
check(len(x)-offx>(n-1)*abs(incx)) :: n
|
||||||
|
check(len(y)-offy>(n-1)*abs(incy)) :: n
|
||||||
|
|
||||||
|
end function ddot
|
||||||
|
|
||||||
|
|
||||||
|
function <prefix4>nrm2(n,x,offx,incx) result(n2)
|
||||||
|
|
||||||
|
<ftypereal4> <prefix4>nrm2, n2
|
||||||
|
|
||||||
|
callstatement <prefix4>nrm2_return_value = (*f2py_func)(&n,x+offx,&incx)
|
||||||
|
callprotoargument F_INT*,<ctype4>*,F_INT*
|
||||||
|
intent(c) <prefix4>nrm2
|
||||||
|
fortranname F_FUNC(<prefix4>nrm2,<D,DZ>NRM2)
|
||||||
|
|
||||||
|
<ftype4> dimension(*),intent(in) :: x
|
||||||
|
|
||||||
|
integer optional, intent(in),check(incx>0) :: incx = 1
|
||||||
|
|
||||||
|
integer optional,intent(in),depend(x) :: offx=0
|
||||||
|
check(offx>=0 && offx<len(x)) :: offx
|
||||||
|
|
||||||
|
integer optional,intent(in),depend(x,incx,offx) :: n = (len(x)-offx)/abs(incx)
|
||||||
|
check(len(x)-offx>(n-1)*abs(incx)) :: n
|
||||||
|
|
||||||
|
end function <prefix4>nrm2
|
||||||
|
|
||||||
|
!
|
||||||
|
! Level 3 BLAS
|
||||||
|
!
|
||||||
|
|
||||||
|
|
||||||
|
subroutine <prefix>gemm(m,n,k,alpha,a,b,beta,c,trans_a,trans_b,lda,ka,ldb,kb)
|
||||||
|
! Computes a scalar-matrix-matrix product and adds the result to a
|
||||||
|
! scalar-matrix product.
|
||||||
|
!
|
||||||
|
! c = gemm(alpha,a,b,beta=0,c=0,trans_a=0,trans_b=0,overwrite_c=0)
|
||||||
|
! Calculate C <- alpha * op(A) * op(B) + beta * C
|
||||||
|
|
||||||
|
callstatement (*f2py_func)((trans_a?(trans_a==2?"C":"T"):"N"), &
|
||||||
|
(trans_b?(trans_b==2?"C":"T"):"N"),&m,&n,&k,&alpha,a,&lda,b,&ldb,&beta,c,&m)
|
||||||
|
callprotoargument char*,char*,F_INT*,F_INT*,F_INT*,<ctype>*,<ctype>*,F_INT*,<ctype>*, &
|
||||||
|
F_INT*,<ctype>*,<ctype>*,F_INT*
|
||||||
|
|
||||||
|
integer optional,intent(in),check(trans_a>=0 && trans_a <=2) :: trans_a = 0
|
||||||
|
integer optional,intent(in),check(trans_b>=0 && trans_b <=2) :: trans_b = 0
|
||||||
|
<ftype> intent(in) :: alpha
|
||||||
|
<ftype> intent(in),optional :: beta = <0.0,\0,(0.0\,0.0),\2>
|
||||||
|
|
||||||
|
<ftype> dimension(lda,ka),intent(in) :: a
|
||||||
|
<ftype> dimension(ldb,kb),intent(in) :: b
|
||||||
|
<ftype> dimension(m,n),intent(in,out,copy),depend(m,n),optional :: c
|
||||||
|
check(shape(c,0)==m && shape(c,1)==n) :: c
|
||||||
|
|
||||||
|
integer depend(a),intent(hide) :: lda = shape(a,0)
|
||||||
|
integer depend(a),intent(hide) :: ka = shape(a,1)
|
||||||
|
integer depend(b),intent(hide) :: ldb = shape(b,0)
|
||||||
|
integer depend(b),intent(hide) :: kb = shape(b,1)
|
||||||
|
|
||||||
|
integer depend(a,trans_a,ka,lda),intent(hide):: m = (trans_a?ka:lda)
|
||||||
|
integer depend(a,trans_a,ka,lda),intent(hide):: k = (trans_a?lda:ka)
|
||||||
|
integer depend(b,trans_b,kb,ldb,k),intent(hide),check(trans_b?kb==k:ldb==k) :: &
|
||||||
|
n = (trans_b?ldb:kb)
|
||||||
|
|
||||||
|
end subroutine <prefix>gemm
|
||||||
|
|
||||||
|
|
||||||
|
subroutine <prefix6><sy,\0,\0,\0,he,he>rk(n,k,alpha,a,beta,c,trans,lower,lda,ka)
|
||||||
|
! performs one of the symmetric rank k operations
|
||||||
|
! C := alpha*A*A**T + beta*C, or C := alpha*A**T*A + beta*C,
|
||||||
|
!
|
||||||
|
! c = syrk(alpha,a,beta=0,c=0,trans=0,lower=0,overwrite_c=0)
|
||||||
|
!
|
||||||
|
callstatement (*f2py_func)((lower?"L":"U"), &
|
||||||
|
(trans?(trans==2?"C":"T"):"N"), &n,&k,&alpha,a,&lda,&beta,c,&n)
|
||||||
|
callprotoargument char*,char*,F_INT*,F_INT*,<ctype6>*,<ctype6>*,F_INT*,<ctype6>*, &
|
||||||
|
<ctype6>*,F_INT*
|
||||||
|
|
||||||
|
integer optional, intent(in),check(lower==0||lower==1) :: lower = 0
|
||||||
|
integer optional,intent(in),check(trans>=0 && trans <=2) :: trans = 0
|
||||||
|
|
||||||
|
<ftype6> intent(in) :: alpha
|
||||||
|
<ftype6> intent(in),optional :: beta = <0.0,\0,(0.0\,0.0),\2,\2,\2>
|
||||||
|
|
||||||
|
<ftype6> dimension(lda,ka),intent(in) :: a
|
||||||
|
<ftype6> dimension(n,n),intent(in,out,copy),depend(n),optional :: c
|
||||||
|
check(shape(c,0)==n && shape(c,1)==n) :: c
|
||||||
|
|
||||||
|
integer depend(a),intent(hide) :: lda = shape(a,0)
|
||||||
|
integer depend(a),intent(hide) :: ka = shape(a,1)
|
||||||
|
|
||||||
|
integer depend(a, trans, ka, lda), intent(hide) :: n = (trans ? ka : lda)
|
||||||
|
integer depend(a, trans, ka, lda), intent(hide) :: k = (trans ? lda : ka)
|
||||||
|
|
||||||
|
end subroutine <prefix6><sy,\0,\0,\0,he,he>rk
|
||||||
|
|
||||||
|
|
||||||
|
!
|
||||||
|
! LAPACK
|
||||||
|
!
|
||||||
|
|
||||||
|
subroutine <prefix>gesv(n,nrhs,a,piv,b,info)
|
||||||
|
! lu,piv,x,info = gesv(a,b,overwrite_a=0,overwrite_b=0)
|
||||||
|
! Solve A * X = B.
|
||||||
|
! A = P * L * U
|
||||||
|
! U is upper diagonal triangular, L is unit lower triangular,
|
||||||
|
! piv pivots columns.
|
||||||
|
|
||||||
|
callstatement {F_INT i;(*f2py_func)(&n,&nrhs,a,&n,piv,b,&n,&info);for(i=0;i\<n;--piv[i++]);}
|
||||||
|
callprotoargument F_INT*,F_INT*,<ctype>*,F_INT*,F_INT*,<ctype>*,F_INT*,F_INT*
|
||||||
|
|
||||||
|
integer depend(a),intent(hide):: n = shape(a,0)
|
||||||
|
integer depend(b),intent(hide):: nrhs = shape(b,1)
|
||||||
|
<ftype> dimension(n,n),check(shape(a,0)==shape(a,1)) :: a
|
||||||
|
integer dimension(n),depend(n),intent(out) :: piv
|
||||||
|
<ftype> dimension(n,nrhs),check(shape(a,0)==shape(b,0)),depend(n) :: b
|
||||||
|
integer intent(out)::info
|
||||||
|
intent(in,out,copy,out=x) b
|
||||||
|
intent(in,out,copy,out=lu) a
|
||||||
|
end subroutine <prefix>gesv
|
||||||
|
|
||||||
|
|
||||||
|
subroutine <prefix2>gesdd(m,n,minmn,u0,u1,vt0,vt1,a,compute_uv,full_matrices,u,s,vt,work,lwork,iwork,info)
|
||||||
|
! u,s,vt,info = gesdd(a,compute_uv=1,lwork=..,overwrite_a=0)
|
||||||
|
! Compute the singular value decomposition (SVD) using divide and conquer:
|
||||||
|
! A = U * SIGMA * transpose(V)
|
||||||
|
! A - M x N matrix
|
||||||
|
! U - M x M matrix or min(M,N) x N if full_matrices=False
|
||||||
|
! SIGMA - M x N zero matrix with a main diagonal filled with min(M,N)
|
||||||
|
! singular values
|
||||||
|
! transpose(V) - N x N matrix or N x min(M,N) if full_matrices=False
|
||||||
|
|
||||||
|
callstatement (*f2py_func)((compute_uv?(full_matrices?"A":"S"):"N"),&m,&n,a,&m,s,u,&u0,vt,&vt0,work,&lwork,iwork,&info)
|
||||||
|
callprotoargument char*,F_INT*,F_INT*,<ctype2>*,F_INT*,<ctype2>*,<ctype2>*,F_INT*,<ctype2>*,F_INT*,<ctype2>*,F_INT*,F_INT*,F_INT*
|
||||||
|
|
||||||
|
integer intent(in),optional,check(compute_uv==0||compute_uv==1):: compute_uv = 1
|
||||||
|
integer intent(in),optional,check(full_matrices==0||full_matrices==1):: full_matrices = 1
|
||||||
|
integer intent(hide),depend(a):: m = shape(a,0)
|
||||||
|
integer intent(hide),depend(a):: n = shape(a,1)
|
||||||
|
integer intent(hide),depend(m,n):: minmn = MIN(m,n)
|
||||||
|
integer intent(hide),depend(compute_uv,minmn) :: u0 = (compute_uv?m:1)
|
||||||
|
integer intent(hide),depend(compute_uv,minmn, full_matrices) :: u1 = (compute_uv?(full_matrices?m:minmn):1)
|
||||||
|
integer intent(hide),depend(compute_uv,minmn, full_matrices) :: vt0 = (compute_uv?(full_matrices?n:minmn):1)
|
||||||
|
integer intent(hide),depend(compute_uv,minmn) :: vt1 = (compute_uv?n:1)
|
||||||
|
<ftype2> dimension(m,n),intent(in,copy,aligned8) :: a
|
||||||
|
<ftype2> dimension(minmn),intent(out),depend(minmn) :: s
|
||||||
|
<ftype2> dimension(u0,u1),intent(out),depend(u0, u1) :: u
|
||||||
|
<ftype2> dimension(vt0,vt1),intent(out),depend(vt0, vt1) :: vt
|
||||||
|
<ftype2> dimension(lwork),intent(hide,cache),depend(lwork) :: work
|
||||||
|
integer optional,intent(in),depend(minmn,compute_uv) &
|
||||||
|
:: lwork = max((compute_uv?4*minmn*minmn+MAX(m,n)+9*minmn:MAX(14*minmn+4,10*minmn+2+25*(25+8))+MAX(m,n)),1)
|
||||||
|
integer intent(hide,cache),dimension(8*minmn),depend(minmn) :: iwork
|
||||||
|
integer intent(out)::info
|
||||||
|
|
||||||
|
end subroutine <prefix2>gesdd
|
||||||
|
|
||||||
|
subroutine <prefix2>gesdd_lwork(m,n,minmn,u0,vt0,a,compute_uv,full_matrices,u,s,vt,work,lwork,iwork,info)
|
||||||
|
! LWORK computation for (S/D)GESDD
|
||||||
|
|
||||||
|
fortranname <prefix2>gesdd
|
||||||
|
callstatement (*f2py_func)((compute_uv?(full_matrices?"A":"S"):"N"),&m,&n,&a,&m,&s,&u,&u0,&vt,&vt0,&work,&lwork,&iwork,&info)
|
||||||
|
callprotoargument char*,F_INT*,F_INT*,<ctype2>*,F_INT*,<ctype2>*,<ctype2>*,F_INT*,<ctype2>*,F_INT*,<ctype2>*,F_INT*,F_INT*,F_INT*
|
||||||
|
|
||||||
|
integer intent(in),optional,check(compute_uv==0||compute_uv==1):: compute_uv = 1
|
||||||
|
integer intent(in),optional,check(full_matrices==0||full_matrices==1):: full_matrices = 1
|
||||||
|
integer intent(in) :: m
|
||||||
|
integer intent(in) :: n
|
||||||
|
integer intent(hide),depend(m,n):: minmn = MIN(m,n)
|
||||||
|
integer intent(hide),depend(compute_uv,minmn) :: u0 = (compute_uv?m:1)
|
||||||
|
integer intent(hide),depend(compute_uv,minmn, full_matrices) :: vt0 = (compute_uv?(full_matrices?n:minmn):1)
|
||||||
|
<ftype2> intent(hide) :: a
|
||||||
|
<ftype2> intent(hide) :: s
|
||||||
|
<ftype2> intent(hide) :: u
|
||||||
|
<ftype2> intent(hide) :: vt
|
||||||
|
<ftype2> intent(out) :: work
|
||||||
|
integer intent(hide) :: lwork = -1
|
||||||
|
integer intent(hide) :: iwork
|
||||||
|
integer intent(out) :: info
|
||||||
|
|
||||||
|
end subroutine <prefix2>gesdd_lwork
|
||||||
|
|
||||||
|
|
||||||
|
subroutine <prefix2>syev(compute_v,lower,n,w,a,lda,work,lwork,info)
|
||||||
|
! w,v,info = syev(a,compute_v=1,lower=0,lwork=3*n-1,overwrite_a=0)
|
||||||
|
! Compute all eigenvalues and, optionally, eigenvectors of a
|
||||||
|
! real symmetric matrix A.
|
||||||
|
!
|
||||||
|
! Performance tip:
|
||||||
|
! If compute_v=0 then set also overwrite_a=1.
|
||||||
|
|
||||||
|
callstatement (*f2py_func)((compute_v?"V":"N"),(lower?"L":"U"),&n,a,&lda,w,work,&lwork,&info)
|
||||||
|
callprotoargument char*,char*,F_INT*,<ctype2>*,F_INT*,<ctype2>*,<ctype2>*,F_INT*,F_INT*
|
||||||
|
|
||||||
|
integer optional,intent(in):: compute_v = 1
|
||||||
|
check(compute_v==1||compute_v==0) compute_v
|
||||||
|
integer optional,intent(in),check(lower==0||lower==1) :: lower = 0
|
||||||
|
|
||||||
|
integer intent(hide),depend(a):: n = shape(a,0)
|
||||||
|
integer intent(hide),depend(a):: lda = MAX(1,shape(a,0))
|
||||||
|
<ftype2> dimension(n,n),check(shape(a,0)==shape(a,1)) :: a
|
||||||
|
intent(in,copy,out,out=v) :: a
|
||||||
|
|
||||||
|
<ftype2> dimension(n),intent(out),depend(n) :: w
|
||||||
|
|
||||||
|
integer optional,intent(in),depend(n) :: lwork=max(3*n-1,1)
|
||||||
|
check(lwork>=3*n-1) :: lwork
|
||||||
|
<ftype2> dimension(lwork),intent(hide),depend(lwork) :: work
|
||||||
|
|
||||||
|
integer intent(out) :: info
|
||||||
|
|
||||||
|
end subroutine <prefix2>syev
|
||||||
|
|
||||||
|
|
||||||
|
subroutine <prefix2>syev_lwork(lower,n,w,a,lda,work,lwork,info)
|
||||||
|
! LWORK routines for syev
|
||||||
|
|
||||||
|
fortranname <prefix2>syev
|
||||||
|
|
||||||
|
callstatement (*f2py_func)("N",(lower?"L":"U"),&n,&a,&lda,&w,&work,&lwork,&info)
|
||||||
|
callprotoargument char*,char*,F_INT*,<ctype2>*,F_INT*,<ctype2>*,<ctype2>*,F_INT*,F_INT*
|
||||||
|
|
||||||
|
integer intent(in):: n
|
||||||
|
integer optional,intent(in),check(lower==0||lower==1) :: lower = 0
|
||||||
|
|
||||||
|
integer intent(hide),depend(n):: lda = MAX(1, n)
|
||||||
|
<ftype2> intent(hide):: a
|
||||||
|
<ftype2> intent(hide):: w
|
||||||
|
integer intent(hide):: lwork = -1
|
||||||
|
|
||||||
|
<ftype2> intent(out):: work
|
||||||
|
integer intent(out):: info
|
||||||
|
|
||||||
|
end subroutine <prefix2>syev_lwork
|
||||||
|
|
||||||
|
end interface
|
||||||
|
|
||||||
|
end python module _flapack
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,299 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Process f2py template files (`filename.pyf.src` -> `filename.pyf`)
|
||||||
|
|
||||||
|
Usage: python generate_pyf.py filename.pyf.src -o filename.pyf
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import re
|
||||||
|
import subprocess
|
||||||
|
import argparse
|
||||||
|
|
||||||
|
|
||||||
|
# START OF CODE VENDORED FROM `numpy.distutils.from_template`
|
||||||
|
#############################################################
|
||||||
|
"""
|
||||||
|
process_file(filename)
|
||||||
|
|
||||||
|
takes templated file .xxx.src and produces .xxx file where .xxx
|
||||||
|
is .pyf .f90 or .f using the following template rules:
|
||||||
|
|
||||||
|
'<..>' denotes a template.
|
||||||
|
|
||||||
|
All function and subroutine blocks in a source file with names that
|
||||||
|
contain '<..>' will be replicated according to the rules in '<..>'.
|
||||||
|
|
||||||
|
The number of comma-separated words in '<..>' will determine the number of
|
||||||
|
replicates.
|
||||||
|
|
||||||
|
'<..>' may have two different forms, named and short. For example,
|
||||||
|
|
||||||
|
named:
|
||||||
|
<p=d,s,z,c> where anywhere inside a block '<p>' will be replaced with
|
||||||
|
'd', 's', 'z', and 'c' for each replicate of the block.
|
||||||
|
|
||||||
|
<_c> is already defined: <_c=s,d,c,z>
|
||||||
|
<_t> is already defined: <_t=real,double precision,complex,double complex>
|
||||||
|
|
||||||
|
short:
|
||||||
|
<s,d,c,z>, a short form of the named, useful when no <p> appears inside
|
||||||
|
a block.
|
||||||
|
|
||||||
|
In general, '<..>' contains a comma separated list of arbitrary
|
||||||
|
expressions. If these expression must contain a comma|leftarrow|rightarrow,
|
||||||
|
then prepend the comma|leftarrow|rightarrow with a backslash.
|
||||||
|
|
||||||
|
If an expression matches '\\<index>' then it will be replaced
|
||||||
|
by <index>-th expression.
|
||||||
|
|
||||||
|
Note that all '<..>' forms in a block must have the same number of
|
||||||
|
comma-separated entries.
|
||||||
|
|
||||||
|
Predefined named template rules:
|
||||||
|
<prefix=s,d,c,z>
|
||||||
|
<ftype=real,double precision,complex,double complex>
|
||||||
|
<ftypereal=real,double precision,\\0,\\1>
|
||||||
|
<ctype=float,double,complex_float,complex_double>
|
||||||
|
<ctypereal=float,double,\\0,\\1>
|
||||||
|
"""
|
||||||
|
|
||||||
|
routine_start_re = re.compile(
|
||||||
|
r'(\n|\A)(( (\$|\*))|)\s*(subroutine|function)\b',
|
||||||
|
re.I
|
||||||
|
)
|
||||||
|
routine_end_re = re.compile(r'\n\s*end\s*(subroutine|function)\b.*(\n|\Z)', re.I)
|
||||||
|
function_start_re = re.compile(r'\n (\$|\*)\s*function\b', re.I)
|
||||||
|
|
||||||
|
def parse_structure(astr):
|
||||||
|
""" Return a list of tuples for each function or subroutine each
|
||||||
|
tuple is the start and end of a subroutine or function to be
|
||||||
|
expanded.
|
||||||
|
"""
|
||||||
|
|
||||||
|
spanlist = []
|
||||||
|
ind = 0
|
||||||
|
while True:
|
||||||
|
m = routine_start_re.search(astr, ind)
|
||||||
|
if m is None:
|
||||||
|
break
|
||||||
|
start = m.start()
|
||||||
|
if function_start_re.match(astr, start, m.end()):
|
||||||
|
while True:
|
||||||
|
i = astr.rfind('\n', ind, start)
|
||||||
|
if i==-1:
|
||||||
|
break
|
||||||
|
start = i
|
||||||
|
if astr[i:i+7]!='\n $':
|
||||||
|
break
|
||||||
|
start += 1
|
||||||
|
m = routine_end_re.search(astr, m.end())
|
||||||
|
ind = end = m and m.end()-1 or len(astr)
|
||||||
|
spanlist.append((start, end))
|
||||||
|
return spanlist
|
||||||
|
|
||||||
|
template_re = re.compile(r"<\s*(\w[\w\d]*)\s*>")
|
||||||
|
named_re = re.compile(r"<\s*(\w[\w\d]*)\s*=\s*(.*?)\s*>")
|
||||||
|
list_re = re.compile(r"<\s*((.*?))\s*>")
|
||||||
|
|
||||||
|
def find_repl_patterns(astr):
|
||||||
|
reps = named_re.findall(astr)
|
||||||
|
names = {}
|
||||||
|
for rep in reps:
|
||||||
|
name = rep[0].strip() or unique_key(names)
|
||||||
|
repl = rep[1].replace(r'\,', '@comma@')
|
||||||
|
thelist = conv(repl)
|
||||||
|
names[name] = thelist
|
||||||
|
return names
|
||||||
|
|
||||||
|
def find_and_remove_repl_patterns(astr):
|
||||||
|
names = find_repl_patterns(astr)
|
||||||
|
astr = re.subn(named_re, '', astr)[0]
|
||||||
|
return astr, names
|
||||||
|
|
||||||
|
item_re = re.compile(r"\A\\(?P<index>\d+)\Z")
|
||||||
|
def conv(astr):
|
||||||
|
b = astr.split(',')
|
||||||
|
l = [x.strip() for x in b]
|
||||||
|
for i in range(len(l)):
|
||||||
|
m = item_re.match(l[i])
|
||||||
|
if m:
|
||||||
|
j = int(m.group('index'))
|
||||||
|
l[i] = l[j]
|
||||||
|
return ','.join(l)
|
||||||
|
|
||||||
|
def unique_key(adict):
|
||||||
|
""" Obtain a unique key given a dictionary."""
|
||||||
|
allkeys = list(adict.keys())
|
||||||
|
done = False
|
||||||
|
n = 1
|
||||||
|
while not done:
|
||||||
|
newkey = '__l%s' % (n)
|
||||||
|
if newkey in allkeys:
|
||||||
|
n += 1
|
||||||
|
else:
|
||||||
|
done = True
|
||||||
|
return newkey
|
||||||
|
|
||||||
|
|
||||||
|
template_name_re = re.compile(r'\A\s*(\w[\w\d]*)\s*\Z')
|
||||||
|
def expand_sub(substr, names):
|
||||||
|
substr = substr.replace(r'\>', '@rightarrow@')
|
||||||
|
substr = substr.replace(r'\<', '@leftarrow@')
|
||||||
|
lnames = find_repl_patterns(substr)
|
||||||
|
substr = named_re.sub(r"<\1>", substr) # get rid of definition templates
|
||||||
|
|
||||||
|
def listrepl(mobj):
|
||||||
|
thelist = conv(mobj.group(1).replace(r'\,', '@comma@'))
|
||||||
|
if template_name_re.match(thelist):
|
||||||
|
return "<%s>" % (thelist)
|
||||||
|
name = None
|
||||||
|
for key in lnames.keys(): # see if list is already in dictionary
|
||||||
|
if lnames[key] == thelist:
|
||||||
|
name = key
|
||||||
|
if name is None: # this list is not in the dictionary yet
|
||||||
|
name = unique_key(lnames)
|
||||||
|
lnames[name] = thelist
|
||||||
|
return "<%s>" % name
|
||||||
|
|
||||||
|
substr = list_re.sub(listrepl, substr) # convert all lists to named templates
|
||||||
|
# newnames are constructed as needed
|
||||||
|
|
||||||
|
numsubs = None
|
||||||
|
base_rule = None
|
||||||
|
rules = {}
|
||||||
|
for r in template_re.findall(substr):
|
||||||
|
if r not in rules:
|
||||||
|
thelist = lnames.get(r, names.get(r, None))
|
||||||
|
if thelist is None:
|
||||||
|
raise ValueError('No replicates found for <%s>' % (r))
|
||||||
|
if r not in names and not thelist.startswith('_'):
|
||||||
|
names[r] = thelist
|
||||||
|
rule = [i.replace('@comma@', ',') for i in thelist.split(',')]
|
||||||
|
num = len(rule)
|
||||||
|
|
||||||
|
if numsubs is None:
|
||||||
|
numsubs = num
|
||||||
|
rules[r] = rule
|
||||||
|
base_rule = r
|
||||||
|
elif num == numsubs:
|
||||||
|
rules[r] = rule
|
||||||
|
else:
|
||||||
|
print("Mismatch in number of replacements (base <{}={}>) "
|
||||||
|
"for <{}={}>. Ignoring."
|
||||||
|
.format(base_rule, ','.join(rules[base_rule]), r, thelist))
|
||||||
|
if not rules:
|
||||||
|
return substr
|
||||||
|
|
||||||
|
def namerepl(mobj):
|
||||||
|
name = mobj.group(1)
|
||||||
|
return rules.get(name, (k+1)*[name])[k]
|
||||||
|
|
||||||
|
newstr = ''
|
||||||
|
for k in range(numsubs):
|
||||||
|
newstr += template_re.sub(namerepl, substr) + '\n\n'
|
||||||
|
|
||||||
|
newstr = newstr.replace('@rightarrow@', '>')
|
||||||
|
newstr = newstr.replace('@leftarrow@', '<')
|
||||||
|
return newstr
|
||||||
|
|
||||||
|
def process_str(allstr):
|
||||||
|
newstr = allstr
|
||||||
|
writestr = ''
|
||||||
|
|
||||||
|
struct = parse_structure(newstr)
|
||||||
|
|
||||||
|
oldend = 0
|
||||||
|
names = {}
|
||||||
|
names.update(_special_names)
|
||||||
|
for sub in struct:
|
||||||
|
cleanedstr, defs = find_and_remove_repl_patterns(newstr[oldend:sub[0]])
|
||||||
|
writestr += cleanedstr
|
||||||
|
names.update(defs)
|
||||||
|
writestr += expand_sub(newstr[sub[0]:sub[1]], names)
|
||||||
|
oldend = sub[1]
|
||||||
|
writestr += newstr[oldend:]
|
||||||
|
|
||||||
|
return writestr
|
||||||
|
|
||||||
|
include_src_re = re.compile(
|
||||||
|
r"(\n|\A)\s*include\s*['\"](?P<name>[\w\d./\\]+\.src)['\"]",
|
||||||
|
re.I
|
||||||
|
)
|
||||||
|
|
||||||
|
def resolve_includes(source):
|
||||||
|
d = os.path.dirname(source)
|
||||||
|
with open(source) as fid:
|
||||||
|
lines = []
|
||||||
|
for line in fid:
|
||||||
|
m = include_src_re.match(line)
|
||||||
|
if m:
|
||||||
|
fn = m.group('name')
|
||||||
|
if not os.path.isabs(fn):
|
||||||
|
fn = os.path.join(d, fn)
|
||||||
|
if os.path.isfile(fn):
|
||||||
|
lines.extend(resolve_includes(fn))
|
||||||
|
else:
|
||||||
|
lines.append(line)
|
||||||
|
else:
|
||||||
|
lines.append(line)
|
||||||
|
return lines
|
||||||
|
|
||||||
|
def process_file(source):
|
||||||
|
lines = resolve_includes(source)
|
||||||
|
return process_str(''.join(lines))
|
||||||
|
|
||||||
|
_special_names = find_repl_patterns('''
|
||||||
|
<_c=s,d,c,z>
|
||||||
|
<_t=real,double precision,complex,double complex>
|
||||||
|
<prefix=s,d,c,z>
|
||||||
|
<ftype=real,double precision,complex,double complex>
|
||||||
|
<ctype=float,double,complex_float,complex_double>
|
||||||
|
<ftypereal=real,double precision,\\0,\\1>
|
||||||
|
<ctypereal=float,double,\\0,\\1>
|
||||||
|
''')
|
||||||
|
|
||||||
|
# END OF CODE VENDORED FROM `numpy.distutils.from_template`
|
||||||
|
###########################################################
|
||||||
|
|
||||||
|
|
||||||
|
def main():
|
||||||
|
parser = argparse.ArgumentParser()
|
||||||
|
parser.add_argument("infile", type=str,
|
||||||
|
help="Path to the input file")
|
||||||
|
parser.add_argument("-o", "--outdir", type=str,
|
||||||
|
help="Path to the output directory")
|
||||||
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
if not args.infile.endswith(('.pyf', '.pyf.src', '.f.src')):
|
||||||
|
raise ValueError(f"Input file has unknown extension: {args.infile}")
|
||||||
|
|
||||||
|
outdir_abs = os.path.join(os.getcwd(), args.outdir)
|
||||||
|
|
||||||
|
# Write out the .pyf/.f file
|
||||||
|
if args.infile.endswith(('.pyf.src', '.f.src')):
|
||||||
|
code = process_file(args.infile)
|
||||||
|
fname_pyf = os.path.join(args.outdir,
|
||||||
|
os.path.splitext(os.path.split(args.infile)[1])[0])
|
||||||
|
|
||||||
|
with open(fname_pyf, 'w') as f:
|
||||||
|
f.write(code)
|
||||||
|
else:
|
||||||
|
fname_pyf = args.infile
|
||||||
|
|
||||||
|
# Now invoke f2py to generate the C API module file
|
||||||
|
if args.infile.endswith(('.pyf.src', '.pyf')):
|
||||||
|
p = subprocess.Popen([sys.executable, '-m', 'numpy.f2py', fname_pyf,
|
||||||
|
'--build-dir', outdir_abs], #'--quiet'],
|
||||||
|
stdout=subprocess.PIPE, stderr=subprocess.PIPE,
|
||||||
|
cwd=os.getcwd())
|
||||||
|
out, err = p.communicate()
|
||||||
|
if not (p.returncode == 0):
|
||||||
|
raise RuntimeError(f"Writing {args.outfile} with f2py failed!\n"
|
||||||
|
f"{out}\n"
|
||||||
|
r"{err}")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
|
@ -0,0 +1,50 @@
|
||||||
|
# find numpy & f2py includes
|
||||||
|
inc_numpy = run_command(py3,
|
||||||
|
['-c', 'import os; os.chdir(".."); import numpy; print(numpy.get_include())'],
|
||||||
|
check : true
|
||||||
|
).stdout().strip()
|
||||||
|
|
||||||
|
inc_f2py = run_command(py3,
|
||||||
|
['-c', 'import os; os.chdir(".."); import numpy.f2py; print(numpy.f2py.get_include())'],
|
||||||
|
check : true
|
||||||
|
).stdout().strip()
|
||||||
|
|
||||||
|
|
||||||
|
inc_np = include_directories(inc_numpy, inc_f2py)
|
||||||
|
fortranobject_c = inc_f2py / 'fortranobject.c'
|
||||||
|
|
||||||
|
|
||||||
|
fortranobject_lib = static_library('_fortranobject',
|
||||||
|
fortranobject_c,
|
||||||
|
# c_args: numpy_nodepr_api,
|
||||||
|
dependencies: py3_dep,
|
||||||
|
include_directories: [inc_np, inc_f2py],
|
||||||
|
gnu_symbol_visibility: 'hidden',
|
||||||
|
)
|
||||||
|
fortranobject_dep = declare_dependency(
|
||||||
|
link_with: fortranobject_lib,
|
||||||
|
include_directories: [inc_np, inc_f2py],
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# f2py generated wrappers
|
||||||
|
|
||||||
|
flapack_module = custom_target('flapack_module',
|
||||||
|
output: ['_flapackmodule.c'],
|
||||||
|
input: 'blas_lapack.pyf.src',
|
||||||
|
command: [generate_f2pymod, '@INPUT@', '-o', '@OUTDIR@'],
|
||||||
|
)
|
||||||
|
|
||||||
|
py3.extension_module('_flapack',
|
||||||
|
flapack_module,
|
||||||
|
link_args: [], # version_link_args,
|
||||||
|
dependencies: [openblas_dep, fortranobject_dep],
|
||||||
|
install: true,
|
||||||
|
subdir: 'openblas_wrap'
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
py3.install_sources(
|
||||||
|
['__init__.py'],
|
||||||
|
subdir: 'openblas_wrap'
|
||||||
|
)
|
|
@ -0,0 +1,12 @@
|
||||||
|
libdir=/home/br/repos/OpenBLAS/
|
||||||
|
includedir=/home/br/repos/OpenBLAS/
|
||||||
|
openblas_config= OpenBLAS 0.3.27 DYNAMIC_ARCH NO_AFFINITY Haswell MAX_THREADS=64
|
||||||
|
version=0.3.27
|
||||||
|
extralib=-lm -lpthread -lgfortran -lquadmath -L${libdir} -lopenblas
|
||||||
|
Name: openblas
|
||||||
|
Description: OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version
|
||||||
|
Version: ${version}
|
||||||
|
URL: https://github.com/xianyi/OpenBLAS
|
||||||
|
Libs: -L${libdir} -lopenblas
|
||||||
|
Libs.private: ${extralib}
|
||||||
|
Cflags: -I${includedir}
|
Loading…
Reference in New Issue