diff --git a/cpp_thread_test/test_gemm_omp.cc b/cpp_thread_test/test_gemm_omp.cc index 8b1378917..acfb58112 100644 --- a/cpp_thread_test/test_gemm_omp.cc +++ b/cpp_thread_test/test_gemm_omp.cc @@ -1 +1,90 @@ +#include +#include +#include +#include + +#include + +//------------------------------------------------------------------------------ +void fill_rand( int m, int n, double* A, int ld ) +{ + for (int j = 0; j < n; ++j) { + for (int i = 0; i < m; ++i) { + A[ i + j*ld ] = rand() / double(RAND_MAX); + } + } +} + +//------------------------------------------------------------------------------ +inline double max_nan( double x, double y ) +{ + return (isnan(y) || (y) >= (x) ? (y) : (x)); +} + +//------------------------------------------------------------------------------ +int main( int argc, char** argv ) +{ + int batch_size = 1000; + int n = 50; + if (argc > 1) + batch_size = atoi( argv[1] ); + if (argc > 2) + n = atoi( argv[2] ); + printf( "batch_size %d, n %d\n", batch_size, n ); + + int ld = n; + double alpha = 3.1416; + double beta = 2.7183; + + printf( "init\n" ); + std::vector A_array( batch_size ), + B_array( batch_size ), + C_array( batch_size ), + D_array( batch_size ); + for (int i = 0; i < batch_size; ++i) { + A_array[ i ] = new double[ ld*n ]; + B_array[ i ] = new double[ ld*n ]; + C_array[ i ] = new double[ ld*n ]; + D_array[ i ] = new double[ ld*n ]; + fill_rand( n, n, A_array[ i ], ld ); + fill_rand( n, n, B_array[ i ], ld ); + fill_rand( n, n, C_array[ i ], ld ); + std::copy( C_array[ i ], C_array[ i ] + ld*n, D_array[ i ] ); + } + + printf( "test\n" ); + for (int i = 0; i < batch_size; ++i) { + cblas_dgemm( CblasColMajor, CblasNoTrans, CblasNoTrans, n, n, n, + alpha, A_array[ i ], ld, B_array[ i ], ld, + beta, C_array[ i ], ld ); + } + + printf( "test OpenMP\n" ); + #pragma omp parallel for + for (int i = 0; i < batch_size; ++i) { + cblas_dgemm( CblasColMajor, CblasNoTrans, CblasNoTrans, n, n, n, + alpha, A_array[ i ], ld, B_array[ i ], ld, + beta, D_array[ i ], ld ); + } + + printf( "compare\n" ); + double max_error = 0; + for (int i = 0; i < batch_size; ++i) { + // norm( D - C ) + cblas_daxpy( ld*n, -1.0, C_array[ i ], 1, D_array[ i ], 1 ); + double error = cblas_dnrm2( ld*n, D_array[ i ], 1 ); + max_error = max_nan( error, max_error ); + } + printf( "max error %.2e\n", max_error ); + + printf( "delete\n" ); + for (int i = 0; i < batch_size; ++i) { + delete [] A_array[ i ]; + delete [] B_array[ i ]; + delete [] C_array[ i ]; + } + + printf( "done\n" ); + return 0; +}