split common code into a header (part 1/2)

This commit is contained in:
TiborGY 2019-05-30 15:09:19 +02:00 committed by GitHub
parent 2e8cd9f3ad
commit 1c3f3fa8fe
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 28 additions and 54 deletions

View File

@ -3,60 +3,34 @@
#include <random> #include <random>
#include <future> #include <future>
#include <omp.h> #include <omp.h>
#include "/opt/OpenBLAS_zen_serial/include/cblas.h" #include "../cblas.h"
#include "cpp_thread_safety_common.h"
const blasint randomMatSize = 1024; //dimension of the random square matrices used void launch_cblas_dgemm(double* A, double* B, double* C, const blasint randomMatSize){
const uint32_t numConcurrentThreads = 52; //number of concurrent calls of the functions being tested
const uint32_t numTestRounds = 8; //number of testing rounds before success exit
inline void pauser(){
/// a portable way to pause a program
std::string dummy;
std::cout << "Press enter to continue...";
std::getline(std::cin, dummy);
}
void launch_cblas_dgemm(double* A, double* B, double* C){
cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, randomMatSize, randomMatSize, randomMatSize, 1.0, A, randomMatSize, B, randomMatSize, 0.1, C, randomMatSize); cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, randomMatSize, randomMatSize, randomMatSize, 1.0, A, randomMatSize, B, randomMatSize, 0.1, C, randomMatSize);
} }
void FillMatrices(std::vector<std::vector<double>>& matBlock, std::mt19937_64& PRNG, std::uniform_real_distribution<double>& rngdist){ int main(int argc, char* argv[]){
for(uint32_t i=0; i<3; i++){ blasint randomMatSize = 1024; //dimension of the random square matrices used
for(uint32_t j=0; j<(randomMatSize*randomMatSize); j++){ uint32_t numConcurrentThreads = 52; //number of concurrent calls of the functions being tested
matBlock[i][j] = rngdist(PRNG); uint32_t numTestRounds = 16; //number of testing rounds before success exit
}
}
for(uint32_t i=3; i<(numConcurrentThreads*3); i+=3){
matBlock[i] = matBlock[0];
matBlock[i+1] = matBlock[1];
matBlock[i+2] = matBlock[2];
}
}
std::mt19937_64 InitPRNG(){ if (argc > 4){
std::random_device rd; std::cout<<"ERROR: too many arguments for thread safety tester"<<std::endl;
std::mt19937_64 PRNG(rd()); //seed PRNG using /dev/urandom or similar OS provided RNG abort();
std::uniform_real_distribution<double> rngdist{-1.0, 1.0}; }
//make sure the internal state of the PRNG is properly mixed by generating 10M random numbers
//PRNGs often have unreliable distribution uniformity and other statistical properties before their internal state is sufficiently mixed
for (uint32_t i=0;i<10000000;i++) rngdist(PRNG);
return PRNG;
}
void PrintMatrices(const std::vector<std::vector<double>>& matBlock){ if(argc == 4){
for (uint32_t i=0;i<numConcurrentThreads*3;i++){ std::vector<std::string> cliArgs;
std::cout<<i<<std::endl; for (int i = 1; i < argc; i++){
for (uint32_t j=0;j<randomMatSize;j++){ cliArgs.push_back(argv[i]);
for (uint32_t k=0;k<randomMatSize;k++){ std::cout<<argv[i]<<std::endl;
std::cout<<matBlock[i][j*randomMatSize + k]<<" ";
} }
std::cout<<std::endl; randomMatSize = std::stoul(cliArgs[0]);
numConcurrentThreads = std::stoul(cliArgs[1]);
numTestRounds = std::stoul(cliArgs[2]);
} }
std::cout<<std::endl;
}
}
int main(){
std::uniform_real_distribution<double> rngdist{-1.0, 1.0}; std::uniform_real_distribution<double> rngdist{-1.0, 1.0};
std::vector<std::vector<double>> matBlock(numConcurrentThreads*3); std::vector<std::vector<double>> matBlock(numConcurrentThreads*3);
std::vector<std::future<void>> futureBlock(numConcurrentThreads); std::vector<std::future<void>> futureBlock(numConcurrentThreads);
@ -81,17 +55,17 @@ int main(){
std::cout<<"done\n"; std::cout<<"done\n";
//pauser(); //pauser();
std::cout<<"Filling matrices with random numbers..."<<std::flush; std::cout<<"Filling matrices with random numbers..."<<std::flush;
FillMatrices(matBlock, PRNG, rngdist); FillMatrices(matBlock, PRNG, rngdist, randomMatSize, numConcurrentThreads);
//PrintMatrices(matBlock); //PrintMatrices(matBlock, randomMatSize, numConcurrentThreads);
std::cout<<"done\n"; std::cout<<"done\n";
std::cout<<"Testing CBLAS DGEMM thread safety\n"; std::cout<<"Testing CBLAS DGEMM thread safety\n";
omp_set_num_threads(numConcurrentThreads); omp_set_num_threads(numConcurrentThreads);
for(uint32_t R=0; R<numTestRounds; R++){ for(uint32_t R=0; R<numTestRounds; R++){
std::cout<<"DGEMM round #"<<R<<std::endl; std::cout<<"DGEMM round #"<<R<<std::endl;
std::cout<<"Launching "<<numConcurrentThreads<<" threads..."<<std::flush; std::cout<<"Launching "<<numConcurrentThreads<<" threads simultaneously using OpenMP..."<<std::flush;
#pragma omp parallel for default(none) shared(futureBlock, matBlock) #pragma omp parallel for default(none) shared(futureBlock, matBlock, randomMatSize, numConcurrentThreads)
for(uint32_t i=0; i<numConcurrentThreads; i++){ for(uint32_t i=0; i<numConcurrentThreads; i++){
futureBlock[i] = std::async(std::launch::async, launch_cblas_dgemm, &matBlock[i*3][0], &matBlock[i*3+1][0], &matBlock[i*3+2][0]); futureBlock[i] = std::async(std::launch::async, launch_cblas_dgemm, &matBlock[i*3][0], &matBlock[i*3+1][0], &matBlock[i*3+2][0], randomMatSize);
//launch_cblas_dgemm( &matBlock[i][0], &matBlock[i+1][0], &matBlock[i+2][0]); //launch_cblas_dgemm( &matBlock[i][0], &matBlock[i+1][0], &matBlock[i+2][0]);
} }
std::cout<<"done\n"; std::cout<<"done\n";
@ -100,7 +74,7 @@ int main(){
futureBlock[i].get(); futureBlock[i].get();
} }
std::cout<<"done\n"; std::cout<<"done\n";
//PrintMatrices(matBlock); //PrintMatrices(matBlock, randomMatSize, numConcurrentThreads);
std::cout<<"Comparing results from different threads..."<<std::flush; std::cout<<"Comparing results from different threads..."<<std::flush;
for(uint32_t i=3; i<(numConcurrentThreads*3); i+=3){ for(uint32_t i=3; i<(numConcurrentThreads*3); i+=3){
for(uint32_t j=0; j<(randomMatSize*randomMatSize); j++){ for(uint32_t j=0; j<(randomMatSize*randomMatSize); j++){