diff --git a/benchmark/sgemv_bench.c b/benchmark/sgemv_bench.c new file mode 100644 index 000000000..1a63c708b --- /dev/null +++ b/benchmark/sgemv_bench.c @@ -0,0 +1,71 @@ +/* + *https://forums.developer.nvidia.com/t/cublas-vs-cblas-sgemv-benchmarking-matrix-vector-operations-on-gpu-and-cpu/14878 + */ +#include + +#include + +int main(int argc, char** argv) + +{ + +double time1, timeg; + +int nbIter = 10000; + +int m; + +int n = 128; + +for (int j = 0; j < 16; ++j) { + +m = 16 << j; + +// n = m; + +printf("-------------\nEvaluating %i iterations for a matrix %ix%i\n", nbIter, m, n); + +float *mat, *x, *y; + +float *data = (float*) malloc(sizeof(float) * m * n); + +for (int i = 0; i < m*n; ++i) + + data[i] = ((float)i) / ((float)(m * n)); + + +mat = (float*) malloc(m * n * sizeof(float)); + +x = (float*) malloc(n*sizeof(float)); + +y = (float*) malloc(m*sizeof(float)); + +memcpy(mat, data, m * n * sizeof(float)); + +memcpy(x, data, n * sizeof(float)); + +memcpy(y, data, m * sizeof(float)); + +for (int i = 0; i < nbIter; ++i) +{ +begin(); + + cblas_sgemv(CblasColMajor, CblasTrans, n, m, 1, mat, n, x, 1, 1, y, 1); + +end(); +timeg += getsec(); + +} +printf("CPU Time: %10.8f (secs)\n", timeg/(double)nbIter ); + +free(mat); + +free(x); + +free(y); + +free(data); + +} +} +